Update to LLVM 3.5a.

Change-Id: Ifadecab779f128e62e430c2b4f6ddd84953ed617
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 2ee7767..25c438c 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -403,8 +403,18 @@
       continue;
 
     // Update def for Reg and aliases.
-    for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+    for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+      // We need to be careful here not to define already-live super registers.
+      // If the super register is already live, then this definition is only
+      // a partial redefinition of it, not a def of the whole super register.
+      // Earlier subregister definitions (which we've not yet visited
+      // because we're iterating bottom-up) need to be linked to the same group
+      // as this definition.
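+      // For example (x86): if %eax is live and we see a def of its
+      // subregister %ax, only the low 16 bits of %eax are rewritten, so
+      // %eax must keep its old def index rather than receive this one.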
+      if (TRI->isSuperRegister(Reg, *AI) && State->IsLive(*AI))
+        continue;
+
       DefIndices[*AI] = Count;
+    }
   }
 }
 
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index 6683630..29b6a10 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -136,7 +136,7 @@
     ~AggressiveAntiDepBreaker();
 
     /// Start - Initialize anti-dep breaking for a new basic block.
-    void StartBlock(MachineBasicBlock *BB);
+    void StartBlock(MachineBasicBlock *BB) override;
 
    /// BreakAntiDependencies - Identify anti-dependencies along the critical
     /// path
@@ -146,15 +146,16 @@
                                    MachineBasicBlock::iterator Begin,
                                    MachineBasicBlock::iterator End,
                                    unsigned InsertPosIndex,
-                                   DbgValueVector &DbgValues);
+                                   DbgValueVector &DbgValues) override;
 
     /// Observe - Update liveness information to account for the current
     /// instruction, which will not be scheduled.
     ///
-    void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex);
+    void Observe(MachineInstr *MI, unsigned Count,
+                 unsigned InsertPosIndex) override;
 
     /// Finish - Finish anti-dep breaking for a basic block.
-    void FinishBlock();
+    void FinishBlock() override;
 
   private:
     /// Keep track of a position in the allocation order for each regclass.
diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h
index aed461a..64ff2a7 100644
--- a/lib/CodeGen/AllocationOrder.h
+++ b/lib/CodeGen/AllocationOrder.h
@@ -45,10 +45,12 @@
   /// Return the next physical register in the allocation order, or 0.
   /// It is safe to call next() again after it returned 0, it will keep
   /// returning 0 until rewind() is called.
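+  /// If Limit is non-zero, it caps how many entries of the allocation order
+  /// are considered; the default of 0 scans the entire order.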
-  unsigned next() {
+  unsigned next(unsigned Limit = 0) {
     if (Pos < 0)
       return Hints.end()[Pos++];
-    while (Pos < int(Order.size())) {
+    if (!Limit)
+      Limit = Order.size();
+    while (Pos < int(Limit)) {
       unsigned Reg = Order[Pos++];
       if (!isHint(Reg))
         return Reg;
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 1600c67..6ac5de2 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -498,8 +498,7 @@
   // chain interposes between I and the return.
   if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
       !isSafeToSpeculativelyExecute(I))
-    for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
-         --BBI) {
+    for (BasicBlock::const_iterator BBI = std::prev(ExitBB->end(), 2);; --BBI) {
       if (&*BBI == I)
         break;
       // Debug info intrinsics do not get in the way of tail call optimization.
diff --git a/lib/CodeGen/Android.mk b/lib/CodeGen/Android.mk
index be0c6e2..26f04d0 100644
--- a/lib/CodeGen/Android.mk
+++ b/lib/CodeGen/Android.mk
@@ -9,6 +9,7 @@
   CalcSpillWeights.cpp \
   CallingConvLower.cpp \
   CodeGen.cpp \
+  CodeGenPrepare.cpp \
   CriticalAntiDepBreaker.cpp \
   DeadMachineInstructionElim.cpp \
   DFAPacketizer.cpp \
@@ -33,10 +34,10 @@
   LiveIntervalAnalysis.cpp \
   LiveInterval.cpp \
   LiveIntervalUnion.cpp \
+  LivePhysRegs.cpp \
   LiveRangeCalc.cpp \
   LiveRangeEdit.cpp \
   LiveRegMatrix.cpp \
-  LiveRegUnits.cpp \
   LiveStackAnalysis.cpp \
   LiveVariables.cpp \
   LLVMTargetMachine.cpp \
@@ -97,6 +98,7 @@
   SpillPlacement.cpp \
   SplitKit.cpp \
   StackColoring.cpp \
+  StackMapLivenessAnalysis.cpp \
   StackMaps.cpp \
   StackProtector.cpp \
   StackSlotColoring.cpp \
@@ -127,6 +129,7 @@
 
 # For the device
 # =====================================================
+ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
 include $(CLEAR_VARS)
 
 LOCAL_SRC_FILES := $(codegen_SRC_FILES)
@@ -137,3 +140,4 @@
 include $(LLVM_DEVICE_BUILD_MK)
 include $(LLVM_GEN_INTRINSICS_MK)
 include $(BUILD_STATIC_LIBRARY)
+endif
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 5d82dd9..403feb4 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -20,6 +20,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Mangler.h"
 #include "llvm/IR/Module.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
@@ -30,43 +31,52 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/FormattedStream.h"
-#include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
-static cl::opt<bool>
-EnableARMEHABIDescriptors("arm-enable-ehabi-descriptors", cl::Hidden,
-  cl::desc("Generate ARM EHABI tables with unwinding descriptors"),
-  cl::init(false));
-
-
 ARMException::ARMException(AsmPrinter *A)
-  : DwarfException(A) {}
+  : DwarfException(A),
+    shouldEmitCFI(false) {}
 
 ARMException::~ARMException() {}
 
 ARMTargetStreamer &ARMException::getTargetStreamer() {
-  MCTargetStreamer &TS = Asm->OutStreamer.getTargetStreamer();
+  MCTargetStreamer &TS = *Asm->OutStreamer.getTargetStreamer();
   return static_cast<ARMTargetStreamer &>(TS);
 }
 
-void ARMException::EndModule() {
+/// endModule - Emit all exception information that should come after the
+/// content.
+void ARMException::endModule() {
+  if (shouldEmitCFI)
+    Asm->OutStreamer.EmitCFISections(false, true);
 }
 
-/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// beginFunction - Gather pre-function exception information. Assumes it's
 /// being emitted immediately after the function entry point.
-void ARMException::BeginFunction(const MachineFunction *MF) {
+void ARMException::beginFunction(const MachineFunction *MF) {
   getTargetStreamer().emitFnStart();
   if (Asm->MF->getFunction()->needsUnwindTableEntry())
     Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
                                                   Asm->getFunctionNumber()));
+  // See if we need call frame info.
+  AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves();
+  assert(MoveType != AsmPrinter::CFI_M_EH &&
+         "non-EH CFI not yet supported in prologue with EHABI lowering");
+  if (MoveType == AsmPrinter::CFI_M_Debug) {
+    shouldEmitCFI = true;
+    Asm->OutStreamer.EmitCFIStartProc(false);
+  }
 }
 
-/// EndFunction - Gather and emit post-function exception information.
+/// endFunction - Gather and emit post-function exception information.
 ///
-void ARMException::EndFunction() {
+void ARMException::endFunction(const MachineFunction *) {
+  if (shouldEmitCFI)
+    Asm->OutStreamer.EmitCFIEndProc();
+
   ARMTargetStreamer &ATS = getTargetStreamer();
   if (!Asm->MF->getFunction()->needsUnwindTableEntry())
     ATS.emitCantUnwind();
@@ -74,25 +84,23 @@
     Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
                                                   Asm->getFunctionNumber()));
 
-    if (EnableARMEHABIDescriptors) {
-      // Map all labels and get rid of any dead landing pads.
-      MMI->TidyLandingPads();
+    // Map all labels and get rid of any dead landing pads.
+    MMI->TidyLandingPads();
 
-      if (!MMI->getLandingPads().empty()) {
-        // Emit references to personality.
-        if (const Function * Personality =
-            MMI->getPersonalities()[MMI->getPersonalityIndex()]) {
-          MCSymbol *PerSym = Asm->getSymbol(Personality);
-          Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global);
-          ATS.emitPersonality(PerSym);
-        }
-
-        // Emit .handlerdata directive.
-        ATS.emitHandlerData();
-
-        // Emit actual exception table
-        EmitExceptionTable();
+    if (!MMI->getLandingPads().empty()) {
+      // Emit references to personality.
+      if (const Function * Personality =
+          MMI->getPersonalities()[MMI->getPersonalityIndex()]) {
+        MCSymbol *PerSym = Asm->getSymbol(Personality);
+        Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global);
+        ATS.emitPersonality(PerSym);
       }
+
+      // Emit .handlerdata directive.
+      ATS.emitHandlerData();
+
+      // Emit actual exception table
+      EmitExceptionTable();
     }
   }
 
diff --git a/lib/CodeGen/AsmPrinter/Android.mk b/lib/CodeGen/AsmPrinter/Android.mk
index b2cc47e..a725fba 100644
--- a/lib/CodeGen/AsmPrinter/Android.mk
+++ b/lib/CodeGen/AsmPrinter/Android.mk
@@ -16,12 +16,13 @@
 	DIEHash.cpp \
 	DwarfAccelTable.cpp \
 	DwarfCFIException.cpp \
-	DwarfCompileUnit.cpp \
 	DwarfDebug.cpp	\
 	DwarfException.cpp	\
+	DwarfUnit.cpp \
 	ErlangGCPrinter.cpp \
 	OcamlGCPrinter.cpp \
-	Win64Exception.cpp
+	Win64Exception.cpp \
+	WinCodeViewLineTables.cpp
 
 LOCAL_MODULE:= libLLVMAsmPrinter
 
@@ -33,6 +34,7 @@
 
 # For the device
 # =====================================================
+ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
 include $(CLEAR_VARS)
 
 LOCAL_SRC_FILES :=	\
@@ -44,12 +46,13 @@
 	DIEHash.cpp \
 	DwarfAccelTable.cpp \
 	DwarfCFIException.cpp \
-	DwarfCompileUnit.cpp \
 	DwarfDebug.cpp  \
 	DwarfException.cpp      \
+	DwarfUnit.cpp \
 	ErlangGCPrinter.cpp \
+	OcamlGCPrinter.cpp \
 	Win64Exception.cpp \
-	$(LOCAL_SRC_FILES)
+	WinCodeViewLineTables.cpp
 
 LOCAL_MODULE:= libLLVMAsmPrinter
 
@@ -58,3 +61,4 @@
 include $(LLVM_DEVICE_BUILD_MK)
 include $(LLVM_GEN_INTRINSICS_MK)
 include $(BUILD_STATIC_LIBRARY)
+endif
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 308b0e0..c3afc8b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -15,19 +15,21 @@
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "DwarfDebug.h"
 #include "DwarfException.h"
+#include "WinCodeViewLineTables.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/GCMetadataPrinter.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/DebugInfo.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Mangler.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/MC/MCAsmInfo.h"
@@ -41,19 +43,20 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Timer.h"
-#include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Transforms/Utils/GlobalStatus.h"
 using namespace llvm;
 
 static const char *const DWARFGroupName = "DWARF Emission";
-static const char *const DbgTimerName = "DWARF Debug Writer";
+static const char *const DbgTimerName = "Debug Info Emission";
 static const char *const EHTimerName = "DWARF Exception Writer";
+static const char *const CodeViewLineTablesGroupName = "CodeView Line Tables";
 
 STATISTIC(EmittedInsts, "Number of machine instrs printed");
 
@@ -99,14 +102,14 @@
     OutContext(Streamer.getContext()),
     OutStreamer(Streamer),
     LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) {
-  DD = 0; DE = 0; MMI = 0; LI = 0; MF = 0;
+  DD = 0; MMI = 0; LI = 0; MF = 0;
   CurrentFnSym = CurrentFnSymForSize = 0;
   GCMetadataPrinters = 0;
   VerboseAsm = Streamer.isVerboseAsm();
 }
 
 AsmPrinter::~AsmPrinter() {
-  assert(DD == 0 && DE == 0 && "Debug/EH info didn't get finalized");
+  assert(DD == 0 && Handlers.empty() && "Debug/EH info didn't get finalized");
 
   if (GCMetadataPrinters != 0) {
     gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
@@ -135,6 +138,14 @@
   return *TM.getDataLayout();
 }
 
+const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const {
+  return TM.getSubtarget<MCSubtargetInfo>();
+}
+
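+// Convenience wrapper so that targets can emit an MCInst through the streamer
+// without each having to look up and pass the current subtarget info.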
+void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
+  S.EmitInstruction(Inst, getSubtargetInfo());
+}
+
 StringRef AsmPrinter::getTargetTriple() const {
   return TM.getTargetTriple();
 }
@@ -163,9 +174,28 @@
   const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
     .Initialize(OutContext, TM);
 
-  OutStreamer.InitStreamer();
+  OutStreamer.InitSections();
 
-  Mang = new Mangler(&TM);
+  Mang = new Mangler(TM.getDataLayout());
+
+  // Emit the version-min deployment target directive if needed.
+  //
+  // FIXME: If we end up with a collection of these sorts of Darwin-specific
+  // or ELF-specific things, it may make sense to have a platform helper class
+  // that will work with the target helper class. For now keep it here, as the
+  // alternative is duplicated code in each of the target asm printers that
+  // use the directive, where it would need the same conditionalization
+  // anyway.
+  Triple TT(getTargetTriple());
+  if (TT.isOSDarwin()) {
+    unsigned Major, Minor, Update;
+    TT.getOSVersion(Major, Minor, Update);
+    // If there is a version specified, Major will be non-zero.
+    if (Major)
+      OutStreamer.EmitVersionMin((TT.isMacOSX() ?
+                                  MCVM_OSXVersionMin : MCVM_IOSVersionMin),
+                                 Major, Minor, Update);
+  }
 
   // Allow the target to emit any magic that it wants at the start of the file.
   EmitStartOfAsmFile(M);
@@ -192,25 +222,65 @@
     OutStreamer.AddBlankLine();
   }
 
-  if (MAI->doesSupportDebugInformation())
-    DD = new DwarfDebug(this, &M);
+  if (MAI->doesSupportDebugInformation()) {
+    if (Triple(TM.getTargetTriple()).isKnownWindowsMSVCEnvironment()) {
+      Handlers.push_back(HandlerInfo(new WinCodeViewLineTables(this),
+                                     DbgTimerName,
+                                     CodeViewLineTablesGroupName));
+    } else {
+      DD = new DwarfDebug(this, &M);
+      Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName));
+    }
+  }
 
+  DwarfException *DE = 0;
   switch (MAI->getExceptionHandlingType()) {
   case ExceptionHandling::None:
-    return false;
+    break;
   case ExceptionHandling::SjLj:
   case ExceptionHandling::DwarfCFI:
     DE = new DwarfCFIException(this);
-    return false;
+    break;
   case ExceptionHandling::ARM:
     DE = new ARMException(this);
-    return false;
+    break;
   case ExceptionHandling::Win64:
     DE = new Win64Exception(this);
+    break;
+  }
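+  // The EH writer is driven through the same generic handler list as the
+  // debug info writers, so begin/end callbacks and timers are shared.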
+  if (DE)
+    Handlers.push_back(HandlerInfo(DE, EHTimerName, DWARFGroupName));
+  return false;
+}
+
+static bool canBeHidden(const GlobalValue *GV, const MCAsmInfo &MAI) {
+  GlobalValue::LinkageTypes Linkage = GV->getLinkage();
+  if (Linkage != GlobalValue::LinkOnceODRLinkage)
     return false;
+
+  if (!MAI.hasWeakDefCanBeHiddenDirective())
+    return false;
+
+  if (GV->hasUnnamedAddr())
+    return true;
+
+  // This is only used for MachO, so right now it doesn't really matter how
+  // we handle aliases. Revisit this once the MachO linker implements aliases.
+  if (isa<GlobalAlias>(GV))
+    return false;
+
+  // If it is a non-constant variable, it needs to be uniqued across shared
+  // objects.
+  if (const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV)) {
+    if (!Var->isConstant())
+      return false;
   }
 
-  llvm_unreachable("Unknown exception type.");
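+  // Hiding the symbol is safe only when the address of the global is never
+  // used in identity comparisons; the analyzeGlobal call below verifies that.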
+  GlobalStatus GS;
+  if (!GlobalStatus::analyzeGlobal(GV, GS) && !GS.IsCompared)
+    return true;
+
+  return false;
 }
 
 void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
@@ -221,29 +291,16 @@
   case GlobalValue::LinkOnceODRLinkage:
   case GlobalValue::WeakAnyLinkage:
   case GlobalValue::WeakODRLinkage:
-  case GlobalValue::LinkerPrivateWeakLinkage:
-    if (MAI->getWeakDefDirective() != 0) {
+    if (MAI->hasWeakDefDirective()) {
       // .globl _foo
       OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
 
-      bool CanBeHidden = false;
-
-      if (Linkage == GlobalValue::LinkOnceODRLinkage) {
-        if (GV->hasUnnamedAddr()) {
-          CanBeHidden = true;
-        } else {
-          GlobalStatus GS;
-          if (!GlobalStatus::analyzeGlobal(GV, GS) && !GS.IsCompared)
-            CanBeHidden = true;
-        }
-      }
-
-      if (!CanBeHidden)
+      if (!canBeHidden(GV, *MAI))
         // .weak_definition _foo
         OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
       else
         OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate);
-    } else if (MAI->getLinkOnceDirective() != 0) {
+    } else if (MAI->hasLinkOnceDirective()) {
       // .globl _foo
       OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
       //NOTE: linkonce is handled by the section the symbol was assigned to.
@@ -252,7 +309,6 @@
       OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak);
     }
     return;
-  case GlobalValue::DLLExportLinkage:
   case GlobalValue::AppendingLinkage:
     // FIXME: appending linkage variables should go into a section of
     // their name or something.  For now, just emit them as external.
@@ -263,19 +319,22 @@
     return;
   case GlobalValue::PrivateLinkage:
   case GlobalValue::InternalLinkage:
-  case GlobalValue::LinkerPrivateLinkage:
     return;
   case GlobalValue::AvailableExternallyLinkage:
     llvm_unreachable("Should never emit this");
-  case GlobalValue::DLLImportLinkage:
   case GlobalValue::ExternalWeakLinkage:
     llvm_unreachable("Don't know how to emit these");
   }
   llvm_unreachable("Unknown linkage type!");
 }
 
+void AsmPrinter::getNameWithPrefix(SmallVectorImpl<char> &Name,
+                                   const GlobalValue *GV) const {
+  TM.getNameWithPrefix(Name, GV, *Mang);
+}
+
 MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const {
-  return getObjFileLowering().getSymbol(*Mang, GV);
+  return TM.getSymbol(GV, *Mang);
 }
 
 /// EmitGlobalVariable - Emit the specified global variable to the .s file.
@@ -286,7 +345,7 @@
       return;
 
     if (isVerbose()) {
-      WriteAsOperand(OutStreamer.GetCommentOS(), GV,
+      GV->printAsOperand(OutStreamer.GetCommentOS(),
                      /*PrintType=*/false, GV->getParent());
       OutStreamer.GetCommentOS() << '\n';
     }
@@ -311,8 +370,11 @@
   // sections and expected to be contiguous (e.g. ObjC metadata).
   unsigned AlignLog = getGVAlignmentLog2(GV, *DL);
 
-  if (DD)
-    DD->setSymbolSize(GVSym, Size);
+  for (unsigned I = 0, E = Handlers.size(); I != E; ++I) {
+    const HandlerInfo &OI = Handlers[I];
+    NamedRegionTimer T(OI.TimerName, OI.TimerGroupName, TimePassesIsEnabled);
+    OI.Handler->setSymbolSize(GVSym, Size);
+  }
 
   // Handle common and BSS local symbols (.lcomm).
   if (GVKind.isCommon() || GVKind.isBSSLocal()) {
@@ -332,7 +394,7 @@
     // Handle local BSS symbols.
     if (MAI->hasMachoZeroFillDirective()) {
       const MCSection *TheSection =
-        getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM);
+        getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM);
       // .zerofill __DATA, __bss, _foo, 400, 5
       OutStreamer.EmitZerofill(TheSection, GVSym, Size, Align);
       return;
@@ -361,7 +423,7 @@
   }
 
   const MCSection *TheSection =
-    getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM);
+    getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM);
 
   // Handle the zerofill directive on darwin, which is a special form of BSS
   // emission.
@@ -452,7 +514,8 @@
   // Print the 'header' of function.
   const Function *F = MF->getFunction();
 
-  OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
+  OutStreamer.SwitchSection(
+      getObjFileLowering().SectionForGlobal(F, *Mang, TM));
   EmitVisibility(CurrentFnSym, F->getVisibility());
 
   EmitLinkage(F, CurrentFnSym);
@@ -462,7 +525,7 @@
     OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction);
 
   if (isVerbose()) {
-    WriteAsOperand(OutStreamer.GetCommentOS(), F,
+    F->printAsOperand(OutStreamer.GetCommentOS(),
                    /*PrintType=*/false, F->getParent());
     OutStreamer.GetCommentOS() << '\n';
   }
@@ -482,13 +545,10 @@
   }
 
   // Emit pre-function debug and/or EH information.
-  if (DE) {
-    NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
-    DE->BeginFunction(MF);
-  }
-  if (DD) {
-    NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
-    DD->beginFunction(MF);
+  for (unsigned I = 0, E = Handlers.size(); I != E; ++I) {
+    const HandlerInfo &OI = Handlers[I];
+    NamedRegionTimer T(OI.TimerName, OI.TimerGroupName, TimePassesIsEnabled);
+    OI.Handler->beginFunction(MF);
   }
 
   // Emit the prefix data.
@@ -576,10 +636,9 @@
 
   SmallString<128> Str;
   raw_svector_ostream OS(Str);
-  OS << '\t' << AP.MAI->getCommentString() << "DEBUG_VALUE: ";
+  OS << "DEBUG_VALUE: ";
 
-  // cast away const; DIetc do not take const operands for some reason.
-  DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata()));
+  DIVariable V(MI->getOperand(2).getMetadata());
   if (V.getContext().isSubprogram()) {
     StringRef Name = DISubprogram(V.getContext()).getDisplayName();
     if (!Name.empty())
@@ -625,7 +684,7 @@
       // Suppress offset, it is not meaningful here.
       OS << "undef";
       // NOTE: Want this comment at start of line, don't emit with AddComment.
-      AP.OutStreamer.EmitRawText(OS.str());
+      AP.OutStreamer.emitRawComment(OS.str());
       return true;
     }
     if (Deref)
@@ -637,7 +696,7 @@
     OS << '+' << Offset << ']';
 
   // NOTE: Want this comment at start of line, don't emit with AddComment.
-  AP.OutStreamer.EmitRawText(OS.str());
+  AP.OutStreamer.emitRawComment(OS.str());
   return true;
 }
 
@@ -657,14 +716,11 @@
     MF->getFunction()->needsUnwindTableEntry();
 }
 
-bool AsmPrinter::needsRelocationsForDwarfStringPool() const {
-  return MAI->doesDwarfUseRelocationsAcrossSections();
-}
-
-void AsmPrinter::emitPrologLabel(const MachineInstr &MI) {
-  const MCSymbol *Label = MI.getOperand(0).getMCSymbol();
-
-  if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI)
+void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
+  ExceptionHandling::ExceptionsType ExceptionHandlingType =
+      MAI->getExceptionHandlingType();
+  if (ExceptionHandlingType != ExceptionHandling::DwarfCFI &&
+      ExceptionHandlingType != ExceptionHandling::ARM)
     return;
 
   if (needsCFIMoves() == CFI_M_None)
@@ -675,16 +731,9 @@
 
   const MachineModuleInfo &MMI = MF->getMMI();
   const std::vector<MCCFIInstruction> &Instrs = MMI.getFrameInstructions();
-  bool FoundOne = false;
-  (void)FoundOne;
-  for (std::vector<MCCFIInstruction>::const_iterator I = Instrs.begin(),
-         E = Instrs.end(); I != E; ++I) {
-    if (I->getLabel() == Label) {
-      emitCFIInstruction(*I);
-      FoundOne = true;
-    }
-  }
-  assert(FoundOne);
+  unsigned CFIIndex = MI.getOperand(0).getCFIIndex();
+  const MCCFIInstruction &CFI = Instrs[CFIIndex];
+  emitCFIInstruction(CFI);
 }
 
 /// EmitFunctionBody - This method emits the body and trailer for a
@@ -693,7 +742,7 @@
   // Emit target-specific gunk before the function body.
   EmitFunctionBodyStart();
 
-  bool ShouldPrintDebugScopes = DD && MMI->hasDebugInfo();
+  bool ShouldPrintDebugScopes = MMI->hasDebugInfo();
 
   // Print out code for the function.
   bool HasAnyRealCode = false;
@@ -707,23 +756,27 @@
       LastMI = II;
 
       // Print the assembly for the instruction.
-      if (!II->isLabel() && !II->isImplicitDef() && !II->isKill() &&
+      if (!II->isPosition() && !II->isImplicitDef() && !II->isKill() &&
           !II->isDebugValue()) {
         HasAnyRealCode = true;
         ++EmittedInsts;
       }
 
       if (ShouldPrintDebugScopes) {
-        NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
-        DD->beginInstruction(II);
+        for (unsigned III = 0, EEE = Handlers.size(); III != EEE; ++III) {
+          const HandlerInfo &OI = Handlers[III];
+          NamedRegionTimer T(OI.TimerName, OI.TimerGroupName,
+                             TimePassesIsEnabled);
+          OI.Handler->beginInstruction(II);
+        }
       }
 
       if (isVerbose())
         emitComments(*II, OutStreamer.GetCommentOS());
 
       switch (II->getOpcode()) {
-      case TargetOpcode::PROLOG_LABEL:
-        emitPrologLabel(*II);
+      case TargetOpcode::CFI_INSTRUCTION:
+        emitCFIInstruction(*II);
         break;
 
       case TargetOpcode::EH_LABEL:
@@ -746,16 +799,17 @@
         if (isVerbose()) emitKill(II, *this);
         break;
       default:
-        if (!TM.hasMCUseLoc())
-          MCLineEntry::Make(&OutStreamer, getCurrentSection());
-
         EmitInstruction(II);
         break;
       }
 
       if (ShouldPrintDebugScopes) {
-        NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
-        DD->endInstruction(II);
+        for (unsigned III = 0, EEE = Handlers.size(); III != EEE; ++III) {
+          const HandlerInfo &OI = Handlers[III];
+          NamedRegionTimer T(OI.TimerName, OI.TimerGroupName,
+                             TimePassesIsEnabled);
+          OI.Handler->endInstruction();
+        }
       }
     }
   }
@@ -765,7 +819,7 @@
   // label equaling the end of function label and an invalid "row" in the
   // FDE. We need to emit a noop in this situation so that the FDE's rows are
   // valid.
-  bool RequiresNoop = LastMI && LastMI->isPrologLabel();
+  bool RequiresNoop = LastMI && LastMI->isCFIInstruction();
 
   // If the function is empty and the object file uses .subsections_via_symbols,
   // then we need to emit *something* to the function body to prevent the
@@ -775,7 +829,7 @@
     TM.getInstrInfo()->getNoopForMachoTarget(Noop);
     if (Noop.getOpcode()) {
       OutStreamer.AddComment("avoids zero-length function");
-      OutStreamer.EmitInstruction(Noop);
+      OutStreamer.EmitInstruction(Noop, getSubtargetInfo());
     } else  // Target not mc-ized yet.
       OutStreamer.EmitRawText(StringRef("\tnop\n"));
   }
@@ -811,14 +865,11 @@
     OutStreamer.EmitELFSize(CurrentFnSym, SizeExp);
   }
 
-  // Emit post-function debug information.
-  if (DD) {
-    NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
-    DD->endFunction(MF);
-  }
-  if (DE) {
-    NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
-    DE->EndFunction();
+  // Emit post-function debug and/or EH information.
+  for (unsigned I = 0, E = Handlers.size(); I != E; ++I) {
+    const HandlerInfo &OI = Handlers[I];
+    NamedRegionTimer T(OI.TimerName, OI.TimerGroupName, TimePassesIsEnabled);
+    OI.Handler->endFunction(MF);
   }
   MMI->EndFunction();
 
@@ -828,56 +879,6 @@
   OutStreamer.AddBlankLine();
 }
 
-/// EmitDwarfRegOp - Emit dwarf register operation.
-void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc,
-                                bool Indirect) const {
-  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
-  int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
-
-  for (MCSuperRegIterator SR(MLoc.getReg(), TRI); SR.isValid() && Reg < 0;
-       ++SR) {
-    Reg = TRI->getDwarfRegNum(*SR, false);
-    // FIXME: Get the bit range this register uses of the superregister
-    // so that we can produce a DW_OP_bit_piece
-  }
-
-  // FIXME: Handle cases like a super register being encoded as
-  // DW_OP_reg 32 DW_OP_piece 4 DW_OP_reg 33
-
-  // FIXME: We have no reasonable way of handling errors in here. The
-  // caller might be in the middle of an dwarf expression. We should
-  // probably assert that Reg >= 0 once debug info generation is more mature.
-
-  if (MLoc.isIndirect() || Indirect) {
-    if (Reg < 32) {
-      OutStreamer.AddComment(
-        dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg));
-      EmitInt8(dwarf::DW_OP_breg0 + Reg);
-    } else {
-      OutStreamer.AddComment("DW_OP_bregx");
-      EmitInt8(dwarf::DW_OP_bregx);
-      OutStreamer.AddComment(Twine(Reg));
-      EmitULEB128(Reg);
-    }
-    EmitSLEB128(!MLoc.isIndirect() ? 0 : MLoc.getOffset());
-    if (MLoc.isIndirect() && Indirect)
-      EmitInt8(dwarf::DW_OP_deref);
-  } else {
-    if (Reg < 32) {
-      OutStreamer.AddComment(
-        dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg));
-      EmitInt8(dwarf::DW_OP_reg0 + Reg);
-    } else {
-      OutStreamer.AddComment("DW_OP_regx");
-      EmitInt8(dwarf::DW_OP_regx);
-      OutStreamer.AddComment(Twine(Reg));
-      EmitULEB128(Reg);
-    }
-  }
-
-  // FIXME: Produce a DW_OP_bit_piece if we used a superregister
-}
-
 bool AsmPrinter::doFinalization(Module &M) {
   // Emit global variables.
   for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
@@ -901,26 +902,21 @@
   SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
   M.getModuleFlagsMetadata(ModuleFlags);
   if (!ModuleFlags.empty())
-    getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, Mang, TM);
+    getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, *Mang, TM);
 
   // Make sure we wrote out everything we need.
   OutStreamer.Flush();
 
   // Finalize debug and EH information.
-  if (DE) {
-    {
-      NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
-      DE->EndModule();
-    }
-    delete DE; DE = 0;
+  for (unsigned I = 0, E = Handlers.size(); I != E; ++I) {
+    const HandlerInfo &OI = Handlers[I];
+    NamedRegionTimer T(OI.TimerName, OI.TimerGroupName,
+                       TimePassesIsEnabled);
+    OI.Handler->endModule();
+    delete OI.Handler;
   }
-  if (DD) {
-    {
-      NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
-      DD->endModule();
-    }
-    delete DD; DD = 0;
-  }
+  Handlers.clear();
+  DD = 0;
 
   // If the target wants to know about weak references, print them all.
   if (MAI->getWeakRefDirective()) {
@@ -949,11 +945,7 @@
       MCSymbol *Name = getSymbol(I);
 
       const GlobalValue *GV = I->getAliasedGlobal();
-      if (GV->isDeclaration()) {
-        report_fatal_error(Name->getName() +
-                           ": Target doesn't support aliases to declarations");
-      }
-
+      assert(!GV->isDeclaration());
       MCSymbol *Target = getSymbol(GV);
 
       if (I->hasExternalLinkage() || !MAI->getWeakRefDirective())
@@ -1106,6 +1098,7 @@
 /// by the current function to the current output stream.
 ///
 void AsmPrinter::EmitJumpTableInfo() {
+  const DataLayout *DL = MF->getTarget().getDataLayout();
   const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
   if (MJTI == 0) return;
   if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return;
@@ -1125,7 +1118,8 @@
       // FIXME: this isn't the right predicate, should be based on the MCSection
       // for the function.
       F->isWeakForLinker()) {
-    OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F,Mang,TM));
+    OutStreamer.SwitchSection(
+        getObjFileLowering().SectionForGlobal(F, *Mang, TM));
   } else {
     // Otherwise, drop it in the readonly section.
     const MCSection *ReadOnlySection =
@@ -1171,7 +1165,7 @@
     // before each jump table.  The first label is never referenced, but tells
     // the assembler and linker the extents of the jump table object.  The
     // second label is actually referenced by the code.
-    if (JTInDiffSection && MAI->getLinkerPrivateGlobalPrefix()[0])
+    if (JTInDiffSection && DL->hasLinkerPrivateGlobalPrefix())
       // FIXME: This doesn't have to have any specific name, just any randomly
       // named and numbered 'l' label would work.  Simplify GetJTISymbol.
       OutStreamer.EmitLabel(GetJTISymbol(JTI, true));
@@ -1309,7 +1303,7 @@
   for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
     const GlobalValue *GV =
       dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
-    if (GV && getObjFileLowering().shouldEmitUsedDirectiveFor(GV, Mang))
+    if (GV)
       OutStreamer.EmitSymbolAttribute(getSymbol(GV), MCSA_NoDeadStrip);
   }
 }
@@ -1366,7 +1360,7 @@
   if (const NamedMDNode *NMD = M.getNamedMetadata("llvm.ident")) {
     for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
       const MDNode *N = NMD->getOperand(i);
-      assert(N->getNumOperands() == 1 && 
+      assert(N->getNumOperands() == 1 &&
              "llvm.ident metadata entry can have only one operand");
       const MDString *S = cast<MDString>(N->getOperand(0));
       OutStreamer.EmitIdent(S->getString());
@@ -1422,8 +1416,8 @@
 /// where the size in bytes of the directive is specified by Size and Hi/Lo
 /// specify the labels.  This implicitly uses .set if it is available.
 void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
-                                           const MCSymbol *Lo, unsigned Size)
-  const {
+                                           const MCSymbol *Lo,
+                                           unsigned Size) const {
 
   // Emit Hi+Offset - Lo
   // Get the Hi+Offset expression.
@@ -1452,8 +1446,8 @@
 /// where the size in bytes of the directive is specified by Size and Label
 /// specifies the label.  This implicitly uses .set if it is available.
 void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
-                                      unsigned Size, bool IsSectionRelative)
-  const {
+                                     unsigned Size,
+                                     bool IsSectionRelative) const {
   if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) {
     OutStreamer.EmitCOFFSecRel32(Label);
     return;
@@ -1462,14 +1456,12 @@
   // Emit Label+Offset (or just Label if Offset is zero)
   const MCExpr *Expr = MCSymbolRefExpr::Create(Label, OutContext);
   if (Offset)
-    Expr = MCBinaryExpr::CreateAdd(Expr,
-                                   MCConstantExpr::Create(Offset, OutContext),
-                                   OutContext);
+    Expr = MCBinaryExpr::CreateAdd(
+        Expr, MCConstantExpr::Create(Offset, OutContext), OutContext);
 
   OutStreamer.EmitValue(Expr, Size);
 }
 
-
 //===----------------------------------------------------------------------===//
 
 // EmitAlignment - Emit an alignment directive to the specified power of
@@ -1486,7 +1478,7 @@
   if (getCurrentSection()->getKind().isText())
     OutStreamer.EmitCodeAlignment(1 << NumBits);
   else
-    OutStreamer.EmitValueToAlignment(1 << NumBits, 0, 1, 0);
+    OutStreamer.EmitValueToAlignment(1 << NumBits);
 }
 
 //===----------------------------------------------------------------------===//
@@ -1515,6 +1507,11 @@
     llvm_unreachable("Unknown constant value to lower!");
   }
 
+  if (const MCExpr *RelocExpr =
+          AP.getObjFileLowering().getExecutableRelativeSymbol(CE, *AP.Mang,
+                                                              AP.TM))
+    return RelocExpr;
+
   switch (CE->getOpcode()) {
   default:
     // If the code isn't optimized, there may be outstanding folding
@@ -1530,7 +1527,7 @@
       std::string S;
       raw_string_ostream OS(S);
       OS << "Unsupported expression in static initializer: ";
-      WriteAsOperand(OS, CE, /*PrintType=*/false,
+      CE->printAsOperand(OS, /*PrintType=*/false,
                      !AP.MF ? 0 : AP.MF->getFunction()->getParent());
       report_fatal_error(OS.str());
     }
@@ -1994,15 +1991,17 @@
 
 /// GetTempSymbol - Return the MCSymbol corresponding to the assembler
 /// temporary label with the specified stem and unique ID.
-MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name, unsigned ID) const {
-  return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
+MCSymbol *AsmPrinter::GetTempSymbol(Twine Name, unsigned ID) const {
+  const DataLayout *DL = TM.getDataLayout();
+  return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix()) +
                                       Name + Twine(ID));
 }
 
 /// GetTempSymbol - Return an assembler temporary label with the specified
 /// stem.
-MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name) const {
-  return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix())+
+MCSymbol *AsmPrinter::GetTempSymbol(Twine Name) const {
+  const DataLayout *DL = TM.getDataLayout();
+  return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
                                       Name);
 }
 
@@ -2017,8 +2016,9 @@
 
 /// GetCPISymbol - Return the symbol for the specified constant pool entry.
 MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
+  const DataLayout *DL = TM.getDataLayout();
   return OutContext.GetOrCreateSymbol
-    (Twine(MAI->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber())
+    (Twine(DL->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber())
      + "_" + Twine(CPID));
 }
 
@@ -2030,21 +2030,16 @@
 /// GetJTSetSymbol - Return the symbol for the specified jump table .set
 /// FIXME: privatize to AsmPrinter.
 MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const {
+  const DataLayout *DL = TM.getDataLayout();
   return OutContext.GetOrCreateSymbol
-  (Twine(MAI->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" +
+  (Twine(DL->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" +
    Twine(UID) + "_set_" + Twine(MBBID));
 }
 
-/// GetSymbolWithGlobalValueBase - Return the MCSymbol for a symbol with
-/// global value name as its base, with the specified suffix, and where the
-/// symbol is forced to have private linkage if ForcePrivate is true.
-MCSymbol *AsmPrinter::GetSymbolWithGlobalValueBase(const GlobalValue *GV,
-                                                   StringRef Suffix,
-                                                   bool ForcePrivate) const {
-  SmallString<60> NameStr;
-  Mang->getNameWithPrefix(NameStr, GV, ForcePrivate);
-  NameStr.append(Suffix.begin(), Suffix.end());
-  return OutContext.GetOrCreateSymbol(NameStr.str());
+MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV,
+                                                   StringRef Suffix) const {
+  return getObjFileLowering().getSymbolWithGlobalValueBase(GV, Suffix, *Mang,
+                                                           TM);
 }
 
 /// GetExternalSymbolSymbol - Return the MCSymbol for the specified
@@ -2155,10 +2150,9 @@
 
   // Print the main label for the block.
   if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) {
-    if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+    if (isVerbose()) {
       // NOTE: Want this comment at start of line, don't emit with AddComment.
-      OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" +
-                              Twine(MBB->getNumber()) + ":");
+      OutStreamer.emitRawComment(" BB#" + Twine(MBB->getNumber()) + ":", false);
     }
   } else {
     OutStreamer.EmitLabel(MBB->getSymbol());
@@ -2221,14 +2215,13 @@
     if (!MI.isBranch() || MI.isIndirectBranch())
       return false;
 
-    // If we are the operands of one of the branches, this is not
-    // a fall through.
-    for (MachineInstr::mop_iterator OI = MI.operands_begin(),
-           OE = MI.operands_end(); OI != OE; ++OI) {
-      const MachineOperand& OP = *OI;
-      if (OP.isJTI())
+    // If we are the operands of one of the branches, this is not a fall
+    // through. Note that targets with delay slots will usually bundle
+    // terminators with the delay slot instruction.
+    for (ConstMIBundleOperands OP(&MI); OP.isValid(); ++OP) {
+      if (OP->isJTI())
         return false;
-      if (OP.isMBB() && OP.getMBB() == MBB)
+      if (OP->isMBB() && OP->getMBB() == MBB)
         return false;
     }
   }
@@ -2261,3 +2254,6 @@
 
   report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name));
 }
+
+/// Pin vtable to this file.
+AsmPrinterHandler::~AsmPrinterHandler() {}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index b92f49c..b696069 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -12,7 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "asm-printer"
+#include "ByteStreamer.h"
 #include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/MC/MCAsmInfo.h"
@@ -52,9 +54,9 @@
 /// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value.
 void AsmPrinter::EmitCFAByte(unsigned Val) const {
   if (isVerbose()) {
-    if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset+64)
+    if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset + 64)
       OutStreamer.AddComment("DW_CFA_offset + Reg (" +
-                             Twine(Val-dwarf::DW_CFA_offset) + ")");
+                             Twine(Val - dwarf::DW_CFA_offset) + ")");
     else
       OutStreamer.AddComment(dwarf::CallFrameString(Val));
   }
@@ -63,43 +65,56 @@
 
 static const char *DecodeDWARFEncoding(unsigned Encoding) {
   switch (Encoding) {
-  case dwarf::DW_EH_PE_absptr: return "absptr";
-  case dwarf::DW_EH_PE_omit:   return "omit";
-  case dwarf::DW_EH_PE_pcrel:  return "pcrel";
-  case dwarf::DW_EH_PE_udata4: return "udata4";
-  case dwarf::DW_EH_PE_udata8: return "udata8";
-  case dwarf::DW_EH_PE_sdata4: return "sdata4";
-  case dwarf::DW_EH_PE_sdata8: return "sdata8";
-  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4: return "pcrel udata4";
-  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4: return "pcrel sdata4";
-  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8: return "pcrel udata8";
-  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8: return "pcrel sdata8";
-  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4:
+  case dwarf::DW_EH_PE_absptr:
+    return "absptr";
+  case dwarf::DW_EH_PE_omit:
+    return "omit";
+  case dwarf::DW_EH_PE_pcrel:
+    return "pcrel";
+  case dwarf::DW_EH_PE_udata4:
+    return "udata4";
+  case dwarf::DW_EH_PE_udata8:
+    return "udata8";
+  case dwarf::DW_EH_PE_sdata4:
+    return "sdata4";
+  case dwarf::DW_EH_PE_sdata8:
+    return "sdata8";
+  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4:
+    return "pcrel udata4";
+  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4:
+    return "pcrel sdata4";
+  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8:
+    return "pcrel udata8";
+  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8:
+    return "pcrel sdata8";
+  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+      dwarf::DW_EH_PE_udata4:
     return "indirect pcrel udata4";
-  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4:
+  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+      dwarf::DW_EH_PE_sdata4:
     return "indirect pcrel sdata4";
-  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8:
+  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+      dwarf::DW_EH_PE_udata8:
     return "indirect pcrel udata8";
-  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8:
+  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+      dwarf::DW_EH_PE_sdata8:
     return "indirect pcrel sdata8";
   }
 
   return "<unknown encoding>";
 }
 
-
 /// EmitEncodingByte - Emit a .byte 42 directive that corresponds to an
 /// encoding.  If verbose assembly output is enabled, we output comments
 /// describing the encoding.  Desc is an optional string saying what the
 /// encoding is specifying (e.g. "LSDA").
 void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const {
   if (isVerbose()) {
-    if (Desc != 0)
-      OutStreamer.AddComment(Twine(Desc)+" Encoding = " +
+    if (Desc)
+      OutStreamer.AddComment(Twine(Desc) + " Encoding = " +
                              Twine(DecodeDWARFEncoding(Val)));
     else
-      OutStreamer.AddComment(Twine("Encoding = ") +
-                             DecodeDWARFEncoding(Val));
+      OutStreamer.AddComment(Twine("Encoding = ") + DecodeDWARFEncoding(Val));
   }
 
   OutStreamer.EmitIntValue(Val, 1);
@@ -111,11 +126,16 @@
     return 0;
 
   switch (Encoding & 0x07) {
-  default: llvm_unreachable("Invalid encoded value.");
-  case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize();
-  case dwarf::DW_EH_PE_udata2: return 2;
-  case dwarf::DW_EH_PE_udata4: return 4;
-  case dwarf::DW_EH_PE_udata8: return 8;
+  default:
+    llvm_unreachable("Invalid encoded value.");
+  case dwarf::DW_EH_PE_absptr:
+    return TM.getDataLayout()->getPointerSize();
+  case dwarf::DW_EH_PE_udata2:
+    return 2;
+  case dwarf::DW_EH_PE_udata4:
+    return 4;
+  case dwarf::DW_EH_PE_udata8:
+    return 8;
   }
 }
 
@@ -125,7 +145,7 @@
     const TargetLoweringObjectFile &TLOF = getObjFileLowering();
 
     const MCExpr *Exp =
-      TLOF.getTTypeGlobalReference(GV, Mang, MMI, Encoding, OutStreamer);
+        TLOF.getTTypeGlobalReference(GV, Encoding, *Mang, TM, MMI, OutStreamer);
     OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding));
   } else
     OutStreamer.EmitIntValue(0, GetSizeOfEncodedValue(Encoding));
@@ -165,6 +185,150 @@
   EmitLabelDifference(Label, SectionLabel, 4);
 }
 
+/// Emit a dwarf register operation.
+static void emitDwarfRegOp(ByteStreamer &Streamer, int Reg) {
+  assert(Reg >= 0);
+  if (Reg < 32) {
+    Streamer.EmitInt8(dwarf::DW_OP_reg0 + Reg,
+                      dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg));
+  } else {
+    Streamer.EmitInt8(dwarf::DW_OP_regx, "DW_OP_regx");
+    Streamer.EmitULEB128(Reg, Twine(Reg));
+  }
+}
+
+/// Emit a (double-)indirect dwarf register operation.
+static void emitDwarfRegOpIndirect(ByteStreamer &Streamer, int Reg, int Offset,
+                                   bool Deref) {
+  assert(Reg >= 0);
+  if (Reg < 32) {
+    Streamer.EmitInt8(dwarf::DW_OP_breg0 + Reg,
+                      dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg));
+  } else {
+    Streamer.EmitInt8(dwarf::DW_OP_bregx, "DW_OP_bregx");
+    Streamer.EmitULEB128(Reg, Twine(Reg));
+  }
+  Streamer.EmitSLEB128(Offset);
+  if (Deref)
+    Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref");
+}
+
+/// Emit a dwarf register operation for describing
+/// - a small value occupying only part of a register or
+/// - a small register representing only part of a value.
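+///
+/// Size and Offset are expressed in bits. With a zero Offset the more compact
+/// byte-granular DW_OP_piece form is emitted; otherwise DW_OP_bit_piece
+/// carries both the bit size and the bit offset.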
+static void emitDwarfOpPiece(ByteStreamer &Streamer, unsigned Size,
+                             unsigned Offset) {
+  assert(Size > 0);
+  if (Offset > 0) {
+    Streamer.EmitInt8(dwarf::DW_OP_bit_piece, "DW_OP_bit_piece");
+    Streamer.EmitULEB128(Size, Twine(Size));
+    Streamer.EmitULEB128(Offset, Twine(Offset));
+  } else {
+    Streamer.EmitInt8(dwarf::DW_OP_piece, "DW_OP_piece");
+    unsigned ByteSize = Size / 8; // Assuming 8 bits per byte.
+    Streamer.EmitULEB128(ByteSize, Twine(ByteSize));
+  }
+}
+
+/// Some targets do not provide a DWARF register number for every
+/// register.  This function attempts to emit a dwarf register by
+/// emitting a piece of a super-register or by piecing together
+/// multiple subregisters that alias the register.
+static void EmitDwarfRegOpPiece(ByteStreamer &Streamer, const AsmPrinter &AP,
+                                const MachineLocation &MLoc) {
+  assert(!MLoc.isIndirect());
+  const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo();
+  int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
+
+  // Walk up the super-register chain until we find a valid number.
+  // For example, EAX on x86_64 is a 32-bit piece of RAX with offset 0.
+  for (MCSuperRegIterator SR(MLoc.getReg(), TRI); SR.isValid(); ++SR) {
+    Reg = TRI->getDwarfRegNum(*SR, false);
+    if (Reg >= 0) {
+      unsigned Idx = TRI->getSubRegIndex(*SR, MLoc.getReg());
+      unsigned Size = TRI->getSubRegIdxSize(Idx);
+      unsigned Offset = TRI->getSubRegIdxOffset(Idx);
+      AP.OutStreamer.AddComment("super-register");
+      emitDwarfRegOp(Streamer, Reg);
+      emitDwarfOpPiece(Streamer, Size, Offset);
+      return;
+    }
+  }
+
+  // Otherwise, attempt to find a covering set of sub-register numbers.
+  // For example, Q0 on ARM is a composition of D0+D1.
+  //
+  // Keep track of the current position so we can emit the more
+  // efficient DW_OP_piece.
+  unsigned CurPos = 0;
+  // The size of the register in bits, assuming 8 bits per byte.
+  unsigned RegSize = TRI->getMinimalPhysRegClass(MLoc.getReg())->getSize() * 8;
+  // Keep track of the bits in the register we already emitted, so we
+  // can avoid emitting redundant aliasing subregs.
+  SmallBitVector Coverage(RegSize, false);
+  for (MCSubRegIterator SR(MLoc.getReg(), TRI); SR.isValid(); ++SR) {
+    unsigned Idx = TRI->getSubRegIndex(MLoc.getReg(), *SR);
+    unsigned Size = TRI->getSubRegIdxSize(Idx);
+    unsigned Offset = TRI->getSubRegIdxOffset(Idx);
+    Reg = TRI->getDwarfRegNum(*SR, false);
+
+    // Intersection between the bits we already emitted and the bits
+    // covered by this subregister.
+    SmallBitVector Intersection(RegSize, false);
+    Intersection.set(Offset, Offset + Size);
+    Intersection ^= Coverage;
+
+    // If this sub-register has a DWARF number and we haven't covered
+    // its range, emit a DWARF piece for it.
+    if (Reg >= 0 && Intersection.any()) {
+      AP.OutStreamer.AddComment("sub-register");
+      emitDwarfRegOp(Streamer, Reg);
+      emitDwarfOpPiece(Streamer, Size, Offset == CurPos ? 0 : Offset);
+      CurPos = Offset + Size;
+
+      // Mark it as emitted.
+      Coverage.set(Offset, Offset + Size);
+    }
+  }
+
+  if (CurPos == 0) {
+    // FIXME: We have no reasonable way of handling errors in here.
+    Streamer.EmitInt8(dwarf::DW_OP_nop,
+                      "nop (could not find a dwarf register number)");
+  }
+}
+
+/// EmitDwarfRegOp - Emit dwarf register operation.
+void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer,
+                                const MachineLocation &MLoc,
+                                bool Indirect) const {
+  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+  int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
+  if (Reg < 0) {
+    // We assume that pointers are always in an addressable register.
+    if (Indirect || MLoc.isIndirect()) {
+      // FIXME: We have no reasonable way of handling errors in here. The
+      // caller might be in the middle of a dwarf expression. We should
+      // probably assert that Reg >= 0 once debug info generation is more
+      // mature.
+      Streamer.EmitInt8(dwarf::DW_OP_nop,
+                        "nop (invalid dwarf register number for indirect loc)");
+      return;
+    }
+
+    // Attempt to find a valid super- or sub-register.
+    if (!Indirect && !MLoc.isIndirect())
+      return EmitDwarfRegOpPiece(Streamer, *this, MLoc);
+  }
+
+  if (MLoc.isIndirect())
+    emitDwarfRegOpIndirect(Streamer, Reg, MLoc.getOffset(), Indirect);
+  else if (Indirect)
+    emitDwarfRegOpIndirect(Streamer, Reg, 0, false);
+  else
+    emitDwarfRegOp(Streamer, Reg);
+}
+
 //===----------------------------------------------------------------------===//
 // Dwarf Lowering Routines
 //===----------------------------------------------------------------------===//
@@ -191,5 +355,8 @@
   case MCCFIInstruction::OpWindowSave:
     OutStreamer.EmitCFIWindowSave();
     break;
+  case MCCFIInstruction::OpSameValue:
+    OutStreamer.EmitCFISameValue(Inst.getRegister());
+    break;
   }
 }
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
new file mode 100644
index 0000000..2825367
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
@@ -0,0 +1,57 @@
+//===-- lib/CodeGen/AsmPrinter/AsmPrinterHandler.h -------------*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a generic interface for AsmPrinter handlers,
+// like debug and EH info emitters.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H__
+#define CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H__
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class MachineFunction;
+class MachineInstr;
+class MCSymbol;
+
+/// \brief Collects and handles AsmPrinter objects required to build debug
+/// or EH information.
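+/// Implementations include the DWARF debug and exception writers as well as
+/// the Windows CodeView line table writer.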
+class AsmPrinterHandler {
+public:
+  virtual ~AsmPrinterHandler();
+
+  /// \brief For symbols that have a size designated (e.g. common symbols),
+  /// this tracks that size.
+  virtual void setSymbolSize(const MCSymbol *Sym, uint64_t Size) = 0;
+
+  /// \brief Emit all sections that should come after the content.
+  virtual void endModule() = 0;
+
+  /// \brief Gather pre-function debug information.
+  /// Every beginFunction(MF) call should be followed by an endFunction(MF)
+  /// call.
+  virtual void beginFunction(const MachineFunction *MF) = 0;
+
+  /// \brief Gather post-function debug information.
+  /// Please note that some AsmPrinter implementations may not call
+  /// beginFunction at all.
+  virtual void endFunction(const MachineFunction *MF) = 0;
+
+  /// \brief Process beginning of an instruction.
+  virtual void beginInstruction(const MachineInstr *MI) = 0;
+
+  /// \brief Process end of an instruction.
+  virtual void endInstruction() = 0;
+};
+} // End of namespace llvm
+
+#endif
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 4f927f6..567b6e3 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -13,12 +13,12 @@
 
 #define DEBUG_TYPE "asm-printer"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
@@ -33,6 +33,7 @@
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 using namespace llvm;
 
 namespace {
@@ -77,11 +78,17 @@
   if (isNullTerminated)
     Str = Str.substr(0, Str.size()-1);
 
-  // If the output streamer is actually a .s file, just emit the blob textually.
+  // If the output streamer does not have mature MC support or the integrated
+  // assembler has been disabled, just emit the blob textually.
+  // Otherwise parse the asm and emit it via MC support.
   // This is useful in case the asm parser doesn't handle something but the
   // system assembler does.
-  if (OutStreamer.hasRawTextSupport()) {
+  const MCAsmInfo *MCAI = TM.getMCAsmInfo();
+  assert(MCAI && "No MCAsmInfo");
+  if (!MCAI->useIntegratedAssembler() &&
+      !OutStreamer.isIntegratedAssemblerRequired()) {
     OutStreamer.EmitRawText(Str);
+    emitInlineAsmEnd(TM.getSubtarget<MCSubtargetInfo>(), 0);
     return;
   }
 
@@ -110,20 +117,25 @@
   // Tell SrcMgr about this buffer, it takes ownership of the buffer.
   SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
 
-  OwningPtr<MCAsmParser> Parser(createMCAsmParser(SrcMgr,
-                                                  OutContext, OutStreamer,
-                                                  *MAI));
+  std::unique_ptr<MCAsmParser> Parser(
+      createMCAsmParser(SrcMgr, OutContext, OutStreamer, *MAI));
 
-  // FIXME: It would be nice if we can avoid createing a new instance of
-  // MCSubtargetInfo here given TargetSubtargetInfo is available. However,
-  // we have to watch out for asm directives which can change subtarget
-  // state. e.g. .code 16, .code 32.
-  OwningPtr<MCSubtargetInfo>
-    STI(TM.getTarget().createMCSubtargetInfo(TM.getTargetTriple(),
-                                             TM.getTargetCPU(),
-                                             TM.getTargetFeatureString()));
-  OwningPtr<MCTargetAsmParser>
-    TAP(TM.getTarget().createMCAsmParser(*STI, *Parser, *MII));
+  // Initialize the parser with a fresh subtarget info. It is better to use a
+  // new STI here because the parser may modify it and we do not want those
+  // modifications to persist after parsing the inline asm. The modifications
+  // made by the parser will still be seen by the code emitters because the
+  // parser passes the current STI down to the EncodeInstruction() method.
+  std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
+      TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString()));
+
+  // Preserve a copy of the original STI because the parser may modify it, for
+  // example when switching between ARM and Thumb mode. If the target needs to
+  // emit code to return to the original state, it can do so in
+  // emitInlineAsmEnd().
+  MCSubtargetInfo STIOrig = *STI;
+
+  std::unique_ptr<MCTargetAsmParser> TAP(
+      TM.getTarget().createMCAsmParser(*STI, *Parser, *MII));
   if (!TAP)
     report_fatal_error("Inline asm not supported by this streamer because"
                        " we don't have an asm parser for this target\n");
@@ -133,6 +145,7 @@
   // Don't implicitly switch to the text section before the asm.
   int Res = Parser->Run(/*NoInitialTextSection*/ true,
                         /*NoFinalize*/ true);
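+  // Give the target a chance to emit code that returns the subtarget to its
+  // pre-asm state (see the comment on STIOrig above).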
+  emitInlineAsmEnd(STIOrig, STI.get());
   if (Res && !HasDiagHandler)
     report_fatal_error("Error parsing inline asm\n");
 }
@@ -427,21 +440,14 @@
   // If this asmstr is empty, just print the #APP/#NOAPP markers.
   // These are useful to see where empty asm's wound up.
   if (AsmStr[0] == 0) {
-    // Don't emit the comments if writing to a .o file.
-    if (!OutStreamer.hasRawTextSupport()) return;
-
-    OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
-                            MAI->getInlineAsmStart());
-    OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
-                            MAI->getInlineAsmEnd());
+    OutStreamer.emitRawComment(MAI->getInlineAsmStart());
+    OutStreamer.emitRawComment(MAI->getInlineAsmEnd());
     return;
   }
 
   // Emit the #APP start marker.  This has to happen even if verbose-asm isn't
-  // enabled, so we use EmitRawText.
-  if (OutStreamer.hasRawTextSupport())
-    OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
-                            MAI->getInlineAsmStart());
+  // enabled, so we use emitRawComment.
+  OutStreamer.emitRawComment(MAI->getInlineAsmStart());
 
   // Get the !srcloc metadata node if we have it, and decode the loc cookie from
   // it.
@@ -476,10 +482,8 @@
   EmitInlineAsm(OS.str(), LocMD, MI->getInlineAsmDialect());
 
   // Emit the #NOAPP end marker.  This has to happen even if verbose-asm isn't
-  // enabled, so we use EmitRawText.
-  if (OutStreamer.hasRawTextSupport())
-    OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
-                            MAI->getInlineAsmEnd());
+  // enabled, so we use emitRawComment.
+  OutStreamer.emitRawComment(MAI->getInlineAsmEnd());
 }
 
 
@@ -491,8 +495,9 @@
 /// for their own strange codes.
 void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
                               const char *Code) const {
+  const DataLayout *DL = TM.getDataLayout();
   if (!strcmp(Code, "private")) {
-    OS << MAI->getPrivateGlobalPrefix();
+    OS << DL->getPrivateGlobalPrefix();
   } else if (!strcmp(Code, "comment")) {
     OS << MAI->getCommentString();
   } else if (!strcmp(Code, "uid")) {
@@ -551,3 +556,5 @@
   return true;
 }
 
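+// Default, do-nothing implementation of emitInlineAsmEnd(). Targets that need
+// to emit code to restore the subtarget state after inline asm (see the
+// comment on STIOrig above) provide their own version. EndInfo is null when
+// the asm was emitted textually and never parsed.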
+void AsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
+                                  const MCSubtargetInfo *EndInfo) const {}
diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h
new file mode 100644
index 0000000..6c01d65
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -0,0 +1,71 @@
+//===-- llvm/CodeGen/ByteStreamer.h - ByteStreamer class --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains classes for streaming bytes that would normally be
+// emitted via the AsmPrinter, either to the output stream or into a hash.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_BYTESTREAMER_H
+#define LLVM_CODEGEN_BYTESTREAMER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "DIEHash.h"
+
+namespace llvm {
+class ByteStreamer {
+public:
+  virtual ~ByteStreamer() {}
+
+  // For now we're just handling the calls we need for dwarf emission/hashing.
+  virtual void EmitInt8(uint8_t Byte, const Twine &Comment = "") = 0;
+  virtual void EmitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0;
+  virtual void EmitULEB128(uint64_t DWord, const Twine &Comment = "") = 0;
+};
+
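+/// \brief A ByteStreamer that emits each value (and its comment) through an
+/// AsmPrinter.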
+class APByteStreamer : public ByteStreamer {
+private:
+  AsmPrinter &AP;
+
+public:
+  APByteStreamer(AsmPrinter &Asm) : AP(Asm) {}
+  void EmitInt8(uint8_t Byte, const Twine &Comment) override {
+    AP.OutStreamer.AddComment(Comment);
+    AP.EmitInt8(Byte);
+  }
+  void EmitSLEB128(uint64_t DWord, const Twine &Comment) override {
+    AP.OutStreamer.AddComment(Comment);
+    AP.EmitSLEB128(DWord);
+  }
+  void EmitULEB128(uint64_t DWord, const Twine &Comment) override {
+    AP.OutStreamer.AddComment(Comment);
+    AP.EmitULEB128(DWord);
+  }
+};
+
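+/// \brief A ByteStreamer that feeds each value into a DIEHash rather than
+/// emitting it; comments are ignored.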
+class HashingByteStreamer : public ByteStreamer {
+private:
+  DIEHash &Hash;
+public:
+  HashingByteStreamer(DIEHash &H) : Hash(H) {}
+  void EmitInt8(uint8_t Byte, const Twine &Comment) override {
+    Hash.update(Byte);
+  }
+  void EmitSLEB128(uint64_t DWord, const Twine &Comment) override {
+    Hash.addSLEB128(DWord);
+  }
+  void EmitULEB128(uint64_t DWord, const Twine &Comment) override {
+    Hash.addULEB128(DWord);
+  }
+};
+} // End of namespace llvm
+
+#endif
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index be484a6..b3eddac 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -7,12 +7,13 @@
   DIEHash.cpp
   DwarfAccelTable.cpp
   DwarfCFIException.cpp
-  DwarfCompileUnit.cpp
   DwarfDebug.cpp
   DwarfException.cpp
+  DwarfUnit.cpp
   ErlangGCPrinter.cpp
   OcamlGCPrinter.cpp
   Win64Exception.cpp
+  WinCodeViewLineTables.cpp
   )
 
 add_dependencies(LLVMAsmPrinter intrinsics_gen)
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 6944428..26e8f2d 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -13,17 +13,18 @@
 
 #include "DIE.h"
 #include "DwarfDebug.h"
+#include "DwarfUnit.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Allocator.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Support/MD5.h"
 using namespace llvm;
 
@@ -48,7 +49,7 @@
 ///
 void DIEAbbrev::Profile(FoldingSetNodeID &ID) const {
   ID.AddInteger(unsigned(Tag));
-  ID.AddInteger(ChildrenFlag);
+  ID.AddInteger(unsigned(Children));
 
   // For each attribute description.
   for (unsigned i = 0, N = Data.size(); i < N; ++i)
@@ -62,7 +63,7 @@
   AP->EmitULEB128(Tag, dwarf::TagString(Tag));
 
   // Emit whether it has children DIEs.
-  AP->EmitULEB128(ChildrenFlag, dwarf::ChildrenString(ChildrenFlag));
+  AP->EmitULEB128((unsigned)Children, dwarf::ChildrenString(Children));
 
   // For each attribute description.
   for (unsigned i = 0, N = Data.size(); i < N; ++i) {
@@ -89,7 +90,7 @@
     << "  "
     << dwarf::TagString(Tag)
     << " "
-    << dwarf::ChildrenString(ChildrenFlag)
+    << dwarf::ChildrenString(Children)
     << '\n';
 
   for (unsigned i = 0, N = Data.size(); i < N; ++i) {
@@ -112,27 +113,28 @@
     delete Children[i];
 }
 
-/// Climb up the parent chain to get the compile unit DIE to which this DIE
+/// Climb up the parent chain to get the unit DIE to which this DIE
 /// belongs.
-const DIE *DIE::getCompileUnit() const {
-  const DIE *Cu = getCompileUnitOrNull();
+const DIE *DIE::getUnit() const {
+  const DIE *Cu = getUnitOrNull();
   assert(Cu && "We should not have orphaned DIEs.");
   return Cu;
 }
 
-/// Climb up the parent chain to get the compile unit DIE this DIE belongs
+/// Climb up the parent chain to get the unit DIE this DIE belongs
 /// to. Return NULL if DIE is not added to an owner yet.
-const DIE *DIE::getCompileUnitOrNull() const {
+const DIE *DIE::getUnitOrNull() const {
   const DIE *p = this;
   while (p) {
-    if (p->getTag() == dwarf::DW_TAG_compile_unit)
+    if (p->getTag() == dwarf::DW_TAG_compile_unit ||
+        p->getTag() == dwarf::DW_TAG_type_unit)
       return p;
     p = p->getParent();
   }
   return NULL;
 }
 
-DIEValue *DIE::findAttribute(uint16_t Attribute) {
+DIEValue *DIE::findAttribute(dwarf::Attribute Attribute) const {
   const SmallVectorImpl<DIEValue *> &Values = getValues();
   const DIEAbbrev &Abbrevs = getAbbrev();
 
@@ -159,7 +161,7 @@
     O << Indent
       << dwarf::TagString(Abbrev.getTag())
       << " "
-      << dwarf::ChildrenString(Abbrev.getChildrenFlag()) << "\n";
+      << dwarf::ChildrenString(Abbrev.hasChildren()) << "\n";
   } else {
     O << "Size: " << Size << "\n";
   }
@@ -215,8 +217,7 @@
   case dwarf::DW_FORM_flag_present:
     // Emit something to keep the lines and comments in sync.
     // FIXME: Is there a better way to do this?
-    if (Asm->OutStreamer.hasRawTextSupport())
-      Asm->OutStreamer.EmitRawText("");
+    Asm->OutStreamer.AddBlankLine();
     return;
   case dwarf::DW_FORM_flag:  // Fall thru
   case dwarf::DW_FORM_ref1:  // Fall thru
@@ -227,6 +228,7 @@
   case dwarf::DW_FORM_ref4:  // Fall thru
   case dwarf::DW_FORM_data4: Size = 4; break;
   case dwarf::DW_FORM_ref8:  // Fall thru
+  case dwarf::DW_FORM_ref_sig8:  // Fall thru
   case dwarf::DW_FORM_data8: Size = 8; break;
   case dwarf::DW_FORM_GNU_str_index: Asm->EmitULEB128(Integer); return;
   case dwarf::DW_FORM_GNU_addr_index: Asm->EmitULEB128(Integer); return;
@@ -253,11 +255,12 @@
   case dwarf::DW_FORM_ref4:  // Fall thru
   case dwarf::DW_FORM_data4: return sizeof(int32_t);
   case dwarf::DW_FORM_ref8:  // Fall thru
+  case dwarf::DW_FORM_ref_sig8:  // Fall thru
   case dwarf::DW_FORM_data8: return sizeof(int64_t);
-  case dwarf::DW_FORM_GNU_str_index: return MCAsmInfo::getULEB128Size(Integer);
-  case dwarf::DW_FORM_GNU_addr_index: return MCAsmInfo::getULEB128Size(Integer);
-  case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer);
-  case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer);
+  case dwarf::DW_FORM_GNU_str_index: return getULEB128Size(Integer);
+  case dwarf::DW_FORM_GNU_addr_index: return getULEB128Size(Integer);
+  case dwarf::DW_FORM_udata: return getULEB128Size(Integer);
+  case dwarf::DW_FORM_sdata: return getSLEB128Size(Integer);
   case dwarf::DW_FORM_addr:  return AP->getDataLayout().getPointerSize();
   default: llvm_unreachable("DIE Value form not supported yet");
   }
@@ -338,6 +341,7 @@
 ///
 unsigned DIEDelta::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
   if (Form == dwarf::DW_FORM_data4) return 4;
+  if (Form == dwarf::DW_FORM_sec_offset) return 4;
   if (Form == dwarf::DW_FORM_strp) return 4;
   return AP->getDataLayout().getPointerSize();
 }
@@ -378,7 +382,26 @@
 /// EmitValue - Emit debug information entry offset.
 ///
 void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
-  AP->EmitInt32(Entry->getOffset());
+  if (Form == dwarf::DW_FORM_ref_addr) {
+    const DwarfDebug *DD = AP->getDwarfDebug();
+    unsigned Addr = Entry->getOffset();
+    assert(!DD->useSplitDwarf() && "TODO: dwo files can't have relocations.");
+    // For DW_FORM_ref_addr, output the offset from beginning of debug info
+    // section. Entry->getOffset() returns the offset from start of the
+    // compile unit.
+    DwarfCompileUnit *CU = DD->lookupUnit(Entry->getUnit());
+    assert(CU && "CUDie should belong to a CU.");
+    Addr += CU->getDebugInfoOffset();
+    if (AP->MAI->doesDwarfUseRelocationsAcrossSections())
+      AP->EmitLabelPlusOffset(CU->getSectionSym(), Addr,
+                              DIEEntry::getRefAddrSize(AP));
+    else
+      AP->EmitLabelOffsetDifference(CU->getSectionSym(), Addr,
+                                    CU->getSectionSym(),
+                                    DIEEntry::getRefAddrSize(AP));
+  } else
+    AP->EmitInt32(Entry->getOffset());
 }
 
 unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) {
@@ -386,7 +409,9 @@
   // specified to be four bytes in the DWARF 32-bit format and eight bytes
   // in the DWARF 64-bit format, while DWARF Version 2 specifies that such
   // references have the same size as an address on the target system.
-  if (AP->getDwarfDebug()->getDwarfVersion() == 2)
+  const DwarfDebug *DD = AP->getDwarfDebug();
+  assert(DD && "Expected Dwarf Debug info to be available");
+  if (DD->getDwarfVersion() == 2)
     return AP->getDataLayout().getPointerSize();
   return sizeof(int32_t);
 }
@@ -398,12 +423,83 @@
 #endif
 
 //===----------------------------------------------------------------------===//
+// DIETypeSignature Implementation
+//===----------------------------------------------------------------------===//
+void DIETypeSignature::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
+  assert(Form == dwarf::DW_FORM_ref_sig8);
+  Asm->OutStreamer.EmitIntValue(Unit.getTypeSignature(), 8);
+}
+
+#ifndef NDEBUG
+void DIETypeSignature::print(raw_ostream &O) const {
+  O << format("Type Unit: 0x%lx", Unit.getTypeSignature());
+}
+
+void DIETypeSignature::dump() const { print(dbgs()); }
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIELoc Implementation
+//===----------------------------------------------------------------------===//
+
+/// ComputeSize - calculate the size of the location expression.
+///
+unsigned DIELoc::ComputeSize(AsmPrinter *AP) const {
+  if (!Size) {
+    const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
+    for (unsigned i = 0, N = Values.size(); i < N; ++i)
+      Size += Values[i]->SizeOf(AP, AbbrevData[i].getForm());
+  }
+
+  return Size;
+}
+
+/// EmitValue - Emit location data.
+///
+void DIELoc::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
+  switch (Form) {
+  default: llvm_unreachable("Improper form for block");
+  case dwarf::DW_FORM_block1: Asm->EmitInt8(Size);    break;
+  case dwarf::DW_FORM_block2: Asm->EmitInt16(Size);   break;
+  case dwarf::DW_FORM_block4: Asm->EmitInt32(Size);   break;
+  case dwarf::DW_FORM_block:
+  case dwarf::DW_FORM_exprloc:
+    Asm->EmitULEB128(Size); break;
+  }
+
+  const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
+  for (unsigned i = 0, N = Values.size(); i < N; ++i)
+    Values[i]->EmitValue(Asm, AbbrevData[i].getForm());
+}
+
+/// SizeOf - Determine size of location data in bytes.
+///
+unsigned DIELoc::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+  switch (Form) {
+  case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
+  case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
+  case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
+  case dwarf::DW_FORM_block:
+  case dwarf::DW_FORM_exprloc:
+    return Size + getULEB128Size(Size);
+  default: llvm_unreachable("Improper form for block");
+  }
+}
+
+#ifndef NDEBUG
+void DIELoc::print(raw_ostream &O) const {
+  O << "ExprLoc: ";
+  DIE::print(O, 5);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
 // DIEBlock Implementation
 //===----------------------------------------------------------------------===//
 
 /// ComputeSize - calculate the size of the block.
 ///
-unsigned DIEBlock::ComputeSize(AsmPrinter *AP) {
+unsigned DIEBlock::ComputeSize(AsmPrinter *AP) const {
   if (!Size) {
     const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
     for (unsigned i = 0, N = Values.size(); i < N; ++i)
@@ -436,7 +532,7 @@
   case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
   case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
   case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
-  case dwarf::DW_FORM_block:  return Size + MCAsmInfo::getULEB128Size(Size);
+  case dwarf::DW_FORM_block:  return Size + getULEB128Size(Size);
   default: llvm_unreachable("Improper form for block");
   }
 }
@@ -447,3 +543,34 @@
   DIE::print(O, 5);
 }
 #endif
+
+//===----------------------------------------------------------------------===//
+// DIELocList Implementation
+//===----------------------------------------------------------------------===//
+
+unsigned DIELocList::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+  if (Form == dwarf::DW_FORM_data4)
+    return 4;
+  if (Form == dwarf::DW_FORM_sec_offset)
+    return 4;
+  return AP->getDataLayout().getPointerSize();
+}
+
+/// EmitValue - Emit label value.
+///
+void DIELocList::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+  DwarfDebug *DD = AP->getDwarfDebug();
+  MCSymbol *Label = DD->getDebugLocEntries()[Index].Label;
+
+  if (AP->MAI->doesDwarfUseRelocationsAcrossSections() && !DD->useSplitDwarf())
+    AP->EmitSectionOffset(Label, DD->getDebugLocSym());
+  else
+    AP->EmitLabelDifference(Label, DD->getDebugLocSym(), 4);
+}
+
+#ifndef NDEBUG
+void DIELocList::print(raw_ostream &O) const {
+  O << "LocList: " << Index;
+}
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index f4fa326..7fefd4f 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -16,437 +16,560 @@
 
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Dwarf.h"
-#include "llvm/MC/MCExpr.h"
 #include <vector>
 
 namespace llvm {
-  class AsmPrinter;
-  class MCSymbol;
-  class MCSymbolRefExpr;
-  class raw_ostream;
+class AsmPrinter;
+class MCExpr;
+class MCSymbol;
+class raw_ostream;
+class DwarfTypeUnit;
 
-  //===--------------------------------------------------------------------===//
-  /// DIEAbbrevData - Dwarf abbreviation data, describes one attribute of a
-  /// Dwarf abbreviation.
-  class DIEAbbrevData {
-    /// Attribute - Dwarf attribute code.
-    ///
-    dwarf::Attribute Attribute;
-
-    /// Form - Dwarf form code.
-    ///
-    dwarf::Form Form;
-  public:
-    DIEAbbrevData(dwarf::Attribute A, dwarf::Form F) : Attribute(A), Form(F) {}
-
-    // Accessors.
-    dwarf::Attribute getAttribute() const { return Attribute; }
-    dwarf::Form getForm() const { return Form; }
-
-    /// Profile - Used to gather unique data for the abbreviation folding set.
-    ///
-    void Profile(FoldingSetNodeID &ID) const;
-  };
-
-  //===--------------------------------------------------------------------===//
-  /// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug
-  /// information object.
-  class DIEAbbrev : public FoldingSetNode {
-    /// Tag - Dwarf tag code.
-    ///
-    dwarf::Tag Tag;
-
-    /// ChildrenFlag - Dwarf children flag.
-    ///
-    uint16_t ChildrenFlag;
-
-    /// Unique number for node.
-    ///
-    unsigned Number;
-
-    /// Data - Raw data bytes for abbreviation.
-    ///
-    SmallVector<DIEAbbrevData, 12> Data;
-
-  public:
-    DIEAbbrev(dwarf::Tag T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {}
-
-    // Accessors.
-    dwarf::Tag getTag() const { return Tag; }
-    unsigned getNumber() const { return Number; }
-    uint16_t getChildrenFlag() const { return ChildrenFlag; }
-    const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; }
-    void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; }
-    void setNumber(unsigned N) { Number = N; }
-
-    /// AddAttribute - Adds another set of attribute information to the
-    /// abbreviation.
-    void AddAttribute(dwarf::Attribute Attribute, dwarf::Form Form) {
-      Data.push_back(DIEAbbrevData(Attribute, Form));
-    }
-
-    /// Profile - Used to gather unique data for the abbreviation folding set.
-    ///
-    void Profile(FoldingSetNodeID &ID) const;
-
-    /// Emit - Print the abbreviation using the specified asm printer.
-    ///
-    void Emit(AsmPrinter *AP) const;
-
-#ifndef NDEBUG
-    void print(raw_ostream &O);
-    void dump();
-#endif
-  };
-
-  //===--------------------------------------------------------------------===//
-  /// DIE - A structured debug information entry.  Has an abbreviation which
-  /// describes its organization.
-  class DIEValue;
-
-  class DIE {
-  protected:
-    /// Offset - Offset in debug info section.
-    ///
-    unsigned Offset;
-
-    /// Size - Size of instance + children.
-    ///
-    unsigned Size;
-
-    /// Abbrev - Buffer for constructing abbreviation.
-    ///
-    DIEAbbrev Abbrev;
-
-    /// Children DIEs.
-    ///
-    std::vector<DIE *> Children;
-
-    DIE *Parent;
-
-    /// Attribute values.
-    ///
-    SmallVector<DIEValue*, 12> Values;
-
-  public:
-    explicit DIE(unsigned Tag)
-        : Offset(0), Size(0), Abbrev((dwarf::Tag)Tag, dwarf::DW_CHILDREN_no),
-          Parent(0) {}
-    virtual ~DIE();
-
-    // Accessors.
-    DIEAbbrev &getAbbrev() { return Abbrev; }
-    const DIEAbbrev &getAbbrev() const { return Abbrev; }
-    unsigned getAbbrevNumber() const { return Abbrev.getNumber(); }
-    dwarf::Tag getTag() const { return Abbrev.getTag(); }
-    unsigned getOffset() const { return Offset; }
-    unsigned getSize() const { return Size; }
-    const std::vector<DIE *> &getChildren() const { return Children; }
-    const SmallVectorImpl<DIEValue*> &getValues() const { return Values; }
-    DIE *getParent() const { return Parent; }
-    /// Climb up the parent chain to get the compile unit DIE this DIE belongs
-    /// to.
-    const DIE *getCompileUnit() const;
-    /// Similar to getCompileUnit, returns null when DIE is not added to an
-    /// owner yet.
-    const DIE *getCompileUnitOrNull() const;
-    void setOffset(unsigned O) { Offset = O; }
-    void setSize(unsigned S) { Size = S; }
-
-    /// addValue - Add a value and attributes to a DIE.
-    ///
-    void addValue(dwarf::Attribute Attribute, dwarf::Form Form,
-                  DIEValue *Value) {
-      Abbrev.AddAttribute(Attribute, Form);
-      Values.push_back(Value);
-    }
-
-    /// addChild - Add a child to the DIE.
-    ///
-    void addChild(DIE *Child) {
-      assert(!Child->getParent());
-      Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes);
-      Children.push_back(Child);
-      Child->Parent = this;
-    }
-
-    /// findAttribute - Find a value in the DIE with the attribute given, returns NULL
-    /// if no such attribute exists.
-    DIEValue *findAttribute(uint16_t Attribute);
-
-#ifndef NDEBUG
-    void print(raw_ostream &O, unsigned IndentCount = 0) const;
-    void dump();
-#endif
-  };
-
-  //===--------------------------------------------------------------------===//
-  /// DIEValue - A debug information entry value.
+//===--------------------------------------------------------------------===//
+/// DIEAbbrevData - Dwarf abbreviation data, describes one attribute of a
+/// Dwarf abbreviation.
+class DIEAbbrevData {
+  /// Attribute - Dwarf attribute code.
   ///
-  class DIEValue {
-    virtual void anchor();
-  public:
-    enum {
-      isInteger,
-      isString,
-      isExpr,
-      isLabel,
-      isDelta,
-      isEntry,
-      isBlock
-    };
-  protected:
-    /// Type - Type of data stored in the value.
-    ///
-    unsigned Type;
-  public:
-    explicit DIEValue(unsigned T) : Type(T) {}
-    virtual ~DIEValue() {}
+  dwarf::Attribute Attribute;
 
-    // Accessors
-    unsigned getType()  const { return Type; }
+  /// Form - Dwarf form code.
+  ///
+  dwarf::Form Form;
 
-    /// EmitValue - Emit value via the Dwarf writer.
-    ///
-    virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const = 0;
+public:
+  DIEAbbrevData(dwarf::Attribute A, dwarf::Form F) : Attribute(A), Form(F) {}
 
-    /// SizeOf - Return the size of a value in bytes.
-    ///
-    virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const = 0;
+  // Accessors.
+  dwarf::Attribute getAttribute() const { return Attribute; }
+  dwarf::Form getForm() const { return Form; }
+
+  /// Profile - Used to gather unique data for the abbreviation folding set.
+  ///
+  void Profile(FoldingSetNodeID &ID) const;
+};
+
+//===--------------------------------------------------------------------===//
+/// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug
+/// information object.
+class DIEAbbrev : public FoldingSetNode {
+  /// Unique number for node.
+  ///
+  unsigned Number;
+
+  /// Tag - Dwarf tag code.
+  ///
+  dwarf::Tag Tag;
+
+  /// Children - Whether or not this node has children.
+  ///
+  // This cheats a bit in all of the uses since the values in the standard
+  // are 0 and 1 for "no children" and "has children", respectively.
+  bool Children;
+
+  /// Data - Raw data bytes for abbreviation.
+  ///
+  SmallVector<DIEAbbrevData, 12> Data;
+
+public:
+  DIEAbbrev(dwarf::Tag T, bool C) : Tag(T), Children(C), Data() {}
+
+  // Accessors.
+  dwarf::Tag getTag() const { return Tag; }
+  unsigned getNumber() const { return Number; }
+  bool hasChildren() const { return Children; }
+  const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; }
+  void setChildrenFlag(bool hasChild) { Children = hasChild; }
+  void setNumber(unsigned N) { Number = N; }
+
+  /// AddAttribute - Adds another set of attribute information to the
+  /// abbreviation.
+  void AddAttribute(dwarf::Attribute Attribute, dwarf::Form Form) {
+    Data.push_back(DIEAbbrevData(Attribute, Form));
+  }
+
+  /// Profile - Used to gather unique data for the abbreviation folding set.
+  ///
+  void Profile(FoldingSetNodeID &ID) const;
+
+  /// Emit - Print the abbreviation using the specified asm printer.
+  ///
+  void Emit(AsmPrinter *AP) const;
 
 #ifndef NDEBUG
-    virtual void print(raw_ostream &O) const = 0;
-    void dump() const;
+  void print(raw_ostream &O);
+  void dump();
 #endif
+};
+
+//===--------------------------------------------------------------------===//
+/// DIE - A structured debug information entry.  Has an abbreviation which
+/// describes its organization.
+class DIEValue;
+
+class DIE {
+protected:
+  /// Offset - Offset in debug info section.
+  ///
+  unsigned Offset;
+
+  /// Size - Size of instance + children.
+  ///
+  unsigned Size;
+
+  /// Abbrev - Buffer for constructing abbreviation.
+  ///
+  DIEAbbrev Abbrev;
+
+  /// Children DIEs.
+  ///
+  std::vector<DIE *> Children;
+
+  DIE *Parent;
+
+  /// Attribute values.
+  ///
+  SmallVector<DIEValue *, 12> Values;
+
+public:
+  explicit DIE(unsigned Tag)
+      : Offset(0), Size(0), Abbrev((dwarf::Tag)Tag, dwarf::DW_CHILDREN_no),
+        Parent(0) {}
+  ~DIE();
+
+  // Accessors.
+  DIEAbbrev &getAbbrev() { return Abbrev; }
+  const DIEAbbrev &getAbbrev() const { return Abbrev; }
+  unsigned getAbbrevNumber() const { return Abbrev.getNumber(); }
+  dwarf::Tag getTag() const { return Abbrev.getTag(); }
+  unsigned getOffset() const { return Offset; }
+  unsigned getSize() const { return Size; }
+  const std::vector<DIE *> &getChildren() const { return Children; }
+  const SmallVectorImpl<DIEValue *> &getValues() const { return Values; }
+  DIE *getParent() const { return Parent; }
+  /// Climb up the parent chain to get the compile or type unit DIE this DIE
+  /// belongs to.
+  const DIE *getUnit() const;
+  /// Similar to getUnit, returns null when DIE is not added to an
+  /// owner yet.
+  const DIE *getUnitOrNull() const;
+  void setOffset(unsigned O) { Offset = O; }
+  void setSize(unsigned S) { Size = S; }
+
+  /// addValue - Add a value and attributes to a DIE.
+  ///
+  void addValue(dwarf::Attribute Attribute, dwarf::Form Form, DIEValue *Value) {
+    Abbrev.AddAttribute(Attribute, Form);
+    Values.push_back(Value);
+  }
+
+  /// addChild - Add a child to the DIE.
+  ///
+  void addChild(DIE *Child) {
+    assert(!Child->getParent());
+    Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes);
+    Children.push_back(Child);
+    Child->Parent = this;
+  }
+
+  /// findAttribute - Find a value in the DIE with the attribute given,
+  /// returns NULL if no such attribute exists.
+  DIEValue *findAttribute(dwarf::Attribute Attribute) const;
+
+#ifndef NDEBUG
+  void print(raw_ostream &O, unsigned IndentCount = 0) const;
+  void dump();
+#endif
+};
+
+//===--------------------------------------------------------------------===//
+/// DIEValue - A debug information entry value. Some of these roughly correlate
+/// to DWARF attribute classes.
+///
+class DIEValue {
+  virtual void anchor();
+
+public:
+  enum Type {
+    isInteger,
+    isString,
+    isExpr,
+    isLabel,
+    isDelta,
+    isEntry,
+    isTypeSignature,
+    isBlock,
+    isLoc,
+    isLocList,
   };
 
-  //===--------------------------------------------------------------------===//
-  /// DIEInteger - An integer value DIE.
+protected:
+  /// Ty - Type of data stored in the value.
   ///
-  class DIEInteger : public DIEValue {
-    uint64_t Integer;
-  public:
-    explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {}
+  Type Ty;
 
-    /// BestForm - Choose the best form for integer.
-    ///
-    static dwarf::Form BestForm(bool IsSigned, uint64_t Int) {
-      if (IsSigned) {
-        const int64_t SignedInt = Int;
-        if ((char)Int == SignedInt)     return dwarf::DW_FORM_data1;
-        if ((short)Int == SignedInt)    return dwarf::DW_FORM_data2;
-        if ((int)Int == SignedInt)      return dwarf::DW_FORM_data4;
-      } else {
-        if ((unsigned char)Int == Int)  return dwarf::DW_FORM_data1;
-        if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2;
-        if ((unsigned int)Int == Int)   return dwarf::DW_FORM_data4;
-      }
-      return dwarf::DW_FORM_data8;
+  explicit DIEValue(Type T) : Ty(T) {}
+  virtual ~DIEValue() {}
+
+public:
+  // Accessors
+  Type getType() const { return Ty; }
+
+  /// EmitValue - Emit value via the Dwarf writer.
+  ///
+  virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const = 0;
+
+  /// SizeOf - Return the size of a value in bytes.
+  ///
+  virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const = 0;
+
+#ifndef NDEBUG
+  virtual void print(raw_ostream &O) const = 0;
+  void dump() const;
+#endif
+};
+
+//===--------------------------------------------------------------------===//
+/// DIEInteger - An integer value DIE.
+///
+class DIEInteger : public DIEValue {
+  uint64_t Integer;
+
+public:
+  explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {}
+
+  /// BestForm - Choose the best form for integer.
+  ///
+  static dwarf::Form BestForm(bool IsSigned, uint64_t Int) {
+    if (IsSigned) {
+      const int64_t SignedInt = Int;
+      if ((char)Int == SignedInt)
+        return dwarf::DW_FORM_data1;
+      if ((short)Int == SignedInt)
+        return dwarf::DW_FORM_data2;
+      if ((int)Int == SignedInt)
+        return dwarf::DW_FORM_data4;
+    } else {
+      if ((unsigned char)Int == Int)
+        return dwarf::DW_FORM_data1;
+      if ((unsigned short)Int == Int)
+        return dwarf::DW_FORM_data2;
+      if ((unsigned int)Int == Int)
+        return dwarf::DW_FORM_data4;
     }
+    return dwarf::DW_FORM_data8;
+  }
 
-    /// EmitValue - Emit integer of appropriate size.
-    ///
-    virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const;
-
-    uint64_t getValue() const { return Integer; }
-
-    /// SizeOf - Determine size of integer value in bytes.
-    ///
-    virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const;
-
-    // Implement isa/cast/dyncast.
-    static bool classof(const DIEValue *I) { return I->getType() == isInteger; }
-
-#ifndef NDEBUG
-    virtual void print(raw_ostream &O) const;
-#endif
-  };
-
-  //===--------------------------------------------------------------------===//
-  /// DIEExpr - An expression DIE.
-  //
-  class DIEExpr : public DIEValue {
-    const MCExpr *Expr;
-  public:
-    explicit DIEExpr(const MCExpr *E) : DIEValue(isExpr), Expr(E) {}
-
-    /// EmitValue - Emit expression value.
-    ///
-    virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const;
-
-    /// getValue - Get MCExpr.
-    ///
-    const MCExpr *getValue() const { return Expr; }
-
-    /// SizeOf - Determine size of expression value in bytes.
-    ///
-    virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const;
-
-    // Implement isa/cast/dyncast.
-    static bool classof(const DIEValue *E) { return E->getType() == isExpr; }
-
-#ifndef NDEBUG
-    virtual void print(raw_ostream &O) const;
-#endif
-  };
-
-  //===--------------------------------------------------------------------===//
-  /// DIELabel - A label DIE.
-  //
-  class DIELabel : public DIEValue {
-    const MCSymbol *Label;
-  public:
-    explicit DIELabel(const MCSymbol *L) : DIEValue(isLabel), Label(L) {}
-
-    /// EmitValue - Emit label value.
-    ///
-    virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const;
-
-    /// getValue - Get MCSymbol.
-    ///
-    const MCSymbol *getValue() const { return Label; }
-
-    /// SizeOf - Determine size of label value in bytes.
-    ///
-    virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const;
-
-    // Implement isa/cast/dyncast.
-    static bool classof(const DIEValue *L) { return L->getType() == isLabel; }
-
-#ifndef NDEBUG
-    virtual void print(raw_ostream &O) const;
-#endif
-  };
-
-  //===--------------------------------------------------------------------===//
-  /// DIEDelta - A simple label difference DIE.
+  /// EmitValue - Emit integer of appropriate size.
   ///
-  class DIEDelta : public DIEValue {
-    const MCSymbol *LabelHi;
-    const MCSymbol *LabelLo;
-  public:
-    DIEDelta(const MCSymbol *Hi, const MCSymbol *Lo)
+  void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  uint64_t getValue() const { return Integer; }
+
+  /// SizeOf - Determine size of integer value in bytes.
+  ///
+  unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEValue *I) { return I->getType() == isInteger; }
+
+#ifndef NDEBUG
+  void print(raw_ostream &O) const override;
+#endif
+};
+
+//===--------------------------------------------------------------------===//
+/// DIEExpr - An expression DIE.
+//
+class DIEExpr : public DIEValue {
+  const MCExpr *Expr;
+
+public:
+  explicit DIEExpr(const MCExpr *E) : DIEValue(isExpr), Expr(E) {}
+
+  /// EmitValue - Emit expression value.
+  ///
+  void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  /// getValue - Get MCExpr.
+  ///
+  const MCExpr *getValue() const { return Expr; }
+
+  /// SizeOf - Determine size of expression value in bytes.
+  ///
+  unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEValue *E) { return E->getType() == isExpr; }
+
+#ifndef NDEBUG
+  void print(raw_ostream &O) const override;
+#endif
+};
+
+//===--------------------------------------------------------------------===//
+/// DIELabel - A label DIE.
+//
+class DIELabel : public DIEValue {
+  const MCSymbol *Label;
+
+public:
+  explicit DIELabel(const MCSymbol *L) : DIEValue(isLabel), Label(L) {}
+
+  /// EmitValue - Emit label value.
+  ///
+  void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  /// getValue - Get MCSymbol.
+  ///
+  const MCSymbol *getValue() const { return Label; }
+
+  /// SizeOf - Determine size of label value in bytes.
+  ///
+  unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEValue *L) { return L->getType() == isLabel; }
+
+#ifndef NDEBUG
+  void print(raw_ostream &O) const override;
+#endif
+};
+
+//===--------------------------------------------------------------------===//
+/// DIEDelta - A simple label difference DIE.
+///
+class DIEDelta : public DIEValue {
+  const MCSymbol *LabelHi;
+  const MCSymbol *LabelLo;
+
+public:
+  DIEDelta(const MCSymbol *Hi, const MCSymbol *Lo)
       : DIEValue(isDelta), LabelHi(Hi), LabelLo(Lo) {}
 
-    /// EmitValue - Emit delta value.
-    ///
-    virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const;
-
-    /// SizeOf - Determine size of delta value in bytes.
-    ///
-    virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const;
-
-    // Implement isa/cast/dyncast.
-    static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
-
-#ifndef NDEBUG
-    virtual void print(raw_ostream &O) const;
-#endif
-  };
-
-  //===--------------------------------------------------------------------===//
-  /// DIEString - A container for string values.
+  /// EmitValue - Emit delta value.
   ///
-  class DIEString : public DIEValue {
-    const DIEValue *Access;
-    const StringRef Str;
+  void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
 
-  public:
-    DIEString(const DIEValue *Acc, const StringRef S)
-        : DIEValue(isString), Access(Acc), Str(S) {}
+  /// SizeOf - Determine size of delta value in bytes.
+  ///
+  unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override;
 
-    /// getString - Grab the string out of the object.
-    StringRef getString() const { return Str; }
-
-    /// EmitValue - Emit delta value.
-    ///
-    virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const;
-
-    /// SizeOf - Determine size of delta value in bytes.
-    ///
-    virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const;
-
-    // Implement isa/cast/dyncast.
-    static bool classof(const DIEValue *D) { return D->getType() == isString; }
-
-  #ifndef NDEBUG
-    virtual void print(raw_ostream &O) const;
-  #endif
-  };
-
-  //===--------------------------------------------------------------------===//
-  /// DIEEntry - A pointer to another debug information entry.  An instance of
-  /// this class can also be used as a proxy for a debug information entry not
-  /// yet defined (ie. types.)
-  class DIEEntry : public DIEValue {
-    DIE *const Entry;
-  public:
-    explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {
-      assert(E && "Cannot construct a DIEEntry with a null DIE");
-    }
-
-    DIE *getEntry() const { return Entry; }
-
-    /// EmitValue - Emit debug information entry offset.
-    ///
-    virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const;
-
-    /// SizeOf - Determine size of debug information entry in bytes.
-    ///
-    virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
-      return Form == dwarf::DW_FORM_ref_addr ? getRefAddrSize(AP)
-                                             : sizeof(int32_t);
-    }
-
-    /// Returns size of a ref_addr entry.
-    static unsigned getRefAddrSize(AsmPrinter *AP);
-
-    // Implement isa/cast/dyncast.
-    static bool classof(const DIEValue *E) { return E->getType() == isEntry; }
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
 
 #ifndef NDEBUG
-    virtual void print(raw_ostream &O) const;
+  void print(raw_ostream &O) const override;
 #endif
-  };
+};
 
-  //===--------------------------------------------------------------------===//
-  /// DIEBlock - A block of values.  Primarily used for location expressions.
-  //
-  class DIEBlock : public DIEValue, public DIE {
-    unsigned Size;                // Size in bytes excluding size header.
-  public:
-    DIEBlock() : DIEValue(isBlock), DIE(0), Size(0) {}
+//===--------------------------------------------------------------------===//
+/// DIEString - A container for string values.
+///
+class DIEString : public DIEValue {
+  const DIEValue *Access;
+  const StringRef Str;
 
-    /// ComputeSize - calculate the size of the block.
-    ///
-    unsigned ComputeSize(AsmPrinter *AP);
+public:
+  DIEString(const DIEValue *Acc, const StringRef S)
+      : DIEValue(isString), Access(Acc), Str(S) {}
 
-    /// BestForm - Choose the best form for data.
-    ///
-    dwarf::Form BestForm() const {
-      if ((unsigned char)Size == Size)  return dwarf::DW_FORM_block1;
-      if ((unsigned short)Size == Size) return dwarf::DW_FORM_block2;
-      if ((unsigned int)Size == Size)   return dwarf::DW_FORM_block4;
-      return dwarf::DW_FORM_block;
-    }
+  /// getString - Grab the string out of the object.
+  StringRef getString() const { return Str; }
 
-    /// EmitValue - Emit block data.
-    ///
-    virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const;
+  /// EmitValue - Emit string value.
+  ///
+  void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
 
-    /// SizeOf - Determine size of block data in bytes.
-    ///
-    virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const;
+  /// SizeOf - Determine size of string value in bytes.
+  ///
+  unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override;
 
-    // Implement isa/cast/dyncast.
-    static bool classof(const DIEValue *E) { return E->getType() == isBlock; }
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEValue *D) { return D->getType() == isString; }
 
 #ifndef NDEBUG
-    virtual void print(raw_ostream &O) const;
+  void print(raw_ostream &O) const override;
 #endif
-  };
+};
+
+//===--------------------------------------------------------------------===//
+/// DIEEntry - A pointer to another debug information entry.  An instance of
+/// this class can also be used as a proxy for a debug information entry not
+/// yet defined (i.e., types).
+class DIEEntry : public DIEValue {
+  DIE *const Entry;
+
+public:
+  explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {
+    assert(E && "Cannot construct a DIEEntry with a null DIE");
+  }
+
+  DIE *getEntry() const { return Entry; }
+
+  /// EmitValue - Emit debug information entry offset.
+  ///
+  void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  /// SizeOf - Determine size of debug information entry in bytes.
+  ///
+  unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override {
+    return Form == dwarf::DW_FORM_ref_addr ? getRefAddrSize(AP)
+                                           : sizeof(int32_t);
+  }
+
+  /// Returns size of a ref_addr entry.
+  static unsigned getRefAddrSize(AsmPrinter *AP);
+
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEValue *E) { return E->getType() == isEntry; }
+
+#ifndef NDEBUG
+  void print(raw_ostream &O) const override;
+#endif
+};
+
+//===--------------------------------------------------------------------===//
+/// \brief A signature reference to a type unit.
+class DIETypeSignature : public DIEValue {
+  const DwarfTypeUnit &Unit;
+
+public:
+  explicit DIETypeSignature(const DwarfTypeUnit &Unit)
+      : DIEValue(isTypeSignature), Unit(Unit) {}
+
+  /// \brief Emit type unit signature.
+  void EmitValue(AsmPrinter *Asm, dwarf::Form Form) const override;
+
+  /// Returns size of a ref_sig8 entry.
+  unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override {
+    assert(Form == dwarf::DW_FORM_ref_sig8);
+    return 8;
+  }
+
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEValue *E) {
+    return E->getType() == isTypeSignature;
+  }
+#ifndef NDEBUG
+  void print(raw_ostream &O) const override;
+  void dump() const;
+#endif
+};
+
+//===--------------------------------------------------------------------===//
+/// DIELoc - Represents an expression location.
+//
+class DIELoc : public DIEValue, public DIE {
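+  // Size is mutable so that ComputeSize() can cache the computed size from a
+  // const method.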
+  mutable unsigned Size; // Size in bytes excluding size header.
+public:
+  DIELoc() : DIEValue(isLoc), DIE(0), Size(0) {}
+
+  /// ComputeSize - Calculate the size of the location expression.
+  ///
+  unsigned ComputeSize(AsmPrinter *AP) const;
+
+  /// BestForm - Choose the best form for data.
+  ///
+  dwarf::Form BestForm(unsigned DwarfVersion) const {
+    if (DwarfVersion > 3)
+      return dwarf::DW_FORM_exprloc;
+    // Pre-DWARF4 location expressions were blocks and not exprloc.
+    if ((unsigned char)Size == Size)
+      return dwarf::DW_FORM_block1;
+    if ((unsigned short)Size == Size)
+      return dwarf::DW_FORM_block2;
+    if ((unsigned int)Size == Size)
+      return dwarf::DW_FORM_block4;
+    return dwarf::DW_FORM_block;
+  }
+
+  /// EmitValue - Emit location data.
+  ///
+  void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  /// SizeOf - Determine size of location data in bytes.
+  ///
+  unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEValue *E) { return E->getType() == isLoc; }
+
+#ifndef NDEBUG
+  void print(raw_ostream &O) const override;
+#endif
+};
+
+//===--------------------------------------------------------------------===//
+/// DIEBlock - Represents a block of values.
+//
+class DIEBlock : public DIEValue, public DIE {
+  mutable unsigned Size; // Size in bytes excluding size header.
+public:
+  DIEBlock() : DIEValue(isBlock), DIE(0), Size(0) {}
+
+  /// ComputeSize - Calculate the size of the block.
+  ///
+  unsigned ComputeSize(AsmPrinter *AP) const;
+
+  /// BestForm - Choose the best form for data.
+  ///
+  dwarf::Form BestForm() const {
+    if ((unsigned char)Size == Size)
+      return dwarf::DW_FORM_block1;
+    if ((unsigned short)Size == Size)
+      return dwarf::DW_FORM_block2;
+    if ((unsigned int)Size == Size)
+      return dwarf::DW_FORM_block4;
+    return dwarf::DW_FORM_block;
+  }
+
+  /// EmitValue - Emit block data.
+  ///
+  void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  /// SizeOf - Determine size of block data in bytes.
+  ///
+  unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEValue *E) { return E->getType() == isBlock; }
+
+#ifndef NDEBUG
+  void print(raw_ostream &O) const override;
+#endif
+};
+
+//===--------------------------------------------------------------------===//
+/// DIELocList - Represents a pointer to a location list in the debug_loc
+/// section.
+//
+class DIELocList : public DIEValue {
+  // Index into the .debug_loc vector.
+  size_t Index;
+
+public:
+  DIELocList(size_t I) : DIEValue(isLocList), Index(I) {}
+
+  /// getValue - Grab the current index out.
+  size_t getValue() const { return Index; }
+
+  /// EmitValue - Emit location data.
+  ///
+  void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  /// SizeOf - Determine size of location data in bytes.
+  ///
+  unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override;
+
+  // Implement isa/cast/dyncast.
+  static bool classof(const DIEValue *E) { return E->getType() == isLocList; }
+
+#ifndef NDEBUG
+  void print(raw_ostream &O) const override;
+#endif
+};
 
 } // end llvm namespace
 
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 95eca90..74beec1 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -13,12 +13,13 @@
 
 #define DEBUG_TYPE "dwarfdebug"
 
+#include "ByteStreamer.h"
 #include "DIEHash.h"
-
 #include "DIE.h"
-#include "DwarfCompileUnit.h"
+#include "DwarfDebug.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Endian.h"
@@ -75,7 +76,7 @@
   do {
     uint8_t Byte = Value & 0x7f;
     Value >>= 7;
-    More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) ||
+    More = !((((Value == 0) && ((Byte & 0x40) == 0)) ||
               ((Value == -1) && ((Byte & 0x40) != 0))));
     if (More)
       Byte |= 0x80; // Mark this byte to show that more bytes will follow.
@@ -92,10 +93,12 @@
   // outermost such construct...
   SmallVector<const DIE *, 1> Parents;
   const DIE *Cur = &Parent;
-  while (Cur->getTag() != dwarf::DW_TAG_compile_unit) {
+  while (Cur->getParent()) {
     Parents.push_back(Cur);
     Cur = Cur->getParent();
   }
+  assert(Cur->getTag() == dwarf::DW_TAG_compile_unit ||
+         Cur->getTag() == dwarf::DW_TAG_type_unit);
 
   // Reverse iterate over our list to go from the outermost construct to the
   // innermost.
@@ -134,55 +137,55 @@
                  << dwarf::AttributeString(Abbrevs.getData()[i].getAttribute())
                  << " added.\n");
     switch (Abbrevs.getData()[i].getAttribute()) {
-    COLLECT_ATTR(DW_AT_name);
-    COLLECT_ATTR(DW_AT_accessibility);
-    COLLECT_ATTR(DW_AT_address_class);
-    COLLECT_ATTR(DW_AT_allocated);
-    COLLECT_ATTR(DW_AT_artificial);
-    COLLECT_ATTR(DW_AT_associated);
-    COLLECT_ATTR(DW_AT_binary_scale);
-    COLLECT_ATTR(DW_AT_bit_offset);
-    COLLECT_ATTR(DW_AT_bit_size);
-    COLLECT_ATTR(DW_AT_bit_stride);
-    COLLECT_ATTR(DW_AT_byte_size);
-    COLLECT_ATTR(DW_AT_byte_stride);
-    COLLECT_ATTR(DW_AT_const_expr);
-    COLLECT_ATTR(DW_AT_const_value);
-    COLLECT_ATTR(DW_AT_containing_type);
-    COLLECT_ATTR(DW_AT_count);
-    COLLECT_ATTR(DW_AT_data_bit_offset);
-    COLLECT_ATTR(DW_AT_data_location);
-    COLLECT_ATTR(DW_AT_data_member_location);
-    COLLECT_ATTR(DW_AT_decimal_scale);
-    COLLECT_ATTR(DW_AT_decimal_sign);
-    COLLECT_ATTR(DW_AT_default_value);
-    COLLECT_ATTR(DW_AT_digit_count);
-    COLLECT_ATTR(DW_AT_discr);
-    COLLECT_ATTR(DW_AT_discr_list);
-    COLLECT_ATTR(DW_AT_discr_value);
-    COLLECT_ATTR(DW_AT_encoding);
-    COLLECT_ATTR(DW_AT_enum_class);
-    COLLECT_ATTR(DW_AT_endianity);
-    COLLECT_ATTR(DW_AT_explicit);
-    COLLECT_ATTR(DW_AT_is_optional);
-    COLLECT_ATTR(DW_AT_location);
-    COLLECT_ATTR(DW_AT_lower_bound);
-    COLLECT_ATTR(DW_AT_mutable);
-    COLLECT_ATTR(DW_AT_ordering);
-    COLLECT_ATTR(DW_AT_picture_string);
-    COLLECT_ATTR(DW_AT_prototyped);
-    COLLECT_ATTR(DW_AT_small);
-    COLLECT_ATTR(DW_AT_segment);
-    COLLECT_ATTR(DW_AT_string_length);
-    COLLECT_ATTR(DW_AT_threads_scaled);
-    COLLECT_ATTR(DW_AT_upper_bound);
-    COLLECT_ATTR(DW_AT_use_location);
-    COLLECT_ATTR(DW_AT_use_UTF8);
-    COLLECT_ATTR(DW_AT_variable_parameter);
-    COLLECT_ATTR(DW_AT_virtuality);
-    COLLECT_ATTR(DW_AT_visibility);
-    COLLECT_ATTR(DW_AT_vtable_elem_location);
-    COLLECT_ATTR(DW_AT_type);
+      COLLECT_ATTR(DW_AT_name);
+      COLLECT_ATTR(DW_AT_accessibility);
+      COLLECT_ATTR(DW_AT_address_class);
+      COLLECT_ATTR(DW_AT_allocated);
+      COLLECT_ATTR(DW_AT_artificial);
+      COLLECT_ATTR(DW_AT_associated);
+      COLLECT_ATTR(DW_AT_binary_scale);
+      COLLECT_ATTR(DW_AT_bit_offset);
+      COLLECT_ATTR(DW_AT_bit_size);
+      COLLECT_ATTR(DW_AT_bit_stride);
+      COLLECT_ATTR(DW_AT_byte_size);
+      COLLECT_ATTR(DW_AT_byte_stride);
+      COLLECT_ATTR(DW_AT_const_expr);
+      COLLECT_ATTR(DW_AT_const_value);
+      COLLECT_ATTR(DW_AT_containing_type);
+      COLLECT_ATTR(DW_AT_count);
+      COLLECT_ATTR(DW_AT_data_bit_offset);
+      COLLECT_ATTR(DW_AT_data_location);
+      COLLECT_ATTR(DW_AT_data_member_location);
+      COLLECT_ATTR(DW_AT_decimal_scale);
+      COLLECT_ATTR(DW_AT_decimal_sign);
+      COLLECT_ATTR(DW_AT_default_value);
+      COLLECT_ATTR(DW_AT_digit_count);
+      COLLECT_ATTR(DW_AT_discr);
+      COLLECT_ATTR(DW_AT_discr_list);
+      COLLECT_ATTR(DW_AT_discr_value);
+      COLLECT_ATTR(DW_AT_encoding);
+      COLLECT_ATTR(DW_AT_enum_class);
+      COLLECT_ATTR(DW_AT_endianity);
+      COLLECT_ATTR(DW_AT_explicit);
+      COLLECT_ATTR(DW_AT_is_optional);
+      COLLECT_ATTR(DW_AT_location);
+      COLLECT_ATTR(DW_AT_lower_bound);
+      COLLECT_ATTR(DW_AT_mutable);
+      COLLECT_ATTR(DW_AT_ordering);
+      COLLECT_ATTR(DW_AT_picture_string);
+      COLLECT_ATTR(DW_AT_prototyped);
+      COLLECT_ATTR(DW_AT_small);
+      COLLECT_ATTR(DW_AT_segment);
+      COLLECT_ATTR(DW_AT_string_length);
+      COLLECT_ATTR(DW_AT_threads_scaled);
+      COLLECT_ATTR(DW_AT_upper_bound);
+      COLLECT_ATTR(DW_AT_use_location);
+      COLLECT_ATTR(DW_AT_use_UTF8);
+      COLLECT_ATTR(DW_AT_variable_parameter);
+      COLLECT_ATTR(DW_AT_virtuality);
+      COLLECT_ATTR(DW_AT_visibility);
+      COLLECT_ATTR(DW_AT_vtable_elem_location);
+      COLLECT_ATTR(DW_AT_type);
     default:
       break;
     }
@@ -269,6 +272,24 @@
   computeHash(Entry);
 }
 
+// Hash all of the values in a block-like set of values. This assumes that
+// all of the data is going to be added as integers.
+void DIEHash::hashBlockData(const SmallVectorImpl<DIEValue *> &Values) {
+  for (SmallVectorImpl<DIEValue *>::const_iterator I = Values.begin(),
+                                                   E = Values.end();
+       I != E; ++I)
+    Hash.update((uint64_t)cast<DIEInteger>(*I)->getValue());
+}
+
+// Hash the contents of a loclistptr class.
+void DIEHash::hashLocList(const DIELocList &LocList) {
+  HashingByteStreamer Streamer(*this);
+  DwarfDebug &DD = *AP->getDwarfDebug();
+  for (const auto &Entry :
+       DD.getDebugLocEntries()[LocList.getValue()].List)
+    DD.emitDebugLocEntry(Streamer, Entry);
+}
+
 // Hash an individual attribute \param Attr based on the type of attribute and
 // the form.
 void DIEHash::hashAttribute(AttrEntry Attr, dwarf::Tag Tag) {
@@ -276,43 +297,76 @@
   const DIEAbbrevData *Desc = Attr.Desc;
   dwarf::Attribute Attribute = Desc->getAttribute();
 
-  // 7.27 Step 3
-  // ... An attribute that refers to another type entry T is processed as
-  // follows:
-  if (const DIEEntry *EntryAttr = dyn_cast<DIEEntry>(Value)) {
-    hashDIEEntry(Attribute, Tag, *EntryAttr->getEntry());
-    return;
+  // Other attribute values use the letter 'A' as the marker, and the value
+  // consists of the form code (encoded as an unsigned LEB128 value) followed by
+  // the encoding of the value according to the form code. To ensure
+  // reproducibility of the signature, the set of forms used in the signature
+  // computation is limited to the following: DW_FORM_sdata, DW_FORM_flag,
+  // DW_FORM_string, and DW_FORM_block.
+
+  switch (Value->getType()) {
+    // 7.27 Step 3
+    // ... An attribute that refers to another type entry T is processed as
+    // follows:
+  case DIEValue::isEntry:
+    hashDIEEntry(Attribute, Tag, *cast<DIEEntry>(Value)->getEntry());
+    break;
+  case DIEValue::isInteger: {
+    addULEB128('A');
+    addULEB128(Attribute);
+    switch (Desc->getForm()) {
+    case dwarf::DW_FORM_data1:
+    case dwarf::DW_FORM_data2:
+    case dwarf::DW_FORM_data4:
+    case dwarf::DW_FORM_data8:
+    case dwarf::DW_FORM_udata:
+    case dwarf::DW_FORM_sdata:
+      addULEB128(dwarf::DW_FORM_sdata);
+      addSLEB128((int64_t)cast<DIEInteger>(Value)->getValue());
+      break;
+    // DW_FORM_flag_present is just a flag with a value of one. We still give
+    // it a value, so just use the value.
+    case dwarf::DW_FORM_flag_present:
+    case dwarf::DW_FORM_flag:
+      addULEB128(dwarf::DW_FORM_flag);
+      addULEB128((int64_t)cast<DIEInteger>(Value)->getValue());
+      break;
+    default:
+      llvm_unreachable("Unknown integer form!");
+    }
+    break;
   }
-
-  // Other attribute values use the letter 'A' as the marker, ...
-  addULEB128('A');
-
-  addULEB128(Attribute);
-
-  // ... and the value consists of the form code (encoded as an unsigned LEB128
-  // value) followed by the encoding of the value according to the form code. To
-  // ensure reproducibility of the signature, the set of forms used in the
-  // signature computation is limited to the following: DW_FORM_sdata,
-  // DW_FORM_flag, DW_FORM_string, and DW_FORM_block.
-  switch (Desc->getForm()) {
-  case dwarf::DW_FORM_string:
-    llvm_unreachable(
-        "Add support for DW_FORM_string if we ever start emitting them again");
-  case dwarf::DW_FORM_GNU_str_index:
-  case dwarf::DW_FORM_strp:
+  case DIEValue::isString:
+    addULEB128('A');
+    addULEB128(Attribute);
     addULEB128(dwarf::DW_FORM_string);
     addString(cast<DIEString>(Value)->getString());
     break;
-  case dwarf::DW_FORM_data1:
-  case dwarf::DW_FORM_data2:
-  case dwarf::DW_FORM_data4:
-  case dwarf::DW_FORM_data8:
-  case dwarf::DW_FORM_udata:
-    addULEB128(dwarf::DW_FORM_sdata);
-    addSLEB128((int64_t)cast<DIEInteger>(Value)->getValue());
+  case DIEValue::isBlock:
+  case DIEValue::isLoc:
+  case DIEValue::isLocList:
+    addULEB128('A');
+    addULEB128(Attribute);
+    addULEB128(dwarf::DW_FORM_block);
+    if (isa<DIEBlock>(Value)) {
+      addULEB128(cast<DIEBlock>(Value)->ComputeSize(AP));
+      hashBlockData(cast<DIEBlock>(Value)->getValues());
+    } else if (isa<DIELoc>(Value)) {
+      addULEB128(cast<DIELoc>(Value)->ComputeSize(AP));
+      hashBlockData(cast<DIELoc>(Value)->getValues());
+    } else {
+      // We could add the block length, but that would take a bit of
+      // work and would not add much uniqueness to the hash in any way
+      // we could test.
+      hashLocList(*cast<DIELocList>(Value));
+    }
     break;
-  default:
-    llvm_unreachable("Add support for additional forms");
+    // FIXME: It's uncertain whether we should handle this at the moment.
+  case DIEValue::isExpr:
+  case DIEValue::isLabel:
+  case DIEValue::isDelta:
+  case DIEValue::isTypeSignature:
+    llvm_unreachable("Add support for additional value types.");
   }
 }
 
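For reference, the 'A'-marker encoding that the integer case above streams into MD5 follows step 3 of the DWARF 4 type-signature algorithm (section 7.27): the marker, the attribute code, the normalized form code, and the value are each appended as LEB128 bytes. Below is a minimal, self-contained sketch of that byte stream; HashSink and hashIntegerAttribute are hypothetical stand-ins for MD5::update and DIEHash::hashAttribute, not part of this patch.

    #include <cstdint>
    #include <vector>

    struct HashSink {
      std::vector<uint8_t> Bytes; // stands in for MD5::update
      void update(uint8_t B) { Bytes.push_back(B); }
    };

    static void addULEB128(HashSink &H, uint64_t Value) {
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80; // mark that more bytes follow
        H.update(Byte);
      } while (Value != 0);
    }

    static void addSLEB128(HashSink &H, int64_t Value) {
      bool More;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7; // assumes arithmetic shift for negative values
        More = !((Value == 0 && (Byte & 0x40) == 0) ||
                 (Value == -1 && (Byte & 0x40) != 0));
        if (More)
          Byte |= 0x80;
        H.update(Byte);
      } while (More);
    }

    // One integer attribute: 'A', the attribute code, DW_FORM_sdata (all
    // data forms are normalized to sdata), then the value as an SLEB128.
    static void hashIntegerAttribute(HashSink &H, uint64_t Attribute,
                                     int64_t Value) {
      const uint64_t DW_FORM_sdata = 0x0d; // form code from the DWARF spec
      addULEB128(H, 'A');
      addULEB128(H, Attribute);
      addULEB128(H, DW_FORM_sdata);
      addSLEB128(H, Value);
    }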
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.h b/lib/CodeGen/AsmPrinter/DIEHash.h
index f0c4ef9..48f1601 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -11,17 +11,22 @@
 //
 //===----------------------------------------------------------------------===//
 
+#ifndef CODEGEN_ASMPRINTER_DIEHASH_H__
+#define CODEGEN_ASMPRINTER_DIEHASH_H__
+
 #include "DIE.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/MD5.h"
 
 namespace llvm {
 
+class AsmPrinter;
 class CompileUnit;
 
 /// \brief An object containing the capability of hashing and adding hash
 /// attributes onto a DIE.
 class DIEHash {
   // The entry for a particular attribute.
   struct AttrEntry {
     const DIEValue *Val;
@@ -84,6 +89,8 @@
   };
 
 public:
+  DIEHash(AsmPrinter *A = NULL) : AP(A) {}
+
   /// \brief Computes the ODR signature.
   uint64_t computeDIEODRSignature(const DIE &Die);
 
@@ -105,13 +112,17 @@
   void computeHash(const DIE &Die);
 
   // Routines that add DIEValues to the hash.
-private:
+public:
+  /// \brief Adds \param Value to the hash.
+  void update(uint8_t Value) { Hash.update(Value); }
+
   /// \brief Encodes and adds \param Value to the hash as a ULEB128.
   void addULEB128(uint64_t Value);
 
   /// \brief Encodes and adds \param Value to the hash as a SLEB128.
   void addSLEB128(int64_t Value);
 
+private:
   /// \brief Adds \param Str to the hash and includes a NULL byte.
   void addString(StringRef Str);
 
@@ -122,6 +133,13 @@
   /// \brief Hashes the attributes in \param Attrs in order.
   void hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag);
 
+  /// \brief Hashes the data in a block-like DIEValue, e.g. DW_FORM_block or
+  /// DW_FORM_exprloc.
+  void hashBlockData(const SmallVectorImpl<DIEValue *> &Values);
+
+  /// \brief Hashes the contents of a location list in the .debug_loc
+  /// section.
+  void hashLocList(const DIELocList &LocList);
+
   /// \brief Hashes an individual attribute.
   void hashAttribute(AttrEntry Attr, dwarf::Tag Tag);
 
@@ -136,12 +154,16 @@
                                 StringRef Name);
 
   /// \brief Hashes a reference to a previously referenced type DIE.
-  void hashRepeatedTypeReference(dwarf::Attribute Attribute, unsigned DieNumber);
+  void hashRepeatedTypeReference(dwarf::Attribute Attribute,
+                                 unsigned DieNumber);
 
   void hashNestedType(const DIE &Die, StringRef Name);
 
 private:
   MD5 Hash;
+  AsmPrinter *AP;
   DenseMap<const DIE *, unsigned> Numbering;
 };
 }
+
+#endif
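Making update() and addULEB128() public is what allows the HashingByteStreamer used by hashLocList() to forward every byte that would otherwise be emitted into the .debug_loc section straight into the hash. A rough sketch of that adapter shape, with stand-in names assumed here rather than taken from the patch:

    #include <cstdint>

    struct DIEHashLike {                 // stands in for DIEHash
      void update(uint8_t /*Byte*/) {}   // forwards to MD5 in the real class
      void addULEB128(uint64_t /*V*/) {}
      void addSLEB128(int64_t /*V*/) {}
    };

    // Receives the same calls an emitting streamer would, but hashes the
    // bytes instead of writing them to a section.
    class HashingStreamerSketch {
      DIEHashLike &Hash;
    public:
      explicit HashingStreamerSketch(DIEHashLike &H) : Hash(H) {}
      void EmitInt8(uint8_t Byte) { Hash.update(Byte); }
      void EmitULEB128(uint64_t V) { Hash.addULEB128(V); }
      void EmitSLEB128(int64_t V) { Hash.addSLEB128(V); }
    };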
diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
new file mode 100644
index 0000000..470453f
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -0,0 +1,123 @@
+//===-- llvm/CodeGen/DebugLocEntry.h - Entry in debug_loc list -*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H__
+#define CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H__
+#include "llvm/IR/Constants.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCSymbol.h"
+
+namespace llvm {
+class DwarfCompileUnit;
+class MDNode;
+/// \brief This class describes location entries emitted in the .debug_loc
+/// section.
+class DebugLocEntry {
+  // Begin and end symbols for the address range over which this location is
+  // valid.
+  const MCSymbol *Begin;
+  const MCSymbol *End;
+
+  // Type of entry that this represents.
+  enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt };
+  enum EntryType EntryKind;
+
+  union {
+    int64_t Int;
+    const ConstantFP *CFP;
+    const ConstantInt *CIP;
+  } Constants;
+
+  // The location in the machine frame.
+  MachineLocation Loc;
+
+  // The variable to which this location entry corresponds.
+  const MDNode *Variable;
+
+  // The compile unit by which this location entry is referenced.
+  const DwarfCompileUnit *Unit;
+
+  bool hasSameValueOrLocation(const DebugLocEntry &Next) {
+    if (EntryKind != Next.EntryKind)
+      return false;
+
+    bool EqualValues;
+    switch (EntryKind) {
+    case E_Location:
+      EqualValues = Loc == Next.Loc;
+      break;
+    case E_Integer:
+      EqualValues = Constants.Int == Next.Constants.Int;
+      break;
+    case E_ConstantFP:
+      EqualValues = Constants.CFP == Next.Constants.CFP;
+      break;
+    case E_ConstantInt:
+      EqualValues = Constants.CIP == Next.Constants.CIP;
+      break;
+    }
+
+    return EqualValues;
+  }
+
+public:
+  DebugLocEntry() : Begin(0), End(0), Variable(0), Unit(0) {
+    Constants.Int = 0;
+    EntryKind = E_Location; // give the kind a defined value for Merge()
+  }
+  DebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L,
+                const MDNode *V, const DwarfCompileUnit *U)
+      : Begin(B), End(E), Loc(L), Variable(V), Unit(U) {
+    Constants.Int = 0;
+    EntryKind = E_Location;
+  }
+  DebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i,
+                const DwarfCompileUnit *U)
+      : Begin(B), End(E), Variable(0), Unit(U) {
+    Constants.Int = i;
+    EntryKind = E_Integer;
+  }
+  DebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr,
+                const DwarfCompileUnit *U)
+      : Begin(B), End(E), Variable(0), Unit(U) {
+    Constants.CFP = FPtr;
+    EntryKind = E_ConstantFP;
+  }
+  DebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantInt *IPtr,
+                const DwarfCompileUnit *U)
+      : Begin(B), End(E), Variable(0), Unit(U) {
+    Constants.CIP = IPtr;
+    EntryKind = E_ConstantInt;
+  }
+
+  /// \brief Attempt to merge this DebugLocEntry with Next and return
+  /// true if the merge was successful. Entries can be merged if they
+  /// share the same Loc/Constant and if Next immediately follows this
+  /// Entry.
+  bool Merge(const DebugLocEntry &Next) {
+    if (End == Next.Begin && hasSameValueOrLocation(Next)) {
+      End = Next.End;
+      return true;
+    }
+    return false;
+  }
+  bool isLocation() const { return EntryKind == E_Location; }
+  bool isInt() const { return EntryKind == E_Integer; }
+  bool isConstantFP() const { return EntryKind == E_ConstantFP; }
+  bool isConstantInt() const { return EntryKind == E_ConstantInt; }
+  int64_t getInt() const { return Constants.Int; }
+  const ConstantFP *getConstantFP() const { return Constants.CFP; }
+  const ConstantInt *getConstantInt() const { return Constants.CIP; }
+  const MDNode *getVariable() const { return Variable; }
+  const MCSymbol *getBeginSym() const { return Begin; }
+  const MCSymbol *getEndSym() const { return End; }
+  const DwarfCompileUnit *getCU() const { return Unit; }
+  MachineLocation getLoc() const { return Loc; }
+};
+
+}
+#endif
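The Merge() rule above is the interesting part of this header: two entries coalesce only when the first ends exactly where the second begins and both carry the same location or constant. A self-contained sketch of that rule, using plain ints in place of the MCSymbol range labels:

    #include <cassert>
    #include <cstdint>

    struct LocEntrySketch {
      int Begin, End;   // stand-ins for the MCSymbol labels
      int64_t Value;    // stand-in for the Loc/Constants payload

      bool Merge(const LocEntrySketch &Next) {
        if (End == Next.Begin && Value == Next.Value) {
          End = Next.End; // extend this entry over Next's range
          return true;
        }
        return false;
      }
    };

    int main() {
      LocEntrySketch A{0, 4, 42}, B{4, 8, 42}, C{9, 12, 42};
      assert(A.Merge(B));  // adjacent and equal: coalesced into [0, 8)
      assert(!A.Merge(C)); // gap between 8 and 9: kept separate
      return 0;
    }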
diff --git a/lib/CodeGen/AsmPrinter/DebugLocList.h b/lib/CodeGen/AsmPrinter/DebugLocList.h
new file mode 100644
index 0000000..7a51c7b
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DebugLocList.h
@@ -0,0 +1,23 @@
+//===--- lib/CodeGen/DebugLocList.h - DWARF debug_loc list ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DEBUGLOCLIST_H__
+#define CODEGEN_ASMPRINTER_DEBUGLOCLIST_H__
+
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/ADT/SmallVector.h"
+#include "DebugLocEntry.h"
+
+namespace llvm {
+struct DebugLocList {
+  MCSymbol *Label;
+  SmallVector<DebugLocEntry, 4> List;
+};
+}
+#endif
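A DIELocList carries only an index; resolving it means looking up the corresponding DebugLocList in DwarfDebug's table and walking its entries, which is what DIEHash::hashLocList() does with DD.getDebugLocEntries()[LocList.getValue()].List. A sketch of that indirection with hypothetical stand-in types:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct EntrySketch { int64_t Value; }; // stands in for DebugLocEntry
    struct LocListSketch { std::vector<EntrySketch> List; };

    // The whole debug-loc table, indexed by the integer a DIELocList holds.
    struct DebugLocTableSketch {
      std::vector<LocListSketch> Lists;
    };

    // Re-emit every entry of the selected list into a hash callback,
    // mirroring hashLocList's loop over getDebugLocEntries()[Index].List.
    template <typename HashFn>
    void hashLocListSketch(const DebugLocTableSketch &Table, size_t Index,
                           HashFn &&HashEntry) {
      for (const EntrySketch &E : Table.Lists[Index].List)
        HashEntry(E);
    }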
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index 689aeda..bcbb6c8 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -31,7 +31,7 @@
 
 DwarfAccelTable::~DwarfAccelTable() {}
 
-void DwarfAccelTable::AddName(StringRef Name, DIE *die, char Flags) {
+void DwarfAccelTable::AddName(StringRef Name, const DIE *die, char Flags) {
   assert(Data.empty() && "Already finalized!");
   // If the string is in the list already then add this die to the list
   // otherwise add a new one.
@@ -172,7 +172,7 @@
 // Walk through the buckets and emit the full data for each element in
 // the bucket. For the string case emit the dies and the various offsets.
 // Terminate each HashData bucket with 0.
-void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) {
+void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfFile *D) {
   uint64_t PrevHash = UINT64_MAX;
   for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
     for (HashList::const_iterator HI = Buckets[i].begin(),
@@ -207,7 +207,7 @@
 }
 
 // Emit the entire data structure to the output file.
-void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfUnits *D) {
+void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfFile *D) {
   // Emit the header.
   EmitHeader(Asm);
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index 7627313..4a14497 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -24,7 +24,6 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/FormattedStream.h"
-#include <map>
 #include <vector>
 
 // The dwarf accelerator tables are an indirect hash table optimized
@@ -62,8 +61,7 @@
 namespace llvm {
 
 class AsmPrinter;
-class DIE;
-class DwarfUnits;
+class DwarfFile;
 
 class DwarfAccelTable {
 
@@ -165,10 +163,10 @@
   // HashData[hash_data_count]
 public:
   struct HashDataContents {
-    DIE *Die;   // Offsets
+    const DIE *Die;   // Offsets
     char Flags; // Specific flags to output
 
-    HashDataContents(DIE *D, char Flags) : Die(D), Flags(Flags) {}
+    HashDataContents(const DIE *D, char Flags) : Die(D), Flags(Flags) {}
 #ifndef NDEBUG
     void print(raw_ostream &O) const {
       O << "  Offset: " << Die->getOffset() << "\n";
@@ -216,7 +214,7 @@
   void EmitBuckets(AsmPrinter *);
   void EmitHashes(AsmPrinter *);
   void EmitOffsets(AsmPrinter *, MCSymbol *);
-  void EmitData(AsmPrinter *, DwarfUnits *D);
+  void EmitData(AsmPrinter *, DwarfFile *D);
 
   // Allocator for HashData and HashDataContents.
   BumpPtrAllocator Allocator;
@@ -241,9 +239,9 @@
 public:
   DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>);
   ~DwarfAccelTable();
-  void AddName(StringRef, DIE *, char = 0);
+  void AddName(StringRef, const DIE *, char = 0);
   void FinalizeTable(AsmPrinter *, StringRef);
-  void Emit(AsmPrinter *, MCSymbol *, DwarfUnits *);
+  void Emit(AsmPrinter *, MCSymbol *, DwarfFile *);
 #ifndef NDEBUG
   void print(raw_ostream &O);
   void dump() { print(dbgs()); }
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 8918f3d..30312ac 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -20,6 +20,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Mangler.h"
 #include "llvm/IR/Module.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
@@ -31,7 +32,6 @@
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
-#include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
@@ -46,9 +46,9 @@
 
 DwarfCFIException::~DwarfCFIException() {}
 
-/// EndModule - Emit all exception information that should come after the
+/// endModule - Emit all exception information that should come after the
 /// content.
-void DwarfCFIException::EndModule() {
+void DwarfCFIException::endModule() {
   if (moveTypeModule == AsmPrinter::CFI_M_Debug)
     Asm->OutStreamer.EmitCFISections(false, true);
 
@@ -82,9 +82,9 @@
   }
 }
 
-/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// beginFunction - Gather pre-function exception information. Assumes it's
 /// being emitted immediately after the function entry point.
-void DwarfCFIException::BeginFunction(const MachineFunction *MF) {
+void DwarfCFIException::beginFunction(const MachineFunction *MF) {
   shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
 
   // If any landing pads survive, we need an EH table.
@@ -113,13 +113,14 @@
   if (!shouldEmitPersonality && !shouldEmitMoves)
     return;
 
-  Asm->OutStreamer.EmitCFIStartProc();
+  Asm->OutStreamer.EmitCFIStartProc(/*IsSimple=*/false);
 
   // Indicate personality routine, if any.
   if (!shouldEmitPersonality)
     return;
 
-  const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI);
+  const MCSymbol *Sym =
+      TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
   Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding);
 
   Asm->OutStreamer.EmitDebugLabel
@@ -135,9 +136,9 @@
                                LSDAEncoding);
 }
 
-/// EndFunction - Gather and emit post-function exception information.
+/// endFunction - Gather and emit post-function exception information.
 ///
-void DwarfCFIException::EndFunction() {
+void DwarfCFIException::endFunction(const MachineFunction *) {
   if (!shouldEmitPersonality && !shouldEmitMoves)
     return;
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
deleted file mode 100644
index d782c88..0000000
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ /dev/null
@@ -1,409 +0,0 @@
-//===-- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit ---*- C++ -*--===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains support for writing dwarf compile unit.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
-#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
-
-#include "DIE.h"
-#include "DwarfDebug.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/MC/MCExpr.h"
-
-namespace llvm {
-
-class MachineLocation;
-class MachineOperand;
-class ConstantInt;
-class ConstantFP;
-class DbgVariable;
-
-//===----------------------------------------------------------------------===//
-/// CompileUnit - This dwarf writer support class manages information associated
-/// with a source file.
-class CompileUnit {
-  /// UniqueID - a numeric ID unique among all CUs in the module
-  ///
-  unsigned UniqueID;
-
-  /// Node - MDNode for the compile unit.
-  DICompileUnit Node;
-
-  /// CUDie - Compile unit debug information entry.
-  ///
-  const OwningPtr<DIE> CUDie;
-
-  /// Asm - Target of Dwarf emission.
-  AsmPrinter *Asm;
-
-  // Holders for some common dwarf information.
-  DwarfDebug *DD;
-  DwarfUnits *DU;
-
-  /// IndexTyDie - An anonymous type for index type.  Owned by CUDie.
-  DIE *IndexTyDie;
-
-  /// MDNodeToDieMap - Tracks the mapping of unit level debug information
-  /// variables to debug information entries.
-  DenseMap<const MDNode *, DIE *> MDNodeToDieMap;
-
-  /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug information
-  /// descriptors to debug information entries using a DIEEntry proxy.
-  DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
-
-  /// GlobalNames - A map of globally visible named entities for this unit.
-  ///
-  StringMap<DIE *> GlobalNames;
-
-  /// GlobalTypes - A map of globally visible types for this unit.
-  ///
-  StringMap<DIE *> GlobalTypes;
-
-  /// AccelNames - A map of names for the name accelerator table.
-  ///
-  StringMap<std::vector<DIE *> > AccelNames;
-  StringMap<std::vector<DIE *> > AccelObjC;
-  StringMap<std::vector<DIE *> > AccelNamespace;
-  StringMap<std::vector<std::pair<DIE *, unsigned> > > AccelTypes;
-
-  /// DIEBlocks - A list of all the DIEBlocks in use.
-  std::vector<DIEBlock *> DIEBlocks;
-
-  /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that
-  /// need DW_AT_containing_type attribute. This attribute points to a DIE that
-  /// corresponds to the MDNode mapped with the subprogram DIE.
-  DenseMap<DIE *, const MDNode *> ContainingTypeMap;
-
-  // DIEValueAllocator - All DIEValues are allocated through this allocator.
-  BumpPtrAllocator DIEValueAllocator;
-
-  // DIEIntegerOne - A preallocated DIEValue because 1 is used frequently.
-  DIEInteger *DIEIntegerOne;
-
-public:
-  CompileUnit(unsigned UID, DIE *D, DICompileUnit CU, AsmPrinter *A,
-              DwarfDebug *DW, DwarfUnits *DWU);
-  ~CompileUnit();
-
-  // Accessors.
-  unsigned getUniqueID() const { return UniqueID; }
-  uint16_t getLanguage() const { return Node.getLanguage(); }
-  DICompileUnit getNode() const { return Node; }
-  DIE *getCUDie() const { return CUDie.get(); }
-  const StringMap<DIE *> &getGlobalNames() const { return GlobalNames; }
-  const StringMap<DIE *> &getGlobalTypes() const { return GlobalTypes; }
-
-  const StringMap<std::vector<DIE *> > &getAccelNames() const {
-    return AccelNames;
-  }
-  const StringMap<std::vector<DIE *> > &getAccelObjC() const {
-    return AccelObjC;
-  }
-  const StringMap<std::vector<DIE *> > &getAccelNamespace() const {
-    return AccelNamespace;
-  }
-  const StringMap<std::vector<std::pair<DIE *, unsigned> > > &
-  getAccelTypes() const {
-    return AccelTypes;
-  }
-
-  unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
-  void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
-
-  /// hasContent - Return true if this compile unit has something to write out.
-  ///
-  bool hasContent() const { return !CUDie->getChildren().empty(); }
-
-  /// getParentContextString - Get a string containing the language specific
-  /// context for a global name.
-  std::string getParentContextString(DIScope Context) const;
-
-  /// addGlobalName - Add a new global entity to the compile unit.
-  ///
-  void addGlobalName(StringRef Name, DIE *Die, DIScope Context);
-
-  /// addGlobalType - Add a new global type to the compile unit.
-  ///
-  void addGlobalType(DIType Ty);
-
-  /// addPubTypes - Add a set of types from the subprogram to the global types.
-  void addPubTypes(DISubprogram SP);
-
-  /// addAccelName - Add a new name to the name accelerator table.
-  void addAccelName(StringRef Name, DIE *Die);
-
-  /// addAccelObjC - Add a new name to the ObjC accelerator table.
-  void addAccelObjC(StringRef Name, DIE *Die);
-
-  /// addAccelNamespace - Add a new name to the namespace accelerator table.
-  void addAccelNamespace(StringRef Name, DIE *Die);
-
-  /// addAccelType - Add a new type to the type accelerator table.
-  void addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die);
-
-  /// getDIE - Returns the debug information entry map slot for the
-  /// specified debug variable. We delegate the request to DwarfDebug
-  /// when the MDNode can be part of the type system, since DIEs for
-  /// the type system can be shared across CUs and the mappings are
-  /// kept in DwarfDebug.
-  DIE *getDIE(DIDescriptor D) const;
-
-  DIEBlock *getDIEBlock() { return new (DIEValueAllocator) DIEBlock(); }
-
-  /// insertDIE - Insert DIE into the map. We delegate the request to DwarfDebug
-  /// when the MDNode can be part of the type system, since DIEs for
-  /// the type system can be shared across CUs and the mappings are
-  /// kept in DwarfDebug.
-  void insertDIE(DIDescriptor Desc, DIE *D);
-
-  /// addDie - Adds or interns the DIE to the compile unit.
-  ///
-  void addDie(DIE *Buffer) { CUDie->addChild(Buffer); }
-
-  /// addFlag - Add a flag that is true to the DIE.
-  void addFlag(DIE *Die, dwarf::Attribute Attribute);
-
-  /// addUInt - Add an unsigned integer attribute data and value.
-  ///
-  void addUInt(DIE *Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form,
-               uint64_t Integer);
-
-  void addUInt(DIEBlock *Block, dwarf::Form Form, uint64_t Integer);
-
-  /// addSInt - Add an signed integer attribute data and value.
-  ///
-  void addSInt(DIE *Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form,
-               int64_t Integer);
-
-  void addSInt(DIEBlock *Die, Optional<dwarf::Form> Form, int64_t Integer);
-
-  /// addString - Add a string attribute data and value.
-  ///
-  void addString(DIE *Die, dwarf::Attribute Attribute, const StringRef Str);
-
-  /// addLocalString - Add a string attribute data and value.
-  ///
-  void addLocalString(DIE *Die, dwarf::Attribute Attribute, const StringRef Str);
-
-  /// addExpr - Add a Dwarf expression attribute data and value.
-  ///
-  void addExpr(DIEBlock *Die, dwarf::Form Form, const MCExpr *Expr);
-
-  /// addLabel - Add a Dwarf label attribute data and value.
-  ///
-  void addLabel(DIE *Die, dwarf::Attribute Attribute, dwarf::Form Form,
-                const MCSymbol *Label);
-
-  void addLabel(DIEBlock *Die, dwarf::Form Form, const MCSymbol *Label);
-
-  /// addLabelAddress - Add a dwarf label attribute data and value using
-  /// either DW_FORM_addr or DW_FORM_GNU_addr_index.
-  ///
-  void addLabelAddress(DIE *Die, dwarf::Attribute Attribute, MCSymbol *Label);
-
-  /// addOpAddress - Add a dwarf op address data and value using the
-  /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
-  ///
-  void addOpAddress(DIEBlock *Die, const MCSymbol *Label);
-
-  /// addDelta - Add a label delta attribute data and value.
-  ///
-  void addDelta(DIE *Die, dwarf::Attribute Attribute, dwarf::Form Form, const MCSymbol *Hi,
-                const MCSymbol *Lo);
-
-  /// addDIEEntry - Add a DIE attribute data and value.
-  ///
-  void addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIE *Entry);
-
-  /// addDIEEntry - Add a DIE attribute data and value.
-  ///
-  void addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIEEntry *Entry);
-
-  /// addBlock - Add block data.
-  ///
-  void addBlock(DIE *Die, dwarf::Attribute Attribute, DIEBlock *Block);
-
-  /// addSourceLine - Add location information to specified debug information
-  /// entry.
-  void addSourceLine(DIE *Die, DIVariable V);
-  void addSourceLine(DIE *Die, DIGlobalVariable G);
-  void addSourceLine(DIE *Die, DISubprogram SP);
-  void addSourceLine(DIE *Die, DIType Ty);
-  void addSourceLine(DIE *Die, DINameSpace NS);
-  void addSourceLine(DIE *Die, DIObjCProperty Ty);
-
-  /// addAddress - Add an address attribute to a die based on the location
-  /// provided.
-  void addAddress(DIE *Die, dwarf::Attribute Attribute, const MachineLocation &Location,
-                  bool Indirect = false);
-
-  /// addConstantValue - Add constant value entry in variable DIE.
-  void addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty);
-  void addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned);
-  void addConstantValue(DIE *Die, const APInt &Val, bool Unsigned);
-
-  /// addConstantFPValue - Add constant value entry in variable DIE.
-  void addConstantFPValue(DIE *Die, const MachineOperand &MO);
-  void addConstantFPValue(DIE *Die, const ConstantFP *CFP);
-
-  /// addTemplateParams - Add template parameters in buffer.
-  void addTemplateParams(DIE &Buffer, DIArray TParams);
-
-  /// addRegisterOp - Add register operand.
-  void addRegisterOp(DIEBlock *TheDie, unsigned Reg);
-
-  /// addRegisterOffset - Add register offset.
-  void addRegisterOffset(DIEBlock *TheDie, unsigned Reg, int64_t Offset);
-
-  /// addComplexAddress - Start with the address based on the location provided,
-  /// and generate the DWARF information necessary to find the actual variable
-  /// (navigating the extra location information encoded in the type) based on
-  /// the starting location.  Add the DWARF information to the die.
-  ///
-  void addComplexAddress(const DbgVariable &DV, DIE *Die, dwarf::Attribute Attribute,
-                         const MachineLocation &Location);
-
-  // FIXME: Should be reformulated in terms of addComplexAddress.
-  /// addBlockByrefAddress - Start with the address based on the location
-  /// provided, and generate the DWARF information necessary to find the
-  /// actual Block variable (navigating the Block struct) based on the
-  /// starting location.  Add the DWARF information to the die.  Obsolete,
-  /// please use addComplexAddress instead.
-  ///
-  void addBlockByrefAddress(const DbgVariable &DV, DIE *Die, dwarf::Attribute Attribute,
-                            const MachineLocation &Location);
-
-  /// addVariableAddress - Add DW_AT_location attribute for a
-  /// DbgVariable based on provided MachineLocation.
-  void addVariableAddress(const DbgVariable &DV, DIE *Die,
-                          MachineLocation Location);
-
-  /// addType - Add a new type attribute to the specified entity. This takes
-  /// and attribute parameter because DW_AT_friend attributes are also
-  /// type references.
-  void addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute = dwarf::DW_AT_type);
-
-  /// getOrCreateNameSpace - Create a DIE for DINameSpace.
-  DIE *getOrCreateNameSpace(DINameSpace NS);
-
-  /// getOrCreateSubprogramDIE - Create new DIE using SP.
-  DIE *getOrCreateSubprogramDIE(DISubprogram SP);
-
-  /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
-  /// given DIType.
-  DIE *getOrCreateTypeDIE(const MDNode *N);
-
-  /// getOrCreateContextDIE - Get context owner's DIE.
-  DIE *getOrCreateContextDIE(DIScope Context);
-
-  /// createGlobalVariableDIE - create global variable DIE.
-  void createGlobalVariableDIE(DIGlobalVariable GV);
-
-  /// constructContainingTypeDIEs - Construct DIEs for types that contain
-  /// vtables.
-  void constructContainingTypeDIEs();
-
-  /// constructVariableDIE - Construct a DIE for the given DbgVariable.
-  DIE *constructVariableDIE(DbgVariable &DV, bool isScopeAbstract);
-
-  /// Create a DIE with the given Tag, add the DIE to its parent, and
-  /// call insertDIE if MD is not null.
-  DIE *createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N = DIDescriptor());
-
-  /// Compute the size of a header for this unit, not including the initial
-  /// length field.
-  unsigned getHeaderSize() const {
-    return sizeof(int16_t) + // DWARF version number
-           sizeof(int32_t) + // Offset Into Abbrev. Section
-           sizeof(int8_t);   // Pointer Size (in bytes)
-  }
-
-  /// Emit the header for this unit, not including the initial length field.
-  void emitHeader(const MCSection *ASection, const MCSymbol *ASectionSym);
-
-private:
-  /// constructTypeDIE - Construct basic type die from DIBasicType.
-  void constructTypeDIE(DIE &Buffer, DIBasicType BTy);
-
-  /// constructTypeDIE - Construct derived type die from DIDerivedType.
-  void constructTypeDIE(DIE &Buffer, DIDerivedType DTy);
-
-  /// constructTypeDIE - Construct type DIE from DICompositeType.
-  void constructTypeDIE(DIE &Buffer, DICompositeType CTy);
-
-  /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
-  void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
-
-  /// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
-  void constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy);
-
-  /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
-  void constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy);
-
-  /// constructMemberDIE - Construct member DIE from DIDerivedType.
-  void constructMemberDIE(DIE &Buffer, DIDerivedType DT);
-
-  /// constructTemplateTypeParameterDIE - Construct new DIE for the given
-  /// DITemplateTypeParameter.
-  void constructTemplateTypeParameterDIE(DIE &Buffer,
-                                         DITemplateTypeParameter TP);
-
-  /// constructTemplateValueParameterDIE - Construct new DIE for the given
-  /// DITemplateValueParameter.
-  void constructTemplateValueParameterDIE(DIE &Buffer,
-                                          DITemplateValueParameter TVP);
-
-  /// getOrCreateStaticMemberDIE - Create new static data member DIE.
-  DIE *getOrCreateStaticMemberDIE(DIDerivedType DT);
-
-  /// Offset of the CUDie from beginning of debug info section.
-  unsigned DebugInfoOffset;
-
-  /// getLowerBoundDefault - Return the default lower bound for an array. If the
-  /// DWARF version doesn't handle the language, return -1.
-  int64_t getDefaultLowerBound() const;
-
-  /// getDIEEntry - Returns the debug information entry for the specified
-  /// debug variable.
-  DIEEntry *getDIEEntry(const MDNode *N) const {
-    return MDNodeToDIEEntryMap.lookup(N);
-  }
-
-  /// insertDIEEntry - Insert debug information entry into the map.
-  void insertDIEEntry(const MDNode *N, DIEEntry *E) {
-    MDNodeToDIEEntryMap.insert(std::make_pair(N, E));
-  }
-
-  // getIndexTyDie - Get an anonymous type for index type.
-  DIE *getIndexTyDie() { return IndexTyDie; }
-
-  // setIndexTyDie - Set D as anonymous type for index which can be reused
-  // later.
-  void setIndexTyDie(DIE *D) { IndexTyDie = D; }
-
-  /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
-  /// information entry.
-  DIEEntry *createDIEEntry(DIE *Entry);
-
-  /// resolve - Look in the DwarfDebug map for the MDNode that
-  /// corresponds to the reference.
-  template <typename T> T resolve(DIRef<T> Ref) const {
-    return DD->resolve(Ref);
-  }
-};
-
-} // end llvm namespace
-#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 24e2c05..11345eb 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -12,23 +12,25 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "dwarfdebug"
+#include "ByteStreamer.h"
 #include "DwarfDebug.h"
 #include "DIE.h"
 #include "DIEHash.h"
 #include "DwarfAccelTable.h"
-#include "DwarfCompileUnit.h"
+#include "DwarfUnit.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/DIBuilder.h"
-#include "llvm/DebugInfo.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/ValueHandle.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
@@ -38,10 +40,10 @@
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Support/MD5.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Timer.h"
-#include "llvm/Support/ValueHandle.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
@@ -59,26 +61,17 @@
     cl::init(false));
 
 static cl::opt<bool>
-GenerateODRHash("generate-odr-hash", cl::Hidden,
-                cl::desc("Add an ODR hash to external type DIEs."),
-                cl::init(false));
-
-static cl::opt<bool>
-GenerateCUHash("generate-cu-hash", cl::Hidden,
-               cl::desc("Add the CU hash as the dwo_id."),
-               cl::init(false));
-
-static cl::opt<bool>
 GenerateGnuPubSections("generate-gnu-dwarf-pub-sections", cl::Hidden,
                        cl::desc("Generate GNU-style pubnames and pubtypes"),
                        cl::init(false));
 
+static cl::opt<bool> GenerateARangeSection("generate-arange-section",
+                                           cl::Hidden,
+                                           cl::desc("Generate dwarf aranges"),
+                                           cl::init(false));
+
 namespace {
-enum DefaultOnOff {
-  Default,
-  Enable,
-  Disable
-};
+enum DefaultOnOff { Default, Enable, Disable };
 }
 
 static cl::opt<DefaultOnOff>
@@ -91,7 +84,7 @@
 
 static cl::opt<DefaultOnOff>
 SplitDwarf("split-dwarf", cl::Hidden,
-           cl::desc("Output prototype dwarf split debug info."),
+           cl::desc("Output DWARF5 split debug info."),
            cl::values(clEnumVal(Default, "Default for platform"),
                       clEnumVal(Enable, "Enabled"),
                       clEnumVal(Disable, "Disabled"), clEnumValEnd),
@@ -105,29 +98,34 @@
                             clEnumVal(Disable, "Disabled"), clEnumValEnd),
                  cl::init(Default));
 
+static cl::opt<unsigned>
+DwarfVersionNumber("dwarf-version", cl::Hidden,
+                   cl::desc("Generate DWARF for dwarf version."), cl::init(0));
+
 static const char *const DWARFGroupName = "DWARF Emission";
 static const char *const DbgTimerName = "DWARF Debug Writer";
 
 //===----------------------------------------------------------------------===//
 
-// Configuration values for initial hash set sizes (log2).
-//
-static const unsigned InitAbbreviationsSetSize = 9; // log2(512)
-
 namespace llvm {
 
 /// resolve - Look in the DwarfDebug map for the MDNode that
 /// corresponds to the reference.
-template <typename T>
-T DbgVariable::resolve(DIRef<T> Ref) const {
+template <typename T> T DbgVariable::resolve(DIRef<T> Ref) const {
   return DD->resolve(Ref);
 }
 
+bool DbgVariable::isBlockByrefVariable() const {
+  assert(Var.isVariable() && "Invalid complex DbgVariable!");
+  return Var.isBlockByrefVariable(DD->getTypeIdentifierMap());
+}
+
 DIType DbgVariable::getType() const {
-  DIType Ty = Var.getType();
+  DIType Ty = Var.getType().resolve(DD->getTypeIdentifierMap());
   // FIXME: isBlockByrefVariable should be reformulated in terms of complex
   // addresses instead.
-  if (Var.isBlockByrefVariable()) {
+  if (Var.isBlockByrefVariable(DD->getTypeIdentifierMap())) {
     /* Byref variables, in Blocks, are declared by the programmer as
        "SomeType VarName;", but the compiler creates a
        __Block_byref_x_VarName struct, and gives the variable VarName
@@ -179,22 +177,18 @@
 }
 
 DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
-  : Asm(A), MMI(Asm->MMI), FirstCU(0),
-    AbbreviationsSet(InitAbbreviationsSetSize),
-    SourceIdMap(DIEValueAllocator),
-    PrevLabel(NULL), GlobalCUIndexCount(0),
-    InfoHolder(A, &AbbreviationsSet, Abbreviations, "info_string",
-               DIEValueAllocator),
-    SkeletonAbbrevSet(InitAbbreviationsSetSize),
-    SkeletonHolder(A, &SkeletonAbbrevSet, SkeletonAbbrevs, "skel_string",
-                   DIEValueAllocator) {
+    : Asm(A), MMI(Asm->MMI), FirstCU(0), PrevLabel(NULL), GlobalRangeCount(0),
+      InfoHolder(A, "info_string", DIEValueAllocator),
+      UsedNonDefaultText(false),
+      SkeletonHolder(A, "skel_string", DIEValueAllocator) {
 
-  DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
-  DwarfStrSectionSym = TextSectionSym = 0;
+  DwarfInfoSectionSym = DwarfAbbrevSectionSym = DwarfStrSectionSym = 0;
   DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = DwarfLineSectionSym = 0;
   DwarfAddrSectionSym = 0;
   DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0;
   FunctionBeginSym = FunctionEndSym = 0;
+  CurFn = 0;
+  CurMI = 0;
 
   // Turn on accelerator tables for Darwin by default, pubnames by
   // default for non-Darwin, and handle split dwarf.
@@ -215,7 +209,9 @@
   else
     HasDwarfPubSections = DwarfPubSections == Enable;
 
-  DwarfVersion = getDwarfVersionFromModule(MMI->getModule());
+  DwarfVersion = DwarfVersionNumber
+                     ? DwarfVersionNumber
+                     : getDwarfVersionFromModule(MMI->getModule());
 
   {
     NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
@@ -228,53 +224,57 @@
 static MCSymbol *emitSectionSym(AsmPrinter *Asm, const MCSection *Section,
                                 const char *SymbolStem = 0) {
   Asm->OutStreamer.SwitchSection(Section);
-  if (!SymbolStem) return 0;
+  if (!SymbolStem)
+    return 0;
 
   MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem);
   Asm->OutStreamer.EmitLabel(TmpSym);
   return TmpSym;
 }
 
-MCSymbol *DwarfUnits::getStringPoolSym() {
+DwarfFile::~DwarfFile() {
+  for (DwarfUnit *DU : CUs)
+    delete DU;
+}
+
+MCSymbol *DwarfFile::getStringPoolSym() {
   return Asm->GetTempSymbol(StringPref);
 }
 
-MCSymbol *DwarfUnits::getStringPoolEntry(StringRef Str) {
-  std::pair<MCSymbol*, unsigned> &Entry =
-    StringPool.GetOrCreateValue(Str).getValue();
-  if (Entry.first) return Entry.first;
+MCSymbol *DwarfFile::getStringPoolEntry(StringRef Str) {
+  std::pair<MCSymbol *, unsigned> &Entry =
+      StringPool.GetOrCreateValue(Str).getValue();
+  if (Entry.first)
+    return Entry.first;
 
   Entry.second = NextStringPoolNumber++;
   return Entry.first = Asm->GetTempSymbol(StringPref, Entry.second);
 }
 
-unsigned DwarfUnits::getStringPoolIndex(StringRef Str) {
-  std::pair<MCSymbol*, unsigned> &Entry =
-    StringPool.GetOrCreateValue(Str).getValue();
-  if (Entry.first) return Entry.second;
+unsigned DwarfFile::getStringPoolIndex(StringRef Str) {
+  std::pair<MCSymbol *, unsigned> &Entry =
+      StringPool.GetOrCreateValue(Str).getValue();
+  if (Entry.first)
+    return Entry.second;
 
   Entry.second = NextStringPoolNumber++;
   Entry.first = Asm->GetTempSymbol(StringPref, Entry.second);
   return Entry.second;
 }
 
-unsigned DwarfUnits::getAddrPoolIndex(const MCSymbol *Sym) {
-  return getAddrPoolIndex(MCSymbolRefExpr::Create(Sym, Asm->OutContext));
-}
-
-unsigned DwarfUnits::getAddrPoolIndex(const MCExpr *Sym) {
-  std::pair<DenseMap<const MCExpr *, unsigned>::iterator, bool> P =
-      AddressPool.insert(std::make_pair(Sym, NextAddrPoolNumber));
+unsigned DwarfFile::getAddrPoolIndex(const MCSymbol *Sym, bool TLS) {
+  std::pair<AddrPool::iterator, bool> P = AddressPool.insert(
+      std::make_pair(Sym, AddressPoolEntry(NextAddrPoolNumber, TLS)));
   if (P.second)
     ++NextAddrPoolNumber;
-  return P.first->second;
+  return P.first->second.Number;
 }
 
 // Define a unique number for the abbreviation.
 //
-void DwarfUnits::assignAbbrevNumber(DIEAbbrev &Abbrev) {
+void DwarfFile::assignAbbrevNumber(DIEAbbrev &Abbrev) {
   // Check the set for priors.
-  DIEAbbrev *InSet = AbbreviationsSet->GetOrInsertNode(&Abbrev);
+  DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev);
 
   // If it's newly added.
   if (InSet == &Abbrev) {
@@ -294,7 +294,8 @@
 }
 
 static bool hasObjCCategory(StringRef Name) {
-  if (!isObjCClass(Name)) return false;
+  if (!isObjCClass(Name))
+    return false;
 
   return Name.find(") ") != StringRef::npos;
 }
@@ -318,35 +319,35 @@
 
 // Helper for sorting sections into a stable output order.
 static bool SectionSort(const MCSection *A, const MCSection *B) {
-    std::string LA = (A ? A->getLabelBeginName() : "");
-    std::string LB = (B ? B->getLabelBeginName() : "");
-    return LA < LB;
+  std::string LA = (A ? A->getLabelBeginName() : "");
+  std::string LB = (B ? B->getLabelBeginName() : "");
+  return LA < LB;
 }
 
 // Add the various names to the Dwarf accelerator table names.
 // TODO: Determine whether or not we should add names for programs
 // that do not have a DW_AT_name or DW_AT_linkage_name field - this
 // is only slightly different than the lookup of non-standard ObjC names.
-static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP,
-                               DIE* Die) {
-  if (!SP.isDefinition()) return;
-  TheCU->addAccelName(SP.getName(), Die);
+static void addSubprogramNames(DwarfUnit *TheU, DISubprogram SP, DIE *Die) {
+  if (!SP.isDefinition())
+    return;
+  TheU->addAccelName(SP.getName(), Die);
 
   // If the linkage name is different than the name, go ahead and output
   // that as well into the name table.
   if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName())
-    TheCU->addAccelName(SP.getLinkageName(), Die);
+    TheU->addAccelName(SP.getLinkageName(), Die);
 
   // If this is an Objective-C selector name add it to the ObjC accelerator
   // too.
   if (isObjCClass(SP.getName())) {
     StringRef Class, Category;
     getObjCClassCategory(SP.getName(), Class, Category);
-    TheCU->addAccelObjC(Class, Die);
+    TheU->addAccelObjC(Class, Die);
     if (Category != "")
-      TheCU->addAccelObjC(Category, Die);
+      TheU->addAccelObjC(Category, Die);
     // Also add the base method name to the name table.
-    TheCU->addAccelName(getObjCMethodName(SP.getName()), Die);
+    TheU->addAccelName(getObjCMethodName(SP.getName()), Die);
   }
 }
 
@@ -366,7 +367,8 @@
 // Find DIE for the given subprogram and attach appropriate DW_AT_low_pc
 // and DW_AT_high_pc attributes. If there are global variables in this
 // scope then create and insert DIEs for these variables.
-DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP) {
+DIE *DwarfDebug::updateSubprogramScopeDIE(DwarfCompileUnit *SPCU,
+                                          DISubprogram SP) {
   DIE *SPDie = SPCU->getDIE(SP);
 
   assert(SPDie && "Unable to find subprogram DIE!");
@@ -376,7 +378,8 @@
   // concrete DIE twice.
   if (DIE *AbsSPDIE = AbstractSPDies.lookup(SP)) {
     // Pick up abstract subprogram DIE.
-    SPDie = SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *SPCU->getCUDie());
+    SPDie =
+        SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *SPCU->getUnitDie());
     SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, AbsSPDIE);
   } else {
     DISubprogram SPDecl = SP.getFunctionDeclaration();
@@ -388,8 +391,7 @@
       // specification DIE for a function defined inside a function.
       DIScope SPContext = resolve(SP.getContext());
       if (SP.isDefinition() && !SPContext.isCompileUnit() &&
-          !SPContext.isFile() &&
-          !isSubprogramContext(SPContext)) {
+          !SPContext.isFile() && !isSubprogramContext(SPContext)) {
         SPCU->addFlag(SPDie, dwarf::DW_AT_declaration);
 
         // Add arguments.
@@ -397,30 +399,17 @@
         DIArray Args = SPTy.getTypeArray();
         uint16_t SPTag = SPTy.getTag();
         if (SPTag == dwarf::DW_TAG_subroutine_type)
-          for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
-            DIE *Arg =
-                SPCU->createAndAddDIE(dwarf::DW_TAG_formal_parameter, *SPDie);
-            DIType ATy(Args.getElement(i));
-            SPCU->addType(Arg, ATy);
-            if (ATy.isArtificial())
-              SPCU->addFlag(Arg, dwarf::DW_AT_artificial);
-            if (ATy.isObjectPointer())
-              SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer, Arg);
-          }
+          SPCU->constructSubprogramArguments(*SPDie, Args);
         DIE *SPDeclDie = SPDie;
-        SPDie =
-            SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *SPCU->getCUDie());
+        SPDie = SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram,
+                                      *SPCU->getUnitDie());
         SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, SPDeclDie);
       }
     }
   }
 
-  SPCU->addLabelAddress(SPDie, dwarf::DW_AT_low_pc,
-                        Asm->GetTempSymbol("func_begin",
-                                           Asm->getFunctionNumber()));
-  SPCU->addLabelAddress(SPDie, dwarf::DW_AT_high_pc,
-                        Asm->GetTempSymbol("func_end",
-                                           Asm->getFunctionNumber()));
+  attachLowHighPC(SPCU, SPDie, FunctionBeginSym, FunctionEndSym);
+
   const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
   MachineLocation Location(RI->getFrameRegister(*Asm->MF));
   SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location);
@@ -453,9 +442,43 @@
   return !End;
 }
 
+static void addSectionLabel(AsmPrinter *Asm, DwarfUnit *U, DIE *D,
+                            dwarf::Attribute A, const MCSymbol *L,
+                            const MCSymbol *Sec) {
+  if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+    U->addSectionLabel(D, A, L);
+  else
+    U->addSectionDelta(D, A, L, Sec);
+}
+
+void DwarfDebug::addScopeRangeList(DwarfCompileUnit *TheCU, DIE *ScopeDIE,
+                                   const SmallVectorImpl<InsnRange> &Range) {
+  // Emit offset in .debug_range as a relocatable label. emitDIE will handle
+  // emitting it appropriately.
+  MCSymbol *RangeSym = Asm->GetTempSymbol("debug_ranges", GlobalRangeCount++);
+
+  // Under fission, ranges are specified by constant offsets relative to the
+  // CU's DW_AT_GNU_ranges_base.
+  if (useSplitDwarf())
+    TheCU->addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, RangeSym,
+                           DwarfDebugRangeSectionSym);
+  else
+    addSectionLabel(Asm, TheCU, ScopeDIE, dwarf::DW_AT_ranges, RangeSym,
+                    DwarfDebugRangeSectionSym);
+
+  RangeSpanList List(RangeSym);
+  for (const InsnRange &R : Range) {
+    RangeSpan Span(getLabelBeforeInsn(R.first), getLabelAfterInsn(R.second));
+    List.addRange(std::move(Span));
+  }
+
+  // Add the range list to the set of ranges to be emitted.
+  TheCU->addRangeList(std::move(List));
+}
+
 // Construct new DW_TAG_lexical_block for this scope and attach
 // DW_AT_low_pc/DW_AT_high_pc labels.
-DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
+DIE *DwarfDebug::constructLexicalScopeDIE(DwarfCompileUnit *TheCU,
                                           LexicalScope *Scope) {
   if (isLexicalScopeDIENull(Scope))
     return 0;
@@ -464,29 +487,16 @@
   if (Scope->isAbstractScope())
     return ScopeDIE;
 
-  const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges();
-  // If we have multiple ranges, emit them into the range section.
-  if (Ranges.size() > 1) {
-    // .debug_range section has not been laid out yet. Emit offset in
-    // .debug_range as a uint, size 4, for now. emitDIE will handle
-    // DW_AT_ranges appropriately.
-    TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
-                   DebugRangeSymbols.size()
-                   * Asm->getDataLayout().getPointerSize());
-    for (SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(),
-         RE = Ranges.end(); RI != RE; ++RI) {
-      DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
-      DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second));
-    }
+  const SmallVectorImpl<InsnRange> &ScopeRanges = Scope->getRanges();
 
-    // Terminate the range list.
-    DebugRangeSymbols.push_back(NULL);
-    DebugRangeSymbols.push_back(NULL);
+  // If we have multiple ranges, emit them into the range section.
+  if (ScopeRanges.size() > 1) {
+    addScopeRangeList(TheCU, ScopeDIE, ScopeRanges);
     return ScopeDIE;
   }
 
   // Construct the address range for this DIE.
-  SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin();
+  SmallVectorImpl<InsnRange>::const_iterator RI = ScopeRanges.begin();
   MCSymbol *Start = getLabelBeforeInsn(RI->first);
   MCSymbol *End = getLabelAfterInsn(RI->second);
   assert(End && "End label should not be null!");
@@ -494,18 +504,17 @@
   assert(Start->isDefined() && "Invalid starting label for an inlined scope!");
   assert(End->isDefined() && "Invalid end label for an inlined scope!");
 
-  TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, Start);
-  TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, End);
+  attachLowHighPC(TheCU, ScopeDIE, Start, End);
 
   return ScopeDIE;
 }
 
 // This scope represents inlined body of a function. Construct DIE to
 // represent this concrete inlined copy of the function.
-DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
+DIE *DwarfDebug::constructInlinedScopeDIE(DwarfCompileUnit *TheCU,
                                           LexicalScope *Scope) {
-  const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges();
-  assert(Ranges.empty() == false &&
+  const SmallVectorImpl<InsnRange> &ScopeRanges = Scope->getRanges();
+  assert(!ScopeRanges.empty() &&
          "LexicalScope does not have instruction markers!");
 
   if (!Scope->getScopeNode())
@@ -521,22 +530,11 @@
   DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine);
   TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, OriginDIE);
 
-  if (Ranges.size() > 1) {
-    // .debug_range section has not been laid out yet. Emit offset in
-    // .debug_range as a uint, size 4, for now. emitDIE will handle
-    // DW_AT_ranges appropriately.
-    TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
-                   DebugRangeSymbols.size()
-                   * Asm->getDataLayout().getPointerSize());
-    for (SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(),
-         RE = Ranges.end(); RI != RE; ++RI) {
-      DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
-      DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second));
-    }
-    DebugRangeSymbols.push_back(NULL);
-    DebugRangeSymbols.push_back(NULL);
-  } else {
-    SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin();
+  // If we have multiple ranges, emit them into the range section.
+  if (ScopeRanges.size() > 1)
+    addScopeRangeList(TheCU, ScopeDIE, ScopeRanges);
+  else {
+    SmallVectorImpl<InsnRange>::const_iterator RI = ScopeRanges.begin();
     MCSymbol *StartLabel = getLabelBeforeInsn(RI->first);
     MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
 
@@ -547,17 +545,16 @@
            "Invalid starting label for an inlined scope!");
     assert(EndLabel->isDefined() && "Invalid end label for an inlined scope!");
 
-    TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, StartLabel);
-    TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, EndLabel);
+    attachLowHighPC(TheCU, ScopeDIE, StartLabel, EndLabel);
   }
 
   InlinedSubprogramDIEs.insert(OriginDIE);
 
   // Add the call site information to the DIE.
   DILocation DL(Scope->getInlinedAt());
-  TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, None,
-                 getOrCreateSourceID(DL.getFilename(), DL.getDirectory(),
-                                     TheCU->getUniqueID()));
+  TheCU->addUInt(
+      ScopeDIE, dwarf::DW_AT_call_file, None,
+      TheCU->getOrCreateSourceID(DL.getFilename(), DL.getDirectory()));
   TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber());
 
   // Add name to the name table, we do this here because we're guaranteed
@@ -567,37 +564,49 @@
   return ScopeDIE;
 }
 
-DIE *DwarfDebug::createScopeChildrenDIE(CompileUnit *TheCU, LexicalScope *Scope,
-                                        SmallVectorImpl<DIE*> &Children) {
-    DIE *ObjectPointer = NULL;
+DIE *DwarfDebug::createScopeChildrenDIE(DwarfCompileUnit *TheCU,
+                                        LexicalScope *Scope,
+                                        SmallVectorImpl<DIE *> &Children) {
+  DIE *ObjectPointer = NULL;
 
   // Collect arguments for current function.
-  if (LScopes.isCurrentFunctionScope(Scope))
-    for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i)
-      if (DbgVariable *ArgDV = CurrentFnArguments[i])
+  if (LScopes.isCurrentFunctionScope(Scope)) {
+    for (DbgVariable *ArgDV : CurrentFnArguments)
+      if (ArgDV)
         if (DIE *Arg =
-            TheCU->constructVariableDIE(*ArgDV, Scope->isAbstractScope())) {
+                TheCU->constructVariableDIE(*ArgDV, Scope->isAbstractScope())) {
           Children.push_back(Arg);
-          if (ArgDV->isObjectPointer()) ObjectPointer = Arg;
+          if (ArgDV->isObjectPointer())
+            ObjectPointer = Arg;
         }
 
-  // Collect lexical scope children first.
-  const SmallVectorImpl<DbgVariable *> &Variables =ScopeVariables.lookup(Scope);
-  for (unsigned i = 0, N = Variables.size(); i < N; ++i)
-    if (DIE *Variable =
-        TheCU->constructVariableDIE(*Variables[i], Scope->isAbstractScope())) {
-      Children.push_back(Variable);
-      if (Variables[i]->isObjectPointer()) ObjectPointer = Variable;
+    // If this is a variadic function, add an unspecified parameter.
+    DISubprogram SP(Scope->getScopeNode());
+    DIArray FnArgs = SP.getType().getTypeArray();
+    if (FnArgs.getElement(FnArgs.getNumElements() - 1)
+            .isUnspecifiedParameter()) {
+      DIE *Ellipsis = new DIE(dwarf::DW_TAG_unspecified_parameters);
+      Children.push_back(Ellipsis);
     }
-  const SmallVectorImpl<LexicalScope *> &Scopes = Scope->getChildren();
-  for (unsigned j = 0, M = Scopes.size(); j < M; ++j)
-    if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j]))
+  }
+
+  // Collect lexical scope children first.
+  for (DbgVariable *DV : ScopeVariables.lookup(Scope))
+    if (DIE *Variable = TheCU->constructVariableDIE(*DV,
+                                                    Scope->isAbstractScope())) {
+      Children.push_back(Variable);
+      if (DV->isObjectPointer())
+        ObjectPointer = Variable;
+    }
+  for (LexicalScope *LS : Scope->getChildren())
+    if (DIE *Nested = constructScopeDIE(TheCU, LS))
       Children.push_back(Nested);
   return ObjectPointer;
 }
 
 // Construct a DIE for this scope.
-DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
+DIE *DwarfDebug::constructScopeDIE(DwarfCompileUnit *TheCU,
+                                   LexicalScope *Scope) {
   if (!Scope || !Scope->getScopeNode())
     return NULL;
 
@@ -634,10 +643,12 @@
 
     // There is no need to emit empty lexical block DIE.
     std::pair<ImportedEntityMap::const_iterator,
-              ImportedEntityMap::const_iterator> Range = std::equal_range(
-        ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(),
-        std::pair<const MDNode *, const MDNode *>(DS, (const MDNode*)0),
-        less_first());
+              ImportedEntityMap::const_iterator> Range =
+        std::equal_range(
+            ScopesWithImportedEntities.begin(),
+            ScopesWithImportedEntities.end(),
+            std::pair<const MDNode *, const MDNode *>(DS, (const MDNode *)0),
+            less_first());
     if (Children.empty() && Range.first == Range.second)
       return NULL;
     ScopeDIE = constructLexicalScopeDIE(TheCU, Scope);
@@ -657,146 +668,55 @@
     ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children);
 
   // Add children
-  for (SmallVectorImpl<DIE *>::iterator I = Children.begin(),
-         E = Children.end(); I != E; ++I)
-    ScopeDIE->addChild(*I);
+  for (DIE *I : Children)
+    ScopeDIE->addChild(I);
 
   if (DS.isSubprogram() && ObjectPointer != NULL)
     TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, ObjectPointer);
 
-  if (DS.isSubprogram())
-    TheCU->addPubTypes(DISubprogram(DS));
-
   return ScopeDIE;
 }
 
-// Look up the source id with the given directory and source file names.
-// If none currently exists, create a new id and insert it in the
-// SourceIds map. This can update DirectoryNames and SourceFileNames maps
-// as well.
-unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName,
-                                         StringRef DirName, unsigned CUID) {
-  // If we use .loc in assembly, we can't separate .file entries according to
-  // compile units. Thus all files will belong to the default compile unit.
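+// Add the DW_AT_GNU_pubnames flag to a unit DIE to let the linker know we
+// have emitted new-style pubname sections for it.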
+void DwarfDebug::addGnuPubAttributes(DwarfUnit *U, DIE *D) const {
+  if (!GenerateGnuPubSections)
+    return;
 
-  // FIXME: add a better feature test than hasRawTextSupport. Even better,
-  // extend .file to support this.
-  if (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport())
-    CUID = 0;
-
-  // If FE did not provide a file name, then assume stdin.
-  if (FileName.empty())
-    return getOrCreateSourceID("<stdin>", StringRef(), CUID);
-
-  // TODO: this might not belong here. See if we can factor this better.
-  if (DirName == CompilationDir)
-    DirName = "";
-
-  // FileIDCUMap stores the current ID for the given compile unit.
-  unsigned SrcId = FileIDCUMap[CUID] + 1;
-
-  // We look up the CUID/file/dir by concatenating them with a zero byte.
-  SmallString<128> NamePair;
-  NamePair += utostr(CUID);
-  NamePair += '\0';
-  NamePair += DirName;
-  NamePair += '\0'; // Zero bytes are not allowed in paths.
-  NamePair += FileName;
-
-  StringMapEntry<unsigned> &Ent = SourceIdMap.GetOrCreateValue(NamePair, SrcId);
-  if (Ent.getValue() != SrcId)
-    return Ent.getValue();
-
-  FileIDCUMap[CUID] = SrcId;
-  // Print out a .file directive to specify files for .loc directives.
-  Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName, CUID);
-
-  return SrcId;
+  U->addFlag(D, dwarf::DW_AT_GNU_pubnames);
 }
 
-// Create new CompileUnit for the given metadata node with tag
+// Create new DwarfCompileUnit for the given metadata node with tag
 // DW_TAG_compile_unit.
-CompileUnit *DwarfDebug::constructCompileUnit(DICompileUnit DIUnit) {
+DwarfCompileUnit *DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
   StringRef FN = DIUnit.getFilename();
   CompilationDir = DIUnit.getDirectory();
 
   DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
-  CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++, Die, DIUnit, Asm,
-                                       this, &InfoHolder);
+  DwarfCompileUnit *NewCU = new DwarfCompileUnit(
+      InfoHolder.getUnits().size(), Die, DIUnit, Asm, this, &InfoHolder);
+  InfoHolder.addUnit(NewCU);
 
-  FileIDCUMap[NewCU->getUniqueID()] = 0;
-  // Call this to emit a .file directive if it wasn't emitted for the source
-  // file this CU comes from yet.
-  getOrCreateSourceID(FN, CompilationDir, NewCU->getUniqueID());
+  // LTO with assembly output shares a single line table amongst multiple CUs.
+  // To avoid the compilation directory being ambiguous, let the line table
+  // explicitly describe the directory of all files, never relying on the
+  // compilation directory.
+  if (!Asm->OutStreamer.hasRawTextSupport() || SingleCU)
+    Asm->OutStreamer.getContext().setMCLineTableCompilationDir(
+        NewCU->getUniqueID(), CompilationDir);
 
   NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer());
   NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
                  DIUnit.getLanguage());
   NewCU->addString(Die, dwarf::DW_AT_name, FN);
 
-  // 2.17.1 requires that we use DW_AT_low_pc for a single entry point
-  // into an entity. We're using 0 (or a NULL label) for this. For
-  // split dwarf it's in the skeleton CU so omit it here.
-  if (!useSplitDwarf())
-    NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL);
-
-  // Define start line table label for each Compile Unit.
-  MCSymbol *LineTableStartSym = Asm->GetTempSymbol("line_table_start",
-                                                   NewCU->getUniqueID());
-  Asm->OutStreamer.getContext().setMCLineTableSymbol(LineTableStartSym,
-                                                     NewCU->getUniqueID());
-
-  // Use a single line table if we are using .loc and generating assembly.
-  bool UseTheFirstCU =
-      (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport()) ||
-      (NewCU->getUniqueID() == 0);
-
   if (!useSplitDwarf()) {
-    // DW_AT_stmt_list is a offset of line number information for this
-    // compile unit in debug_line section. For split dwarf this is
-    // left in the skeleton CU and so not included.
-    // The line table entries are not always emitted in assembly, so it
-    // is not okay to use line_table_start here.
-    if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
-      NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset,
-                      UseTheFirstCU ? Asm->GetTempSymbol("section_line")
-                                    : LineTableStartSym);
-    else if (UseTheFirstCU)
-      NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
-    else
-      NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
-                      LineTableStartSym, DwarfLineSectionSym);
+    NewCU->initStmtList(DwarfLineSectionSym);
 
     // If we're using split dwarf the compilation dir is going to be in the
     // skeleton CU and so we don't need to duplicate it here.
     if (!CompilationDir.empty())
       NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
 
-    // Flags to let the linker know we have emitted new style pubnames. Only
-    // emit it here if we don't have a skeleton CU for split dwarf.
-    if (GenerateGnuPubSections) {
-      if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
-        NewCU->addLabel(Die, dwarf::DW_AT_GNU_pubnames,
-                        dwarf::DW_FORM_sec_offset,
-                        Asm->GetTempSymbol("gnu_pubnames",
-                                           NewCU->getUniqueID()));
-      else
-        NewCU->addDelta(Die, dwarf::DW_AT_GNU_pubnames, dwarf::DW_FORM_data4,
-                        Asm->GetTempSymbol("gnu_pubnames",
-                                           NewCU->getUniqueID()),
-                        DwarfGnuPubNamesSectionSym);
-
-      if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
-        NewCU->addLabel(Die, dwarf::DW_AT_GNU_pubtypes,
-                        dwarf::DW_FORM_sec_offset,
-                        Asm->GetTempSymbol("gnu_pubtypes",
-                                           NewCU->getUniqueID()));
-      else
-        NewCU->addDelta(Die, dwarf::DW_AT_GNU_pubtypes, dwarf::DW_FORM_data4,
-                        Asm->GetTempSymbol("gnu_pubtypes",
-                                           NewCU->getUniqueID()),
-                        DwarfGnuPubTypesSectionSym);
-    }
+    addGnuPubAttributes(NewCU, Die);
   }
 
   if (DIUnit.isOptimized())
@@ -808,12 +728,18 @@
 
   if (unsigned RVer = DIUnit.getRunTimeVersion())
     NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
-            dwarf::DW_FORM_data1, RVer);
+                   dwarf::DW_FORM_data1, RVer);
 
   if (!FirstCU)
     FirstCU = NewCU;
 
-  InfoHolder.addUnit(NewCU);
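+  // Place the new unit in the appropriate .debug_info section and, when
+  // splitting dwarf, construct its skeleton CU now.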
+  if (useSplitDwarf()) {
+    NewCU->initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection(),
+                       DwarfInfoDWOSectionSym);
+    NewCU->setSkeleton(constructSkeletonCU(NewCU));
+  } else
+    NewCU->initSection(Asm->getObjFileLowering().getDwarfInfoSection(),
+                       DwarfInfoSectionSym);
 
   CUMap.insert(std::make_pair(DIUnit, NewCU));
   CUDieMap.insert(std::make_pair(Die, NewCU));
@@ -821,12 +747,13 @@
 }
 
 // Construct subprogram DIE.
-void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N) {
+void DwarfDebug::constructSubprogramDIE(DwarfCompileUnit *TheCU,
+                                        const MDNode *N) {
-  // FIXME: We should only call this routine once, however, during LTO if a
+  // FIXME: We should only call this routine once; however, during LTO if a
   // program is defined in multiple CUs we could end up calling it out of
   // beginModule as we walk the CUs.
 
-  CompileUnit *&CURef = SPMap[N];
+  DwarfCompileUnit *&CURef = SPMap[N];
   if (CURef)
     return;
   CURef = TheCU;
@@ -843,24 +770,22 @@
   TheCU->addGlobalName(SP.getName(), SubprogramDie, resolve(SP.getContext()));
 }
 
-void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU,
+void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit *TheCU,
                                             const MDNode *N) {
   DIImportedEntity Module(N);
-  if (!Module.Verify())
-    return;
+  assert(Module.Verify());
   if (DIE *D = TheCU->getOrCreateContextDIE(Module.getContext()))
     constructImportedEntityDIE(TheCU, Module, D);
 }
 
-void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, const MDNode *N,
-                                            DIE *Context) {
+void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit *TheCU,
+                                            const MDNode *N, DIE *Context) {
   DIImportedEntity Module(N);
-  if (!Module.Verify())
-    return;
+  assert(Module.Verify());
   return constructImportedEntityDIE(TheCU, Module, Context);
 }
 
-void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU,
+void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit *TheCU,
                                             const DIImportedEntity &Module,
                                             DIE *Context) {
   assert(Module.Verify() &&
@@ -869,7 +794,7 @@
   DIE *IMDie = new DIE(Module.getTag());
   TheCU->insertDIE(Module, IMDie);
   DIE *EntityDie;
-  DIDescriptor Entity = Module.getEntity();
+  DIDescriptor Entity = resolve(Module.getEntity());
   if (Entity.isNameSpace())
     EntityDie = TheCU->getOrCreateNameSpace(DINameSpace(Entity));
   else if (Entity.isSubprogram())
@@ -878,11 +803,9 @@
     EntityDie = TheCU->getOrCreateTypeDIE(DIType(Entity));
   else
     EntityDie = TheCU->getDIE(Entity);
-  unsigned FileID = getOrCreateSourceID(Module.getContext().getFilename(),
-                                        Module.getContext().getDirectory(),
-                                        TheCU->getUniqueID());
-  TheCU->addUInt(IMDie, dwarf::DW_AT_decl_file, None, FileID);
-  TheCU->addUInt(IMDie, dwarf::DW_AT_decl_line, None, Module.getLineNumber());
+  TheCU->addSourceLine(IMDie, Module.getLineNumber(),
+                       Module.getContext().getFilename(),
+                       Module.getContext().getDirectory());
   TheCU->addDIEEntry(IMDie, dwarf::DW_AT_import, EntityDie);
   StringRef Name = Module.getName();
   if (!Name.empty())
@@ -909,9 +832,11 @@
   // Emit initial sections so we can reference labels later.
   emitSectionLabels();
 
-  for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
-    DICompileUnit CUNode(CU_Nodes->getOperand(i));
-    CompileUnit *CU = constructCompileUnit(CUNode);
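+  // Note whether this module has a single CU; with only one unit the shared
+  // line table's compilation directory is unambiguous.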
+  SingleCU = CU_Nodes->getNumOperands() == 1;
+
+  for (MDNode *N : CU_Nodes->operands()) {
+    DICompileUnit CUNode(N);
+    DwarfCompileUnit *CU = constructDwarfCompileUnit(CUNode);
     DIArray ImportedEntities = CUNode.getImportedEntities();
     for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i)
       ScopesWithImportedEntities.push_back(std::make_pair(
@@ -929,8 +854,13 @@
     for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
       CU->getOrCreateTypeDIE(EnumTypes.getElement(i));
     DIArray RetainedTypes = CUNode.getRetainedTypes();
-    for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
-      CU->getOrCreateTypeDIE(RetainedTypes.getElement(i));
+    for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) {
+      DIType Ty(RetainedTypes.getElement(i));
+      // The retained types array by design contains pointers to
+      // MDNodes rather than DIRefs. Unique them here.
+      DIType UniqueTy(resolve(Ty.getRef()));
+      CU->getOrCreateTypeDIE(UniqueTy);
+    }
     // Emit imported_modules last so that the relevant context is already
     // available.
     for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i)
@@ -947,14 +877,11 @@
 // Attach DW_AT_inline attribute with inlined subprogram DIEs.
 void DwarfDebug::computeInlinedDIEs() {
   // Attach DW_AT_inline attribute with inlined subprogram DIEs.
-  for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
-         AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
-    DIE *ISP = *AI;
+  for (DIE *ISP : InlinedSubprogramDIEs)
     FirstCU->addUInt(ISP, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
-  }
-  for (DenseMap<const MDNode *, DIE *>::iterator AI = AbstractSPDies.begin(),
-         AE = AbstractSPDies.end(); AI != AE; ++AI) {
-    DIE *ISP = AI->second;
+
+  for (const auto &AI : AbstractSPDies) {
+    DIE *ISP = AI.second;
     if (InlinedSubprogramDIEs.count(ISP))
       continue;
     FirstCU->addUInt(ISP, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
@@ -966,8 +893,8 @@
   const Module *M = MMI->getModule();
 
   if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) {
-    for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
-      DICompileUnit TheCU(CU_Nodes->getOperand(i));
+    for (MDNode *N : CU_Nodes->operands()) {
+      DICompileUnit TheCU(N);
       DIArray Subprograms = TheCU.getSubprograms();
       for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) {
         DISubprogram SP(Subprograms.getElement(i));
@@ -982,7 +909,8 @@
           continue;
 
         // Construct subprogram DIE and add variables DIEs.
-        CompileUnit *SPCU = CUMap.lookup(TheCU);
+        DwarfCompileUnit *SPCU =
+            static_cast<DwarfCompileUnit *>(CUMap.lookup(TheCU));
         assert(SPCU && "Unable to find Compile Unit!");
         // FIXME: See the comment in constructSubprogramDIE about duplicate
         // subprogram DIEs.
@@ -993,8 +921,7 @@
           if (!DV.isVariable())
             continue;
           DbgVariable NewVar(DV, NULL, this);
-          if (DIE *VariableDIE =
-                  SPCU->constructVariableDIE(NewVar, false))
+          if (DIE *VariableDIE = SPCU->constructVariableDIE(NewVar, false))
             SPDIE->addChild(VariableDIE);
         }
       }
@@ -1002,41 +929,6 @@
   }
 }
 
-// Type Signature [7.27] and ODR Hash code.
-
-/// \brief Grabs the string in whichever attribute is passed in and returns
-/// a reference to it. Returns "" if the attribute doesn't exist.
-static StringRef getDIEStringAttr(DIE *Die, unsigned Attr) {
-  DIEValue *V = Die->findAttribute(Attr);
-
-  if (DIEString *S = dyn_cast_or_null<DIEString>(V))
-    return S->getString();
-
-  return StringRef("");
-}
-
-/// Return true if the current DIE is contained within an anonymous namespace.
-static bool isContainedInAnonNamespace(DIE *Die) {
-  DIE *Parent = Die->getParent();
-
-  while (Parent) {
-    if (Parent->getTag() == dwarf::DW_TAG_namespace &&
-        getDIEStringAttr(Parent, dwarf::DW_AT_name) == "")
-      return true;
-    Parent = Parent->getParent();
-  }
-
-  return false;
-}
-
-/// Test if the current CU language is C++ and that we have
-/// a named type that is not contained in an anonymous namespace.
-static bool shouldAddODRHash(CompileUnit *CU, DIE *Die) {
-  return CU->getLanguage() == dwarf::DW_LANG_C_plus_plus &&
-         getDIEStringAttr(Die, dwarf::DW_AT_name) != "" &&
-         !isContainedInAnonNamespace(Die);
-}
-
 void DwarfDebug::finalizeModuleInfo() {
   // Collect info for variables that were optimized out.
   collectDeadVariables();
@@ -1044,47 +936,67 @@
   // Attach DW_AT_inline attribute with inlined subprogram DIEs.
   computeInlinedDIEs();
 
-  // Split out type units and conditionally add an ODR tag to the split
-  // out type.
-  // FIXME: Do type splitting.
-  for (unsigned i = 0, e = TypeUnits.size(); i != e; ++i) {
-    DIE *Die = TypeUnits[i];
-    DIEHash Hash;
-    // If we've requested ODR hashes and it's applicable for an ODR hash then
-    // add the ODR signature now.
-    // FIXME: This should be added onto the type unit, not the type, but this
-    // works as an intermediate stage.
-    if (GenerateODRHash && shouldAddODRHash(CUMap.begin()->second, Die))
-      CUMap.begin()->second->addUInt(Die, dwarf::DW_AT_GNU_odr_signature,
-                                     dwarf::DW_FORM_data8,
-                                     Hash.computeDIEODRSignature(*Die));
-  }
-
-  // Handle anything that needs to be done on a per-cu basis.
-  for (DenseMap<const MDNode *, CompileUnit *>::iterator CUI = CUMap.begin(),
-                                                         CUE = CUMap.end();
-       CUI != CUE; ++CUI) {
-    CompileUnit *TheCU = CUI->second;
+  // Handle anything that needs to be done on a per-unit basis after
+  // all other generation.
+  for (DwarfUnit *TheU : getUnits()) {
     // Emit DW_AT_containing_type attribute to connect types with their
     // vtable holding type.
-    TheCU->constructContainingTypeDIEs();
+    TheU->constructContainingTypeDIEs();
 
-    // If we're splitting the dwarf out now that we've got the entire
-    // CU then construct a skeleton CU based upon it.
-    if (useSplitDwarf()) {
-      uint64_t ID = 0;
-      if (GenerateCUHash) {
-        DIEHash CUHash;
-        ID = CUHash.computeCUSignature(*TheCU->getCUDie());
+    // Add CU-specific attributes if we need to add any.
+    if (TheU->getUnitDie()->getTag() == dwarf::DW_TAG_compile_unit) {
+      // If we're splitting the dwarf out now that we've got the entire
+      // CU then add the dwo id to it.
+      DwarfCompileUnit *SkCU =
+          static_cast<DwarfCompileUnit *>(TheU->getSkeleton());
+      if (useSplitDwarf()) {
+        // Emit a unique identifier for this CU.
+        uint64_t ID = DIEHash(Asm).computeCUSignature(*TheU->getUnitDie());
+        TheU->addUInt(TheU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
+                      dwarf::DW_FORM_data8, ID);
+        SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
+                      dwarf::DW_FORM_data8, ID);
+
+        // We don't keep track of which addresses are used in which CU so this
+        // is a bit pessimistic under LTO.
+        if (!InfoHolder.getAddrPool()->empty())
+          addSectionLabel(Asm, SkCU, SkCU->getUnitDie(),
+                          dwarf::DW_AT_GNU_addr_base, DwarfAddrSectionSym,
+                          DwarfAddrSectionSym);
+        if (!TheU->getRangeLists().empty())
+          addSectionLabel(Asm, SkCU, SkCU->getUnitDie(),
+                          dwarf::DW_AT_GNU_ranges_base,
+                          DwarfDebugRangeSectionSym, DwarfDebugRangeSectionSym);
       }
-      // This should be a unique identifier when we want to build .dwp files.
-      TheCU->addUInt(TheCU->getCUDie(), dwarf::DW_AT_GNU_dwo_id,
-                     dwarf::DW_FORM_data8, ID);
-      // Now construct the skeleton CU associated.
-      CompileUnit *SkCU = constructSkeletonCU(TheCU);
-      // This should be a unique identifier when we want to build .dwp files.
-      SkCU->addUInt(SkCU->getCUDie(), dwarf::DW_AT_GNU_dwo_id,
-                    dwarf::DW_FORM_data8, ID);
+
+      // If we have code split among multiple sections or non-contiguous
+      // ranges of code, then emit a DW_AT_ranges attribute on the unit that
+      // will remain in the .o file; otherwise add a DW_AT_low_pc.
+      // FIXME: We should use ranges to allow reordering of code ala
+      // .subsections_via_symbols in mach-o. This would mean turning on
+      // ranges for all subprogram DIEs for mach-o.
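+      // These attributes must stay in the .o file, so attach them to the
+      // skeleton CU when we are splitting dwarf.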
+      DwarfCompileUnit *U = SkCU ? SkCU : static_cast<DwarfCompileUnit *>(TheU);
+      unsigned NumRanges = TheU->getRanges().size();
+      if (NumRanges) {
+        if (NumRanges > 1) {
+          addSectionLabel(Asm, U, U->getUnitDie(), dwarf::DW_AT_ranges,
+                          Asm->GetTempSymbol("cu_ranges", U->getUniqueID()),
+                          DwarfDebugRangeSectionSym);
+
+          // A DW_AT_low_pc attribute may also be specified in combination with
+          // DW_AT_ranges to specify the default base address for use in
+          // location lists (see Section 2.6.2) and range lists (see Section
+          // 2.17.3).
+          U->addUInt(U->getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+                     0);
+        } else {
+          RangeSpan &Range = TheU->getRanges().back();
+          U->addLocalLabelAddress(U->getUnitDie(), dwarf::DW_AT_low_pc,
+                                  Range.getStart());
+          U->addLabelDelta(U->getUnitDie(), dwarf::DW_AT_high_pc,
+                           Range.getEnd(), Range.getStart());
+        }
+      }
     }
   }
 
@@ -1095,9 +1007,8 @@
 }
 
 void DwarfDebug::endSections() {
-   // Filter labels by section.
-  for (size_t n = 0; n < ArangeLabels.size(); n++) {
-    const SymbolCU &SCU = ArangeLabels[n];
+  // Filter labels by section.
+  for (const SymbolCU &SCU : ArangeLabels) {
     if (SCU.Sym->isInSection()) {
-      // Make a note of this symbol and it's section.
+      // Make a note of this symbol and its section.
       const MCSection *Section = &SCU.Sym->getSection();
@@ -1113,9 +1024,8 @@
 
   // Build a list of sections used.
   std::vector<const MCSection *> Sections;
-  for (SectionMapType::iterator it = SectionMap.begin(); it != SectionMap.end();
-       it++) {
-    const MCSection *Section = it->first;
+  for (const auto &it : SectionMap) {
+    const MCSection *Section = it.first;
     Sections.push_back(Section);
   }
 
@@ -1124,15 +1034,15 @@
   std::sort(Sections.begin(), Sections.end(), SectionSort);
 
   // Add terminating symbols for each section.
-  for (unsigned ID=0;ID<Sections.size();ID++) {
+  for (unsigned ID = 0, E = Sections.size(); ID != E; ID++) {
     const MCSection *Section = Sections[ID];
     MCSymbol *Sym = NULL;
 
     if (Section) {
       // We can't call MCSection::getLabelEndName, as it's only safe to do so
-      // if we know the section name up-front. For user-created sections, the resulting
-      // label may not be valid to use as a label. (section names can use a greater
-      // set of characters on some systems)
+      // if we know the section name up-front. For user-created sections, the
+      // resulting label may not be valid to use as a label. (section names can
+      // use a greater set of characters on some systems)
       Sym = Asm->GetTempSymbol("debug_end", ID);
       Asm->OutStreamer.SwitchSection(Section);
       Asm->OutStreamer.EmitLabel(Sym);
@@ -1145,8 +1055,11 @@
 
 // Emit all Dwarf sections that should come after the content.
 void DwarfDebug::endModule() {
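+  // Any in-flight function or instruction should have been finished by now.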
+  assert(CurFn == 0);
+  assert(CurMI == 0);
 
-  if (!FirstCU) return;
+  if (!FirstCU)
+    return;
 
   // End any existing sections.
   // TODO: Does this need to happen?
@@ -1155,58 +1068,32 @@
   // Finalize the debug info for the module.
   finalizeModuleInfo();
 
-  if (!useSplitDwarf()) {
-    emitDebugStr();
+  emitDebugStr();
 
-    // Emit all the DIEs into a debug info section.
-    emitDebugInfo();
+  // Emit all the DIEs into a debug info section.
+  emitDebugInfo();
 
-    // Corresponding abbreviations into a abbrev section.
-    emitAbbreviations();
+  // Emit the corresponding abbreviations into an abbrev section.
+  emitAbbreviations();
 
-    // Emit info into a debug loc section.
-    emitDebugLoc();
-
-    // Emit info into a debug aranges section.
+  // Emit info into a debug aranges section.
+  if (GenerateARangeSection)
     emitDebugARanges();
 
-    // Emit info into a debug ranges section.
-    emitDebugRanges();
+  // Emit info into a debug ranges section.
+  emitDebugRanges();
 
-    // Emit info into a debug macinfo section.
-    emitDebugMacInfo();
-
-  } else {
-    // TODO: Fill this in for separated debug sections and separate
-    // out information into new sections.
-    emitDebugStr();
-    if (useSplitDwarf())
-      emitDebugStrDWO();
-
-    // Emit the debug info section and compile units.
-    emitDebugInfo();
+  if (useSplitDwarf()) {
+    emitDebugStrDWO();
     emitDebugInfoDWO();
-
-    // Corresponding abbreviations into a abbrev section.
-    emitAbbreviations();
     emitDebugAbbrevDWO();
-
-    // Emit info into a debug loc section.
-    emitDebugLoc();
-
-    // Emit info into a debug aranges section.
-    emitDebugARanges();
-
-    // Emit info into a debug ranges section.
-    emitDebugRanges();
-
-    // Emit info into a debug macinfo section.
-    emitDebugMacInfo();
-
+    emitDebugLineDWO();
     // Emit DWO addresses.
     InfoHolder.emitAddresses(Asm->getObjFileLowering().getDwarfAddrSection());
-
-  }
+    emitDebugLocDWO();
+  } else
+    // Emit info into a debug loc section.
+    emitDebugLoc();
 
   // Emit info into the dwarf accelerator table sections.
   if (useDwarfAccelTables()) {
@@ -1224,13 +1111,6 @@
 
   // clean up.
   SPMap.clear();
-  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
-         E = CUMap.end(); I != E; ++I)
-    delete I->second;
-
-  for (SmallVectorImpl<CompileUnit *>::iterator I = SkeletonCUs.begin(),
-         E = SkeletonCUs.end(); I != E; ++I)
-    delete *I;
 
   // Reset these for the next Module if we have one.
   FirstCU = NULL;
@@ -1257,8 +1137,7 @@
 }
 
 // If Var is a current function argument then add it to CurrentFnArguments list.
-bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
-                                      DbgVariable *Var, LexicalScope *Scope) {
+bool DwarfDebug::addCurrentFnArgument(DbgVariable *Var, LexicalScope *Scope) {
   if (!LScopes.isCurrentFunctionScope(Scope))
     return false;
   DIVariable DV = Var->getVariable();
@@ -1270,7 +1149,7 @@
 
   size_t Size = CurrentFnArguments.size();
   if (Size == 0)
-    CurrentFnArguments.resize(MF->getFunction()->arg_size());
+    CurrentFnArguments.resize(CurFn->getFunction()->arg_size());
-  // llvm::Function argument size is not good indicator of how many
-  // arguments does the function have at source level.
+  // llvm::Function argument size is not a good indicator of how many
+  // arguments the function has at the source level.
   if (ArgNo > Size)
@@ -1280,31 +1159,26 @@
 }
 
 // Collect variable information from side table maintained by MMI.
-void
-DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF,
-                                   SmallPtrSet<const MDNode *, 16> &Processed) {
-  MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
-  for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
-         VE = VMap.end(); VI != VE; ++VI) {
-    const MDNode *Var = VI->first;
-    if (!Var) continue;
-    Processed.insert(Var);
-    DIVariable DV(Var);
-    const std::pair<unsigned, DebugLoc> &VP = VI->second;
-
-    LexicalScope *Scope = LScopes.findLexicalScope(VP.second);
+void DwarfDebug::collectVariableInfoFromMMITable(
+    SmallPtrSet<const MDNode *, 16> &Processed) {
+  for (const auto &VI : MMI->getVariableDbgInfo()) {
+    if (!VI.Var)
+      continue;
+    Processed.insert(VI.Var);
+    DIVariable DV(VI.Var);
+    LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc);
 
     // If variable scope is not found then skip this variable.
     if (Scope == 0)
       continue;
 
-    DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second);
+    DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VI.Loc);
     DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable, this);
-    RegVar->setFrameIndex(VP.first);
-    if (!addCurrentFnArgument(MF, RegVar, Scope))
+    RegVar->setFrameIndex(VI.Slot);
+    if (!addCurrentFnArgument(RegVar, Scope))
       addScopeVariable(Scope, RegVar);
     if (AbsDbgVariable)
-      AbsDbgVariable->setFrameIndex(VP.first);
+      AbsDbgVariable->setFrameIndex(VI.Slot);
   }
 }
 
@@ -1312,18 +1186,19 @@
 // defined reg.
 static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
   assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
-  return MI->getNumOperands() == 3 &&
-         MI->getOperand(0).isReg() && MI->getOperand(0).getReg() &&
+  return MI->getNumOperands() == 3 && MI->getOperand(0).isReg() &&
+         MI->getOperand(0).getReg() &&
          (MI->getOperand(1).isImm() ||
           (MI->getOperand(1).isReg() && MI->getOperand(1).getReg() == 0U));
 }
 
 // Get .debug_loc entry for the instruction range starting at MI.
-static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
-                                         const MCSymbol *FLabel,
-                                         const MCSymbol *SLabel,
-                                         const MachineInstr *MI) {
-  const MDNode *Var =  MI->getOperand(MI->getNumOperands() - 1).getMetadata();
+static DebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
+                                      const MCSymbol *FLabel,
+                                      const MCSymbol *SLabel,
+                                      const MachineInstr *MI,
+                                      DwarfCompileUnit *Unit) {
+  const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata();
 
   assert(MI->getNumOperands() == 3);
   if (MI->getOperand(0).isReg()) {
@@ -1334,36 +1209,32 @@
       MLoc.set(MI->getOperand(0).getReg());
     else
       MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
-    return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
+    return DebugLocEntry(FLabel, SLabel, MLoc, Var, Unit);
   }
   if (MI->getOperand(0).isImm())
-    return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getImm());
+    return DebugLocEntry(FLabel, SLabel, MI->getOperand(0).getImm(), Unit);
   if (MI->getOperand(0).isFPImm())
-    return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getFPImm());
+    return DebugLocEntry(FLabel, SLabel, MI->getOperand(0).getFPImm(), Unit);
   if (MI->getOperand(0).isCImm())
-    return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm());
+    return DebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm(), Unit);
 
   llvm_unreachable("Unexpected 3 operand DBG_VALUE instruction!");
 }
 
 // Find variables for each lexical scope.
 void
-DwarfDebug::collectVariableInfo(const MachineFunction *MF,
-                                SmallPtrSet<const MDNode *, 16> &Processed) {
+DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
 
   // Grab the variable info that was squirreled away in the MMI side-table.
-  collectVariableInfoFromMMITable(MF, Processed);
+  collectVariableInfoFromMMITable(Processed);
 
-  for (SmallVectorImpl<const MDNode*>::const_iterator
-         UVI = UserVariables.begin(), UVE = UserVariables.end(); UVI != UVE;
-         ++UVI) {
-    const MDNode *Var = *UVI;
+  for (const MDNode *Var : UserVariables) {
     if (Processed.count(Var))
       continue;
 
     // History contains relevant DBG_VALUE instructions for Var and instructions
     // clobbering it.
-    SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var];
+    SmallVectorImpl<const MachineInstr *> &History = DbgValues[Var];
     if (History.empty())
       continue;
     const MachineInstr *MInsn = History.front();
@@ -1371,7 +1242,7 @@
     DIVariable DV(Var);
     LexicalScope *Scope = NULL;
     if (DV.getTag() == dwarf::DW_TAG_arg_variable &&
-        DISubprogram(DV.getContext()).describes(MF->getFunction()))
+        DISubprogram(DV.getContext()).describes(CurFn->getFunction()))
       Scope = LScopes.getCurrentFunctionScope();
     else if (MDNode *IA = DV.getInlinedAt())
       Scope = LScopes.findInlinedScope(DebugLoc::getFromDILocation(IA));
@@ -1385,14 +1256,14 @@
     assert(MInsn->isDebugValue() && "History must begin with debug value");
     DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc());
     DbgVariable *RegVar = new DbgVariable(DV, AbsVar, this);
-    if (!addCurrentFnArgument(MF, RegVar, Scope))
+    if (!addCurrentFnArgument(RegVar, Scope))
       addScopeVariable(Scope, RegVar);
     if (AbsVar)
       AbsVar->setMInsn(MInsn);
 
     // Simplify ranges that are fully coalesced.
-    if (History.size() <= 1 || (History.size() == 2 &&
-                                MInsn->isIdenticalTo(History.back()))) {
+    if (History.size() <= 1 ||
+        (History.size() == 2 && MInsn->isIdenticalTo(History.back()))) {
       RegVar->setMInsn(MInsn);
       continue;
     }
@@ -1400,14 +1271,21 @@
     // Handle multiple DBG_VALUE instructions describing one variable.
     RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());
 
-    for (SmallVectorImpl<const MachineInstr*>::const_iterator
-           HI = History.begin(), HE = History.end(); HI != HE; ++HI) {
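+    // Start a new location list for this variable and give it a label that
+    // the variable's DIE can refer to.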
+    DotDebugLocEntries.resize(DotDebugLocEntries.size() + 1);
+    DebugLocList &LocList = DotDebugLocEntries.back();
+    LocList.Label =
+        Asm->GetTempSymbol("debug_loc", DotDebugLocEntries.size() - 1);
+    SmallVector<DebugLocEntry, 4> &DebugLoc = LocList.List;
+    for (SmallVectorImpl<const MachineInstr *>::const_iterator
+             HI = History.begin(),
+             HE = History.end();
+         HI != HE; ++HI) {
       const MachineInstr *Begin = *HI;
       assert(Begin->isDebugValue() && "Invalid History entry");
 
       // Check if DBG_VALUE is truncating a range.
-      if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg()
-          && !Begin->getOperand(0).getReg())
+      if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg() &&
+          !Begin->getOperand(0).getReg())
         continue;
 
       // Compute the range for a register location.
@@ -1421,7 +1299,7 @@
       else {
         const MachineInstr *End = HI[1];
         DEBUG(dbgs() << "DotDebugLoc Pair:\n"
-              << "\t" << *Begin << "\t" << *End << "\n");
+                     << "\t" << *Begin << "\t" << *End << "\n");
         if (End->isDebugValue())
           SLabel = getLabelBeforeInsn(End);
         else {
@@ -1433,10 +1311,12 @@
       }
 
       // The value is valid until the next DBG_VALUE or clobber.
-      DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel,
-                                                    Begin));
+      LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+      DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
+      DebugLocEntry Loc = getDebugLocEntry(Asm, FLabel, SLabel, Begin, TheCU);
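+      // Extend the previous entry if the new one can be merged with it;
+      // otherwise start a new entry in the list.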
+      if (DebugLoc.empty() || !DebugLoc.back().Merge(Loc))
+        DebugLoc.push_back(std::move(Loc));
     }
-    DotDebugLocEntries.push_back(DotDebugLocEntry());
   }
 
   // Collect info for variables that were optimized out.
@@ -1465,6 +1345,8 @@
 
 // Process beginning of an instruction.
 void DwarfDebug::beginInstruction(const MachineInstr *MI) {
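+  // Remember the instruction being processed; endInstruction() picks it up.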
+  assert(CurMI == 0);
+  CurMI = MI;
   // Check if source location changes, but ignore DBG_VALUE locations.
   if (!MI->isDebugValue()) {
     DebugLoc DL = MI->getDebugLoc();
@@ -1487,8 +1369,8 @@
   }
 
   // Insert labels where requested.
-  DenseMap<const MachineInstr*, MCSymbol*>::iterator I =
-    LabelsBeforeInsn.find(MI);
+  DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
+      LabelsBeforeInsn.find(MI);
 
   // No label needed.
   if (I == LabelsBeforeInsn.end())
@@ -1506,14 +1388,16 @@
 }
 
 // Process end of an instruction.
-void DwarfDebug::endInstruction(const MachineInstr *MI) {
+void DwarfDebug::endInstruction() {
+  assert(CurMI != 0);
   // Don't create a new label after DBG_VALUE instructions.
   // They don't generate code.
-  if (!MI->isDebugValue())
+  if (!CurMI->isDebugValue())
     PrevLabel = 0;
 
-  DenseMap<const MachineInstr*, MCSymbol*>::iterator I =
-    LabelsAfterInsn.find(MI);
+  DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
+      LabelsAfterInsn.find(CurMI);
+  CurMI = 0;
 
   // No label needed.
   if (I == LabelsAfterInsn.end())
@@ -1543,53 +1427,24 @@
 
     const SmallVectorImpl<LexicalScope *> &Children = S->getChildren();
     if (!Children.empty())
-      for (SmallVectorImpl<LexicalScope *>::const_iterator SI = Children.begin(),
-             SE = Children.end(); SI != SE; ++SI)
-        WorkList.push_back(*SI);
+      WorkList.append(Children.begin(), Children.end());
 
     if (S->isAbstractScope())
       continue;
 
-    const SmallVectorImpl<InsnRange> &Ranges = S->getRanges();
-    if (Ranges.empty())
-      continue;
-    for (SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(),
-           RE = Ranges.end(); RI != RE; ++RI) {
-      assert(RI->first && "InsnRange does not have first instruction!");
-      assert(RI->second && "InsnRange does not have second instruction!");
-      requestLabelBeforeInsn(RI->first);
-      requestLabelAfterInsn(RI->second);
+    for (const InsnRange &R : S->getRanges()) {
+      assert(R.first && "InsnRange does not have first instruction!");
+      assert(R.second && "InsnRange does not have second instruction!");
+      requestLabelBeforeInsn(R.first);
+      requestLabelAfterInsn(R.second);
     }
   }
 }
 
-// Get MDNode for DebugLoc's scope.
-static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) {
-  if (MDNode *InlinedAt = DL.getInlinedAt(Ctx))
-    return getScopeNode(DebugLoc::getFromDILocation(InlinedAt), Ctx);
-  return DL.getScope(Ctx);
-}
-
-// Walk up the scope chain of given debug loc and find line number info
-// for the function.
-static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) {
-  const MDNode *Scope = getScopeNode(DL, Ctx);
-  DISubprogram SP = getDISubprogram(Scope);
-  if (SP.isSubprogram()) {
-    // Check for number of operands since the compatibility is
-    // cheap here.
-    if (SP->getNumOperands() > 19)
-      return DebugLoc::get(SP.getScopeLineNumber(), 0, SP);
-    else
-      return DebugLoc::get(SP.getLineNumber(), 0, SP);
-  }
-
-  return DebugLoc();
-}
-
 // Gather pre-function debug information.  Assumes being called immediately
 // after the function entry point has been emitted.
 void DwarfDebug::beginFunction(const MachineFunction *MF) {
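+  // Track the function we are currently emitting for use by later callbacks.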
+  CurFn = MF;
 
   // If there's no debug info for the function we're not going to do anything.
   if (!MMI->hasDebugInfo())
@@ -1606,14 +1461,14 @@
   // Make sure that each lexical scope will have a begin/end label.
   identifyScopeMarkers();
 
-  // Set DwarfCompileUnitID in MCContext to the Compile Unit this function
+  // Set DwarfCompileUnitID in MCContext to the Compile Unit this function
   // belongs to so that we add to the correct per-cu line table in the
   // non-asm case.
   LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
-  CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
+  DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
   assert(TheCU && "Unable to find compile unit!");
-  if (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport())
-    // Use a single line table if we are using .loc and generating assembly.
+  if (Asm->OutStreamer.hasRawTextSupport())
+    // Use a single line table if we are generating assembly.
     Asm->OutStreamer.getContext().setDwarfCompileUnitID(0);
   else
     Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID());
@@ -1670,7 +1525,7 @@
 
             // Terminate old register assignments that don't reach MI;
             MachineFunction::const_iterator PrevMBB = Prev->getParent();
-            if (PrevMBB != I && (!AtBlockEntry || llvm::next(PrevMBB) != I) &&
+            if (PrevMBB != I && (!AtBlockEntry || std::next(PrevMBB) != I) &&
                 isDbgValueInDefinedReg(Prev)) {
               // Previous register assignment needs to terminate at the end of
               // its basic block.
@@ -1681,7 +1536,7 @@
                 DEBUG(dbgs() << "Dropping DBG_VALUE for empty range:\n"
                              << "\t" << *Prev << "\n");
                 History.pop_back();
-              } else if (llvm::next(PrevMBB) != PrevMBB->getParent()->end())
+              } else if (std::next(PrevMBB) != PrevMBB->getParent()->end())
                 // Terminate after LastMI.
                 History.push_back(LastMI);
             }
@@ -1690,7 +1545,7 @@
         History.push_back(MI);
       } else {
         // Not a DBG_VALUE instruction.
-        if (!MI->isLabel())
+        if (!MI->isPosition())
           AtBlockEntry = false;
 
         // First known non-DBG_VALUE and non-frame setup location marks
@@ -1700,12 +1555,10 @@
           PrologEndLoc = MI->getDebugLoc();
 
         // Check if the instruction clobbers any registers with debug vars.
-        for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
-                                              MOE = MI->operands_end();
-             MOI != MOE; ++MOI) {
-          if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg())
+        for (const MachineOperand &MO : MI->operands()) {
+          if (!MO.isReg() || !MO.isDef() || !MO.getReg())
             continue;
-          for (MCRegAliasIterator AI(MOI->getReg(), TRI, true); AI.isValid();
+          for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
                ++AI) {
             unsigned Reg = *AI;
             const MDNode *Var = LiveUserVar[Reg];
@@ -1738,9 +1591,8 @@
     }
   }
 
-  for (DbgValueHistoryMap::iterator I = DbgValues.begin(), E = DbgValues.end();
-       I != E; ++I) {
-    SmallVectorImpl<const MachineInstr *> &History = I->second;
+  for (auto &I : DbgValues) {
+    SmallVectorImpl<const MachineInstr *> &History = I.second;
     if (History.empty())
       continue;
 
@@ -1759,8 +1611,7 @@
       }
     }
     // Request labels for the full history.
-    for (unsigned i = 0, e = History.size(); i != e; ++i) {
-      const MachineInstr *MI = History[i];
+    for (const MachineInstr *MI : History) {
       if (MI->isDebugValue())
         requestLabelBeforeInsn(MI);
       else
@@ -1774,7 +1625,7 @@
   // Record beginning of function.
   if (!PrologEndLoc.isUnknown()) {
     DebugLoc FnStartDL =
-        getFnDebugLoc(PrologEndLoc, MF->getFunction()->getContext());
+        PrologEndLoc.getFnDebugLoc(MF->getFunction()->getContext());
     recordSourceLine(
         FnStartDL.getLine(), FnStartDL.getCol(),
         FnStartDL.getScope(MF->getFunction()->getContext()),
@@ -1817,27 +1668,42 @@
 
 // Gather and emit post-function debug information.
 void DwarfDebug::endFunction(const MachineFunction *MF) {
-  if (!MMI->hasDebugInfo() || LScopes.empty()) return;
+  // Every beginFunction(MF) call should be followed by an endFunction(MF)
+  // call; however, endFunction may also be called without a matching
+  // beginFunction, so handle both cases here.
+  if (CurFn == 0)
+    CurFn = MF;
+  else
+    assert(CurFn == MF);
+  assert(CurFn != 0);
+
+  if (!MMI->hasDebugInfo() || LScopes.empty()) {
+    // If we don't have a lexical scope for this function then there will
+    // be a hole in the range information. Keep note of this by setting the
+    // previously used section to nullptr.
+    PrevSection = nullptr;
+    PrevCU = nullptr;
+    CurFn = 0;
+    return;
+  }
 
   // Define end label for subprogram.
-  FunctionEndSym = Asm->GetTempSymbol("func_end",
-                                      Asm->getFunctionNumber());
+  FunctionEndSym = Asm->GetTempSymbol("func_end", Asm->getFunctionNumber());
-  // Assumes in correct section after the entry point.
+  // Assumes we are in the correct section after the entry point.
   Asm->OutStreamer.EmitLabel(FunctionEndSym);
-  // Set DwarfCompileUnitID in MCContext to default value.
+
+  // Set DwarfCompileUnitID in MCContext to its default value.
   Asm->OutStreamer.getContext().setDwarfCompileUnitID(0);
 
   SmallPtrSet<const MDNode *, 16> ProcessedVars;
-  collectVariableInfo(MF, ProcessedVars);
+  collectVariableInfo(ProcessedVars);
 
   LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
-  CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
+  DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
   assert(TheCU && "Unable to find compile unit!");
 
   // Construct abstract scopes.
-  ArrayRef<LexicalScope *> AList = LScopes.getAbstractScopesList();
-  for (unsigned i = 0, e = AList.size(); i != e; ++i) {
-    LexicalScope *AScope = AList[i];
+  for (LexicalScope *AScope : LScopes.getAbstractScopesList()) {
     DISubprogram SP(AScope->getScopeNode());
     if (SP.isSubprogram()) {
       // Collect info for variables that were optimized out.
@@ -1861,14 +1727,18 @@
   }
 
   DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope);
-
-  if (!MF->getTarget().Options.DisableFramePointerElim(*MF))
+  if (!CurFn->getTarget().Options.DisableFramePointerElim(*CurFn))
     TheCU->addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr);
 
+  // Add the range of this function to the list of ranges for the CU.
+  RangeSpan Span(FunctionBeginSym, FunctionEndSym);
+  TheCU->addRange(std::move(Span));
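+  // Remember the section and CU this function was emitted into so the next
+  // endFunction can detect holes in the range information.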
+  PrevSection = Asm->getCurrentSection();
+  PrevCU = TheCU;
+
   // Clear debug info
-  for (ScopeVariablesMap::iterator
-         I = ScopeVariables.begin(), E = ScopeVariables.end(); I != E; ++I)
-    DeleteContainerPointers(I->second);
+  for (auto &I : ScopeVariables)
+    DeleteContainerPointers(I.second);
   ScopeVariables.clear();
   DeleteContainerPointers(CurrentFnArguments);
   UserVariables.clear();
@@ -1877,6 +1747,7 @@
   LabelsBeforeInsn.clear();
   LabelsAfterInsn.clear();
   PrevLabel = NULL;
+  CurFn = 0;
 }
 
-// Register a source line with debug info. Returns the  unique label that was
+// Register a source line with debug info. Returns the unique label that was
@@ -1886,6 +1757,7 @@
   StringRef Fn;
   StringRef Dir;
   unsigned Src = 1;
+  unsigned Discriminator = 0;
   if (S) {
     DIDescriptor Scope(S);
 
@@ -1909,13 +1781,16 @@
       DILexicalBlock DB(S);
       Fn = DB.getFilename();
       Dir = DB.getDirectory();
+      Discriminator = DB.getDiscriminator();
     } else
       llvm_unreachable("Unexpected scope info");
 
-    Src = getOrCreateSourceID(Fn, Dir,
-            Asm->OutStreamer.getContext().getDwarfCompileUnitID());
+    unsigned CUID = Asm->OutStreamer.getContext().getDwarfCompileUnitID();
+    Src = static_cast<DwarfCompileUnit *>(InfoHolder.getUnits()[CUID])
+              ->getOrCreateSourceID(Fn, Dir);
   }
-  Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0, 0, Fn);
+  Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0,
+                                         Discriminator, Fn);
 }
 
 //===----------------------------------------------------------------------===//
@@ -1924,39 +1799,36 @@
 
-// Compute the size and offset of a DIE. The offset is relative to start of the
-// CU. It returns the offset after laying out the DIE.
+// Compute the size and offset of a DIE. The offset is relative to the start
+// of the CU. It returns the offset after laying out the DIE.
-unsigned
-DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) {
-  // Get the children.
-  const std::vector<DIE *> &Children = Die->getChildren();
-
+unsigned DwarfFile::computeSizeAndOffset(DIE *Die, unsigned Offset) {
   // Record the abbreviation.
   assignAbbrevNumber(Die->getAbbrev());
 
   // Get the abbreviation for this DIE.
-  unsigned AbbrevNumber = Die->getAbbrevNumber();
-  const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+  const DIEAbbrev &Abbrev = Die->getAbbrev();
 
   // Set DIE offset
   Die->setOffset(Offset);
 
   // Start the size with the size of abbreviation code.
-  Offset += MCAsmInfo::getULEB128Size(AbbrevNumber);
+  Offset += getULEB128Size(Die->getAbbrevNumber());
 
-  const SmallVectorImpl<DIEValue*> &Values = Die->getValues();
-  const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData();
+  const SmallVectorImpl<DIEValue *> &Values = Die->getValues();
+  const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
 
   // Size the DIE attribute values.
   for (unsigned i = 0, N = Values.size(); i < N; ++i)
     // Size attribute value.
     Offset += Values[i]->SizeOf(Asm, AbbrevData[i].getForm());
 
+  // Get the children.
+  const std::vector<DIE *> &Children = Die->getChildren();
+
   // Size the DIE children if any.
   if (!Children.empty()) {
-    assert(Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes &&
-           "Children flag not set");
+    assert(Abbrev.hasChildren() && "Children flag not set");
 
-    for (unsigned j = 0, M = Children.size(); j < M; ++j)
-      Offset = computeSizeAndOffset(Children[j], Offset);
+    for (DIE *Child : Children)
+      Offset = computeSizeAndOffset(Child, Offset);
 
     // End of children marker.
     Offset += sizeof(int8_t);
@@ -1967,23 +1839,22 @@
 }
 
 // Compute the size and offset for each DIE.
-void DwarfUnits::computeSizeAndOffsets() {
+void DwarfFile::computeSizeAndOffsets() {
   // Offset from the first CU in the debug info section is 0 initially.
   unsigned SecOffset = 0;
 
   // Iterate over each compile unit and set the size and offsets for each
   // DIE within each compile unit. All offsets are CU relative.
-  for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
-         E = CUs.end(); I != E; ++I) {
-    (*I)->setDebugInfoOffset(SecOffset);
+  for (DwarfUnit *TheU : CUs) {
+    TheU->setDebugInfoOffset(SecOffset);
 
     // CU-relative offset is reset to 0 here.
-    unsigned Offset = sizeof(int32_t) + // Length of Unit Info
-                      (*I)->getHeaderSize(); // Unit-specific headers
+    unsigned Offset = sizeof(int32_t) +      // Length of Unit Info
+                      TheU->getHeaderSize(); // Unit-specific headers
 
     // EndOffset here is CU-relative, after laying out
     // all of the CU DIE.
-    unsigned EndOffset = computeSizeAndOffset((*I)->getCUDie(), Offset);
+    unsigned EndOffset = computeSizeAndOffset(TheU->getUnitDie(), Offset);
     SecOffset += EndOffset;
   }
 }
@@ -1994,21 +1865,20 @@
 
   // Dwarf sections base addresses.
   DwarfInfoSectionSym =
-    emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
-  DwarfAbbrevSectionSym =
-    emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
+      emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
   if (useSplitDwarf())
-    DwarfAbbrevDWOSectionSym =
-      emitSectionSym(Asm, TLOF.getDwarfAbbrevDWOSection(),
-                     "section_abbrev_dwo");
-  emitSectionSym(Asm, TLOF.getDwarfARangesSection());
-
-  if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection())
-    emitSectionSym(Asm, MacroInfo);
+    DwarfInfoDWOSectionSym =
+        emitSectionSym(Asm, TLOF.getDwarfInfoDWOSection(), "section_info_dwo");
+  DwarfAbbrevSectionSym =
+      emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
+  if (useSplitDwarf())
+    DwarfAbbrevDWOSectionSym = emitSectionSym(
+        Asm, TLOF.getDwarfAbbrevDWOSection(), "section_abbrev_dwo");
+  if (GenerateARangeSection)
+    emitSectionSym(Asm, TLOF.getDwarfARangesSection());
 
   DwarfLineSectionSym =
-    emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
-  emitSectionSym(Asm, TLOF.getDwarfLocSection());
+      emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
   if (GenerateGnuPubSections) {
     DwarfGnuPubNamesSectionSym =
         emitSectionSym(Asm, TLOF.getDwarfGnuPubNamesSection());
@@ -2020,39 +1890,36 @@
   }
 
   DwarfStrSectionSym =
-    emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string");
+      emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string");
   if (useSplitDwarf()) {
     DwarfStrDWOSectionSym =
-      emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string");
+        emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string");
     DwarfAddrSectionSym =
-      emitSectionSym(Asm, TLOF.getDwarfAddrSection(), "addr_sec");
-  }
-  DwarfDebugRangeSectionSym = emitSectionSym(Asm, TLOF.getDwarfRangesSection(),
-                                             "debug_range");
-
-  DwarfDebugLocSectionSym = emitSectionSym(Asm, TLOF.getDwarfLocSection(),
-                                           "section_debug_loc");
-
-  TextSectionSym = emitSectionSym(Asm, TLOF.getTextSection(), "text_begin");
-  emitSectionSym(Asm, TLOF.getDataSection());
+        emitSectionSym(Asm, TLOF.getDwarfAddrSection(), "addr_sec");
+    DwarfDebugLocSectionSym =
+        emitSectionSym(Asm, TLOF.getDwarfLocDWOSection(), "skel_loc");
+  } else
+    DwarfDebugLocSectionSym =
+        emitSectionSym(Asm, TLOF.getDwarfLocSection(), "section_debug_loc");
+  DwarfDebugRangeSectionSym =
+      emitSectionSym(Asm, TLOF.getDwarfRangesSection(), "debug_range");
 }
 
 // Recursively emits a debug information entry.
-void DwarfDebug::emitDIE(DIE *Die, ArrayRef<DIEAbbrev *> Abbrevs) {
+void DwarfDebug::emitDIE(DIE *Die) {
   // Get the abbreviation for this DIE.
-  unsigned AbbrevNumber = Die->getAbbrevNumber();
-  const DIEAbbrev *Abbrev = Abbrevs[AbbrevNumber - 1];
+  const DIEAbbrev &Abbrev = Die->getAbbrev();
 
   // Emit the code (index) for the abbreviation.
   if (Asm->isVerbose())
-    Asm->OutStreamer.AddComment("Abbrev [" + Twine(AbbrevNumber) + "] 0x" +
-                                Twine::utohexstr(Die->getOffset()) + ":0x" +
-                                Twine::utohexstr(Die->getSize()) + " " +
-                                dwarf::TagString(Abbrev->getTag()));
-  Asm->EmitULEB128(AbbrevNumber);
+    Asm->OutStreamer.AddComment("Abbrev [" + Twine(Abbrev.getNumber()) +
+                                "] 0x" + Twine::utohexstr(Die->getOffset()) +
+                                ":0x" + Twine::utohexstr(Die->getSize()) + " " +
+                                dwarf::TagString(Abbrev.getTag()));
+  Asm->EmitULEB128(Abbrev.getNumber());
 
-  const SmallVectorImpl<DIEValue*> &Values = Die->getValues();
-  const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData();
+  const SmallVectorImpl<DIEValue *> &Values = Die->getValues();
+  const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
 
   // Emit the DIE attribute values.
   for (unsigned i = 0, N = Values.size(); i < N; ++i) {
@@ -2060,159 +1927,73 @@
     dwarf::Form Form = AbbrevData[i].getForm();
     assert(Form && "Too many attributes for DIE (check abbreviation)");
 
-    if (Asm->isVerbose())
+    if (Asm->isVerbose()) {
       Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr));
+      if (Attr == dwarf::DW_AT_accessibility)
+        Asm->OutStreamer.AddComment(dwarf::AccessibilityString(
+            cast<DIEInteger>(Values[i])->getValue()));
+    }
 
-    switch (Attr) {
-    case dwarf::DW_AT_abstract_origin:
-    case dwarf::DW_AT_type:
-    case dwarf::DW_AT_friend:
-    case dwarf::DW_AT_specification:
-    case dwarf::DW_AT_import:
-    case dwarf::DW_AT_containing_type: {
-      DIEEntry *E = cast<DIEEntry>(Values[i]);
-      DIE *Origin = E->getEntry();
-      unsigned Addr = Origin->getOffset();
-      if (Form == dwarf::DW_FORM_ref_addr) {
-        assert(!useSplitDwarf() && "TODO: dwo files can't have relocations.");
-        // For DW_FORM_ref_addr, output the offset from beginning of debug info
-        // section. Origin->getOffset() returns the offset from start of the
-        // compile unit.
-        CompileUnit *CU = CUDieMap.lookup(Origin->getCompileUnit());
-        assert(CU && "CUDie should belong to a CU.");
-        Addr += CU->getDebugInfoOffset();
-        if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
-          Asm->EmitLabelPlusOffset(DwarfInfoSectionSym, Addr,
-                                   DIEEntry::getRefAddrSize(Asm));
-        else
-          Asm->EmitLabelOffsetDifference(DwarfInfoSectionSym, Addr,
-                                         DwarfInfoSectionSym,
-                                         DIEEntry::getRefAddrSize(Asm));
-      } else {
-        // Make sure Origin belong to the same CU.
-        assert(Die->getCompileUnit() == Origin->getCompileUnit() &&
-               "The referenced DIE should belong to the same CU in ref4");
-        Asm->EmitInt32(Addr);
-      }
-      break;
-    }
-    case dwarf::DW_AT_ranges: {
-      // DW_AT_range Value encodes offset in debug_range section.
-      DIEInteger *V = cast<DIEInteger>(Values[i]);
-
-      if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) {
-        Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym,
-                                 V->getValue(),
-                                 4);
-      } else {
-        Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym,
-                                       V->getValue(),
-                                       DwarfDebugRangeSectionSym,
-                                       4);
-      }
-      break;
-    }
-    case dwarf::DW_AT_location: {
-      if (DIELabel *L = dyn_cast<DIELabel>(Values[i])) {
-        if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
-          Asm->EmitSectionOffset(L->getValue(), DwarfDebugLocSectionSym);
-        else
-          Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4);
-      } else {
-        Values[i]->EmitValue(Asm, Form);
-      }
-      break;
-    }
-    case dwarf::DW_AT_accessibility: {
-      if (Asm->isVerbose()) {
-        DIEInteger *V = cast<DIEInteger>(Values[i]);
-        Asm->OutStreamer.AddComment(dwarf::AccessibilityString(V->getValue()));
-      }
-      Values[i]->EmitValue(Asm, Form);
-      break;
-    }
-    default:
-      // Emit an attribute using the defined form.
-      Values[i]->EmitValue(Asm, Form);
-      break;
-    }
+    // Emit an attribute using the defined form.
+    Values[i]->EmitValue(Asm, Form);
   }
 
   // Emit the DIE children if any.
-  if (Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes) {
+  if (Abbrev.hasChildren()) {
     const std::vector<DIE *> &Children = Die->getChildren();
 
-    for (unsigned j = 0, M = Children.size(); j < M; ++j)
-      emitDIE(Children[j], Abbrevs);
+    for (DIE *Child : Children)
+      emitDIE(Child);
 
-    if (Asm->isVerbose())
-      Asm->OutStreamer.AddComment("End Of Children Mark");
+    Asm->OutStreamer.AddComment("End Of Children Mark");
     Asm->EmitInt8(0);
   }
 }
 
-// Emit the various dwarf units to the unit section USection with
-// the abbreviations going into ASection.
+// Emit the various dwarf units to the sections they were each assigned,
+// with abbreviations referenced via ASectionSym.
-void DwarfUnits::emitUnits(DwarfDebug *DD,
-                           const MCSection *USection,
-                           const MCSection *ASection,
-                           const MCSymbol *ASectionSym) {
-  Asm->OutStreamer.SwitchSection(USection);
-  for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(),
-         E = CUs.end(); I != E; ++I) {
-    CompileUnit *TheCU = *I;
-    DIE *Die = TheCU->getCUDie();
+void DwarfFile::emitUnits(DwarfDebug *DD, const MCSymbol *ASectionSym) {
+  for (DwarfUnit *TheU : CUs) {
+    DIE *Die = TheU->getUnitDie();
+    const MCSection *USection = TheU->getSection();
+    Asm->OutStreamer.SwitchSection(USection);
 
     // Emit the compile units header.
-    Asm->OutStreamer
-      .EmitLabel(Asm->GetTempSymbol(USection->getLabelBeginName(),
-                                    TheCU->getUniqueID()));
+    Asm->OutStreamer.EmitLabel(TheU->getLabelBegin());
 
     // Emit size of content not including length itself
     Asm->OutStreamer.AddComment("Length of Unit");
-    Asm->EmitInt32(TheCU->getHeaderSize() + Die->getSize());
+    Asm->EmitInt32(TheU->getHeaderSize() + Die->getSize());
 
-    TheCU->emitHeader(ASection, ASectionSym);
+    TheU->emitHeader(ASectionSym);
 
-    DD->emitDIE(Die, Abbreviations);
-    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol(USection->getLabelEndName(),
-                                                  TheCU->getUniqueID()));
+    DD->emitDIE(Die);
+    Asm->OutStreamer.EmitLabel(TheU->getLabelEnd());
   }
 }
 
 // Emit the debug info section.
 void DwarfDebug::emitDebugInfo() {
-  DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+  DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
 
-  Holder.emitUnits(this, Asm->getObjFileLowering().getDwarfInfoSection(),
-                   Asm->getObjFileLowering().getDwarfAbbrevSection(),
-                   DwarfAbbrevSectionSym);
+  Holder.emitUnits(this, DwarfAbbrevSectionSym);
 }
 
 // Emit the abbreviation section.
 void DwarfDebug::emitAbbreviations() {
-  if (!useSplitDwarf())
-    emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection(),
-                &Abbreviations);
-  else
-    emitSkeletonAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection());
+  DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+
+  Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection());
 }
 
-void DwarfDebug::emitAbbrevs(const MCSection *Section,
-                             std::vector<DIEAbbrev *> *Abbrevs) {
+void DwarfFile::emitAbbrevs(const MCSection *Section) {
   // Check to see if it is worth the effort.
-  if (!Abbrevs->empty()) {
+  if (!Abbreviations.empty()) {
     // Start the debug abbrev section.
     Asm->OutStreamer.SwitchSection(Section);
 
-    MCSymbol *Begin = Asm->GetTempSymbol(Section->getLabelBeginName());
-    Asm->OutStreamer.EmitLabel(Begin);
-
      // For each abbreviation.
-    for (unsigned i = 0, N = Abbrevs->size(); i < N; ++i) {
-      // Get abbreviation data
-      const DIEAbbrev *Abbrev = Abbrevs->at(i);
-
+    for (const DIEAbbrev *Abbrev : Abbreviations) {
       // Emit the abbreviation code (base-1 index).
       Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
 
@@ -2222,9 +2003,6 @@
 
     // Mark end of abbreviations.
     Asm->EmitULEB128(0, "EOM(3)");
-
-    MCSymbol *End = Asm->GetTempSymbol(Section->getLabelEndName());
-    Asm->OutStreamer.EmitLabel(End);
   }
 }
 
@@ -2241,8 +2019,9 @@
 
   Asm->OutStreamer.AddComment("Section end label");
 
-  Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd),
-                                   Asm->getDataLayout().getPointerSize());
+  Asm->OutStreamer.EmitSymbolValue(
+      Asm->GetTempSymbol("section_end", SectionEnd),
+      Asm->getDataLayout().getPointerSize());
 
   // Mark end of matrix.
   Asm->OutStreamer.AddComment("DW_LNE_end_sequence");
@@ -2253,25 +2032,19 @@
 
 // Emit visible names into a hashed accelerator table section.
 void DwarfDebug::emitAccelNames() {
-  DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
-                                           dwarf::DW_FORM_data4));
-  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
-         E = CUMap.end(); I != E; ++I) {
-    CompileUnit *TheCU = I->second;
-    const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNames();
-    for (StringMap<std::vector<DIE*> >::const_iterator
-           GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
-      StringRef Name = GI->getKey();
-      const std::vector<DIE *> &Entities = GI->second;
-      for (std::vector<DIE *>::const_iterator DI = Entities.begin(),
-             DE = Entities.end(); DI != DE; ++DI)
-        AT.AddName(Name, (*DI));
+  DwarfAccelTable AT(
+      DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4));
+  for (DwarfUnit *TheU : getUnits()) {
+    for (const auto &GI : TheU->getAccelNames()) {
+      StringRef Name = GI.getKey();
+      for (const DIE *D : GI.second)
+        AT.AddName(Name, D);
     }
   }
 
   AT.FinalizeTable(Asm, "Names");
   Asm->OutStreamer.SwitchSection(
-    Asm->getObjFileLowering().getDwarfAccelNamesSection());
+      Asm->getObjFileLowering().getDwarfAccelNamesSection());
   MCSymbol *SectionBegin = Asm->GetTempSymbol("names_begin");
   Asm->OutStreamer.EmitLabel(SectionBegin);
 
@@ -2282,25 +2055,19 @@
 // Emit objective C classes and categories into a hashed accelerator table
 // section.
 void DwarfDebug::emitAccelObjC() {
-  DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
-                                           dwarf::DW_FORM_data4));
-  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
-         E = CUMap.end(); I != E; ++I) {
-    CompileUnit *TheCU = I->second;
-    const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelObjC();
-    for (StringMap<std::vector<DIE*> >::const_iterator
-           GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
-      StringRef Name = GI->getKey();
-      const std::vector<DIE *> &Entities = GI->second;
-      for (std::vector<DIE *>::const_iterator DI = Entities.begin(),
-             DE = Entities.end(); DI != DE; ++DI)
-        AT.AddName(Name, (*DI));
+  DwarfAccelTable AT(
+      DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4));
+  for (DwarfUnit *TheU : getUnits()) {
+    for (const auto &GI : TheU->getAccelObjC()) {
+      StringRef Name = GI.getKey();
+      for (const DIE *D : GI.second)
+        AT.AddName(Name, D);
     }
   }
 
   AT.FinalizeTable(Asm, "ObjC");
-  Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering()
-                                 .getDwarfAccelObjCSection());
+  Asm->OutStreamer.SwitchSection(
+      Asm->getObjFileLowering().getDwarfAccelObjCSection());
   MCSymbol *SectionBegin = Asm->GetTempSymbol("objc_begin");
   Asm->OutStreamer.EmitLabel(SectionBegin);
 
@@ -2310,25 +2077,19 @@
 
 // Emit namespace dies into a hashed accelerator table.
 void DwarfDebug::emitAccelNamespaces() {
-  DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
-                                           dwarf::DW_FORM_data4));
-  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
-         E = CUMap.end(); I != E; ++I) {
-    CompileUnit *TheCU = I->second;
-    const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNamespace();
-    for (StringMap<std::vector<DIE*> >::const_iterator
-           GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
-      StringRef Name = GI->getKey();
-      const std::vector<DIE *> &Entities = GI->second;
-      for (std::vector<DIE *>::const_iterator DI = Entities.begin(),
-             DE = Entities.end(); DI != DE; ++DI)
-        AT.AddName(Name, (*DI));
+  DwarfAccelTable AT(
+      DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4));
+  for (DwarfUnit *TheU : getUnits()) {
+    for (const auto &GI : TheU->getAccelNamespace()) {
+      StringRef Name = GI.getKey();
+      for (const DIE *D : GI.second)
+        AT.AddName(Name, D);
     }
   }
 
   AT.FinalizeTable(Asm, "namespac");
-  Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering()
-                                 .getDwarfAccelNamespaceSection());
+  Asm->OutStreamer.SwitchSection(
+      Asm->getObjFileLowering().getDwarfAccelNamespaceSection());
   MCSymbol *SectionBegin = Asm->GetTempSymbol("namespac_begin");
   Asm->OutStreamer.EmitLabel(SectionBegin);
 
@@ -2339,31 +2100,24 @@
 // Emit type dies into a hashed accelerator table.
 void DwarfDebug::emitAccelTypes() {
   std::vector<DwarfAccelTable::Atom> Atoms;
-  Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
-                                        dwarf::DW_FORM_data4));
-  Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag,
-                                        dwarf::DW_FORM_data2));
-  Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags,
-                                        dwarf::DW_FORM_data1));
+  Atoms.push_back(
+      DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4));
+  Atoms.push_back(
+      DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2));
+  Atoms.push_back(
+      DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1));
   DwarfAccelTable AT(Atoms);
-  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
-         E = CUMap.end(); I != E; ++I) {
-    CompileUnit *TheCU = I->second;
-    const StringMap<std::vector<std::pair<DIE*, unsigned > > > &Names
-      = TheCU->getAccelTypes();
-    for (StringMap<std::vector<std::pair<DIE*, unsigned> > >::const_iterator
-           GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
-      StringRef Name = GI->getKey();
-      const std::vector<std::pair<DIE *, unsigned> > &Entities = GI->second;
-      for (std::vector<std::pair<DIE *, unsigned> >::const_iterator DI
-             = Entities.begin(), DE = Entities.end(); DI !=DE; ++DI)
-        AT.AddName(Name, (*DI).first, (*DI).second);
+  for (DwarfUnit *TheU : getUnits()) {
+    for (const auto &GI : TheU->getAccelTypes()) {
+      StringRef Name = GI.getKey();
+      for (const auto &DI : GI.second)
+        AT.AddName(Name, DI.first, DI.second);
     }
   }
 
   AT.FinalizeTable(Asm, "types");
-  Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering()
-                                 .getDwarfAccelTypesSection());
+  Asm->OutStreamer.SwitchSection(
+      Asm->getObjFileLowering().getDwarfAccelTypesSection());
   MCSymbol *SectionBegin = Asm->GetTempSymbol("types_begin");
   Asm->OutStreamer.EmitLabel(SectionBegin);
 
@@ -2386,8 +2140,8 @@
 // reference in the pubname header doesn't change.
 
 /// computeIndexValue - Compute the gdb index value for the DIE and CU.
-static dwarf::PubIndexEntryDescriptor computeIndexValue(CompileUnit *CU,
-                                                        DIE *Die) {
+static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
+                                                        const DIE *Die) {
   dwarf::GDBIndexEntryLinkage Linkage = dwarf::GIEL_STATIC;
 
   // We could have a specification DIE that has most of our knowledge,
@@ -2431,176 +2185,109 @@
 /// emitDebugPubNames - Emit visible names into a debug pubnames section.
 ///
 void DwarfDebug::emitDebugPubNames(bool GnuStyle) {
-  const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection();
   const MCSection *PSec =
       GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection()
                : Asm->getObjFileLowering().getDwarfPubNamesSection();
 
-  typedef DenseMap<const MDNode*, CompileUnit*> CUMapType;
-  for (CUMapType::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) {
-    CompileUnit *TheCU = I->second;
-    unsigned ID = TheCU->getUniqueID();
+  emitDebugPubSection(GnuStyle, PSec, "Names", &DwarfUnit::getGlobalNames);
+}
+
+void DwarfDebug::emitDebugPubSection(
+    bool GnuStyle, const MCSection *PSec, StringRef Name,
+    const StringMap<const DIE *> &(DwarfUnit::*Accessor)() const) {
+  for (const auto &NU : CUMap) {
+    DwarfCompileUnit *TheU = NU.second;
+
+    const auto &Globals = (TheU->*Accessor)();
+
+    if (Globals.empty())
+      continue;
+
+    if (auto Skeleton = static_cast<DwarfCompileUnit *>(TheU->getSkeleton()))
+      TheU = Skeleton;
+    unsigned ID = TheU->getUniqueID();
 
     // Start the dwarf pubnames section.
     Asm->OutStreamer.SwitchSection(PSec);
 
-    // Emit a label so we can reference the beginning of this pubname section.
-    if (GnuStyle)
-      Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("gnu_pubnames",
-                                                    TheCU->getUniqueID()));
-
     // Emit the header.
-    Asm->OutStreamer.AddComment("Length of Public Names Info");
-    Asm->EmitLabelDifference(Asm->GetTempSymbol("pubnames_end", ID),
-                             Asm->GetTempSymbol("pubnames_begin", ID), 4);
+    Asm->OutStreamer.AddComment("Length of Public " + Name + " Info");
+    MCSymbol *BeginLabel = Asm->GetTempSymbol("pub" + Name + "_begin", ID);
+    MCSymbol *EndLabel = Asm->GetTempSymbol("pub" + Name + "_end", ID);
+    Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
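+    // The length excludes the 4-byte length field itself; BeginLabel is
+    // emitted immediately after it.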
 
-    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", ID));
+    Asm->OutStreamer.EmitLabel(BeginLabel);
 
     Asm->OutStreamer.AddComment("DWARF Version");
     Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION);
 
     Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
-    Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(), ID),
-                           DwarfInfoSectionSym);
+    Asm->EmitSectionOffset(TheU->getLabelBegin(), TheU->getSectionSym());
 
     Asm->OutStreamer.AddComment("Compilation Unit Length");
-    Asm->EmitLabelDifference(Asm->GetTempSymbol(ISec->getLabelEndName(), ID),
-                             Asm->GetTempSymbol(ISec->getLabelBeginName(), ID),
-                             4);
+    Asm->EmitLabelDifference(TheU->getLabelEnd(), TheU->getLabelBegin(), 4);
 
     // Emit the pubnames for this compilation unit.
-    const StringMap<DIE*> &Globals = TheCU->getGlobalNames();
-    for (StringMap<DIE*>::const_iterator
-           GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
-      const char *Name = GI->getKeyData();
-      DIE *Entity = GI->second;
+    for (const auto &GI : Globals) {
+      const char *Name = GI.getKeyData();
+      const DIE *Entity = GI.second;
 
       Asm->OutStreamer.AddComment("DIE offset");
       Asm->EmitInt32(Entity->getOffset());
 
       if (GnuStyle) {
-        dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheCU, Entity);
+        dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity);
         Asm->OutStreamer.AddComment(
             Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " +
             dwarf::GDBIndexEntryLinkageString(Desc.Linkage));
         Asm->EmitInt8(Desc.toBits());
       }
 
-      if (Asm->isVerbose())
-        Asm->OutStreamer.AddComment("External Name");
-      Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1));
+      Asm->OutStreamer.AddComment("External Name");
+      Asm->OutStreamer.EmitBytes(StringRef(Name, GI.getKeyLength() + 1));
     }
 
     Asm->OutStreamer.AddComment("End Mark");
     Asm->EmitInt32(0);
-    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end", ID));
+    Asm->OutStreamer.EmitLabel(EndLabel);
   }
 }
 
 void DwarfDebug::emitDebugPubTypes(bool GnuStyle) {
-  const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection();
   const MCSection *PSec =
       GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection()
                : Asm->getObjFileLowering().getDwarfPubTypesSection();
 
-  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
-                                                         E = CUMap.end();
-       I != E; ++I) {
-    CompileUnit *TheCU = I->second;
-    // Start the dwarf pubtypes section.
-    Asm->OutStreamer.SwitchSection(PSec);
-
-    // Emit a label so we can reference the beginning of this pubtype section.
-    if (GnuStyle)
-      Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("gnu_pubtypes",
-                                                    TheCU->getUniqueID()));
-
-    // Emit the header.
-    Asm->OutStreamer.AddComment("Length of Public Types Info");
-    Asm->EmitLabelDifference(
-        Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID()),
-        Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID()), 4);
-
-    Asm->OutStreamer.EmitLabel(
-        Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID()));
-
-    if (Asm->isVerbose())
-      Asm->OutStreamer.AddComment("DWARF Version");
-    Asm->EmitInt16(dwarf::DW_PUBTYPES_VERSION);
-
-    Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
-    Asm->EmitSectionOffset(
-        Asm->GetTempSymbol(ISec->getLabelBeginName(), TheCU->getUniqueID()),
-        DwarfInfoSectionSym);
-
-    Asm->OutStreamer.AddComment("Compilation Unit Length");
-    Asm->EmitLabelDifference(
-        Asm->GetTempSymbol(ISec->getLabelEndName(), TheCU->getUniqueID()),
-        Asm->GetTempSymbol(ISec->getLabelBeginName(), TheCU->getUniqueID()), 4);
-
-    // Emit the pubtypes.
-    const StringMap<DIE *> &Globals = TheCU->getGlobalTypes();
-    for (StringMap<DIE *>::const_iterator GI = Globals.begin(),
-                                          GE = Globals.end();
-         GI != GE; ++GI) {
-      const char *Name = GI->getKeyData();
-      DIE *Entity = GI->second;
-
-      if (Asm->isVerbose())
-        Asm->OutStreamer.AddComment("DIE offset");
-      Asm->EmitInt32(Entity->getOffset());
-
-      if (GnuStyle) {
-        dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheCU, Entity);
-        Asm->OutStreamer.AddComment(
-            Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " +
-            dwarf::GDBIndexEntryLinkageString(Desc.Linkage));
-        Asm->EmitInt8(Desc.toBits());
-      }
-
-      if (Asm->isVerbose())
-        Asm->OutStreamer.AddComment("External Name");
-
-      // Emit the name with a terminating null byte.
-      Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength() + 1));
-    }
-
-    Asm->OutStreamer.AddComment("End Mark");
-    Asm->EmitInt32(0);
-    Asm->OutStreamer.EmitLabel(
-        Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID()));
-  }
+  emitDebugPubSection(GnuStyle, PSec, "Types", &DwarfUnit::getGlobalTypes);
 }
 
 // Emit strings into a string section.
-void DwarfUnits::emitStrings(const MCSection *StrSection,
-                             const MCSection *OffsetSection = NULL,
-                             const MCSymbol *StrSecSym = NULL) {
+void DwarfFile::emitStrings(const MCSection *StrSection,
+                            const MCSection *OffsetSection = NULL,
+                            const MCSymbol *StrSecSym = NULL) {
 
-  if (StringPool.empty()) return;
+  if (StringPool.empty())
+    return;
 
   // Start the dwarf str section.
   Asm->OutStreamer.SwitchSection(StrSection);
 
   // Get all of the string pool entries and put them in an array by their ID so
   // we can sort them.
-  SmallVector<std::pair<unsigned,
-                 StringMapEntry<std::pair<MCSymbol*, unsigned> >*>, 64> Entries;
+  SmallVector<std::pair<unsigned, const StrPool::value_type *>, 64> Entries;
 
-  for (StringMap<std::pair<MCSymbol*, unsigned> >::iterator
-         I = StringPool.begin(), E = StringPool.end();
-       I != E; ++I)
-    Entries.push_back(std::make_pair(I->second.second, &*I));
+  for (const auto &I : StringPool)
+    Entries.push_back(std::make_pair(I.second.second, &I));
 
   array_pod_sort(Entries.begin(), Entries.end());
 
-  for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
+  for (const auto &Entry : Entries) {
     // Emit a label for reference from debug information entries.
-    Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first);
+    Asm->OutStreamer.EmitLabel(Entry.second->getValue().first);
 
     // Emit the string itself with a terminating null byte.
-    Asm->OutStreamer.EmitBytes(StringRef(Entries[i].second->getKeyData(),
-                                         Entries[i].second->getKeyLength()+1));
+    Asm->OutStreamer.EmitBytes(StringRef(Entry.second->getKeyData(),
+                                         Entry.second->getKeyLength() + 1));
   }
 
   // If we've got an offset section go ahead and emit that now as well.
@@ -2608,17 +2295,18 @@
     Asm->OutStreamer.SwitchSection(OffsetSection);
     unsigned offset = 0;
     unsigned size = 4; // FIXME: DWARF64 is 8.
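+    // Offsets are a running total of key lengths plus NUL terminators (e.g.,
+    // a 3-byte string at offset 0 puts the next string at offset 4).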
-    for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
+    for (const auto &Entry : Entries) {
       Asm->OutStreamer.EmitIntValue(offset, size);
-      offset += Entries[i].second->getKeyLength() + 1;
+      offset += Entry.second->getKeyLength() + 1;
     }
   }
 }
 
-// Emit strings into a string section.
-void DwarfUnits::emitAddresses(const MCSection *AddrSection) {
+// Emit addresses into the section given.
+void DwarfFile::emitAddresses(const MCSection *AddrSection) {
 
-  if (AddressPool.empty()) return;
+  if (AddressPool.empty())
+    return;
 
   // Start the dwarf addr section.
   Asm->OutStreamer.SwitchSection(AddrSection);
@@ -2626,147 +2314,143 @@
   // Order the address pool entries by ID
   SmallVector<const MCExpr *, 64> Entries(AddressPool.size());
 
-  for (DenseMap<const MCExpr *, unsigned>::iterator I = AddressPool.begin(),
-                                                    E = AddressPool.end();
-       I != E; ++I)
-    Entries[I->second] = I->first;
+  for (const auto &I : AddressPool)
+    Entries[I.second.Number] =
+        I.second.TLS
+            ? Asm->getObjFileLowering().getDebugThreadLocalSymbol(I.first)
+            : MCSymbolRefExpr::Create(I.first, Asm->OutContext);
 
-  for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
-    // Emit an expression for reference from debug information entries.
-    if (const MCExpr *Expr = Entries[i])
-      Asm->OutStreamer.EmitValue(Expr, Asm->getDataLayout().getPointerSize());
-    else
-      Asm->OutStreamer.EmitIntValue(0, Asm->getDataLayout().getPointerSize());
-  }
-
+  for (const MCExpr *Entry : Entries)
+    Asm->OutStreamer.EmitValue(Entry, Asm->getDataLayout().getPointerSize());
 }
 
 // Emit visible names into a debug str section.
 void DwarfDebug::emitDebugStr() {
-  DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+  DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
   Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection());
 }
 
+void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
+                                   const DebugLocEntry &Entry) {
+  DIVariable DV(Entry.getVariable());
+  if (Entry.isInt()) {
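+    // Signed encodings use DW_OP_consts with an SLEB128 operand; everything
+    // else uses DW_OP_constu with a ULEB128 operand (e.g. -1 is the single
+    // SLEB128 byte 0x7f under DW_OP_consts).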
+    DIBasicType BTy(resolve(DV.getType()));
+    if (BTy.Verify() && (BTy.getEncoding() == dwarf::DW_ATE_signed ||
+                         BTy.getEncoding() == dwarf::DW_ATE_signed_char)) {
+      Streamer.EmitInt8(dwarf::DW_OP_consts, "DW_OP_consts");
+      Streamer.EmitSLEB128(Entry.getInt());
+    } else {
+      Streamer.EmitInt8(dwarf::DW_OP_constu, "DW_OP_constu");
+      Streamer.EmitULEB128(Entry.getInt());
+    }
+  } else if (Entry.isLocation()) {
+    MachineLocation Loc = Entry.getLoc();
+    if (!DV.hasComplexAddress())
+      // Regular entry.
+      Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect());
+    else {
+      // Complex address entry.
+      unsigned N = DV.getNumAddrElements();
+      unsigned i = 0;
+      if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) {
+        if (Loc.getOffset()) {
+          i = 2;
+          Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect());
+          Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref");
+          Streamer.EmitInt8(dwarf::DW_OP_plus_uconst, "DW_OP_plus_uconst");
+          Streamer.EmitULEB128(DV.getAddrElement(1));
+        } else {
+          // If first address element is OpPlus then emit
+          // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
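+          // (DW_OP_breg computes "contents of the register plus offset" as
+          // the address in a single operation, e.g. DW_OP_breg6 +8.)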
+          MachineLocation TLoc(Loc.getReg(), DV.getAddrElement(1));
+          Asm->EmitDwarfRegOp(Streamer, TLoc, DV.isIndirect());
+          i = 2;
+        }
+      } else {
+        Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect());
+      }
+
+      // Emit remaining complex address elements.
+      for (; i < N; ++i) {
+        uint64_t Element = DV.getAddrElement(i);
+        if (Element == DIBuilder::OpPlus) {
+          Streamer.EmitInt8(dwarf::DW_OP_plus_uconst, "DW_OP_plus_uconst");
+          Streamer.EmitULEB128(DV.getAddrElement(++i));
+        } else if (Element == DIBuilder::OpDeref) {
+          if (!Loc.isReg())
+            Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref");
+        } else
+          llvm_unreachable("unknown Opcode found in complex address");
+      }
+    }
+  }
+  // else ... ignore constant fp. There is no good way to represent them
+  // here in DWARF.
+  // FIXME: ^
+}
+
+void DwarfDebug::emitDebugLocEntryLocation(const DebugLocEntry &Entry) {
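+  // Location expressions in the debug_loc section are prefixed with a 2-byte
+  // length; emit it as a label difference so the assembler computes it.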
+  Asm->OutStreamer.AddComment("Loc expr size");
+  MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol();
+  MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol();
+  Asm->EmitLabelDifference(end, begin, 2);
+  Asm->OutStreamer.EmitLabel(begin);
+  // Emit the entry.
+  APByteStreamer Streamer(*Asm);
+  emitDebugLocEntry(Streamer, Entry);
+  // Close the range.
+  Asm->OutStreamer.EmitLabel(end);
+}
+
 // Emit locations into the debug loc section.
 void DwarfDebug::emitDebugLoc() {
-  if (DotDebugLocEntries.empty())
-    return;
-
-  for (SmallVectorImpl<DotDebugLocEntry>::iterator
-         I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
-       I != E; ++I) {
-    DotDebugLocEntry &Entry = *I;
-    if (I + 1 != DotDebugLocEntries.end())
-      Entry.Merge(I+1);
-  }
-
   // Start the dwarf loc section.
   Asm->OutStreamer.SwitchSection(
-    Asm->getObjFileLowering().getDwarfLocSection());
+      Asm->getObjFileLowering().getDwarfLocSection());
   unsigned char Size = Asm->getDataLayout().getPointerSize();
-  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
-  unsigned index = 1;
-  for (SmallVectorImpl<DotDebugLocEntry>::iterator
-         I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
-       I != E; ++I, ++index) {
-    DotDebugLocEntry &Entry = *I;
-    if (Entry.isMerged()) continue;
-    if (Entry.isEmpty()) {
-      Asm->OutStreamer.EmitIntValue(0, Size);
-      Asm->OutStreamer.EmitIntValue(0, Size);
-      Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index));
-    } else {
-      Asm->OutStreamer.EmitSymbolValue(Entry.getBeginSym(), Size);
-      Asm->OutStreamer.EmitSymbolValue(Entry.getEndSym(), Size);
-      DIVariable DV(Entry.getVariable());
-      Asm->OutStreamer.AddComment("Loc expr size");
-      MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol();
-      MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol();
-      Asm->EmitLabelDifference(end, begin, 2);
-      Asm->OutStreamer.EmitLabel(begin);
-      if (Entry.isInt()) {
-        DIBasicType BTy(DV.getType());
-        if (BTy.Verify() &&
-            (BTy.getEncoding()  == dwarf::DW_ATE_signed
-             || BTy.getEncoding() == dwarf::DW_ATE_signed_char)) {
-          Asm->OutStreamer.AddComment("DW_OP_consts");
-          Asm->EmitInt8(dwarf::DW_OP_consts);
-          Asm->EmitSLEB128(Entry.getInt());
-        } else {
-          Asm->OutStreamer.AddComment("DW_OP_constu");
-          Asm->EmitInt8(dwarf::DW_OP_constu);
-          Asm->EmitULEB128(Entry.getInt());
-        }
-      } else if (Entry.isLocation()) {
-        MachineLocation Loc = Entry.getLoc();
-        if (!DV.hasComplexAddress())
-          // Regular entry.
-          Asm->EmitDwarfRegOp(Loc, DV.isIndirect());
-        else {
-          // Complex address entry.
-          unsigned N = DV.getNumAddrElements();
-          unsigned i = 0;
-          if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) {
-            if (Loc.getOffset()) {
-              i = 2;
-              Asm->EmitDwarfRegOp(Loc, DV.isIndirect());
-              Asm->OutStreamer.AddComment("DW_OP_deref");
-              Asm->EmitInt8(dwarf::DW_OP_deref);
-              Asm->OutStreamer.AddComment("DW_OP_plus_uconst");
-              Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
-              Asm->EmitSLEB128(DV.getAddrElement(1));
-            } else {
-              // If first address element is OpPlus then emit
-              // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
-              MachineLocation TLoc(Loc.getReg(), DV.getAddrElement(1));
-              Asm->EmitDwarfRegOp(TLoc, DV.isIndirect());
-              i = 2;
-            }
-          } else {
-            Asm->EmitDwarfRegOp(Loc, DV.isIndirect());
-          }
-
-          // Emit remaining complex address elements.
-          for (; i < N; ++i) {
-            uint64_t Element = DV.getAddrElement(i);
-            if (Element == DIBuilder::OpPlus) {
-              Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
-              Asm->EmitULEB128(DV.getAddrElement(++i));
-            } else if (Element == DIBuilder::OpDeref) {
-              if (!Loc.isReg())
-                Asm->EmitInt8(dwarf::DW_OP_deref);
-            } else
-              llvm_unreachable("unknown Opcode found in complex address");
-          }
-        }
+  for (const auto &DebugLoc : DotDebugLocEntries) {
+    Asm->OutStreamer.EmitLabel(DebugLoc.Label);
+    for (const auto &Entry : DebugLoc.List) {
+      // Set up the range. This range is relative to the entry point of the
+      // compile unit. This is a hardcoded 0 for low_pc when we're emitting
+      // ranges, or the DW_AT_low_pc of the compile unit otherwise.
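+      // When the CU has a single range, label differences against that
+      // range's start can resolve at assembly time and need no relocations.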
+      const DwarfCompileUnit *CU = Entry.getCU();
+      if (CU->getRanges().size() == 1) {
+        // Grab the begin symbol from the first range as our base.
+        const MCSymbol *Base = CU->getRanges()[0].getStart();
+        Asm->EmitLabelDifference(Entry.getBeginSym(), Base, Size);
+        Asm->EmitLabelDifference(Entry.getEndSym(), Base, Size);
+      } else {
+        Asm->OutStreamer.EmitSymbolValue(Entry.getBeginSym(), Size);
+        Asm->OutStreamer.EmitSymbolValue(Entry.getEndSym(), Size);
       }
-      // else ... ignore constant fp. There is not any good way to
-      // to represent them here in dwarf.
-      Asm->OutStreamer.EmitLabel(end);
+
+      emitDebugLocEntryLocation(Entry);
     }
+    Asm->OutStreamer.EmitIntValue(0, Size);
+    Asm->OutStreamer.EmitIntValue(0, Size);
   }
 }
 
-struct SymbolCUSorter {
-  SymbolCUSorter(const MCStreamer &s) : Streamer(s) {}
-  const MCStreamer &Streamer;
+void DwarfDebug::emitDebugLocDWO() {
+  Asm->OutStreamer.SwitchSection(
+      Asm->getObjFileLowering().getDwarfLocDWOSection());
+  for (const auto &DebugLoc : DotDebugLocEntries) {
+    Asm->OutStreamer.EmitLabel(DebugLoc.Label);
+    for (const auto &Entry : DebugLoc.List) {
+      // Just always use start_length for now - at least that's one address
+      // rather than two. We could get fancier and try to, say, reuse an
+      // address we know we've emitted elsewhere (the start of the function?
+      // The start of the CU or CU subrange that encloses this range?)
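+      // A start_length entry is a ULEB128 index into the address pool for
+      // the start address, followed by a 4-byte length.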
+      Asm->EmitInt8(dwarf::DW_LLE_start_length_entry);
+      unsigned idx = InfoHolder.getAddrPoolIndex(Entry.getBeginSym());
+      Asm->EmitULEB128(idx);
+      Asm->EmitLabelDifference(Entry.getEndSym(), Entry.getBeginSym(), 4);
 
-  bool operator() (const SymbolCU &A, const SymbolCU &B) {
-    unsigned IA = A.Sym ? Streamer.GetSymbolOrder(A.Sym) : 0;
-    unsigned IB = B.Sym ? Streamer.GetSymbolOrder(B.Sym) : 0;
-
-    // Symbols with no order assigned should be placed at the end.
-    // (e.g. section end labels)
-    if (IA == 0)
-      IA = (unsigned)(-1);
-    if (IB == 0)
-      IB = (unsigned)(-1);
-    return IA < IB;
+      emitDebugLocEntryLocation(Entry);
+    }
+    Asm->EmitInt8(dwarf::DW_LLE_end_of_list_entry);
   }
-};
-
-static bool CUSort(const CompileUnit *A, const CompileUnit *B) {
-    return (A->getUniqueID() < B->getUniqueID());
 }
 
 struct ArangeSpan {
@@ -2777,18 +2461,17 @@
 // address we can tie back to a CU.
 void DwarfDebug::emitDebugARanges() {
   // Start the dwarf aranges section.
-  Asm->OutStreamer
-      .SwitchSection(Asm->getObjFileLowering().getDwarfARangesSection());
+  Asm->OutStreamer.SwitchSection(
+      Asm->getObjFileLowering().getDwarfARangesSection());
 
-  typedef DenseMap<CompileUnit *, std::vector<ArangeSpan> > SpansType;
+  typedef DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan> > SpansType;
 
   SpansType Spans;
 
   // Build a list of sections used.
   std::vector<const MCSection *> Sections;
-  for (SectionMapType::iterator it = SectionMap.begin(); it != SectionMap.end();
-       it++) {
-    const MCSection *Section = it->first;
+  for (const auto &it : SectionMap) {
+    const MCSection *Section = it.first;
     Sections.push_back(Section);
   }
 
@@ -2797,22 +2480,30 @@
   std::sort(Sections.begin(), Sections.end(), SectionSort);
 
   // Build a set of address spans, sorted by CU.
-  for (size_t SecIdx=0;SecIdx<Sections.size();SecIdx++) {
-    const MCSection *Section = Sections[SecIdx];
+  for (const MCSection *Section : Sections) {
     SmallVector<SymbolCU, 8> &List = SectionMap[Section];
     if (List.size() < 2)
       continue;
 
     // Sort the symbols by offset within the section.
-    SymbolCUSorter sorter(Asm->OutStreamer);
-    std::sort(List.begin(), List.end(), sorter);
+    std::sort(List.begin(), List.end(),
+              [&](const SymbolCU &A, const SymbolCU &B) {
+      unsigned IA = A.Sym ? Asm->OutStreamer.GetSymbolOrder(A.Sym) : 0;
+      unsigned IB = B.Sym ? Asm->OutStreamer.GetSymbolOrder(B.Sym) : 0;
+
+      // Symbols with no order assigned should be placed at the end.
+      // (e.g. section end labels)
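+      // Returning false when IA is 0 (and true when only IB is 0) pushes
+      // unordered symbols to the back while keeping the comparison a strict
+      // weak ordering.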
+      if (IA == 0)
+        return false;
+      if (IB == 0)
+        return true;
+      return IA < IB;
+    });
 
     // If we have no section (e.g. common), just write out
     // individual spans for each symbol.
     if (Section == NULL) {
-      for (size_t n = 0; n < List.size(); n++) {
-        const SymbolCU &Cur = List[n];
-
+      for (const SymbolCU &Cur : List) {
         ArangeSpan Span;
         Span.Start = Cur.Sym;
         Span.End = NULL;
@@ -2822,7 +2513,7 @@
     } else {
       // Build spans between each label.
       const MCSymbol *StartSym = List[0].Sym;
-      for (size_t n = 1; n < List.size(); n++) {
+      for (size_t n = 1, e = List.size(); n < e; n++) {
         const SymbolCU &Prev = List[n - 1];
         const SymbolCU &Cur = List[n];
 
@@ -2838,37 +2529,36 @@
     }
   }
 
-  const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection();
   unsigned PtrSize = Asm->getDataLayout().getPointerSize();
 
   // Build a list of CUs used.
-  std::vector<CompileUnit *> CUs;
-  for (SpansType::iterator it = Spans.begin(); it != Spans.end(); it++) {
-    CompileUnit *CU = it->first;
+  std::vector<DwarfCompileUnit *> CUs;
+  for (const auto &it : Spans) {
+    DwarfCompileUnit *CU = it.first;
     CUs.push_back(CU);
   }
 
   // Sort the CU list (again, to ensure consistent output order).
-  std::sort(CUs.begin(), CUs.end(), CUSort);
+  std::sort(CUs.begin(), CUs.end(), [](const DwarfUnit *A, const DwarfUnit *B) {
+    return A->getUniqueID() < B->getUniqueID();
+  });
 
   // Emit an arange table for each CU we used.
-  for (size_t CUIdx=0;CUIdx<CUs.size();CUIdx++) {
-    CompileUnit *CU = CUs[CUIdx];
+  for (DwarfCompileUnit *CU : CUs) {
     std::vector<ArangeSpan> &List = Spans[CU];
 
     // Emit size of content not including length itself.
-    unsigned ContentSize
-        = sizeof(int16_t) // DWARF ARange version number
-        + sizeof(int32_t) // Offset of CU in the .debug_info section
-        + sizeof(int8_t)  // Pointer Size (in bytes)
-        + sizeof(int8_t); // Segment Size (in bytes)
+    unsigned ContentSize =
+        sizeof(int16_t) + // DWARF ARange version number
+        sizeof(int32_t) + // Offset of CU in the .debug_info section
+        sizeof(int8_t) +  // Pointer Size (in bytes)
+        sizeof(int8_t);   // Segment Size (in bytes)
 
     unsigned TupleSize = PtrSize * 2;
 
     // Section 7.20 of the DWARF spec requires the table to be tuple-aligned.
-    unsigned Padding = 0;
-    while (((sizeof(int32_t) + ContentSize + Padding) % TupleSize) != 0)
-      Padding++;
+    unsigned Padding =
+        OffsetToAlignment(sizeof(int32_t) + ContentSize, TupleSize);
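+    // E.g., with an 8-byte pointer size TupleSize is 16, and the 4-byte
+    // length plus 8-byte header leaves 4 bytes of 0xff padding.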
 
     ContentSize += Padding;
     ContentSize += (List.size() + 1) * TupleSize;
@@ -2879,19 +2569,15 @@
     Asm->OutStreamer.AddComment("DWARF Arange version number");
     Asm->EmitInt16(dwarf::DW_ARANGES_VERSION);
     Asm->OutStreamer.AddComment("Offset Into Debug Info Section");
-    Asm->EmitSectionOffset(
-        Asm->GetTempSymbol(ISec->getLabelBeginName(), CU->getUniqueID()),
-        DwarfInfoSectionSym);
+    Asm->EmitSectionOffset(CU->getLocalLabelBegin(), CU->getLocalSectionSym());
     Asm->OutStreamer.AddComment("Address Size (in bytes)");
     Asm->EmitInt8(PtrSize);
     Asm->OutStreamer.AddComment("Segment Size (in bytes)");
     Asm->EmitInt8(0);
 
-    for (unsigned n = 0; n < Padding; n++)
-      Asm->EmitInt8(0xff);
+    Asm->OutStreamer.EmitFill(Padding, 0xff);
 
-    for (unsigned n = 0; n < List.size(); n++) {
-      const ArangeSpan &Span = List[n];
+    for (const ArangeSpan &Span : List) {
       Asm->EmitLabelReference(Span.Start, PtrSize);
 
       // Calculate the size as the distance from the span start to its end.
@@ -2917,122 +2603,129 @@
 // Emit visible names into a debug ranges section.
 void DwarfDebug::emitDebugRanges() {
   // Start the dwarf ranges section.
-  Asm->OutStreamer
-      .SwitchSection(Asm->getObjFileLowering().getDwarfRangesSection());
-  unsigned char Size = Asm->getDataLayout().getPointerSize();
-  for (SmallVectorImpl<const MCSymbol *>::iterator
-         I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
-       I != E; ++I) {
-    if (*I)
-      Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size);
-    else
-      Asm->OutStreamer.EmitIntValue(0, Size);
-  }
-}
+  Asm->OutStreamer.SwitchSection(
+      Asm->getObjFileLowering().getDwarfRangesSection());
 
-// Emit visible names into a debug macinfo section.
-void DwarfDebug::emitDebugMacInfo() {
-  if (const MCSection *LineInfo =
-      Asm->getObjFileLowering().getDwarfMacroInfoSection()) {
-    // Start the dwarf macinfo section.
-    Asm->OutStreamer.SwitchSection(LineInfo);
+  // Size for our labels.
+  unsigned char Size = Asm->getDataLayout().getPointerSize();
+
+  // Grab the specific ranges for the compile units in the module.
+  for (const auto &I : CUMap) {
+    DwarfCompileUnit *TheCU = I.second;
+
+    // Emit a symbol so we can find the beginning of our ranges.
+    Asm->OutStreamer.EmitLabel(TheCU->getLabelRange());
+
+    // Iterate over the misc ranges for the compile units in the module.
+    for (const RangeSpanList &List : TheCU->getRangeLists()) {
+      // Emit our symbol so we can find the beginning of the range.
+      Asm->OutStreamer.EmitLabel(List.getSym());
+
+      for (const RangeSpan &Range : List.getRanges()) {
+        const MCSymbol *Begin = Range.getStart();
+        const MCSymbol *End = Range.getEnd();
+        assert(Begin && "Range without a begin symbol?");
+        assert(End && "Range without an end symbol?");
+        Asm->OutStreamer.EmitSymbolValue(Begin, Size);
+        Asm->OutStreamer.EmitSymbolValue(End, Size);
+      }
+
+      // And terminate the list with two 0 values.
+      Asm->OutStreamer.EmitIntValue(0, Size);
+      Asm->OutStreamer.EmitIntValue(0, Size);
+    }
+
+    // Now emit a range for the CU itself.
+    if (TheCU->getRanges().size() > 1) {
+      Asm->OutStreamer.EmitLabel(
+          Asm->GetTempSymbol("cu_ranges", TheCU->getUniqueID()));
+      for (const RangeSpan &Range : TheCU->getRanges()) {
+        const MCSymbol *Begin = Range.getStart();
+        const MCSymbol *End = Range.getEnd();
+        assert(Begin && "Range without a begin symbol?");
+        assert(End && "Range without an end symbol?");
+        Asm->OutStreamer.EmitSymbolValue(Begin, Size);
+        Asm->OutStreamer.EmitSymbolValue(End, Size);
+      }
+      // And terminate the list with two 0 values.
+      Asm->OutStreamer.EmitIntValue(0, Size);
+      Asm->OutStreamer.EmitIntValue(0, Size);
+    }
   }
 }
 
 // DWARF5 Experimental Separate Dwarf emitters.
 
-// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list,
-// DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id,
-// DW_AT_ranges_base, DW_AT_addr_base.
-CompileUnit *DwarfDebug::constructSkeletonCU(const CompileUnit *CU) {
-
-  DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
-  CompileUnit *NewCU = new CompileUnit(CU->getUniqueID(), Die, CU->getNode(),
-                                       Asm, this, &SkeletonHolder);
-
-  NewCU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name,
-                        CU->getNode().getSplitDebugFilename());
-
-  // Relocate to the beginning of the addr_base section, else 0 for the
-  // beginning of the one for this compile unit.
-  if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
-    NewCU->addLabel(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset,
-                    DwarfAddrSectionSym);
-  else
-    NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base,
-                   dwarf::DW_FORM_sec_offset, 0);
-
-  // 2.17.1 requires that we use DW_AT_low_pc for a single entry point
-  // into an entity. We're using 0, or a NULL label for this.
-  NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
-
-  // DW_AT_stmt_list is a offset of line number information for this
-  // compile unit in debug_line section.
-  // FIXME: Should handle multiple compile units.
-  if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
-    NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset,
-                    DwarfLineSectionSym);
-  else
-    NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, 0);
+void DwarfDebug::initSkeletonUnit(const DwarfUnit *U, DIE *Die,
+                                  DwarfUnit *NewU) {
+  NewU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name,
+                       U->getCUNode().getSplitDebugFilename());
 
   if (!CompilationDir.empty())
-    NewCU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
+    NewU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
 
-  // Flags to let the linker know we have emitted new style pubnames.
-  if (GenerateGnuPubSections) {
-    if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
-      NewCU->addLabel(Die, dwarf::DW_AT_GNU_pubnames, dwarf::DW_FORM_sec_offset,
-                      Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID()));
-    else
-      NewCU->addDelta(Die, dwarf::DW_AT_GNU_pubnames, dwarf::DW_FORM_data4,
-                      Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID()),
-                      DwarfGnuPubNamesSectionSym);
+  addGnuPubAttributes(NewU, Die);
 
-    if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
-      NewCU->addLabel(Die, dwarf::DW_AT_GNU_pubtypes, dwarf::DW_FORM_sec_offset,
-                      Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID()));
-    else
-      NewCU->addDelta(Die, dwarf::DW_AT_GNU_pubtypes, dwarf::DW_FORM_data4,
-                      Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID()),
-                      DwarfGnuPubTypesSectionSym);
-  }
+  SkeletonHolder.addUnit(NewU);
+}
 
-  // Flag if we've emitted any ranges and their location for the compile unit.
-  if (DebugRangeSymbols.size()) {
-    if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
-      NewCU->addLabel(Die, dwarf::DW_AT_GNU_ranges_base,
-                      dwarf::DW_FORM_sec_offset, DwarfDebugRangeSectionSym);
-    else
-      NewCU->addUInt(Die, dwarf::DW_AT_GNU_ranges_base, dwarf::DW_FORM_data4,
-                     0);
-  }
+// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list,
+// DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id,
+// DW_AT_addr_base, DW_AT_ranges_base.
+DwarfCompileUnit *DwarfDebug::constructSkeletonCU(const DwarfCompileUnit *CU) {
 
-  SkeletonHolder.addUnit(NewCU);
-  SkeletonCUs.push_back(NewCU);
+  DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
+  DwarfCompileUnit *NewCU = new DwarfCompileUnit(
+      CU->getUniqueID(), Die, CU->getCUNode(), Asm, this, &SkeletonHolder);
+  NewCU->initSection(Asm->getObjFileLowering().getDwarfInfoSection(),
+                     DwarfInfoSectionSym);
+
+  NewCU->initStmtList(DwarfLineSectionSym);
+
+  initSkeletonUnit(CU, Die, NewCU);
 
   return NewCU;
 }
 
-void DwarfDebug::emitSkeletonAbbrevs(const MCSection *Section) {
-  assert(useSplitDwarf() && "No split dwarf debug info?");
-  emitAbbrevs(Section, &SkeletonAbbrevs);
+// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_dwo_name,
+// DW_AT_addr_base.
+DwarfTypeUnit *DwarfDebug::constructSkeletonTU(DwarfTypeUnit *TU) {
+  DwarfCompileUnit &CU = static_cast<DwarfCompileUnit &>(
+      *SkeletonHolder.getUnits()[TU->getCU().getUniqueID()]);
+
+  DIE *Die = new DIE(dwarf::DW_TAG_type_unit);
+  DwarfTypeUnit *NewTU =
+      new DwarfTypeUnit(TU->getUniqueID(), Die, CU, Asm, this, &SkeletonHolder);
+  NewTU->setTypeSignature(TU->getTypeSignature());
+  NewTU->setType(NULL);
+  NewTU->initSection(
+      Asm->getObjFileLowering().getDwarfTypesSection(TU->getTypeSignature()));
+
+  initSkeletonUnit(TU, Die, NewTU);
+  return NewTU;
 }
 
 // Emit the .debug_info.dwo section for separated dwarf. This contains the
 // compile units that would normally be in debug_info.
 void DwarfDebug::emitDebugInfoDWO() {
   assert(useSplitDwarf() && "No split dwarf debug info?");
-  InfoHolder.emitUnits(this, Asm->getObjFileLowering().getDwarfInfoDWOSection(),
-                       Asm->getObjFileLowering().getDwarfAbbrevDWOSection(),
-                       DwarfAbbrevDWOSectionSym);
+  // Don't pass an abbrev symbol; a constant zero is used instead so that no
+  // relocations are emitted into the dwo file.
+  InfoHolder.emitUnits(this, /* AbbrevSymbol */ nullptr);
 }
 
 // Emit the .debug_abbrev.dwo section for separated dwarf. This contains the
 // abbreviations for the .debug_info.dwo section.
 void DwarfDebug::emitDebugAbbrevDWO() {
   assert(useSplitDwarf() && "No split dwarf?");
-  emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevDWOSection(),
-              &Abbreviations);
+  InfoHolder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevDWOSection());
+}
+
+void DwarfDebug::emitDebugLineDWO() {
+  assert(useSplitDwarf() && "No split dwarf?");
+  Asm->OutStreamer.SwitchSection(
+      Asm->getObjFileLowering().getDwarfLineDWOSection());
+  SplitTypeUnitFileTable.Emit(Asm->OutStreamer);
 }
 
 // Emit the .debug_str.dwo section for separated dwarf. This contains the
@@ -3040,9 +2733,75 @@
 // sections.
 void DwarfDebug::emitDebugStrDWO() {
   assert(useSplitDwarf() && "No split dwarf?");
-  const MCSection *OffSec = Asm->getObjFileLowering()
-                            .getDwarfStrOffDWOSection();
+  const MCSection *OffSec =
+      Asm->getObjFileLowering().getDwarfStrOffDWOSection();
   const MCSymbol *StrSym = DwarfStrSectionSym;
   InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(),
                          OffSec, StrSym);
 }
+
+MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
+  if (!useSplitDwarf())
+    return nullptr;
+  if (SingleCU)
+    SplitTypeUnitFileTable.setCompilationDir(CU.getCUNode().getDirectory());
+  return &SplitTypeUnitFileTable;
+}
+
+void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
+                                      StringRef Identifier, DIE *RefDie,
+                                      DICompositeType CTy) {
+  // Flag the type unit reference as a declaration so that if it contains
+  // members (implicit special members, static data member definitions, member
+  // declarations for definitions in this CU, etc) consumers don't get confused
+  // and think this is a full definition.
+  CU.addFlag(RefDie, dwarf::DW_AT_declaration);
+
+  const DwarfTypeUnit *&TU = DwarfTypeUnits[CTy];
+  if (TU) {
+    CU.addDIETypeSignature(RefDie, *TU);
+    return;
+  }
+
+  DIE *UnitDie = new DIE(dwarf::DW_TAG_type_unit);
+  DwarfTypeUnit *NewTU =
+      new DwarfTypeUnit(InfoHolder.getUnits().size(), UnitDie, CU, Asm, this,
+                        &InfoHolder, getDwoLineTable(CU));
+  TU = NewTU;
+  InfoHolder.addUnit(NewTU);
+
+  NewTU->addUInt(UnitDie, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
+                 CU.getLanguage());
+
+  MD5 Hash;
+  Hash.update(Identifier);
+  // ... take the least significant 8 bytes and return those. Our MD5
+  // implementation always returns its results in little endian, so swap
+  // bytes appropriately.
+  MD5::MD5Result Result;
+  Hash.final(Result);
+  uint64_t Signature = *reinterpret_cast<support::ulittle64_t *>(Result + 8);
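+  // That is, bytes 8..15 of the 16-byte digest, read as little-endian.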
+  NewTU->setTypeSignature(Signature);
+  if (useSplitDwarf())
+    NewTU->setSkeleton(constructSkeletonTU(NewTU));
+  else
+    CU.applyStmtList(*UnitDie);
+
+  NewTU->setType(NewTU->createTypeDIE(CTy));
+
+  NewTU->initSection(
+      useSplitDwarf()
+          ? Asm->getObjFileLowering().getDwarfTypesDWOSection(Signature)
+          : Asm->getObjFileLowering().getDwarfTypesSection(Signature));
+
+  CU.addDIETypeSignature(RefDie, *NewTU);
+}
+
+void DwarfDebug::attachLowHighPC(DwarfCompileUnit *Unit, DIE *D,
+                                 MCSymbol *Begin, MCSymbol *End) {
+  Unit->addLabelAddress(D, dwarf::DW_AT_low_pc, Begin);
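+  // DWARF 4 allows DW_AT_high_pc to be a constant offset from DW_AT_low_pc;
+  // earlier versions require an address, hence the version check.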
+  if (DwarfVersion < 4)
+    Unit->addLabelAddress(D, dwarf::DW_AT_high_pc, End);
+  else
+    Unit->addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin);
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index cebac39..da708f5 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -14,44 +14,44 @@
 #ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__
 #define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
 
+#include "AsmPrinterHandler.h"
 #include "DIE.h"
+#include "DebugLocEntry.h"
+#include "DebugLocList.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringMap.h"
-#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/ADT/FoldingSet.h"
 #include "llvm/CodeGen/LexicalScopes.h"
-#include "llvm/DebugInfo.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugLoc.h"
 #include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCDwarf.h"
 #include "llvm/Support/Allocator.h"
-#include "llvm/Support/DebugLoc.h"
 
 namespace llvm {
 
-class CompileUnit;
+class AsmPrinter;
+class ByteStreamer;
 class ConstantInt;
 class ConstantFP;
-class DbgVariable;
-class MachineFrameInfo;
+class DwarfCompileUnit;
+class DwarfDebug;
+class DwarfTypeUnit;
+class DwarfUnit;
 class MachineModuleInfo;
-class MachineOperand;
-class MCAsmInfo;
-class DIEAbbrev;
-class DIE;
-class DIEBlock;
-class DIEEntry;
 
 //===----------------------------------------------------------------------===//
 /// \brief This class is used to record source line correspondence.
 class SrcLineInfo {
-  unsigned Line;                     // Source line number.
-  unsigned Column;                   // Source column.
-  unsigned SourceID;                 // Source ID number.
-  MCSymbol *Label;                   // Label in code ID number.
+  unsigned Line;     // Source line number.
+  unsigned Column;   // Source column.
+  unsigned SourceID; // Source ID number.
+  MCSymbol *Label;   // Label in code ID number.
 public:
   SrcLineInfo(unsigned L, unsigned C, unsigned S, MCSymbol *label)
-    : Line(L), Column(C), SourceID(S), Label(label) {}
+      : Line(L), Column(C), SourceID(S), Label(label) {}
 
   // Accessors
   unsigned getLine() const { return Line; }
@@ -60,124 +60,44 @@
   MCSymbol *getLabel() const { return Label; }
 };
 
-/// \brief This struct describes location entries emitted in the .debug_loc
-/// section.
-class DotDebugLocEntry {
-  // Begin and end symbols for the address range that this location is valid.
-  const MCSymbol *Begin;
-  const MCSymbol *End;
-
-  // Type of entry that this represents.
-  enum EntryType {
-    E_Location,
-    E_Integer,
-    E_ConstantFP,
-    E_ConstantInt
-  };
-  enum EntryType EntryKind;
-
-  union {
-    int64_t Int;
-    const ConstantFP *CFP;
-    const ConstantInt *CIP;
-  } Constants;
-
-  // The location in the machine frame.
-  MachineLocation Loc;
-
-  // The variable to which this location entry corresponds.
-  const MDNode *Variable;
-
-  // Whether this location has been merged.
-  bool Merged;
-
-public:
-  DotDebugLocEntry() : Begin(0), End(0), Variable(0), Merged(false) {
-    Constants.Int = 0;
-  }
-  DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L,
-                   const MDNode *V)
-      : Begin(B), End(E), Loc(L), Variable(V), Merged(false) {
-    Constants.Int = 0;
-    EntryKind = E_Location;
-  }
-  DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i)
-      : Begin(B), End(E), Variable(0), Merged(false) {
-    Constants.Int = i;
-    EntryKind = E_Integer;
-  }
-  DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr)
-      : Begin(B), End(E), Variable(0), Merged(false) {
-    Constants.CFP = FPtr;
-    EntryKind = E_ConstantFP;
-  }
-  DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E,
-                   const ConstantInt *IPtr)
-      : Begin(B), End(E), Variable(0), Merged(false) {
-    Constants.CIP = IPtr;
-    EntryKind = E_ConstantInt;
-  }
-
-  /// \brief Empty entries are also used as a trigger to emit temp label. Such
-  /// labels are referenced is used to find debug_loc offset for a given DIE.
-  bool isEmpty() { return Begin == 0 && End == 0; }
-  bool isMerged() { return Merged; }
-  void Merge(DotDebugLocEntry *Next) {
-    if (!(Begin && Loc == Next->Loc && End == Next->Begin))
-      return;
-    Next->Begin = Begin;
-    Merged = true;
-  }
-  bool isLocation() const    { return EntryKind == E_Location; }
-  bool isInt() const         { return EntryKind == E_Integer; }
-  bool isConstantFP() const  { return EntryKind == E_ConstantFP; }
-  bool isConstantInt() const { return EntryKind == E_ConstantInt; }
-  int64_t getInt() const                    { return Constants.Int; }
-  const ConstantFP *getConstantFP() const   { return Constants.CFP; }
-  const ConstantInt *getConstantInt() const { return Constants.CIP; }
-  const MDNode *getVariable() const { return Variable; }
-  const MCSymbol *getBeginSym() const { return Begin; }
-  const MCSymbol *getEndSym() const { return End; }
-  MachineLocation getLoc() const { return Loc; }
-};
-
 //===----------------------------------------------------------------------===//
 /// \brief This class is used to track local variable information.
 class DbgVariable {
-  DIVariable Var;                    // Variable Descriptor.
-  DIE *TheDIE;                       // Variable DIE.
-  unsigned DotDebugLocOffset;        // Offset in DotDebugLocEntries.
-  DbgVariable *AbsVar;               // Corresponding Abstract variable, if any.
-  const MachineInstr *MInsn;         // DBG_VALUE instruction of the variable.
+  DIVariable Var;             // Variable Descriptor.
+  DIE *TheDIE;                // Variable DIE.
+  unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries.
+  DbgVariable *AbsVar;        // Corresponding Abstract variable, if any.
+  const MachineInstr *MInsn;  // DBG_VALUE instruction of the variable.
   int FrameIndex;
   DwarfDebug *DD;
+
 public:
   // AbsVar may be NULL.
   DbgVariable(DIVariable V, DbgVariable *AV, DwarfDebug *DD)
-    : Var(V), TheDIE(0), DotDebugLocOffset(~0U), AbsVar(AV), MInsn(0),
-      FrameIndex(~0), DD(DD) {}
+      : Var(V), TheDIE(0), DotDebugLocOffset(~0U), AbsVar(AV), MInsn(0),
+        FrameIndex(~0), DD(DD) {}
 
   // Accessors.
-  DIVariable getVariable()           const { return Var; }
-  void setDIE(DIE *D)                      { TheDIE = D; }
-  DIE *getDIE()                      const { return TheDIE; }
-  void setDotDebugLocOffset(unsigned O)    { DotDebugLocOffset = O; }
-  unsigned getDotDebugLocOffset()    const { return DotDebugLocOffset; }
-  StringRef getName()                const { return Var.getName(); }
+  DIVariable getVariable() const { return Var; }
+  void setDIE(DIE *D) { TheDIE = D; }
+  DIE *getDIE() const { return TheDIE; }
+  void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; }
+  unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; }
+  StringRef getName() const { return Var.getName(); }
   DbgVariable *getAbstractVariable() const { return AbsVar; }
-  const MachineInstr *getMInsn()     const { return MInsn; }
-  void setMInsn(const MachineInstr *M)     { MInsn = M; }
-  int getFrameIndex()                const { return FrameIndex; }
-  void setFrameIndex(int FI)               { FrameIndex = FI; }
+  const MachineInstr *getMInsn() const { return MInsn; }
+  void setMInsn(const MachineInstr *M) { MInsn = M; }
+  int getFrameIndex() const { return FrameIndex; }
+  void setFrameIndex(int FI) { FrameIndex = FI; }
   // Translate tag to proper Dwarf tag.
-  uint16_t getTag()                  const {
+  uint16_t getTag() const {
     if (Var.getTag() == dwarf::DW_TAG_arg_variable)
       return dwarf::DW_TAG_formal_parameter;
 
     return dwarf::DW_TAG_variable;
   }
   /// \brief Return true if DbgVariable is artificial.
-  bool isArtificial()                const {
+  bool isArtificial() const {
     if (Var.isArtificial())
       return true;
     if (getType().isArtificial())
@@ -185,7 +105,7 @@
     return false;
   }
 
-  bool isObjectPointer()             const {
+  bool isObjectPointer() const {
     if (Var.isObjectPointer())
       return true;
     if (getType().isObjectPointer())
@@ -193,21 +113,16 @@
     return false;
   }
 
-  bool variableHasComplexAddress()   const {
+  bool variableHasComplexAddress() const {
     assert(Var.isVariable() && "Invalid complex DbgVariable!");
     return Var.hasComplexAddress();
   }
-  bool isBlockByrefVariable()        const {
-    assert(Var.isVariable() && "Invalid complex DbgVariable!");
-    return Var.isBlockByrefVariable();
-  }
-  unsigned getNumAddrElements()      const {
+  bool isBlockByrefVariable() const;
+  unsigned getNumAddrElements() const {
     assert(Var.isVariable() && "Invalid complex DbgVariable!");
     return Var.getNumAddrElements();
   }
-  uint64_t getAddrElement(unsigned i) const {
-    return Var.getAddrElement(i);
-  }
+  uint64_t getAddrElement(unsigned i) const { return Var.getAddrElement(i); }
   DIType getType() const;
 
 private:
@@ -217,43 +132,50 @@
 };
 
 /// \brief Collects and handles information specific to a particular
-/// collection of units.
-class DwarfUnits {
+/// collection of units. This collection represents all of the units
+/// that will be ultimately output into a single object file.
+class DwarfFile {
   // Target of Dwarf emission, used for sizing of abbreviations.
   AsmPrinter *Asm;
 
   // Used to uniquely define abbreviations.
-  FoldingSet<DIEAbbrev> *AbbreviationsSet;
+  FoldingSet<DIEAbbrev> AbbreviationsSet;
 
   // A list of all the unique abbreviations in use.
-  std::vector<DIEAbbrev *> &Abbreviations;
+  std::vector<DIEAbbrev *> Abbreviations;
 
   // A pointer to all units in the section.
-  SmallVector<CompileUnit *, 1> CUs;
+  SmallVector<DwarfUnit *, 1> CUs;
 
   // Collection of strings for this unit and assorted symbols.
   // A String->Symbol mapping of strings used by indirect
   // references.
-  typedef StringMap<std::pair<MCSymbol*, unsigned>,
-                    BumpPtrAllocator&> StrPool;
+  typedef StringMap<std::pair<MCSymbol *, unsigned>, BumpPtrAllocator &>
+  StrPool;
   StrPool StringPool;
   unsigned NextStringPoolNumber;
   std::string StringPref;
 
+  struct AddressPoolEntry {
+    unsigned Number;
+    bool TLS;
+    AddressPoolEntry(unsigned Number, bool TLS) : Number(Number), TLS(TLS) {}
+  };
   // Collection of addresses for this unit and assorted labels.
   // A Symbol->unsigned mapping of addresses used by indirect
   // references.
-  typedef DenseMap<const MCExpr *, unsigned> AddrPool;
+  typedef DenseMap<const MCSymbol *, AddressPoolEntry> AddrPool;
   AddrPool AddressPool;
   unsigned NextAddrPoolNumber;
 
 public:
-  DwarfUnits(AsmPrinter *AP, FoldingSet<DIEAbbrev> *AS,
-             std::vector<DIEAbbrev *> &A, const char *Pref,
-             BumpPtrAllocator &DA)
-      : Asm(AP), AbbreviationsSet(AS), Abbreviations(A), StringPool(DA),
-        NextStringPoolNumber(0), StringPref(Pref), AddressPool(),
-        NextAddrPoolNumber(0) {}
+  DwarfFile(AsmPrinter *AP, const char *Pref, BumpPtrAllocator &DA)
+      : Asm(AP), StringPool(DA), NextStringPoolNumber(0), StringPref(Pref),
+        AddressPool(), NextAddrPoolNumber(0) {}
+
+  ~DwarfFile();
+
+  const SmallVectorImpl<DwarfUnit *> &getUnits() { return CUs; }
 
   /// \brief Compute the size and offset of a DIE given an incoming Offset.
   unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
@@ -265,12 +187,14 @@
   void assignAbbrevNumber(DIEAbbrev &Abbrev);
 
   /// \brief Add a unit to the list of CUs.
-  void addUnit(CompileUnit *CU) { CUs.push_back(CU); }
+  void addUnit(DwarfUnit *CU) { CUs.push_back(CU); }
 
   /// \brief Emit all of the units to the section listed with the given
   /// abbreviation section.
-  void emitUnits(DwarfDebug *DD, const MCSection *USection,
-                 const MCSection *ASection, const MCSymbol *ASectionSym);
+  void emitUnits(DwarfDebug *DD, const MCSymbol *ASectionSym);
+
+  /// \brief Emit a set of abbreviations to the specific section.
+  void emitAbbrevs(const MCSection *);
 
   /// \brief Emit all of the strings to the section given.
   void emitStrings(const MCSection *StrSection, const MCSection *OffsetSection,
@@ -295,8 +219,7 @@
 
   /// \brief Returns the index into the address pool with the given
   /// label/symbol.
-  unsigned getAddrPoolIndex(const MCExpr *Sym);
-  unsigned getAddrPoolIndex(const MCSymbol *Sym);
+  unsigned getAddrPoolIndex(const MCSymbol *Sym, bool TLS = false);
 
   /// \brief Returns the address pool.
   AddrPool *getAddrPool() { return &AddressPool; }
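
The pool above backs the DW_FORM_GNU_addr_index references used under split
dwarf: each symbol gets a stable, dense index on first use, and the new TLS
flag records whether the entry needs a TLS-relative form when the table is
emitted. The definition is out of view in this patch; a minimal sketch of the
indexing logic, assuming the DenseMap-backed AddrPool and NextAddrPoolNumber
declared above:

    unsigned DwarfFile::getAddrPoolIndex(const MCSymbol *Sym, bool TLS) {
      std::pair<AddrPool::iterator, bool> P = AddressPool.insert(
          std::make_pair(Sym, AddressPoolEntry(NextAddrPoolNumber, TLS)));
      if (P.second) // First use of this symbol: it consumed a fresh index.
        ++NextAddrPoolNumber;
      return P.first->second.Number;
    }

Repeated queries for the same symbol return the original index, so DIEs can
refer to an address before the pool itself is emitted.
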
@@ -304,13 +227,13 @@
 
 /// \brief Helper used to pair up a symbol and its DWARF compile unit.
 struct SymbolCU {
-  SymbolCU(CompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {}
+  SymbolCU(DwarfCompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {}
   const MCSymbol *Sym;
-  CompileUnit *CU;
+  DwarfCompileUnit *CU;
 };
 
 /// \brief Collects and handles dwarf debug information.
-class DwarfDebug {
+class DwarfDebug : public AsmPrinterHandler {
   // Target of Dwarf emission.
   AsmPrinter *Asm;
 
@@ -320,40 +243,31 @@
   // All DIEValues are allocated through this allocator.
   BumpPtrAllocator DIEValueAllocator;
 
-  // Handle to the a compile unit used for the inline extension handling.
-  CompileUnit *FirstCU;
+  // Handle to the compile unit used for the inline extension handling;
+  // this is just so that the DIEValue allocator has a place to store
+  // the particular elements.
+  // FIXME: Store these off of DwarfDebug instead?
+  DwarfCompileUnit *FirstCU;
 
-  // Maps MDNode with its corresponding CompileUnit.
-  DenseMap <const MDNode *, CompileUnit *> CUMap;
+  // Maps MDNode with its corresponding DwarfCompileUnit.
+  MapVector<const MDNode *, DwarfCompileUnit *> CUMap;
 
-  // Maps subprogram MDNode with its corresponding CompileUnit.
-  DenseMap <const MDNode *, CompileUnit *> SPMap;
+  // Maps subprogram MDNode with its corresponding DwarfCompileUnit.
+  DenseMap<const MDNode *, DwarfCompileUnit *> SPMap;
 
-  // Maps a CU DIE with its corresponding CompileUnit.
-  DenseMap <const DIE *, CompileUnit *> CUDieMap;
+  // Maps a CU DIE with its corresponding DwarfCompileUnit.
+  DenseMap<const DIE *, DwarfCompileUnit *> CUDieMap;
 
-  /// Maps MDNodes for type sysstem with the corresponding DIEs. These DIEs can
+  /// Maps MDNodes for type system with the corresponding DIEs. These DIEs can
   /// be shared across CUs, that is why we keep the map here instead
-  /// of in CompileUnit.
+  /// of in DwarfCompileUnit.
   DenseMap<const MDNode *, DIE *> MDTypeNodeToDieMap;
 
-  // Used to uniquely define abbreviations.
-  FoldingSet<DIEAbbrev> AbbreviationsSet;
-
-  // A list of all the unique abbreviations in use.
-  std::vector<DIEAbbrev *> Abbreviations;
-
-  // Stores the current file ID for a given compile unit.
-  DenseMap <unsigned, unsigned> FileIDCUMap;
-  // Source id map, i.e. CUID, source filename and directory,
-  // separated by a zero byte, mapped to a unique id.
-  StringMap<unsigned, BumpPtrAllocator&> SourceIdMap;
-
   // List of all labels used in aranges generation.
   std::vector<SymbolCU> ArangeLabels;
 
   // Size of each symbol emitted (for those symbols that have a specific size).
-  DenseMap <const MCSymbol *, uint64_t> SymSize;
+  DenseMap<const MCSymbol *, uint64_t> SymSize;
 
   // Provides a unique id per text section.
   typedef DenseMap<const MCSection *, SmallVector<SymbolCU, 8> > SectionMapType;
@@ -368,15 +282,16 @@
   DenseMap<const MDNode *, DIE *> AbstractSPDies;
 
   // Collection of dbg variables of a scope.
-  typedef DenseMap<LexicalScope *,
-                   SmallVector<DbgVariable *, 8> > ScopeVariablesMap;
+  typedef DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> >
+  ScopeVariablesMap;
   ScopeVariablesMap ScopeVariables;
 
   // Collection of abstract variables.
   DenseMap<const MDNode *, DbgVariable *> AbstractVariables;
 
-  // Collection of DotDebugLocEntry.
-  SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries;
+  // Collection of DebugLocEntry. Stored in a linked list so that DIELocLists
+  // can refer to them in spite of insertions into this list.
+  SmallVector<DebugLocList, 4> DotDebugLocEntries;
 
   // Collection of subprogram DIEs that are marked (at the end of the module)
   // as DW_AT_inline.
@@ -394,17 +309,15 @@
 
   // Every user variable mentioned by a DBG_VALUE instruction in order of
   // appearance.
-  SmallVector<const MDNode*, 8> UserVariables;
+  SmallVector<const MDNode *, 8> UserVariables;
 
   // For each user variable, keep a list of DBG_VALUE instructions in order.
   // The list can also contain normal instructions that clobber the previous
   // DBG_VALUE.
-  typedef DenseMap<const MDNode*, SmallVector<const MachineInstr*, 4> >
-    DbgValueHistoryMap;
+  typedef DenseMap<const MDNode *, SmallVector<const MachineInstr *, 4> >
+  DbgValueHistoryMap;
   DbgValueHistoryMap DbgValues;
 
-  SmallVector<const MCSymbol *, 8> DebugRangeSymbols;
-
   // Previous instruction's location information. This is used to determine
  // label location to indicate scope boundaries in dwarf debug info.
   DebugLoc PrevInstLoc;
@@ -414,6 +327,19 @@
   // body.
   DebugLoc PrologEndLoc;
 
+  // If nonnull, stores the current machine function we're processing.
+  const MachineFunction *CurFn;
+
+  // If nonnull, stores the current machine instruction we're processing.
+  const MachineInstr *CurMI;
+
+  // If nonnull, stores the section into which the previous function was
+  // emitted.
+  const MCSection *PrevSection;
+
+  // If nonnull, stores the CU in which the previous subprogram was contained.
+  const DwarfCompileUnit *PrevCU;
+
   // Section Symbols: these are assembler temporary labels that are emitted at
   // the beginning of each supported dwarf section.  These are used to form
   // section offsets and are created by EmitSectionLabels.
@@ -421,36 +347,48 @@
   MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
   MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym;
   MCSymbol *FunctionBeginSym, *FunctionEndSym;
-  MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym;
+  MCSymbol *DwarfInfoDWOSectionSym, *DwarfAbbrevDWOSectionSym;
+  MCSymbol *DwarfStrDWOSectionSym;
   MCSymbol *DwarfGnuPubNamesSectionSym, *DwarfGnuPubTypesSectionSym;
 
   // As an optimization, there is no need to emit an entry in the directory
   // table for the same directory as DW_AT_comp_dir.
   StringRef CompilationDir;
 
-  // Counter for assigning globally unique IDs for CUs.
-  unsigned GlobalCUIndexCount;
+  // Counter for assigning globally unique IDs for ranges.
+  unsigned GlobalRangeCount;
 
   // Holder for the file specific debug information.
-  DwarfUnits InfoHolder;
+  DwarfFile InfoHolder;
 
   // Holders for the various debug information flags that we might need to
   // have exposed. See accessor functions below for description.
 
   // Holder for imported entities.
   typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32>
-    ImportedEntityMap;
+  ImportedEntityMap;
   ImportedEntityMap ScopesWithImportedEntities;
 
-  // Holder for types that are going to be extracted out into a type unit.
-  std::vector<DIE *> TypeUnits;
+  // Map from MDNodes for user-defined types to the type units that describe
+  // them.
+  DenseMap<const MDNode *, const DwarfTypeUnit *> DwarfTypeUnits;
 
   // Whether to emit the pubnames/pubtypes sections.
   bool HasDwarfPubSections;
 
+  // Whether or not to use AT_ranges for compilation units.
+  bool HasCURanges;
+
+  // Whether we emitted a function into a section other than the default
+  // text.
+  bool UsedNonDefaultText;
+
   // Version of dwarf we're emitting.
   unsigned DwarfVersion;
 
+  // Maps from a type identifier to the actual MDNode.
+  DITypeIdentifierMap TypeIdentifierMap;
+
   // DWARF5 Experimental Options
   bool HasDwarfAccelTables;
   bool HasSplitDwarf;
@@ -460,25 +398,27 @@
   // original object file, rather than things that are meant
   // to be in the .dwo sections.
 
-  // The CUs left in the original object file for separated debug info.
-  SmallVector<CompileUnit *, 1> SkeletonCUs;
-
-  // Used to uniquely define abbreviations for the skeleton emission.
-  FoldingSet<DIEAbbrev> SkeletonAbbrevSet;
-
-  // A list of all the unique abbreviations in use.
-  std::vector<DIEAbbrev *> SkeletonAbbrevs;
-
   // Holder for the skeleton information.
-  DwarfUnits SkeletonHolder;
+  DwarfFile SkeletonHolder;
 
-  // Maps from a type identifier to the actual MDNode.
-  DITypeIdentifierMap TypeIdentifierMap;
+  /// Store file names for type units under fission in a line table header that
+  /// will be emitted into debug_line.dwo.
+  // FIXME: replace this with a map from comp_dir to table so that we can emit
+  // multiple tables during LTO each of which uses directory 0, referencing the
+  // comp_dir of all the type units that use it.
+  MCDwarfDwoLineTable SplitTypeUnitFileTable;
 
-private:
+  // True iff there is only a single CU in this module.
+  bool SingleCU;
+
+  MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &);
 
   void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
 
+  const SmallVectorImpl<DwarfUnit *> &getUnits() {
+    return InfoHolder.getUnits();
+  }
+
   /// \brief Find abstract variable associated with Var.
   DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc);
 
@@ -486,24 +426,30 @@
   /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
   /// variables in this scope then create and insert DIEs for these
   /// variables.
-  DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP);
+  DIE *updateSubprogramScopeDIE(DwarfCompileUnit *SPCU, DISubprogram SP);
+
+  /// \brief A helper function to check whether the DIE for a given Scope is
+  /// going to be null.
+  bool isLexicalScopeDIENull(LexicalScope *Scope);
+
+  /// \brief A helper function to construct a RangeSpanList for a given
+  /// lexical scope.
+  void addScopeRangeList(DwarfCompileUnit *TheCU, DIE *ScopeDIE,
+                         const SmallVectorImpl<InsnRange> &Range);
 
   /// \brief Construct new DW_TAG_lexical_block for this scope and
   /// attach DW_AT_low_pc/DW_AT_high_pc labels.
-  DIE *constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
-  /// A helper function to check whether the DIE for a given Scope is going
-  /// to be null.
-  bool isLexicalScopeDIENull(LexicalScope *Scope);
+  DIE *constructLexicalScopeDIE(DwarfCompileUnit *TheCU, LexicalScope *Scope);
 
   /// \brief This scope represents inlined body of a function. Construct
   /// DIE to represent this concrete inlined copy of the function.
-  DIE *constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
+  DIE *constructInlinedScopeDIE(DwarfCompileUnit *TheCU, LexicalScope *Scope);
 
   /// \brief Construct a DIE for this scope.
-  DIE *constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
+  DIE *constructScopeDIE(DwarfCompileUnit *TheCU, LexicalScope *Scope);
   /// A helper function to create children of a Scope DIE.
-  DIE *createScopeChildrenDIE(CompileUnit *TheCU, LexicalScope *Scope,
-                              SmallVectorImpl<DIE*> &Children);
+  DIE *createScopeChildrenDIE(DwarfCompileUnit *TheCU, LexicalScope *Scope,
+                              SmallVectorImpl<DIE *> &Children);
 
   /// \brief Emit initial Dwarf sections with a label at the start of each one.
   void emitSectionLabels();
@@ -528,9 +474,6 @@
   /// open.
   void endSections();
 
-  /// \brief Emit a set of abbreviations to the specific section.
-  void emitAbbrevs(const MCSection *, std::vector<DIEAbbrev*> *);
-
   /// \brief Emit the debug info section.
   void emitDebugInfo();
 
@@ -566,32 +509,41 @@
   /// index.
   void emitDebugPubTypes(bool GnuStyle = false);
 
+  void
+  emitDebugPubSection(bool GnuStyle, const MCSection *PSec, StringRef Name,
+                      const StringMap<const DIE *> &(DwarfUnit::*Accessor)()
+                      const);
+
   /// \brief Emit visible names into a debug str section.
   void emitDebugStr();
 
   /// \brief Emit visible names into a debug loc section.
   void emitDebugLoc();
 
+  /// \brief Emit visible names into a debug loc dwo section.
+  void emitDebugLocDWO();
+
   /// \brief Emit visible names into a debug aranges section.
   void emitDebugARanges();
 
   /// \brief Emit visible names into a debug ranges section.
   void emitDebugRanges();
 
-  /// \brief Emit visible names into a debug macinfo section.
-  void emitDebugMacInfo();
-
   /// \brief Emit inline info using custom format.
   void emitDebugInlineInfo();
 
   /// DWARF 5 Experimental Split Dwarf Emitters
 
+  /// \brief Initialize common features of skeleton units.
+  void initSkeletonUnit(const DwarfUnit *U, DIE *Die, DwarfUnit *NewU);
+
   /// \brief Construct the split debug info compile unit for the debug info
   /// section.
-  CompileUnit *constructSkeletonCU(const CompileUnit *CU);
+  DwarfCompileUnit *constructSkeletonCU(const DwarfCompileUnit *CU);
 
-  /// \brief Emit the local split abbreviations.
-  void emitSkeletonAbbrevs(const MCSection *);
+  /// \brief Construct the split debug info type unit for the debug info
+  /// section.
+  DwarfTypeUnit *constructSkeletonTU(DwarfTypeUnit *TU);
 
   /// \brief Emit the debug info dwo section.
   void emitDebugInfoDWO();
@@ -599,27 +551,33 @@
   /// \brief Emit the debug abbrev dwo section.
   void emitDebugAbbrevDWO();
 
+  /// \brief Emit the debug line dwo section.
+  void emitDebugLineDWO();
+
   /// \brief Emit the debug str dwo section.
   void emitDebugStrDWO();
 
-  /// \brief Create new CompileUnit for the given metadata node with tag
+  /// Add flags to let the linker know we have emitted new-style pubnames.
+  /// Only emit them here if we don't have a skeleton CU for split dwarf.
+  void addGnuPubAttributes(DwarfUnit *U, DIE *D) const;
+
+  /// \brief Create new DwarfCompileUnit for the given metadata node with tag
   /// DW_TAG_compile_unit.
-  CompileUnit *constructCompileUnit(DICompileUnit DIUnit);
+  DwarfCompileUnit *constructDwarfCompileUnit(DICompileUnit DIUnit);
 
   /// \brief Construct subprogram DIE.
-  void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N);
+  void constructSubprogramDIE(DwarfCompileUnit *TheCU, const MDNode *N);
 
   /// \brief Construct imported_module or imported_declaration DIE.
-  void constructImportedEntityDIE(CompileUnit *TheCU, const MDNode *N);
+  void constructImportedEntityDIE(DwarfCompileUnit *TheCU, const MDNode *N);
 
   /// \brief Construct import_module DIE.
-  void constructImportedEntityDIE(CompileUnit *TheCU, const MDNode *N,
+  void constructImportedEntityDIE(DwarfCompileUnit *TheCU, const MDNode *N,
                                   DIE *Context);
 
   /// \brief Construct import_module DIE.
-  void constructImportedEntityDIE(CompileUnit *TheCU,
-                                  const DIImportedEntity &Module,
-                                  DIE *Context);
+  void constructImportedEntityDIE(DwarfCompileUnit *TheCU,
+                                  const DIImportedEntity &Module, DIE *Context);
 
   /// \brief Register a source line with debug info. Returns the unique
   /// label that was emitted and which provides correspondence to the
@@ -633,21 +591,18 @@
 
   /// \brief If Var is a current function argument then add it to the
   /// CurrentFnArguments list.
-  bool addCurrentFnArgument(const MachineFunction *MF,
-                            DbgVariable *Var, LexicalScope *Scope);
+  bool addCurrentFnArgument(DbgVariable *Var, LexicalScope *Scope);
 
   /// \brief Populate LexicalScope entries with variables' info.
-  void collectVariableInfo(const MachineFunction *,
-                           SmallPtrSet<const MDNode *, 16> &ProcessedVars);
+  void collectVariableInfo(SmallPtrSet<const MDNode *, 16> &ProcessedVars);
 
   /// \brief Collect variable information from the side table maintained
   /// by MMI.
-  void collectVariableInfoFromMMITable(const MachineFunction * MF,
-                                       SmallPtrSet<const MDNode *, 16> &P);
+  void collectVariableInfoFromMMITable(SmallPtrSet<const MDNode *, 16> &P);
 
   /// \brief Ensure that a label will be emitted before MI.
   void requestLabelBeforeInsn(const MachineInstr *MI) {
-    LabelsBeforeInsn.insert(std::make_pair(MI, (MCSymbol*)0));
+    LabelsBeforeInsn.insert(std::make_pair(MI, (MCSymbol *)0));
   }
 
   /// \brief Return Label preceding the instruction.
@@ -655,12 +610,15 @@
 
   /// \brief Ensure that a label will be emitted after MI.
   void requestLabelAfterInsn(const MachineInstr *MI) {
-    LabelsAfterInsn.insert(std::make_pair(MI, (MCSymbol*)0));
+    LabelsAfterInsn.insert(std::make_pair(MI, (MCSymbol *)0));
   }
 
   /// \brief Return Label immediately following the instruction.
   MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
 
+  void attachLowHighPC(DwarfCompileUnit *Unit, DIE *D, MCSymbol *Begin,
+                       MCSymbol *End);
+
 public:
   //===--------------------------------------------------------------------===//
   // Main entry points.
@@ -679,62 +637,89 @@
   void beginModule();
 
   /// \brief Emit all Dwarf sections that should come after the content.
-  void endModule();
+  void endModule() override;
 
   /// \brief Gather pre-function debug information.
-  void beginFunction(const MachineFunction *MF);
+  void beginFunction(const MachineFunction *MF) override;
 
   /// \brief Gather and emit post-function debug information.
-  void endFunction(const MachineFunction *MF);
+  void endFunction(const MachineFunction *MF) override;
 
   /// \brief Process beginning of an instruction.
-  void beginInstruction(const MachineInstr *MI);
+  void beginInstruction(const MachineInstr *MI) override;
 
   /// \brief Process end of an instruction.
-  void endInstruction(const MachineInstr *MI);
+  void endInstruction() override;
 
   /// \brief Add a DIE to the set of types that we're going to pull into
   /// type units.
-  void addTypeUnitType(DIE *Die) { TypeUnits.push_back(Die); }
+  void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier,
+                            DIE *Die, DICompositeType CTy);
 
   /// \brief Add a label so that arange data can be generated for it.
   void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); }
 
   /// \brief For symbols that have a size designated (e.g. common symbols),
   /// this tracks that size.
-  void setSymbolSize(const MCSymbol *Sym, uint64_t Size) { SymSize[Sym] = Size;}
-
-  /// \brief Look up the source id with the given directory and source file
-  /// names. If none currently exists, create a new id and insert it in the
-  /// SourceIds map.
-  unsigned getOrCreateSourceID(StringRef DirName, StringRef FullName,
-                               unsigned CUID);
+  void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {
+    SymSize[Sym] = Size;
+  }
 
   /// \brief Recursively Emits a debug information entry.
-  void emitDIE(DIE *Die, ArrayRef<DIEAbbrev *> Abbrevs);
+  void emitDIE(DIE *Die);
 
   // Experimental DWARF5 features.
 
   /// \brief Returns whether or not to emit tables that dwarf consumers can
   /// use to accelerate lookup.
-  bool useDwarfAccelTables() { return HasDwarfAccelTables; }
+  bool useDwarfAccelTables() const { return HasDwarfAccelTables; }
 
   /// \brief Returns whether or not to change the current debug info for the
   /// split dwarf proposal support.
-  bool useSplitDwarf() { return HasSplitDwarf; }
+  bool useSplitDwarf() const { return HasSplitDwarf; }
 
   /// Returns the Dwarf Version.
   unsigned getDwarfVersion() const { return DwarfVersion; }
 
+  /// Returns the section symbol for the .debug_loc section.
+  MCSymbol *getDebugLocSym() const { return DwarfDebugLocSectionSym; }
+
+  /// Returns the previous section that was emitted into.
+  const MCSection *getPrevSection() const { return PrevSection; }
+
+  /// Returns the previous CU that was being updated.
+  const DwarfCompileUnit *getPrevCU() const { return PrevCU; }
+
+  /// Returns the entries for the .debug_loc section.
+  const SmallVectorImpl<DebugLocList> &getDebugLocEntries() const {
+    return DotDebugLocEntries;
+  }
+
+  /// \brief Emit an entry for the debug loc section. This can be used to
+  /// handle an entry that's going to be emitted into the debug loc section.
+  void emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocEntry &Entry);
+
+  /// Emit the location for a debug loc entry, including the size header.
+  void emitDebugLocEntryLocation(const DebugLocEntry &Entry);
+
   /// Find the MDNode for the given reference.
   template <typename T> T resolve(DIRef<T> Ref) const {
     return Ref.resolve(TypeIdentifierMap);
   }
 
+  /// \brief Return the TypeIdentifierMap.
+  const DITypeIdentifierMap &getTypeIdentifierMap() const {
+    return TypeIdentifierMap;
+  }
+
+  /// Find the DwarfCompileUnit for the given CU Die.
+  DwarfCompileUnit *lookupUnit(const DIE *CU) const {
+    return CUDieMap.lookup(CU);
+  }
   /// isSubprogramContext - Return true if Context is either a subprogram
   /// or another context nested inside a subprogram.
   bool isSubprogramContext(const MDNode *Context);
-
 };
 } // End of namespace llvm
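
DwarfDebug now plugs into AsmPrinter through the common AsmPrinterHandler
interface (DwarfException follows suit later in this patch), which is why the
entry points above gained 'override'. AsmPrinterHandler.h itself is not shown
in this change; inferred from the methods overridden above, its shape is
roughly:

    class AsmPrinterHandler {
    public:
      virtual ~AsmPrinterHandler();

      /// For symbols with a designated size (e.g. common symbols).
      virtual void setSymbolSize(const MCSymbol *Sym, uint64_t Size) = 0;

      /// Emit all sections that should come after the content.
      virtual void endModule() = 0;

      /// Gather pre-function debug information.
      virtual void beginFunction(const MachineFunction *MF) = 0;

      /// Gather and emit post-function debug information.
      virtual void endFunction(const MachineFunction *MF) = 0;

      /// Process the beginning and end of an instruction.
      virtual void beginInstruction(const MachineInstr *MI) = 0;
      virtual void endInstruction() = 0;
    };
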
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 7133458..113a9e4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -20,6 +20,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Mangler.h"
 #include "llvm/IR/Module.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
@@ -30,7 +31,7 @@
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
-#include "llvm/Target/Mangler.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetOptions.h"
@@ -57,19 +58,6 @@
   return Count;
 }
 
-/// PadLT - Order landing pads lexicographically by type id.
-bool DwarfException::PadLT(const LandingPadInfo *L, const LandingPadInfo *R) {
-  const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
-  unsigned LSize = LIds.size(), RSize = RIds.size();
-  unsigned MinSize = LSize < RSize ? LSize : RSize;
-
-  for (unsigned i = 0; i != MinSize; ++i)
-    if (LIds[i] != RIds[i])
-      return LIds[i] < RIds[i];
-
-  return LSize < RSize;
-}
-
 /// ComputeActionsTable - Compute the actions table and gather the first action
 /// index for each landing pad site.
 unsigned DwarfException::
@@ -108,7 +96,7 @@
   for (std::vector<unsigned>::const_iterator
          I = FilterIds.begin(), E = FilterIds.end(); I != E; ++I) {
     FilterOffsets.push_back(Offset);
-    Offset -= MCAsmInfo::getULEB128Size(*I);
+    Offset -= getULEB128Size(*I);
   }
 
   FirstActions.reserve(LandingPads.size());
@@ -132,14 +120,12 @@
         unsigned SizePrevIds = PrevLPI->TypeIds.size();
         assert(Actions.size());
         PrevAction = Actions.size() - 1;
-        SizeAction =
-          MCAsmInfo::getSLEB128Size(Actions[PrevAction].NextAction) +
-          MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+        SizeAction = getSLEB128Size(Actions[PrevAction].NextAction) +
+                     getSLEB128Size(Actions[PrevAction].ValueForTypeID);
 
         for (unsigned j = NumShared; j != SizePrevIds; ++j) {
           assert(PrevAction != (unsigned)-1 && "PrevAction is invalid!");
-          SizeAction -=
-            MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+          SizeAction -= getSLEB128Size(Actions[PrevAction].ValueForTypeID);
           SizeAction += -Actions[PrevAction].NextAction;
           PrevAction = Actions[PrevAction].Previous;
         }
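
The MCAsmInfo::get[US]LEB128Size static methods are replaced by the free
functions from the new llvm/Support/LEB128.h include at the top of this file.
The computation just counts 7-bit groups; a sketch equivalent to those
helpers (the signed variant assumes an arithmetic right shift on int64_t, as
the in-tree code does):

    #include <cstdint>

    // Bytes needed to encode Value as unsigned LEB128: one per 7 bits.
    unsigned getULEB128Size(uint64_t Value) {
      unsigned Size = 0;
      do {
        Value >>= 7;
        ++Size;
      } while (Value != 0);
      return Size;
    }

    // Signed LEB128 stops once the remaining bits are pure sign extension
    // and the sign bit of the last emitted group agrees with them.
    unsigned getSLEB128Size(int64_t Value) {
      unsigned Size = 0;
      bool More;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7; // Arithmetic shift sign-extends negative values.
        More = !((Value == 0 && (Byte & 0x40) == 0) ||
                 (Value == -1 && (Byte & 0x40) != 0));
        ++Size;
      } while (More);
      return Size;
    }
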
@@ -150,10 +136,10 @@
         int TypeID = TypeIds[J];
         assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
         int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
-        unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID);
+        unsigned SizeTypeID = getSLEB128Size(ValueForTypeID);
 
         int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
-        SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction);
+        SizeAction = SizeTypeID + getSLEB128Size(NextAction);
         SizeSiteActions += SizeAction;
 
         ActionEntry Action = { ValueForTypeID, NextAction, PrevAction };
@@ -242,7 +228,7 @@
        I != E; ++I) {
     for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
          MI != E; ++MI) {
-      if (!MI->isLabel()) {
+      if (!MI->isEHLabel()) {
         if (MI->isCall())
           SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI);
         continue;
@@ -357,7 +343,10 @@
   for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
     LandingPads.push_back(&PadInfos[i]);
 
-  std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
+  // Order landing pads lexicographically by type id.
+  std::sort(LandingPads.begin(), LandingPads.end(),
+            [](const LandingPadInfo *L,
+               const LandingPadInfo *R) { return L->TypeIds < R->TypeIds; });
 
   // Compute the actions table and gather the first action index for each
   // landing pad site.
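
The lambda can simply compare the TypeIds vectors because std::vector defines
operator< as a lexicographic comparison, which is exactly what the deleted
PadLT loop computed by hand: the first differing element decides, and a
proper prefix orders before the longer sequence. A self-contained check of
that behavior:

    #include <cassert>
    #include <vector>

    int main() {
      std::vector<int> A = {1, 2};
      std::vector<int> B = {1, 3};
      std::vector<int> C = {1, 2, 3};
      assert(A < B); // First differing element decides: 2 < 3.
      assert(A < C); // Equal prefix: the shorter vector orders first.
      return 0;
    }
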
@@ -401,9 +390,9 @@
   }
 
   for (unsigned i = 0, e = CallSites.size(); i < e; ++i) {
-    CallSiteTableLength += MCAsmInfo::getULEB128Size(CallSites[i].Action);
+    CallSiteTableLength += getULEB128Size(CallSites[i].Action);
     if (IsSJLJ)
-      CallSiteTableLength += MCAsmInfo::getULEB128Size(i);
+      CallSiteTableLength += getULEB128Size(i);
   }
 
   // Type infos.
@@ -488,15 +477,14 @@
   // We chose another solution: don't output padding inside the table like GCC
   // does, instead output it before the table.
   unsigned SizeTypes = TypeInfos.size() * TypeFormatSize;
-  unsigned CallSiteTableLengthSize =
-    MCAsmInfo::getULEB128Size(CallSiteTableLength);
+  unsigned CallSiteTableLengthSize = getULEB128Size(CallSiteTableLength);
   unsigned TTypeBaseOffset =
     sizeof(int8_t) +                            // Call site format
     CallSiteTableLengthSize +                   // Call site table length size
     CallSiteTableLength +                       // Call site table length
     SizeActions +                               // Actions size
     SizeTypes;
-  unsigned TTypeBaseOffsetSize = MCAsmInfo::getULEB128Size(TTypeBaseOffset);
+  unsigned TTypeBaseOffsetSize = getULEB128Size(TTypeBaseOffset);
   unsigned TotalSize =
     sizeof(int8_t) +                            // LPStart format
     sizeof(int8_t) +                            // TType format
@@ -717,20 +705,19 @@
   }
 }
 
-/// EndModule - Emit all exception information that should come after the
+/// endModule - Emit all exception information that should come after the
 /// content.
-void DwarfException::EndModule() {
+void DwarfException::endModule() {
   llvm_unreachable("Should be implemented");
 }
 
-/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// beginFunction - Gather pre-function exception information. Assumes it's
 /// being emitted immediately after the function entry point.
-void DwarfException::BeginFunction(const MachineFunction *MF) {
+void DwarfException::beginFunction(const MachineFunction *MF) {
   llvm_unreachable("Should be implemented");
 }
 
-/// EndFunction - Gather and emit post-function exception information.
-///
-void DwarfException::EndFunction() {
+/// endFunction - Gather and emit post-function exception information.
+void DwarfException::endFunction(const MachineFunction *) {
   llvm_unreachable("Should be implemented");
 }
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index 1575161..f792482 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
 #define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
 
+#include "AsmPrinterHandler.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include <vector>
@@ -35,7 +36,7 @@
 //===----------------------------------------------------------------------===//
 /// DwarfException - Emits Dwarf exception handling directives.
 ///
-class DwarfException {
+class DwarfException : public AsmPrinterHandler {
 protected:
   /// Asm - Target of Dwarf emission.
   AsmPrinter *Asm;
@@ -47,9 +48,6 @@
   static unsigned SharedTypeIds(const LandingPadInfo *L,
                                 const LandingPadInfo *R);
 
-  /// PadLT - Order landing pads lexicographically by type id.
-  static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R);
-
   /// PadRange - Structure holding a try-range and the associated landing pad.
   struct PadRange {
     // The index of the landing pad.
@@ -130,16 +128,21 @@
   DwarfException(AsmPrinter *A);
   virtual ~DwarfException();
 
-  /// EndModule - Emit all exception information that should come after the
+  /// endModule - Emit all exception information that should come after the
   /// content.
-  virtual void EndModule();
+  void endModule() override;
 
-  /// BeginFunction - Gather pre-function exception information.  Assumes being
+  /// beginFunction - Gather pre-function exception information.  Assumes being
   /// emitted immediately after the function entry point.
-  virtual void BeginFunction(const MachineFunction *MF);
+  void beginFunction(const MachineFunction *MF) override;
 
-  /// EndFunction - Gather and emit post-function exception information.
-  virtual void EndFunction();
+  /// endFunction - Gather and emit post-function exception information.
+  void endFunction(const MachineFunction *) override;
+
+  // We don't need these.
+  void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
+  void beginInstruction(const MachineInstr *MI) override {}
+  void endInstruction() override {}
 };
 
 class DwarfCFIException : public DwarfException {
@@ -164,22 +167,26 @@
   DwarfCFIException(AsmPrinter *A);
   virtual ~DwarfCFIException();
 
-  /// EndModule - Emit all exception information that should come after the
+  /// endModule - Emit all exception information that should come after the
   /// content.
-  virtual void EndModule();
+  void endModule() override;
 
-  /// BeginFunction - Gather pre-function exception information.  Assumes being
+  /// beginFunction - Gather pre-function exception information.  Assumes being
   /// emitted immediately after the function entry point.
-  virtual void BeginFunction(const MachineFunction *MF);
+  void beginFunction(const MachineFunction *MF) override;
 
-  /// EndFunction - Gather and emit post-function exception information.
-  virtual void EndFunction();
+  /// endFunction - Gather and emit post-function exception information.
+  void endFunction(const MachineFunction *) override;
 };
 
 class ARMException : public DwarfException {
-  void EmitTypeInfos(unsigned TTypeEncoding);
+  void EmitTypeInfos(unsigned TTypeEncoding) override;
   ARMTargetStreamer &getTargetStreamer();
 
+  /// shouldEmitCFI - Per-function flag to indicate if frame CFI info
+  /// should be emitted.
+  bool shouldEmitCFI;
+
 public:
   //===--------------------------------------------------------------------===//
   // Main entry points.
@@ -187,16 +194,16 @@
   ARMException(AsmPrinter *A);
   virtual ~ARMException();
 
-  /// EndModule - Emit all exception information that should come after the
+  /// endModule - Emit all exception information that should come after the
   /// content.
-  virtual void EndModule();
+  void endModule() override;
 
-  /// BeginFunction - Gather pre-function exception information.  Assumes being
+  /// beginFunction - Gather pre-function exception information.  Assumes being
   /// emitted immediately after the function entry point.
-  virtual void BeginFunction(const MachineFunction *MF);
+  void beginFunction(const MachineFunction *MF) override;
 
-  /// EndFunction - Gather and emit post-function exception information.
-  virtual void EndFunction();
+  /// endFunction - Gather and emit post-function exception information.
+  void endFunction(const MachineFunction *) override;
 };
 
 class Win64Exception : public DwarfException {
@@ -219,16 +226,16 @@
   Win64Exception(AsmPrinter *A);
   virtual ~Win64Exception();
 
-  /// EndModule - Emit all exception information that should come after the
+  /// endModule - Emit all exception information that should come after the
   /// content.
-  virtual void EndModule();
+  void endModule() override;
 
-  /// BeginFunction - Gather pre-function exception information.  Assumes being
+  /// beginFunction - Gather pre-function exception information.  Assumes being
   /// emitted immediately after the function entry point.
-  virtual void BeginFunction(const MachineFunction *MF);
+  void beginFunction(const MachineFunction *MF) override;
 
-  /// EndFunction - Gather and emit post-function exception information.
-  virtual void EndFunction();
+  /// endFunction - Gather and emit post-function exception information.
+  void endFunction(const MachineFunction *) override;
 };
 
 } // End of namespace llvm
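
Beyond the renames, marking these methods 'override' is what guarantees every
subclass really was updated to the new endFunction(const MachineFunction *)
signature: a stale spelling now fails to compile instead of silently
declaring a second, unrelated virtual. A toy illustration of the failure mode
(not the real hierarchy):

    #include <cstdio>

    class MachineFunction; // Stand-in; only used through a pointer here.

    struct Handler {
      virtual ~Handler() {}
      virtual void endFunction(const MachineFunction *MF) { (void)MF; }
    };

    struct Impl : Handler {
      // 'override' asks the compiler to verify a matching base virtual
      // exists. The old spelling, void EndFunction(), would be rejected
      // here instead of quietly adding a new virtual that never runs.
      void endFunction(const MachineFunction *MF) override {
        (void)MF;
        std::printf("post-function exception info\n");
      }
    };

    int main() {
      Impl I;
      Handler &H = I;
      H.endFunction(nullptr); // Dispatches to Impl::endFunction.
      return 0;
    }
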
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
similarity index 67%
rename from lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
rename to lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index a6ff953..82e9bb0 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Unit ------------===//
+//===-- llvm/CodeGen/DwarfUnit.cpp - Dwarf Type and Compile Units ---------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,50 +13,75 @@
 
 #define DEBUG_TYPE "dwarfdebug"
 
-#include "DwarfCompileUnit.h"
+#include "DwarfUnit.h"
 #include "DwarfAccelTable.h"
 #include "DwarfDebug.h"
 #include "llvm/ADT/APFloat.h"
-#include "llvm/DIBuilder.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
-#include "llvm/Target/Mangler.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 
 using namespace llvm;
 
-/// CompileUnit - Compile unit constructor.
-CompileUnit::CompileUnit(unsigned UID, DIE *D, DICompileUnit Node,
-                         AsmPrinter *A, DwarfDebug *DW, DwarfUnits *DWU)
-    : UniqueID(UID), Node(Node), CUDie(D), Asm(A), DD(DW), DU(DWU),
-      IndexTyDie(0), DebugInfoOffset(0) {
+static cl::opt<bool>
+GenerateDwarfTypeUnits("generate-type-units", cl::Hidden,
+                       cl::desc("Generate DWARF4 type units."),
+                       cl::init(false));
+
+/// DwarfUnit - Unit constructor.
+DwarfUnit::DwarfUnit(unsigned UID, DIE *D, DICompileUnit Node, AsmPrinter *A,
+                     DwarfDebug *DW, DwarfFile *DWU)
+    : UniqueID(UID), CUNode(Node), UnitDie(D), DebugInfoOffset(0), Asm(A),
+      DD(DW), DU(DWU), IndexTyDie(0), Section(0), Skeleton(0) {
   DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
+}
+
+DwarfCompileUnit::DwarfCompileUnit(unsigned UID, DIE *D, DICompileUnit Node,
+                                   AsmPrinter *A, DwarfDebug *DW,
+                                   DwarfFile *DWU)
+    : DwarfUnit(UID, D, Node, A, DW, DWU) {
   insertDIE(Node, D);
 }
 
-/// ~CompileUnit - Destructor for compile unit.
-CompileUnit::~CompileUnit() {
+DwarfTypeUnit::DwarfTypeUnit(unsigned UID, DIE *D, DwarfCompileUnit &CU,
+                             AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU,
+                             MCDwarfDwoLineTable *SplitLineTable)
+    : DwarfUnit(UID, D, CU.getCUNode(), A, DW, DWU), CU(CU),
+      SplitLineTable(SplitLineTable) {
+  if (SplitLineTable)
+    addSectionOffset(UnitDie.get(), dwarf::DW_AT_stmt_list, 0);
+}
+
+/// ~DwarfUnit - Destructor for a DwarfUnit.
+DwarfUnit::~DwarfUnit() {
   for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j)
     DIEBlocks[j]->~DIEBlock();
+  for (unsigned j = 0, M = DIELocs.size(); j < M; ++j)
+    DIELocs[j]->~DIELoc();
 }
 
 /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
 /// information entry.
-DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) {
+DIEEntry *DwarfUnit::createDIEEntry(DIE *Entry) {
   DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry);
   return Value;
 }
 
 /// getDefaultLowerBound - Return the default lower bound for an array. If the
 /// DWARF version doesn't handle the language, return -1.
-int64_t CompileUnit::getDefaultLowerBound() const {
+int64_t DwarfUnit::getDefaultLowerBound() const {
   switch (getLanguage()) {
   default:
     break;
@@ -100,17 +125,23 @@
 
 /// Check whether the DIE for this MDNode can be shared across CUs.
 static bool isShareableAcrossCUs(DIDescriptor D) {
-  // When the MDNode can be part of the type system, the DIE can be
-  // shared across CUs.
-  return D.isType() ||
-         (D.isSubprogram() && !DISubprogram(D).isDefinition());
+  // When the MDNode can be part of the type system, the DIE can be shared
+  // across CUs.
+  // Combining type units and cross-CU DIE sharing is lower value (since
+  // cross-CU DIE sharing is used in LTO and removes type redundancy at that
+  // level already) but may be implementable for some value in projects
+  // building multiple independent libraries with LTO and then linking those
+  // together.
+  return (D.isType() ||
+          (D.isSubprogram() && !DISubprogram(D).isDefinition())) &&
+         !GenerateDwarfTypeUnits;
 }
 
 /// getDIE - Returns the debug information entry map slot for the
 /// specified debug variable. We delegate the request to DwarfDebug
 /// when the DIE for this MDNode can be shared across CUs. The mappings
 /// will be kept in DwarfDebug for shareable DIEs.
-DIE *CompileUnit::getDIE(DIDescriptor D) const {
+DIE *DwarfUnit::getDIE(DIDescriptor D) const {
   if (isShareableAcrossCUs(D))
     return DD->getDIE(D);
   return MDNodeToDieMap.lookup(D);
@@ -119,7 +150,7 @@
 /// insertDIE - Insert DIE into the map. We delegate the request to DwarfDebug
 /// when the DIE for this MDNode can be shared across CUs. The mappings
 /// will be kept in DwarfDebug for shareable DIEs.
-void CompileUnit::insertDIE(DIDescriptor Desc, DIE *D) {
+void DwarfUnit::insertDIE(DIDescriptor Desc, DIE *D) {
   if (isShareableAcrossCUs(Desc)) {
     DD->insertDIE(Desc, D);
     return;
@@ -128,7 +159,7 @@
 }
 
 /// addFlag - Add a flag that is true.
-void CompileUnit::addFlag(DIE *Die, dwarf::Attribute Attribute) {
+void DwarfUnit::addFlag(DIE *Die, dwarf::Attribute Attribute) {
   if (DD->getDwarfVersion() >= 4)
     Die->addValue(Attribute, dwarf::DW_FORM_flag_present, DIEIntegerOne);
   else
@@ -137,8 +168,8 @@
 
 /// addUInt - Add an unsigned integer attribute data and value.
 ///
-void CompileUnit::addUInt(DIE *Die, dwarf::Attribute Attribute,
-                          Optional<dwarf::Form> Form, uint64_t Integer) {
+void DwarfUnit::addUInt(DIE *Die, dwarf::Attribute Attribute,
+                        Optional<dwarf::Form> Form, uint64_t Integer) {
   if (!Form)
     Form = DIEInteger::BestForm(false, Integer);
   DIEValue *Value = Integer == 1 ? DIEIntegerOne : new (DIEValueAllocator)
@@ -146,22 +177,22 @@
   Die->addValue(Attribute, *Form, Value);
 }
 
-void CompileUnit::addUInt(DIEBlock *Block, dwarf::Form Form, uint64_t Integer) {
+void DwarfUnit::addUInt(DIE *Block, dwarf::Form Form, uint64_t Integer) {
   addUInt(Block, (dwarf::Attribute)0, Form, Integer);
 }
 
 /// addSInt - Add a signed integer attribute data and value.
 ///
-void CompileUnit::addSInt(DIE *Die, dwarf::Attribute Attribute,
-                          Optional<dwarf::Form> Form, int64_t Integer) {
+void DwarfUnit::addSInt(DIE *Die, dwarf::Attribute Attribute,
+                        Optional<dwarf::Form> Form, int64_t Integer) {
   if (!Form)
     Form = DIEInteger::BestForm(true, Integer);
   DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer);
   Die->addValue(Attribute, *Form, Value);
 }
 
-void CompileUnit::addSInt(DIEBlock *Die, Optional<dwarf::Form> Form,
-                          int64_t Integer) {
+void DwarfUnit::addSInt(DIELoc *Die, Optional<dwarf::Form> Form,
+                        int64_t Integer) {
   addSInt(Die, (dwarf::Attribute)0, Form, Integer);
 }
 
@@ -170,91 +201,135 @@
 /// more predictable sizes. In the case of split dwarf we emit an index
 /// into another table which gets us the static offset into the string
 /// table.
-void CompileUnit::addString(DIE *Die, dwarf::Attribute Attribute,
-                            StringRef String) {
-  DIEValue *Value;
-  dwarf::Form Form;
-  if (!DD->useSplitDwarf()) {
-    MCSymbol *Symb = DU->getStringPoolEntry(String);
-    if (Asm->needsRelocationsForDwarfStringPool())
-      Value = new (DIEValueAllocator) DIELabel(Symb);
-    else {
-      MCSymbol *StringPool = DU->getStringPoolSym();
-      Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool);
-    }
-    Form = dwarf::DW_FORM_strp;
-  } else {
-    unsigned idx = DU->getStringPoolIndex(String);
-    Value = new (DIEValueAllocator) DIEInteger(idx);
-    Form = dwarf::DW_FORM_GNU_str_index;
-  }
+void DwarfUnit::addString(DIE *Die, dwarf::Attribute Attribute,
+                          StringRef String) {
+
+  if (!DD->useSplitDwarf())
+    return addLocalString(Die, Attribute, String);
+
+  unsigned idx = DU->getStringPoolIndex(String);
+  DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
   DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String);
-  Die->addValue(Attribute, Form, Str);
+  Die->addValue(Attribute, dwarf::DW_FORM_GNU_str_index, Str);
 }
 
 /// addLocalString - Add a string attribute data and value. This is guaranteed
 /// to be in the local string pool instead of indirected.
-void CompileUnit::addLocalString(DIE *Die, dwarf::Attribute Attribute,
-                                 StringRef String) {
+void DwarfUnit::addLocalString(DIE *Die, dwarf::Attribute Attribute,
+                               StringRef String) {
   MCSymbol *Symb = DU->getStringPoolEntry(String);
   DIEValue *Value;
-  if (Asm->needsRelocationsForDwarfStringPool())
+  if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
     Value = new (DIEValueAllocator) DIELabel(Symb);
   else {
     MCSymbol *StringPool = DU->getStringPoolSym();
     Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool);
   }
-  Die->addValue(Attribute, dwarf::DW_FORM_strp, Value);
+  DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String);
+  Die->addValue(Attribute, dwarf::DW_FORM_strp, Str);
 }
 
 /// addExpr - Add a Dwarf expression attribute data and value.
 ///
-void CompileUnit::addExpr(DIEBlock *Die, dwarf::Form Form, const MCExpr *Expr) {
+void DwarfUnit::addExpr(DIELoc *Die, dwarf::Form Form, const MCExpr *Expr) {
   DIEValue *Value = new (DIEValueAllocator) DIEExpr(Expr);
   Die->addValue((dwarf::Attribute)0, Form, Value);
 }
 
+/// addLocationList - Add a Dwarf loclistptr attribute data and value.
+///
+void DwarfUnit::addLocationList(DIE *Die, dwarf::Attribute Attribute,
+                                unsigned Index) {
+  DIEValue *Value = new (DIEValueAllocator) DIELocList(Index);
+  dwarf::Form Form = DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+                                                : dwarf::DW_FORM_data4;
+  Die->addValue(Attribute, Form, Value);
+}
+
 /// addLabel - Add a Dwarf label attribute data and value.
 ///
-void CompileUnit::addLabel(DIE *Die, dwarf::Attribute Attribute,
-                           dwarf::Form Form, const MCSymbol *Label) {
+void DwarfUnit::addLabel(DIE *Die, dwarf::Attribute Attribute, dwarf::Form Form,
+                         const MCSymbol *Label) {
   DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
   Die->addValue(Attribute, Form, Value);
 }
 
-void CompileUnit::addLabel(DIEBlock *Die, dwarf::Form Form,
-                           const MCSymbol *Label) {
+void DwarfUnit::addLabel(DIELoc *Die, dwarf::Form Form, const MCSymbol *Label) {
   addLabel(Die, (dwarf::Attribute)0, Form, Label);
 }
 
+/// addSectionLabel - Add a Dwarf section label attribute data and value.
+///
+void DwarfUnit::addSectionLabel(DIE *Die, dwarf::Attribute Attribute,
+                                const MCSymbol *Label) {
+  if (DD->getDwarfVersion() >= 4)
+    addLabel(Die, Attribute, dwarf::DW_FORM_sec_offset, Label);
+  else
+    addLabel(Die, Attribute, dwarf::DW_FORM_data4, Label);
+}
+
+/// addSectionOffset - Add an offset into a section attribute data and value.
+///
+void DwarfUnit::addSectionOffset(DIE *Die, dwarf::Attribute Attribute,
+                                 uint64_t Integer) {
+  if (DD->getDwarfVersion() >= 4)
+    addUInt(Die, Attribute, dwarf::DW_FORM_sec_offset, Integer);
+  else
+    addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer);
+}
+
 /// addLabelAddress - Add a dwarf label attribute data and value using
 /// DW_FORM_addr or DW_FORM_GNU_addr_index.
 ///
-void CompileUnit::addLabelAddress(DIE *Die, dwarf::Attribute Attribute,
-                                  MCSymbol *Label) {
+void DwarfCompileUnit::addLabelAddress(DIE *Die, dwarf::Attribute Attribute,
+                                       const MCSymbol *Label) {
+
+  if (!DD->useSplitDwarf())
+    return addLocalLabelAddress(Die, Attribute, Label);
+
   if (Label)
     DD->addArangeLabel(SymbolCU(this, Label));
 
-  if (!DD->useSplitDwarf()) {
-    if (Label != NULL) {
-      DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
-      Die->addValue(Attribute, dwarf::DW_FORM_addr, Value);
-    } else {
-      DIEValue *Value = new (DIEValueAllocator) DIEInteger(0);
-      Die->addValue(Attribute, dwarf::DW_FORM_addr, Value);
-    }
+  unsigned idx = DU->getAddrPoolIndex(Label);
+  DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
+  Die->addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value);
+}
+
+void DwarfCompileUnit::addLocalLabelAddress(DIE *Die,
+                                            dwarf::Attribute Attribute,
+                                            const MCSymbol *Label) {
+  if (Label)
+    DD->addArangeLabel(SymbolCU(this, Label));
+
+  if (Label) {
+    DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
+    Die->addValue(Attribute, dwarf::DW_FORM_addr, Value);
   } else {
-    unsigned idx = DU->getAddrPoolIndex(Label);
-    DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
-    Die->addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value);
+    DIEValue *Value = new (DIEValueAllocator) DIEInteger(0);
+    Die->addValue(Attribute, dwarf::DW_FORM_addr, Value);
   }
 }
 
+unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName,
+                                               StringRef DirName) {
+  // If we print assembly, we can't separate .file entries according to
+  // compile units. Thus all files will belong to the default compile unit.
+
+  // FIXME: add a better feature test than hasRawTextSupport. Even better,
+  // extend .file to support this.
+  return Asm->OutStreamer.EmitDwarfFileDirective(
+      0, DirName, FileName,
+      Asm->OutStreamer.hasRawTextSupport() ? 0 : getUniqueID());
+}
+
+unsigned DwarfTypeUnit::getOrCreateSourceID(StringRef FileName,
+                                            StringRef DirName) {
+  return SplitLineTable ? SplitLineTable->getFile(DirName, FileName)
+                        : getCU().getOrCreateSourceID(FileName, DirName);
+}
+
 /// addOpAddress - Add a dwarf op address data and value using the
 /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
 ///
-void CompileUnit::addOpAddress(DIEBlock *Die, const MCSymbol *Sym) {
-  DD->addArangeLabel(SymbolCU(this, Sym));
+void DwarfUnit::addOpAddress(DIELoc *Die, const MCSymbol *Sym) {
   if (!DD->useSplitDwarf()) {
     addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
     addLabel(Die, dwarf::DW_FORM_udata, Sym);
@@ -264,31 +339,43 @@
   }
 }
 
-/// addDelta - Add a label delta attribute data and value.
+/// addSectionDelta - Add a section label delta attribute data and value.
 ///
-void CompileUnit::addDelta(DIE *Die, dwarf::Attribute Attribute,
-                           dwarf::Form Form, const MCSymbol *Hi,
-                           const MCSymbol *Lo) {
+void DwarfUnit::addSectionDelta(DIE *Die, dwarf::Attribute Attribute,
+                                const MCSymbol *Hi, const MCSymbol *Lo) {
   DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo);
-  Die->addValue(Attribute, Form, Value);
+  if (DD->getDwarfVersion() >= 4)
+    Die->addValue(Attribute, dwarf::DW_FORM_sec_offset, Value);
+  else
+    Die->addValue(Attribute, dwarf::DW_FORM_data4, Value);
+}
+
+void DwarfUnit::addLabelDelta(DIE *Die, dwarf::Attribute Attribute,
+                              const MCSymbol *Hi, const MCSymbol *Lo) {
+  DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo);
+  Die->addValue(Attribute, dwarf::DW_FORM_data4, Value);
 }
 
 /// addDIEEntry - Add a DIE attribute data and value.
 ///
-void CompileUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute,
-                              DIE *Entry) {
+void DwarfUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIE *Entry) {
   addDIEEntry(Die, Attribute, createDIEEntry(Entry));
 }
 
-void CompileUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute,
-                              DIEEntry *Entry) {
-  const DIE *DieCU = Die->getCompileUnitOrNull();
-  const DIE *EntryCU = Entry->getEntry()->getCompileUnitOrNull();
+void DwarfUnit::addDIETypeSignature(DIE *Die, const DwarfTypeUnit &Type) {
+  Die->addValue(dwarf::DW_AT_signature, dwarf::DW_FORM_ref_sig8,
+                new (DIEValueAllocator) DIETypeSignature(Type));
+}
+
+void DwarfUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute,
+                            DIEEntry *Entry) {
+  const DIE *DieCU = Die->getUnitOrNull();
+  const DIE *EntryCU = Entry->getEntry()->getUnitOrNull();
   if (!DieCU)
    // We assume that Die belongs to this CU if it is not linked to any CU yet.
-    DieCU = getCUDie();
+    DieCU = getUnitDie();
   if (!EntryCU)
-    EntryCU = getCUDie();
+    EntryCU = getUnitDie();
   Die->addValue(Attribute, EntryCU == DieCU ? dwarf::DW_FORM_ref4
                                             : dwarf::DW_FORM_ref_addr,
                 Entry);
@@ -296,7 +383,7 @@
 
 /// Create a DIE with the given Tag, add the DIE to its parent, and
 /// call insertDIE if MD is not null.
-DIE *CompileUnit::createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N) {
+DIE *DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N) {
   DIE *Die = new DIE(Tag);
   Parent.addChild(Die);
   if (N)
@@ -306,8 +393,14 @@
 
 /// addBlock - Add block data.
 ///
-void CompileUnit::addBlock(DIE *Die, dwarf::Attribute Attribute,
-                           DIEBlock *Block) {
+void DwarfUnit::addBlock(DIE *Die, dwarf::Attribute Attribute, DIELoc *Loc) {
+  Loc->ComputeSize(Asm);
+  DIELocs.push_back(Loc); // Memoize so we can call the destructor later on.
+  Die->addValue(Attribute, Loc->BestForm(DD->getDwarfVersion()), Loc);
+}
+
+void DwarfUnit::addBlock(DIE *Die, dwarf::Attribute Attribute,
+                         DIEBlock *Block) {
   Block->ComputeSize(Asm);
   DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on.
   Die->addValue(Attribute, Block->BestForm(), Block);
@@ -315,17 +408,12 @@
 
 /// addSourceLine - Add location information to specified debug information
 /// entry.
-void CompileUnit::addSourceLine(DIE *Die, DIVariable V) {
-  // Verify variable.
-  if (!V.isVariable())
-    return;
-
-  unsigned Line = V.getLineNumber();
+void DwarfUnit::addSourceLine(DIE *Die, unsigned Line, StringRef File,
+                              StringRef Directory) {
   if (Line == 0)
     return;
-  unsigned FileID =
-      DD->getOrCreateSourceID(V.getContext().getFilename(),
-                              V.getContext().getDirectory(), getUniqueID());
+
+  unsigned FileID = getOrCreateSourceID(File, Directory);
   assert(FileID && "Invalid file id");
   addUInt(Die, dwarf::DW_AT_decl_file, None, FileID);
   addUInt(Die, dwarf::DW_AT_decl_line, None, Line);
@@ -333,98 +421,59 @@
 
 /// addSourceLine - Add location information to specified debug information
 /// entry.
-void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) {
-  // Verify global variable.
-  if (!G.isGlobalVariable())
-    return;
+void DwarfUnit::addSourceLine(DIE *Die, DIVariable V) {
+  assert(V.isVariable());
 
-  unsigned Line = G.getLineNumber();
-  if (Line == 0)
-    return;
-  unsigned FileID =
-      DD->getOrCreateSourceID(G.getFilename(), G.getDirectory(), getUniqueID());
-  assert(FileID && "Invalid file id");
-  addUInt(Die, dwarf::DW_AT_decl_file, None, FileID);
-  addUInt(Die, dwarf::DW_AT_decl_line, None, Line);
+  addSourceLine(Die, V.getLineNumber(), V.getContext().getFilename(),
+                V.getContext().getDirectory());
 }
 
 /// addSourceLine - Add location information to specified debug information
 /// entry.
-void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) {
-  // Verify subprogram.
-  if (!SP.isSubprogram())
-    return;
+void DwarfUnit::addSourceLine(DIE *Die, DIGlobalVariable G) {
+  assert(G.isGlobalVariable());
 
-  // If the line number is 0, don't add it.
-  unsigned Line = SP.getLineNumber();
-  if (Line == 0)
-    return;
-
-  unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(), SP.getDirectory(),
-                                            getUniqueID());
-  assert(FileID && "Invalid file id");
-  addUInt(Die, dwarf::DW_AT_decl_file, None, FileID);
-  addUInt(Die, dwarf::DW_AT_decl_line, None, Line);
+  addSourceLine(Die, G.getLineNumber(), G.getFilename(), G.getDirectory());
 }
 
 /// addSourceLine - Add location information to specified debug information
 /// entry.
-void CompileUnit::addSourceLine(DIE *Die, DIType Ty) {
-  // Verify type.
-  if (!Ty.isType())
-    return;
+void DwarfUnit::addSourceLine(DIE *Die, DISubprogram SP) {
+  assert(SP.isSubprogram());
 
-  unsigned Line = Ty.getLineNumber();
-  if (Line == 0)
-    return;
-  unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(), Ty.getDirectory(),
-                                            getUniqueID());
-  assert(FileID && "Invalid file id");
-  addUInt(Die, dwarf::DW_AT_decl_file, None, FileID);
-  addUInt(Die, dwarf::DW_AT_decl_line, None, Line);
+  addSourceLine(Die, SP.getLineNumber(), SP.getFilename(), SP.getDirectory());
 }
 
 /// addSourceLine - Add location information to specified debug information
 /// entry.
-void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) {
-  // Verify type.
-  if (!Ty.isObjCProperty())
-    return;
+void DwarfUnit::addSourceLine(DIE *Die, DIType Ty) {
+  assert(Ty.isType());
 
-  unsigned Line = Ty.getLineNumber();
-  if (Line == 0)
-    return;
+  addSourceLine(Die, Ty.getLineNumber(), Ty.getFilename(), Ty.getDirectory());
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void DwarfUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) {
+  assert(Ty.isObjCProperty());
+
   DIFile File = Ty.getFile();
-  unsigned FileID = DD->getOrCreateSourceID(File.getFilename(),
-                                            File.getDirectory(), getUniqueID());
-  assert(FileID && "Invalid file id");
-  addUInt(Die, dwarf::DW_AT_decl_file, None, FileID);
-  addUInt(Die, dwarf::DW_AT_decl_line, None, Line);
+  addSourceLine(Die, Ty.getLineNumber(), File.getFilename(),
+                File.getDirectory());
 }
 
 /// addSourceLine - Add location information to specified debug information
 /// entry.
-void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) {
-  // Verify namespace.
-  if (!NS.Verify())
-    return;
+void DwarfUnit::addSourceLine(DIE *Die, DINameSpace NS) {
+  assert(NS.Verify());
 
-  unsigned Line = NS.getLineNumber();
-  if (Line == 0)
-    return;
-  StringRef FN = NS.getFilename();
-
-  unsigned FileID =
-      DD->getOrCreateSourceID(FN, NS.getDirectory(), getUniqueID());
-  assert(FileID && "Invalid file id");
-  addUInt(Die, dwarf::DW_AT_decl_file, None, FileID);
-  addUInt(Die, dwarf::DW_AT_decl_line, None, Line);
+  addSourceLine(Die, NS.getLineNumber(), NS.getFilename(), NS.getDirectory());
 }
 
 /// addVariableAddress - Add DW_AT_location attribute for a
 /// DbgVariable based on provided MachineLocation.
-void CompileUnit::addVariableAddress(const DbgVariable &DV, DIE *Die,
-                                     MachineLocation Location) {
+void DwarfUnit::addVariableAddress(const DbgVariable &DV, DIE *Die,
+                                   MachineLocation Location) {
   if (DV.variableHasComplexAddress())
     addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
   else if (DV.isBlockByrefVariable())
@@ -435,20 +484,53 @@
 }
 
 /// addRegisterOp - Add register operand.
-void CompileUnit::addRegisterOp(DIEBlock *TheDie, unsigned Reg) {
+void DwarfUnit::addRegisterOp(DIELoc *TheDie, unsigned Reg) {
   const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
-  unsigned DWReg = RI->getDwarfRegNum(Reg, false);
+  int DWReg = RI->getDwarfRegNum(Reg, false);
+  bool isSubRegister = DWReg < 0;
+
+  unsigned Idx = 0;
+
+  // Go up the super-register chain until we hit a valid dwarf register number.
+  for (MCSuperRegIterator SR(Reg, RI); SR.isValid() && DWReg < 0; ++SR) {
+    DWReg = RI->getDwarfRegNum(*SR, false);
+    if (DWReg >= 0)
+      Idx = RI->getSubRegIndex(*SR, Reg);
+  }
+
+  if (DWReg < 0) {
+    DEBUG(dbgs() << "Invalid Dwarf register number.\n");
+    addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_nop);
+    return;
+  }
+
+  // Emit register
   if (DWReg < 32)
     addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg);
   else {
     addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
     addUInt(TheDie, dwarf::DW_FORM_udata, DWReg);
   }
+
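+  // As an illustration, an 8-bit sub-register at bit offset 8 of dwarf
+  // register 0 is emitted as DW_OP_reg0 DW_OP_bit_piece 8 8, while a 16-bit
+  // sub-register at offset 0 becomes DW_OP_reg0 DW_OP_piece 2.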
+  // Emit Mask
+  if (isSubRegister) {
+    unsigned Size = RI->getSubRegIdxSize(Idx);
+    unsigned Offset = RI->getSubRegIdxOffset(Idx);
+    if (Offset > 0) {
+      addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_bit_piece);
+      addUInt(TheDie, dwarf::DW_FORM_data1, Size);
+      addUInt(TheDie, dwarf::DW_FORM_data1, Offset);
+    } else {
+      unsigned ByteSize = Size / 8; // Assuming 8 bits per byte.
+      addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_piece);
+      addUInt(TheDie, dwarf::DW_FORM_data1, ByteSize);
+    }
+  }
 }
 
 /// addRegisterOffset - Add register offset.
-void CompileUnit::addRegisterOffset(DIEBlock *TheDie, unsigned Reg,
-                                    int64_t Offset) {
+void DwarfUnit::addRegisterOffset(DIELoc *TheDie, unsigned Reg,
+                                  int64_t Offset) {
   const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
   unsigned DWReg = RI->getDwarfRegNum(Reg, false);
   const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
@@ -466,59 +548,59 @@
 
 /// addAddress - Add an address attribute to a die based on the location
 /// provided.
-void CompileUnit::addAddress(DIE *Die, dwarf::Attribute Attribute,
-                             const MachineLocation &Location, bool Indirect) {
-  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+void DwarfUnit::addAddress(DIE *Die, dwarf::Attribute Attribute,
+                           const MachineLocation &Location, bool Indirect) {
+  DIELoc *Loc = new (DIEValueAllocator) DIELoc();
 
   if (Location.isReg() && !Indirect)
-    addRegisterOp(Block, Location.getReg());
+    addRegisterOp(Loc, Location.getReg());
   else {
-    addRegisterOffset(Block, Location.getReg(), Location.getOffset());
+    addRegisterOffset(Loc, Location.getReg(), Location.getOffset());
     if (Indirect && !Location.isReg()) {
-      addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+      addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
     }
   }
 
   // Now attach the location information to the DIE.
-  addBlock(Die, Attribute, Block);
+  addBlock(Die, Attribute, Loc);
 }
 
 /// addComplexAddress - Start with the address based on the location provided,
 /// and generate the DWARF information necessary to find the actual variable
-/// given the extra address information encoded in the DIVariable, starting from
-/// the starting location.  Add the DWARF information to the die.
+/// given the extra address information encoded in the DbgVariable, starting
+/// from the starting location.  Add the DWARF information to the die.
 ///
-void CompileUnit::addComplexAddress(const DbgVariable &DV, DIE *Die,
-                                    dwarf::Attribute Attribute,
-                                    const MachineLocation &Location) {
-  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+void DwarfUnit::addComplexAddress(const DbgVariable &DV, DIE *Die,
+                                  dwarf::Attribute Attribute,
+                                  const MachineLocation &Location) {
+  DIELoc *Loc = new (DIEValueAllocator) DIELoc();
   unsigned N = DV.getNumAddrElements();
   unsigned i = 0;
   if (Location.isReg()) {
     if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) {
       // If first address element is OpPlus then emit
       // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
-      addRegisterOffset(Block, Location.getReg(), DV.getAddrElement(1));
+      addRegisterOffset(Loc, Location.getReg(), DV.getAddrElement(1));
       i = 2;
     } else
-      addRegisterOp(Block, Location.getReg());
+      addRegisterOp(Loc, Location.getReg());
   } else
-    addRegisterOffset(Block, Location.getReg(), Location.getOffset());
+    addRegisterOffset(Loc, Location.getReg(), Location.getOffset());
 
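+  // Translate the remaining complex address elements into DWARF operators.
+  // For example, with a memory location the element list (OpPlus, 8, OpDeref)
+  // appends DW_OP_plus_uconst 8 followed by DW_OP_deref to the expression
+  // built above.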
   for (; i < N; ++i) {
     uint64_t Element = DV.getAddrElement(i);
     if (Element == DIBuilder::OpPlus) {
-      addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
-      addUInt(Block, dwarf::DW_FORM_udata, DV.getAddrElement(++i));
+      addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+      addUInt(Loc, dwarf::DW_FORM_udata, DV.getAddrElement(++i));
     } else if (Element == DIBuilder::OpDeref) {
       if (!Location.isReg())
-        addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+        addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
     } else
       llvm_unreachable("unknown DIBuilder Opcode");
   }
 
   // Now attach the location information to the DIE.
-  addBlock(Die, Attribute, Block);
+  addBlock(Die, Attribute, Loc);
 }
 
 /* Byref variables, in Blocks, are declared by the programmer as "SomeType
@@ -581,9 +663,9 @@
 /// starting location.  Add the DWARF information to the die.  For
 /// more information, read large comment just above here.
 ///
-void CompileUnit::addBlockByrefAddress(const DbgVariable &DV, DIE *Die,
-                                       dwarf::Attribute Attribute,
-                                       const MachineLocation &Location) {
+void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE *Die,
+                                     dwarf::Attribute Attribute,
+                                     const MachineLocation &Location) {
   DIType Ty = DV.getType();
   DIType TmpTy = Ty;
   uint16_t Tag = Ty.getTag();
@@ -620,40 +702,40 @@
 
   // Decode the original location, and use that as the start of the byref
   // variable's location.
-  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+  DIELoc *Loc = new (DIEValueAllocator) DIELoc();
 
   if (Location.isReg())
-    addRegisterOp(Block, Location.getReg());
+    addRegisterOp(Loc, Location.getReg());
   else
-    addRegisterOffset(Block, Location.getReg(), Location.getOffset());
+    addRegisterOffset(Loc, Location.getReg(), Location.getOffset());
 
   // If we started with a pointer to the __Block_byref... struct, then
   // the first thing we need to do is dereference the pointer (DW_OP_deref).
   if (isPointer)
-    addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+    addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
 
   // Next add the offset for the '__forwarding' field:
   // DW_OP_plus_uconst ForwardingFieldOffset.  Note there's no point in
   // adding the offset if it's 0.
   if (forwardingFieldOffset > 0) {
-    addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
-    addUInt(Block, dwarf::DW_FORM_udata, forwardingFieldOffset);
+    addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+    addUInt(Loc, dwarf::DW_FORM_udata, forwardingFieldOffset);
   }
 
   // Now dereference the __forwarding field to get to the real __Block_byref
   // struct:  DW_OP_deref.
-  addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+  addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
 
   // Now that we've got the real __Block_byref... struct, add the offset
   // for the variable's field to get to the location of the actual variable:
   // DW_OP_plus_uconst varFieldOffset.  Again, don't add if it's 0.
   if (varFieldOffset > 0) {
-    addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
-    addUInt(Block, dwarf::DW_FORM_udata, varFieldOffset);
+    addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+    addUInt(Loc, dwarf::DW_FORM_udata, varFieldOffset);
   }
 
   // Now attach the location information to the DIE.
-  addBlock(Die, Attribute, Block);
+  addBlock(Die, Attribute, Loc);
 }
 
 /// isTypeSigned - Return true if the type is signed.
@@ -698,8 +780,9 @@
 
   DIType BaseType = DD->resolve(Ty.getTypeDerivedFrom());
 
-  // If this type is not derived from any type then take conservative approach.
-  if (!BaseType.isValid())
+  // If this type is not derived from any type, or the type is a declaration,
+  // then take a conservative approach.
+  if (!BaseType.isValid() || BaseType.isForwardDecl())
     return Ty.getSizeInBits();
 
   // If this is a derived type, go ahead and get the base type, unless it's a
@@ -716,8 +799,8 @@
 }
 
 /// addConstantValue - Add constant value entry in variable DIE.
-void CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO,
-                                   DIType Ty) {
+void DwarfUnit::addConstantValue(DIE *Die, const MachineOperand &MO,
+                                 DIType Ty) {
   // FIXME: This is a bit conservative/simple - it emits negative values at
   // their maximum bit width which is a bit unfortunate (& doesn't prefer
   // udata/sdata over dataN as suggested by the DWARF spec)
@@ -755,7 +838,7 @@
 }
 
 /// addConstantFPValue - Add constant value entry in variable DIE.
-void CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
+void DwarfUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
   assert(MO.isFPImm() && "Invalid machine operand!");
   DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
   APFloat FPImm = MO.getFPImm()->getValueAPF();
@@ -778,19 +861,19 @@
 }
 
 /// addConstantFPValue - Add constant value entry in variable DIE.
-void CompileUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) {
+void DwarfUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) {
   // Pass this down to addConstantValue as an unsigned bag of bits.
   addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), true);
 }
 
 /// addConstantValue - Add constant value entry in variable DIE.
-void CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
-                                   bool Unsigned) {
+void DwarfUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
+                                 bool Unsigned) {
   addConstantValue(Die, CI->getValue(), Unsigned);
 }
 
 // addConstantValue - Add constant value entry in variable DIE.
-void CompileUnit::addConstantValue(DIE *Die, const APInt &Val, bool Unsigned) {
+void DwarfUnit::addConstantValue(DIE *Die, const APInt &Val, bool Unsigned) {
   unsigned CIBitWidth = Val.getBitWidth();
   if (CIBitWidth <= 64) {
     // If we're a signed constant definitely use sdata.
@@ -846,7 +929,7 @@
 }
 
 /// addTemplateParams - Add template parameters into buffer.
-void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) {
+void DwarfUnit::addTemplateParams(DIE &Buffer, DIArray TParams) {
   // Add template parameters.
   for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) {
     DIDescriptor Element = TParams.getElement(i);
@@ -860,9 +943,9 @@
 }
 
 /// getOrCreateContextDIE - Get context owner's DIE.
-DIE *CompileUnit::getOrCreateContextDIE(DIScope Context) {
+DIE *DwarfUnit::getOrCreateContextDIE(DIScope Context) {
   if (!Context || Context.isFile())
-    return getCUDie();
+    return getUnitDie();
   if (Context.isType())
     return getOrCreateTypeDIE(DIType(Context));
   if (Context.isNameSpace())
@@ -872,18 +955,38 @@
   return getDIE(Context);
 }
 
+DIE *DwarfUnit::createTypeDIE(DICompositeType Ty) {
+  DIScope Context = resolve(Ty.getContext());
+  DIE *ContextDIE = getOrCreateContextDIE(Context);
+
+  DIE *TyDIE = getDIE(Ty);
+  if (TyDIE)
+    return TyDIE;
+
+  // Create new type.
+  TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty);
+
+  constructTypeDIE(*TyDIE, Ty);
+
+  updateAcceleratorTables(Context, Ty, TyDIE);
+  return TyDIE;
+}
+
 /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
 /// given DIType.
-DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
+DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
   if (!TyNode)
     return NULL;
 
   DIType Ty(TyNode);
   assert(Ty.isType());
+  assert(Ty == resolve(Ty.getRef()) &&
+         "type was not uniqued, possible ODR violation.");
 
   // Construct the context before querying for the existence of the DIE in case
   // such construction creates the DIE.
-  DIE *ContextDIE = getOrCreateContextDIE(resolve(Ty.getContext()));
+  DIScope Context = resolve(Ty.getContext());
+  DIE *ContextDIE = getOrCreateContextDIE(Context);
   assert(ContextDIE);
 
   DIE *TyDIE = getDIE(Ty);
@@ -893,16 +996,29 @@
   // Create new type.
   TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty);
 
+  updateAcceleratorTables(Context, Ty, TyDIE);
+
   if (Ty.isBasicType())
     constructTypeDIE(*TyDIE, DIBasicType(Ty));
-  else if (Ty.isCompositeType())
-    constructTypeDIE(*TyDIE, DICompositeType(Ty));
-  else {
+  else if (Ty.isCompositeType()) {
+    DICompositeType CTy(Ty);
+    if (GenerateDwarfTypeUnits && !Ty.isForwardDecl())
+      if (MDString *TypeId = CTy.getIdentifier()) {
+        DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy);
+        // Skip updating the accelerator tables since this is not the full type.
+        return TyDIE;
+      }
+    constructTypeDIE(*TyDIE, CTy);
+  } else {
     assert(Ty.isDerivedType() && "Unknown kind of DIType");
     constructTypeDIE(*TyDIE, DIDerivedType(Ty));
   }
-  // If this is a named finished type then include it in the list of types
-  // for the accelerator tables.
+
+  return TyDIE;
+}
+
+void DwarfUnit::updateAcceleratorTables(DIScope Context, DIType Ty,
+                                        const DIE *TyDIE) {
   if (!Ty.getName().empty() && !Ty.isForwardDecl()) {
     bool IsImplementation = false;
     if (Ty.isCompositeType()) {
@@ -913,13 +1029,16 @@
     }
     unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0;
     addAccelType(Ty.getName(), std::make_pair(TyDIE, Flags));
-  }
 
-  return TyDIE;
+    if ((!Context || Context.isCompileUnit() || Context.isFile() ||
+         Context.isNameSpace()) &&
+        getCUNode().getEmissionKind() != DIBuilder::LineTablesOnly)
+      GlobalTypes[getParentContextString(Context) + Ty.getName().str()] = TyDIE;
+  }
 }
 
 /// addType - Add a new type attribute to the specified entity.
-void CompileUnit::addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute) {
+void DwarfUnit::addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute) {
   assert(Ty && "Trying to add a type that doesn't exist?");
 
   // Check for pre-existence.
@@ -937,66 +1056,59 @@
   Entry = createDIEEntry(Buffer);
   insertDIEEntry(Ty, Entry);
   addDIEEntry(Entity, Attribute, Entry);
-
-  // If this is a complete composite type then include it in the
-  // list of global types.
-  addGlobalType(Ty);
 }
 
 // Accelerator table mutators - add each name along with its companion
 // DIE to the proper table while ensuring that the name that we're going
 // to reference is in the string table. We do this since the names we
 // add may not be identical to the names in the DIE.
-void CompileUnit::addAccelName(StringRef Name, DIE *Die) {
+void DwarfUnit::addAccelName(StringRef Name, const DIE *Die) {
+  if (!DD->useDwarfAccelTables())
+    return;
   DU->getStringPoolEntry(Name);
-  std::vector<DIE *> &DIEs = AccelNames[Name];
+  std::vector<const DIE *> &DIEs = AccelNames[Name];
   DIEs.push_back(Die);
 }
 
-void CompileUnit::addAccelObjC(StringRef Name, DIE *Die) {
+void DwarfUnit::addAccelObjC(StringRef Name, const DIE *Die) {
+  if (!DD->useDwarfAccelTables())
+    return;
   DU->getStringPoolEntry(Name);
-  std::vector<DIE *> &DIEs = AccelObjC[Name];
+  std::vector<const DIE *> &DIEs = AccelObjC[Name];
   DIEs.push_back(Die);
 }
 
-void CompileUnit::addAccelNamespace(StringRef Name, DIE *Die) {
+void DwarfUnit::addAccelNamespace(StringRef Name, const DIE *Die) {
+  if (!DD->useDwarfAccelTables())
+    return;
   DU->getStringPoolEntry(Name);
-  std::vector<DIE *> &DIEs = AccelNamespace[Name];
+  std::vector<const DIE *> &DIEs = AccelNamespace[Name];
   DIEs.push_back(Die);
 }
 
-void CompileUnit::addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die) {
+void DwarfUnit::addAccelType(StringRef Name,
+                             std::pair<const DIE *, unsigned> Die) {
+  if (!DD->useDwarfAccelTables())
+    return;
   DU->getStringPoolEntry(Name);
-  std::vector<std::pair<DIE *, unsigned> > &DIEs = AccelTypes[Name];
+  std::vector<std::pair<const DIE *, unsigned> > &DIEs = AccelTypes[Name];
   DIEs.push_back(Die);
 }
 
 /// addGlobalName - Add a new global name to the compile unit.
-void CompileUnit::addGlobalName(StringRef Name, DIE *Die, DIScope Context) {
+void DwarfUnit::addGlobalName(StringRef Name, DIE *Die, DIScope Context) {
+  if (getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly)
+    return;
   std::string FullName = getParentContextString(Context) + Name.str();
   GlobalNames[FullName] = Die;
 }
 
-/// addGlobalType - Add a new global type to the compile unit.
-///
-void CompileUnit::addGlobalType(DIType Ty) {
-  DIScope Context = resolve(Ty.getContext());
-  if (!Ty.getName().empty() && !Ty.isForwardDecl() &&
-      (!Context || Context.isCompileUnit() || Context.isFile() ||
-       Context.isNameSpace()))
-    if (DIEEntry *Entry = getDIEEntry(Ty)) {
-      std::string FullName =
-          getParentContextString(Context) + Ty.getName().str();
-      GlobalTypes[FullName] = Entry->getEntry();
-    }
-}
-
 /// getParentContextString - Walks the metadata parent chain in a language
 /// specific manner (using the compile unit language) and returns
 /// it as a string. This is done at the metadata level because DIEs may
 /// not currently have been added to the parent context and walking the
 /// DIEs looking for names is more expensive than walking the metadata.
-std::string CompileUnit::getParentContextString(DIScope Context) const {
+std::string DwarfUnit::getParentContextString(DIScope Context) const {
   if (!Context)
     return "";
 
@@ -1031,24 +1143,8 @@
   return CS;
 }
 
-/// addPubTypes - Add subprogram argument types for pubtypes section.
-void CompileUnit::addPubTypes(DISubprogram SP) {
-  DICompositeType SPTy = SP.getType();
-  uint16_t SPTag = SPTy.getTag();
-  if (SPTag != dwarf::DW_TAG_subroutine_type)
-    return;
-
-  DIArray Args = SPTy.getTypeArray();
-  for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) {
-    DIType ATy(Args.getElement(i));
-    if (!ATy.isType())
-      continue;
-    addGlobalType(ATy);
-  }
-}
-
 /// constructTypeDIE - Construct basic type die from DIBasicType.
-void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
+void DwarfUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
   // Get core information.
   StringRef Name = BTy.getName();
   // Add name if not anonymous or intermediate type.
@@ -1067,7 +1163,7 @@
 }
 
 /// constructTypeDIE - Construct derived type die from DIDerivedType.
-void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
+void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
   // Get core information.
   StringRef Name = DTy.getName();
   uint64_t Size = DTy.getSizeInBits() >> 3;
@@ -1094,40 +1190,25 @@
     addSourceLine(&Buffer, DTy);
 }
 
-/// Return true if the type is appropriately scoped to be contained inside
-/// its own type unit.
-static bool isTypeUnitScoped(DIType Ty, const DwarfDebug *DD) {
-  DIScope Parent = DD->resolve(Ty.getContext());
-  while (Parent) {
-    // Don't generate a hash for anything scoped inside a function.
-    if (Parent.isSubprogram())
-      return false;
-    Parent = DD->resolve(Parent.getContext());
-  }
-  return true;
-}
-
-/// Return true if the type should be split out into a type unit.
-static bool shouldCreateTypeUnit(DICompositeType CTy, const DwarfDebug *DD) {
-  uint16_t Tag = CTy.getTag();
-
-  switch (Tag) {
-  case dwarf::DW_TAG_structure_type:
-  case dwarf::DW_TAG_union_type:
-  case dwarf::DW_TAG_enumeration_type:
-  case dwarf::DW_TAG_class_type:
-    // If this is a class, structure, union, or enumeration type
-    // that is a definition (not a declaration), and not scoped
-    // inside a function then separate this out as a type unit.
-    return !CTy.isForwardDecl() && isTypeUnitScoped(CTy, DD);
-  default:
-    return false;
+/// constructSubprogramArguments - Construct function argument DIEs.
+void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DIArray Args) {
+  for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+    DIDescriptor Ty = Args.getElement(i);
+    if (Ty.isUnspecifiedParameter()) {
+      assert(i == N-1 && "Unspecified parameter must be the last argument");
+      createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer);
+    } else {
+      DIE *Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer);
+      addType(Arg, DIType(Ty));
+      if (DIType(Ty).isArtificial())
+        addFlag(Arg, dwarf::DW_AT_artificial);
+    }
   }
 }
 
 /// constructTypeDIE - Construct type DIE from DICompositeType.
-void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
-  // Get core information.
+void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
+  // Add name if not anonymous or intermediate type.
   StringRef Name = CTy.getName();
 
   uint64_t Size = CTy.getSizeInBits() >> 3;
@@ -1148,19 +1229,12 @@
       addType(&Buffer, RTy);
 
     bool isPrototyped = true;
-    // Add arguments.
-    for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) {
-      DIDescriptor Ty = Elements.getElement(i);
-      if (Ty.isUnspecifiedParameter()) {
-        createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer);
-        isPrototyped = false;
-      } else {
-        DIE *Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer);
-        addType(Arg, DIType(Ty));
-        if (DIType(Ty).isArtificial())
-          addFlag(Arg, dwarf::DW_AT_artificial);
-      }
-    }
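+    // Treat a type array of exactly (return type, unspecified parameters) as
+    // an unprototyped function, e.g. 'int f()' in C; a trailing unspecified
+    // parameter after named arguments is just varargs and keeps the
+    // prototyped flag.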
+    if (Elements.getNumElements() == 2 &&
+        Elements.getElement(1).isUnspecifiedParameter())
+      isPrototyped = false;
+
+    constructSubprogramArguments(Buffer, Elements);
+
     // Add prototype flag if we're dealing with a C language and the
     // function has been prototyped.
     uint16_t Language = getLanguage();
@@ -1168,6 +1242,12 @@
         (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 ||
          Language == dwarf::DW_LANG_ObjC))
       addFlag(&Buffer, dwarf::DW_AT_prototyped);
+
+    if (CTy.isLValueReference())
+      addFlag(&Buffer, dwarf::DW_AT_reference);
+
+    if (CTy.isRValueReference())
+      addFlag(&Buffer, dwarf::DW_AT_rvalue_reference);
   } break;
   case dwarf::DW_TAG_structure_type:
   case dwarf::DW_TAG_union_type:
@@ -1177,21 +1257,9 @@
     for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
       DIDescriptor Element = Elements.getElement(i);
       DIE *ElemDie = NULL;
-      if (Element.isSubprogram()) {
-        DISubprogram SP(Element);
-        ElemDie = getOrCreateSubprogramDIE(SP);
-        if (SP.isProtected())
-          addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
-                  dwarf::DW_ACCESS_protected);
-        else if (SP.isPrivate())
-          addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
-                  dwarf::DW_ACCESS_private);
-        else
-          addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
-                  dwarf::DW_ACCESS_public);
-        if (SP.isExplicit())
-          addFlag(ElemDie, dwarf::DW_AT_explicit);
-      } else if (Element.isDerivedType()) {
+      if (Element.isSubprogram())
+        ElemDie = getOrCreateSubprogramDIE(DISubprogram(Element));
+      else if (Element.isDerivedType()) {
         DIDerivedType DDTy(Element);
         if (DDTy.getTag() == dwarf::DW_TAG_friend) {
           ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer);
@@ -1207,7 +1275,8 @@
         ElemDie = createAndAddDIE(Property.getTag(), Buffer);
         StringRef PropertyName = Property.getObjCPropertyName();
         addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
-        addType(ElemDie, Property.getType());
+        if (Property.getType())
+          addType(ElemDie, Property.getType());
         addSourceLine(ElemDie, Property);
         StringRef GetterName = Property.getObjCPropertyGetterName();
         if (!GetterName.empty())
@@ -1293,17 +1362,12 @@
       addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1,
               RLang);
   }
-  // If this is a type applicable to a type unit it then add it to the
-  // list of types we'll compute a hash for later.
-  if (shouldCreateTypeUnit(CTy, DD))
-    DD->addTypeUnitType(&Buffer);
 }
 
 /// constructTemplateTypeParameterDIE - Construct new DIE for the given
 /// DITemplateTypeParameter.
-void
-CompileUnit::constructTemplateTypeParameterDIE(DIE &Buffer,
-                                               DITemplateTypeParameter TP) {
+void DwarfUnit::constructTemplateTypeParameterDIE(DIE &Buffer,
+                                                  DITemplateTypeParameter TP) {
   DIE *ParamDIE =
       createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer);
   // Add the type if it exists, it could be void and therefore no type.
@@ -1316,8 +1380,8 @@
 /// constructTemplateValueParameterDIE - Construct new DIE for the given
 /// DITemplateValueParameter.
 void
-CompileUnit::constructTemplateValueParameterDIE(DIE &Buffer,
-                                                DITemplateValueParameter VP) {
+DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer,
+                                              DITemplateValueParameter VP) {
   DIE *ParamDIE = createAndAddDIE(VP.getTag(), Buffer);
 
   // Add the type if there is one, template template and template parameter
@@ -1333,12 +1397,12 @@
     else if (GlobalValue *GV = dyn_cast<GlobalValue>(Val)) {
       // For declaration non-type template parameters (such as global values and
       // functions)
-      DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
-      addOpAddress(Block, Asm->getSymbol(GV));
+      DIELoc *Loc = new (DIEValueAllocator) DIELoc();
+      addOpAddress(Loc, Asm->getSymbol(GV));
       // Emit DW_OP_stack_value to use the address as the immediate value of the
       // parameter, rather than a pointer to it.
-      addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
-      addBlock(ParamDIE, dwarf::DW_AT_location, Block);
+      addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
+      addBlock(ParamDIE, dwarf::DW_AT_location, Loc);
     } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_template_param) {
       assert(isa<MDString>(Val));
       addString(ParamDIE, dwarf::DW_AT_GNU_template_name,
@@ -1352,7 +1416,7 @@
 }
 
 /// getOrCreateNameSpace - Create a DIE for DINameSpace.
-DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) {
+DIE *DwarfUnit::getOrCreateNameSpace(DINameSpace NS) {
   // Construct the context before querying for the existence of the DIE in case
   // such construction creates the DIE.
   DIE *ContextDIE = getOrCreateContextDIE(NS.getContext());
@@ -1373,11 +1437,16 @@
 }
 
 /// getOrCreateSubprogramDIE - Create new DIE using SP.
-DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
+DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
   // Construct the context before querying for the existence of the DIE in case
   // such construction creates the DIE (as is the case for member function
   // declarations).
-  DIE *ContextDIE = getOrCreateContextDIE(resolve(SP.getContext()));
+  DIScope Context = resolve(SP.getContext());
+  DIE *ContextDIE = getOrCreateContextDIE(Context);
+
+  // Unique declarations based on the ODR, where applicable.
+  SP = DISubprogram(DD->resolve(SP.getRef()));
+  assert(SP.Verify());
 
   DIE *SPDie = getDIE(SP);
   if (SPDie)
@@ -1386,7 +1455,7 @@
   DISubprogram SPDecl = SP.getFunctionDeclaration();
   if (SPDecl.isSubprogram())
     // Add subprogram definitions to the CU die directly.
-    ContextDIE = CUDie.get();
+    ContextDIE = UnitDie.get();
 
   // DW_TAG_inlined_subroutine may refer to this DIE.
   SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP);
@@ -1440,7 +1509,7 @@
   unsigned VK = SP.getVirtuality();
   if (VK) {
     addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK);
-    DIEBlock *Block = getDIEBlock();
+    DIELoc *Block = getDIELoc();
     addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
     addUInt(Block, dwarf::DW_FORM_udata, SP.getVirtualIndex());
     addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block);
@@ -1453,13 +1522,7 @@
 
     // Add arguments. Do not add arguments for a subprogram definition; they
     // will be handled while processing its variables.
-    for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
-      DIE *Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, *SPDie);
-      DIType ATy(Args.getElement(i));
-      addType(Arg, ATy);
-      if (ATy.isArtificial())
-        addFlag(Arg, dwarf::DW_AT_artificial);
-    }
+    constructSubprogramArguments(*SPDie, Args);
   }
 
   if (SP.isArtificial())
@@ -1475,6 +1538,25 @@
     addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
   }
 
+  if (SP.isLValueReference())
+    addFlag(SPDie, dwarf::DW_AT_reference);
+
+  if (SP.isRValueReference())
+    addFlag(SPDie, dwarf::DW_AT_rvalue_reference);
+
+  if (SP.isProtected())
+    addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+            dwarf::DW_ACCESS_protected);
+  else if (SP.isPrivate())
+    addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+            dwarf::DW_ACCESS_private);
+  else
+    addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
+            dwarf::DW_ACCESS_public);
+
+  if (SP.isExplicit())
+    addFlag(SPDie, dwarf::DW_AT_explicit);
+
   return SPDie;
 }
 
@@ -1506,17 +1588,15 @@
 }
 
 /// createGlobalVariableDIE - create global variable DIE.
-void CompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) {
-
+void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) {
   // Check for pre-existence.
   if (getDIE(GV))
     return;
 
-  if (!GV.isGlobalVariable())
-    return;
+  assert(GV.isGlobalVariable());
 
   DIScope GVContext = GV.getContext();
-  DIType GTy = GV.getType();
+  DIType GTy = DD->resolve(GV.getType());
 
   // If this is a static data member definition, some attributes belong
   // to the declaration DIE.
@@ -1558,44 +1638,46 @@
   bool isGlobalVariable = GV.getGlobal() != NULL;
   if (isGlobalVariable) {
     addToAccelTable = true;
-    DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+    DIELoc *Loc = new (DIEValueAllocator) DIELoc();
     const MCSymbol *Sym = Asm->getSymbol(GV.getGlobal());
     if (GV.getGlobal()->isThreadLocal()) {
       // FIXME: Make this work with -gsplit-dwarf.
       unsigned PointerSize = Asm->getDataLayout().getPointerSize();
       assert((PointerSize == 4 || PointerSize == 8) &&
              "Add support for other sizes if necessary");
-      const MCExpr *Expr =
-          Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym);
       // Based on GCC's support for TLS:
       if (!DD->useSplitDwarf()) {
         // 1) Start with a constu of the appropriate pointer size
-        addUInt(Block, dwarf::DW_FORM_data1,
+        addUInt(Loc, dwarf::DW_FORM_data1,
                 PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u);
         // 2) containing the (relocated) offset of the TLS variable
         //    within the module's TLS block.
-        addExpr(Block, dwarf::DW_FORM_udata, Expr);
+        addExpr(Loc, dwarf::DW_FORM_udata,
+                Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
       } else {
-        addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
-        addUInt(Block, dwarf::DW_FORM_udata, DU->getAddrPoolIndex(Expr));
+        addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
+        addUInt(Loc, dwarf::DW_FORM_udata,
+                DU->getAddrPoolIndex(Sym, /* TLS */ true));
       }
       // 3) followed by a custom OP to make the debugger do a TLS lookup.
-      addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address);
-    } else
-      addOpAddress(Block, Sym);
+      addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address);
+    } else {
+      DD->addArangeLabel(SymbolCU(this, Sym));
+      addOpAddress(Loc, Sym);
+    }
     // Do not create specification DIE if context is either compile unit
     // or a subprogram.
     if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() &&
         !GVContext.isFile() && !DD->isSubprogramContext(GVContext)) {
       // Create specification DIE.
-      VariableSpecDIE = createAndAddDIE(dwarf::DW_TAG_variable, *CUDie);
+      VariableSpecDIE = createAndAddDIE(dwarf::DW_TAG_variable, *UnitDie);
       addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, VariableDIE);
-      addBlock(VariableSpecDIE, dwarf::DW_AT_location, Block);
+      addBlock(VariableSpecDIE, dwarf::DW_AT_location, Loc);
       // A static member's declaration is already flagged as such.
       if (!SDMDecl.Verify())
         addFlag(VariableDIE, dwarf::DW_AT_declaration);
     } else {
-      addBlock(VariableDIE, dwarf::DW_AT_location, Block);
+      addBlock(VariableDIE, dwarf::DW_AT_location, Loc);
     }
     // Add the linkage name.
     StringRef LinkageName = GV.getLinkageName();
@@ -1605,7 +1687,8 @@
       // TAG_variable.
       addString(IsStaticMember && VariableSpecDIE ? VariableSpecDIE
                                                   : VariableDIE,
-                dwarf::DW_AT_MIPS_linkage_name,
+                DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name
+                                           : dwarf::DW_AT_MIPS_linkage_name,
                 GlobalValue::getRealLinkageName(LinkageName));
   } else if (const ConstantInt *CI =
                  dyn_cast_or_null<ConstantInt>(GV.getConstant())) {
@@ -1617,15 +1700,17 @@
   } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getOperand(11))) {
     addToAccelTable = true;
     // GV is a merged global.
-    DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+    DIELoc *Loc = new (DIEValueAllocator) DIELoc();
     Value *Ptr = CE->getOperand(0);
-    addOpAddress(Block, Asm->getSymbol(cast<GlobalValue>(Ptr)));
-    addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+    MCSymbol *Sym = Asm->getSymbol(cast<GlobalValue>(Ptr));
+    DD->addArangeLabel(SymbolCU(this, Sym));
+    addOpAddress(Loc, Sym);
+    addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
     SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end());
-    addUInt(Block, dwarf::DW_FORM_udata,
+    addUInt(Loc, dwarf::DW_FORM_udata,
             Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx));
-    addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
-    addBlock(VariableDIE, dwarf::DW_AT_location, Block);
+    addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+    addBlock(VariableDIE, dwarf::DW_AT_location, Loc);
   }
 
   if (addToAccelTable) {
@@ -1644,8 +1729,7 @@
 }
 
 /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
-void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR,
-                                       DIE *IndexTy) {
+void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) {
   DIE *DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer);
   addDIEEntry(DW_Subrange, dwarf::DW_AT_type, IndexTy);
 
@@ -1670,7 +1754,7 @@
 }
 
 /// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
-void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) {
+void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) {
   if (CTy.isVector())
     addFlag(&Buffer, dwarf::DW_AT_GNU_vector);
 
@@ -1682,12 +1766,12 @@
   // as different languages may have different sizes for indexes.
   DIE *IdxTy = getIndexTyDie();
   if (!IdxTy) {
-    // Construct an anonymous type for index type.
-    IdxTy = createAndAddDIE(dwarf::DW_TAG_base_type, *CUDie.get());
-    addString(IdxTy, dwarf::DW_AT_name, "int");
-    addUInt(IdxTy, dwarf::DW_AT_byte_size, None, sizeof(int32_t));
+    // Construct an integer type to use for indexes.
+    IdxTy = createAndAddDIE(dwarf::DW_TAG_base_type, *UnitDie);
+    addString(IdxTy, dwarf::DW_AT_name, "sizetype");
+    addUInt(IdxTy, dwarf::DW_AT_byte_size, None, sizeof(int64_t));
     addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
-            dwarf::DW_ATE_signed);
+            dwarf::DW_ATE_unsigned);
     setIndexTyDie(IdxTy);
   }
 
@@ -1701,7 +1785,7 @@
 }
 
 /// constructEnumTypeDIE - Construct an enum type DIE from DICompositeType.
-void CompileUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) {
+void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) {
   DIArray Elements = CTy.getTypeArray();
 
   // Add enumerators to enumeration type.
@@ -1712,7 +1796,8 @@
       StringRef Name = Enum.getName();
       addString(Enumerator, dwarf::DW_AT_name, Name);
       int64_t Value = Enum.getEnumValue();
-      addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
+      addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata,
+              Value);
     }
   }
   DIType DTy = resolve(CTy.getTypeDerivedFrom());
@@ -1724,7 +1809,7 @@
 
 /// constructContainingTypeDIEs - Construct DIEs for types that contain
 /// vtables.
-void CompileUnit::constructContainingTypeDIEs() {
+void DwarfUnit::constructContainingTypeDIEs() {
   for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(),
                                                  CE = ContainingTypeMap.end();
        CI != CE; ++CI) {
@@ -1740,7 +1825,7 @@
 }
 
 /// constructVariableDIE - Construct a DIE for the given DbgVariable.
-DIE *CompileUnit::constructVariableDIE(DbgVariable &DV, bool isScopeAbstract) {
+DIE *DwarfUnit::constructVariableDIE(DbgVariable &DV, bool isScopeAbstract) {
   StringRef Name = DV.getName();
 
   // Define variable debug information entry.
@@ -1768,10 +1853,7 @@
 
   unsigned Offset = DV.getDotDebugLocOffset();
   if (Offset != ~0U) {
-    addLabel(VariableDie, dwarf::DW_AT_location,
-             DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
-                                        : dwarf::DW_FORM_data4,
-             Asm->GetTempSymbol("debug_loc", Offset));
+    addLocationList(VariableDie, dwarf::DW_AT_location, Offset);
     DV.setDIE(VariableDie);
     return VariableDie;
   }
@@ -1815,7 +1897,7 @@
 }
 
 /// constructMemberDIE - Construct member DIE from DIDerivedType.
-void CompileUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
+void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
   DIE *MemberDie = createAndAddDIE(DT.getTag(), Buffer);
   StringRef Name = DT.getName();
   if (!Name.empty())
@@ -1825,16 +1907,13 @@
 
   addSourceLine(MemberDie, DT);
 
-  DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock();
-  addUInt(MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
-
   if (DT.getTag() == dwarf::DW_TAG_inheritance && DT.isVirtual()) {
 
     // For C++, virtual base classes are not at a fixed offset. Use the
     // following expression to extract the appropriate offset from the vtable.
     // BaseAddr = ObAddr + *((*ObAddr) - Offset)
 
-    DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock();
+    DIELoc *VBaseLocationDie = new (DIEValueAllocator) DIELoc();
     addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
     addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
     addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
@@ -1850,10 +1929,9 @@
     uint64_t OffsetInBytes;
 
     if (Size != FieldSize) {
-      // Handle bitfield.
-      addUInt(MemberDie, dwarf::DW_AT_byte_size, None,
-              getBaseTypeSize(DD, DT) >> 3);
-      addUInt(MemberDie, dwarf::DW_AT_bit_size, None, DT.getSizeInBits());
+      // Handle bitfield, assume bytes are 8 bits.
+      addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8);
+      addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size);
 
       uint64_t Offset = DT.getOffsetInBits();
       uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
@@ -1866,13 +1944,21 @@
         Offset = FieldSize - (Offset + Size);
       addUInt(MemberDie, dwarf::DW_AT_bit_offset, None, Offset);
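+      // For example, a 3-bit field at bit offset 2 within a 32-bit aligned
+      // storage unit is described (on a little-endian target) with
+      // DW_AT_byte_size 4, DW_AT_bit_size 3 and
+      // DW_AT_bit_offset 32 - (2 + 3) = 27.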
 
-      // Here WD_AT_data_member_location points to the anonymous
+      // Here DW_AT_data_member_location points to the anonymous
       // field that includes this bit field.
       OffsetInBytes = FieldOffset >> 3;
     } else
       // This is not a bitfield.
       OffsetInBytes = DT.getOffsetInBits() >> 3;
-    addUInt(MemberDie, dwarf::DW_AT_data_member_location, None, OffsetInBytes);
+
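+    // DWARF 2 requires DW_AT_data_member_location to be a location expression
+    // (DW_OP_plus_uconst <offset>); DWARF 3 and later also allow a plain
+    // constant byte offset.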
+    if (DD->getDwarfVersion() <= 2) {
+      DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc();
+      addUInt(MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+      addUInt(MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes);
+      addBlock(MemberDie, dwarf::DW_AT_data_member_location, MemLocationDie);
+    } else
+      addUInt(MemberDie, dwarf::DW_AT_data_member_location, None,
+              OffsetInBytes);
   }
 
   if (DT.isProtected())
@@ -1900,7 +1986,7 @@
 }
 
 /// getOrCreateStaticMemberDIE - Create new DIE for C++ static member.
-DIE *CompileUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) {
+DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) {
   if (!DT.Verify())
     return NULL;
 
@@ -1944,13 +2030,89 @@
   return StaticMemberDIE;
 }
 
-void CompileUnit::emitHeader(const MCSection *ASection,
-                             const MCSymbol *ASectionSym) {
+void DwarfUnit::emitHeader(const MCSymbol *ASectionSym) const {
   Asm->OutStreamer.AddComment("DWARF version number");
   Asm->EmitInt16(DD->getDwarfVersion());
   Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
-  Asm->EmitSectionOffset(Asm->GetTempSymbol(ASection->getLabelBeginName()),
-                         ASectionSym);
+  // We share one abbreviations table across all units so it's always at the
+  // start of the section. Use a relocatable offset where needed to ensure
+  // linking doesn't invalidate that offset.
+  if (ASectionSym)
+    Asm->EmitSectionOffset(ASectionSym, ASectionSym);
+  else
+    // Use a constant value when no symbol is provided.
+    Asm->EmitInt32(0);
   Asm->OutStreamer.AddComment("Address Size (in bytes)");
   Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
 }
+
+void DwarfUnit::addRange(RangeSpan Range) {
+  // Only add a range for this unit if we're emitting full debug.
+  if (getCUNode().getEmissionKind() == DIBuilder::FullDebug) {
+    // If we have no current ranges, just add the range and return. Otherwise,
+    // check the current section and CU against the previous section and CU we
+    // emitted into (those the subprogram was contained within). If these are
+    // the same, extend our current range; otherwise add this as a new range.
+    if (CURanges.size() == 0 ||
+        this != DD->getPrevCU() ||
+        Asm->getCurrentSection() != DD->getPrevSection()) {
+      CURanges.push_back(Range);
+      return;
+    }
+
+    assert(&(CURanges.back().getEnd()->getSection()) ==
+               &(Range.getEnd()->getSection()) &&
+           "We can only append to a range in the same section!");
+    CURanges.back().setEnd(Range.getEnd());
+  }
+}
+
+void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) {
+  // Define start line table label for each Compile Unit.
+  MCSymbol *LineTableStartSym =
+      Asm->OutStreamer.getDwarfLineTableSymbol(getUniqueID());
+
+  stmtListIndex = UnitDie->getValues().size();
+
+  // DW_AT_stmt_list is an offset into the debug_line section holding the
+  // line number information for this compile unit. For split dwarf this is
+  // left in the skeleton CU and so is not included here.
+  // The line table entries are not always emitted in assembly, so it
+  // is not okay to use line_table_start here.
+  if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+    addSectionLabel(UnitDie.get(), dwarf::DW_AT_stmt_list, LineTableStartSym);
+  else
+    addSectionDelta(UnitDie.get(), dwarf::DW_AT_stmt_list, LineTableStartSym,
+                    DwarfLineSectionSym);
+}
+
+void DwarfCompileUnit::applyStmtList(DIE &D) {
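+  // Reuse the DW_AT_stmt_list form and value recorded by initStmtList so
+  // that D (e.g. a skeleton unit DIE) references the same line table.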
+  D.addValue(dwarf::DW_AT_stmt_list,
+             UnitDie->getAbbrev().getData()[stmtListIndex].getForm(),
+             UnitDie->getValues()[stmtListIndex]);
+}
+
+void DwarfTypeUnit::emitHeader(const MCSymbol *ASectionSym) const {
+  DwarfUnit::emitHeader(ASectionSym);
+  Asm->OutStreamer.AddComment("Type Signature");
+  Asm->OutStreamer.EmitIntValue(TypeSignature, sizeof(TypeSignature));
+  Asm->OutStreamer.AddComment("Type DIE Offset");
+  // In a skeleton type unit there is no type DIE so emit a zero offset.
+  Asm->OutStreamer.EmitIntValue(Ty ? Ty->getOffset() : 0,
+                                sizeof(Ty->getOffset()));
+}
+
+void DwarfTypeUnit::initSection(const MCSection *Section) {
+  assert(!this->Section);
+  this->Section = Section;
+  // Since each type unit is contained in its own COMDAT section, the begin
+  // label and the section label are the same. Using the begin label emission in
+  // DwarfDebug to emit the section label as well is slightly subtle/sneaky,
+  // but it avoids the only other alternative: lazily constructing
+  // start-of-section labels and storing a mapping in DwarfDebug (or
+  // AsmPrinter).
+  this->SectionSym = this->LabelBegin =
+      Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID());
+  this->LabelEnd =
+      Asm->GetTempSymbol(Section->getLabelEndName(), getUniqueID());
+  this->LabelRange = Asm->GetTempSymbol("gnu_ranges", getUniqueID());
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
new file mode 100644
index 0000000..ef713f7
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -0,0 +1,624 @@
+//===-- llvm/CodeGen/DwarfUnit.h - Dwarf Compile Unit ---*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF compile units.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
+
+#include "DIE.h"
+#include "DwarfDebug.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCDwarf.h"
+
+namespace llvm {
+
+class MachineLocation;
+class MachineOperand;
+class ConstantInt;
+class ConstantFP;
+class DbgVariable;
+class DwarfCompileUnit;
+
+// Data structure to hold a range for range lists.
+class RangeSpan {
+public:
+  RangeSpan(MCSymbol *S, MCSymbol *E) : Start(S), End(E) {}
+  const MCSymbol *getStart() const { return Start; }
+  const MCSymbol *getEnd() const { return End; }
+  void setEnd(const MCSymbol *E) { End = E; }
+
+private:
+  const MCSymbol *Start, *End;
+};
+
+class RangeSpanList {
+private:
+  // Symbol used to locate this particular list of spans within the
+  // debug_range section.
+  MCSymbol *RangeSym;
+  // List of ranges.
+  SmallVector<RangeSpan, 2> Ranges;
+
+public:
+  RangeSpanList(MCSymbol *Sym) : RangeSym(Sym) {}
+  MCSymbol *getSym() const { return RangeSym; }
+  const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; }
+  void addRange(RangeSpan Range) { Ranges.push_back(Range); }
+};
+
+//===----------------------------------------------------------------------===//
+/// Unit - This dwarf writer support class manages information associated
+/// with a source file.
+class DwarfUnit {
+protected:
+  /// UniqueID - a numeric ID unique among all CUs in the module
+  unsigned UniqueID;
+
+  /// Node - MDNode for the compile unit.
+  DICompileUnit CUNode;
+
+  /// Unit debug information entry.
+  const std::unique_ptr<DIE> UnitDie;
+
+  /// Offset of the UnitDie from beginning of debug info section.
+  unsigned DebugInfoOffset;
+
+  /// Asm - Target of Dwarf emission.
+  AsmPrinter *Asm;
+
+  // Holders for some common dwarf information.
+  DwarfDebug *DD;
+  DwarfFile *DU;
+
+  /// IndexTyDie - An anonymous type for index type.  Owned by UnitDie.
+  DIE *IndexTyDie;
+
+  /// MDNodeToDieMap - Tracks the mapping of unit level debug information
+  /// variables to debug information entries.
+  DenseMap<const MDNode *, DIE *> MDNodeToDieMap;
+
+  /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug information
+  /// descriptors to debug information entries using a DIEEntry proxy.
+  DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
+
+  /// GlobalNames - A map of globally visible named entities for this unit.
+  StringMap<const DIE *> GlobalNames;
+
+  /// GlobalTypes - A map of globally visible types for this unit.
+  StringMap<const DIE *> GlobalTypes;
+
+  /// AccelNames - A map of names for the name accelerator table.
+  StringMap<std::vector<const DIE *> > AccelNames;
+
+  /// AccelObjC - A map of objc spec for the objc accelerator table.
+  StringMap<std::vector<const DIE *> > AccelObjC;
+
+  /// AccelNamespace - A map of names for the namespace accelerator table.
+  StringMap<std::vector<const DIE *> > AccelNamespace;
+
+  /// AccelTypes - A map of names for the type accelerator table.
+  StringMap<std::vector<std::pair<const DIE *, unsigned> > > AccelTypes;
+
+  /// DIEBlocks - A list of all the DIEBlocks in use.
+  std::vector<DIEBlock *> DIEBlocks;
+
+  /// DIELocs - A list of all the DIELocs in use.
+  std::vector<DIELoc *> DIELocs;
+
+  /// ContainingTypeMap - This map is used to keep track of subprogram DIEs that
+  /// need DW_AT_containing_type attribute. This attribute points to a DIE that
+  /// corresponds to the MDNode mapped with the subprogram DIE.
+  DenseMap<DIE *, const MDNode *> ContainingTypeMap;
+
+  // List of ranges for a given compile unit.
+  SmallVector<RangeSpan, 1> CURanges;
+
+  // List of range lists for a given compile unit, separate from the ranges for
+  // the CU itself.
+  SmallVector<RangeSpanList, 1> CURangeLists;
+
+  // DIEValueAllocator - All DIEValues are allocated through this allocator.
+  BumpPtrAllocator DIEValueAllocator;
+
+  // DIEIntegerOne - A preallocated DIEValue because 1 is used frequently.
+  DIEInteger *DIEIntegerOne;
+
+  /// The section this unit will be emitted in.
+  const MCSection *Section;
+
+  /// A label at the start of the non-dwo section related to this unit.
+  MCSymbol *SectionSym;
+
+  /// The start of the unit within its section.
+  MCSymbol *LabelBegin;
+
+  /// The end of the unit within its section.
+  MCSymbol *LabelEnd;
+
+  /// The label for the start of the range sets for the elements of this unit.
+  MCSymbol *LabelRange;
+
+  /// Skeleton unit associated with this unit.
+  DwarfUnit *Skeleton;
+
+  DwarfUnit(unsigned UID, DIE *D, DICompileUnit CU, AsmPrinter *A,
+            DwarfDebug *DW, DwarfFile *DWU);
+
+public:
+  virtual ~DwarfUnit();
+
+  /// Set the skeleton unit associated with this unit.
+  void setSkeleton(DwarfUnit *Skel) { Skeleton = Skel; }
+
+  /// Get the skeleton unit associated with this unit.
+  DwarfUnit *getSkeleton() const { return Skeleton; }
+
+  /// Pass in the SectionSym even though we could recreate it in every compile
+  /// unit (type units will actually have distinct symbols once they're in
+  /// comdat sections).
+  void initSection(const MCSection *Section, MCSymbol *SectionSym) {
+    assert(!this->Section);
+    this->Section = Section;
+    this->SectionSym = SectionSym;
+    this->LabelBegin =
+        Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID());
+    this->LabelEnd =
+        Asm->GetTempSymbol(Section->getLabelEndName(), getUniqueID());
+    this->LabelRange = Asm->GetTempSymbol("gnu_ranges", getUniqueID());
+  }
+
+  const MCSection *getSection() const {
+    assert(Section);
+    return Section;
+  }
+
+  /// If there's a skeleton then return the section symbol for the skeleton
+  /// unit, otherwise return the section symbol for this unit.
+  MCSymbol *getLocalSectionSym() const {
+    if (Skeleton)
+      return Skeleton->getSectionSym();
+    return getSectionSym();
+  }
+
+  MCSymbol *getSectionSym() const {
+    assert(Section);
+    return SectionSym;
+  }
+
+  /// If there's a skeleton then return the begin label for the skeleton unit,
+  /// otherwise return the local label for this unit.
+  MCSymbol *getLocalLabelBegin() const {
+    if (Skeleton)
+      return Skeleton->getLabelBegin();
+    return getLabelBegin();
+  }
+
+  MCSymbol *getLabelBegin() const {
+    assert(Section);
+    return LabelBegin;
+  }
+
+  MCSymbol *getLabelEnd() const {
+    assert(Section);
+    return LabelEnd;
+  }
+
+  MCSymbol *getLabelRange() const {
+    assert(Section);
+    return LabelRange;
+  }
+
+  // Accessors.
+  unsigned getUniqueID() const { return UniqueID; }
+  uint16_t getLanguage() const { return CUNode.getLanguage(); }
+  DICompileUnit getCUNode() const { return CUNode; }
+  DIE *getUnitDie() const { return UnitDie.get(); }
+  const StringMap<const DIE *> &getGlobalNames() const { return GlobalNames; }
+  const StringMap<const DIE *> &getGlobalTypes() const { return GlobalTypes; }
+
+  const StringMap<std::vector<const DIE *> > &getAccelNames() const {
+    return AccelNames;
+  }
+  const StringMap<std::vector<const DIE *> > &getAccelObjC() const {
+    return AccelObjC;
+  }
+  const StringMap<std::vector<const DIE *> > &getAccelNamespace() const {
+    return AccelNamespace;
+  }
+  const StringMap<std::vector<std::pair<const DIE *, unsigned> > > &
+  getAccelTypes() const {
+    return AccelTypes;
+  }
+
+  unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
+  void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
+
+  /// hasContent - Return true if this compile unit has something to write out.
+  bool hasContent() const { return !UnitDie->getChildren().empty(); }
+
+  /// addRange - Add an address range to the list of ranges for this unit.
+  void addRange(RangeSpan Range);
+
+  /// getRanges - Get the list of ranges for this unit.
+  const SmallVectorImpl<RangeSpan> &getRanges() const { return CURanges; }
+  SmallVectorImpl<RangeSpan> &getRanges() { return CURanges; }
+
+  /// addRangeList - Add an address range list to the list of range lists.
+  void addRangeList(RangeSpanList Ranges) { CURangeLists.push_back(Ranges); }
+
+  /// getRangeLists - Get the vector of range lists.
+  const SmallVectorImpl<RangeSpanList> &getRangeLists() const {
+    return CURangeLists;
+  }
+  SmallVectorImpl<RangeSpanList> &getRangeLists() { return CURangeLists; }
+
+  /// getParentContextString - Get a string containing the language specific
+  /// context for a global name.
+  std::string getParentContextString(DIScope Context) const;
+
+  /// addGlobalName - Add a new global entity to the compile unit.
+  ///
+  void addGlobalName(StringRef Name, DIE *Die, DIScope Context);
+
+  /// addAccelName - Add a new name to the name accelerator table.
+  void addAccelName(StringRef Name, const DIE *Die);
+
+  /// addAccelObjC - Add a new name to the ObjC accelerator table.
+  void addAccelObjC(StringRef Name, const DIE *Die);
+
+  /// addAccelNamespace - Add a new name to the namespace accelerator table.
+  void addAccelNamespace(StringRef Name, const DIE *Die);
+
+  /// addAccelType - Add a new type to the type accelerator table.
+  void addAccelType(StringRef Name, std::pair<const DIE *, unsigned> Die);
+
+  /// getDIE - Returns the debug information entry map slot for the
+  /// specified debug variable. We delegate the request to DwarfDebug
+  /// when the MDNode can be part of the type system, since DIEs for
+  /// the type system can be shared across CUs and the mappings are
+  /// kept in DwarfDebug.
+  DIE *getDIE(DIDescriptor D) const;
+
+  /// getDIELoc - Returns a fresh newly allocated DIELoc.
+  DIELoc *getDIELoc() { return new (DIEValueAllocator) DIELoc(); }
+
+  /// insertDIE - Insert DIE into the map. We delegate the request to DwarfDebug
+  /// when the MDNode can be part of the type system, since DIEs for
+  /// the type system can be shared across CUs and the mappings are
+  /// kept in DwarfDebug.
+  void insertDIE(DIDescriptor Desc, DIE *D);
+
+  /// addDie - Adds or interns the DIE to the compile unit.
+  ///
+  void addDie(DIE *Buffer) { UnitDie->addChild(Buffer); }
+
+  /// addFlag - Add a flag that is true to the DIE.
+  void addFlag(DIE *Die, dwarf::Attribute Attribute);
+
+  /// addUInt - Add an unsigned integer attribute data and value.
+  void addUInt(DIE *Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form,
+               uint64_t Integer);
+
+  void addUInt(DIE *Block, dwarf::Form Form, uint64_t Integer);
+
+  /// addSInt - Add a signed integer attribute data and value.
+  void addSInt(DIE *Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form,
+               int64_t Integer);
+
+  void addSInt(DIELoc *Die, Optional<dwarf::Form> Form, int64_t Integer);
+
+  /// addString - Add a string attribute data and value.
+  void addString(DIE *Die, dwarf::Attribute Attribute, const StringRef Str);
+
+  /// addLocalString - Add a string attribute data and value.
+  void addLocalString(DIE *Die, dwarf::Attribute Attribute,
+                      const StringRef Str);
+
+  /// addExpr - Add a Dwarf expression attribute data and value.
+  void addExpr(DIELoc *Die, dwarf::Form Form, const MCExpr *Expr);
+
+  /// addLabel - Add a Dwarf label attribute data and value.
+  void addLabel(DIE *Die, dwarf::Attribute Attribute, dwarf::Form Form,
+                const MCSymbol *Label);
+
+  void addLabel(DIELoc *Die, dwarf::Form Form, const MCSymbol *Label);
+
+  /// addLocationList - Add a Dwarf loclistptr attribute data and value.
+  void addLocationList(DIE *Die, dwarf::Attribute Attribute, unsigned Index);
+
+  /// addSectionLabel - Add a Dwarf section label attribute data and value.
+  ///
+  void addSectionLabel(DIE *Die, dwarf::Attribute Attribute,
+                       const MCSymbol *Label);
+
+  /// addSectionOffset - Add an offset into a section attribute data and value.
+  ///
+  void addSectionOffset(DIE *Die, dwarf::Attribute Attribute, uint64_t Integer);
+
+  /// addOpAddress - Add a dwarf op address data and value using the
+  /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
+  void addOpAddress(DIELoc *Die, const MCSymbol *Label);
+
+  /// addSectionDelta - Add a section label delta attribute data and value.
+  void addSectionDelta(DIE *Die, dwarf::Attribute Attribute, const MCSymbol *Hi,
+                       const MCSymbol *Lo);
+
+  /// addLabelDelta - Add a label delta attribute data and value.
+  void addLabelDelta(DIE *Die, dwarf::Attribute Attribute, const MCSymbol *Hi,
+                     const MCSymbol *Lo);
+
+  /// addDIEEntry - Add a DIE attribute data and value.
+  void addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIE *Entry);
+
+  /// addDIEEntry - Add a DIE attribute data and value.
+  void addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIEEntry *Entry);
+
+  void addDIETypeSignature(DIE *Die, const DwarfTypeUnit &Type);
+
+  /// addBlock - Add block data.
+  void addBlock(DIE *Die, dwarf::Attribute Attribute, DIELoc *Block);
+
+  /// addBlock - Add block data.
+  void addBlock(DIE *Die, dwarf::Attribute Attribute, DIEBlock *Block);
+
+  /// addSourceLine - Add location information to the specified debug
+  /// information entry.
+  void addSourceLine(DIE *Die, unsigned Line, StringRef File,
+                     StringRef Directory);
+  void addSourceLine(DIE *Die, DIVariable V);
+  void addSourceLine(DIE *Die, DIGlobalVariable G);
+  void addSourceLine(DIE *Die, DISubprogram SP);
+  void addSourceLine(DIE *Die, DIType Ty);
+  void addSourceLine(DIE *Die, DINameSpace NS);
+  void addSourceLine(DIE *Die, DIObjCProperty Ty);
+
+  /// addAddress - Add an address attribute to a die based on the location
+  /// provided.
+  void addAddress(DIE *Die, dwarf::Attribute Attribute,
+                  const MachineLocation &Location, bool Indirect = false);
+
+  /// addConstantValue - Add constant value entry in variable DIE.
+  void addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty);
+  void addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned);
+  void addConstantValue(DIE *Die, const APInt &Val, bool Unsigned);
+
+  /// addConstantFPValue - Add constant value entry in variable DIE.
+  void addConstantFPValue(DIE *Die, const MachineOperand &MO);
+  void addConstantFPValue(DIE *Die, const ConstantFP *CFP);
+
+  /// addTemplateParams - Add template parameters in buffer.
+  void addTemplateParams(DIE &Buffer, DIArray TParams);
+
+  /// addRegisterOp - Add register operand.
+  void addRegisterOp(DIELoc *TheDie, unsigned Reg);
+
+  /// addRegisterOffset - Add register offset.
+  void addRegisterOffset(DIELoc *TheDie, unsigned Reg, int64_t Offset);
+
+  /// addComplexAddress - Start with the address based on the location provided,
+  /// and generate the DWARF information necessary to find the actual variable
+  /// (navigating the extra location information encoded in the type) based on
+  /// the starting location.  Add the DWARF information to the die.
+  void addComplexAddress(const DbgVariable &DV, DIE *Die,
+                         dwarf::Attribute Attribute,
+                         const MachineLocation &Location);
+
+  // FIXME: Should be reformulated in terms of addComplexAddress.
+  /// addBlockByrefAddress - Start with the address based on the location
+  /// provided, and generate the DWARF information necessary to find the
+  /// actual Block variable (navigating the Block struct) based on the
+  /// starting location.  Add the DWARF information to the die.  Obsolete,
+  /// please use addComplexAddress instead.
+  void addBlockByrefAddress(const DbgVariable &DV, DIE *Die,
+                            dwarf::Attribute Attribute,
+                            const MachineLocation &Location);
+
+  /// addVariableAddress - Add DW_AT_location attribute for a
+  /// DbgVariable based on provided MachineLocation.
+  void addVariableAddress(const DbgVariable &DV, DIE *Die,
+                          MachineLocation Location);
+
+  /// addType - Add a new type attribute to the specified entity. This takes
+  /// an attribute parameter because DW_AT_friend attributes are also
+  /// type references.
+  void addType(DIE *Entity, DIType Ty,
+               dwarf::Attribute Attribute = dwarf::DW_AT_type);
+
+  /// getOrCreateNameSpace - Create a DIE for DINameSpace.
+  DIE *getOrCreateNameSpace(DINameSpace NS);
+
+  /// getOrCreateSubprogramDIE - Create new DIE using SP.
+  DIE *getOrCreateSubprogramDIE(DISubprogram SP);
+
+  /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+  /// given DIType.
+  DIE *getOrCreateTypeDIE(const MDNode *N);
+
+  /// createTypeDIE - Create a new type DIE for the given DICompositeType.
+  DIE *createTypeDIE(DICompositeType Ty);
+
+  /// getOrCreateContextDIE - Get context owner's DIE.
+  DIE *getOrCreateContextDIE(DIScope Context);
+
+  /// constructContainingTypeDIEs - Construct DIEs for types that contain
+  /// vtables.
+  void constructContainingTypeDIEs();
+
+  /// constructVariableDIE - Construct a DIE for the given DbgVariable.
+  DIE *constructVariableDIE(DbgVariable &DV, bool isScopeAbstract);
+
+  /// constructSubprogramArguments - Construct function argument DIEs.
+  void constructSubprogramArguments(DIE &Buffer, DIArray Args);
+
+  /// Create a DIE with the given Tag, add the DIE to its parent, and
+  /// call insertDIE if MD is not null.
+  DIE *createAndAddDIE(unsigned Tag, DIE &Parent,
+                       DIDescriptor N = DIDescriptor());
+
+  /// Compute the size of a header for this unit, not including the initial
+  /// length field.
+  virtual unsigned getHeaderSize() const {
+    return sizeof(int16_t) + // DWARF version number
+           sizeof(int32_t) + // Offset Into Abbrev. Section
+           sizeof(int8_t);   // Pointer Size (in bytes)
+  }
+
+  /// Emit the header for this unit, not including the initial length field.
+  virtual void emitHeader(const MCSymbol *ASectionSym) const;
+
+  virtual DwarfCompileUnit &getCU() = 0;
+
+protected:
+  /// getOrCreateStaticMemberDIE - Create new static data member DIE.
+  DIE *getOrCreateStaticMemberDIE(DIDerivedType DT);
+
+  /// Look up the source ID with the given directory and source file names. If
+  /// none currently exists, create a new ID and insert it in the line table.
+  virtual unsigned getOrCreateSourceID(StringRef File, StringRef Directory) = 0;
+
+private:
+  /// constructTypeDIE - Construct basic type die from DIBasicType.
+  void constructTypeDIE(DIE &Buffer, DIBasicType BTy);
+
+  /// constructTypeDIE - Construct derived type die from DIDerivedType.
+  void constructTypeDIE(DIE &Buffer, DIDerivedType DTy);
+
+  /// constructTypeDIE - Construct type DIE from DICompositeType.
+  void constructTypeDIE(DIE &Buffer, DICompositeType CTy);
+
+  /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+  void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
+
+  /// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+  void constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy);
+
+  /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+  void constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy);
+
+  /// constructMemberDIE - Construct member DIE from DIDerivedType.
+  void constructMemberDIE(DIE &Buffer, DIDerivedType DT);
+
+  /// constructTemplateTypeParameterDIE - Construct new DIE for the given
+  /// DITemplateTypeParameter.
+  void constructTemplateTypeParameterDIE(DIE &Buffer,
+                                         DITemplateTypeParameter TP);
+
+  /// constructTemplateValueParameterDIE - Construct new DIE for the given
+  /// DITemplateValueParameter.
+  void constructTemplateValueParameterDIE(DIE &Buffer,
+                                          DITemplateValueParameter TVP);
+
+  /// getDefaultLowerBound - Return the default lower bound for an array. If the
+  /// DWARF version doesn't handle the language, return -1.
+  int64_t getDefaultLowerBound() const;
+
+  /// getDIEEntry - Returns the debug information entry for the specified
+  /// debug variable.
+  DIEEntry *getDIEEntry(const MDNode *N) const {
+    return MDNodeToDIEEntryMap.lookup(N);
+  }
+
+  /// insertDIEEntry - Insert debug information entry into the map.
+  void insertDIEEntry(const MDNode *N, DIEEntry *E) {
+    MDNodeToDIEEntryMap.insert(std::make_pair(N, E));
+  }
+
+  // getIndexTyDie - Get an anonymous type for index type.
+  DIE *getIndexTyDie() { return IndexTyDie; }
+
+  // setIndexTyDie - Set D as anonymous type for index which can be reused
+  // later.
+  void setIndexTyDie(DIE *D) { IndexTyDie = D; }
+
+  /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+  /// information entry.
+  DIEEntry *createDIEEntry(DIE *Entry);
+
+  /// resolve - Look in the DwarfDebug map for the MDNode that
+  /// corresponds to the reference.
+  template <typename T> T resolve(DIRef<T> Ref) const {
+    return DD->resolve(Ref);
+  }
+
+  /// If this is a named finished type then include it in the list of types for
+  /// the accelerator tables.
+  void updateAcceleratorTables(DIScope Context, DIType Ty, const DIE *TyDIE);
+};
+
+class DwarfCompileUnit : public DwarfUnit {
+  /// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding
+  /// the need to search for it in applyStmtList.
+  unsigned stmtListIndex;
+
+public:
+  DwarfCompileUnit(unsigned UID, DIE *D, DICompileUnit Node, AsmPrinter *A,
+                   DwarfDebug *DW, DwarfFile *DWU);
+
+  void initStmtList(MCSymbol *DwarfLineSectionSym);
+
+  /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE.
+  void applyStmtList(DIE &D);
+
+  /// createGlobalVariableDIE - create global variable DIE.
+  void createGlobalVariableDIE(DIGlobalVariable GV);
+
+  /// addLabelAddress - Add a dwarf label attribute data and value using
+  /// either DW_FORM_addr or DW_FORM_GNU_addr_index.
+  void addLabelAddress(DIE *Die, dwarf::Attribute Attribute,
+                       const MCSymbol *Label);
+
+  /// addLocalLabelAddress - Add a dwarf label attribute data and value using
+  /// DW_FORM_addr only.
+  void addLocalLabelAddress(DIE *Die, dwarf::Attribute Attribute,
+                            const MCSymbol *Label);
+
+  DwarfCompileUnit &getCU() override { return *this; }
+
+  unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override;
+};
+
+class DwarfTypeUnit : public DwarfUnit {
+private:
+  uint64_t TypeSignature;
+  const DIE *Ty;
+  DwarfCompileUnit &CU;
+  MCDwarfDwoLineTable *SplitLineTable;
+
+public:
+  DwarfTypeUnit(unsigned UID, DIE *D, DwarfCompileUnit &CU, AsmPrinter *A,
+                DwarfDebug *DW, DwarfFile *DWU,
+                MCDwarfDwoLineTable *SplitLineTable = nullptr);
+
+  void setTypeSignature(uint64_t Signature) { TypeSignature = Signature; }
+  uint64_t getTypeSignature() const { return TypeSignature; }
+  void setType(const DIE *Ty) { this->Ty = Ty; }
+
+  /// Emit the header for this unit, not including the initial length field.
+  void emitHeader(const MCSymbol *ASectionSym) const override;
+  unsigned getHeaderSize() const override {
+    return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature
+           sizeof(uint32_t);                               // Type DIE Offset
+  }
+  void initSection(const MCSection *Section);
+  DwarfCompileUnit &getCU() override { return CU; }
+
+protected:
+  unsigned getOrCreateSourceID(StringRef File, StringRef Directory) override;
+};
+} // end llvm namespace
+#endif
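The two getHeaderSize() overrides above fix the per-unit header layout (the initial length field excluded): a compile unit header is version + abbrev offset + pointer size, and a type unit appends a type signature and a type DIE offset. A self-contained check of that arithmetic, independent of LLVM:

    #include <cassert>
    #include <cstdint>

    int main() {
      unsigned CUHeader = sizeof(int16_t)    // DWARF version number
                          + sizeof(int32_t)  // offset into the abbrev. section
                          + sizeof(int8_t);  // pointer size in bytes
      unsigned TUHeader = CUHeader
                          + sizeof(uint64_t)  // type signature
                          + sizeof(uint32_t); // type DIE offset
      assert(CUHeader == 7 && TUHeader == 19);
      return 0;
    }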
diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index a8fb66d..bfcbe6b 100644
--- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -14,8 +14,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/GCs.h"
 #include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/GCs.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instruction.h"
@@ -35,8 +35,8 @@
 
   class ErlangGCPrinter : public GCMetadataPrinter {
   public:
-    void beginAssembly(AsmPrinter &AP);
-    void finishAssembly(AsmPrinter &AP);
+    void beginAssembly(AsmPrinter &AP) override;
+    void finishAssembly(AsmPrinter &AP) override;
   };
 
 }
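This hunk (and the matching ones in OcamlGCPrinter, Win64Exception, BasicTargetTransformInfo, and BranchFolding below) is part of an LLVM-wide C++11 cleanup that marks virtual overrides with the override keyword. A contrived, self-contained illustration of what the keyword buys; the types here are stand-ins, not the real AsmPrinter API:

    struct Base {
      virtual void beginAssembly(int &Out) {}
      virtual ~Base() {}
    };

    struct Derived : Base {
      // If Base::beginAssembly's signature ever drifts (say, int& becomes
      // long&), this declaration turns from a silent hide of the base method
      // into a hard compile error.
      void beginAssembly(int &Out) override {}
    };

    int main() { return 0; }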
diff --git a/lib/CodeGen/AsmPrinter/LLVMBuild.txt b/lib/CodeGen/AsmPrinter/LLVMBuild.txt
index 20b1f7b..bbdb0c7 100644
--- a/lib/CodeGen/AsmPrinter/LLVMBuild.txt
+++ b/lib/CodeGen/AsmPrinter/LLVMBuild.txt
@@ -19,4 +19,4 @@
 type = Library
 name = AsmPrinter
 parent = Libraries
-required_libraries = Analysis CodeGen Core MC MCParser Support Target
+required_libraries = Analysis CodeGen Core MC MCParser Support Target TransformUtils
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 98177c0..5a9ecd7 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -16,6 +16,7 @@
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/GCMetadataPrinter.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Mangler.h"
 #include "llvm/IR/Module.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
@@ -23,7 +24,6 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
-#include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
 #include <cctype>
@@ -33,8 +33,8 @@
 
   class OcamlGCMetadataPrinter : public GCMetadataPrinter {
   public:
-    void beginAssembly(AsmPrinter &AP);
-    void finishAssembly(AsmPrinter &AP);
+    void beginAssembly(AsmPrinter &AP) override;
+    void finishAssembly(AsmPrinter &AP) override;
   };
 
 }
diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
index 1561012..17d8bff 100644
--- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp
+++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -20,6 +20,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Mangler.h"
 #include "llvm/IR/Module.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
@@ -30,7 +31,6 @@
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
-#include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetOptions.h"
@@ -44,14 +44,14 @@
 
 Win64Exception::~Win64Exception() {}
 
-/// EndModule - Emit all exception information that should come after the
+/// endModule - Emit all exception information that should come after the
 /// content.
-void Win64Exception::EndModule() {
+void Win64Exception::endModule() {
 }
 
-/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// beginFunction - Gather pre-function exception information. Assumes it's
 /// being emitted immediately after the function entry point.
-void Win64Exception::BeginFunction(const MachineFunction *MF) {
+void Win64Exception::beginFunction(const MachineFunction *MF) {
   shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
 
   // If any landing pads survive, we need an EH table.
@@ -86,9 +86,9 @@
                                                 Asm->getFunctionNumber()));
 }
 
-/// EndFunction - Gather and emit post-function exception information.
+/// endFunction - Gather and emit post-function exception information.
 ///
-void Win64Exception::EndFunction() {
+void Win64Exception::endFunction(const MachineFunction *) {
   if (!shouldEmitPersonality && !shouldEmitMoves)
     return;
 
@@ -101,7 +101,8 @@
   if (shouldEmitPersonality) {
     const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
     const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
-    const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI);
+    const MCSymbol *Sym =
+        TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
 
     Asm->OutStreamer.PushSection();
     Asm->OutStreamer.EmitWin64EHHandlerData();
diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
new file mode 100644
index 0000000..50b2ca8
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
@@ -0,0 +1,336 @@
+//===-- llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp --*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing line tables info into COFF files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "WinCodeViewLineTables.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/COFF.h"
+
+namespace llvm {
+
+StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) {
+  assert(S);
+  DIDescriptor D(S);
+  assert((D.isCompileUnit() || D.isFile() || D.isSubprogram() ||
+          D.isLexicalBlockFile() || D.isLexicalBlock()) &&
+         "Unexpected scope info");
+
+  DIScope Scope(S);
+  StringRef Dir = Scope.getDirectory(),
+            Filename = Scope.getFilename();
+  char *&Result = DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)];
+  if (Result != 0)
+    return Result;
+
+  // Clang emits directory and relative filename info into the IR, but CodeView
+  // operates on full paths.  We could change Clang to emit full paths too, but
+  // that would increase the IR size and probably isn't needed for other users.
+  // For now, just concatenate and canonicalize the path here.
+  std::string Filepath;
+  if (Filename.find(':') == 1)
+    Filepath = Filename;
+  else
+    Filepath = (Dir + Twine("\\") + Filename).str();
+
+  // Canonicalize the path.  We have to do it textually because we may no
+  // longer have access to the file in the filesystem.
+  // First, replace all slashes with backslashes.
+  std::replace(Filepath.begin(), Filepath.end(), '/', '\\');
+
+  // Replace all "\.\" with "\".
+  size_t Cursor = 0;
+  while ((Cursor = Filepath.find("\\.\\", Cursor)) != std::string::npos)
+    Filepath.erase(Cursor, 2);
+
+  // Replace all "\XXX\..\" with "\".  Don't try too hard though as the original
+  // path should be well-formatted, e.g. start with a drive letter, etc.
+  Cursor = 0;
+  while ((Cursor = Filepath.find("\\..\\", Cursor)) != std::string::npos) {
+    // Something's wrong if the path starts with "\..\", abort.
+    if (Cursor == 0)
+      break;
+
+    size_t PrevSlash = Filepath.rfind('\\', Cursor - 1);
+    if (PrevSlash == std::string::npos)
+      // Something's wrong, abort.
+      break;
+
+    Filepath.erase(PrevSlash, Cursor + 3 - PrevSlash);
+    // The next ".." might be following the one we've just erased.
+    Cursor = PrevSlash;
+  }
+
+  // Remove all duplicate backslashes.
+  Cursor = 0;
+  while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos)
+    Filepath.erase(Cursor, 1);
+
+  Result = strdup(Filepath.c_str());
+  return StringRef(Result);
+}
+
+void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL,
+                                                const MachineFunction *MF) {
+  const MDNode *Scope = DL.getScope(MF->getFunction()->getContext());
+  if (!Scope)
+    return;
+  StringRef Filename = getFullFilepath(Scope);
+
+  // Skip this instruction if it has the same file:line as the previous one.
+  assert(CurFn);
+  if (!CurFn->Instrs.empty()) {
+    const InstrInfoTy &LastInstr = InstrInfo[CurFn->Instrs.back()];
+    if (LastInstr.Filename == Filename && LastInstr.LineNumber == DL.getLine())
+      return;
+  }
+  FileNameRegistry.add(Filename);
+
+  MCSymbol *MCL = Asm->MMI->getContext().CreateTempSymbol();
+  Asm->OutStreamer.EmitLabel(MCL);
+  CurFn->Instrs.push_back(MCL);
+  InstrInfo[MCL] = InstrInfoTy(Filename, DL.getLine());
+}
+
+WinCodeViewLineTables::WinCodeViewLineTables(AsmPrinter *AP)
+    : Asm(0), CurFn(0) {
+  MachineModuleInfo *MMI = AP->MMI;
+
+  // If module doesn't have named metadata anchors or COFF debug section
+  // is not available, skip any debug info related stuff.
+  if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") ||
+      !AP->getObjFileLowering().getCOFFDebugSymbolsSection())
+    return;
+
+  // Tell MMI that we have debug info.
+  MMI->setDebugInfoAvailability(true);
+  Asm = AP;
+}
+
+static void EmitLabelDiff(MCStreamer &Streamer,
+                          const MCSymbol *From, const MCSymbol *To) {
+  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+  MCContext &Context = Streamer.getContext();
+  const MCExpr *FromRef = MCSymbolRefExpr::Create(From, Variant, Context),
+               *ToRef   = MCSymbolRefExpr::Create(To, Variant, Context);
+  const MCExpr *AddrDelta =
+      MCBinaryExpr::Create(MCBinaryExpr::Sub, ToRef, FromRef, Context);
+  Streamer.EmitValue(AddrDelta, 4);
+}
+
+void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
+  // For each function there is a separate subsection
+  // which holds the PC to file:line table.
+  const MCSymbol *Fn = Asm->getSymbol(GV);
+  assert(Fn);
+
+  const FunctionInfo &FI = FnDebugInfo[GV];
+  if (FI.Instrs.empty())
+    return;
+  assert(FI.End && "Don't know where the function ends?");
+
+  // PCs/Instructions are grouped into segments sharing the same filename.
+  // Pre-calculate the lengths (in instructions) of these segments and store
+  // them in a map for convenience.  Each index in the map is the sequential
+  // number of the respective instruction that starts a new segment.
+  DenseMap<size_t, size_t> FilenameSegmentLengths;
+  size_t LastSegmentEnd = 0;
+  StringRef PrevFilename = InstrInfo[FI.Instrs[0]].Filename;
+  for (size_t J = 1, F = FI.Instrs.size(); J != F; ++J) {
+    if (PrevFilename == InstrInfo[FI.Instrs[J]].Filename)
+      continue;
+    FilenameSegmentLengths[LastSegmentEnd] = J - LastSegmentEnd;
+    LastSegmentEnd = J;
+    PrevFilename = InstrInfo[FI.Instrs[J]].Filename;
+  }
+  FilenameSegmentLengths[LastSegmentEnd] = FI.Instrs.size() - LastSegmentEnd;
+
+  // Emit the control code of the subsection followed by the payload size.
+  Asm->OutStreamer.AddComment(
+      "Linetable subsection for " + Twine(Fn->getName()));
+  Asm->EmitInt32(COFF::DEBUG_LINE_TABLE_SUBSECTION);
+  MCSymbol *SubsectionBegin = Asm->MMI->getContext().CreateTempSymbol(),
+           *SubsectionEnd = Asm->MMI->getContext().CreateTempSymbol();
+  EmitLabelDiff(Asm->OutStreamer, SubsectionBegin, SubsectionEnd);
+  Asm->OutStreamer.EmitLabel(SubsectionBegin);
+
+  // Identify the function this subsection is for.
+  Asm->OutStreamer.EmitCOFFSecRel32(Fn);
+  Asm->OutStreamer.EmitCOFFSectionIndex(Fn);
+
+  // Length of the function's code, in bytes.
+  EmitLabelDiff(Asm->OutStreamer, Fn, FI.End);
+
+  // PC-to-linenumber lookup table:
+  MCSymbol *FileSegmentEnd = 0;
+  for (size_t J = 0, F = FI.Instrs.size(); J != F; ++J) {
+    MCSymbol *Instr = FI.Instrs[J];
+    assert(InstrInfo.count(Instr));
+
+    if (FilenameSegmentLengths.count(J)) {
+      // We've reached the beginning of a new filename segment.
+      if (FileSegmentEnd)
+        Asm->OutStreamer.EmitLabel(FileSegmentEnd);
+      StringRef CurFilename = InstrInfo[FI.Instrs[J]].Filename;
+      assert(FileNameRegistry.Infos.count(CurFilename));
+      size_t IndexInStringTable =
+          FileNameRegistry.Infos[CurFilename].FilenameID;
+      // Each segment starts with the offset of the filename
+      // in the string table.
+      Asm->OutStreamer.AddComment(
+          "Segment for file '" + Twine(CurFilename) + "' begins");
+      MCSymbol *FileSegmentBegin = Asm->MMI->getContext().CreateTempSymbol();
+      Asm->OutStreamer.EmitLabel(FileSegmentBegin);
+      Asm->EmitInt32(8 * IndexInStringTable);
+
+      // Number of PC records in the lookup table.
+      size_t SegmentLength = FilenameSegmentLengths[J];
+      Asm->EmitInt32(SegmentLength);
+
+      // Full size of the segment for this filename, including the prev two
+      // records.
+      FileSegmentEnd = Asm->MMI->getContext().CreateTempSymbol();
+      EmitLabelDiff(Asm->OutStreamer, FileSegmentBegin, FileSegmentEnd);
+    }
+
+    // The first PC with the given linenumber and the linenumber itself.
+    EmitLabelDiff(Asm->OutStreamer, Fn, Instr);
+    Asm->EmitInt32(InstrInfo[Instr].LineNumber);
+  }
+
+  if (FileSegmentEnd)
+    Asm->OutStreamer.EmitLabel(FileSegmentEnd);
+  Asm->OutStreamer.EmitLabel(SubsectionEnd);
+}
+
+void WinCodeViewLineTables::endModule() {
+  if (FnDebugInfo.empty())
+    return;
+
+  assert(Asm != 0);
+  Asm->OutStreamer.SwitchSection(
+      Asm->getObjFileLowering().getCOFFDebugSymbolsSection());
+  Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC);
+
+  // The COFF .debug$S section consists of several subsections, each starting
+  // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length
+  // of the payload followed by the payload itself.  The subsections are 4-byte
+  // aligned.
+
+  for (size_t I = 0, E = VisitedFunctions.size(); I != E; ++I)
+    emitDebugInfoForFunction(VisitedFunctions[I]);
+
+  // This subsection maps file indices to string table offsets.
+  Asm->OutStreamer.AddComment("File index to string table offset subsection");
+  Asm->EmitInt32(COFF::DEBUG_INDEX_SUBSECTION);
+  size_t NumFilenames = FileNameRegistry.Infos.size();
+  Asm->EmitInt32(8 * NumFilenames);
+  for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) {
+    StringRef Filename = FileNameRegistry.Filenames[I];
+    // For each unique filename, just write its offset in the string table.
+    Asm->EmitInt32(FileNameRegistry.Infos[Filename].StartOffset);
+    // The filename offset is not followed by any additional data.
+    Asm->EmitInt32(0);
+  }
+
+  // This subsection holds the string table.
+  Asm->OutStreamer.AddComment("String table");
+  Asm->EmitInt32(COFF::DEBUG_STRING_TABLE_SUBSECTION);
+  Asm->EmitInt32(FileNameRegistry.LastOffset);
+  // The payload starts with a null character.
+  Asm->EmitInt8(0);
+
+  for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) {
+    // Just emit unique filenames one by one, separated by a null character.
+    Asm->OutStreamer.EmitBytes(FileNameRegistry.Filenames[I]);
+    Asm->EmitInt8(0);
+  }
+
+  // No more subsections. Fill with zeros to align the end of the section on
+  // a 4-byte boundary.
+  Asm->OutStreamer.EmitFill((-FileNameRegistry.LastOffset) % 4, 0);
+
+  clear();
+}
+
+void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) {
+  assert(!CurFn && "Can't process two functions at once!");
+
+  if (!Asm || !Asm->MMI->hasDebugInfo())
+    return;
+
+  const Function *GV = MF->getFunction();
+  assert(FnDebugInfo.count(GV) == false);
+  VisitedFunctions.push_back(GV);
+  CurFn = &FnDebugInfo[GV];
+
+  // Find the end of the function prolog.
+  // FIXME: is there a simpler way to do this? Can we just search
+  // for the first instruction of the function, not the last of the prolog?
+  DebugLoc PrologEndLoc;
+  bool EmptyPrologue = true;
+  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+       I != E && PrologEndLoc.isUnknown(); ++I) {
+    for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+         II != IE; ++II) {
+      const MachineInstr *MI = II;
+      if (MI->isDebugValue())
+        continue;
+
+      // First known non-DBG_VALUE and non-frame setup location marks
+      // the beginning of the function body.
+      // FIXME: do we need the first subcondition?
+      if (!MI->getFlag(MachineInstr::FrameSetup) &&
+          (!MI->getDebugLoc().isUnknown())) {
+        PrologEndLoc = MI->getDebugLoc();
+        break;
+      }
+      EmptyPrologue = false;
+    }
+  }
+  // Record beginning of function if we have a non-empty prologue.
+  if (!PrologEndLoc.isUnknown() && !EmptyPrologue) {
+    DebugLoc FnStartDL =
+        PrologEndLoc.getFnDebugLoc(MF->getFunction()->getContext());
+    maybeRecordLocation(FnStartDL, MF);
+  }
+}
+
+void WinCodeViewLineTables::endFunction(const MachineFunction *MF) {
+  if (!Asm || !CurFn)  // We haven't created any debug info for this function.
+    return;
+
+  const Function *GV = MF->getFunction();
+  assert(FnDebugInfo.count(GV) == true);
+  assert(CurFn == &FnDebugInfo[GV]);
+
+  if (CurFn->Instrs.empty()) {
+    FnDebugInfo.erase(GV);
+    VisitedFunctions.pop_back();
+  } else {
+    // Define end label for subprogram.
+    MCSymbol *FunctionEndSym = Asm->OutStreamer.getContext().CreateTempSymbol();
+    Asm->OutStreamer.EmitLabel(FunctionEndSym);
+    CurFn->End = FunctionEndSym;
+  }
+  CurFn = 0;
+}
+
+void WinCodeViewLineTables::beginInstruction(const MachineInstr *MI) {
+  // Ignore DBG_VALUE locations and function prologue.
+  if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup))
+    return;
+  DebugLoc DL = MI->getDebugLoc();
+  if (DL == PrevInstLoc || DL.isUnknown())
+    return;
+  maybeRecordLocation(DL, Asm->MF);
+}
+}
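getFullFilepath's canonicalization above is deliberately textual, since the original filesystem may be unavailable at compile time. A standalone sketch of the same four passes (hypothetical helper; std::string in place of the LLVM string types):

    #include <algorithm>
    #include <cassert>
    #include <string>

    static std::string canonicalizeWinPath(std::string Path) {
      // Forward slashes become backslashes.
      std::replace(Path.begin(), Path.end(), '/', '\\');
      // Collapse every "\.\" to "\" (erase the backslash and the dot).
      for (size_t C = 0; (C = Path.find("\\.\\", C)) != std::string::npos;)
        Path.erase(C, 2);
      // Collapse "\XXX\..\" to "\", scanning left from each "..".
      for (size_t C = 0; (C = Path.find("\\..\\", C)) != std::string::npos;) {
        if (C == 0)
          break; // path starts with "\..\": malformed, give up
        size_t PrevSlash = Path.rfind('\\', C - 1);
        if (PrevSlash == std::string::npos)
          break; // no enclosing segment: give up
        Path.erase(PrevSlash, C + 3 - PrevSlash);
        C = PrevSlash; // the next ".." may directly follow the erased span
      }
      // Drop duplicate backslashes.
      for (size_t C = 0; (C = Path.find("\\\\", C)) != std::string::npos;)
        Path.erase(C, 1);
      return Path;
    }

    int main() {
      assert(canonicalizeWinPath("d/sub/./file.c") == "d\\sub\\file.c");
      assert(canonicalizeWinPath("C:\\x\\y\\..\\z") == "C:\\x\\z");
      return 0;
    }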
diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
new file mode 100644
index 0000000..a7a6205
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
@@ -0,0 +1,144 @@
+//===-- llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h ----*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing line tables info into COFF files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H__
+#define CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H__
+
+#include "AsmPrinterHandler.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+/// \brief Collects and handles line tables information in a CodeView format.
+class WinCodeViewLineTables : public AsmPrinterHandler {
+  AsmPrinter *Asm;
+  DebugLoc PrevInstLoc;
+
+  // For each function, store a vector of labels to its instructions, as well as
+  // to the end of the function.
+  struct FunctionInfo {
+    SmallVector<MCSymbol *, 10> Instrs;
+    MCSymbol *End;
+    FunctionInfo() : End(0) {}
+  } *CurFn;
+
+  typedef DenseMap<const Function *, FunctionInfo> FnDebugInfoTy;
+  FnDebugInfoTy FnDebugInfo;
+  // Store the functions we've visited in a vector so we can maintain a stable
+  // order while emitting subsections.
+  SmallVector<const Function *, 10> VisitedFunctions;
+
+  // InstrInfoTy - Holds the Filename:LineNumber information for every
+  // instruction with a unique debug location.
+  struct InstrInfoTy {
+    StringRef Filename;
+    unsigned LineNumber;
+
+    InstrInfoTy() : LineNumber(0) {}
+
+    InstrInfoTy(StringRef Filename, unsigned LineNumber)
+        : Filename(Filename), LineNumber(LineNumber) {}
+  };
+  DenseMap<MCSymbol *, InstrInfoTy> InstrInfo;
+
+  // FileNameRegistry - Manages filenames observed while generating debug info
+  // by filtering out duplicates and keeping track of the offsets in the
+  // string table to be generated.
+  struct FileNameRegistryTy {
+    SmallVector<StringRef, 10> Filenames;
+    struct PerFileInfo {
+      size_t FilenameID, StartOffset;
+    };
+    StringMap<PerFileInfo> Infos;
+
+    // The offset in the string table where we'll write the next unique
+    // filename.
+    size_t LastOffset;
+
+    FileNameRegistryTy() {
+      clear();
+    }
+
+    // Add Filename to the registry, if it was not observed before.
+    void add(StringRef Filename) {
+      if (Infos.count(Filename))
+        return;
+      size_t OldSize = Infos.size();
+      Infos[Filename].FilenameID = OldSize;
+      Infos[Filename].StartOffset = LastOffset;
+      LastOffset += Filename.size() + 1;
+      Filenames.push_back(Filename);
+    }
+
+    void clear() {
+      LastOffset = 1;
+      Infos.clear();
+      Filenames.clear();
+    }
+  } FileNameRegistry;
+
+  typedef std::map<std::pair<StringRef, StringRef>, char *>
+      DirAndFilenameToFilepathMapTy;
+  DirAndFilenameToFilepathMapTy DirAndFilenameToFilepathMap;
+  StringRef getFullFilepath(const MDNode *S);
+
+  void maybeRecordLocation(DebugLoc DL, const MachineFunction *MF);
+
+  void clear() {
+    assert(CurFn == 0);
+    FileNameRegistry.clear();
+    InstrInfo.clear();
+  }
+
+  void emitDebugInfoForFunction(const Function *GV);
+
+public:
+  WinCodeViewLineTables(AsmPrinter *Asm);
+
+  ~WinCodeViewLineTables() {
+    for (DirAndFilenameToFilepathMapTy::iterator
+             I = DirAndFilenameToFilepathMap.begin(),
+             E = DirAndFilenameToFilepathMap.end();
+         I != E; ++I)
+      free(I->second);
+  }
+
+  void setSymbolSize(const llvm::MCSymbol *, uint64_t) override {}
+
+  /// \brief Emit the COFF section that holds the line table information.
+  void endModule() override;
+
+  /// \brief Gather pre-function debug information.
+  void beginFunction(const MachineFunction *MF) override;
+
+  /// \brief Gather post-function debug information.
+  void endFunction(const MachineFunction *) override;
+
+  /// \brief Process beginning of an instruction.
+  void beginInstruction(const MachineInstr *MI) override;
+
+  /// \brief Process end of an instruction.
+  void endInstruction() override {}
+};
+} // End of namespace llvm
+
+#endif
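The subtlety in FileNameRegistryTy above is the offset bookkeeping: the string table payload opens with a single NUL byte, so LastOffset starts at 1, and every registered filename advances it by size() + 1 for its own terminator. A standalone mimic with std:: containers in place of the LLVM ADTs (all names hypothetical):

    #include <cassert>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    struct Registry {
      std::map<std::string, std::pair<size_t, size_t> > Infos; // name -> {ID, offset}
      std::vector<std::string> Filenames;
      size_t LastOffset;

      Registry() : LastOffset(1) {} // string table payload starts with a NUL

      void add(const std::string &F) {
        if (Infos.count(F))
          return;
        size_t ID = Infos.size();
        Infos[F] = std::make_pair(ID, LastOffset);
        LastOffset += F.size() + 1; // every entry is NUL-terminated
        Filenames.push_back(F);
      }
    };

    int main() {
      Registry R;
      R.add("a.c"); // ID 0, offset 1: table is "\0a.c\0"
      R.add("b.c"); // ID 1, offset 5: table is "\0a.c\0b.c\0"
      R.add("a.c"); // duplicate, ignored
      assert(R.Infos["b.c"].second == 5 && R.Filenames.size() == 2);
      return 0;
    }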
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index 24aa1ab..c6654ec2 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -20,12 +20,11 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include <utility>
-
 using namespace llvm;
 
 namespace {
 
-class BasicTTI : public ImmutablePass, public TargetTransformInfo {
+class BasicTTI final : public ImmutablePass, public TargetTransformInfo {
   const TargetMachine *TM;
 
   /// Estimate the overhead of scalarizing an instruction. Insert and Extract
@@ -43,15 +42,11 @@
     initializeBasicTTIPass(*PassRegistry::getPassRegistry());
   }
 
-  virtual void initializePass() {
+  void initializePass() override {
     pushTTIStack(this);
   }
 
-  virtual void finalizePass() {
-    popTTIStack();
-  }
-
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
     TargetTransformInfo::getAnalysisUsage(AU);
   }
 
@@ -59,61 +54,61 @@
   static char ID;
 
   /// Provide necessary pointer adjustments for the two base classes.
-  virtual void *getAdjustedAnalysisPointer(const void *ID) {
+  void *getAdjustedAnalysisPointer(const void *ID) override {
     if (ID == &TargetTransformInfo::ID)
       return (TargetTransformInfo*)this;
     return this;
   }
 
-  virtual bool hasBranchDivergence() const;
+  bool hasBranchDivergence() const override;
 
   /// \name Scalar TTI Implementations
   /// @{
 
-  virtual bool isLegalAddImmediate(int64_t imm) const;
-  virtual bool isLegalICmpImmediate(int64_t imm) const;
-  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
-                                     int64_t BaseOffset, bool HasBaseReg,
-                                     int64_t Scale) const;
-  virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
-                                   int64_t BaseOffset, bool HasBaseReg,
-                                   int64_t Scale) const;
-  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
-  virtual bool isTypeLegal(Type *Ty) const;
-  virtual unsigned getJumpBufAlignment() const;
-  virtual unsigned getJumpBufSize() const;
-  virtual bool shouldBuildLookupTables() const;
-  virtual bool haveFastSqrt(Type *Ty) const;
-  virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
+  bool isLegalAddImmediate(int64_t imm) const override;
+  bool isLegalICmpImmediate(int64_t imm) const override;
+  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
+                             int64_t BaseOffset, bool HasBaseReg,
+                             int64_t Scale) const override;
+  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
+                           int64_t BaseOffset, bool HasBaseReg,
+                           int64_t Scale) const override;
+  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
+  bool isTypeLegal(Type *Ty) const override;
+  unsigned getJumpBufAlignment() const override;
+  unsigned getJumpBufSize() const override;
+  bool shouldBuildLookupTables() const override;
+  bool haveFastSqrt(Type *Ty) const override;
+  void getUnrollingPreferences(Loop *L,
+                               UnrollingPreferences &UP) const override;
 
   /// @}
 
   /// \name Vector TTI Implementations
   /// @{
 
-  virtual unsigned getNumberOfRegisters(bool Vector) const;
-  virtual unsigned getMaximumUnrollFactor() const;
-  virtual unsigned getRegisterBitWidth(bool Vector) const;
-  virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
-                                          OperandValueKind,
-                                          OperandValueKind) const;
-  virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
-                                  int Index, Type *SubTp) const;
-  virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
-                                    Type *Src) const;
-  virtual unsigned getCFInstrCost(unsigned Opcode) const;
-  virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
-                                      Type *CondTy) const;
-  virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
-                                      unsigned Index) const;
-  virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
-                                   unsigned Alignment,
-                                   unsigned AddressSpace) const;
-  virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
-                                         ArrayRef<Type*> Tys) const;
-  virtual unsigned getNumberOfParts(Type *Tp) const;
-  virtual unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const;
-  virtual unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) const;
+  unsigned getNumberOfRegisters(bool Vector) const override;
+  unsigned getMaximumUnrollFactor() const override;
+  unsigned getRegisterBitWidth(bool Vector) const override;
+  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
+                                  OperandValueKind) const override;
+  unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+                          int Index, Type *SubTp) const override;
+  unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+                            Type *Src) const override;
+  unsigned getCFInstrCost(unsigned Opcode) const override;
+  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+                              Type *CondTy) const override;
+  unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+                              unsigned Index) const override;
+  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+                           unsigned AddressSpace) const override;
+  unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
+                                 ArrayRef<Type*> Tys) const override;
+  unsigned getNumberOfParts(Type *Tp) const override;
+  unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override;
+  unsigned getReductionCost(unsigned Opcode, Type *Ty,
+                            bool IsPairwise) const override;
 
   /// @}
 };
@@ -302,7 +297,8 @@
     return 0;
 
   // If the cast is marked as legal (or promote) then assume low cost.
-  if (TLI->isOperationLegalOrPromote(ISD, DstLT.second))
+  if (SrcLT.first == DstLT.first &&
+      TLI->isOperationLegalOrPromote(ISD, DstLT.second))
     return 1;
 
   // Handle scalar conversions.
@@ -409,7 +405,9 @@
 
 unsigned BasicTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
                                       unsigned Index) const {
-  return 1;
+  std::pair<unsigned, MVT> LT =
+      getTLI()->getTypeLegalizationCost(Val->getScalarType());
+
+  return LT.first;
 }
 
 unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src,
@@ -418,8 +416,30 @@
   assert(!Src->isVoidTy() && "Invalid type");
   std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Src);
 
-  // Assume that all loads of legal types cost 1.
-  return LT.first;
+  // Assume that all loads of legal types cost 1.
+  unsigned Cost = LT.first;
+
+  if (Src->isVectorTy() &&
+      Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
+    // This is a vector load that legalizes to a larger type than the vector
+    // itself. Unless the corresponding extending load or truncating store is
+    // legal, this will scalarize.
+    TargetLowering::LegalizeAction LA;
+    MVT MemVT = getTLI()->getSimpleValueType(Src, true);
+    if (Opcode == Instruction::Store)
+      LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
+    else
+      LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, MemVT);
+
+    if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
+      // This is a vector load/store for some illegal type that is scalarized.
+      // We must account for the cost of building or decomposing the vector.
+      Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
+                                            Opcode == Instruction::Store);
+    }
+  }
+
+  return Cost;
 }
 
 unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
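The getMemoryOpCost change above keeps the old LT.first baseline but adds scalarization overhead when a vector memory access legalizes to a wider type and the matching extending load or truncating store is neither Legal nor Custom. A plain-arithmetic stand-in for that decision (all names illustrative, with getScalarizationOverhead() approximated as one insert or extract per element):

    #include <cassert>

    static unsigned memoryOpCostSketch(unsigned LegalizationCost,
                                       bool VecNarrowerThanLegalType,
                                       bool ExtOrTruncLegalOrCustom,
                                       unsigned NumElts) {
      unsigned Cost = LegalizationCost; // each legal-type load/store costs 1
      // A narrow vector with no legal/custom extending load (or truncating
      // store) is scalarized: also pay to build or decompose the vector.
      if (VecNarrowerThanLegalType && !ExtOrTruncLegalOrCustom)
        Cost += NumElts;
      return Cost;
    }

    int main() {
      // <4 x i32> load, already legal: cost 1.
      assert(memoryOpCostSketch(1, false, true, 4) == 1);
      // <4 x i8> load legalizing to v4i32, no legal EXTLOAD: 1 + 4 lanes.
      assert(memoryOpCostSketch(1, true, false, 4) == 5);
      return 0;
    }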
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 9cd4208..b39777e 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -66,9 +66,9 @@
     static char ID;
     explicit BranchFolderPass(): MachineFunctionPass(ID) {}
 
-    virtual bool runOnMachineFunction(MachineFunction &MF);
+    bool runOnMachineFunction(MachineFunction &MF) override;
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.addRequired<TargetPassConfig>();
       MachineFunctionPass::getAnalysisUsage(AU);
     }
@@ -82,8 +82,15 @@
                 "Control Flow Optimizer", false, false)
 
 bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
+  if (skipOptnoneFunction(*MF.getFunction()))
+    return false;
+
   TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
-  BranchFolder Folder(PassConfig->getEnableTailMerge(), /*CommonHoist=*/true);
+  // TailMerge can create jumps into 'if' branches, making the CFG irreducible
+  // for HW that requires a structured CFG.
+  bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
+      PassConfig->getEnableTailMerge();
+  BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true);
   return Folder.OptimizeFunction(MF,
                                  MF.getTarget().getInstrInfo(),
                                  MF.getTarget().getRegisterInfo(),
@@ -379,7 +386,7 @@
   if (RS) {
     RS->enterBasicBlock(CurMBB);
     if (!CurMBB->empty())
-      RS->forward(prior(CurMBB->end()));
+      RS->forward(std::prev(CurMBB->end()));
     BitVector RegsLiveAtExit(TRI->getNumRegs());
     RS->getRegsUsed(RegsLiveAtExit, false);
     for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
@@ -458,7 +465,7 @@
 static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
                     const TargetInstrInfo *TII) {
   MachineFunction *MF = CurMBB->getParent();
-  MachineFunction::iterator I = llvm::next(MachineFunction::iterator(CurMBB));
+  MachineFunction::iterator I = std::next(MachineFunction::iterator(CurMBB));
   MachineBasicBlock *TBB = 0, *FBB = 0;
   SmallVector<MachineOperand, 4> Cond;
   DebugLoc dl;  // FIXME: this is nowhere
@@ -596,12 +603,11 @@
   unsigned maxCommonTailLength = 0U;
   SameTails.clear();
   MachineBasicBlock::iterator TrialBBI1, TrialBBI2;
-  MPIterator HighestMPIter = prior(MergePotentials.end());
-  for (MPIterator CurMPIter = prior(MergePotentials.end()),
+  MPIterator HighestMPIter = std::prev(MergePotentials.end());
+  for (MPIterator CurMPIter = std::prev(MergePotentials.end()),
                   B = MergePotentials.begin();
-       CurMPIter != B && CurMPIter->getHash() == CurHash;
-       --CurMPIter) {
-    for (MPIterator I = prior(CurMPIter); I->getHash() == CurHash ; --I) {
+       CurMPIter != B && CurMPIter->getHash() == CurHash; --CurMPIter) {
+    for (MPIterator I = std::prev(CurMPIter); I->getHash() == CurHash; --I) {
       unsigned CommonTailLen;
       if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(),
                             minCommonTailLength,
@@ -630,9 +636,9 @@
                                         MachineBasicBlock *SuccBB,
                                         MachineBasicBlock *PredBB) {
   MPIterator CurMPIter, B;
-  for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin();
-       CurMPIter->getHash() == CurHash;
-       --CurMPIter) {
+  for (CurMPIter = std::prev(MergePotentials.end()),
+      B = MergePotentials.begin();
+       CurMPIter->getHash() == CurHash; --CurMPIter) {
     // Put the unconditional branch back, if we need one.
     MachineBasicBlock *CurMBB = CurMPIter->getBlock();
     if (SuccBB && CurMBB != PredBB)
@@ -864,12 +870,12 @@
   // a compile-time infinite loop repeatedly doing and undoing the same
   // transformations.)
 
-  for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+  for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
        I != E; ++I) {
     if (I->pred_size() < 2) continue;
     SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
     MachineBasicBlock *IBB = I;
-    MachineBasicBlock *PredBB = prior(I);
+    MachineBasicBlock *PredBB = std::prev(I);
     MergePotentials.clear();
     for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
            E2 = I->pred_end();
@@ -901,7 +907,7 @@
             continue;
           // This is the QBB case described above
           if (!FBB)
-            FBB = llvm::next(MachineFunction::iterator(PBB));
+            FBB = std::next(MachineFunction::iterator(PBB));
         }
 
         // Failing case: the only way IBB can be reached from PBB is via
@@ -951,7 +957,7 @@
 
     // Reinsert an unconditional branch if needed. The 1 below can occur as a
     // result of removing blocks in TryTailMergeBlocks.
-    PredBB = prior(I);     // this may have been changed in TryTailMergeBlocks
+    PredBB = std::prev(I);     // this may have been changed in TryTailMergeBlocks
     if (MergePotentials.size() == 1 &&
         MergePotentials.begin()->getBlock() != PredBB)
       FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
@@ -970,7 +976,7 @@
   // Make sure blocks are numbered in order
   MF.RenumberBlocks();
 
-  for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+  for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
        I != E; ) {
     MachineBasicBlock *MBB = I++;
     MadeChange |= OptimizeBlock(MBB);
@@ -1091,7 +1097,7 @@
 
   // Check to see if we can simplify the terminator of the block before this
   // one.
-  MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(MBB));
+  MachineBasicBlock &PrevBB = *std::prev(MachineFunction::iterator(MBB));
 
   MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
   SmallVector<MachineOperand, 4> PriorCond;
@@ -1390,7 +1396,8 @@
           // B elsewhere
           // next:
           if (CurFallsThru) {
-            MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
+            MachineBasicBlock *NextBB =
+                std::next(MachineFunction::iterator(MBB));
             CurCond.clear();
             TII->InsertBranch(*MBB, NextBB, 0, CurCond, DebugLoc());
           }
@@ -1511,7 +1518,7 @@
   // branch from condition setting instruction.
   MachineBasicBlock::iterator PI = Loc;
   --PI;
-  while (PI != MBB->begin() && Loc->isDebugValue())
+  while (PI != MBB->begin() && PI->isDebugValue())
     --PI;
 
   bool IsDef = false;
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 10cc9ff..8943cb1 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -7,6 +7,7 @@
   CalcSpillWeights.cpp
   CallingConvLower.cpp
   CodeGen.cpp
+  CodeGenPrepare.cpp
   CriticalAntiDepBreaker.cpp
   DFAPacketizer.cpp
   DeadMachineInstructionElim.cpp
@@ -35,7 +36,7 @@
   LiveRangeCalc.cpp
   LiveRangeEdit.cpp
   LiveRegMatrix.cpp
-  LiveRegUnits.cpp
+  LivePhysRegs.cpp
   LiveStackAnalysis.cpp
   LiveVariables.cpp
   LocalStackSlotAllocation.cpp
@@ -97,6 +98,7 @@
   StackColoring.cpp
   StackProtector.cpp
   StackSlotColoring.cpp
+  StackMapLivenessAnalysis.cpp
   StackMaps.cpp
   TailDuplication.cpp
   TargetFrameLoweringImpl.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index 4925c4d..4833731 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -112,8 +112,10 @@
   // Don't recompute spill weight for an unspillable register.
   bool Spillable = li.isSpillable();
 
-  for (MachineRegisterInfo::reg_iterator I = mri.reg_begin(li.reg);
-       MachineInstr *mi = I.skipInstruction();) {
+  for (MachineRegisterInfo::reg_instr_iterator
+       I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end();
+       I != E; ) {
+    MachineInstr *mi = &*(I++);
     if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue())
       continue;
     if (!visited.insert(mi))
@@ -130,9 +132,9 @@
 
       // Calculate instr weight.
       bool reads, writes;
-      tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg);
+      std::tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg);
       weight = LiveIntervals::getSpillWeight(
-          writes, reads, MBFI.getBlockFreq(mi->getParent()));
+        writes, reads, &MBFI, mi);
 
       // Give extra weight to what looks like a loop induction variable update.
       if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb))
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 7430c53..17402f0 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -13,8 +13,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/InitializePasses.h"
-#include "llvm/PassRegistry.h"
 #include "llvm-c/Initialization.h"
+#include "llvm/PassRegistry.h"
 
 using namespace llvm;
 
@@ -22,6 +22,7 @@
 void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeBasicTTIPass(Registry);
   initializeBranchFolderPassPass(Registry);
+  initializeCodeGenPreparePass(Registry);
   initializeDeadMachineInstructionElimPass(Registry);
   initializeEarlyIfConverterPass(Registry);
   initializeExpandPostRAPass(Registry);
@@ -51,6 +52,7 @@
   initializeOptimizePHIsPass(Registry);
   initializePHIEliminationPass(Registry);
   initializePeepholeOptimizerPass(Registry);
+  initializePostMachineSchedulerPass(Registry);
   initializePostRASchedulerPass(Registry);
   initializeProcessImplicitDefsPass(Registry);
   initializePEIPass(Registry);
@@ -69,6 +71,7 @@
   initializeVirtRegRewriterPass(Registry);
   initializeLowerIntrinsicsPass(Registry);
   initializeMachineFunctionPrinterPassPass(Registry);
+  initializeStackMapLivenessPass(Registry);
 }
 
 void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
new file mode 100644
index 0000000..e82a306
--- /dev/null
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -0,0 +1,3011 @@
+//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass munges the code in the input function to better prepare it for
+// SelectionDAG-based code generation. This works around limitations in its
+// basic-block-at-a-time approach. It should eventually be removed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "codegenprepare"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/ValueMap.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/BypassSlowDivision.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+STATISTIC(NumBlocksElim, "Number of blocks eliminated");
+STATISTIC(NumPHIsElim,   "Number of trivial PHIs eliminated");
+STATISTIC(NumGEPsElim,   "Number of GEPs converted to casts");
+STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
+                      "sunken Cmps");
+STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
+                       "of sunken Casts");
+STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
+                          "computations were sunk");
+STATISTIC(NumExtsMoved,  "Number of [s|z]ext instructions combined with loads");
+STATISTIC(NumExtUses,    "Number of uses of [s|z]ext instructions optimized");
+STATISTIC(NumRetsDup,    "Number of return instructions duplicated");
+STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
+STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
+STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches");
+
+static cl::opt<bool> DisableBranchOpts(
+  "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
+  cl::desc("Disable branch optimizations in CodeGenPrepare"));
+
+static cl::opt<bool> DisableSelectToBranch(
+  "disable-cgp-select2branch", cl::Hidden, cl::init(false),
+  cl::desc("Disable select to branch conversion."));
+
+static cl::opt<bool> EnableAndCmpSinking(
+   "enable-andcmp-sinking", cl::Hidden, cl::init(true),
+   cl::desc("Enable sinkinig and/cmp into branches."));
+
+namespace {
+typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
+typedef DenseMap<Instruction *, Type *> InstrToOrigTy;
+
+  class CodeGenPrepare : public FunctionPass {
+    /// TLI - Keep a pointer to a TargetLowering to consult for determining
+    /// transformation profitability.
+    const TargetMachine *TM;
+    const TargetLowering *TLI;
+    const TargetLibraryInfo *TLInfo;
+    DominatorTree *DT;
+
+    /// CurInstIterator - As we scan instructions optimizing them, this is the
+    /// next instruction to optimize.  Xforms that can invalidate this should
+    /// update it.
+    BasicBlock::iterator CurInstIterator;
+
+    /// Keeps track of non-local addresses that have been sunk into a block.
+    /// This allows us to avoid inserting duplicate code for blocks with
+    /// multiple loads/stores of the same address.
+    ValueMap<Value*, Value*> SunkAddrs;
+
+    /// Keeps track of all truncates inserted for the current function.
+    SetOfInstrs InsertedTruncsSet;
+    /// Keeps track of the type of each promoted instruction before its
+    /// promotion, for the current function.
+    InstrToOrigTy PromotedInsts;
+
+    /// ModifiedDT - If the CFG is modified in any way, the dominator tree may
+    /// need to be updated.
+    bool ModifiedDT;
+
+    /// OptSize - True if optimizing for size.
+    bool OptSize;
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    explicit CodeGenPrepare(const TargetMachine *TM = 0)
+      : FunctionPass(ID), TM(TM), TLI(0) {
+        initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
+      }
+    bool runOnFunction(Function &F) override;
+
+    const char *getPassName() const override { return "CodeGen Prepare"; }
+
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.addPreserved<DominatorTreeWrapperPass>();
+      AU.addRequired<TargetLibraryInfo>();
+    }
+
+  private:
+    bool EliminateFallThrough(Function &F);
+    bool EliminateMostlyEmptyBlocks(Function &F);
+    bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
+    void EliminateMostlyEmptyBlock(BasicBlock *BB);
+    bool OptimizeBlock(BasicBlock &BB);
+    bool OptimizeInst(Instruction *I);
+    bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
+    bool OptimizeInlineAsmInst(CallInst *CS);
+    bool OptimizeCallInst(CallInst *CI);
+    bool MoveExtToFormExtLoad(Instruction *I);
+    bool OptimizeExtUses(Instruction *I);
+    bool OptimizeSelectInst(SelectInst *SI);
+    bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI);
+    bool DupRetToEnableTailCallOpts(BasicBlock *BB);
+    bool PlaceDbgValues(Function &F);
+    bool sinkAndCmp(Function &F);
+  };
+}
+
+char CodeGenPrepare::ID = 0;
+static void *initializeCodeGenPreparePassOnce(PassRegistry &Registry) {
+  initializeTargetLibraryInfoPass(Registry);
+  PassInfo *PI = new PassInfo(
+      "Optimize for code generation", "codegenprepare", &CodeGenPrepare::ID,
+      PassInfo::NormalCtor_t(callDefaultCtor<CodeGenPrepare>), false, false,
+      PassInfo::TargetMachineCtor_t(callTargetMachineCtor<CodeGenPrepare>));
+  Registry.registerPass(*PI, true);
+  return PI;
+}
+
+void llvm::initializeCodeGenPreparePass(PassRegistry &Registry) {
+  CALL_ONCE_INITIALIZATION(initializeCodeGenPreparePassOnce)
+}
+
+FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
+  return new CodeGenPrepare(TM);
+}
+
+bool CodeGenPrepare::runOnFunction(Function &F) {
+  if (skipOptnoneFunction(F))
+    return false;
+
+  bool EverMadeChange = false;
+  // Clear per function information.
+  InsertedTruncsSet.clear();
+  PromotedInsts.clear();
+
+  ModifiedDT = false;
+  if (TM) TLI = TM->getTargetLowering();
+  TLInfo = &getAnalysis<TargetLibraryInfo>();
+  DominatorTreeWrapperPass *DTWP =
+      getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+  DT = DTWP ? &DTWP->getDomTree() : 0;
+  OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+                                           Attribute::OptimizeForSize);
+
+  // This optimization identifies DIV instructions that can be
+  // profitably bypassed and carried out with a shorter, faster divide.
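+  // For example, on some x86 subtargets a 64-bit divide whose operands happen
+  // to fit in 32 bits can be replaced by a guarded, faster 32-bit divide.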
+  if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
+    const DenseMap<unsigned int, unsigned int> &BypassWidths =
+       TLI->getBypassSlowDivWidths();
+    for (Function::iterator I = F.begin(); I != F.end(); I++)
+      EverMadeChange |= bypassSlowDivision(F, I, BypassWidths);
+  }
+
+  // Eliminate blocks that contain only PHI nodes and an
+  // unconditional branch.
+  EverMadeChange |= EliminateMostlyEmptyBlocks(F);
+
+  // If llvm.dbg.value is far away from the value, then isel may not be able
+  // to handle it properly. isel will drop llvm.dbg.value if it cannot
+  // find a node corresponding to the value.
+  EverMadeChange |= PlaceDbgValues(F);
+
+  // If there is a mask, a compare against zero, and a branch that can be
+  // combined into a single target instruction, push the mask and compare into
+  // the branch users. Do this before OptimizeBlock -> OptimizeInst ->
+  // OptimizeCmpExpression, which perturbs the pattern being searched for.
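+  // For example (illustrative IR), a sequence such as
+  //   %a = and i32 %x, 8
+  //   %c = icmp eq i32 %a, 0
+  //   br i1 %c, label %t, label %f
+  // can be duplicated into the branch users so that isel can form a single
+  // test-bit-and-branch instruction on targets that have one.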
+  if (!DisableBranchOpts)
+    EverMadeChange |= sinkAndCmp(F);
+
+  bool MadeChange = true;
+  while (MadeChange) {
+    MadeChange = false;
+    for (Function::iterator I = F.begin(); I != F.end(); ) {
+      BasicBlock *BB = I++;
+      MadeChange |= OptimizeBlock(*BB);
+    }
+    EverMadeChange |= MadeChange;
+  }
+
+  SunkAddrs.clear();
+
+  if (!DisableBranchOpts) {
+    MadeChange = false;
+    SmallPtrSet<BasicBlock*, 8> WorkList;
+    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+      SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
+      MadeChange |= ConstantFoldTerminator(BB, true);
+      if (!MadeChange) continue;
+
+      for (SmallVectorImpl<BasicBlock*>::iterator
+             II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
+        if (pred_begin(*II) == pred_end(*II))
+          WorkList.insert(*II);
+    }
+
+    // Delete the dead blocks and any of their dead successors.
+    MadeChange |= !WorkList.empty();
+    while (!WorkList.empty()) {
+      BasicBlock *BB = *WorkList.begin();
+      WorkList.erase(BB);
+      SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
+
+      DeleteDeadBlock(BB);
+
+      for (SmallVectorImpl<BasicBlock*>::iterator
+             II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
+        if (pred_begin(*II) == pred_end(*II))
+          WorkList.insert(*II);
+    }
+
+    // Merge pairs of basic blocks with unconditional branches, connected by
+    // a single edge.
+    if (EverMadeChange || MadeChange)
+      MadeChange |= EliminateFallThrough(F);
+
+    if (MadeChange)
+      ModifiedDT = true;
+    EverMadeChange |= MadeChange;
+  }
+
+  if (ModifiedDT && DT)
+    DT->recalculate(F);
+
+  return EverMadeChange;
+}
+
+/// EliminateFallThrough - Merge basic blocks which are connected
+/// by a single edge, where one of the basic blocks has a single successor
+/// pointing to the other basic block, which has a single predecessor.
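+///
+/// For example (illustrative):
+/// @code
+/// bb0:
+///   ...
+///   br label %bb1
+/// bb1:                                ; preds = %bb0
+///   ...
+/// @endcode
+/// collapses so that the contents of %bb1 are folded into %bb0.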
+bool CodeGenPrepare::EliminateFallThrough(Function &F) {
+  bool Changed = false;
+  // Scan all of the blocks in the function, except for the entry block.
+  for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
+    BasicBlock *BB = I++;
+    // If the destination block has a single pred, then this is a trivial
+    // edge, just collapse it.
+    BasicBlock *SinglePred = BB->getSinglePredecessor();
+
+    // Don't merge if BB's address is taken.
+    if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
+
+    BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
+    if (Term && !Term->isConditional()) {
+      Changed = true;
+      DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n");
+      // Remember if SinglePred was the entry block of the function.
+      // If so, we will need to move BB back to the entry position.
+      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+      MergeBasicBlockIntoOnlyPred(BB, this);
+
+      if (isEntry && BB != &BB->getParent()->getEntryBlock())
+        BB->moveBefore(&BB->getParent()->getEntryBlock());
+
+      // We have erased a block. Update the iterator.
+      I = BB;
+    }
+  }
+  return Changed;
+}
+
+/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
+/// debug info directives, and an unconditional branch.  Passes before isel
+/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
+/// isel.  Start by eliminating these blocks so we can split them the way we
+/// want them.
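+///
+/// For example (illustrative), a block of the form
+/// @code
+/// bb:                                 ; preds = %pred1, %pred2
+///   %p = phi i32 [ %a, %pred1 ], [ %b, %pred2 ]
+///   br label %dest
+/// @endcode
+/// is a candidate: if CanMergeBlocks agrees, %bb is removed and its
+/// predecessors are wired directly to %dest.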
+bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
+  bool MadeChange = false;
+  // Note that this intentionally skips the entry block.
+  for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
+    BasicBlock *BB = I++;
+
+    // If this block doesn't end with an uncond branch, ignore it.
+    BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+    if (!BI || !BI->isUnconditional())
+      continue;
+
+    // If the instruction before the branch (skipping debug info) isn't a phi
+    // node, then other stuff is happening here.
+    BasicBlock::iterator BBI = BI;
+    if (BBI != BB->begin()) {
+      --BBI;
+      while (isa<DbgInfoIntrinsic>(BBI)) {
+        if (BBI == BB->begin())
+          break;
+        --BBI;
+      }
+      if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
+        continue;
+    }
+
+    // Do not break infinite loops.
+    BasicBlock *DestBB = BI->getSuccessor(0);
+    if (DestBB == BB)
+      continue;
+
+    if (!CanMergeBlocks(BB, DestBB))
+      continue;
+
+    EliminateMostlyEmptyBlock(BB);
+    MadeChange = true;
+  }
+  return MadeChange;
+}
+
+/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
+/// single uncond branch between them, and BB contains no other non-phi
+/// instructions.
+bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
+                                    const BasicBlock *DestBB) const {
+  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
+  // the successor.  If there are more complex conditions (e.g. preheaders),
+  // don't mess around with them.
+  BasicBlock::const_iterator BBI = BB->begin();
+  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
+    for (const User *U : PN->users()) {
+      const Instruction *UI = cast<Instruction>(U);
+      if (UI->getParent() != DestBB || !isa<PHINode>(UI))
+        return false;
+      // If the user is inside DestBB and is a PHINode, then check its
+      // incoming value. If the incoming value is not from BB, then this is
+      // a complex condition (e.g. preheaders) we want to avoid here.
+      if (UI->getParent() == DestBB) {
+        if (const PHINode *UPN = dyn_cast<PHINode>(UI))
+          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
+            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
+            if (Insn && Insn->getParent() == BB &&
+                Insn->getParent() != UPN->getIncomingBlock(I))
+              return false;
+          }
+      }
+    }
+  }
+
+  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
+  // and DestBB may have conflicting incoming values for the block.  If so, we
+  // can't merge the block.
+  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
+  if (!DestBBPN) return true;  // no conflict.
+
+  // Collect the preds of BB.
+  SmallPtrSet<const BasicBlock*, 16> BBPreds;
+  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
+    // It is faster to get preds from a PHI than with pred_iterator.
+    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
+      BBPreds.insert(BBPN->getIncomingBlock(i));
+  } else {
+    BBPreds.insert(pred_begin(BB), pred_end(BB));
+  }
+
+  // Walk the preds of DestBB.
+  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
+    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
+    if (BBPreds.count(Pred)) {   // Common predecessor?
+      BBI = DestBB->begin();
+      while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
+        const Value *V1 = PN->getIncomingValueForBlock(Pred);
+        const Value *V2 = PN->getIncomingValueForBlock(BB);
+
+        // If V2 is a phi node in BB, look up what the mapped value will be.
+        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
+          if (V2PN->getParent() == BB)
+            V2 = V2PN->getIncomingValueForBlock(Pred);
+
+        // If there is a conflict, bail out.
+        if (V1 != V2) return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+
+/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi's and
+/// an unconditional branch in it.
+void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
+  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
+  BasicBlock *DestBB = BI->getSuccessor(0);
+
+  DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
+
+  // If the destination block has a single pred, then this is a trivial edge,
+  // just collapse it.
+  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
+    if (SinglePred != DestBB) {
+      // Remember if SinglePred was the entry block of the function.  If so, we
+      // will need to move BB back to the entry position.
+      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+      MergeBasicBlockIntoOnlyPred(DestBB, this);
+
+      if (isEntry && BB != &BB->getParent()->getEntryBlock())
+        BB->moveBefore(&BB->getParent()->getEntryBlock());
+
+      DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
+      return;
+    }
+  }
+
+  // Otherwise, we have multiple predecessors of BB.  Update the PHIs in DestBB
+  // to handle the new incoming edges it is about to have.
+  PHINode *PN;
+  for (BasicBlock::iterator BBI = DestBB->begin();
+       (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+    // Remove the incoming value for BB, and remember it.
+    Value *InVal = PN->removeIncomingValue(BB, false);
+
+    // Two options: either the InVal is a phi node defined in BB or it is some
+    // value that dominates BB.
+    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
+    if (InValPhi && InValPhi->getParent() == BB) {
+      // Add all of the input values of the input PHI as inputs of this phi.
+      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
+        PN->addIncoming(InValPhi->getIncomingValue(i),
+                        InValPhi->getIncomingBlock(i));
+    } else {
+      // Otherwise, add one instance of the dominating value for each edge that
+      // we will be adding.
+      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
+        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
+          PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
+      } else {
+        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+          PN->addIncoming(InVal, *PI);
+      }
+    }
+  }
+
+  // The PHIs are now updated, change everything that refers to BB to use
+  // DestBB and remove BB.
+  BB->replaceAllUsesWith(DestBB);
+  if (DT && !ModifiedDT) {
+    BasicBlock *BBIDom  = DT->getNode(BB)->getIDom()->getBlock();
+    BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
+    BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
+    DT->changeImmediateDominator(DestBB, NewIDom);
+    DT->eraseNode(BB);
+  }
+  BB->eraseFromParent();
+  ++NumBlocksElim;
+
+  DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
+}
+
+/// SinkCast - Sink the specified cast instruction into its user blocks
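+///
+/// For example (illustrative):
+/// @code
+/// def:
+///   %c = bitcast i8* %p to i32*
+///   br label %use
+/// use:
+///   %v = load i32* %c
+/// @endcode
+/// A copy of the bitcast is inserted in %use, and the original cast is erased
+/// once all of its uses have been rewritten.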
+static bool SinkCast(CastInst *CI) {
+  BasicBlock *DefBB = CI->getParent();
+
+  // InsertedCasts - Only insert a cast in each block once.
+  DenseMap<BasicBlock*, CastInst*> InsertedCasts;
+
+  bool MadeChange = false;
+  for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
+       UI != E; ) {
+    Use &TheUse = UI.getUse();
+    Instruction *User = cast<Instruction>(*UI);
+
+    // Figure out which BB this cast is used in.  For PHI's this is the
+    // appropriate predecessor block.
+    BasicBlock *UserBB = User->getParent();
+    if (PHINode *PN = dyn_cast<PHINode>(User)) {
+      UserBB = PN->getIncomingBlock(TheUse);
+    }
+
+    // Preincrement use iterator so we don't invalidate it.
+    ++UI;
+
+    // If this user is in the same block as the cast, don't change the cast.
+    if (UserBB == DefBB) continue;
+
+    // If we have already inserted a cast into this block, use it.
+    CastInst *&InsertedCast = InsertedCasts[UserBB];
+
+    if (!InsertedCast) {
+      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+      InsertedCast =
+        CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
+                         InsertPt);
+      MadeChange = true;
+    }
+
+    // Replace a use of the cast with a use of the new cast.
+    TheUse = InsertedCast;
+    ++NumCastUses;
+  }
+
+  // If we removed all uses, nuke the cast.
+  if (CI->use_empty()) {
+    CI->eraseFromParent();
+    MadeChange = true;
+  }
+
+  return MadeChange;
+}
+
+/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
+/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
+/// sink it into user blocks to reduce the number of virtual
+/// registers that must be created and coalesced.
+///
+/// Return true if any changes are made.
+///
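+/// For example (illustrative), on a target that promotes i8 to i32, a
+/// "trunc i32 %x to i8" becomes a noop copy after promotion and is sunk just
+/// like a pointer-to-pointer bitcast would be.
+///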
+static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
+  // If this is a noop copy,
+  EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
+  EVT DstVT = TLI.getValueType(CI->getType());
+
+  // Is this an fp<->int conversion? If so, it is not a noop copy.
+  if (SrcVT.isInteger() != DstVT.isInteger())
+    return false;
+
+  // If this is an extension, it will be a zero or sign extension, which
+  // isn't a noop.
+  if (SrcVT.bitsLT(DstVT)) return false;
+
+  // If these values will be promoted, find out what they will be promoted
+  // to.  This helps us consider truncates on PPC as noop copies when they
+  // are.
+  if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
+      TargetLowering::TypePromoteInteger)
+    SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
+  if (TLI.getTypeAction(CI->getContext(), DstVT) ==
+      TargetLowering::TypePromoteInteger)
+    DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
+
+  // If, after promotion, these are the same types, this is a noop copy.
+  if (SrcVT != DstVT)
+    return false;
+
+  return SinkCast(CI);
+}
+
+/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
+/// the number of virtual registers that must be created and coalesced.  This is
+/// a clear win except on targets with multiple condition code registers
+/// (PowerPC), where it might lose; some adjustment may be wanted there.
+///
+/// Return true if any changes are made.
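+///
+/// For example (illustrative):
+/// @code
+/// def:
+///   %c = icmp ult i32 %x, %y
+///   br label %use
+/// use:
+///   br i1 %c, label %t, label %f
+/// @endcode
+/// A clone of the icmp is inserted in %use so that isel sees the compare and
+/// its branch in the same block.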
+static bool OptimizeCmpExpression(CmpInst *CI) {
+  BasicBlock *DefBB = CI->getParent();
+
+  // InsertedCmps - Only insert a cmp in each block once.
+  DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
+
+  bool MadeChange = false;
+  for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
+       UI != E; ) {
+    Use &TheUse = UI.getUse();
+    Instruction *User = cast<Instruction>(*UI);
+
+    // Preincrement use iterator so we don't invalidate it.
+    ++UI;
+
+    // Don't bother for PHI nodes.
+    if (isa<PHINode>(User))
+      continue;
+
+    // Figure out which BB this cmp is used in.
+    BasicBlock *UserBB = User->getParent();
+
+    // If this user is in the same block as the cmp, don't change the cmp.
+    if (UserBB == DefBB) continue;
+
+    // If we have already inserted a cmp into this block, use it.
+    CmpInst *&InsertedCmp = InsertedCmps[UserBB];
+
+    if (!InsertedCmp) {
+      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+      InsertedCmp =
+        CmpInst::Create(CI->getOpcode(),
+                        CI->getPredicate(),  CI->getOperand(0),
+                        CI->getOperand(1), "", InsertPt);
+      MadeChange = true;
+    }
+
+    // Replace a use of the cmp with a use of the new cmp.
+    TheUse = InsertedCmp;
+    ++NumCmpUses;
+  }
+
+  // If we removed all uses, nuke the cmp.
+  if (CI->use_empty())
+    CI->eraseFromParent();
+
+  return MadeChange;
+}
+
+namespace {
+class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
+protected:
+  void replaceCall(Value *With) override {
+    CI->replaceAllUsesWith(With);
+    CI->eraseFromParent();
+  }
+  bool isFoldable(unsigned SizeCIOp, unsigned, bool) const override {
+    if (ConstantInt *SizeCI =
+            dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
+      return SizeCI->isAllOnesValue();
+    return false;
+  }
+};
+} // end anonymous namespace
+
+bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
+  BasicBlock *BB = CI->getParent();
+
+  // Lower inline assembly if we can.
+  // If we found an inline asm expression, and if the target knows how to
+  // lower it to normal LLVM code, do so now.
+  if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
+    if (TLI->ExpandInlineAsm(CI)) {
+      // Avoid invalidating the iterator.
+      CurInstIterator = BB->begin();
+      // Avoid processing instructions out of order, which could cause
+      // reuse before a value is defined.
+      SunkAddrs.clear();
+      return true;
+    }
+    // Sink address computing for memory operands into the block.
+    if (OptimizeInlineAsmInst(CI))
+      return true;
+  }
+
+  // Lower all uses of llvm.objectsize.*
+  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
+  if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
+    bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
+    Type *ReturnTy = CI->getType();
+    Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
+
+    // Substituting this can cause recursive simplifications, which can
+    // invalidate our iterator.  Use a WeakVH to hold onto it in case this
+    // happens.
+    WeakVH IterHandle(CurInstIterator);
+
+    replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getDataLayout() : 0,
+                                  TLInfo, ModifiedDT ? 0 : DT);
+
+    // If the iterator instruction was recursively deleted, start over at the
+    // start of the block.
+    if (IterHandle != CurInstIterator) {
+      CurInstIterator = BB->begin();
+      SunkAddrs.clear();
+    }
+    return true;
+  }
+
+  if (II && TLI) {
+    SmallVector<Value*, 2> PtrOps;
+    Type *AccessTy;
+    if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
+      while (!PtrOps.empty())
+        if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
+          return true;
+  }
+
+  // From here on out we're working with named functions.
+  if (CI->getCalledFunction() == 0) return false;
+
+  // We'll need DataLayout from here on out.
+  const DataLayout *TD = TLI ? TLI->getDataLayout() : 0;
+  if (!TD) return false;
+
+  // Lower all default uses of _chk calls.  This is very similar
+  // to what InstCombineCalls does, but here we are only lowering calls
+  // that have the default "don't know" as the objectsize.  Anything else
+  // should be left alone.
+  CodeGenPrepareFortifiedLibCalls Simplifier;
+  return Simplifier.fold(CI, TD, TLInfo);
+}
+
+/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
+/// instructions to the predecessor to enable tail call optimizations. The
+/// case it is currently looking for is:
+/// @code
+/// bb0:
+///   %tmp0 = tail call i32 @f0()
+///   br label %return
+/// bb1:
+///   %tmp1 = tail call i32 @f1()
+///   br label %return
+/// bb2:
+///   %tmp2 = tail call i32 @f2()
+///   br label %return
+/// return:
+///   %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
+///   ret i32 %retval
+/// @endcode
+///
+/// =>
+///
+/// @code
+/// bb0:
+///   %tmp0 = tail call i32 @f0()
+///   ret i32 %tmp0
+/// bb1:
+///   %tmp1 = tail call i32 @f1()
+///   ret i32 %tmp1
+/// bb2:
+///   %tmp2 = tail call i32 @f2()
+///   ret i32 %tmp2
+/// @endcode
+bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
+  if (!TLI)
+    return false;
+
+  ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
+  if (!RI)
+    return false;
+
+  PHINode *PN = 0;
+  BitCastInst *BCI = 0;
+  Value *V = RI->getReturnValue();
+  if (V) {
+    BCI = dyn_cast<BitCastInst>(V);
+    if (BCI)
+      V = BCI->getOperand(0);
+
+    PN = dyn_cast<PHINode>(V);
+    if (!PN)
+      return false;
+  }
+
+  if (PN && PN->getParent() != BB)
+    return false;
+
+  // It's not safe to eliminate the sign / zero extension of the return value.
+  // See llvm::isInTailCallPosition().
+  const Function *F = BB->getParent();
+  AttributeSet CallerAttrs = F->getAttributes();
+  if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
+      CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+    return false;
+
+  // Make sure there are no instructions between the PHI and return, or that the
+  // return is the first instruction in the block.
+  if (PN) {
+    BasicBlock::iterator BI = BB->begin();
+    do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
+    if (&*BI == BCI)
+      // Also skip over the bitcast.
+      ++BI;
+    if (&*BI != RI)
+      return false;
+  } else {
+    BasicBlock::iterator BI = BB->begin();
+    while (isa<DbgInfoIntrinsic>(BI)) ++BI;
+    if (&*BI != RI)
+      return false;
+  }
+
+  // Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
+  // call.
+  SmallVector<CallInst*, 4> TailCalls;
+  if (PN) {
+    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
+      CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
+      // Make sure the phi value is indeed produced by the tail call.
+      if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
+          TLI->mayBeEmittedAsTailCall(CI))
+        TailCalls.push_back(CI);
+    }
+  } else {
+    SmallPtrSet<BasicBlock*, 4> VisitedBBs;
+    for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
+      if (!VisitedBBs.insert(*PI))
+        continue;
+
+      BasicBlock::InstListType &InstList = (*PI)->getInstList();
+      BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
+      BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
+      do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
+      if (RI == RE)
+        continue;
+
+      CallInst *CI = dyn_cast<CallInst>(&*RI);
+      if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
+        TailCalls.push_back(CI);
+    }
+  }
+
+  bool Changed = false;
+  for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
+    CallInst *CI = TailCalls[i];
+    CallSite CS(CI);
+
+    // Conservatively require the attributes of the call to match those of the
+    // return. Ignore noalias because it doesn't affect the call sequence.
+    AttributeSet CalleeAttrs = CS.getAttributes();
+    if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
+          removeAttribute(Attribute::NoAlias) !=
+        AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
+          removeAttribute(Attribute::NoAlias))
+      continue;
+
+    // Make sure the call instruction is followed by an unconditional branch to
+    // the return block.
+    BasicBlock *CallBB = CI->getParent();
+    BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
+    if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
+      continue;
+
+    // Duplicate the return into CallBB.
+    (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
+    ModifiedDT = Changed = true;
+    ++NumRetsDup;
+  }
+
+  // If we eliminated all predecessors of the block, delete the block now.
+  if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
+    BB->eraseFromParent();
+
+  return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Memory Optimization
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
+/// which holds actual Value*'s for register values.
+struct ExtAddrMode : public TargetLowering::AddrMode {
+  Value *BaseReg;
+  Value *ScaledReg;
+  ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
+  void print(raw_ostream &OS) const;
+  void dump() const;
+
+  bool operator==(const ExtAddrMode& O) const {
+    return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
+           (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
+           (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale);
+  }
+};
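+
+// Illustrative decomposition (hypothetical values %p and %i): the address
+// "%p + 4*%i + 20" is represented as BaseReg = %p, Scale = 4, ScaledReg = %i
+// and BaseOffs = 20, and prints as [20 + Base:%p + 4*%i].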
+
+#ifndef NDEBUG
+static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
+  AM.print(OS);
+  return OS;
+}
+#endif
+
+void ExtAddrMode::print(raw_ostream &OS) const {
+  bool NeedPlus = false;
+  OS << "[";
+  if (BaseGV) {
+    OS << (NeedPlus ? " + " : "")
+       << "GV:";
+    BaseGV->printAsOperand(OS, /*PrintType=*/false);
+    NeedPlus = true;
+  }
+
+  if (BaseOffs)
+    OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true;
+
+  if (BaseReg) {
+    OS << (NeedPlus ? " + " : "")
+       << "Base:";
+    BaseReg->printAsOperand(OS, /*PrintType=*/false);
+    NeedPlus = true;
+  }
+  if (Scale) {
+    OS << (NeedPlus ? " + " : "")
+       << Scale << "*";
+    ScaledReg->printAsOperand(OS, /*PrintType=*/false);
+  }
+
+  OS << ']';
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ExtAddrMode::dump() const {
+  print(dbgs());
+  dbgs() << '\n';
+}
+#endif
+
+/// \brief This class provides transaction-based operations on the IR.
+/// Every change made through this class is recorded in the internal state and
+/// can be undone (rollback) until commit is called.
+class TypePromotionTransaction {
+
+  /// \brief This represents the common interface of the transaction actions.
+  /// Each class implements the logic for doing one specific modification on
+  /// the IR via the TypePromotionTransaction.
+  class TypePromotionAction {
+  protected:
+    /// The Instruction modified.
+    Instruction *Inst;
+
+  public:
+    /// \brief Constructor of the action.
+    /// The constructor performs the related action on the IR.
+    TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
+
+    virtual ~TypePromotionAction() {}
+
+    /// \brief Undo the modification done by this action.
+    /// When this method is called, the IR must be in the same state as it was
+    /// before this action was applied.
+    /// \pre Undoing the action works if and only if the IR is in the exact same
+    /// state as it was directly after this action was applied.
+    virtual void undo() = 0;
+
+    /// \brief Advocate every change made by this action.
+    /// When the action's results on the IR are to be kept, it is important
+    /// to call this function, otherwise hidden information may be kept forever.
+    virtual void commit() {
+      // Nothing to be done, this action is not doing anything.
+    }
+  };
+
+  /// \brief Utility to remember the position of an instruction.
+  class InsertionHandler {
+    /// Position of an instruction.
+    /// Either an instruction:
+    /// - Is the first in a basic block: BB is used.
+    /// - Has a previous instruction: PrevInst is used.
+    union {
+      Instruction *PrevInst;
+      BasicBlock *BB;
+    } Point;
+    /// Remember whether or not the instruction had a previous instruction.
+    bool HasPrevInstruction;
+
+  public:
+    /// \brief Record the position of \p Inst.
+    InsertionHandler(Instruction *Inst) {
+      BasicBlock::iterator It = Inst;
+      HasPrevInstruction = (It != (Inst->getParent()->begin()));
+      if (HasPrevInstruction)
+        Point.PrevInst = --It;
+      else
+        Point.BB = Inst->getParent();
+    }
+
+    /// \brief Insert \p Inst at the recorded position.
+    void insert(Instruction *Inst) {
+      if (HasPrevInstruction) {
+        if (Inst->getParent())
+          Inst->removeFromParent();
+        Inst->insertAfter(Point.PrevInst);
+      } else {
+        Instruction *Position = Point.BB->getFirstInsertionPt();
+        if (Inst->getParent())
+          Inst->moveBefore(Position);
+        else
+          Inst->insertBefore(Position);
+      }
+    }
+  };
+
+  /// \brief Move an instruction before another.
+  class InstructionMoveBefore : public TypePromotionAction {
+    /// Original position of the instruction.
+    InsertionHandler Position;
+
+  public:
+    /// \brief Move \p Inst before \p Before.
+    InstructionMoveBefore(Instruction *Inst, Instruction *Before)
+        : TypePromotionAction(Inst), Position(Inst) {
+      DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before << "\n");
+      Inst->moveBefore(Before);
+    }
+
+    /// \brief Move the instruction back to its original position.
+    void undo() override {
+      DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
+      Position.insert(Inst);
+    }
+  };
+
+  /// \brief Set the operand of an instruction with a new value.
+  class OperandSetter : public TypePromotionAction {
+    /// Original operand of the instruction.
+    Value *Origin;
+    /// Index of the modified instruction.
+    unsigned Idx;
+
+  public:
+    /// \brief Set \p Idx operand of \p Inst with \p NewVal.
+    OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
+        : TypePromotionAction(Inst), Idx(Idx) {
+      DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
+                   << "for:" << *Inst << "\n"
+                   << "with:" << *NewVal << "\n");
+      Origin = Inst->getOperand(Idx);
+      Inst->setOperand(Idx, NewVal);
+    }
+
+    /// \brief Restore the original value of the instruction.
+    void undo() override {
+      DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
+                   << "for: " << *Inst << "\n"
+                   << "with: " << *Origin << "\n");
+      Inst->setOperand(Idx, Origin);
+    }
+  };
+
+  /// \brief Hide the operands of an instruction.
+  /// Acts as if this instruction were not using any of its operands.
+  class OperandsHider : public TypePromotionAction {
+    /// The list of original operands.
+    SmallVector<Value *, 4> OriginalValues;
+
+  public:
+    /// \brief Remove \p Inst from the uses of the operands of \p Inst.
+    OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
+      DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
+      unsigned NumOpnds = Inst->getNumOperands();
+      OriginalValues.reserve(NumOpnds);
+      for (unsigned It = 0; It < NumOpnds; ++It) {
+        // Save the current operand.
+        Value *Val = Inst->getOperand(It);
+        OriginalValues.push_back(Val);
+        // Set a dummy one.
+        // We could use OperandSetter here, but that would imply an overhead
+        // that we are not willing to pay.
+        Inst->setOperand(It, UndefValue::get(Val->getType()));
+      }
+    }
+
+    /// \brief Restore the original list of uses.
+    void undo() override {
+      DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
+      for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
+        Inst->setOperand(It, OriginalValues[It]);
+    }
+  };
+
+  /// \brief Build a truncate instruction.
+  class TruncBuilder : public TypePromotionAction {
+  public:
+    /// \brief Build a truncate instruction of \p Opnd producing a \p Ty
+    /// result.
+    /// trunc Opnd to Ty.
+    TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
+      IRBuilder<> Builder(Opnd);
+      Inst = cast<Instruction>(Builder.CreateTrunc(Opnd, Ty, "promoted"));
+      DEBUG(dbgs() << "Do: TruncBuilder: " << *Inst << "\n");
+    }
+
+    /// \brief Get the built instruction.
+    Instruction *getBuiltInstruction() { return Inst; }
+
+    /// \brief Remove the built instruction.
+    void undo() override {
+      DEBUG(dbgs() << "Undo: TruncBuilder: " << *Inst << "\n");
+      Inst->eraseFromParent();
+    }
+  };
+
+  /// \brief Build a sign extension instruction.
+  class SExtBuilder : public TypePromotionAction {
+  public:
+    /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty
+    /// result.
+    /// sext Opnd to Ty.
+    SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
+        : TypePromotionAction(InsertPt) {
+      IRBuilder<> Builder(InsertPt);
+      Inst = cast<Instruction>(Builder.CreateSExt(Opnd, Ty, "promoted"));
+      DEBUG(dbgs() << "Do: SExtBuilder: " << *Inst << "\n");
+    }
+
+    /// \brief Get the built instruction.
+    Instruction *getBuiltInstruction() { return Inst; }
+
+    /// \brief Remove the built instruction.
+    void undo() override {
+      DEBUG(dbgs() << "Undo: SExtBuilder: " << *Inst << "\n");
+      Inst->eraseFromParent();
+    }
+  };
+
+  /// \brief Mutate an instruction to another type.
+  class TypeMutator : public TypePromotionAction {
+    /// Record the original type.
+    Type *OrigTy;
+
+  public:
+    /// \brief Mutate the type of \p Inst into \p NewTy.
+    TypeMutator(Instruction *Inst, Type *NewTy)
+        : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
+      DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
+                   << "\n");
+      Inst->mutateType(NewTy);
+    }
+
+    /// \brief Mutate the instruction back to its original type.
+    void undo() override {
+      DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
+                   << "\n");
+      Inst->mutateType(OrigTy);
+    }
+  };
+
+  /// \brief Replace the uses of an instruction by another instruction.
+  class UsesReplacer : public TypePromotionAction {
+    /// Helper structure to keep track of the replaced uses.
+    struct InstructionAndIdx {
+      /// The instruction using the instruction.
+      Instruction *Inst;
+      /// The index where this instruction is used for Inst.
+      unsigned Idx;
+      InstructionAndIdx(Instruction *Inst, unsigned Idx)
+          : Inst(Inst), Idx(Idx) {}
+    };
+
+    /// Keep track of the original uses (pair Instruction, Index).
+    SmallVector<InstructionAndIdx, 4> OriginalUses;
+    typedef SmallVectorImpl<InstructionAndIdx>::iterator use_iterator;
+
+  public:
+    /// \brief Replace all the uses of \p Inst with \p New.
+    UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) {
+      DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
+                   << "\n");
+      // Record the original uses.
+      for (Use &U : Inst->uses()) {
+        Instruction *UserI = cast<Instruction>(U.getUser());
+        OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
+      }
+      // Now, we can replace the uses.
+      Inst->replaceAllUsesWith(New);
+    }
+
+    /// \brief Reassign the original uses of Inst to Inst.
+    void undo() override {
+      DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
+      for (use_iterator UseIt = OriginalUses.begin(),
+                        EndIt = OriginalUses.end();
+           UseIt != EndIt; ++UseIt) {
+        UseIt->Inst->setOperand(UseIt->Idx, Inst);
+      }
+    }
+  };
+
+  /// \brief Remove an instruction from the IR.
+  class InstructionRemover : public TypePromotionAction {
+    /// Original position of the instruction.
+    InsertionHandler Inserter;
+    /// Helper structure to hide all the links to the instruction. In other
+    /// words, this makes it look as if the instruction was removed.
+    OperandsHider Hider;
+    /// Keep track of the uses replaced, if any.
+    UsesReplacer *Replacer;
+
+  public:
+    /// \brief Remove all references to \p Inst and optionally replace all its
+    /// uses with New.
+    /// \pre If !Inst->use_empty(), then New != NULL
+    InstructionRemover(Instruction *Inst, Value *New = NULL)
+        : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
+          Replacer(NULL) {
+      if (New)
+        Replacer = new UsesReplacer(Inst, New);
+      DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
+      Inst->removeFromParent();
+    }
+
+    ~InstructionRemover() { delete Replacer; }
+
+    /// \brief Really remove the instruction.
+    void commit() override { delete Inst; }
+
+    /// \brief Resurrect the instruction and reassign it to the proper uses if
+    /// a new value was provided when building this action.
+    void undo() override {
+      DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
+      Inserter.insert(Inst);
+      if (Replacer)
+        Replacer->undo();
+      Hider.undo();
+    }
+  };
+
+public:
+  /// Restoration point.
+  /// The restoration point is a pointer to an action instead of an iterator
+  /// because the iterator may be invalidated but not the pointer.
+  typedef const TypePromotionAction *ConstRestorationPt;
+  /// Advocate every change made in this transaction.
+  void commit();
+  /// Undo all the changes made after the given point.
+  void rollback(ConstRestorationPt Point);
+  /// Get the current restoration point.
+  ConstRestorationPt getRestorationPoint() const;
+
+  /// \name API for IR modification with state keeping to support rollback.
+  /// @{
+  /// Same as Instruction::setOperand.
+  void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
+  /// Same as Instruction::eraseFromParent.
+  void eraseInstruction(Instruction *Inst, Value *NewVal = NULL);
+  /// Same as Value::replaceAllUsesWith.
+  void replaceAllUsesWith(Instruction *Inst, Value *New);
+  /// Same as Value::mutateType.
+  void mutateType(Instruction *Inst, Type *NewTy);
+  /// Same as IRBuilder::createTrunc.
+  Instruction *createTrunc(Instruction *Opnd, Type *Ty);
+  /// Same as IRBuilder::createSExt.
+  Instruction *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
+  /// Same as Instruction::moveBefore.
+  void moveBefore(Instruction *Inst, Instruction *Before);
+  /// @}
+
+  ~TypePromotionTransaction();
+
+private:
+  /// The ordered list of actions made so far.
+  SmallVector<TypePromotionAction *, 16> Actions;
+  typedef SmallVectorImpl<TypePromotionAction *>::iterator CommitPt;
+};
+
+void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
+                                          Value *NewVal) {
+  Actions.push_back(
+      new TypePromotionTransaction::OperandSetter(Inst, Idx, NewVal));
+}
+
+void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
+                                                Value *NewVal) {
+  Actions.push_back(
+      new TypePromotionTransaction::InstructionRemover(Inst, NewVal));
+}
+
+void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
+                                                  Value *New) {
+  Actions.push_back(new TypePromotionTransaction::UsesReplacer(Inst, New));
+}
+
+void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
+  Actions.push_back(new TypePromotionTransaction::TypeMutator(Inst, NewTy));
+}
+
+Instruction *TypePromotionTransaction::createTrunc(Instruction *Opnd,
+                                                   Type *Ty) {
+  TruncBuilder *TB = new TruncBuilder(Opnd, Ty);
+  Actions.push_back(TB);
+  return TB->getBuiltInstruction();
+}
+
+Instruction *TypePromotionTransaction::createSExt(Instruction *Inst,
+                                                  Value *Opnd, Type *Ty) {
+  SExtBuilder *SB = new SExtBuilder(Inst, Opnd, Ty);
+  Actions.push_back(SB);
+  return SB->getBuiltInstruction();
+}
+
+void TypePromotionTransaction::moveBefore(Instruction *Inst,
+                                          Instruction *Before) {
+  Actions.push_back(
+      new TypePromotionTransaction::InstructionMoveBefore(Inst, Before));
+}
+
+TypePromotionTransaction::ConstRestorationPt
+TypePromotionTransaction::getRestorationPoint() const {
+  return Actions.rbegin() != Actions.rend() ? *Actions.rbegin() : NULL;
+}
+
+void TypePromotionTransaction::commit() {
+  for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
+       ++It) {
+    (*It)->commit();
+    delete *It;
+  }
+  Actions.clear();
+}
+
+void TypePromotionTransaction::rollback(
+    TypePromotionTransaction::ConstRestorationPt Point) {
+  while (!Actions.empty() && Point != (*Actions.rbegin())) {
+    TypePromotionAction *Curr = Actions.pop_back_val();
+    Curr->undo();
+    delete Curr;
+  }
+}
+
+TypePromotionTransaction::~TypePromotionTransaction() {
+  for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt; ++It)
+    delete *It;
+  Actions.clear();
+}
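+
+// Illustrative usage of the transaction API (Inst, NewTy, InsertPt and
+// Profitable are hypothetical):
+//   TypePromotionTransaction TPT;
+//   TypePromotionTransaction::ConstRestorationPt Pt =
+//       TPT.getRestorationPoint();
+//   TPT.mutateType(Inst, NewTy);   // Recorded; reversible.
+//   TPT.moveBefore(Inst, InsertPt);
+//   if (!Profitable)
+//     TPT.rollback(Pt);            // Undo every action made after Pt.
+//   else
+//     TPT.commit();                // Make all recorded actions permanent.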
+
+/// \brief A helper class for matching addressing modes.
+///
+/// This encapsulates the logic for matching the target-legal addressing modes.
+class AddressingModeMatcher {
+  SmallVectorImpl<Instruction*> &AddrModeInsts;
+  const TargetLowering &TLI;
+
+  /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
+  /// the memory instruction that we're computing this address for.
+  Type *AccessTy;
+  Instruction *MemoryInst;
+
+  /// AddrMode - This is the addressing mode that we're building up.  This is
+  /// part of the return value of this addressing mode matching stuff.
+  ExtAddrMode &AddrMode;
+
+  /// The truncate instructions inserted by other CodeGenPrepare optimizations.
+  const SetOfInstrs &InsertedTruncs;
+  /// A map from the instructions to their type before promotion.
+  InstrToOrigTy &PromotedInsts;
+  /// The ongoing transaction where every action should be registered.
+  TypePromotionTransaction &TPT;
+
+  /// IgnoreProfitability - This is set to true when we should not do
+  /// profitability checks.  When true, IsProfitableToFoldIntoAddressingMode
+  /// always returns true.
+  bool IgnoreProfitability;
+
+  AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
+                        const TargetLowering &T, Type *AT,
+                        Instruction *MI, ExtAddrMode &AM,
+                        const SetOfInstrs &InsertedTruncs,
+                        InstrToOrigTy &PromotedInsts,
+                        TypePromotionTransaction &TPT)
+      : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM),
+        InsertedTruncs(InsertedTruncs), PromotedInsts(PromotedInsts), TPT(TPT) {
+    IgnoreProfitability = false;
+  }
+public:
+
+  /// Match - Find the maximal addressing mode that a load/store of V can fold,
+  /// given an access type of AccessTy.  This returns a list of involved
+  /// instructions in AddrModeInsts.
+  /// \p InsertedTruncs The truncate instructions inserted by other
+  /// CodeGenPrepare optimizations.
+  /// \p PromotedInsts maps the instructions to their type before promotion.
+  /// \p TPT The ongoing transaction where every action should be registered.
+  static ExtAddrMode Match(Value *V, Type *AccessTy,
+                           Instruction *MemoryInst,
+                           SmallVectorImpl<Instruction*> &AddrModeInsts,
+                           const TargetLowering &TLI,
+                           const SetOfInstrs &InsertedTruncs,
+                           InstrToOrigTy &PromotedInsts,
+                           TypePromotionTransaction &TPT) {
+    ExtAddrMode Result;
+
+    bool Success = AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
+                                         MemoryInst, Result, InsertedTruncs,
+                                         PromotedInsts, TPT).MatchAddr(V, 0);
+    (void)Success; assert(Success && "Couldn't select *anything*?");
+    return Result;
+  }
+private:
+  bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
+  bool MatchAddr(Value *V, unsigned Depth);
+  bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth,
+                          bool *MovedAway = NULL);
+  bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
+                                            ExtAddrMode &AMBefore,
+                                            ExtAddrMode &AMAfter);
+  bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
+  bool IsPromotionProfitable(unsigned MatchedSize, unsigned SizeWithPromotion,
+                             Value *PromotedOperand) const;
+};
+
+/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
+/// Return true and update AddrMode if this addr mode is legal for the target,
+/// false if not.
+bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
+                                             unsigned Depth) {
+  // If Scale is 1, then this is the same as adding ScaleReg to the addressing
+  // mode.  Just process that directly.
+  if (Scale == 1)
+    return MatchAddr(ScaleReg, Depth);
+
+  // If the scale is 0, it takes nothing to add this.
+  if (Scale == 0)
+    return true;
+
+  // If we already have a scale of this value, we can add to it; otherwise, we
+  // need an available scale field.
+  if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
+    return false;
+
+  ExtAddrMode TestAddrMode = AddrMode;
+
+  // Add scale to turn X*4+X*3 -> X*7.  This could also do things like
+  // [A+B + A*7] -> [B+A*8].
+  TestAddrMode.Scale += Scale;
+  TestAddrMode.ScaledReg = ScaleReg;
+
+  // If the new address isn't legal, bail out.
+  if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
+    return false;
+
+  // It was legal, so commit it.
+  AddrMode = TestAddrMode;
+
+  // Okay, we decided that we can add ScaleReg*Scale to AddrMode.  Check now
+  // to see if ScaleReg is actually X+C.  If so, we can turn this into adding
+  // X*Scale + C*Scale to addr mode.
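+  // For example (illustrative): if ScaleReg is (add i64 %x, 7) and Scale is
+  // 4, we can instead use %x as the scaled register and fold 7*4 = 28 into
+  // the base offset.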
+  ConstantInt *CI = 0; Value *AddLHS = 0;
+  if (isa<Instruction>(ScaleReg) &&  // not a constant expr.
+      match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
+    TestAddrMode.ScaledReg = AddLHS;
+    TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
+
+    // If this addressing mode is legal, commit it and remember that we folded
+    // this instruction.
+    if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
+      AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
+      AddrMode = TestAddrMode;
+      return true;
+    }
+  }
+
+  // Otherwise, not (x+c)*scale, just return what we have.
+  return true;
+}
+
+/// MightBeFoldableInst - This is a little filter, which returns true if an
+/// addressing computation involving I might be folded into a load/store
+/// accessing it.  This doesn't need to be perfect, but needs to accept at least
+/// the set of instructions that MatchOperationAddr can.
+static bool MightBeFoldableInst(Instruction *I) {
+  switch (I->getOpcode()) {
+  case Instruction::BitCast:
+    // Don't touch identity bitcasts.
+    if (I->getType() == I->getOperand(0)->getType())
+      return false;
+    return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
+  case Instruction::PtrToInt:
+    // PtrToInt is always a noop, as we know that the int type is pointer sized.
+    return true;
+  case Instruction::IntToPtr:
+    // We know the input is intptr_t, so this is foldable.
+    return true;
+  case Instruction::Add:
+    return true;
+  case Instruction::Mul:
+  case Instruction::Shl:
+    // Can only handle X*C and X << C.
+    return isa<ConstantInt>(I->getOperand(1));
+  case Instruction::GetElementPtr:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// \brief Helper class to perform type promotion.
+class TypePromotionHelper {
+  /// \brief Utility function to check whether or not a sign extension of
+  /// \p Inst with \p ConsideredSExtType can be moved through \p Inst by either
+  /// using the operands of \p Inst or promoting \p Inst.
+  /// In other words, check whether:
+  /// sext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredSExtType
+  /// can be rewritten as either:
+  /// #1 Promotion applies:
+  /// ConsideredSExtType Inst (sext opnd1 to ConsideredSExtType, ...).
+  /// #2 Operand reuses:
+  /// sext opnd1 to ConsideredSExtType.
+  /// \p PromotedInsts maps the instructions to their type before promotion.
+  static bool canGetThrough(const Instruction *Inst, Type *ConsideredSExtType,
+                            const InstrToOrigTy &PromotedInsts);
+
+  /// \brief Utility function to determine if \p OpIdx should be promoted when
+  /// promoting \p Inst.
+  static bool shouldSExtOperand(const Instruction *Inst, int OpIdx) {
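+    // Operand 0 of a select is its i1 condition; it must keep its type and
+    // must therefore not be sign extended.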
+    if (isa<SelectInst>(Inst) && OpIdx == 0)
+      return false;
+    return true;
+  }
+
+  /// \brief Utility function to promote the operand of \p SExt when this
+  /// operand is a promotable trunc or sext.
+  /// \p PromotedInsts maps the instructions to their type before promotion.
+  /// \p CreatedInsts[out] contains how many non-free instructions have been
+  /// created to promote the operand of SExt.
+  /// Should never be called directly.
+  /// \return The promoted value which is used instead of SExt.
+  static Value *promoteOperandForTruncAndSExt(Instruction *SExt,
+                                              TypePromotionTransaction &TPT,
+                                              InstrToOrigTy &PromotedInsts,
+                                              unsigned &CreatedInsts);
+
+  /// \brief Utility function to promote the operand of \p SExt when this
+  /// operand is promotable and is not a supported trunc or sext.
+  /// \p PromotedInsts maps the instructions to their type before promotion.
+  /// \p CreatedInsts[out] contains how many non-free instructions have been
+  /// created to promote the operand of SExt.
+  /// Should never be called directly.
+  /// \return The promoted value which is used instead of SExt.
+  static Value *promoteOperandForOther(Instruction *SExt,
+                                       TypePromotionTransaction &TPT,
+                                       InstrToOrigTy &PromotedInsts,
+                                       unsigned &CreatedInsts);
+
+public:
+  /// Type for the utility function that promotes the operand of SExt.
+  typedef Value *(*Action)(Instruction *SExt, TypePromotionTransaction &TPT,
+                           InstrToOrigTy &PromotedInsts,
+                           unsigned &CreatedInsts);
+  /// \brief Given a sign extend instruction \p SExt, return the appropriate
+  /// action to promote the operand of \p SExt instead of using SExt.
+  /// \return NULL if no promotable action is possible with the current
+  /// sign extension.
+  /// \p InsertedTruncs keeps track of all the truncate instructions inserted by
+  /// the other CodeGenPrepare optimizations. This information is important
+  /// because we do not want to promote these truncates, as CodeGenPrepare
+  /// would just reinsert them later, creating an infinite create/remove loop.
+  /// \p PromotedInsts maps the instructions to their type before promotion.
+  static Action getAction(Instruction *SExt, const SetOfInstrs &InsertedTruncs,
+                          const TargetLowering &TLI,
+                          const InstrToOrigTy &PromotedInsts);
+};
+
+bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
+                                        Type *ConsideredSExtType,
+                                        const InstrToOrigTy &PromotedInsts) {
+  // We can always get through sext.
+  if (isa<SExtInst>(Inst))
+    return true;
+
+  // We can get through a binary operator if it is legal. In other words, the
+  // binary operator must have a nuw or nsw flag.
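+  // E.g. (illustrative): sext(add nsw i32 %a, %b) can be rewritten as
+  // add nsw i64 (sext %a), (sext %b).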
+  const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
+  if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
+      (BinOp->hasNoUnsignedWrap() || BinOp->hasNoSignedWrap()))
+    return true;
+
+  // Check if we can do the following simplification.
+  // sext(trunc(sext)) --> sext
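+  // E.g. (illustrative):
+  //   %a = sext i8 %v to i32
+  //   %t = trunc i32 %a to i16
+  //   %s = sext i16 %t to i64
+  // Here the trunc only drops bits that were sign extended from %v, so %s
+  // can be computed directly from %v.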
+  if (!isa<TruncInst>(Inst))
+    return false;
+
+  Value *OpndVal = Inst->getOperand(0);
+  // Check if we can use this operand in the sext.
+  // If the type is larger than the result type of the sign extension,
+  // we cannot.
+  if (OpndVal->getType()->getIntegerBitWidth() >
+      ConsideredSExtType->getIntegerBitWidth())
+    return false;
+
+  // If the operand of the truncate is not an instruction, we will not have
+  // any information on the dropped bits.
+  // (Actually we could for constants, but it is not worth the extra logic.)
+  Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
+  if (!Opnd)
+    return false;
+
+  // Check that the original type of the operand is narrow enough, i.e., that
+  // the trunc just drops sign extended bits.
+  // #1 get the type of the operand.
+  const Type *OpndType;
+  InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
+  if (It != PromotedInsts.end())
+    OpndType = It->second;
+  else if (isa<SExtInst>(Opnd))
+    OpndType = cast<Instruction>(Opnd)->getOperand(0)->getType();
+  else
+    return false;
+
+  // #2 check that the truncate just drops sign extended bits.
+  if (Inst->getType()->getIntegerBitWidth() >= OpndType->getIntegerBitWidth())
+    return true;
+
+  return false;
+}
+
+TypePromotionHelper::Action TypePromotionHelper::getAction(
+    Instruction *SExt, const SetOfInstrs &InsertedTruncs,
+    const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
+  Instruction *SExtOpnd = dyn_cast<Instruction>(SExt->getOperand(0));
+  Type *SExtTy = SExt->getType();
+  // If the operand of the sign extension is not an instruction, we cannot
+  // get through.
+  // If it is, check whether we can get through.
+  if (!SExtOpnd || !canGetThrough(SExtOpnd, SExtTy, PromotedInsts))
+    return NULL;
+
+  // Do not promote if the operand has been added by CodeGenPrepare.
+  // Otherwise, it means we are undoing an optimization that is likely to be
+  // redone, thus causing a potential infinite loop.
+  if (isa<TruncInst>(SExtOpnd) && InsertedTruncs.count(SExtOpnd))
+    return NULL;
+
+  // SExt or Trunc instructions.
+  // Return the related handler.
+  if (isa<SExtInst>(SExtOpnd) || isa<TruncInst>(SExtOpnd))
+    return promoteOperandForTruncAndSExt;
+
+  // Regular instruction.
+  // Abort early if we will have to insert non-free instructions.
+  if (!SExtOpnd->hasOneUse() &&
+      !TLI.isTruncateFree(SExtTy, SExtOpnd->getType()))
+    return NULL;
+  return promoteOperandForOther;
+}
+
+Value *TypePromotionHelper::promoteOperandForTruncAndSExt(
+    llvm::Instruction *SExt, TypePromotionTransaction &TPT,
+    InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts) {
+  // By construction, the operand of SExt is an instruction. Otherwise we cannot
+  // get through it and this method should not be called.
+  Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
+  // Replace sext(trunc(opnd)) or sext(sext(opnd))
+  // => sext(opnd).
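+  // E.g. (illustrative): for
+  //   %t = trunc i32 %opnd to i16
+  //   %s = sext i16 %t to i64
+  // we rewrite %s to sign extend %opnd directly:
+  //   %s = sext i32 %opnd to i64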
+  TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
+  CreatedInsts = 0;
+
+  // Remove dead code.
+  if (SExtOpnd->use_empty())
+    TPT.eraseInstruction(SExtOpnd);
+
+  // Check if the sext is still needed.
+  if (SExt->getType() != SExt->getOperand(0)->getType())
+    return SExt;
+
+  // At this point we have: sext ty opnd to ty.
+  // Reassign the uses of SExt to the opnd and remove SExt.
+  Value *NextVal = SExt->getOperand(0);
+  TPT.eraseInstruction(SExt, NextVal);
+  return NextVal;
+}
+
+Value *
+TypePromotionHelper::promoteOperandForOther(Instruction *SExt,
+                                            TypePromotionTransaction &TPT,
+                                            InstrToOrigTy &PromotedInsts,
+                                            unsigned &CreatedInsts) {
+  // By construction, the operand of SExt is an instruction. Otherwise we cannot
+  // get through it and this method should not be called.
+  Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
+  CreatedInsts = 0;
+  if (!SExtOpnd->hasOneUse()) {
+    // SExtOpnd will be promoted.
+    // All its uses, except SExt, will need to use a truncated value of the
+    // promoted version.
+    // Create the truncate now.
+    Instruction *Trunc = TPT.createTrunc(SExt, SExtOpnd->getType());
+    Trunc->removeFromParent();
+    // Insert it just after the definition.
+    Trunc->insertAfter(SExtOpnd);
+
+    TPT.replaceAllUsesWith(SExtOpnd, Trunc);
+    // Restore the operand of SExt (which has been replaced by the previous
+    // call to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
+    TPT.setOperand(SExt, 0, SExtOpnd);
+  }
+
+  // Get through the Instruction:
+  // 1. Update its type.
+  // 2. Replace the uses of SExt by Inst.
+  // 3. Sign extend each operand that needs to be sign extended.
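+  // For instance (illustrative), for
+  //   %op = add nsw i32 %a, %b
+  //   %s  = sext i32 %op to i64
+  // this produces
+  //   %pa = sext i32 %a to i64
+  //   %pb = sext i32 %b to i64
+  //   %op = add nsw i64 %pa, %pb
+  // with the uses of %s rewritten to use %op (the original sext is reused
+  // for one of the operand extensions where possible).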
+
+  // Remember the original type of the instruction before promotion.
+  // This is useful to know that the high bits are sign extended bits.
+  PromotedInsts.insert(
+      std::pair<Instruction *, Type *>(SExtOpnd, SExtOpnd->getType()));
+  // Step #1.
+  TPT.mutateType(SExtOpnd, SExt->getType());
+  // Step #2.
+  TPT.replaceAllUsesWith(SExt, SExtOpnd);
+  // Step #3.
+  Instruction *SExtForOpnd = SExt;
+
+  DEBUG(dbgs() << "Propagate SExt to operands\n");
+  for (int OpIdx = 0, EndOpIdx = SExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
+       ++OpIdx) {
+    DEBUG(dbgs() << "Operand:\n" << *(SExtOpnd->getOperand(OpIdx)) << '\n');
+    if (SExtOpnd->getOperand(OpIdx)->getType() == SExt->getType() ||
+        !shouldSExtOperand(SExtOpnd, OpIdx)) {
+      DEBUG(dbgs() << "No need to propagate\n");
+      continue;
+    }
+    // Check if we can statically sign extend the operand.
+    Value *Opnd = SExtOpnd->getOperand(OpIdx);
+    if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
+      DEBUG(dbgs() << "Statically sign extend\n");
+      TPT.setOperand(
+          SExtOpnd, OpIdx,
+          ConstantInt::getSigned(SExt->getType(), Cst->getSExtValue()));
+      continue;
+    }
+    // UndefValue are typed, so we have to statically sign extend them.
+    if (isa<UndefValue>(Opnd)) {
+      DEBUG(dbgs() << "Statically sign extend\n");
+      TPT.setOperand(SExtOpnd, OpIdx, UndefValue::get(SExt->getType()));
+      continue;
+    }
+
+    // Otherwise we have to explicitly sign extend the operand.
+    // Check if SExt was already reused to sign extend a previous operand.
+    if (!SExtForOpnd) {
+      // If so, create a new sext instruction.
+      DEBUG(dbgs() << "More operands to sext\n");
+      SExtForOpnd = TPT.createSExt(SExt, Opnd, SExt->getType());
+      ++CreatedInsts;
+    }
+
+    TPT.setOperand(SExtForOpnd, 0, Opnd);
+
+    // Move the sign extension before the insertion point.
+    TPT.moveBefore(SExtForOpnd, SExtOpnd);
+    TPT.setOperand(SExtOpnd, OpIdx, SExtForOpnd);
+    // If more sexts are required, new instructions will have to be created.
+    SExtForOpnd = NULL;
+  }
+  if (SExtForOpnd == SExt) {
+    DEBUG(dbgs() << "Sign extension is useless now\n");
+    TPT.eraseInstruction(SExt);
+  }
+  return SExtOpnd;
+}
+
+/// IsPromotionProfitable - Check whether or not promoting an instruction
+/// to a wider type was profitable.
+/// \p MatchedSize gives the number of instructions that have been matched
+/// in the addressing mode after the promotion was applied.
+/// \p SizeWithPromotion gives the number of created instructions for
+/// the promotion plus the number of instructions that have been
+/// matched in the addressing mode before the promotion.
+/// \p PromotedOperand is the value that has been promoted.
+/// \return True if the promotion is profitable, false otherwise.
+bool
+AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize,
+                                             unsigned SizeWithPromotion,
+                                             Value *PromotedOperand) const {
+  // We folded fewer instructions than we created to promote the operand.
+  // This is not profitable.
+  if (MatchedSize < SizeWithPromotion)
+    return false;
+  if (MatchedSize > SizeWithPromotion)
+    return true;
+  // The promotion is neutral but it may help folding the sign extension in
+  // loads for instance.
+  // Check that we did not create an illegal instruction.
+  Instruction *PromotedInst = dyn_cast<Instruction>(PromotedOperand);
+  if (!PromotedInst)
+    return false;
+  int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
+  // If the ISDOpcode is undefined, it was undefined before the promotion.
+  if (!ISDOpcode)
+    return true;
+  // Otherwise, check if the promoted instruction is legal or not.
+  return TLI.isOperationLegalOrCustom(ISDOpcode,
+                                      EVT::getEVT(PromotedInst->getType()));
+}
+
+/// MatchOperationAddr - Given an instruction or constant expr, see if we can
+/// fold the operation into the addressing mode.  If so, update the addressing
+/// mode and return true, otherwise return false without modifying AddrMode.
+/// If \p MovedAway is not NULL, it indicates whether or not AddrInst has to
+/// be folded into the addressing mode on success.
+/// If \p MovedAway == true, \p AddrInst will not be part of the addressing
+/// mode because it has been moved away.
+/// Thus AddrInst must not be added to the matched instructions.
+/// This state can happen when AddrInst is a sext, since it may be moved away.
+/// Therefore, AddrInst may not be valid when MovedAway is true and it must
+/// not be referenced anymore.
+bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
+                                               unsigned Depth,
+                                               bool *MovedAway) {
+  // Avoid exponential behavior on extremely deep expression trees.
+  if (Depth >= 5) return false;
+
+  // By default, all matched instructions stay in place.
+  if (MovedAway)
+    *MovedAway = false;
+
+  switch (Opcode) {
+  case Instruction::PtrToInt:
+    // PtrToInt is always a noop, as we know that the int type is pointer sized.
+    return MatchAddr(AddrInst->getOperand(0), Depth);
+  case Instruction::IntToPtr:
+    // This inttoptr is a no-op if the integer type is pointer sized.
+    if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
+        TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace()))
+      return MatchAddr(AddrInst->getOperand(0), Depth);
+    return false;
+  case Instruction::BitCast:
+    // BitCast is always a noop, and we can handle it as long as it is
+    // int->int or pointer->pointer (we don't want int<->fp or something).
+    if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
+         AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
+        // Don't touch identity bitcasts.  These were probably put here by LSR,
+        // and we don't want to mess around with them.  Assume it knows what it
+        // is doing.
+        AddrInst->getOperand(0)->getType() != AddrInst->getType())
+      return MatchAddr(AddrInst->getOperand(0), Depth);
+    return false;
+  case Instruction::Add: {
+    // Check to see if we can merge in the RHS then the LHS.  If so, we win.
+    ExtAddrMode BackupAddrMode = AddrMode;
+    unsigned OldSize = AddrModeInsts.size();
+    // Start a transaction at this point.
+    // The LHS may match but not the RHS.
+    // Therefore, we need a higher level restoration point to undo partially
+    // matched operation.
+    TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+        TPT.getRestorationPoint();
+
+    if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
+        MatchAddr(AddrInst->getOperand(0), Depth+1))
+      return true;
+
+    // Restore the old addr mode info.
+    AddrMode = BackupAddrMode;
+    AddrModeInsts.resize(OldSize);
+    TPT.rollback(LastKnownGood);
+
+    // Otherwise this was over-aggressive.  Try merging in the LHS then the RHS.
+    if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
+        MatchAddr(AddrInst->getOperand(1), Depth+1))
+      return true;
+
+    // Otherwise we definitely can't merge the ADD in.
+    AddrMode = BackupAddrMode;
+    AddrModeInsts.resize(OldSize);
+    TPT.rollback(LastKnownGood);
+    break;
+  }
+  //case Instruction::Or:
+  // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
+  //break;
+  case Instruction::Mul:
+  case Instruction::Shl: {
+    // Can only handle X*C and X << C.
+    ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
+    if (!RHS) return false;
+    int64_t Scale = RHS->getSExtValue();
+    if (Opcode == Instruction::Shl)
+      Scale = 1LL << Scale;
+
+    return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
+  }
+  case Instruction::GetElementPtr: {
+    // Scan the GEP.  We check whether it contains constant offsets and at most
+    // one variable offset.
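+    // E.g. (illustrative):
+    //   %a = getelementptr i32* %p, i64 %i
+    // decomposes into a single variable index %i with scale 4 (the alloc
+    // size of i32), while struct field indices fold into ConstantOffset.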
+    int VariableOperand = -1;
+    unsigned VariableScale = 0;
+
+    int64_t ConstantOffset = 0;
+    const DataLayout *TD = TLI.getDataLayout();
+    gep_type_iterator GTI = gep_type_begin(AddrInst);
+    for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
+      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+        const StructLayout *SL = TD->getStructLayout(STy);
+        unsigned Idx =
+          cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
+        ConstantOffset += SL->getElementOffset(Idx);
+      } else {
+        uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
+        if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
+          ConstantOffset += CI->getSExtValue()*TypeSize;
+        } else if (TypeSize) {  // Scales of zero don't do anything.
+          // We only allow one variable index at the moment.
+          if (VariableOperand != -1)
+            return false;
+
+          // Remember the variable index.
+          VariableOperand = i;
+          VariableScale = TypeSize;
+        }
+      }
+    }
+
+    // A common case is for the GEP to only do a constant offset.  In this case,
+    // just add it to the disp field and check validity.
+    if (VariableOperand == -1) {
+      AddrMode.BaseOffs += ConstantOffset;
+      if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
+        // Check to see if we can fold the base pointer in too.
+        if (MatchAddr(AddrInst->getOperand(0), Depth+1))
+          return true;
+      }
+      AddrMode.BaseOffs -= ConstantOffset;
+      return false;
+    }
+
+    // Save the valid addressing mode in case we can't match.
+    ExtAddrMode BackupAddrMode = AddrMode;
+    unsigned OldSize = AddrModeInsts.size();
+
+    // See if the scale and offset amount is valid for this target.
+    AddrMode.BaseOffs += ConstantOffset;
+
+    // Match the base operand of the GEP.
+    if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
+      // If it couldn't be matched, just stuff the value in a register.
+      if (AddrMode.HasBaseReg) {
+        AddrMode = BackupAddrMode;
+        AddrModeInsts.resize(OldSize);
+        return false;
+      }
+      AddrMode.HasBaseReg = true;
+      AddrMode.BaseReg = AddrInst->getOperand(0);
+    }
+
+    // Match the remaining variable portion of the GEP.
+    if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
+                          Depth)) {
+      // If it couldn't be matched, try stuffing the base into a register
+      // instead of matching it, and retrying the match of the scale.
+      AddrMode = BackupAddrMode;
+      AddrModeInsts.resize(OldSize);
+      if (AddrMode.HasBaseReg)
+        return false;
+      AddrMode.HasBaseReg = true;
+      AddrMode.BaseReg = AddrInst->getOperand(0);
+      AddrMode.BaseOffs += ConstantOffset;
+      if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
+                            VariableScale, Depth)) {
+        // If even that didn't work, bail.
+        AddrMode = BackupAddrMode;
+        AddrModeInsts.resize(OldSize);
+        return false;
+      }
+    }
+
+    return true;
+  }
+  case Instruction::SExt: {
+    // Try to move this sext out of the way of the addressing mode.
+    Instruction *SExt = cast<Instruction>(AddrInst);
+    // Ask for a method for doing so.
+    TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
+        SExt, InsertedTruncs, TLI, PromotedInsts);
+    if (!TPH)
+      return false;
+
+    TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+        TPT.getRestorationPoint();
+    unsigned CreatedInsts = 0;
+    Value *PromotedOperand = TPH(SExt, TPT, PromotedInsts, CreatedInsts);
+    // SExt has been moved away.
+    // Thus either it will be rematched later in the recursive calls or it is
+    // gone. Anyway, we must not fold it into the addressing mode at this point.
+    // E.g.,
+    // op = add opnd, 1
+    // idx = sext op
+    // addr = gep base, idx
+    // is now:
+    // promotedOpnd = sext opnd           <- no match here
+    // op = promoted_add promotedOpnd, 1  <- match (later in recursive calls)
+    // addr = gep base, op                <- match
+    if (MovedAway)
+      *MovedAway = true;
+
+    assert(PromotedOperand &&
+           "TypePromotionHelper should have filtered out those cases");
+
+    ExtAddrMode BackupAddrMode = AddrMode;
+    unsigned OldSize = AddrModeInsts.size();
+
+    if (!MatchAddr(PromotedOperand, Depth) ||
+        !IsPromotionProfitable(AddrModeInsts.size(), OldSize + CreatedInsts,
+                               PromotedOperand)) {
+      AddrMode = BackupAddrMode;
+      AddrModeInsts.resize(OldSize);
+      DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
+      TPT.rollback(LastKnownGood);
+      return false;
+    }
+    return true;
+  }
+  }
+  return false;
+}
+
+/// MatchAddr - If we can, try to add the value of 'Addr' into the current
+/// addressing mode.  If Addr can't be added to AddrMode this returns false and
+/// leaves AddrMode unmodified.  This assumes that Addr is either a pointer type
+/// or intptr_t for the target.
+///
+bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
+  // Start a transaction at this point that we will rollback if the matching
+  // fails.
+  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+      TPT.getRestorationPoint();
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
+    // Fold in immediates if legal for the target.
+    AddrMode.BaseOffs += CI->getSExtValue();
+    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+      return true;
+    AddrMode.BaseOffs -= CI->getSExtValue();
+  } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
+    // If this is a global variable, try to fold it into the addressing mode.
+    if (AddrMode.BaseGV == 0) {
+      AddrMode.BaseGV = GV;
+      if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+        return true;
+      AddrMode.BaseGV = 0;
+    }
+  } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
+    ExtAddrMode BackupAddrMode = AddrMode;
+    unsigned OldSize = AddrModeInsts.size();
+
+    // Check to see if it is possible to fold this operation.
+    bool MovedAway = false;
+    if (MatchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
+      // This instruction may have been moved away. If so, there is nothing
+      // to check here.
+      if (MovedAway)
+        return true;
+      // Okay, it's possible to fold this.  Check to see if it is actually
+      // *profitable* to do so.  We use a simple cost model to avoid increasing
+      // register pressure too much.
+      if (I->hasOneUse() ||
+          IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
+        AddrModeInsts.push_back(I);
+        return true;
+      }
+
+      // It isn't profitable to do this, roll back.
+      //cerr << "NOT FOLDING: " << *I;
+      AddrMode = BackupAddrMode;
+      AddrModeInsts.resize(OldSize);
+      TPT.rollback(LastKnownGood);
+    }
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
+    if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
+      return true;
+    TPT.rollback(LastKnownGood);
+  } else if (isa<ConstantPointerNull>(Addr)) {
+    // Null pointer gets folded without affecting the addressing mode.
+    return true;
+  }
+
+  // Worst case, the target should support [reg] addressing modes. :)
+  if (!AddrMode.HasBaseReg) {
+    AddrMode.HasBaseReg = true;
+    AddrMode.BaseReg = Addr;
+    // Still check for legality in case the target supports [imm] but not [i+r].
+    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+      return true;
+    AddrMode.HasBaseReg = false;
+    AddrMode.BaseReg = 0;
+  }
+
+  // If the base register is already taken, see if we can do [r+r].
+  if (AddrMode.Scale == 0) {
+    AddrMode.Scale = 1;
+    AddrMode.ScaledReg = Addr;
+    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
+      return true;
+    AddrMode.Scale = 0;
+    AddrMode.ScaledReg = 0;
+  }
+  // Couldn't match.
+  TPT.rollback(LastKnownGood);
+  return false;
+}
+
+/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
+/// inline asm call are due to memory operands.  If so, return true, otherwise
+/// return false.
+static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
+                                    const TargetLowering &TLI) {
+  TargetLowering::AsmOperandInfoVector TargetConstraints =
+      TLI.ParseConstraints(ImmutableCallSite(CI));
+  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
+    // Compute the constraint code and ConstraintType to use.
+    TLI.ComputeConstraintToUse(OpInfo, SDValue());
+
+    // If this asm operand is our Value*, and if it isn't an indirect memory
+    // operand, we can't fold it!
+    if (OpInfo.CallOperandVal == OpVal &&
+        (OpInfo.ConstraintType != TargetLowering::C_Memory ||
+         !OpInfo.isIndirect))
+      return false;
+  }
+
+  return true;
+}
+
+/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
+/// memory use.  If we find an obviously non-foldable instruction, return true.
+/// Add the ultimately found memory instructions to MemoryUses.
+static bool FindAllMemoryUses(Instruction *I,
+                SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
+                              SmallPtrSet<Instruction*, 16> &ConsideredInsts,
+                              const TargetLowering &TLI) {
+  // If we already considered this instruction, we're done.
+  if (!ConsideredInsts.insert(I))
+    return false;
+
+  // If this is an obviously unfoldable instruction, bail out.
+  if (!MightBeFoldableInst(I))
+    return true;
+
+  // Loop over all the uses, recursively processing them.
+  for (Use &U : I->uses()) {
+    Instruction *UserI = cast<Instruction>(U.getUser());
+
+    if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
+      MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
+      continue;
+    }
+
+    if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
+      unsigned opNo = U.getOperandNo();
+      if (opNo == 0) return true; // Storing addr, not into addr.
+      MemoryUses.push_back(std::make_pair(SI, opNo));
+      continue;
+    }
+
+    if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
+      InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
+      if (!IA) return true;
+
+      // If this is a memory operand, we're cool, otherwise bail out.
+      if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
+        return true;
+      continue;
+    }
+
+    if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI))
+      return true;
+  }
+
+  return false;
+}
+
+/// ValueAlreadyLiveAtInst - Return true if Val is already known to be live at
+/// the use site that we're folding it into.  If so, there is no cost to
+/// include it in the addressing mode.  KnownLive1 and KnownLive2 are two values
+/// that we know are live at the instruction already.
+bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
+                                                   Value *KnownLive2) {
+  // If Val is either of the known-live values, we know it is live!
+  if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
+    return true;
+
+  // All values other than instructions and arguments (e.g. constants) are live.
+  if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
+
+  // If Val is a constant sized alloca in the entry block, it is live; this is
+  // true because it is just a reference to the stack/frame pointer, which is
+  // live for the whole function.
+  if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
+    if (AI->isStaticAlloca())
+      return true;
+
+  // Check to see if this value is already used in the memory instruction's
+  // block.  If so, it's already live into the block at the very least, so we
+  // can reasonably fold it.
+  return Val->isUsedInBasicBlock(MemoryInst->getParent());
+}
+
+/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
+/// mode of the machine to fold the specified instruction into a load or store
+/// that ultimately uses it.  However, the specified instruction has multiple
+/// uses.  Given this, it may actually increase register pressure to fold it
+/// into the load.  For example, consider this code:
+///
+///     X = ...
+///     Y = X+1
+///     use(Y)   -> nonload/store
+///     Z = Y+1
+///     load Z
+///
+/// In this case, Y has multiple uses, and can be folded into the load of Z
+/// (yielding load [X+2]).  However, doing this will cause both "X" and "X+1" to
+/// be live at the use(Y) line.  If we don't fold Y into load Z, we use one
+/// fewer register.  Since Y can't be folded into "use(Y)" we don't increase the
+/// number of computations either.
+///
+/// Note that this (like most of CodeGenPrepare) is just a rough heuristic.  If
+/// X was live across 'load Z' for other reasons, we actually *would* want to
+/// fold the addressing mode in the Z case.  This would make Y die earlier.
+bool AddressingModeMatcher::
+IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
+                                     ExtAddrMode &AMAfter) {
+  if (IgnoreProfitability) return true;
+
+  // AMBefore is the addressing mode before this instruction was folded into it,
+  // and AMAfter is the addressing mode after the instruction was folded.  Get
+  // the set of registers referenced by AMAfter and subtract out those
+  // referenced by AMBefore: this is the set of values which folding in this
+  // address extends the lifetime of.
+  //
+  // Note that there are only two potential values being referenced here,
+  // BaseReg and ScaleReg (global addresses are always available, as are any
+  // folded immediates).
+  Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
+
+  // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
+  // lifetime wasn't extended by adding this instruction.
+  if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+    BaseReg = 0;
+  if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
+    ScaledReg = 0;
+
+  // If folding this instruction (and its subexpressions) didn't extend any
+  // live ranges, we're ok with it.
+  if (BaseReg == 0 && ScaledReg == 0)
+    return true;
+
+  // If all uses of this instruction are ultimately load/store/inlineasm's,
+  // check to see if their addressing modes will include this instruction.  If
+  // so, we can fold it into all uses, so it doesn't matter if it has multiple
+  // uses.
+  SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
+  SmallPtrSet<Instruction*, 16> ConsideredInsts;
+  if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
+    return false;  // Has a non-memory, non-foldable use!
+
+  // Now that we know that all uses of this instruction are part of a chain of
+  // computation involving only operations that could theoretically be folded
+  // into a memory use, loop over each of these uses and see if they could
+  // *actually* fold the instruction.
+  SmallVector<Instruction*, 32> MatchedAddrModeInsts;
+  for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
+    Instruction *User = MemoryUses[i].first;
+    unsigned OpNo = MemoryUses[i].second;
+
+    // Get the access type of this use.  If the use isn't a pointer, we don't
+    // know what it accesses.
+    Value *Address = User->getOperand(OpNo);
+    if (!Address->getType()->isPointerTy())
+      return false;
+    Type *AddressAccessTy = Address->getType()->getPointerElementType();
+
+    // Do a match against the root of this address, ignoring profitability. This
+    // will tell us if the addressing mode for the memory operation will
+    // *actually* cover the shared instruction.
+    ExtAddrMode Result;
+    TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+        TPT.getRestorationPoint();
+    AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
+                                  MemoryInst, Result, InsertedTruncs,
+                                  PromotedInsts, TPT);
+    Matcher.IgnoreProfitability = true;
+    bool Success = Matcher.MatchAddr(Address, 0);
+    (void)Success; assert(Success && "Couldn't select *anything*?");
+
+    // The match was performed only to check profitability; the changes made
+    // are not part of the original matcher. Therefore, they should be
+    // dropped, otherwise the original matcher will not be in the right state.
+    TPT.rollback(LastKnownGood);
+
+    // If the match didn't cover I, then it won't be shared by it.
+    if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
+                  I) == MatchedAddrModeInsts.end())
+      return false;
+
+    MatchedAddrModeInsts.clear();
+  }
+
+  return true;
+}
+
+} // end anonymous namespace
+
+/// IsNonLocalValue - Return true if the specified value is defined in a
+/// different basic block than BB.
+static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
+  if (Instruction *I = dyn_cast<Instruction>(V))
+    return I->getParent() != BB;
+  return false;
+}
+
+/// OptimizeMemoryInst - Load and Store Instructions often have
+/// addressing modes that can do significant amounts of computation.  As such,
+/// instruction selection will try to get the load or store to do as much
+/// computation as possible for the program.  The problem is that isel can only
+/// see within a single block.  As such, we sink as much legal addressing mode
+/// computation into the block as possible.
+///
+/// This method is used to optimize both load/store and inline asms with memory
+/// operands.
+bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
+                                        Type *AccessTy) {
+  Value *Repl = Addr;
+
+  // Try to collapse single-value PHI nodes.  This is necessary to undo
+  // unprofitable PRE transformations.
+  SmallVector<Value*, 8> worklist;
+  SmallPtrSet<Value*, 16> Visited;
+  worklist.push_back(Addr);
+
+  // Use a worklist to iteratively look through PHI nodes, and ensure that
+  // the addressing mode obtained from the non-PHI roots of the graph
+  // are equivalent.
+  Value *Consensus = 0;
+  unsigned NumUsesConsensus = 0;
+  bool IsNumUsesConsensusValid = false;
+  SmallVector<Instruction*, 16> AddrModeInsts;
+  ExtAddrMode AddrMode;
+  TypePromotionTransaction TPT;
+  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+      TPT.getRestorationPoint();
+  while (!worklist.empty()) {
+    Value *V = worklist.back();
+    worklist.pop_back();
+
+    // Break use-def graph loops.
+    if (!Visited.insert(V)) {
+      Consensus = 0;
+      break;
+    }
+
+    // For a PHI node, push all of its incoming values.
+    if (PHINode *P = dyn_cast<PHINode>(V)) {
+      for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
+        worklist.push_back(P->getIncomingValue(i));
+      continue;
+    }
+
+    // For non-PHIs, determine the addressing mode being computed.
+    SmallVector<Instruction*, 16> NewAddrModeInsts;
+    ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
+        V, AccessTy, MemoryInst, NewAddrModeInsts, *TLI, InsertedTruncsSet,
+        PromotedInsts, TPT);
+
+    // This check is broken into two cases with very similar code to avoid using
+    // getNumUses() as much as possible. Some values have a lot of uses, so
+    // calling getNumUses() unconditionally caused a significant compile-time
+    // regression.
+    if (!Consensus) {
+      Consensus = V;
+      AddrMode = NewAddrMode;
+      AddrModeInsts = NewAddrModeInsts;
+      continue;
+    } else if (NewAddrMode == AddrMode) {
+      if (!IsNumUsesConsensusValid) {
+        NumUsesConsensus = Consensus->getNumUses();
+        IsNumUsesConsensusValid = true;
+      }
+
+      // Ensure that the obtained addressing mode is equivalent to that obtained
+      // for all other roots of the PHI traversal.  Also, when choosing one
+      // such root as representative, select the one with the most uses in order
+      // to keep the cost modeling heuristics in AddressingModeMatcher
+      // applicable.
+      unsigned NumUses = V->getNumUses();
+      if (NumUses > NumUsesConsensus) {
+        Consensus = V;
+        NumUsesConsensus = NumUses;
+        AddrModeInsts = NewAddrModeInsts;
+      }
+      continue;
+    }
+
+    Consensus = 0;
+    break;
+  }
+
+  // If the addressing mode couldn't be determined, or if multiple different
+  // ones were determined, bail out now.
+  if (!Consensus) {
+    TPT.rollback(LastKnownGood);
+    return false;
+  }
+  TPT.commit();
+
+  // Check to see if any of the instructions subsumed by this addr mode are
+  // non-local to MemoryInst's BB.
+  bool AnyNonLocal = false;
+  for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
+    if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
+      AnyNonLocal = true;
+      break;
+    }
+  }
+
+  // If all the instructions matched are already in this BB, don't do anything.
+  if (!AnyNonLocal) {
+    DEBUG(dbgs() << "CGP: Found      local addrmode: " << AddrMode << "\n");
+    return false;
+  }
+
+  // Insert this computation right after this user.  Since our caller is
+  // scanning from the top of the BB to the bottom, reuses of the expression
+  // are guaranteed to happen later.
+  IRBuilder<> Builder(MemoryInst);
+
+  // Now that we've determined the addressing expression we want to use and
+  // know that we have to sink it into this block, check to see if we have
+  // already done this for some other load/store instr in this block.  If so,
+  // reuse the computation.
+  Value *&SunkAddr = SunkAddrs[Addr];
+  if (SunkAddr) {
+    DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
+                 << *MemoryInst);
+    if (SunkAddr->getType() != Addr->getType())
+      SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
+  } else {
+    DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
+                 << *MemoryInst);
+    Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
+    Value *Result = 0;
+
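+    // Schematically (illustrative), the address is rebuilt in IntPtrTy as:
+    //   sunkaddr = ptrtoint BaseReg
+    //   sunkaddr = sunkaddr + ScaledReg * Scale
+    //   sunkaddr = sunkaddr + ptrtoint BaseGV
+    //   sunkaddr = sunkaddr + BaseOffs
+    //   addr     = inttoptr sunkaddr
+    // with each component skipped when absent.
+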
+    // Start with the base register. Do this first so that subsequent address
+    // matching finds it last, which will prevent it from trying to match it
+    // as the scaled value in case it happens to be a mul. That would be
+    // problematic if we've sunk a different mul for the scale, because then
+    // we'd end up sinking both muls.
+    if (AddrMode.BaseReg) {
+      Value *V = AddrMode.BaseReg;
+      if (V->getType()->isPointerTy())
+        V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
+      if (V->getType() != IntPtrTy)
+        V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
+      Result = V;
+    }
+
+    // Add the scale value.
+    if (AddrMode.Scale) {
+      Value *V = AddrMode.ScaledReg;
+      if (V->getType() == IntPtrTy) {
+        // done.
+      } else if (V->getType()->isPointerTy()) {
+        V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
+      } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
+                 cast<IntegerType>(V->getType())->getBitWidth()) {
+        V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
+      } else {
+        // It is only safe to sign extend the ScaledReg if we know that the
+        // math required to create it did not overflow before we extend it.
+        // Since the original IR value was tossed in favor of a constant back
+        // when the AddrMode was created we need to bail out gracefully if
+        // widths do not match instead of extending it.
+        if (Result != AddrMode.BaseReg)
+            cast<Instruction>(Result)->eraseFromParent();
+        return false;
+      }
+      if (AddrMode.Scale != 1)
+        V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
+                              "sunkaddr");
+      if (Result)
+        Result = Builder.CreateAdd(Result, V, "sunkaddr");
+      else
+        Result = V;
+    }
+
+    // Add in the BaseGV if present.
+    if (AddrMode.BaseGV) {
+      Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
+      if (Result)
+        Result = Builder.CreateAdd(Result, V, "sunkaddr");
+      else
+        Result = V;
+    }
+
+    // Add in the Base Offset if present.
+    if (AddrMode.BaseOffs) {
+      Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
+      if (Result)
+        Result = Builder.CreateAdd(Result, V, "sunkaddr");
+      else
+        Result = V;
+    }
+
+    if (Result == 0)
+      SunkAddr = Constant::getNullValue(Addr->getType());
+    else
+      SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
+  }
+
+  MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
+
+  // If we have no uses, recursively delete the value and all dead instructions
+  // using it.
+  if (Repl->use_empty()) {
+    // This can cause recursive deletion, which can invalidate our iterator.
+    // Use a WeakVH to hold onto it in case this happens.
+    WeakVH IterHandle(CurInstIterator);
+    BasicBlock *BB = CurInstIterator->getParent();
+
+    RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
+
+    if (IterHandle != CurInstIterator) {
+      // If the iterator instruction was recursively deleted, start over at the
+      // start of the block.
+      CurInstIterator = BB->begin();
+      SunkAddrs.clear();
+    }
+  }
+  ++NumMemoryInsts;
+  return true;
+}
+
+/// OptimizeInlineAsmInst - If there are any memory operands, use
+/// OptimizeMemoryInst to sink their address computation into the block when
+/// possible / profitable.
+bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
+  bool MadeChange = false;
+
+  TargetLowering::AsmOperandInfoVector
+    TargetConstraints = TLI->ParseConstraints(CS);
+  unsigned ArgNo = 0;
+  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
+
+    // Compute the constraint code and ConstraintType to use.
+    TLI->ComputeConstraintToUse(OpInfo, SDValue());
+
+    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+        OpInfo.isIndirect) {
+      Value *OpVal = CS->getArgOperand(ArgNo++);
+      MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
+    } else if (OpInfo.Type == InlineAsm::isInput)
+      ArgNo++;
+  }
+
+  return MadeChange;
+}
+
+/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
+/// basic block as the load, unless conditions are unfavorable. This allows
+/// SelectionDAG to fold the extend into the load.
+///
+bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
+  // Look for a load being extended.
+  LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
+  if (!LI) return false;
+
+  // If they're already in the same block, there's nothing to do.
+  if (LI->getParent() == I->getParent())
+    return false;
+
+  // If the load has other users and the truncate is not free, this probably
+  // isn't worthwhile.
+  if (!LI->hasOneUse() &&
+      TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
+              !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
+      !TLI->isTruncateFree(I->getType(), LI->getType()))
+    return false;
+
+  // Check whether the target supports casts folded into loads.
+  unsigned LType;
+  if (isa<ZExtInst>(I))
+    LType = ISD::ZEXTLOAD;
+  else {
+    assert(isa<SExtInst>(I) && "Unexpected ext type!");
+    LType = ISD::SEXTLOAD;
+  }
+  if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
+    return false;
+
+  // Move the extend into the same block as the load, so that SelectionDAG
+  // can fold it.
+  I->removeFromParent();
+  I->insertAfter(LI);
+  ++NumExtsMoved;
+  return true;
+}
+
+bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
+  BasicBlock *DefBB = I->getParent();
+
+  // If the result of a {s|z}ext and its source are both live out, rewrite all
+  // other uses of the source with the result of the extension.
+  Value *Src = I->getOperand(0);
+  if (Src->hasOneUse())
+    return false;
+
+  // Only do this xform if truncating is free.
+  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
+    return false;
+
+  // Only safe to perform the optimization if the source is also defined in
+  // this block.
+  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
+    return false;
+
+  bool DefIsLiveOut = false;
+  for (User *U : I->users()) {
+    Instruction *UI = cast<Instruction>(U);
+
+    // Figure out which BB this ext is used in.
+    BasicBlock *UserBB = UI->getParent();
+    if (UserBB == DefBB) continue;
+    DefIsLiveOut = true;
+    break;
+  }
+  if (!DefIsLiveOut)
+    return false;
+
+  // Make sure none of the uses are PHI nodes.
+  for (User *U : Src->users()) {
+    Instruction *UI = cast<Instruction>(U);
+    BasicBlock *UserBB = UI->getParent();
+    if (UserBB == DefBB) continue;
+    // Be conservative. We don't want this xform to end up introducing
+    // reloads just before load / store instructions.
+    if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
+      return false;
+  }
+
+  // InsertedTruncs - Only insert one trunc in each block.
+  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
+
+  bool MadeChange = false;
+  for (Use &U : Src->uses()) {
+    Instruction *User = cast<Instruction>(U.getUser());
+
+    // Figure out which BB this use of the source is in.
+    BasicBlock *UserBB = User->getParent();
+    if (UserBB == DefBB) continue;
+
+    // Both src and def are live in this block. Rewrite the use.
+    Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
+
+    if (!InsertedTrunc) {
+      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+      InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
+      InsertedTruncsSet.insert(InsertedTrunc);
+    }
+
+    // Replace a use of the {s|z}ext source with a use of the result.
+    U = InsertedTrunc;
+    ++NumExtUses;
+    MadeChange = true;
+  }
+
+  return MadeChange;
+}
+
+/// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be
+/// turned into an explicit branch.
+static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
+  // FIXME: This should use the same heuristics as IfConversion to determine
+  // whether a select is better represented as a branch.  This requires that
+  // branch probability metadata is preserved for the select, which is not the
+  // case currently.
+
+  CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
+
+  // If the branch is predicted right, an out of order CPU can avoid blocking on
+  // the compare.  Emit cmovs on compares with a memory operand as branches to
+  // avoid stalls on the load from memory.  If the compare has more than one use
+  // there's probably another cmov or setcc around so it's not worth emitting a
+  // branch.
+  if (!Cmp)
+    return false;
+
+  Value *CmpOp0 = Cmp->getOperand(0);
+  Value *CmpOp1 = Cmp->getOperand(1);
+
+  // We check that the memory operand has one use to avoid uses of the loaded
+  // value directly after the compare, making branches unprofitable.
+  return Cmp->hasOneUse() &&
+         ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
+          (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
+}
+
+
+/// If we have a SelectInst that will likely profit from branch prediction,
+/// turn it into a branch.
+bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
+  bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
+
+  // Can we convert the 'select' to control flow?
+  if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
+    return false;
+
+  TargetLowering::SelectSupportKind SelectKind;
+  if (VectorCond)
+    SelectKind = TargetLowering::VectorMaskSelect;
+  else if (SI->getType()->isVectorTy())
+    SelectKind = TargetLowering::ScalarCondVectorVal;
+  else
+    SelectKind = TargetLowering::ScalarValSelect;
+
+  // Do we have efficient codegen support for this kind of 'select'?
+  if (TLI->isSelectSupported(SelectKind)) {
+    // We have efficient codegen support for the select instruction.
+    // Check if it is profitable to keep this 'select'.
+    if (!TLI->isPredictableSelectExpensive() ||
+        !isFormingBranchFromSelectProfitable(SI))
+      return false;
+  }
+
+  ModifiedDT = true;
+
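+  // Schematically (block names illustrative), the transformation turns
+  //   %res = select i1 %c, %t, %f
+  // into
+  //   start:       ...
+  //                br i1 %c, label %select.end, label %select.mid
+  //   select.mid:  br label %select.end
+  //   select.end:  %res = phi [ %t, %start ], [ %f, %select.mid ]
+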
+  // First, we split the block containing the select into 2 blocks.
+  BasicBlock *StartBlock = SI->getParent();
+  BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
+  BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
+
+  // Create a new block serving as the landing pad for the branch.
+  BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid",
+                                             NextBlock->getParent(), NextBlock);
+
+  // Move the unconditional branch from the block with the select in it into our
+  // landing pad block.
+  StartBlock->getTerminator()->eraseFromParent();
+  BranchInst::Create(NextBlock, SmallBlock);
+
+  // Insert the real conditional branch based on the original condition.
+  BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI);
+
+  // The select itself is replaced with a PHI Node.
+  PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin());
+  PN->takeName(SI);
+  PN->addIncoming(SI->getTrueValue(), StartBlock);
+  PN->addIncoming(SI->getFalseValue(), SmallBlock);
+  SI->replaceAllUsesWith(PN);
+  SI->eraseFromParent();
+
+  // Instruct OptimizeBlock to skip to the next block.
+  CurInstIterator = StartBlock->end();
+  ++NumSelectsExpanded;
+  return true;
+}
+
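+/// isBroadcastShuffle - Heuristically check whether the shuffle is a splat,
+/// i.e. whether its defined mask elements all select the same input lane
+/// (e.g. a mask of <0, 0, 0, 0>).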
+static bool isBroadcastShuffle(ShuffleVectorInst *SVI) {
+  SmallVector<int, 16> Mask(SVI->getShuffleMask());
+  int SplatElem = -1;
+  for (unsigned i = 0; i < Mask.size(); ++i) {
+    if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem)
+      return false;
+    SplatElem = Mask[i];
+  }
+
+  return true;
+}
+
+/// Some targets have expensive vector shifts if the lanes aren't all the same
+/// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases
+/// it's often worth sinking a shufflevector splat down to its use so that
+/// codegen can spot all lanes are identical.
+bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
+  BasicBlock *DefBB = SVI->getParent();
+
+  // Only do this xform if variable vector shifts are particularly expensive.
+  if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType()))
+    return false;
+
+  // We only expect better codegen by sinking a shuffle if we can recognise a
+  // constant splat.
+  if (!isBroadcastShuffle(SVI))
+    return false;
+
+  // InsertedShuffles - Only insert a shuffle in each block once.
+  DenseMap<BasicBlock*, Instruction*> InsertedShuffles;
+
+  bool MadeChange = false;
+  for (User *U : SVI->users()) {
+    Instruction *UI = cast<Instruction>(U);
+
+    // Figure out which BB this shuffle is used in.
+    BasicBlock *UserBB = UI->getParent();
+    if (UserBB == DefBB) continue;
+
+    // For now only apply this when the splat is used by a shift instruction.
+    if (!UI->isShift()) continue;
+
+    // Everything checks out, sink the shuffle if the user's block doesn't
+    // already have a copy.
+    Instruction *&InsertedShuffle = InsertedShuffles[UserBB];
+
+    if (!InsertedShuffle) {
+      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+      InsertedShuffle = new ShuffleVectorInst(SVI->getOperand(0),
+                                              SVI->getOperand(1),
+                                              SVI->getOperand(2), "", InsertPt);
+    }
+
+    UI->replaceUsesOfWith(SVI, InsertedShuffle);
+    MadeChange = true;
+  }
+
+  // If we removed all uses, nuke the shuffle.
+  if (SVI->use_empty()) {
+    SVI->eraseFromParent();
+    MadeChange = true;
+  }
+
+  return MadeChange;
+}
+
+bool CodeGenPrepare::OptimizeInst(Instruction *I) {
+  if (PHINode *P = dyn_cast<PHINode>(I)) {
+    // It is possible for very late stage optimizations (such as SimplifyCFG)
+    // to introduce PHI nodes too late to be cleaned up.  If we detect such a
+    // trivial PHI, go ahead and zap it here.
+    if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : 0,
+                                       TLInfo, DT)) {
+      P->replaceAllUsesWith(V);
+      P->eraseFromParent();
+      ++NumPHIsElim;
+      return true;
+    }
+    return false;
+  }
+
+  if (CastInst *CI = dyn_cast<CastInst>(I)) {
+    // If the source of the cast is a constant, then this should have
+    // already been constant folded.  The only reason NOT to constant fold
+    // it is if something (e.g. LSR) was careful to place the constant
+    // evaluation in a block other than the one that uses it (e.g. to hoist
+    // the address of globals out of a loop).  If this is the case, we don't
+    // want to forward-subst the cast.
+    if (isa<Constant>(CI->getOperand(0)))
+      return false;
+
+    if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
+      return true;
+
+    if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
+      // Sink a zext or sext into its user blocks if the target type doesn't
+      // fit in one register.
+      if (TLI && TLI->getTypeAction(CI->getContext(),
+                                    TLI->getValueType(CI->getType())) ==
+                     TargetLowering::TypeExpandInteger) {
+        return SinkCast(CI);
+      } else {
+        bool MadeChange = MoveExtToFormExtLoad(I);
+        return MadeChange | OptimizeExtUses(I);
+      }
+    }
+    return false;
+  }
+
+  if (CmpInst *CI = dyn_cast<CmpInst>(I))
+    if (!TLI || !TLI->hasMultipleConditionRegisters())
+      return OptimizeCmpExpression(CI);
+
+  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+    if (TLI)
+      return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
+    return false;
+  }
+
+  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+    if (TLI)
+      return OptimizeMemoryInst(I, SI->getOperand(1),
+                                SI->getOperand(0)->getType());
+    return false;
+  }
+
+  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+    if (GEPI->hasAllZeroIndices()) {
+      // The GEP operand must be a pointer, and so must its result, so a
+      // plain BitCast suffices.
+      Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
+                                        GEPI->getName(), GEPI);
+      GEPI->replaceAllUsesWith(NC);
+      GEPI->eraseFromParent();
+      ++NumGEPsElim;
+      OptimizeInst(NC);
+      return true;
+    }
+    return false;
+  }
+
+  if (CallInst *CI = dyn_cast<CallInst>(I))
+    return OptimizeCallInst(CI);
+
+  if (SelectInst *SI = dyn_cast<SelectInst>(I))
+    return OptimizeSelectInst(SI);
+
+  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
+    return OptimizeShuffleVectorInst(SVI);
+
+  return false;
+}
+
+// In this pass we look for GEP and cast instructions that are used
+// across basic blocks and rewrite them to improve basic-block-at-a-time
+// selection.
+bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
+  SunkAddrs.clear();
+  bool MadeChange = false;
+
+  CurInstIterator = BB.begin();
+  while (CurInstIterator != BB.end())
+    MadeChange |= OptimizeInst(CurInstIterator++);
+
+  MadeChange |= DupRetToEnableTailCallOpts(&BB);
+
+  return MadeChange;
+}
+
+// If llvm.dbg.value is far away from the value, then ISel may not be able
+// to handle it properly. ISel will drop llvm.dbg.value if it cannot find a
+// node corresponding to the value.
+bool CodeGenPrepare::PlaceDbgValues(Function &F) {
+  bool MadeChange = false;
+  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+    Instruction *PrevNonDbgInst = NULL;
+    for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
+      Instruction *Insn = BI; ++BI;
+      DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
+      if (!DVI) {
+        PrevNonDbgInst = Insn;
+        continue;
+      }
+
+      Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
+      if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
+        DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
+        DVI->removeFromParent();
+        if (isa<PHINode>(VI))
+          DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
+        else
+          DVI->insertAfter(VI);
+        MadeChange = true;
+        ++NumDbgValueMoved;
+      }
+    }
+  }
+  return MadeChange;
+}
+
+// If there is a sequence that branches based on comparing a single bit
+// against zero that can be combined into a single instruction, and the
+// target supports folding these into a single instruction, sink the
+// mask and compare into the branch uses. Do this before OptimizeBlock ->
+// OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being
+// searched for.
+bool CodeGenPrepare::sinkAndCmp(Function &F) {
+  if (!EnableAndCmpSinking)
+    return false;
+  if (!TLI || !TLI->isMaskAndBranchFoldingLegal())
+    return false;
+  bool MadeChange = false;
+  for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+    BasicBlock *BB = I++;
+
+    // Does this BB end with the following?
+    //   %andVal = and %val, #single-bit-set
+    //   %icmpVal = icmp %andVal, 0
+    //   br i1 %icmpVal, label %dest1, label %dest2
+    BranchInst *Brcc = dyn_cast<BranchInst>(BB->getTerminator());
+    if (!Brcc || !Brcc->isConditional())
+      continue;
+    ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0));
+    if (!Cmp || Cmp->getParent() != BB)
+      continue;
+    ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1));
+    if (!Zero || !Zero->isZero())
+      continue;
+    Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0));
+    if (!And || And->getOpcode() != Instruction::And || And->getParent() != BB)
+      continue;
+    ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand(1));
+    if (!Mask || !Mask->getUniqueInteger().isPowerOf2())
+      continue;
+    DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB->dump());
+
+    // Push the "and; icmp" for any users that are conditional branches.
+    // Since there can only be one branch use per BB, we don't need to keep
+    // track of which BBs we insert into.
+    for (Value::use_iterator UI = Cmp->use_begin(), E = Cmp->use_end();
+         UI != E; ) {
+      Use &TheUse = *UI;
+      // Find brcc use.
+      BranchInst *BrccUser = dyn_cast<BranchInst>(*UI);
+      ++UI;
+      if (!BrccUser || !BrccUser->isConditional())
+        continue;
+      BasicBlock *UserBB = BrccUser->getParent();
+      if (UserBB == BB) continue;
+      DEBUG(dbgs() << "found Brcc use\n");
+
+      // Sink the "and; icmp" to the use.
+      MadeChange = true;
+      BinaryOperator *NewAnd =
+        BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "",
+                                  BrccUser);
+      CmpInst *NewCmp =
+        CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero,
+                        "", BrccUser);
+      TheUse = NewCmp;
+      ++NumAndCmpsMoved;
+      DEBUG(BrccUser->getParent()->dump());
+    }
+  }
+  return MadeChange;
+}
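
A small standalone model (made-up values, not the pass itself) of the pattern sinkAndCmp matches; the pass re-creates the "and; icmp" pair next to each conditional-branch user in another block so the backend can fold them into one test-and-branch instruction.

  #include <cstdint>
  #include <cstdio>

  static bool isPowerOfTwo(uint64_t Mask) {
    return Mask && !(Mask & (Mask - 1));
  }

  static void branchOnBit(uint64_t Val, uint64_t Mask) {
    if (!isPowerOfTwo(Mask)) // the pass bails unless exactly one bit is set
      return;
    uint64_t AndVal = Val & Mask;  // %andVal = and %val, #single-bit-set
    bool IsZero = (AndVal == 0);   // %icmpVal = icmp %andVal, 0
    std::printf(IsZero ? "dest1\n" : "dest2\n"); // br i1 %icmpVal, ...
  }

  int main() {
    branchOnBit(0xA, 0x2); // bit 1 is set in 0xA, so this prints "dest2"
    return 0;
  }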
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 18c8e0a..463eb86 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -595,7 +595,7 @@
     if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
       AntiDepReg = 0;
 
-    // Look for a suitable register to use to break the anti-depenence.
+    // Look for a suitable register to use to break the anti-dependence.
     //
     // TODO: Instead of picking the first free register, consider which might
     // be the best.
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 565d20b..1949a48 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -72,7 +72,7 @@
     ~CriticalAntiDepBreaker();
 
     /// Start - Initialize anti-dep breaking for a new basic block.
-    void StartBlock(MachineBasicBlock *BB);
+    void StartBlock(MachineBasicBlock *BB) override;
 
     /// BreakAntiDependencies - Identify anti-dependencies along the critical
     /// path
@@ -82,15 +82,16 @@
                                    MachineBasicBlock::iterator Begin,
                                    MachineBasicBlock::iterator End,
                                    unsigned InsertPosIndex,
-                                   DbgValueVector &DbgValues);
+                                   DbgValueVector &DbgValues) override;
 
     /// Observe - Update liveness information to account for the current
     /// instruction, which will not be scheduled.
     ///
-    void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex);
+    void Observe(MachineInstr *MI, unsigned Count,
+                 unsigned InsertPosIndex) override;
 
     /// Finish - Finish anti-dep breaking for a basic block.
-    void FinishBlock();
+    void FinishBlock() override;
 
   private:
     void PrescanInstruction(MachineInstr *MI);
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index 6619bcf..5b40ae1 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -108,7 +108,7 @@
   DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
                    MachineDominatorTree &MDT, bool IsPostRA);
   // Schedule - Actual scheduling work.
-  void schedule();
+  void schedule() override;
 };
 }
 
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 5efe1ff..aa03e77 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -27,7 +27,7 @@
 
 namespace {
   class DeadMachineInstructionElim : public MachineFunctionPass {
-    virtual bool runOnMachineFunction(MachineFunction &MF);
+    bool runOnMachineFunction(MachineFunction &MF) override;
 
     const TargetRegisterInfo *TRI;
     const MachineRegisterInfo *MRI;
@@ -84,6 +84,9 @@
 }
 
 bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
+  if (skipOptnoneFunction(*MF.getFunction()))
+    return false;
+
   bool AnyChanges = false;
   MRI = &MF.getRegInfo();
   TRI = MF.getTarget().getRegisterInfo();
@@ -127,17 +130,7 @@
           unsigned Reg = MO.getReg();
           if (!TargetRegisterInfo::isVirtualRegister(Reg))
             continue;
-          MachineRegisterInfo::use_iterator nextI;
-          for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
-               E = MRI->use_end(); I!=E; I=nextI) {
-            nextI = llvm::next(I);  // I is invalidated by the setReg
-            MachineOperand& Use = I.getOperand();
-            MachineInstr *UseMI = Use.getParent();
-            if (UseMI==MI)
-              continue;
-            assert(Use.isDebug());
-            UseMI->getOperand(0).setReg(0U);
-          }
+          MRI->markUsesInDebugValueAsUndef(Reg);
         }
         AnyChanges = true;
         MI->eraseFromParent();
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index c7c1752..d543baf 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -15,14 +15,14 @@
 #define DEBUG_TYPE "dwarfehprepare"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Pass.h"
-#include "llvm/Support/CallSite.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -42,16 +42,16 @@
 
   public:
     static char ID; // Pass identification, replacement for typeid.
-    DwarfEHPrepare(const TargetMachine *TM) :
-      FunctionPass(ID), TM(TM), RewindFunction(0) {
-        initializeDominatorTreePass(*PassRegistry::getPassRegistry());
-      }
+    DwarfEHPrepare(const TargetMachine *TM)
+        : FunctionPass(ID), TM(TM), RewindFunction(0) {
+      initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry());
+    }
 
-    virtual bool runOnFunction(Function &Fn);
+    bool runOnFunction(Function &Fn) override;
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
+    void getAnalysisUsage(AnalysisUsage &AU) const override { }
 
-    const char *getPassName() const {
+    const char *getPassName() const override {
       return "Exception handling preparation";
     }
   };
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index 5447df0..f8887ef 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -461,7 +461,7 @@
     DEBUG(dbgs() << "If-converting " << *PI.PHI);
     unsigned DstReg = PI.PHI->getOperand(0).getReg();
     TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
-    DEBUG(dbgs() << "          --> " << *llvm::prior(FirstTerm));
+    DEBUG(dbgs() << "          --> " << *std::prev(FirstTerm));
     PI.PHI->eraseFromParent();
     PI.PHI = 0;
   }
@@ -482,7 +482,7 @@
     unsigned PHIDst = PI.PHI->getOperand(0).getReg();
     unsigned DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst));
     TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
-    DEBUG(dbgs() << "          --> " << *llvm::prior(FirstTerm));
+    DEBUG(dbgs() << "          --> " << *std::prev(FirstTerm));
 
     // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred.
     for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) {
@@ -590,9 +590,9 @@
 public:
   static char ID;
   EarlyIfConverter() : MachineFunctionPass(ID) {}
-  void getAnalysisUsage(AnalysisUsage &AU) const;
-  bool runOnMachineFunction(MachineFunction &MF);
-  const char *getPassName() const { return "Early If-Conversion"; }
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  const char *getPassName() const override { return "Early If-Conversion"; }
 
 private:
   bool tryConvertIf(MachineBasicBlock*);
diff --git a/lib/CodeGen/ErlangGC.cpp b/lib/CodeGen/ErlangGC.cpp
index 8a1e2d9..e976d7f 100644
--- a/lib/CodeGen/ErlangGC.cpp
+++ b/lib/CodeGen/ErlangGC.cpp
@@ -32,7 +32,7 @@
                           DebugLoc DL) const;
   public:
     ErlangGC();
-    bool findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF);
+    bool findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) override;
   };
 
 }
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index 031f19c..a08eb6b 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -23,7 +23,7 @@
 #define DEBUG_TYPE "execution-fix"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/CodeGen/LiveRegUnits.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/Allocator.h"
@@ -141,7 +141,7 @@
   std::vector<std::pair<MachineInstr*, unsigned> > UndefReads;
 
   /// Storage for register unit liveness.
-  LiveRegUnits LiveUnits;
+  LivePhysRegs LiveRegSet;
 
   /// Current instruction number.
   /// The first instruction in each basic block is 0.
@@ -155,14 +155,14 @@
   ExeDepsFix(const TargetRegisterClass *rc)
     : MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {}
 
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesAll();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 
-  virtual bool runOnMachineFunction(MachineFunction &MF);
+  bool runOnMachineFunction(MachineFunction &MF) override;
 
-  virtual const char *getPassName() const {
+  const char *getPassName() const override {
     return "Execution dependency fix";
   }
 
@@ -352,7 +352,7 @@
 
   // Set up UndefReads to track undefined register reads.
   UndefReads.clear();
-  LiveUnits.clear();
+  LiveRegSet.clear();
 
   // Set up LiveRegs to represent registers entering MBB.
   if (!LiveRegs)
@@ -547,21 +547,19 @@
     return;
 
   // Collect this block's live out register units.
-  LiveUnits.init(TRI);
-  for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
-         SE = MBB->succ_end(); SI != SE; ++SI) {
-    LiveUnits.addLiveIns(*SI, *TRI);
-  }
+  LiveRegSet.init(TRI);
+  LiveRegSet.addLiveOuts(MBB);
+
   MachineInstr *UndefMI = UndefReads.back().first;
   unsigned OpIdx = UndefReads.back().second;
 
   for (MachineBasicBlock::reverse_iterator I = MBB->rbegin(), E = MBB->rend();
        I != E; ++I) {
-    // Update liveness, including the current instrucion's defs.
-    LiveUnits.stepBackward(*I, *TRI);
+    // Update liveness, including the current instruction's defs.
+    LiveRegSet.stepBackward(*I);
 
     if (UndefMI == &*I) {
-      if (!LiveUnits.contains(UndefMI->getOperand(OpIdx).getReg(), *TRI))
+      if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg()))
         TII->breakPartialRegDependency(UndefMI, OpIdx, TRI);
 
       UndefReads.pop_back();
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp
index b2b6882..fb2e446 100644
--- a/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/lib/CodeGen/ExpandISelPseudos.cpp
@@ -30,9 +30,9 @@
     ExpandISelPseudos() : MachineFunctionPass(ID) {}
 
   private:
-    virtual bool runOnMachineFunction(MachineFunction &MF);
+    bool runOnMachineFunction(MachineFunction &MF) override;
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       MachineFunctionPass::getAnalysisUsage(AU);
     }
   };
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index 6c73fff..1b0315a 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -35,7 +35,7 @@
   static char ID; // Pass identification, replacement for typeid
   ExpandPostRA() : MachineFunctionPass(ID) {}
 
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
     AU.addPreservedID(MachineLoopInfoID);
     AU.addPreservedID(MachineDominatorsID);
@@ -43,7 +43,7 @@
   }
 
   /// runOnMachineFunction - pass entry point
-  bool runOnMachineFunction(MachineFunction&);
+  bool runOnMachineFunction(MachineFunction&) override;
 
 private:
   bool LowerSubregToReg(MachineInstr *MI);
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index ef5247c..54b047b 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -32,12 +32,12 @@
   public:
     explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {}
 
-    
-    const char *getPassName() const;
-    void getAnalysisUsage(AnalysisUsage &AU) const;
-    
-    bool runOnFunction(Function &F);
-    bool doFinalization(Module &M);
+
+    const char *getPassName() const override;
+    void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+    bool runOnFunction(Function &F) override;
+    bool doFinalization(Module &M) override;
   };
 
 }
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 1173d11..b31a0f2 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -16,13 +16,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/Analysis/DominatorInternals.h"
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/Debug.h"
@@ -53,11 +52,11 @@
     static char ID;
 
     LowerIntrinsics();
-    const char *getPassName() const;
-    void getAnalysisUsage(AnalysisUsage &AU) const;
+    const char *getPassName() const override;
+    void getAnalysisUsage(AnalysisUsage &AU) const override;
 
-    bool doInitialization(Module &M);
-    bool runOnFunction(Function &F);
+    bool doInitialization(Module &M) override;
+    bool runOnFunction(Function &F) override;
   };
 
 
@@ -83,9 +82,9 @@
     static char ID;
 
     GCMachineCodeAnalysis();
-    void getAnalysisUsage(AnalysisUsage &AU) const;
+    void getAnalysisUsage(AnalysisUsage &AU) const override;
 
-    bool runOnMachineFunction(MachineFunction &MF);
+    bool runOnMachineFunction(MachineFunction &MF) override;
   };
 
 }
@@ -154,7 +153,7 @@
 void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
   FunctionPass::getAnalysisUsage(AU);
   AU.addRequired<GCModuleInfo>();
-  AU.addPreserved<DominatorTree>();
+  AU.addPreserved<DominatorTreeWrapperPass>();
 }
 
 /// doInitialization - If this module uses the GC intrinsics, find them now.
@@ -271,8 +270,9 @@
 
   // Custom lowering may modify the CFG, so dominators must be recomputed.
   if (UseCustomLoweringPass) {
-    if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
-      DT->DT->recalculate(F);
+    if (DominatorTreeWrapperPass *DTWP =
+            getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+      DTWP->getDomTree().recalculate(F);
   }
 
   return MadeChange;
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index e2d0eb4..1a18b1a 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -17,13 +17,13 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetSchedule.h"
-#include "llvm/CodeGen/LiveRegUnits.h"
 #include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -162,8 +162,8 @@
     const MachineBranchProbabilityInfo *MBPI;
     MachineRegisterInfo *MRI;
 
-    LiveRegUnits Redefs;
-    LiveRegUnits DontKill;
+    LivePhysRegs Redefs;
+    LivePhysRegs DontKill;
 
     bool PreRegAlloc;
     bool MadeChange;
@@ -174,12 +174,12 @@
       initializeIfConverterPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.addRequired<MachineBranchProbabilityInfo>();
       MachineFunctionPass::getAnalysisUsage(AU);
     }
 
-    virtual bool runOnMachineFunction(MachineFunction &MF);
+    bool runOnMachineFunction(MachineFunction &MF) override;
 
   private:
     bool ReverseBranchCondition(BBInfo &BBI);
@@ -921,7 +921,7 @@
 /// next block).
 static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
   MachineFunction::iterator PI = BB;
-  MachineFunction::iterator I = llvm::next(PI);
+  MachineFunction::iterator I = std::next(PI);
   MachineFunction::iterator TI = ToBB;
   MachineFunction::iterator E = BB->getParent()->end();
   while (I != TI) {
@@ -968,23 +968,22 @@
 
 /// Behaves like LivePhysRegs::stepForward() but also adds implicit uses to all
 /// values defined in MI which are not live/used by MI.
-static void UpdatePredRedefs(MachineInstr *MI, LiveRegUnits &Redefs,
-                             const TargetRegisterInfo *TRI) {
+static void UpdatePredRedefs(MachineInstr *MI, LivePhysRegs &Redefs) {
   for (ConstMIBundleOperands Ops(MI); Ops.isValid(); ++Ops) {
     if (!Ops->isReg() || !Ops->isKill())
       continue;
     unsigned Reg = Ops->getReg();
     if (Reg == 0)
       continue;
-    Redefs.removeReg(Reg, *TRI);
+    Redefs.removeReg(Reg);
   }
   for (MIBundleOperands Ops(MI); Ops.isValid(); ++Ops) {
     if (!Ops->isReg() || !Ops->isDef())
       continue;
     unsigned Reg = Ops->getReg();
-    if (Reg == 0 || Redefs.contains(Reg, *TRI))
+    if (Reg == 0 || Redefs.contains(Reg))
       continue;
-    Redefs.addReg(Reg, *TRI);
+    Redefs.addReg(Reg);
 
     MachineOperand &Op = *Ops;
     MachineInstr *MI = Op.getParent();
@@ -996,12 +995,11 @@
 /**
  * Remove kill flags from operands with a registers in the @p DontKill set.
  */
-static void RemoveKills(MachineInstr &MI, const LiveRegUnits &DontKill,
-                        const MCRegisterInfo &MCRI) {
+static void RemoveKills(MachineInstr &MI, const LivePhysRegs &DontKill) {
   for (MIBundleOperands O(&MI); O.isValid(); ++O) {
     if (!O->isReg() || !O->isKill())
       continue;
-    if (DontKill.contains(O->getReg(), MCRI))
+    if (DontKill.contains(O->getReg()))
       O->setIsKill(false);
   }
 }
@@ -1012,10 +1010,10 @@
  */
 static void RemoveKills(MachineBasicBlock::iterator I,
                         MachineBasicBlock::iterator E,
-                        const LiveRegUnits &DontKill,
+                        const LivePhysRegs &DontKill,
                         const MCRegisterInfo &MCRI) {
   for ( ; I != E; ++I)
-    RemoveKills(*I, DontKill, MCRI);
+    RemoveKills(*I, DontKill);
 }
 
 /// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG.
@@ -1049,13 +1047,13 @@
   // Initialize liveins to the first BB. These are potentially redefined by
   // predicated instructions.
   Redefs.init(TRI);
-  Redefs.addLiveIns(CvtBBI->BB, *TRI);
-  Redefs.addLiveIns(NextBBI->BB, *TRI);
+  Redefs.addLiveIns(CvtBBI->BB);
+  Redefs.addLiveIns(NextBBI->BB);
 
   // Compute a set of registers which must not be killed by instructions in
   // BB1: This is everything live-in to BB2.
   DontKill.init(TRI);
-  DontKill.addLiveIns(NextBBI->BB, *TRI);
+  DontKill.addLiveIns(NextBBI->BB);
 
   if (CvtBBI->BB->pred_size() > 1) {
     BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
@@ -1104,6 +1102,28 @@
   return true;
 }
 
+/// Scale down weights to fit into uint32_t. NewTrue is the new weight
+/// for successor TrueBB, and NewFalse is the new weight for successor
+/// FalseBB.
+static void ScaleWeights(uint64_t NewTrue, uint64_t NewFalse,
+                         MachineBasicBlock *MBB,
+                         const MachineBasicBlock *TrueBB,
+                         const MachineBasicBlock *FalseBB,
+                         const MachineBranchProbabilityInfo *MBPI) {
+  uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
+  uint32_t Scale = (NewMax / UINT32_MAX) + 1;
+  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+                                        SE = MBB->succ_end();
+       SI != SE; ++SI) {
+    if (*SI == TrueBB)
+      MBB->setSuccWeight(SI, (uint32_t)(NewTrue / Scale));
+    else if (*SI == FalseBB)
+      MBB->setSuccWeight(SI, (uint32_t)(NewFalse / Scale));
+    else
+      MBB->setSuccWeight(SI, MBPI->getEdgeWeight(MBB, SI) / Scale);
+  }
+}
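
A worked instance (hypothetical weights) of the scaling rule above: Scale is chosen as NewMax / UINT32_MAX + 1 so that both new weights fit into uint32_t.

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint64_t NewTrue = 6000000000ULL, NewFalse = 3000000000ULL;
    uint64_t NewMax = NewTrue > NewFalse ? NewTrue : NewFalse;
    uint32_t Scale = (uint32_t)(NewMax / UINT32_MAX) + 1; // == 2 here
    std::printf("scale=%u true=%u false=%u\n", Scale,
                (uint32_t)(NewTrue / Scale),   // 3000000000, fits
                (uint32_t)(NewFalse / Scale)); // 1500000000, fits
    return 0;
  }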
+
 /// IfConvertTriangle - If convert a triangle sub-CFG.
 ///
 bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
@@ -1154,12 +1174,22 @@
   // Initialize liveins to the first BB. These are potentially redefined by
   // predicated instructions.
   Redefs.init(TRI);
-  Redefs.addLiveIns(CvtBBI->BB, *TRI);
-  Redefs.addLiveIns(NextBBI->BB, *TRI);
+  Redefs.addLiveIns(CvtBBI->BB);
+  Redefs.addLiveIns(NextBBI->BB);
 
   DontKill.clear();
 
   bool HasEarlyExit = CvtBBI->FalseBB != NULL;
+  uint64_t CvtNext = 0, CvtFalse = 0, BBNext = 0, BBCvt = 0, SumWeight = 0;
+  uint32_t WeightScale = 0;
+  if (HasEarlyExit) {
+    // Get weights before modifying CvtBBI->BB and BBI.BB.
+    CvtNext = MBPI->getEdgeWeight(CvtBBI->BB, NextBBI->BB);
+    CvtFalse = MBPI->getEdgeWeight(CvtBBI->BB, CvtBBI->FalseBB);
+    BBNext = MBPI->getEdgeWeight(BBI.BB, NextBBI->BB);
+    BBCvt = MBPI->getEdgeWeight(BBI.BB, CvtBBI->BB);
+    SumWeight = MBPI->getSumForBlock(CvtBBI->BB, WeightScale);
+  }
   if (CvtBBI->BB->pred_size() > 1) {
     BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
     // Copy instructions in the true block, predicate them, and add them to
@@ -1187,6 +1217,20 @@
       llvm_unreachable("Unable to reverse branch condition!");
     TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond, dl);
     BBI.BB->addSuccessor(CvtBBI->FalseBB);
+    // Update the edge weight for both CvtBBI->FalseBB and NextBBI.
+    // New_Weight(BBI.BB, NextBBI->BB) =
+    //   Weight(BBI.BB, NextBBI->BB) * getSumForBlock(CvtBBI->BB) +
+    //   Weight(BBI.BB, CvtBBI->BB) * Weight(CvtBBI->BB, NextBBI->BB)
+    // New_Weight(BBI.BB, CvtBBI->FalseBB) =
+    //   Weight(BBI.BB, CvtBBI->BB) * Weight(CvtBBI->BB, CvtBBI->FalseBB)
+
+    uint64_t NewNext = BBNext * SumWeight + (BBCvt * CvtNext) / WeightScale;
+    uint64_t NewFalse = (BBCvt * CvtFalse) / WeightScale;
+    // We need to scale down all weights of BBI.BB to fit uint32_t.
+    // Here BBI.BB is connected to CvtBBI->FalseBB and will fall through to
+    // the next block.
+    ScaleWeights(NewNext, NewFalse, BBI.BB, getNextBlock(BBI.BB),
+                 CvtBBI->FalseBB, MBPI);
   }
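
A numeric check (invented weights, WeightScale assumed to be 1) of the update formulas in the comment above; the new weight of the fall-through edge matches the combined probability of reaching NextBBI->BB either directly or through the converted block.

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint64_t BBNext = 30, BBCvt = 70;        // edges out of BBI.BB
    uint64_t CvtNext = 60, CvtFalse = 40;    // edges out of CvtBBI->BB
    uint64_t SumWeight = CvtNext + CvtFalse; // getSumForBlock(CvtBBI->BB)
    uint64_t WeightScale = 1;
    uint64_t NewNext = BBNext * SumWeight + (BBCvt * CvtNext) / WeightScale;
    uint64_t NewFalse = (BBCvt * CvtFalse) / WeightScale;
    // 7200 : 2800, i.e. 0.72 : 0.28 -- and indeed 0.3 + 0.7 * 0.6 == 0.72.
    std::printf("NewNext=%llu NewFalse=%llu\n",
                (unsigned long long)NewNext, (unsigned long long)NewFalse);
    return 0;
  }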
 
   // Merge in the 'false' block if the 'false' block has no other
@@ -1284,7 +1328,7 @@
   // Initialize liveins to the first BB. These are potentially redefined by
   // predicated instructions.
   Redefs.init(TRI);
-  Redefs.addLiveIns(BBI1->BB, *TRI);
+  Redefs.addLiveIns(BBI1->BB);
 
   // Remove the duplicated instructions at the beginnings of both paths.
   MachineBasicBlock::iterator DI1 = BBI1->BB->begin();
@@ -1317,12 +1361,12 @@
   DontKill.init(TRI);
   for (MachineBasicBlock::reverse_iterator I = BBI2->BB->rbegin(),
        E = MachineBasicBlock::reverse_iterator(DI2); I != E; ++I) {
-    DontKill.stepBackward(*I, *TRI);
+    DontKill.stepBackward(*I);
   }
 
   for (MachineBasicBlock::const_iterator I = BBI1->BB->begin(), E = DI1; I != E;
        ++I) {
-    Redefs.stepForward(*I, *TRI);
+    Redefs.stepForward(*I);
   }
   BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
   BBI2->BB->erase(BBI2->BB->begin(), DI2);
@@ -1506,7 +1550,7 @@
 
     // If the predicated instruction now redefines a register as the result of
     // if-conversion, add an implicit kill.
-    UpdatePredRedefs(I, Redefs, TRI);
+    UpdatePredRedefs(I, Redefs);
   }
 
   std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate));
@@ -1552,11 +1596,11 @@
 
     // If the predicated instruction now redefines a register as the result of
     // if-conversion, add an implicit kill.
-    UpdatePredRedefs(MI, Redefs, TRI);
+    UpdatePredRedefs(MI, Redefs);
 
     // Some kill flags may not be correct anymore.
     if (!DontKill.empty())
-      RemoveKills(*MI, DontKill, *TRI);
+      RemoveKills(*MI, DontKill);
   }
 
   if (!IgnoreBr) {
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index bb0e642..0f7ba8e 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -21,8 +21,9 @@
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveRangeEdit.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
-#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -153,7 +154,7 @@
       TRI(*mf.getTarget().getRegisterInfo()),
       MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()) {}
 
-  void spill(LiveRangeEdit &);
+  void spill(LiveRangeEdit &) override;
 
 private:
   bool isSnippet(const LiveInterval &SnipLI);
@@ -238,9 +239,10 @@
   MachineInstr *UseMI = 0;
 
   // Check that all uses satisfy our criteria.
-  for (MachineRegisterInfo::reg_nodbg_iterator
-         RI = MRI.reg_nodbg_begin(SnipLI.reg);
-       MachineInstr *MI = RI.skipInstruction();) {
+  for (MachineRegisterInfo::reg_instr_nodbg_iterator
+       RI = MRI.reg_instr_nodbg_begin(SnipLI.reg),
+       E = MRI.reg_instr_nodbg_end(); RI != E; ) {
+    MachineInstr *MI = &*(RI++);
 
     // Allow copies to/from Reg.
     if (isFullCopyOf(MI, Reg))
@@ -277,8 +279,9 @@
   if (Original == Reg)
     return;
 
-  for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg);
-       MachineInstr *MI = RI.skipInstruction();) {
+  for (MachineRegisterInfo::reg_instr_iterator
+       RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end(); RI != E; ) {
+    MachineInstr *MI = &*(RI++);
     unsigned SnipReg = isFullCopyOf(MI, Reg);
     if (!isSibling(SnipReg))
       continue;
@@ -438,7 +441,20 @@
           // Also hoist spills to blocks with smaller loop depth, but make sure
           // that the new value dominates.  Non-phi dependents are always
           // dominated, phis need checking.
+
+          const BranchProbability MarginProb(4, 5); // 80%
+          // Hoist a spill to the outer loop if there are multiple dependents
+          // (it can be beneficial when more than one dependent is hoisted) or
+          // if DepSV (the hoisting source) is hotter than SV (the hoisting
+          // destination); the 80% margin biases the decision a little towards
+          // loop depth.
+          bool HoistCondition =
+            (MBFI.getBlockFreq(DepSV.SpillMBB) >=
+             (MBFI.getBlockFreq(SV.SpillMBB) * MarginProb)) ||
+            Deps->size() > 1;
+
           if ((Loops.getLoopDepth(DepSV.SpillMBB) > SpillDepth) &&
+              HoistCondition &&
               (!DepSVI->first->isPHIDef() ||
                MDT.dominates(SV.SpillMBB, DepSV.SpillMBB))) {
             Changed = true;
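
A tiny sketch (invented frequencies; the real code compares BlockFrequency values scaled by BranchProbability(4, 5)) of the hoisting test added above.

  #include <cstdio>

  int main() {
    double DepFreq = 90.0, SpillFreq = 100.0; // DepSV vs. SV block frequency
    unsigned NumDeps = 1;
    bool HoistCondition = (DepFreq >= SpillFreq * 0.8) || NumDeps > 1;
    std::printf("hoist=%d\n", HoistCondition); // 90 >= 80, so hoist
    return 0;
  }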
@@ -476,7 +492,7 @@
   // Check if a cached value already exists.
   SibValueMap::iterator SVI;
   bool Inserted;
-  tie(SVI, Inserted) =
+  std::tie(SVI, Inserted) =
     SibValues.insert(std::make_pair(UseVNI, SibValueInfo(UseReg, UseVNI)));
   if (!Inserted) {
     DEBUG(dbgs() << "Cached value " << PrintReg(UseReg) << ':'
@@ -495,7 +511,7 @@
   do {
     unsigned Reg;
     VNInfo *VNI;
-    tie(Reg, VNI) = WorkList.pop_back_val();
+    std::tie(Reg, VNI) = WorkList.pop_back_val();
     DEBUG(dbgs() << "  " << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def
                  << ":\t");
 
@@ -554,7 +570,7 @@
       for (unsigned i = 0, e = NonPHIs.size(); i != e; ++i) {
         VNInfo *NonPHI = NonPHIs[i];
         // Known value? Try an insertion.
-        tie(SVI, Inserted) =
+        std::tie(SVI, Inserted) =
           SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI)));
         // Add all the PHIs as dependents of NonPHI.
         for (unsigned pi = 0, pe = PHIs.size(); pi != pe; ++pi)
@@ -587,8 +603,8 @@
                      << SrcVNI->id << '@' << SrcVNI->def
                      << " kill=" << unsigned(SVI->second.KillsSource) << '\n');
         // Known sibling source value? Try an insertion.
-        tie(SVI, Inserted) = SibValues.insert(std::make_pair(SrcVNI,
-                                                 SibValueInfo(SrcReg, SrcVNI)));
+        std::tie(SVI, Inserted) = SibValues.insert(
+            std::make_pair(SrcVNI, SibValueInfo(SrcReg, SrcVNI)));
         // This is the first time we see Src, add it to the worklist.
         if (Inserted)
           WorkList.push_back(std::make_pair(SrcReg, SrcVNI));
@@ -745,7 +761,7 @@
 
   do {
     LiveInterval *LI;
-    tie(LI, VNI) = WorkList.pop_back_val();
+    std::tie(LI, VNI) = WorkList.pop_back_val();
     unsigned Reg = LI->reg;
     DEBUG(dbgs() << "Checking redundant spills for "
                  << VNI->id << '@' << VNI->def << " in " << *LI << '\n');
@@ -759,8 +775,10 @@
     DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n');
 
     // Find all spills and copies of VNI.
-    for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg);
-         MachineInstr *MI = UI.skipInstruction();) {
+    for (MachineRegisterInfo::use_instr_nodbg_iterator
+         UI = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end();
+         UI != E; ) {
+      MachineInstr *MI = &*(UI++);
       if (!MI->isCopy() && !MI->mayStore())
         continue;
       SlotIndex Idx = LIS.getInstructionIndex(MI);
@@ -804,7 +822,7 @@
   SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList;
   WorkList.push_back(std::make_pair(LI, VNI));
   do {
-    tie(LI, VNI) = WorkList.pop_back_val();
+    std::tie(LI, VNI) = WorkList.pop_back_val();
     if (!UsedValues.insert(VNI))
       continue;
 
@@ -920,10 +938,12 @@
   for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
     unsigned Reg = RegsToSpill[i];
     LiveInterval &LI = LIS.getInterval(Reg);
-    for (MachineRegisterInfo::use_nodbg_iterator
-         RI = MRI.use_nodbg_begin(Reg);
-         MachineInstr *MI = RI.skipBundle();)
+    for (MachineRegisterInfo::use_bundle_nodbg_iterator
+         RI = MRI.use_bundle_nodbg_begin(Reg), E = MRI.use_bundle_nodbg_end();
+         RI != E; ) {
+      MachineInstr *MI = &*(RI++);
       anyRemat |= reMaterializeFor(LI, MI);
+    }
   }
   if (!anyRemat)
     return;
@@ -1014,7 +1034,7 @@
   char NextLine = '\n';
   char SlotIndent = '\t';
 
-  if (llvm::next(B) == E) {
+  if (std::next(B) == E) {
     NextLine = ' ';
     SlotIndent = ' ';
   }
@@ -1098,12 +1118,11 @@
         MRI.isReserved(Reg)) {
       continue;
     }
+    // Skip non-Defs, including undef uses and internal reads.
+    if (MO->isUse())
+      continue;
     MIBundleOperands::PhysRegInfo RI =
       MIBundleOperands(FoldMI).analyzePhysReg(Reg, &TRI);
-    if (MO->readsReg()) {
-      assert(RI.Reads && "Cannot fold physreg reader");
-      continue;
-    }
     if (RI.Defines)
       continue;
     // FoldMI does not define this physreg. Remove the LI segment.
@@ -1172,12 +1191,12 @@
   MachineBasicBlock &MBB = *MI->getParent();
 
   MachineInstrSpan MIS(MI);
-  TII.storeRegToStackSlot(MBB, llvm::next(MI), NewVReg, isKill, StackSlot,
+  TII.storeRegToStackSlot(MBB, std::next(MI), NewVReg, isKill, StackSlot,
                           MRI.getRegClass(NewVReg), &TRI);
 
-  LIS.InsertMachineInstrRangeInMaps(llvm::next(MI), MIS.end());
+  LIS.InsertMachineInstrRangeInMaps(std::next(MI), MIS.end());
 
-  DEBUG(dumpMachineInstrRangeWithSlotIndex(llvm::next(MI), MIS.end(), LIS,
+  DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS,
                                            "spill"));
   ++NumSpills;
 }
@@ -1188,8 +1207,10 @@
   LiveInterval &OldLI = LIS.getInterval(Reg);
 
   // Iterate over instructions using Reg.
-  for (MachineRegisterInfo::reg_iterator RegI = MRI.reg_begin(Reg);
-       MachineInstr *MI = RegI.skipBundle();) {
+  for (MachineRegisterInfo::reg_bundle_iterator
+       RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end();
+       RegI != E; ) {
+    MachineInstr *MI = &*(RegI++);
 
     // Debug values are not allowed to affect codegen.
     if (MI->isDebugValue()) {
@@ -1314,8 +1335,10 @@
 
   // Finally delete the SnippetCopies.
   for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
-    for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(RegsToSpill[i]);
-         MachineInstr *MI = RI.skipInstruction();) {
+    for (MachineRegisterInfo::reg_instr_iterator
+         RI = MRI.reg_instr_begin(RegsToSpill[i]), E = MRI.reg_instr_end();
+         RI != E; ) {
+      MachineInstr *MI = &*(RI++);
       assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy");
       // FIXME: Do this with a LiveRangeEdit callback.
       LIS.RemoveMachineInstrFromMaps(MI);
diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp
index 427225d..61d065a 100644
--- a/lib/CodeGen/InterferenceCache.cpp
+++ b/lib/CodeGen/InterferenceCache.cpp
@@ -22,6 +22,22 @@
 // Static member used for null interference cursors.
 InterferenceCache::BlockInterference InterferenceCache::Cursor::NoInterference;
 
+// Initializes PhysRegEntries (instead of a SmallVector, PhysRegEntries is a
+// buffer of size NumPhysRegs to speed up alloc/clear for targets with large
+// reg files). Calloc'ed memory is used for good form, and it quiets tools
+// like Valgrind too, but zero-initialized memory is not required by the
+// algorithm:
+// this is because PhysRegEntries works like a SparseSet and its entries are
+// only valid when there is a corresponding CacheEntries assignment. There is
+// also support for when pass managers are reused for targets with different
+// numbers of PhysRegs: in this case PhysRegEntries is freed and reinitialized.
+void InterferenceCache::reinitPhysRegEntries() {
+  if (PhysRegEntriesCount == TRI->getNumRegs()) return;
+  free(PhysRegEntries);
+  PhysRegEntriesCount = TRI->getNumRegs();
+  PhysRegEntries = (unsigned char*)
+    calloc(PhysRegEntriesCount, sizeof(unsigned char));
+}
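
A rough model (assumed shapes, not the real InterferenceCache types) of the SparseSet-style discipline the comment describes: a byte in PhysRegEntries is only trusted when the cache slot it names maps back to the same physreg, so uninitialized bytes are harmless.

  #include <cstdio>
  #include <cstdlib>

  struct CacheSketch {
    static const unsigned CacheEntries = 32;
    unsigned EntryPhysReg[CacheEntries] = {}; // reverse map: slot -> physreg
    unsigned char *PhysRegEntries = nullptr;
    size_t PhysRegEntriesCount = 0;

    void reinit(size_t NumRegs) {
      if (PhysRegEntriesCount == NumRegs) return;
      free(PhysRegEntries);
      PhysRegEntriesCount = NumRegs;
      PhysRegEntries = (unsigned char *)calloc(NumRegs, 1);
    }
    bool isValid(unsigned PhysReg) const {
      unsigned char Slot = PhysRegEntries[PhysReg];
      return Slot < CacheEntries && EntryPhysReg[Slot] == PhysReg;
    }
    ~CacheSketch() { free(PhysRegEntries); }
  };

  int main() {
    CacheSketch C;
    C.reinit(64);
    C.EntryPhysReg[0] = 5;  // slot 0 currently caches physreg 5
    C.PhysRegEntries[5] = 0;
    std::printf("%d %d\n", C.isValid(5), C.isValid(7)); // prints "1 0"
    return 0;
  }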
+
 void InterferenceCache::init(MachineFunction *mf,
                              LiveIntervalUnion *liuarray,
                              SlotIndexes *indexes,
@@ -30,7 +46,7 @@
   MF = mf;
   LIUArray = liuarray;
   TRI = tri;
-  PhysRegEntries.assign(TRI->getNumRegs(), 0);
+  reinitPhysRegEntries();
   for (unsigned i = 0; i != CacheEntries; ++i)
     Entries[i].clear(mf, indexes, lis);
 }
@@ -105,7 +121,7 @@
 
 void InterferenceCache::Entry::update(unsigned MBBNum) {
   SlotIndex Start, Stop;
-  tie(Start, Stop) = Indexes->getMBBRange(MBBNum);
+  std::tie(Start, Stop) = Indexes->getMBBRange(MBBNum);
 
   // Use advanceTo only when possible.
   if (PrevPos != Start) {
@@ -182,7 +198,7 @@
     BI = &Blocks[MBBNum];
     if (BI->Tag == Tag)
       return;
-    tie(Start, Stop) = Indexes->getMBBRange(MBBNum);
+    std::tie(Start, Stop) = Indexes->getMBBRange(MBBNum);
   }
 
   // Check for last interference in block.
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index 800f705..d3482d0 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -135,7 +135,8 @@
 
   // Point to an entry for each physreg. The entry pointed to may not be up to
   // date, and it may have been reused for a different physreg.
-  SmallVector<unsigned char, 2> PhysRegEntries;
+  unsigned char* PhysRegEntries;
+  size_t PhysRegEntriesCount;
 
   // Next round-robin entry to be picked.
   unsigned RoundRobin;
@@ -147,7 +148,14 @@
   Entry *get(unsigned PhysReg);
 
 public:
-  InterferenceCache() : TRI(0), LIUArray(0), MF(0), RoundRobin(0) {}
+  InterferenceCache() : TRI(0), LIUArray(0), MF(0), PhysRegEntries(NULL),
+                        PhysRegEntriesCount(0), RoundRobin(0) {}
+
+  ~InterferenceCache() {
+    free(PhysRegEntries);
+  }
+
+  void reinitPhysRegEntries();
 
   /// init - Prepare cache for a new function.
   void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, LiveIntervals*,
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index c38d4fb..9977c6b 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -13,13 +13,13 @@
 
 #include "llvm/CodeGen/IntrinsicLowering.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
-#include "llvm/Support/CallSite.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
@@ -115,21 +115,21 @@
           Type::getInt8PtrTy(Context),
                               Type::getInt8PtrTy(Context), 
                               Type::getInt8PtrTy(Context), 
-                              TD.getIntPtrType(Context), (Type *)0);
+                              DL.getIntPtrType(Context), (Type *)0);
         break;
       case Intrinsic::memmove:
         M.getOrInsertFunction("memmove",
           Type::getInt8PtrTy(Context),
                               Type::getInt8PtrTy(Context), 
                               Type::getInt8PtrTy(Context), 
-                              TD.getIntPtrType(Context), (Type *)0);
+                              DL.getIntPtrType(Context), (Type *)0);
         break;
       case Intrinsic::memset:
         M.getOrInsertFunction("memset",
           Type::getInt8PtrTy(Context),
                               Type::getInt8PtrTy(Context), 
                               Type::getInt32Ty(M.getContext()), 
-                              TD.getIntPtrType(Context), (Type *)0);
+                              DL.getIntPtrType(Context), (Type *)0);
         break;
       case Intrinsic::sqrt:
         EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl");
@@ -463,7 +463,7 @@
     break;   // Strip out annotate intrinsic
     
   case Intrinsic::memcpy: {
-    Type *IntPtr = TD.getIntPtrType(Context);
+    Type *IntPtr = DL.getIntPtrType(Context);
     Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
                                         /* isSigned */ false);
     Value *Ops[3];
@@ -474,7 +474,7 @@
     break;
   }
   case Intrinsic::memmove: {
-    Type *IntPtr = TD.getIntPtrType(Context);
+    Type *IntPtr = DL.getIntPtrType(Context);
     Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
                                         /* isSigned */ false);
     Value *Ops[3];
@@ -486,7 +486,7 @@
   }
   case Intrinsic::memset: {
     Value *Op0 = CI->getArgOperand(0);
-    Type *IntPtr = TD.getIntPtrType(Op0->getType());
+    Type *IntPtr = DL.getIntPtrType(Op0->getType());
     Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
                                         /* isSigned */ false);
     Value *Ops[3];
diff --git a/lib/CodeGen/LLVMBuild.txt b/lib/CodeGen/LLVMBuild.txt
index 81ef1aa..fee0347 100644
--- a/lib/CodeGen/LLVMBuild.txt
+++ b/lib/CodeGen/LLVMBuild.txt
@@ -22,4 +22,4 @@
 type = Library
 name = CodeGen
 parent = Libraries
-required_libraries = Analysis Core MC Scalar Support Target TransformUtils ObjCARC
+required_libraries = Analysis Core MC Scalar Support Target TransformUtils
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index ad2c553..9c2718b 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -12,12 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Assembly/PrintModulePass.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInstrInfo.h"
@@ -63,14 +62,23 @@
 }
 
 void LLVMTargetMachine::initAsmInfo() {
-  AsmInfo = TheTarget.createMCAsmInfo(*getRegisterInfo(), TargetTriple);
+  MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(*getRegisterInfo(),
+                                                    TargetTriple);
   // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0,
   // and if the old one gets included then MCAsmInfo will be NULL and
   // we'll crash later.
   // Provide the user with a useful error message about what's wrong.
-  assert(AsmInfo && "MCAsmInfo not initialized. "
+  assert(TmpAsmInfo && "MCAsmInfo not initialized. "
          "Make sure you include the correct TargetSelect.h"
          "and that InitializeAllTargetMCs() is being invoked!");
+
+  if (Options.DisableIntegratedAS)
+    TmpAsmInfo->setUseIntegratedAssembler(false);
+
+  if (Options.CompressDebugSections)
+    TmpAsmInfo->setCompressDebugSections(true);
+
+  AsmInfo = TmpAsmInfo;
 }
 
 LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
@@ -92,6 +100,9 @@
                                           bool DisableVerify,
                                           AnalysisID StartAfter,
                                           AnalysisID StopAfter) {
+  // Add internal analysis passes from the target machine.
+  TM->addAnalysisPasses(PM);
+
   // Targets may override createPassConfig to provide a target-specific subclass.
   TargetPassConfig *PassConfig = TM->createPassConfig(PM);
   PassConfig->setStartStopPasses(StartAfter, StopAfter);
@@ -154,7 +165,7 @@
     // machine-level pass), and whatever other information is needed to
     // deserialize the code and resume compilation.  For now, just write the
     // LLVM IR.
-    PM.add(createPrintModulePass(&Out));
+    PM.add(createPrintModulePass(Out));
     return false;
   }
 
@@ -165,7 +176,7 @@
   const MCRegisterInfo &MRI = *getRegisterInfo();
   const MCInstrInfo &MII = *getInstrInfo();
   const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
-  OwningPtr<MCStreamer> AsmStreamer;
+  std::unique_ptr<MCStreamer> AsmStreamer;
 
   switch (FileType) {
   case CGFT_AssemblyFile: {
@@ -182,7 +193,6 @@
                                                        TargetCPU);
     MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
                                                   getVerboseAsm(),
-                                                  hasMCUseLoc(),
                                                   hasMCUseCFI(),
                                                   hasMCUseDwarfDirectory(),
                                                   InstPrinter,
@@ -201,11 +211,9 @@
     if (MCE == 0 || MAB == 0)
       return true;
 
-    AsmStreamer.reset(getTarget().createMCObjectStreamer(getTargetTriple(),
-                                                         *Context, *MAB, Out,
-                                                         MCE, hasMCRelaxAll(),
-                                                         hasMCNoExecStack()));
-    AsmStreamer.get()->setAutoInitSections(true);
+    AsmStreamer.reset(getTarget().createMCObjectStreamer(
+        getTargetTriple(), *Context, *MAB, Out, MCE, STI, hasMCRelaxAll(),
+        hasMCNoExecStack()));
     break;
   }
   case CGFT_Null:
@@ -221,7 +229,7 @@
     return true;
 
   // If successful, createAsmPrinter took ownership of AsmStreamer.
-  AsmStreamer.take();
+  AsmStreamer.release();
 
   PM.add(Printer);
 
@@ -275,12 +283,10 @@
   if (MCE == 0 || MAB == 0)
     return true;
 
-  OwningPtr<MCStreamer> AsmStreamer;
-  AsmStreamer.reset(getTarget().createMCObjectStreamer(getTargetTriple(), *Ctx,
-                                                       *MAB, Out, MCE,
-                                                       hasMCRelaxAll(),
-                                                       hasMCNoExecStack()));
-  AsmStreamer.get()->InitSections();
+  std::unique_ptr<MCStreamer> AsmStreamer;
+  AsmStreamer.reset(getTarget().createMCObjectStreamer(
+      getTargetTriple(), *Ctx, *MAB, Out, MCE, STI, hasMCRelaxAll(),
+      hasMCNoExecStack()));
 
   // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
   FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
@@ -288,7 +294,7 @@
     return true;
 
   // If successful, createAsmPrinter took ownership of AsmStreamer.
-  AsmStreamer.take();
+  AsmStreamer.release();
 
   PM.add(Printer);
 
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index deab05a..e88d537 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -119,12 +119,12 @@
 SUnit *LatencyPriorityQueue::pop() {
   if (empty()) return NULL;
   std::vector<SUnit *>::iterator Best = Queue.begin();
-  for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
+  for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()),
        E = Queue.end(); I != E; ++I)
     if (Picker(*Best, *I))
       Best = I;
   SUnit *V = *Best;
-  if (Best != prior(Queue.end()))
+  if (Best != std::prev(Queue.end()))
     std::swap(*Best, Queue.back());
   Queue.pop_back();
   return V;
@@ -133,7 +133,7 @@
 void LatencyPriorityQueue::remove(SUnit *SU) {
   assert(!Queue.empty() && "Queue is empty!");
   std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
-  if (I != prior(Queue.end()))
+  if (I != std::prev(Queue.end()))
     std::swap(*I, Queue.back());
   Queue.pop_back();
 }
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index ffe407a..c22ab11 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -18,19 +18,18 @@
 #include "llvm/CodeGen/LexicalScopes.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/DebugInfo.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
 using namespace llvm;
 
-LexicalScopes::~LexicalScopes() {
-  releaseMemory();
-}
+/// ~LexicalScopes - final cleanup after ourselves.
+LexicalScopes::~LexicalScopes() { reset(); }
 
-/// releaseMemory - release memory.
-void LexicalScopes::releaseMemory() {
+/// reset - Reset the instance so that it's prepared for another function.
+void LexicalScopes::reset() {
   MF = NULL;
   CurrentFnLexicalScope = NULL;
   DeleteContainerSeconds(LexicalScopeMap);
@@ -41,7 +40,7 @@
 
 /// initialize - Scan machine function and construct lexical scope nest.
 void LexicalScopes::initialize(const MachineFunction &Fn) {
-  releaseMemory();
+  reset();
   MF = &Fn;
   SmallVector<InsnRange, 4> MIRanges;
   DenseMap<const MachineInstr *, LexicalScope *> MI2ScopeMap;
@@ -54,13 +53,13 @@
 
 /// extractLexicalScopes - Extract instruction ranges for each lexical scopes
 /// for the given machine function.
-void LexicalScopes::
-extractLexicalScopes(SmallVectorImpl<InsnRange> &MIRanges,
-                  DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) {
+void LexicalScopes::extractLexicalScopes(
+    SmallVectorImpl<InsnRange> &MIRanges,
+    DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) {
 
   // Scan each instruction and create scopes. First build working set of scopes.
-  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
-       I != E; ++I) {
+  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
+       ++I) {
     const MachineInstr *RangeBeginMI = NULL;
     const MachineInstr *PrevMI = NULL;
     DebugLoc PrevDL;
@@ -117,14 +116,15 @@
   MDNode *Scope = NULL;
   MDNode *IA = NULL;
   DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext());
-  if (!Scope) return NULL;
+  if (!Scope)
+    return NULL;
 
   // The scope that we were created with could have an extra file - which
   // isn't what we care about in this case.
   DIDescriptor D = DIDescriptor(Scope);
   if (D.isLexicalBlockFile())
     Scope = DILexicalBlockFile(Scope).getScope();
-  
+
   if (IA)
     return InlinedLexicalScopeMap.lookup(DebugLoc::getFromDILocation(IA));
   return LexicalScopeMap.lookup(Scope);
@@ -143,7 +143,7 @@
     // Create an inlined scope for inlined function.
     return getOrCreateInlinedScope(Scope, InlinedAt);
   }
-   
+
   return getOrCreateRegularScope(Scope);
 }
 
@@ -154,7 +154,7 @@
     Scope = DILexicalBlockFile(Scope).getScope();
     D = DIDescriptor(Scope);
   }
- 
+
   LexicalScope *WScope = LexicalScopeMap.lookup(Scope);
   if (WScope)
     return WScope;
@@ -164,15 +164,15 @@
     Parent = getOrCreateLexicalScope(DebugLoc::getFromDILexicalBlock(Scope));
   WScope = new LexicalScope(Parent, DIDescriptor(Scope), NULL, false);
   LexicalScopeMap.insert(std::make_pair(Scope, WScope));
-  if (!Parent && DIDescriptor(Scope).isSubprogram()
-      && DISubprogram(Scope).describes(MF->getFunction()))
+  if (!Parent && DIDescriptor(Scope).isSubprogram() &&
+      DISubprogram(Scope).describes(MF->getFunction()))
     CurrentFnLexicalScope = WScope;
-  
+
   return WScope;
 }
 
 /// getOrCreateInlinedScope - Find or create an inlined lexical scope.
-LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *Scope, 
+LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *Scope,
                                                      MDNode *InlinedAt) {
   LexicalScope *InlinedScope = LexicalScopeMap.lookup(InlinedAt);
   if (InlinedScope)
@@ -212,7 +212,7 @@
 
 /// constructScopeNest
 void LexicalScopes::constructScopeNest(LexicalScope *Scope) {
-  assert (Scope && "Unable to calculate scope dominance graph!");
+  assert(Scope && "Unable to calculate scope dominance graph!");
   SmallVector<LexicalScope *, 4> WorkStack;
   WorkStack.push_back(Scope);
   unsigned Counter = 0;
@@ -221,7 +221,8 @@
     const SmallVectorImpl<LexicalScope *> &Children = WS->getChildren();
     bool visitedChildren = false;
     for (SmallVectorImpl<LexicalScope *>::const_iterator SI = Children.begin(),
-           SE = Children.end(); SI != SE; ++SI) {
+                                                         SE = Children.end();
+         SI != SE; ++SI) {
       LexicalScope *ChildScope = *SI;
       if (!ChildScope->getDFSOut()) {
         WorkStack.push_back(ChildScope);
@@ -239,17 +240,17 @@
 
 /// assignInstructionRanges - Find ranges of instructions covered by each
 /// lexical scope.
-void LexicalScopes::
-assignInstructionRanges(SmallVectorImpl<InsnRange> &MIRanges,
-                    DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap)
-{
-  
+void LexicalScopes::assignInstructionRanges(
+    SmallVectorImpl<InsnRange> &MIRanges,
+    DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) {
+
   LexicalScope *PrevLexicalScope = NULL;
   for (SmallVectorImpl<InsnRange>::const_iterator RI = MIRanges.begin(),
-         RE = MIRanges.end(); RI != RE; ++RI) {
+                                                  RE = MIRanges.end();
+       RI != RE; ++RI) {
     const InsnRange &R = *RI;
     LexicalScope *S = MI2ScopeMap.lookup(R.first);
-    assert (S && "Lost LexicalScope for a machine instruction!");
+    assert(S && "Lost LexicalScope for a machine instruction!");
     if (PrevLexicalScope && !PrevLexicalScope->dominates(S))
       PrevLexicalScope->closeInsnRange(S);
     S->openInsnRange(R.first);
@@ -262,26 +263,26 @@
 }
 
 /// getMachineBasicBlocks - Populate given set using machine basic blocks which
-/// have machine instructions that belong to lexical scope identified by 
+/// have machine instructions that belong to lexical scope identified by
 /// DebugLoc.
-void LexicalScopes::
-getMachineBasicBlocks(DebugLoc DL, 
-                      SmallPtrSet<const MachineBasicBlock*, 4> &MBBs) {
+void LexicalScopes::getMachineBasicBlocks(
+    DebugLoc DL, SmallPtrSet<const MachineBasicBlock *, 4> &MBBs) {
   MBBs.clear();
   LexicalScope *Scope = getOrCreateLexicalScope(DL);
   if (!Scope)
     return;
-  
+
   if (Scope == CurrentFnLexicalScope) {
-    for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
-         I != E; ++I)
+    for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
+         ++I)
       MBBs.insert(I);
     return;
   }
 
   SmallVectorImpl<InsnRange> &InsnRanges = Scope->getRanges();
   for (SmallVectorImpl<InsnRange>::iterator I = InsnRanges.begin(),
-         E = InsnRanges.end(); I != E; ++I) {
+                                            E = InsnRanges.end();
+       I != E; ++I) {
     InsnRange &R = *I;
     MBBs.insert(R.first->getParent());
   }
@@ -299,8 +300,8 @@
     return true;
 
   bool Result = false;
-  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
-       I != E; ++I) {
+  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+       ++I) {
     DebugLoc IDL = I->getDebugLoc();
     if (IDL.isUnknown())
       continue;
@@ -311,8 +312,6 @@
   return Result;
 }
 
-void LexicalScope::anchor() { }
-
 /// dump - Print data structures.
 void LexicalScope::dump(unsigned Indent) const {
 #ifndef NDEBUG
@@ -332,4 +331,3 @@
       Children[i]->dump(Indent + 2);
 #endif
 }
-
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 25645e0..bef4156 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -31,8 +31,8 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/VirtRegMap.h"
-#include "llvm/DebugInfo.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Support/CommandLine.h"
@@ -72,7 +72,7 @@
 typedef IntervalMap<SlotIndex, unsigned, 4> LocMap;
 
 namespace {
-/// UserValueScopes - Keeps track of lexical scopes associated with an
+/// UserValueScopes - Keeps track of lexical scopes associated with a
 /// user value's source location.
 class UserValueScopes {
   DebugLoc DL;
@@ -480,7 +480,7 @@
       // DBG_VALUE has no slot index, use the previous instruction instead.
       SlotIndex Idx = MBBI == MBB->begin() ?
         LIS->getMBBStartIdx(MBB) :
-        LIS->getInstructionIndex(llvm::prior(MBBI)).getRegSlot();
+        LIS->getInstructionIndex(std::prev(MBBI)).getRegSlot();
       // Handle consecutive DBG_VALUE instructions with the same slot index.
       do {
         if (handleDebugValue(MBBI, Idx)) {
@@ -568,13 +568,11 @@
 
   // Collect all the (vreg, valno) pairs that are copies of LI.
   SmallVector<std::pair<LiveInterval*, const VNInfo*>, 8> CopyValues;
-  for (MachineRegisterInfo::use_nodbg_iterator
-         UI = MRI.use_nodbg_begin(LI->reg),
-         UE = MRI.use_nodbg_end(); UI != UE; ++UI) {
+  for (MachineOperand &MO : MRI.use_nodbg_operands(LI->reg)) {
+    MachineInstr *MI = MO.getParent();
     // Copies of the full value.
-    if (UI.getOperand().getSubReg() || !UI->isCopy())
+    if (MO.getSubReg() || !MI->isCopy())
       continue;
-    MachineInstr *MI = &*UI;
     unsigned DstReg = MI->getOperand(0).getReg();
 
     // Don't follow copies to physregs. These are usually setting up call
@@ -704,7 +702,6 @@
   bool Changed = collectDebugValues(mf);
   computeIntervals();
   DEBUG(print(dbgs()));
-  LS.releaseMemory();
   ModifiedMF = Changed;
   return Changed;
 }
@@ -915,7 +912,7 @@
 
   // Don't insert anything after the first terminator, though.
   return MI->isTerminator() ? MBB->getFirstTerminator() :
-                              llvm::next(MachineBasicBlock::iterator(MI));
+                              std::next(MachineBasicBlock::iterator(MI));
 }
 
 DebugLoc UserValue::findDebugLoc() {
diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h
index 58a3f0f..bb67435 100644
--- a/lib/CodeGen/LiveDebugVariables.h
+++ b/lib/CodeGen/LiveDebugVariables.h
@@ -61,9 +61,9 @@
 
 private:
 
-  virtual bool runOnMachineFunction(MachineFunction &);
-  virtual void releaseMemory();
-  virtual void getAnalysisUsage(AnalysisUsage &) const;
+  bool runOnMachineFunction(MachineFunction &) override;
+  void releaseMemory() override;
+  void getAnalysisUsage(AnalysisUsage &) const override;
 
 };
 
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 2b8feb8..3a7ac11 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -222,13 +222,13 @@
   VNInfo *ValNo = I->valno;
 
   // Search for the first segment that we can't merge with.
-  iterator MergeTo = llvm::next(I);
+  iterator MergeTo = std::next(I);
   for (; MergeTo != end() && NewEnd >= MergeTo->end; ++MergeTo) {
     assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
   }
 
   // If NewEnd was in the middle of a segment, make sure to get its endpoint.
-  I->end = std::max(NewEnd, prior(MergeTo)->end);
+  I->end = std::max(NewEnd, std::prev(MergeTo)->end);
 
   // If the newly formed segment now touches the segment after it and if they
   // have the same value number, merge the two segments into one segment.
@@ -239,7 +239,7 @@
   }
 
   // Erase any dead segments.
-  segments.erase(llvm::next(I), MergeTo);
+  segments.erase(std::next(I), MergeTo);
 }
 
 
@@ -274,7 +274,7 @@
     MergeTo->end = I->end;
   }
 
-  segments.erase(llvm::next(MergeTo), llvm::next(I));
+  segments.erase(std::next(MergeTo), std::next(I));
   return MergeTo;
 }
 
@@ -285,7 +285,7 @@
   // If the inserted segment starts in the middle or right at the end of
   // another segment, just extend that segment to contain the segment of S.
   if (it != begin()) {
-    iterator B = prior(it);
+    iterator B = std::prev(it);
     if (S.valno == B->valno) {
       if (B->start <= Start && B->end >= Start) {
         extendSegmentEndTo(B, End);
@@ -389,7 +389,7 @@
   I->end = Start;   // Trim the old segment.
 
   // Insert the new one.
-  segments.insert(llvm::next(I), Segment(End, OldEnd, ValNo));
+  segments.insert(std::next(I), Segment(End, OldEnd, ValNo));
 }
 
 /// removeValNo - Remove all the segments defined by the specified value#.
@@ -433,7 +433,7 @@
 
     iterator OutIt = begin();
     OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]];
-    for (iterator I = llvm::next(OutIt), E = end(); I != E; ++I) {
+    for (iterator I = std::next(OutIt), E = end(); I != E; ++I) {
       VNInfo* nextValNo = NewVNInfo[LHSValNoAssignments[I->valno->id]];
       assert(nextValNo != 0 && "Huh?");
 
@@ -641,10 +641,10 @@
     assert(I->valno != 0);
     assert(I->valno->id < valnos.size());
     assert(I->valno == valnos[I->valno->id]);
-    if (llvm::next(I) != E) {
-      assert(I->end <= llvm::next(I)->start);
-      if (I->end == llvm::next(I)->start)
-        assert(I->valno != llvm::next(I)->valno);
+    if (std::next(I) != E) {
+      assert(I->end <= std::next(I)->start);
+      if (I->end == std::next(I)->start)
+        assert(I->valno != std::next(I)->valno);
     }
   }
 }
@@ -905,8 +905,8 @@
   // Rewrite instructions.
   for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg),
        RE = MRI.reg_end(); RI != RE;) {
-    MachineOperand &MO = RI.getOperand();
-    MachineInstr *MI = MO.getParent();
+    MachineOperand &MO = *RI;
+    MachineInstr *MI = RI->getParent();
     ++RI;
     // DBG_VALUE instructions don't have slot indexes, so get the index of the
     // instruction before them.
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index e1c3217..fdc673f 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -325,8 +326,10 @@
   SmallPtrSet<MachineBasicBlock*, 16> LiveOut;
 
   // Visit all instructions reading li->reg.
-  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(li->reg);
-       MachineInstr *UseMI = I.skipInstruction();) {
+  for (MachineRegisterInfo::reg_instr_iterator
+       I = MRI->reg_instr_begin(li->reg), E = MRI->reg_instr_end();
+       I != E; ) {
+    MachineInstr *UseMI = &*(I++);
     if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg))
       continue;
     SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot();
@@ -458,7 +461,7 @@
 
   MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill);
   SlotIndex MBBStart, MBBEnd;
-  tie(MBBStart, MBBEnd) = Indexes->getMBBRange(KillMBB);
+  std::tie(MBBStart, MBBEnd) = Indexes->getMBBRange(KillMBB);
 
   // If VNI isn't live out from KillMBB, the value is trivially pruned.
   if (LRQ.endPoint() < MBBEnd) {
@@ -485,7 +488,7 @@
       MachineBasicBlock *MBB = *I;
 
       // Check if VNI is live in to MBB.
-      tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB);
+      std::tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB);
       LiveQueryResult LRQ = LI->Query(MBBStart);
       if (LRQ.valueIn() != VNI) {
         // This block isn't part of the VNI segment. Prune the search.
@@ -620,9 +623,12 @@
 }
 
 float
-LiveIntervals::getSpillWeight(bool isDef, bool isUse, BlockFrequency freq) {
-  const float Scale = 1.0f / BlockFrequency::getEntryFrequency();
-  return (isDef + isUse) * (freq.getFrequency() * Scale);
+LiveIntervals::getSpillWeight(bool isDef, bool isUse,
+                              const MachineBlockFrequencyInfo *MBFI,
+                              const MachineInstr *MI) {
+  BlockFrequency Freq = MBFI->getBlockFreq(MI->getParent());
+  const float Scale = 1.0f / MBFI->getEntryFreq();
+  return (isDef + isUse) * (Freq.getFrequency() * Scale);
 }
 
 LiveRange::Segment
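
The spill weight now pulls the block frequency from MachineBlockFrequencyInfo via the instruction's parent block, but the arithmetic is unchanged: (isDef + isUse) scaled by the block's frequency relative to the entry block. A self-contained illustration of that formula, with plain integers standing in for the LLVM frequency types (an assumption, not the real API):

#include <cassert>
#include <cstdint>

// Illustration: weight = (isDef + isUse) * (BlockFreq / EntryFreq).
static float spillWeight(bool IsDef, bool IsUse, uint64_t BlockFreq,
                         uint64_t EntryFreq) {
  const float Scale = 1.0f / EntryFreq;
  return (IsDef + IsUse) * (BlockFreq * Scale);
}

int main() {
  // An operand that is both a def and a use, in a block running twice as
  // often as the entry block, weighs (1 + 1) * 2.0 = 4.0.
  assert(spillWeight(true, true, 2048, 1024) == 4.0f);
  return 0;
}
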
@@ -870,8 +876,8 @@
     // values. The new range should be placed immediately before NewI, move any
     // intermediate ranges up.
     assert(NewI != I && "Inconsistent iterators");
-    std::copy(llvm::next(I), NewI, I);
-    *llvm::prior(NewI)
+    std::copy(std::next(I), NewI, I);
+    *std::prev(NewI)
       = LiveRange::Segment(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
   }
 
@@ -916,7 +922,7 @@
       if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) {
         // No def, search for the new kill.
         // This can never be an early clobber kill since there is no def.
-        llvm::prior(I)->end = findLastUseBefore(Reg).getRegSlot();
+        std::prev(I)->end = findLastUseBefore(Reg).getRegSlot();
         return;
       }
     }
@@ -952,7 +958,7 @@
 
     // DefVNI is a dead def. It may have been moved across other values in LR,
     // so move I up to NewI. Slide [NewI;I) down one position.
-    std::copy_backward(NewI, I, llvm::next(I));
+    std::copy_backward(NewI, I, std::next(I));
     *NewI = LiveRange::Segment(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
   }
 
@@ -964,11 +970,11 @@
            "No RegMask at OldIdx.");
     *RI = NewIdx.getRegSlot();
     assert((RI == LIS.RegMaskSlots.begin() ||
-            SlotIndex::isEarlierInstr(*llvm::prior(RI), *RI)) &&
-            "Cannot move regmask instruction above another call");
-    assert((llvm::next(RI) == LIS.RegMaskSlots.end() ||
-            SlotIndex::isEarlierInstr(*RI, *llvm::next(RI))) &&
-            "Cannot move regmask instruction below another call");
+            SlotIndex::isEarlierInstr(*std::prev(RI), *RI)) &&
+           "Cannot move regmask instruction above another call");
+    assert((std::next(RI) == LIS.RegMaskSlots.end() ||
+            SlotIndex::isEarlierInstr(*RI, *std::next(RI))) &&
+           "Cannot move regmask instruction below another call");
   }
 
   // Return the last use of reg between NewIdx and OldIdx.
@@ -976,10 +982,10 @@
 
     if (TargetRegisterInfo::isVirtualRegister(Reg)) {
       SlotIndex LastUse = NewIdx;
-      for (MachineRegisterInfo::use_nodbg_iterator
-             UI = MRI.use_nodbg_begin(Reg),
-             UE = MRI.use_nodbg_end();
-           UI != UE; UI.skipInstruction()) {
+      for (MachineRegisterInfo::use_instr_nodbg_iterator
+             UI = MRI.use_instr_nodbg_begin(Reg),
+             UE = MRI.use_instr_nodbg_end();
+           UI != UE; ++UI) {
         const MachineInstr* MI = &*UI;
         SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI);
         if (InstSlot > LastUse && InstSlot < OldIdx)
@@ -1121,7 +1127,7 @@
             if (LII->end.isDead()) {
               SlotIndex prevStart;
               if (LII != LI.begin())
-                prevStart = llvm::prior(LII)->start;
+                prevStart = std::prev(LII)->start;
 
               // FIXME: This could be more efficient if there was a
               // removeSegment method that returned an iterator.
diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp
new file mode 100644
index 0000000..7efd941
--- /dev/null
+++ b/lib/CodeGen/LivePhysRegs.cpp
@@ -0,0 +1,114 @@
+//===--- LivePhysRegs.cpp - Live Physical Register Set --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LivePhysRegs utility for tracking liveness of
+// physical registers across machine instructions in forward or backward order.
+// A more detailed description can be found in the corresponding header file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+
+/// \brief Remove all registers from the set that get clobbered by the register
+/// mask.
+void LivePhysRegs::removeRegsInMask(const MachineOperand &MO) {
+  SparseSet<unsigned>::iterator LRI = LiveRegs.begin();
+  while (LRI != LiveRegs.end()) {
+    if (MO.clobbersPhysReg(*LRI))
+      LRI = LiveRegs.erase(LRI);
+    else
+      ++LRI;
+  }
+}
+
+/// Simulates liveness when stepping backwards over an instruction (bundle):
+/// Remove Defs, add uses. This is the recommended way of calculating liveness.
+void LivePhysRegs::stepBackward(const MachineInstr &MI) {
+  // Remove defined registers and regmask kills from the set.
+  for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) {
+    if (O->isReg()) {
+      if (!O->isDef())
+        continue;
+      unsigned Reg = O->getReg();
+      if (Reg == 0)
+        continue;
+      removeReg(Reg);
+    } else if (O->isRegMask())
+      removeRegsInMask(*O);
+  }
+
+  // Add uses to the set.
+  for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) {
+    if (!O->isReg() || !O->readsReg() || O->isUndef())
+      continue;
+    unsigned Reg = O->getReg();
+    if (Reg == 0)
+      continue;
+    addReg(Reg);
+  }
+}
+
+/// Simulates liveness when stepping forward over an instruction (bundle):
+/// Remove killed uses, add defs. This is not the recommended way, because it
+/// depends on accurate kill flags. If possible, use stepBackward() instead of
+/// this function.
+void LivePhysRegs::stepForward(const MachineInstr &MI) {
+  SmallVector<unsigned, 4> Defs;
+  // Remove killed registers from the set.
+  for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) {
+    if (O->isReg()) {
+      unsigned Reg = O->getReg();
+      if (Reg == 0)
+        continue;
+      if (O->isDef()) {
+        if (!O->isDead())
+          Defs.push_back(Reg);
+      } else {
+        if (!O->isKill())
+          continue;
+        assert(O->isUse());
+        removeReg(Reg);
+      }
+    } else if (O->isRegMask())
+      removeRegsInMask(*O);
+  }
+
+  // Add defs to the set.
+  for (unsigned i = 0, e = Defs.size(); i != e; ++i)
+    addReg(Defs[i]);
+}
+
+/// Print the currently live registers to OS.
+void LivePhysRegs::print(raw_ostream &OS) const {
+  OS << "Live Registers:";
+  if (!TRI) {
+    OS << " (uninitialized)\n";
+    return;
+  }
+
+  if (empty()) {
+    OS << " (empty)\n";
+    return;
+  }
+
+  for (const_iterator I = begin(), E = end(); I != E; ++I)
+    OS << " " << PrintReg(*I, TRI);
+  OS << "\n";
+}
+
+/// Dumps the currently live registers to the debug output.
+void LivePhysRegs::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  dbgs() << "  " << *this;
+#endif
+}
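
The new LivePhysRegs tracker is driven one instruction at a time, preferably bottom-up as the stepBackward() comment above advises. A minimal usage sketch, assuming the caller has already seeded the set with the block's live-out registers via addReg() (supplying accurate live-outs is the caller's responsibility):

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Sketch: visit MBB bottom-up; after each step the set holds the registers
// live immediately before the instruction just visited.
static void walkLiveness(const MachineBasicBlock &MBB,
                         const TargetRegisterInfo *TRI) {
  LivePhysRegs LiveRegs(TRI);
  // Assumption: seed LiveRegs with MBB's live-outs via addReg() here.
  for (MachineBasicBlock::const_reverse_iterator I = MBB.rbegin(),
                                                 E = MBB.rend();
       I != E; ++I)
    LiveRegs.stepBackward(*I); // defs removed, then uses added
}
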
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index ae086bc..ecd75b4 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -41,9 +41,8 @@
 
   // Visit all def operands. If the same instruction has multiple defs of Reg,
   // LR.createDeadDef() will deduplicate.
-  for (MachineRegisterInfo::def_iterator
-       I = MRI->def_begin(Reg), E = MRI->def_end(); I != E; ++I) {
-    const MachineInstr *MI = &*I;
+  for (MachineOperand &MO : MRI->def_operands(Reg)) {
+    const MachineInstr *MI = MO.getParent();
     // Find the corresponding slot index.
     SlotIndex Idx;
     if (MI->isPHI())
@@ -52,7 +51,7 @@
     else
       // Instructions are either normal 'r', or early clobber 'e'.
       Idx = Indexes->getInstructionIndex(MI)
-        .getRegSlot(I.getOperand().isEarlyClobber());
+        .getRegSlot(MO.isEarlyClobber());
 
     // Create the def in LR. This may find an existing def.
     LR.createDeadDef(Idx, *Alloc);
@@ -64,9 +63,7 @@
   assert(MRI && Indexes && "call reset() first");
 
   // Visit all operands that read Reg. This may include partial defs.
-  for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg),
-       E = MRI->reg_nodbg_end(); I != E; ++I) {
-    MachineOperand &MO = I.getOperand();
+  for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
     // Clear all kill flags. They will be reinserted after register allocation
     // by LiveIntervalAnalysis::addKillFlags().
     if (MO.isUse())
@@ -75,7 +72,8 @@
       continue;
     // MI is reading Reg. We may have visited MI before if it happens to be
     // reading Reg multiple times. That is OK, extend() is idempotent.
-    const MachineInstr *MI = &*I;
+    const MachineInstr *MI = MO.getParent();
+    unsigned OpNo = (&MO - &MI->getOperand(0));
 
     // Find the SlotIndex being read.
     SlotIndex Idx;
@@ -83,7 +81,7 @@
       assert(!MO.isDef() && "Cannot handle PHI def of partial register.");
       // PHI operands are paired: (Reg, PredMBB).
       // Extend the live range to be live-out from PredMBB.
-      Idx = Indexes->getMBBEndIdx(MI->getOperand(I.getOperandNo()+1).getMBB());
+      Idx = Indexes->getMBBEndIdx(MI->getOperand(OpNo+1).getMBB());
     } else {
       // This is a normal instruction.
       Idx = Indexes->getInstructionIndex(MI).getRegSlot();
@@ -92,7 +90,7 @@
       if (MO.isDef()) {
         if (MO.isEarlyClobber())
           Idx = Idx.getRegSlot(true);
-      } else if (MI->isRegTiedToDefOperand(I.getOperandNo(), &DefIdx)) {
+      } else if (MI->isRegTiedToDefOperand(OpNo, &DefIdx)) {
         // FIXME: This would be a lot easier if tied early-clobber uses also
         // had an early-clobber flag.
         if (MI->getOperand(DefIdx).isEarlyClobber())
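
With the iterator gone, the operand index that getOperandNo() used to provide is recovered by pointer arithmetic: a MachineInstr stores its operands contiguously, so &MO - &MI->getOperand(0) is MO's operand number. A reduced illustration of the idiom with plain structs (not the LLVM types):

#include <cassert>

struct Operand { int Reg; };
struct Instr { Operand Ops[4]; };

int main() {
  Instr MI = {{{1}, {2}, {3}, {4}}};
  Operand &MO = MI.Ops[2];
  // Contiguous storage makes the index recoverable from the address alone.
  assert(&MO - &MI.Ops[0] == 2);
  return 0;
}
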
@@ -114,7 +112,7 @@
     MachineBasicBlock *MBB = I->DomNode->getBlock();
     assert(I->Value && "No live-in value found");
     SlotIndex Start, End;
-    tie(Start, End) = Indexes->getMBBRange(MBB);
+    std::tie(Start, End) = Indexes->getMBBRange(MBB);
 
     if (I->Kill.isValid())
       // Value is killed inside this block.
@@ -212,7 +210,7 @@
        }
 
        SlotIndex Start, End;
-       tie(Start, End) = Indexes->getMBBRange(Pred);
+       std::tie(Start, End) = Indexes->getMBBRange(Pred);
 
        // First time we see Pred.  Try to determine the live-out value, but set
        // it as null if Pred is live-through with an unknown value.
@@ -247,7 +245,7 @@
     for (SmallVectorImpl<unsigned>::const_iterator I = WorkList.begin(),
          E = WorkList.end(); I != E; ++I) {
        SlotIndex Start, End;
-       tie(Start, End) = Indexes->getMBBRange(*I);
+       std::tie(Start, End) = Indexes->getMBBRange(*I);
        // Trim the live range in KillMBB.
        if (*I == KillMBBNum && Kill.isValid())
          End = Kill;
@@ -342,7 +340,7 @@
         ++Changes;
         assert(Alloc && "Need VNInfo allocator to create PHI-defs");
         SlotIndex Start, End;
-        tie(Start, End) = Indexes->getMBBRange(MBB);
+        std::tie(Start, End) = Indexes->getMBBRange(MBB);
         LiveRange &LR = I->LR;
         VNInfo *VNI = LR.getNextValue(Start, *Alloc);
         I->Value = VNI;
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index cb70c43..891eaab 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -167,9 +167,7 @@
   MachineInstr *DefMI = 0, *UseMI = 0;
 
   // Check that there is a single def and a single use.
-  for (MachineRegisterInfo::reg_nodbg_iterator I = MRI.reg_nodbg_begin(LI->reg),
-       E = MRI.reg_nodbg_end(); I != E; ++I) {
-    MachineOperand &MO = I.getOperand();
+  for (MachineOperand &MO : MRI.reg_nodbg_operands(LI->reg)) {
     MachineInstr *MI = MO.getParent();
     if (MO.isDef()) {
       if (DefMI && DefMI != MI)
diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp
index 1d801ac..7f797be 100644
--- a/lib/CodeGen/LiveRegMatrix.cpp
+++ b/lib/CodeGen/LiveRegMatrix.cpp
@@ -65,7 +65,9 @@
 void LiveRegMatrix::releaseMemory() {
   for (unsigned i = 0, e = Matrix.size(); i != e; ++i) {
     Matrix[i].clear();
-    Queries[i].clear();
+    // No need to clear Queries here, since LiveIntervalUnion::Query doesn't
+    // have anything important to clear and LiveRegMatrix's runOnFunction()
+    // does a std::unique_ptr::reset anyway.
   }
 }
 
diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp
deleted file mode 100644
index 6221ca2..0000000
--- a/lib/CodeGen/LiveRegUnits.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-//===-- LiveInterval.cpp - Live Interval Representation -------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the LiveRegUnits utility for tracking liveness of
-// physical register units across machine instructions in forward or backward
-// order.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/LiveRegUnits.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
-using namespace llvm;
-
-/// Return true if the given MachineOperand clobbers the given register unit.
-/// A register unit is only clobbered if all its super-registers are clobbered.
-static bool operClobbersUnit(const MachineOperand *MO, unsigned Unit,
-                             const MCRegisterInfo *MCRI) {
-  for (MCRegUnitRootIterator RI(Unit, MCRI); RI.isValid(); ++RI) {
-    for (MCSuperRegIterator SI(*RI, MCRI, true); SI.isValid(); ++SI) {
-      if (!MO->clobbersPhysReg(*SI))
-        return false;
-    }
-  }
-  return true;
-}
-
-/// We assume the high bits of a physical super register are not preserved
-/// unless the instruction has an implicit-use operand reading the
-/// super-register or a register unit for the upper bits is available.
-void LiveRegUnits::removeRegsInMask(const MachineOperand &Op,
-                                    const MCRegisterInfo &MCRI) {
-  SparseSet<unsigned>::iterator LUI = LiveUnits.begin();
-  while (LUI != LiveUnits.end()) {
-    if (operClobbersUnit(&Op, *LUI, &MCRI))
-      LUI = LiveUnits.erase(LUI);
-    else
-      ++LUI;
-  }
-}
-
-void LiveRegUnits::stepBackward(const MachineInstr &MI,
-                                const MCRegisterInfo &MCRI) {
-  // Remove defined registers and regmask kills from the set.
-  for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) {
-    if (O->isReg()) {
-      if (!O->isDef())
-        continue;
-      unsigned Reg = O->getReg();
-      if (Reg == 0)
-        continue;
-      removeReg(Reg, MCRI);
-    } else if (O->isRegMask()) {
-      removeRegsInMask(*O, MCRI);
-    }
-  }
-  // Add uses to the set.
-  for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) {
-    if (!O->isReg() || !O->readsReg() || O->isUndef())
-      continue;
-    unsigned Reg = O->getReg();
-    if (Reg == 0)
-      continue;
-    addReg(Reg, MCRI);
-  }
-}
-
-/// Uses with kill flag get removed from the set, defs added. If possible
-/// use StepBackward() instead of this function because some kill flags may
-/// be missing.
-void LiveRegUnits::stepForward(const MachineInstr &MI,
-                               const MCRegisterInfo &MCRI) {
-  SmallVector<unsigned, 4> Defs;
-  // Remove killed registers from the set.
-  for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) {
-    if (O->isReg()) {
-      unsigned Reg = O->getReg();
-      if (Reg == 0)
-        continue;
-      if (O->isDef()) {
-        if (!O->isDead())
-          Defs.push_back(Reg);
-      } else {
-        if (!O->isKill())
-          continue;
-        assert(O->isUse());
-        removeReg(Reg, MCRI);
-      }
-    } else if (O->isRegMask()) {
-      removeRegsInMask(*O, MCRI);
-    }
-  }
-  // Add defs to the set.
-  for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
-    addReg(Defs[i], MCRI);
-  }
-}
-
-/// Adds all registers in the live-in list of block @p BB.
-void LiveRegUnits::addLiveIns(const MachineBasicBlock *MBB,
-                              const MCRegisterInfo &MCRI) {
-  for (MachineBasicBlock::livein_iterator L = MBB->livein_begin(),
-         LE = MBB->livein_end(); L != LE; ++L) {
-    addReg(*L, MCRI);
-  }
-}
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index 26a1176..122d467 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -17,12 +17,14 @@
 #define DEBUG_TYPE "localstackalloc"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackProtector.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Instructions.h"
@@ -60,18 +62,27 @@
 
   class LocalStackSlotPass: public MachineFunctionPass {
     SmallVector<int64_t,16> LocalOffsets;
+    /// StackObjSet - A set of stack object indexes.
+    typedef SmallSetVector<int, 8> StackObjSet;
 
     void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, int64_t &Offset,
                            bool StackGrowsDown, unsigned &MaxAlign);
+    void AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
+                               SmallSet<int, 16> &ProtectedObjs,
+                               MachineFrameInfo *MFI, bool StackGrowsDown,
+                               int64_t &Offset, unsigned &MaxAlign);
     void calculateFrameObjectOffsets(MachineFunction &Fn);
     bool insertFrameReferenceRegisters(MachineFunction &Fn);
   public:
     static char ID; // Pass identification, replacement for typeid
-    explicit LocalStackSlotPass() : MachineFunctionPass(ID) { }
-    bool runOnMachineFunction(MachineFunction &MF);
+    explicit LocalStackSlotPass() : MachineFunctionPass(ID) {
+      initializeLocalStackSlotPassPass(*PassRegistry::getPassRegistry());
+    }
+    bool runOnMachineFunction(MachineFunction &MF) override;
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesCFG();
+      AU.addRequired<StackProtector>();
       MachineFunctionPass::getAnalysisUsage(AU);
     }
 
@@ -81,8 +92,12 @@
 
 char LocalStackSlotPass::ID = 0;
 char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID;
-INITIALIZE_PASS(LocalStackSlotPass, "localstackalloc",
-                "Local Stack Slot Allocation", false, false)
+INITIALIZE_PASS_BEGIN(LocalStackSlotPass, "localstackalloc",
+                      "Local Stack Slot Allocation", false, false)
+INITIALIZE_PASS_DEPENDENCY(StackProtector)
+INITIALIZE_PASS_END(LocalStackSlotPass, "localstackalloc",
+                    "Local Stack Slot Allocation", false, false)
+
 
 bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
   MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -145,6 +160,22 @@
   ++NumAllocations;
 }
 
+/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e.,
+/// those required to be close to the Stack Protector) to stack offsets.
+void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
+                                           SmallSet<int, 16> &ProtectedObjs,
+                                           MachineFrameInfo *MFI,
+                                           bool StackGrowsDown, int64_t &Offset,
+                                           unsigned &MaxAlign) {
+
+  for (StackObjSet::const_iterator I = UnassignedObjs.begin(),
+        E = UnassignedObjs.end(); I != E; ++I) {
+    int i = *I;
+    AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
+    ProtectedObjs.insert(i);
+  }
+}
+
 /// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
 /// abstract stack objects.
 ///
@@ -156,11 +187,16 @@
     TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
   int64_t Offset = 0;
   unsigned MaxAlign = 0;
+  StackProtector *SP = &getAnalysis<StackProtector>();
 
   // Make sure that the stack protector comes before the local variables on the
   // stack.
-  SmallSet<int, 16> LargeStackObjs;
+  SmallSet<int, 16> ProtectedObjs;
   if (MFI->getStackProtectorIndex() >= 0) {
+    StackObjSet LargeArrayObjs;
+    StackObjSet SmallArrayObjs;
+    StackObjSet AddrOfObjs;
+
     AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), Offset,
                       StackGrowsDown, MaxAlign);
 
@@ -170,12 +206,29 @@
         continue;
       if (MFI->getStackProtectorIndex() == (int)i)
         continue;
-      if (!MFI->MayNeedStackProtector(i))
-        continue;
 
-      AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
-      LargeStackObjs.insert(i);
+      switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) {
+      case StackProtector::SSPLK_None:
+        continue;
+      case StackProtector::SSPLK_SmallArray:
+        SmallArrayObjs.insert(i);
+        continue;
+      case StackProtector::SSPLK_AddrOf:
+        AddrOfObjs.insert(i);
+        continue;
+      case StackProtector::SSPLK_LargeArray:
+        LargeArrayObjs.insert(i);
+        continue;
+      }
+      llvm_unreachable("Unexpected SSPLayoutKind.");
     }
+
+    AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
+                          Offset, MaxAlign);
+    AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
+                          Offset, MaxAlign);
+    AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown,
+                          Offset, MaxAlign);
   }
 
   // Then assign frame offsets to stack objects that are not used to spill
@@ -185,7 +238,7 @@
       continue;
     if (MFI->getStackProtectorIndex() == (int)i)
       continue;
-    if (LargeStackObjs.count(i))
+    if (ProtectedObjs.count(i))
       continue;
 
     AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
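
The switch above buckets each protected stack object by the StackProtector analysis's SSPLayoutKind, and the three AssignProtectedObjSet calls then lay the buckets out in a fixed order: large arrays first (closest to the stack protector slot), then small arrays, then address-taken objects. A reduced sketch of that priority ordering, with the LLVM types replaced by plain containers (illustration only):

#include <cassert>
#include <vector>

// Mirrors the SSPLayoutKind priority used above: lower enumerators are
// assigned first and therefore end up nearest the stack protector.
enum SSPLayoutKind { SSPLK_None, SSPLK_LargeArray, SSPLK_SmallArray,
                     SSPLK_AddrOf };

static void layoutProtectedObjects(const std::vector<SSPLayoutKind> &Objs,
                                   std::vector<int> &Order) {
  for (int Pass = SSPLK_LargeArray; Pass <= SSPLK_AddrOf; ++Pass)
    for (size_t i = 0, e = Objs.size(); i != e; ++i)
      if (Objs[i] == Pass)
        Order.push_back((int)i);
}

int main() {
  std::vector<SSPLayoutKind> Objs;
  Objs.push_back(SSPLK_AddrOf);     // object 0
  Objs.push_back(SSPLK_LargeArray); // object 1
  Objs.push_back(SSPLK_SmallArray); // object 2
  std::vector<int> Order;
  layoutProtectedObjects(Objs, Order);
  assert(Order[0] == 1 && Order[1] == 2 && Order[2] == 0);
  return 0;
}
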
@@ -233,9 +286,11 @@
     for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
       MachineInstr *MI = I;
 
-      // Debug value instructions can't be out of range, so they don't need
-      // any updates.
-      if (MI->isDebugValue())
+      // Debug value, stackmap and patchpoint instructions can't be out of
+      // range, so they don't need any updates.
+      if (MI->isDebugValue() ||
+          MI->getOpcode() == TargetOpcode::STACKMAP ||
+          MI->getOpcode() == TargetOpcode::PATCHPOINT)
         continue;
 
       // For now, allocate the base register(s) within the basic block
@@ -322,18 +377,11 @@
       // processed all FrameRefs before this one, just check whether or not
       // the next FrameRef will be able to reuse this new register. If not,
       // then don't bother creating it.
-      bool CanReuse = false;
-      for (int refn = ref + 1; refn < e; ++refn) {
-        FrameRef &FRN = FrameReferenceInsns[refn];
-        MachineBasicBlock::iterator J = FRN.getMachineInstr();
-        MachineInstr *MIN = J;
-
-        CanReuse = lookupCandidateBaseReg(BaseOffset, FrameSizeAdjust,
-                                          FRN.getLocalOffset(), MIN, TRI);
-        break;
-      }
-
-      if (!CanReuse) {
+      if (ref + 1 >= e ||
+          !lookupCandidateBaseReg(
+              BaseOffset, FrameSizeAdjust,
+              FrameReferenceInsns[ref + 1].getLocalOffset(),
+              FrameReferenceInsns[ref + 1].getMachineInstr(), TRI)) {
         BaseOffset = PrevBaseOffset;
         continue;
       }
@@ -363,7 +411,7 @@
 
     // Modify the instruction to use the new base register rather
     // than the frame index operand.
-    TRI->resolveFrameIndex(I, BaseReg, Offset);
+    TRI->resolveFrameIndex(*I, BaseReg, Offset);
     DEBUG(dbgs() << "Resolved: " << *MI);
 
     ++NumReplacements;
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index ca71e3b..888c20e 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -14,7 +14,6 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineDominators.h"
@@ -25,10 +24,10 @@
 #include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/LeakDetector.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/LeakDetector.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
@@ -52,7 +51,8 @@
   if (!CachedMCSymbol) {
     const MachineFunction *MF = getParent();
     MCContext &Ctx = MF->getContext();
-    const char *Prefix = Ctx.getAsmInfo()->getPrivateGlobalPrefix();
+    const TargetMachine &TM = MF->getTarget();
+    const char *Prefix = TM.getDataLayout()->getPrivateGlobalPrefix();
     CachedMCSymbol = Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" +
                                            Twine(MF->getFunctionNumber()) +
                                            "_" + Twine(getNumber()));
@@ -160,7 +160,7 @@
 MachineBasicBlock::iterator
 MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
   iterator E = end();
-  while (I != E && (I->isPHI() || I->isLabel() || I->isDebugValue()))
+  while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue()))
     ++I;
   // FIXME: This needs to change if we wish to bundle labels / dbg_values
   // inside the bundle.
@@ -277,7 +277,7 @@
   const char *Comma = "";
   if (const BasicBlock *LBB = getBasicBlock()) {
     OS << Comma << "derived from LLVM BB ";
-    WriteAsOperand(OS, LBB, /*PrintType=*/false);
+    LBB->printAsOperand(OS, /*PrintType=*/false);
     Comma = ", ";
   }
   if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
@@ -330,6 +330,10 @@
   }
 }
 
+void MachineBasicBlock::printAsOperand(raw_ostream &OS, bool /*PrintType*/) {
+  OS << "BB#" << getNumber();
+}
+
 void MachineBasicBlock::removeLiveIn(unsigned Reg) {
   std::vector<unsigned>::iterator I =
     std::find(LiveIns.begin(), LiveIns.end(), Reg);
@@ -622,7 +626,7 @@
 
 bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
   MachineFunction::const_iterator I(this);
-  return llvm::next(I) == MachineFunction::const_iterator(MBB);
+  return std::next(I) == MachineFunction::const_iterator(MBB);
 }
 
 bool MachineBasicBlock::canFallThrough() {
@@ -677,6 +681,11 @@
   MachineFunction *MF = getParent();
   DebugLoc dl;  // FIXME: this is nowhere
 
+  // Performance might be harmed on HW that implements branching using exec mask
+  // where both sides of the branches are always executed.
+  if (MF->getTarget().requiresStructuredCFG())
+    return NULL;
+
   // We may need to update this's terminator, but we can't do that if
   // AnalyzeBranch fails. If this uses a jump table, we won't touch it.
   const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
@@ -696,7 +705,7 @@
   }
 
   MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
-  MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB);
+  MF->insert(std::next(MachineFunction::iterator(this)), NMBB);
   DEBUG(dbgs() << "Splitting critical edge:"
         " BB#" << getNumber()
         << " -- BB#" << NMBB->getNumber()
@@ -839,7 +848,7 @@
     // extend to the end of the new split block.
 
     bool isLastMBB =
-      llvm::next(MachineFunction::iterator(NMBB)) == getParent()->end();
+      std::next(MachineFunction::iterator(NMBB)) == getParent()->end();
 
     SlotIndex StartIndex = Indexes->getMBBEndIdx(this);
     SlotIndex PrevIndex = StartIndex.getPrevSlot();
@@ -1054,7 +1063,7 @@
   bool Changed = false;
 
   MachineFunction::iterator FallThru =
-    llvm::next(MachineFunction::iterator(this));
+    std::next(MachineFunction::iterator(this));
 
   if (DestA == 0 && DestB == 0) {
     // Block falls through to successor.
@@ -1114,6 +1123,13 @@
   return *getWeightIterator(Succ);
 }
 
+/// Set successor weight of a given iterator.
+void MachineBasicBlock::setSuccWeight(succ_iterator I, uint32_t weight) {
+  if (Weights.empty())
+    return;
+  *getWeightIterator(I) = weight;
+}
+
 /// getWeightIterator - Return weight iterator corresponding to the I successor
 /// iterator
 MachineBasicBlock::weight_iterator MachineBasicBlock::
@@ -1210,9 +1226,3 @@
   // At this point we have no idea of the liveness of the register.
   return LQR_Unknown;
 }
-
-void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB,
-                          bool t) {
-  OS << "BB#" << MBB->getNumber();
-}
-
diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index e269d24..13203d5 100644
--- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -1,4 +1,4 @@
-//====----- MachineBlockFrequencyInfo.cpp - Machine Block Frequency Analysis ----====//
+//====------ MachineBlockFrequencyInfo.cpp - MBB Frequency Analysis ------====//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,9 +16,99 @@
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
 
 using namespace llvm;
 
+#ifndef NDEBUG
+enum GVDAGType {
+  GVDT_None,
+  GVDT_Fraction,
+  GVDT_Integer
+};
+
+static cl::opt<GVDAGType>
+ViewMachineBlockFreqPropagationDAG("view-machine-block-freq-propagation-dags",
+                                   cl::Hidden,
+          cl::desc("Pop up a window to show a dag displaying how machine block "
+                   "frequencies propagate through the CFG."),
+          cl::values(
+            clEnumValN(GVDT_None, "none",
+                       "do not display graphs."),
+            clEnumValN(GVDT_Fraction, "fraction", "display a graph using the "
+                       "fractional block frequency representation."),
+            clEnumValN(GVDT_Integer, "integer", "display a graph using the raw "
+                       "integer fractional block frequency representation."),
+            clEnumValEnd));
+
+namespace llvm {
+
+template <>
+struct GraphTraits<MachineBlockFrequencyInfo *> {
+  typedef const MachineBasicBlock NodeType;
+  typedef MachineBasicBlock::const_succ_iterator ChildIteratorType;
+  typedef MachineFunction::const_iterator nodes_iterator;
+
+  static inline
+  const NodeType *getEntryNode(const MachineBlockFrequencyInfo *G) {
+    return G->getFunction()->begin();
+  }
+
+  static ChildIteratorType child_begin(const NodeType *N) {
+    return N->succ_begin();
+  }
+
+  static ChildIteratorType child_end(const NodeType *N) {
+    return N->succ_end();
+  }
+
+  static nodes_iterator nodes_begin(const MachineBlockFrequencyInfo *G) {
+    return G->getFunction()->begin();
+  }
+
+  static nodes_iterator nodes_end(const MachineBlockFrequencyInfo *G) {
+    return G->getFunction()->end();
+  }
+};
+
+template<>
+struct DOTGraphTraits<MachineBlockFrequencyInfo*> :
+    public DefaultDOTGraphTraits {
+  explicit DOTGraphTraits(bool isSimple=false) :
+    DefaultDOTGraphTraits(isSimple) {}
+
+  static std::string getGraphName(const MachineBlockFrequencyInfo *G) {
+    return G->getFunction()->getName();
+  }
+
+  std::string getNodeLabel(const MachineBasicBlock *Node,
+                           const MachineBlockFrequencyInfo *Graph) {
+    std::string Result;
+    raw_string_ostream OS(Result);
+
+    OS << Node->getName().str() << ":";
+    switch (ViewMachineBlockFreqPropagationDAG) {
+    case GVDT_Fraction:
+      Graph->printBlockFreq(OS, Node);
+      break;
+    case GVDT_Integer:
+      OS << Graph->getBlockFreq(Node).getFrequency();
+      break;
+    case GVDT_None:
+      llvm_unreachable("If we are not supposed to render a graph we should "
+                       "never reach this point.");
+    }
+
+    return Result;
+  }
+};
+
+
+} // end namespace llvm
+#endif
+
 INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, "machine-block-freq",
                       "Machine Block Frequency Analysis", true, true)
 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
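
Two related changes land in this file: MBFI becomes a lazily built std::unique_ptr that releaseMemory() drops and every query guards against, and the debug-only -view-machine-block-freq-propagation-dags option renders the frequency-annotated CFG through GraphWriter. The guard pattern, reduced to a sketch (FrequencyData is a placeholder, not the LLVM type):

#include <cstdint>
#include <memory>

struct FrequencyData { uint64_t EntryFreq; }; // placeholder for the impl type

class LazyFreqInfo {
  std::unique_ptr<FrequencyData> Data;
public:
  void runOnFunction() {
    if (!Data)
      Data.reset(new FrequencyData()); // build on first use
    Data->EntryFreq = 1024;            // recompute for the current function
  }
  void releaseMemory() { Data.reset(); }
  // Queries degrade to a neutral value when nothing is cached.
  uint64_t getEntryFreq() const { return Data ? Data->EntryFreq : 0; }
};
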
@@ -28,15 +118,12 @@
 char MachineBlockFrequencyInfo::ID = 0;
 
 
-MachineBlockFrequencyInfo::MachineBlockFrequencyInfo() : MachineFunctionPass(ID) {
+MachineBlockFrequencyInfo::
+MachineBlockFrequencyInfo() : MachineFunctionPass(ID) {
   initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry());
-  MBFI = new BlockFrequencyImpl<MachineBasicBlock, MachineFunction,
-                                MachineBranchProbabilityInfo>();
 }
 
-MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() {
-  delete MBFI;
-}
+MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() {}
 
 void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<MachineBranchProbabilityInfo>();
@@ -45,12 +132,55 @@
 }
 
 bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
-  MachineBranchProbabilityInfo &MBPI = getAnalysis<MachineBranchProbabilityInfo>();
+  MachineBranchProbabilityInfo &MBPI =
+    getAnalysis<MachineBranchProbabilityInfo>();
+  if (!MBFI)
+    MBFI.reset(new ImplType);
   MBFI->doFunction(&F, &MBPI);
+#ifndef NDEBUG
+  if (ViewMachineBlockFreqPropagationDAG != GVDT_None) {
+    view();
+  }
+#endif
   return false;
 }
 
+void MachineBlockFrequencyInfo::releaseMemory() { MBFI.reset(); }
+
+/// Pop up a ghostview window with the current block frequency propagation
+/// rendered using dot.
+void MachineBlockFrequencyInfo::view() const {
+// This code is only for debugging.
+#ifndef NDEBUG
+  ViewGraph(const_cast<MachineBlockFrequencyInfo *>(this),
+            "MachineBlockFrequencyDAGs");
+#else
+  errs() << "MachineBlockFrequencyInfo::view is only available in debug builds "
+    "on systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
 BlockFrequency MachineBlockFrequencyInfo::
 getBlockFreq(const MachineBasicBlock *MBB) const {
-  return MBFI->getBlockFreq(MBB);
+  return MBFI ? MBFI->getBlockFreq(MBB) : 0;
+}
+
+const MachineFunction *MachineBlockFrequencyInfo::getFunction() const {
+  return MBFI ? MBFI->Fn : nullptr;
+}
+
+raw_ostream &
+MachineBlockFrequencyInfo::printBlockFreq(raw_ostream &OS,
+                                          const BlockFrequency Freq) const {
+  return MBFI ? MBFI->printBlockFreq(OS, Freq) : OS;
+}
+
+raw_ostream &
+MachineBlockFrequencyInfo::printBlockFreq(raw_ostream &OS,
+                                          const MachineBasicBlock *MBB) const {
+  return MBFI ? MBFI->printBlockFreq(OS, MBB) : OS;
+}
+
+uint64_t MachineBlockFrequencyInfo::getEntryFreq() const {
+  return MBFI ? MBFI->getEntryFreq() : 0;
 }
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 4b0f7f3..771e7ce 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -58,6 +58,13 @@
                                                 "blocks in the function."),
                                        cl::init(0), cl::Hidden);
 
+// FIXME: Find a good default for this flag and remove the flag.
+static cl::opt<unsigned>
+ExitBlockBias("block-placement-exit-block-bias",
+              cl::desc("Block frequency percentage a loop exit block needs "
+                       "over the original exit to be considered the new exit."),
+              cl::init(0), cl::Hidden);
+
 namespace {
 class BlockChain;
 /// \brief Type for our function-wide basic block -> block chain mapping.
@@ -145,7 +152,7 @@
 
 #ifndef NDEBUG
   /// \brief Dump the blocks in this chain.
-  void dump() LLVM_ATTRIBUTE_USED {
+  LLVM_DUMP_METHOD void dump() {
     for (iterator I = begin(), E = end(); I != E; ++I)
       (*I)->dump();
   }
@@ -230,9 +237,9 @@
     initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
   }
 
-  bool runOnMachineFunction(MachineFunction &F);
+  bool runOnMachineFunction(MachineFunction &F) override;
 
-  void getAnalysisUsage(AnalysisUsage &AU) const {
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<MachineBranchProbabilityInfo>();
     AU.addRequired<MachineBlockFrequencyInfo>();
     AU.addRequired<MachineLoopInfo>();
@@ -360,7 +367,8 @@
     // any CFG constraints.
     if (SuccChain.LoopPredecessors != 0) {
       if (SuccProb < HotProb) {
-        DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> CFG conflict\n");
+        DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> " << SuccProb
+                     << " (prob) (CFG conflict)\n");
         continue;
       }
 
@@ -383,8 +391,8 @@
         }
       }
       if (BadCFGConflict) {
-        DEBUG(dbgs() << "    " << getBlockName(*SI)
-                               << " -> non-cold CFG conflict\n");
+        DEBUG(dbgs() << "    " << getBlockName(*SI) << " -> " << SuccProb
+                     << " (prob) (non-cold CFG conflict)\n");
         continue;
       }
     }
@@ -401,23 +409,6 @@
   return BestSucc;
 }
 
-namespace {
-/// \brief Predicate struct to detect blocks already placed.
-class IsBlockPlaced {
-  const BlockChain &PlacedChain;
-  const BlockToChainMapType &BlockToChain;
-
-public:
-  IsBlockPlaced(const BlockChain &PlacedChain,
-                const BlockToChainMapType &BlockToChain)
-      : PlacedChain(PlacedChain), BlockToChain(BlockToChain) {}
-
-  bool operator()(MachineBasicBlock *BB) const {
-    return BlockToChain.lookup(BB) == &PlacedChain;
-  }
-};
-}
-
 /// \brief Select the best block from a worklist.
 ///
 /// This looks through the provided worklist as a list of candidate basic
@@ -436,7 +427,9 @@
   // FIXME: If this shows up on profiles, it could be folded (at the cost of
   // some code complexity) into the loop below.
   WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
-                                IsBlockPlaced(Chain, BlockToChain)),
+                                [&](MachineBasicBlock *BB) {
+                   return BlockToChain.lookup(BB) == &Chain;
+                 }),
                  WorkList.end());
 
   MachineBasicBlock *BestBlock = 0;
@@ -453,8 +446,8 @@
     assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
 
     BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI);
-    DEBUG(dbgs() << "    " << getBlockName(*WBI) << " -> " << CandidateFreq
-                 << " (freq)\n");
+    DEBUG(dbgs() << "    " << getBlockName(*WBI) << " -> ";
+                 MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
     if (BestBlock && BestFreq >= CandidateFreq)
       continue;
     BestBlock = *WBI;
@@ -501,11 +494,11 @@
 
   MachineBasicBlock *LoopHeaderBB = BB;
   markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter);
-  BB = *llvm::prior(Chain.end());
+  BB = *std::prev(Chain.end());
   for (;;) {
     assert(BB);
     assert(BlockToChain[BB] == &Chain);
-    assert(*llvm::prior(Chain.end()) == BB);
+    assert(*std::prev(Chain.end()) == BB);
 
     // Look for the best viable successor if there is one to place immediately
     // after this block.
@@ -536,7 +529,7 @@
                  << " to " << getBlockNum(BestSucc) << "\n");
     markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
     Chain.merge(BestSucc, &SuccChain);
-    BB = *llvm::prior(Chain.end());
+    BB = *std::prev(Chain.end());
   }
 
   DEBUG(dbgs() << "Finished forming chain for header block "
@@ -575,8 +568,8 @@
     if (!LoopBlockSet.count(Pred))
       continue;
     DEBUG(dbgs() << "    header pred: " << getBlockName(Pred) << ", "
-                 << Pred->succ_size() << " successors, "
-                 << MBFI->getBlockFreq(Pred) << " freq\n");
+                 << Pred->succ_size() << " successors, ";
+                 MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
     if (Pred->succ_size() > 1)
       continue;
 
@@ -641,7 +634,7 @@
     BlockChain &Chain = *BlockToChain[*I];
     // Ensure that this block is at the end of a chain; otherwise it could be
     // mid-way through an inner loop or a successor of an analyzable branch.
-    if (*I != *llvm::prior(Chain.end()))
+    if (*I != *std::prev(Chain.end()))
       continue;
 
     // Now walk the successors. We need to establish whether this has a viable
@@ -690,14 +683,17 @@
       BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb;
       DEBUG(dbgs() << "    exiting: " << getBlockName(*I) << " -> "
                    << getBlockName(*SI) << " [L:" << SuccLoopDepth
-                   << "] (" << ExitEdgeFreq << ")\n");
-      // Note that we slightly bias this toward an existing layout successor to
-      // retain incoming order in the absence of better information.
-      // FIXME: Should we bias this more strongly? It's pretty weak.
+                   << "] (";
+                   MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n");
+      // Note that we bias this toward an existing layout successor to retain
+      // incoming order in the absence of better information. The exit must have
+      // a frequency higher than the current exit before we consider breaking
+      // the layout.
+      BranchProbability Bias(100 - ExitBlockBias, 100);
       if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth ||
           ExitEdgeFreq > BestExitEdgeFreq ||
           ((*I)->isLayoutSuccessor(*SI) &&
-           !(ExitEdgeFreq < BestExitEdgeFreq))) {
+           !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
         BestExitEdgeFreq = ExitEdgeFreq;
         ExitingBB = *I;
       }
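
The rotated comparison gives the current layout successor a configurable head start: with -block-placement-exit-block-bias=N, it keeps its place as the exit unless its edge frequency falls below (100 - N)% of the best competing exit. A worked sketch of that test with plain integers in place of BlockFrequency and BranchProbability (illustration only):

#include <cassert>
#include <cstdint>

// The layout successor stays the chosen exit unless its frequency drops
// below (100 - BiasPercent)% of the best alternative's frequency.
static bool keepLayoutExit(uint64_t LayoutFreq, uint64_t BestFreq,
                           unsigned BiasPercent) {
  return !(LayoutFreq * 100 < BestFreq * (100 - BiasPercent));
}

int main() {
  // With a 20% bias, a layout exit at 85% of the best frequency survives...
  assert(keepLayoutExit(85, 100, 20));
  // ...while one at 79% is finally displaced.
  assert(!keepLayoutExit(79, 100, 20));
  return 0;
}
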
@@ -745,7 +741,7 @@
        PI != PE; ++PI) {
     BlockChain *PredChain = BlockToChain[*PI];
     if (!LoopBlockSet.count(*PI) &&
-        (!PredChain || *PI == *llvm::prior(PredChain->end()))) {
+        (!PredChain || *PI == *std::prev(PredChain->end()))) {
       ViableTopFallthrough = true;
       break;
     }
@@ -755,7 +751,7 @@
   // bottom is a viable exiting block. If so, bail out as rotating will
   // introduce an unnecessary branch.
   if (ViableTopFallthrough) {
-    MachineBasicBlock *Bottom = *llvm::prior(LoopChain.end());
+    MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
     for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(),
                                           SE = Bottom->succ_end();
          SI != SE; ++SI) {
@@ -771,7 +767,7 @@
   if (ExitIt == LoopChain.end())
     return;
 
-  std::rotate(LoopChain.begin(), llvm::next(ExitIt), LoopChain.end());
+  std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
 }
 
 /// \brief Forms basic block chains from the natural loop structures.
@@ -891,7 +887,7 @@
       if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
         break;
 
-      MachineFunction::iterator NextFI(llvm::next(FI));
+      MachineFunction::iterator NextFI(std::next(FI));
       MachineBasicBlock *NextBB = NextFI;
       // Ensure that the layout successor is a viable block, as we know that
       // fallthrough is a possibility.
@@ -939,7 +935,9 @@
   BlockChain &FunctionChain = *BlockToChain[&F.front()];
   buildChain(&F.front(), FunctionChain, BlockWorkList);
 
+#ifndef NDEBUG
   typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;
+#endif
   DEBUG({
     // Crash at the end so we get all of the debugging output first.
     bool BadFunc = false;
@@ -983,7 +981,7 @@
     // Update the terminator of the previous block.
     if (BI == FunctionChain.begin())
       continue;
-    MachineBasicBlock *PrevBB = llvm::prior(MachineFunction::iterator(*BI));
+    MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(*BI));
 
     // FIXME: It would be awesome of updateTerminator would just return rather
     // than assert when the branch cannot be analyzed in order to remove this
@@ -1055,7 +1053,7 @@
   const BranchProbability ColdProb(1, 5); // 20%
   BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin());
   BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
-  for (BlockChain::iterator BI = llvm::next(FunctionChain.begin()),
+  for (BlockChain::iterator BI = std::next(FunctionChain.begin()),
                             BE = FunctionChain.end();
        BI != BE; ++BI) {
     // Don't align non-looping basic blocks. These are unlikely to execute
@@ -1081,7 +1079,7 @@
 
     // Check for the existence of a non-layout predecessor which would benefit
     // from aligning this block.
-    MachineBasicBlock *LayoutPred = *llvm::prior(BI);
+    MachineBasicBlock *LayoutPred = *std::prev(BI);
 
     // Force alignment if all the predecessors are jumps. We already checked
     // that the block isn't cold above.
@@ -1103,7 +1101,10 @@
 
 bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
   // Check for single-block functions and skip them.
-  if (llvm::next(F.begin()) == F.end())
+  if (std::next(F.begin()) == F.end())
+    return false;
+
+  if (skipOptnoneFunction(*F.getFunction()))
     return false;
 
   MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
@@ -1149,9 +1150,9 @@
     initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry());
   }
 
-  bool runOnMachineFunction(MachineFunction &F);
+  bool runOnMachineFunction(MachineFunction &F) override;
 
-  void getAnalysisUsage(AnalysisUsage &AU) const {
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<MachineBranchProbabilityInfo>();
     AU.addRequired<MachineBlockFrequencyInfo>();
     AU.setPreservesAll();
@@ -1171,7 +1172,7 @@
 
 bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
   // Check for single-block functions and skip them.
-  if (llvm::next(F.begin()) == F.end())
+  if (std::next(F.begin()) == F.end())
     return false;
 
   MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index ae70912..1d6879b 100644
--- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -77,8 +77,9 @@
   return getEdgeWeight(Src, std::find(Src->succ_begin(), Src->succ_end(), Dst));
 }
 
-bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src,
-                                             MachineBasicBlock *Dst) const {
+bool
+MachineBranchProbabilityInfo::isEdgeHot(const MachineBasicBlock *Src,
+                                        const MachineBasicBlock *Dst) const {
   // Hot probability is at least 4/5 = 80%
   // FIXME: Compare against a static "hot" BranchProbability.
   return getEdgeProbability(Src, Dst) > BranchProbability(4, 5);
@@ -103,9 +104,8 @@
   return 0;
 }
 
-BranchProbability
-MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src,
-                                                 MachineBasicBlock *Dst) const {
+BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(
+    const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const {
   uint32_t Scale = 1;
   uint32_t D = getSumForBlock(Src, Scale);
   uint32_t N = getEdgeWeight(Src, Dst) / Scale;
@@ -113,13 +113,13 @@
   return BranchProbability(N, D);
 }
 
-raw_ostream &MachineBranchProbabilityInfo::
-printEdgeProbability(raw_ostream &OS, MachineBasicBlock *Src,
-                     MachineBasicBlock *Dst) const {
+raw_ostream &MachineBranchProbabilityInfo::printEdgeProbability(
+    raw_ostream &OS, const MachineBasicBlock *Src,
+    const MachineBasicBlock *Dst) const {
 
   const BranchProbability Prob = getEdgeProbability(Src, Dst);
   OS << "edge MBB#" << Src->getNumber() << " -> MBB#" << Dst->getNumber()
-     << " probability is "  << Prob 
+     << " probability is " << Prob
      << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
 
   return OS;
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index d228286..9c3bcc4 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -49,9 +49,9 @@
       initializeMachineCSEPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual bool runOnMachineFunction(MachineFunction &MF);
+    bool runOnMachineFunction(MachineFunction &MF) override;
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesCFG();
       MachineFunctionPass::getAnalysisUsage(AU);
       AU.addRequired<AliasAnalysis>();
@@ -60,7 +60,7 @@
       AU.addPreserved<MachineDominatorTree>();
     }
 
-    virtual void releaseMemory() {
+    void releaseMemory() override {
       ScopeMap.clear();
       Exps.clear();
     }
@@ -131,9 +131,24 @@
     unsigned SrcReg = DefMI->getOperand(1).getReg();
     if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
       continue;
-    if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg())
+    if (DefMI->getOperand(0).getSubReg())
       continue;
-    if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg)))
+    // FIXME: We should trivially coalesce subregister copies to expose CSE
+    // opportunities on instructions with truncated operands (see
+    // cse-add-with-overflow.ll). This can be done here as follows:
+    // if (SrcSubReg)
+    //  RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
+    //                                     SrcSubReg);
+    // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
+    //
+    // The 2-addr pass has been updated to handle coalesced subregs. However,
+    // some machine-specific code still can't handle it.
+    // To handle it properly we also need a way to find a constrained
+    // subregister class given a super-reg class and subreg index.
+    if (DefMI->getOperand(1).getSubReg())
+      continue;
+    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+    if (!MRI->constrainRegClass(SrcReg, RC))
       continue;
     DEBUG(dbgs() << "Coalescing: " << *DefMI);
     DEBUG(dbgs() << "***     to: " << *MI);
@@ -214,7 +229,7 @@
   // Next, collect all defs into PhysDefs.  If any is already in PhysRefs
   // (which currently contains only uses), set the PhysUseDef flag.
   PhysUseDef = false;
-  MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
+  MachineBasicBlock::const_iterator I = MI; I = std::next(I);
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI->getOperand(i);
     if (!MO.isReg() || !MO.isDef())
@@ -265,7 +280,7 @@
     }
     CrossMBB = true;
   }
-  MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I);
+  MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
   MachineBasicBlock::const_iterator E = MI;
   MachineBasicBlock::const_iterator EE = CSMBB->end();
   unsigned LookAheadLeft = LookAheadLimit;
@@ -310,8 +325,8 @@
 }
 
 bool MachineCSE::isCSECandidate(MachineInstr *MI) {
-  if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
-      MI->isKill() || MI->isInlineAsm() || MI->isDebugValue())
+  if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
+      MI->isInlineAsm() || MI->isDebugValue())
     return false;
 
   // Ignore copies.
@@ -349,15 +364,11 @@
       TargetRegisterInfo::isVirtualRegister(Reg)) {
     MayIncreasePressure = false;
     SmallPtrSet<MachineInstr*, 8> CSUses;
-    for (MachineRegisterInfo::use_nodbg_iterator I =MRI->use_nodbg_begin(CSReg),
-         E = MRI->use_nodbg_end(); I != E; ++I) {
-      MachineInstr *Use = &*I;
-      CSUses.insert(Use);
+    for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
+      CSUses.insert(&MI);
     }
-    for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
-         E = MRI->use_nodbg_end(); I != E; ++I) {
-      MachineInstr *Use = &*I;
-      if (!CSUses.count(Use)) {
+    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
+      if (!CSUses.count(&MI)) {
         MayIncreasePressure = true;
         break;
       }
@@ -388,11 +399,9 @@
   }
   if (!HasVRegUse) {
     bool HasNonCopyUse = false;
-    for (MachineRegisterInfo::use_nodbg_iterator I =  MRI->use_nodbg_begin(Reg),
-           E = MRI->use_nodbg_end(); I != E; ++I) {
-      MachineInstr *Use = &*I;
+    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
       // Ignore copies.
-      if (!Use->isCopyLike()) {
+      if (!MI.isCopyLike()) {
         HasNonCopyUse = true;
         break;
       }
@@ -405,11 +414,9 @@
   // it unless the defined value is already used in the BB of the new use.
   bool HasPHI = false;
   SmallPtrSet<MachineBasicBlock*, 4> CSBBs;
-  for (MachineRegisterInfo::use_nodbg_iterator I =  MRI->use_nodbg_begin(CSReg),
-       E = MRI->use_nodbg_end(); I != E; ++I) {
-    MachineInstr *Use = &*I;
-    HasPHI |= Use->isPHI();
-    CSBBs.insert(Use->getParent());
+  for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
+    HasPHI |= MI.isPHI();
+    CSBBs.insert(MI.getParent());
   }
 
   if (!HasPHI)
@@ -513,7 +520,7 @@
     bool DoCSE = true;
     unsigned NumDefs = MI->getDesc().getNumDefs() +
                        MI->getDesc().getNumImplicitDefs();
-    
+
     for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
       MachineOperand &MO = MI->getOperand(i);
       if (!MO.isReg() || !MO.isDef())
@@ -652,6 +659,9 @@
 }
 
 bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
+  if (skipOptnoneFunction(*MF.getFunction()))
+    return false;
+
   TII = MF.getTarget().getInstrInfo();
   TRI = MF.getTarget().getRegisterInfo();
   MRI = &MF.getRegInfo();
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index 4f48e2c..7e1970c 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -42,7 +42,7 @@
      initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual bool runOnMachineFunction(MachineFunction &MF);
+    bool runOnMachineFunction(MachineFunction &MF) override;
 
   private:
     typedef SmallVector<unsigned, 4> DestList;
@@ -127,13 +127,10 @@
 }
 
 // Remove MI from the function because it has been determined it is dead.
-// Turn it into a noop KILL instruction if it has super-register liveness
-// adjustments.
+// Turn it into a noop KILL instruction rather than removing it, so that
+// imp-use/imp-def chains are maintained.
 void MachineCopyPropagation::removeCopy(MachineInstr *MI) {
-  if (MI->getNumOperands() == 2)
-    MI->eraseFromParent();
-  else
-    MI->setDesc(TII->get(TargetOpcode::KILL));
+  MI->setDesc(TII->get(TargetOpcode::KILL));
 }
 
 bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
@@ -142,6 +139,8 @@
   DenseMap<unsigned, MachineInstr*> CopyMap;         // Def -> copies map
   SourceMap SrcMap; // Src -> Def map
 
+  DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n");
+
   bool Changed = false;
   for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
     MachineInstr *MI = &*I;
@@ -176,6 +175,8 @@
           // CALL
           // %RAX<def> = COPY %RSP
 
+          DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; MI->dump());
+
           // Clear any kills of Def between CopyMI and MI. This extends the
           // live range.
           for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I)
@@ -191,10 +192,14 @@
       // If Src is defined by a previous copy, it cannot be eliminated.
       for (MCRegAliasIterator AI(Src, TRI, true); AI.isValid(); ++AI) {
         CI = CopyMap.find(*AI);
-        if (CI != CopyMap.end())
+        if (CI != CopyMap.end()) {
+          DEBUG(dbgs() << "MCP: Copy is no longer dead: "; CI->second->dump());
           MaybeDeadCopies.remove(CI->second);
+        }
       }
 
+      DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump());
+
       // Copy is now a candidate for deletion.
       MaybeDeadCopies.insert(MI);
 
@@ -255,8 +260,10 @@
       // for elimination.
       for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
         DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(*AI);
-        if (CI != CopyMap.end())
+        if (CI != CopyMap.end()) {
+          DEBUG(dbgs() << "MCP: Copy is used - not dead: "; CI->second->dump());
           MaybeDeadCopies.remove(CI->second);
+        }
       }
     }
 
@@ -273,6 +280,8 @@
         unsigned Reg = (*DI)->getOperand(0).getReg();
         if (MRI->isReserved(Reg) || !MaskMO.clobbersPhysReg(Reg))
           continue;
+        DEBUG(dbgs() << "MCP: Removing copy due to regmask clobbering: ";
+              (*DI)->dump());
         removeCopy(*DI);
         Changed = true;
         ++NumDeletes;
@@ -320,6 +329,9 @@
 }
 
 bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
+  if (skipOptnoneFunction(*MF.getFunction()))
+    return false;
+
   bool Changed = false;
 
   TRI = MF.getTarget().getRegisterInfo();
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 0703df0..061efdb 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -17,7 +17,6 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -26,8 +25,8 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/DebugInfo.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
@@ -140,7 +139,7 @@
   // Figure out the block number this should have.
   unsigned BlockNo = 0;
   if (MBBI != begin())
-    BlockNo = prior(MBBI)->getNumber()+1;
+    BlockNo = std::prev(MBBI)->getNumber() + 1;
 
   for (; MBBI != E; ++MBBI, ++BlockNo) {
     if (MBBI->getNumber() != (int)BlockNo) {
@@ -347,7 +346,7 @@
       OS << PrintReg(I->first, TRI);
       if (I->second)
         OS << " in " << PrintReg(I->second, TRI);
-      if (llvm::next(I) != E)
+      if (std::next(I) != E)
         OS << ", ";
     }
     OS << '\n';
@@ -425,7 +424,16 @@
   MachineRegisterInfo &MRI = getRegInfo();
   unsigned VReg = MRI.getLiveInVirtReg(PReg);
   if (VReg) {
-    assert(MRI.getRegClass(VReg) == RC && "Register class mismatch!");
+    const TargetRegisterClass *VRegRC = MRI.getRegClass(VReg);
+    (void)VRegRC;
+    // A physical register can be added several times.
+    // Between two calls, the register class of the related virtual register
+    // may have been constrained to match some operation constraints.
+    // In that case, check that the current register class includes the
+    // physical register and is a sub class of the specified RC.
+    assert((VRegRC == RC || (VRegRC->contains(PReg) &&
+                             RC->hasSubClassEq(VRegRC))) &&
+            "Register class mismatch!");
     return VReg;
   }
   VReg = MRI.createVirtualRegister(RC);
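The situation the relaxed assertion allows can be sketched against the MachineRegisterInfo API. The helper below is hypothetical, and it assumes ConstrainedRC is a subclass of RC that still contains PReg:

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    using namespace llvm;

    // Sketch: addLiveIn is called twice for the same physical register, with
    // the virtual register's class constrained in between (e.g. to satisfy an
    // operand constraint during instruction selection).
    static unsigned addLiveInTwice(MachineFunction &MF, unsigned PReg,
                                   const TargetRegisterClass *RC,
                                   const TargetRegisterClass *ConstrainedRC) {
      unsigned VReg = MF.addLiveIn(PReg, RC); // creates VReg with class RC
      MF.getRegInfo().constrainRegClass(VReg, ConstrainedRC);
      // The second call now passes the relaxed assert: the current class
      // still contains PReg and RC->hasSubClassEq(current class) holds, so
      // the existing VReg is returned rather than tripping the assertion.
      return MF.addLiveIn(PReg, RC);
    }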
@@ -438,12 +446,12 @@
 /// normal 'L' label is returned.
 MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, 
                                         bool isLinkerPrivate) const {
+  const DataLayout *DL = getTarget().getDataLayout();
   assert(JumpTableInfo && "No jump tables");
   assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!");
-  const MCAsmInfo &MAI = *getTarget().getMCAsmInfo();
 
-  const char *Prefix = isLinkerPrivate ? MAI.getLinkerPrivateGlobalPrefix() :
-                                         MAI.getPrivateGlobalPrefix();
+  const char *Prefix = isLinkerPrivate ? DL->getLinkerPrivateGlobalPrefix() :
+                                         DL->getPrivateGlobalPrefix();
   SmallString<60> Name;
   raw_svector_ostream(Name)
     << Prefix << "JTI" << getFunctionNumber() << '_' << JTI;
@@ -453,8 +461,8 @@
 /// getPICBaseSymbol - Return a function-local symbol to represent the PIC
 /// base.
 MCSymbol *MachineFunction::getPICBaseSymbol() const {
-  const MCAsmInfo &MAI = *Target.getMCAsmInfo();
-  return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+
+  const DataLayout *DL = getTarget().getDataLayout();
+  return Ctx.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
                                Twine(getFunctionNumber())+"$pb");
 }
 
@@ -490,14 +498,13 @@
 /// a nonnegative identifier to represent it.
 ///
 int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
-                      bool isSS, bool MayNeedSP, const AllocaInst *Alloca) {
+                      bool isSS, const AllocaInst *Alloca) {
   assert(Size != 0 && "Cannot allocate zero size stack objects!");
   Alignment =
     clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
                           !RealignOption,
                         Alignment, getFrameLowering()->getStackAlignment());
-  Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP,
-                                Alloca));
+  Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca));
   int Index = (int)Objects.size() - NumFixedObjects - 1;
   assert(Index >= 0 && "Bad frame index!");
   ensureMaxAlignment(Alignment);
@@ -514,7 +521,7 @@
     clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
                           !RealignOption,
                         Alignment, getFrameLowering()->getStackAlignment()); 
-  CreateStackObject(Size, Alignment, true, false);
+  CreateStackObject(Size, Alignment, true);
   int Index = (int)Objects.size() - NumFixedObjects - 1;
   ensureMaxAlignment(Alignment);
   return Index;
@@ -525,13 +532,14 @@
 /// variable sized object is created, whether or not the index returned is
 /// actually used.
 ///
-int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment) {
+int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment,
+                                                const AllocaInst *Alloca) {
   HasVarSizedObjects = true;
   Alignment =
     clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
                           !RealignOption,
                         Alignment, getFrameLowering()->getStackAlignment()); 
-  Objects.push_back(StackObject(0, Alignment, 0, false, false, true, 0));
+  Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca));
   ensureMaxAlignment(Alignment);
   return (int)Objects.size()-NumFixedObjects-1;
 }
@@ -556,7 +564,6 @@
                         Align, getFrameLowering()->getStackAlignment()); 
   Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
                                               /*isSS*/   false,
-                                              /*NeedSP*/ false,
                                               /*Alloca*/ 0));
   return -++NumFixedObjects;
 }
@@ -910,7 +917,7 @@
     if (Constants[i].isMachineConstantPoolEntry())
       Constants[i].Val.MachineCPVal->print(OS);
     else
-      WriteAsOperand(OS, Constants[i].Val.ConstVal, /*PrintType=*/false);
+      Constants[i].Val.ConstVal->printAsOperand(OS, /*PrintType=*/false);
     OS << ", align=" << Constants[i].getAlignment();
     OS << "\n";
   }
diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp
index 674cc80..789f204 100644
--- a/lib/CodeGen/MachineFunctionPass.cpp
+++ b/lib/CodeGen/MachineFunctionPass.cpp
@@ -51,6 +51,7 @@
   AU.addPreserved("domfrontier");
   AU.addPreserved("loops");
   AU.addPreserved("lda");
+  AU.addPreserved("stack-protector");
 
   FunctionPass::getAnalysisUsage(AU);
 }
diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp
index fa9c821..dee3977 100644
--- a/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -34,14 +34,14 @@
   MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner) 
       : MachineFunctionPass(ID), OS(os), Banner(banner) {}
 
-  const char *getPassName() const { return "MachineFunction Printer"; }
+  const char *getPassName() const override { return "MachineFunction Printer"; }
 
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesAll();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 
-  bool runOnMachineFunction(MachineFunction &MF) {
+  bool runOnMachineFunction(MachineFunction &MF) override {
     OS << "# " << Banner << ":\n";
     MF.print(OS, getAnalysisIfAvailable<SlotIndexes>());
     return false;
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 295b450..d102794 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -15,15 +15,14 @@
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/DebugInfo.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/LLVMContext.h"
@@ -199,10 +198,13 @@
   case MachineOperand::MO_BlockAddress:
     return getBlockAddress() == Other.getBlockAddress() &&
            getOffset() == Other.getOffset();
-  case MO_RegisterMask:
+  case MachineOperand::MO_RegisterMask:
+  case MachineOperand::MO_RegisterLiveOut:
     return getRegMask() == Other.getRegMask();
   case MachineOperand::MO_MCSymbol:
     return getMCSymbol() == Other.getMCSymbol();
+  case MachineOperand::MO_CFIIndex:
+    return getCFIIndex() == Other.getCFIIndex();
   case MachineOperand::MO_Metadata:
     return getMetadata() == Other.getMetadata();
   }
@@ -241,11 +243,14 @@
     return hash_combine(MO.getType(), MO.getTargetFlags(),
                         MO.getBlockAddress(), MO.getOffset());
   case MachineOperand::MO_RegisterMask:
+  case MachineOperand::MO_RegisterLiveOut:
     return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask());
   case MachineOperand::MO_Metadata:
     return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata());
   case MachineOperand::MO_MCSymbol:
     return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol());
+  case MachineOperand::MO_CFIIndex:
+    return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCFIIndex());
   }
   llvm_unreachable("Invalid machine operand type");
 }
@@ -312,7 +317,6 @@
         OS << "tied";
         if (TiedTo != 15)
           OS << unsigned(TiedTo - 1);
-        NeedComma = true;
       }
       OS << '>';
     }
@@ -350,7 +354,7 @@
     break;
   case MachineOperand::MO_GlobalAddress:
     OS << "<ga:";
-    WriteAsOperand(OS, getGlobal(), /*PrintType=*/false);
+    getGlobal()->printAsOperand(OS, /*PrintType=*/false);
     if (getOffset()) OS << "+" << getOffset();
     OS << '>';
     break;
@@ -361,21 +365,27 @@
     break;
   case MachineOperand::MO_BlockAddress:
     OS << '<';
-    WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false);
+    getBlockAddress()->printAsOperand(OS, /*PrintType=*/false);
     if (getOffset()) OS << "+" << getOffset();
     OS << '>';
     break;
   case MachineOperand::MO_RegisterMask:
     OS << "<regmask>";
     break;
+  case MachineOperand::MO_RegisterLiveOut:
+    OS << "<regliveout>";
+    break;
   case MachineOperand::MO_Metadata:
     OS << '<';
-    WriteAsOperand(OS, getMetadata(), /*PrintType=*/false);
+    getMetadata()->printAsOperand(OS, /*PrintType=*/false);
     OS << '>';
     break;
   case MachineOperand::MO_MCSymbol:
     OS << "<MCSym=" << *getMCSymbol() << '>';
     break;
+  case MachineOperand::MO_CFIIndex:
+    OS << "<call frame instruction>";
+    break;
   }
 
   if (unsigned TF = getTargetFlags())
@@ -479,7 +489,11 @@
   if (!MMO.getValue())
     OS << "<unknown>";
   else
-    WriteAsOperand(OS, MMO.getValue(), /*PrintType=*/false);
+    MMO.getValue()->printAsOperand(OS, /*PrintType=*/false);
+
+  unsigned AS = MMO.getAddrSpace();
+  if (AS != 0)
+    OS << "(addrspace=" << AS << ')';
 
   // If the alignment of the memory reference itself differs from the alignment
   // of the base pointer, print the base alignment explicitly, next to the base
@@ -500,7 +514,7 @@
   if (const MDNode *TBAAInfo = MMO.getTBAAInfo()) {
     OS << "(tbaa=";
     if (TBAAInfo->getNumOperands() > 0)
-      WriteAsOperand(OS, TBAAInfo->getOperand(0), /*PrintType=*/false);
+      TBAAInfo->getOperand(0)->printAsOperand(OS, /*PrintType=*/false);
     else
       OS << "<unknown>";
     OS << ")";
@@ -984,6 +998,54 @@
   return NULL;
 }
 
+const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVReg(
+    unsigned Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII,
+    const TargetRegisterInfo *TRI, bool ExploreBundle) const {
+  // Check every operands inside the bundle if we have
+  // been asked to.
+  if (ExploreBundle)
+    for (ConstMIBundleOperands OpndIt(this); OpndIt.isValid() && CurRC;
+         ++OpndIt)
+      CurRC = OpndIt->getParent()->getRegClassConstraintEffectForVRegImpl(
+          OpndIt.getOperandNo(), Reg, CurRC, TII, TRI);
+  else
+    // Otherwise, just check the current operands.
+    for (ConstMIOperands OpndIt(this); OpndIt.isValid() && CurRC; ++OpndIt)
+      CurRC = getRegClassConstraintEffectForVRegImpl(OpndIt.getOperandNo(), Reg,
+                                                     CurRC, TII, TRI);
+  return CurRC;
+}
+
+const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVRegImpl(
+    unsigned OpIdx, unsigned Reg, const TargetRegisterClass *CurRC,
+    const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const {
+  assert(CurRC && "Invalid initial register class");
+  // Check if Reg is constrained by some of its use/def from MI.
+  const MachineOperand &MO = getOperand(OpIdx);
+  if (!MO.isReg() || MO.getReg() != Reg)
+    return CurRC;
+  // If yes, accumulate the constraints through the operand.
+  return getRegClassConstraintEffect(OpIdx, CurRC, TII, TRI);
+}
+
+const TargetRegisterClass *MachineInstr::getRegClassConstraintEffect(
+    unsigned OpIdx, const TargetRegisterClass *CurRC,
+    const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const {
+  const TargetRegisterClass *OpRC = getRegClassConstraint(OpIdx, TII, TRI);
+  const MachineOperand &MO = getOperand(OpIdx);
+  assert(MO.isReg() &&
+         "Cannot get register constraints for non-register operand");
+  assert(CurRC && "Invalid initial register class");
+  if (unsigned SubIdx = MO.getSubReg()) {
+    if (OpRC)
+      CurRC = TRI->getMatchingSuperRegClass(CurRC, OpRC, SubIdx);
+    else
+      CurRC = TRI->getSubClassWithSubReg(CurRC, SubIdx);
+  } else if (OpRC)
+    CurRC = TRI->getCommonSubClass(CurRC, OpRC);
+  return CurRC;
+}
+
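These helpers fold each relevant operand's constraint into a running register class, and the recomputeRegClass hunk in MachineRegisterInfo.cpp further down rewrites its loop in exactly these terms. A sketch of the per-instruction accumulation, assuming the caller already holds the instruction and a starting class:

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/Target/TargetRegisterInfo.h"
    using namespace llvm;

    // Sketch: apply the constraint of every operand of MI that refers to Reg.
    // A null result means no register class satisfies all the constraints.
    static const TargetRegisterClass *
    accumulateConstraints(const MachineInstr &MI, unsigned Reg,
                          const TargetRegisterClass *CurRC,
                          const TargetInstrInfo *TII,
                          const TargetRegisterInfo *TRI) {
      for (unsigned OpIdx = 0, E = MI.getNumOperands(); OpIdx != E && CurRC;
           ++OpIdx) {
        const MachineOperand &MO = MI.getOperand(OpIdx);
        if (!MO.isReg() || MO.getReg() != Reg)
          continue;
        CurRC = MI.getRegClassConstraintEffect(OpIdx, CurRC, TII, TRI);
      }
      return CurRC;
    }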
 /// Return the number of instructions inside the MI bundle, not counting the
 /// header instruction.
 unsigned MachineInstr::getBundleSize() const {
@@ -1239,8 +1301,8 @@
     return false;
   }
 
-  if (isLabel() || isDebugValue() ||
-      isTerminator() || hasUnmodeledSideEffects())
+  if (isPosition() || isDebugValue() || isTerminator() ||
+      hasUnmodeledSideEffects())
     return false;
 
   // See if this instruction does a load.  If so, we have to guarantee that the
@@ -1372,7 +1434,7 @@
   for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands();
        i != e; ++i) {
     const MachineOperand &MO = MI->getOperand(i);
-    if (MO.isReg() && MO.isImplicit())
+    if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
       addOperand(MF, MO);
   }
 }
@@ -1587,7 +1649,7 @@
     for (mmo_iterator i = memoperands_begin(), e = memoperands_end();
          i != e; ++i) {
       OS << **i;
-      if (llvm::next(i) != e)
+      if (std::next(i) != e)
         OS << " ";
     }
   }
@@ -1612,7 +1674,7 @@
 
   // Print debug location information.
   if (isDebugValue() && getOperand(e - 1).isMetadata()) {
-    if (!HaveSemi) OS << ";"; HaveSemi = true;
+    if (!HaveSemi) OS << ";";
     DIVariable DV(getOperand(e - 1).getMetadata());
     OS << " line no:" <<  DV.getLineNumber();
     if (MDNode *InlinedAt = DV.getInlinedAt()) {
@@ -1624,7 +1686,7 @@
       }
     }
   } else if (!debugLoc.isUnknown() && MF) {
-    if (!HaveSemi) OS << ";"; HaveSemi = true;
+    if (!HaveSemi) OS << ";";
     OS << " dbg:";
     printDebugLoc(debugLoc, MF, OS);
   }
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
index 77bcd1d..962169e 100644
--- a/lib/CodeGen/MachineInstrBundle.cpp
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -26,7 +26,7 @@
       initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual bool runOnMachineFunction(MachineFunction &MF);
+    bool runOnMachineFunction(MachineFunction &MF) override;
   };
 } // end anonymous namespace
 
@@ -77,7 +77,7 @@
       initializeFinalizeMachineBundlesPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual bool runOnMachineFunction(MachineFunction &MF);
+    bool runOnMachineFunction(MachineFunction &MF) override;
   };
 } // end anonymous namespace
 
@@ -211,7 +211,7 @@
 llvm::finalizeBundle(MachineBasicBlock &MBB,
                      MachineBasicBlock::instr_iterator FirstMI) {
   MachineBasicBlock::instr_iterator E = MBB.instr_end();
-  MachineBasicBlock::instr_iterator LastMI = llvm::next(FirstMI);
+  MachineBasicBlock::instr_iterator LastMI = std::next(FirstMI);
   while (LastMI != E && LastMI->isInsideBundle())
     ++LastMI;
   finalizeBundle(MBB, FirstMI, LastMI);
@@ -235,7 +235,7 @@
       if (!MII->isInsideBundle())
         ++MII;
       else {
-        MII = finalizeBundle(MBB, llvm::prior(MII));
+        MII = finalizeBundle(MBB, std::prev(MII));
         Changed = true;
       }
     }
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 104eacd..d3a1ee7 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -125,9 +125,9 @@
         initializeMachineLICMPass(*PassRegistry::getPassRegistry());
       }
 
-    virtual bool runOnMachineFunction(MachineFunction &MF);
+    bool runOnMachineFunction(MachineFunction &MF) override;
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.addRequired<MachineLoopInfo>();
       AU.addRequired<MachineDominatorTree>();
       AU.addRequired<AliasAnalysis>();
@@ -136,7 +136,7 @@
       MachineFunctionPass::getAnalysisUsage(AU);
     }
 
-    virtual void releaseMemory() {
+    void releaseMemory() override {
       RegSeen.clear();
       RegPressure.clear();
       RegLimit.clear();
@@ -319,6 +319,9 @@
 }
 
 bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
+  if (skipOptnoneFunction(*MF.getFunction()))
+    return false;
+
   Changed = FirstInLoop = false;
   TM = &MF.getTarget();
   TII = TM->getInstrInfo();
@@ -978,25 +981,23 @@
       unsigned Reg = MO->getReg();
       if (!TargetRegisterInfo::isVirtualRegister(Reg))
         continue;
-      for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
-           UE = MRI->use_end(); UI != UE; ++UI) {
-        MachineInstr *UseMI = &*UI;
+      for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
         // A PHI may cause a copy to be inserted.
-        if (UseMI->isPHI()) {
+        if (UseMI.isPHI()) {
           // A PHI inside the loop causes a copy because the live range of Reg is
           // extended across the PHI.
-          if (CurLoop->contains(UseMI))
+          if (CurLoop->contains(&UseMI))
             return true;
           // A PHI in an exit block can cause a copy to be inserted if the PHI
           // has multiple predecessors in the loop with different values.
           // For now, approximate by rejecting all exit blocks.
-          if (isExitBlock(UseMI->getParent()))
+          if (isExitBlock(UseMI.getParent()))
             return true;
           continue;
         }
         // Look past copies as well.
-        if (UseMI->isCopy() && CurLoop->contains(UseMI))
-          Work.push_back(UseMI);
+        if (UseMI.isCopy() && CurLoop->contains(&UseMI))
+          Work.push_back(&UseMI);
       }
     }
   } while (!Work.empty());
@@ -1011,22 +1012,20 @@
   if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg))
     return false;
 
-  for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
-         E = MRI->use_nodbg_end(); I != E; ++I) {
-    MachineInstr *UseMI = &*I;
-    if (UseMI->isCopyLike())
+  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
+    if (UseMI.isCopyLike())
       continue;
-    if (!CurLoop->contains(UseMI->getParent()))
+    if (!CurLoop->contains(UseMI.getParent()))
       continue;
-    for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
-      const MachineOperand &MO = UseMI->getOperand(i);
+    for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = UseMI.getOperand(i);
       if (!MO.isReg() || !MO.isUse())
         continue;
       unsigned MOReg = MO.getReg();
       if (MOReg != Reg)
         continue;
 
-      if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i))
+      if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, &UseMI, i))
         return true;
     }
 
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index 4e2cfdc..89054d4 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -50,11 +50,11 @@
   MachineBasicBlock *TopMBB = getHeader();
   MachineFunction::iterator Begin = TopMBB->getParent()->begin();
   if (TopMBB != Begin) {
-    MachineBasicBlock *PriorMBB = prior(MachineFunction::iterator(TopMBB));
+    MachineBasicBlock *PriorMBB = std::prev(MachineFunction::iterator(TopMBB));
     while (contains(PriorMBB)) {
       TopMBB = PriorMBB;
       if (TopMBB == Begin) break;
-      PriorMBB = prior(MachineFunction::iterator(TopMBB));
+      PriorMBB = std::prev(MachineFunction::iterator(TopMBB));
     }
   }
   return TopMBB;
@@ -63,12 +63,12 @@
 MachineBasicBlock *MachineLoop::getBottomBlock() {
   MachineBasicBlock *BotMBB = getHeader();
   MachineFunction::iterator End = BotMBB->getParent()->end();
-  if (BotMBB != prior(End)) {
-    MachineBasicBlock *NextMBB = llvm::next(MachineFunction::iterator(BotMBB));
+  if (BotMBB != std::prev(End)) {
+    MachineBasicBlock *NextMBB = std::next(MachineFunction::iterator(BotMBB));
     while (contains(NextMBB)) {
       BotMBB = NextMBB;
-      if (BotMBB == llvm::next(MachineFunction::iterator(BotMBB))) break;
-      NextMBB = llvm::next(MachineFunction::iterator(BotMBB));
+      if (BotMBB == std::next(MachineFunction::iterator(BotMBB))) break;
+      NextMBB = std::next(MachineFunction::iterator(BotMBB));
     }
   }
   return BotMBB;
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index bb54284..7181025 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -45,8 +45,8 @@
 
   void setMap(MMIAddrLabelMap *map) { Map = map; }
 
-  virtual void deleted();
-  virtual void allUsesReplacedWith(Value *V2);
+  void deleted() override;
+  void allUsesReplacedWith(Value *V2) override;
 };
 
 class MMIAddrLabelMap {
@@ -313,7 +313,7 @@
   CallsEHReturn = 0;
   CallsUnwindInit = 0;
   CompactUnwindEncoding = 0;
-  VariableDbgInfo.clear();
+  VariableDbgInfos.clear();
 }
 
 /// AnalyzeModule - Scan the module for global debug information.
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index f8b8796..db3eec3 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -13,9 +13,9 @@
 
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/raw_os_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/raw_os_ostream.h"
 
 using namespace llvm;
 
@@ -77,19 +77,12 @@
     return false;
 
   // Accumulate constraints from all uses.
-  for (reg_nodbg_iterator I = reg_nodbg_begin(Reg), E = reg_nodbg_end(); I != E;
-       ++I) {
-    const TargetRegisterClass *OpRC =
-      I->getRegClassConstraint(I.getOperandNo(), TII,
-                               getTargetRegisterInfo());
-    if (unsigned SubIdx = I.getOperand().getSubReg()) {
-      if (OpRC)
-        NewRC = getTargetRegisterInfo()->getMatchingSuperRegClass(NewRC, OpRC,
-                                                                  SubIdx);
-      else
-        NewRC = getTargetRegisterInfo()->getSubClassWithSubReg(NewRC, SubIdx);
-    } else if (OpRC)
-      NewRC = getTargetRegisterInfo()->getCommonSubClass(NewRC, OpRC);
+  for (MachineOperand &MO : reg_nodbg_operands(Reg)) {
+    // Apply the effect of the given operand to NewRC.
+    MachineInstr *MI = MO.getParent();
+    unsigned OpNo = &MO - &MI->getOperand(0);
+    NewRC = MI->getRegClassConstraintEffect(OpNo, NewRC, TII,
+                                            getTargetRegisterInfo());
     if (!NewRC || NewRC == OldRC)
       return false;
   }
@@ -133,8 +126,8 @@
 void MachineRegisterInfo::verifyUseList(unsigned Reg) const {
 #ifndef NDEBUG
   bool Valid = true;
-  for (reg_iterator I = reg_begin(Reg), E = reg_end(); I != E; ++I) {
-    MachineOperand *MO = &I.getOperand();
+  for (MachineOperand &M : reg_operands(Reg)) {
+    MachineOperand *MO = &M;
     MachineInstr *MI = MO->getParent();
     if (!MI) {
       errs() << PrintReg(Reg, getTargetRegisterInfo())
@@ -295,7 +288,7 @@
 
   // TODO: This could be more efficient by bulk changing the operands.
   for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) {
-    MachineOperand &O = I.getOperand();
+    MachineOperand &O = *I;
     ++I;
     O.setReg(ToReg);
   }
@@ -307,8 +300,8 @@
 /// form, so there should only be one definition.
 MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
   // Since we are in SSA form, we can use the first definition.
-  def_iterator I = def_begin(Reg);
-  assert((I.atEnd() || llvm::next(I) == def_end()) &&
+  def_instr_iterator I = def_instr_begin(Reg);
+  assert((I.atEnd() || std::next(I) == def_instr_end()) &&
          "getVRegDef assumes a single definition or no definition");
   return !I.atEnd() ? &*I : 0;
 }
@@ -318,8 +311,8 @@
 /// multiple definitions or no definition, return null.
 MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const {
   if (def_empty(Reg)) return 0;
-  def_iterator I = def_begin(Reg);
-  if (llvm::next(I) != def_end())
+  def_instr_iterator I = def_instr_begin(Reg);
+  if (std::next(I) != def_instr_end())
     return 0;
   return &*I;
 }
@@ -336,8 +329,8 @@
 /// optimization passes which extend register lifetimes and need only
 /// preserve conservative kill flag information.
 void MachineRegisterInfo::clearKillFlags(unsigned Reg) const {
-  for (use_iterator UI = use_begin(Reg), UE = use_end(); UI != UE; ++UI)
-    UI.getOperand().setIsKill(false);
+  for (MachineOperand &MO : use_operands(Reg))
+    MO.setIsKill(false);
 }
 
 bool MachineRegisterInfo::isLiveIn(unsigned Reg) const {
@@ -399,8 +392,8 @@
 
 #ifndef NDEBUG
 void MachineRegisterInfo::dumpUses(unsigned Reg) const {
-  for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I)
-    I.getOperand().getParent()->dump();
+  for (MachineInstr &I : use_instructions(Reg))
+    I.dump();
 }
 #endif
 
@@ -422,3 +415,18 @@
       return false;
   return true;
 }
+
+/// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the
+/// specified register as undefined, which causes the DBG_VALUE to be
+/// deleted during LiveDebugVariables analysis.
+void MachineRegisterInfo::markUsesInDebugValueAsUndef(unsigned Reg) const {
+  // Mark any DBG_VALUE that uses Reg as undef (but don't delete it).
+  MachineRegisterInfo::use_instr_iterator nextI;
+  for (use_instr_iterator I = use_instr_begin(Reg), E = use_instr_end();
+       I != E; I = nextI) {
+    nextI = std::next(I);  // I is invalidated by the setReg
+    MachineInstr *UseMI = &*I;
+    if (UseMI->isDebugValue())
+      UseMI->getOperand(0).setReg(0U);
+  }
+}
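A typical caller is a pass that deletes the last remaining definition of a virtual register and must not leave DBG_VALUEs reading it. A hedged sketch of that pattern (the wrapper name is invented):

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    using namespace llvm;

    // Sketch: detach debug users before erasing the sole def of Reg, so that
    // LiveDebugVariables sees undef DBG_VALUEs instead of a dangling vreg.
    static void eraseDefAndInvalidateDebug(MachineRegisterInfo &MRI,
                                           MachineInstr *DefMI, unsigned Reg) {
      MRI.markUsesInDebugValueAsUndef(Reg);
      DefMI->eraseFromParent();
    }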
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index 17f0af8..77496ad 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -230,16 +230,6 @@
   U.setReg(NewVR);
 }
 
-void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) {
-  MRI->replaceRegWith(OldReg, NewReg);
-
-  AvailableValsTy &AvailableVals = getAvailableVals(AV);
-  for (DenseMap<MachineBasicBlock*, unsigned>::iterator
-         I = AvailableVals.begin(), E = AvailableVals.end(); I != E; ++I)
-    if (I->second == OldReg)
-      I->second = NewReg;
-}
-
 /// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl
 /// template, specialized for MachineSSAUpdater.
 namespace llvm {
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index e71c4df..d90cd23 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -15,7 +15,6 @@
 #define DEBUG_TYPE "misched"
 
 #include "llvm/CodeGen/MachineScheduler.h"
-#include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/PriorityQueue.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -49,6 +48,11 @@
 
 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
   cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
+
+static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
+  cl::desc("Only schedule this function"));
+static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
+  cl::desc("Only schedule this MBB#"));
 #else
 static bool ViewMISchedDAGs = false;
 #endif // NDEBUG
@@ -90,25 +94,47 @@
 }
 
 namespace {
+/// Base class for a machine scheduler that can run at any point.
+class MachineSchedulerBase : public MachineSchedContext,
+                             public MachineFunctionPass {
+public:
+  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
+
+  void print(raw_ostream &O, const Module* = 0) const override;
+
+protected:
+  void scheduleRegions(ScheduleDAGInstrs &Scheduler);
+};
+
 /// MachineScheduler runs after coalescing and before register allocation.
-class MachineScheduler : public MachineSchedContext,
-                         public MachineFunctionPass {
+class MachineScheduler : public MachineSchedulerBase {
 public:
   MachineScheduler();
 
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
 
-  virtual void releaseMemory() {}
-
-  virtual bool runOnMachineFunction(MachineFunction&);
-
-  virtual void print(raw_ostream &O, const Module* = 0) const;
+  bool runOnMachineFunction(MachineFunction&) override;
 
   static char ID; // Class identification, replacement for typeinfo
 
 protected:
   ScheduleDAGInstrs *createMachineScheduler();
 };
+
+/// PostMachineScheduler runs shortly before code emission.
+class PostMachineScheduler : public MachineSchedulerBase {
+public:
+  PostMachineScheduler();
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  bool runOnMachineFunction(MachineFunction&) override;
+
+  static char ID; // Class identification, replacement for typeinfo
+
+protected:
+  ScheduleDAGInstrs *createPostMachineScheduler();
+};
 } // namespace
 
 char MachineScheduler::ID = 0;
@@ -124,7 +150,7 @@
                     "Machine Instruction Scheduler", false, false)
 
 MachineScheduler::MachineScheduler()
-: MachineFunctionPass(ID) {
+: MachineSchedulerBase(ID) {
   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
 }
 
@@ -141,6 +167,26 @@
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
+char PostMachineScheduler::ID = 0;
+
+char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
+
+INITIALIZE_PASS(PostMachineScheduler, "postmisched",
+                "PostRA Machine Instruction Scheduler", false, false)
+
+PostMachineScheduler::PostMachineScheduler()
+: MachineSchedulerBase(ID) {
+  initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
+}
+
+void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  AU.addRequiredID(MachineDominatorsID);
+  AU.addRequired<MachineLoopInfo>();
+  AU.addRequired<TargetPassConfig>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
 MachinePassRegistry MachineSchedRegistry::Registry;
 
 /// A dummy default scheduler factory indicates whether the scheduler
@@ -162,8 +208,8 @@
 
 /// Forward declare the standard machine scheduler. This will be used as the
 /// default scheduler if the target does not set a default.
-static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C);
-
+static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C);
+static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C);
 
 /// Decrement this iterator until reaching the top or a non-debug instr.
 static MachineBasicBlock::const_iterator
@@ -222,7 +268,20 @@
     return Scheduler;
 
   // Default to GenericScheduler.
-  return createGenericSched(this);
+  return createGenericSchedLive(this);
+}
+
+/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
+/// the caller. We don't have a command line option to override the postRA
+/// scheduler. The Target must configure it.
+ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
+  // Get the postRA scheduler set by the target for this function.
+  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
+  if (Scheduler)
+    return Scheduler;
+
+  // Default to GenericScheduler.
+  return createGenericSchedPostRA(this);
 }
 
 /// Top-level MachineScheduler pass driver.
@@ -252,7 +311,6 @@
   AA = &getAnalysis<AliasAnalysis>();
 
   LIS = &getAnalysis<LiveIntervals>();
-  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
 
   if (VerifyScheduling) {
     DEBUG(LIS->dump());
@@ -262,7 +320,60 @@
 
   // Instantiate the selected scheduler for this target, function, and
   // optimization level.
-  OwningPtr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
+  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
+  scheduleRegions(*Scheduler);
+
+  DEBUG(LIS->dump());
+  if (VerifyScheduling)
+    MF->verify(this, "After machine scheduling.");
+  return true;
+}
+
+bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
+  if (skipOptnoneFunction(*mf.getFunction()))
+    return false;
+
+  DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
+
+  // Initialize the context of the pass.
+  MF = &mf;
+  PassConfig = &getAnalysis<TargetPassConfig>();
+
+  if (VerifyScheduling)
+    MF->verify(this, "Before post machine scheduling.");
+
+  // Instantiate the selected scheduler for this target, function, and
+  // optimization level.
+  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
+  scheduleRegions(*Scheduler);
+
+  if (VerifyScheduling)
+    MF->verify(this, "After post machine scheduling.");
+  return true;
+}
+
+/// Return true if the given instruction should not be included in a scheduling
+/// region.
+///
+/// MachineScheduler does not currently support scheduling across calls. To
+/// handle calls, the DAG builder needs to be modified to create register
+/// anti/output dependencies on the registers clobbered by the call's regmask
+/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
+/// scheduling across calls. In PostRA scheduling, we need the isCall check to
+/// enforce the boundary, but there would be no benefit to postRA scheduling
+/// across calls this late anyway.
+static bool isSchedBoundary(MachineBasicBlock::iterator MI,
+                            MachineBasicBlock *MBB,
+                            MachineFunction *MF,
+                            const TargetInstrInfo *TII,
+                            bool IsPostRA) {
+  return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF);
+}
+
+/// Main driver for both MachineScheduler and PostMachineScheduler.
+void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) {
+  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+  bool IsPostRA = Scheduler.isPostRA();
 
   // Visit all machine basic blocks.
   //
@@ -271,7 +382,15 @@
   for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
        MBB != MBBEnd; ++MBB) {
 
-    Scheduler->startBlock(MBB);
+    Scheduler.startBlock(MBB);
+
+#ifndef NDEBUG
+    if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
+      continue;
+    if (SchedOnlyBlock.getNumOccurrences()
+        && (int)SchedOnlyBlock != MBB->getNumber())
+      continue;
+#endif
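These filters only exist in assert-enabled builds (note the surrounding #ifndef NDEBUG) and are driven by the cl::opts defined at the top of the file, e.g.:

    llc -misched-only-func=foo -misched-only-block=3 foo.ll

Every other block is still entered via startBlock and then skipped before any scheduling region is formed.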
 
     // Break the block into scheduling regions [I, RegionEnd), and schedule each
     // region as soon as it is discovered. RegionEnd points the scheduling
@@ -283,13 +402,16 @@
     // The Scheduler may insert instructions during either schedule() or
     // exitRegion(), even for empty regions. So the local iterators 'I' and
     // 'RegionEnd' are invalid across these calls.
-    unsigned RemainingInstrs = MBB->size();
+    //
+    // MBB::size() uses instr_iterator to count. Here we need a bundle to count
+    // as a single instruction.
+    unsigned RemainingInstrs = std::distance(MBB->begin(), MBB->end());
     for(MachineBasicBlock::iterator RegionEnd = MBB->end();
-        RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
+        RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) {
 
       // Avoid decrementing RegionEnd for blocks with no terminator.
-      if (RegionEnd != MBB->end()
-          || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
+      if (RegionEnd != MBB->end() ||
+          isSchedBoundary(std::prev(RegionEnd), MBB, MF, TII, IsPostRA)) {
         --RegionEnd;
         // Count the boundary instruction.
         --RemainingInstrs;
@@ -300,21 +422,22 @@
       unsigned NumRegionInstrs = 0;
       MachineBasicBlock::iterator I = RegionEnd;
       for(;I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) {
-        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
+        if (isSchedBoundary(std::prev(I), MBB, MF, TII, IsPostRA))
           break;
       }
       // Notify the scheduler of the region, even if we may skip scheduling
       // it. Perhaps it still needs to be bundled.
-      Scheduler->enterRegion(MBB, I, RegionEnd, NumRegionInstrs);
+      Scheduler.enterRegion(MBB, I, RegionEnd, NumRegionInstrs);
 
       // Skip empty scheduling regions (0 or 1 schedulable instructions).
-      if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
+      if (I == RegionEnd || I == std::prev(RegionEnd)) {
         // Close the current region. Bundle the terminator if needed.
         // This invalidates 'RegionEnd' and 'I'.
-        Scheduler->exitRegion();
+        Scheduler.exitRegion();
         continue;
       }
-      DEBUG(dbgs() << "********** MI Scheduling **********\n");
+      DEBUG(dbgs() << "********** " << ((Scheduler.isPostRA()) ? "PostRA " : "")
+            << "MI Scheduling **********\n");
       DEBUG(dbgs() << MF->getName()
             << ":BB#" << MBB->getNumber() << " " << MBB->getName()
             << "\n  From: " << *I << "    To: ";
@@ -325,26 +448,27 @@
 
       // Schedule a region: possibly reorder instructions.
       // This invalidates 'RegionEnd' and 'I'.
-      Scheduler->schedule();
+      Scheduler.schedule();
 
       // Close the current region.
-      Scheduler->exitRegion();
+      Scheduler.exitRegion();
 
       // Scheduling has invalidated the current iterator 'I'. Ask the
       // scheduler for the top of its scheduled region.
-      RegionEnd = Scheduler->begin();
+      RegionEnd = Scheduler.begin();
     }
     assert(RemainingInstrs == 0 && "Instruction count mismatch!");
-    Scheduler->finishBlock();
+    Scheduler.finishBlock();
+    if (Scheduler.isPostRA()) {
+      // FIXME: Ideally, no further passes should rely on kill flags. However,
+      // thumb2 size reduction is currently an exception.
+      Scheduler.fixupKills(MBB);
+    }
   }
-  Scheduler->finalizeSchedule();
-  DEBUG(LIS->dump());
-  if (VerifyScheduling)
-    MF->verify(this, "After machine scheduling.");
-  return true;
+  Scheduler.finalizeSchedule();
 }
 
-void MachineScheduler::print(raw_ostream &O, const Module* m) const {
+void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
   // unimplemented
 }
 
@@ -358,12 +482,12 @@
 #endif
 
 //===----------------------------------------------------------------------===//
-// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
-// preservation.
-//===----------------------------------------------------------------------===//
+// ScheduleDAGMI - Basic machine instruction scheduling. This is
+// independent of PreRA/PostRA scheduling and involves no extra book-keeping
+// for virtual registers.
+//===----------------------------------------------------------------------===//
 
 ScheduleDAGMI::~ScheduleDAGMI() {
-  delete DFSResult;
   DeleteContainerPointers(Mutations);
   delete SchedImpl;
 }
@@ -453,10 +577,24 @@
   }
 }
 
+/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
+/// crossing a scheduling boundary. [begin, end) includes all instructions in
+/// the region, including the boundary itself and single-instruction regions
+/// that don't get scheduled.
+void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
+                                MachineBasicBlock::iterator begin,
+                                MachineBasicBlock::iterator end,
+                                unsigned regioninstrs)
+{
+  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
+
+  SchedImpl->initPolicy(begin, end, regioninstrs);
+}
+
 /// This is normally called from the main scheduler loop but may also be invoked
 /// by the scheduling strategy to perform additional code motion.
-void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
-                                    MachineBasicBlock::iterator InsertPos) {
+void ScheduleDAGMI::moveInstruction(
+  MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
   // Advance RegionBegin if the first instruction moves down.
   if (&*RegionBegin == MI)
     ++RegionBegin;
@@ -465,7 +603,8 @@
   BB->splice(InsertPos, BB, MI);
 
   // Update LiveIntervals
-  LIS->handleMove(MI, /*UpdateFlags=*/true);
+  if (LIS)
+    LIS->handleMove(MI, /*UpdateFlags=*/true);
 
   // Recede RegionBegin if an instruction moves above the first.
   if (RegionBegin == InsertPos)
@@ -483,31 +622,212 @@
   return true;
 }
 
+/// Per-region scheduling driver, called back from
+/// MachineScheduler::runOnMachineFunction. This is a simplified driver that
+/// does not consider liveness or register pressure. It is useful for PostRA
+/// scheduling and potentially other custom schedulers.
+void ScheduleDAGMI::schedule() {
+  // Build the DAG.
+  buildSchedGraph(AA);
+
+  Topo.InitDAGTopologicalSorting();
+
+  postprocessDAG();
+
+  SmallVector<SUnit*, 8> TopRoots, BotRoots;
+  findRootsAndBiasEdges(TopRoots, BotRoots);
+
+  // Initialize the strategy before modifying the DAG.
+  // This may initialize a DFSResult to be used for queue priority.
+  SchedImpl->initialize(this);
+
+  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+          SUnits[su].dumpAll(this));
+  if (ViewMISchedDAGs) viewGraph();
+
+  // Initialize ready queues now that the DAG and priority data are finalized.
+  initQueues(TopRoots, BotRoots);
+
+  bool IsTopNode = false;
+  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+    assert(!SU->isScheduled && "Node already scheduled");
+    if (!checkSchedLimit())
+      break;
+
+    MachineInstr *MI = SU->getInstr();
+    if (IsTopNode) {
+      assert(SU->isTopReady() && "node still has unscheduled dependencies");
+      if (&*CurrentTop == MI)
+        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
+      else
+        moveInstruction(MI, CurrentTop);
+    }
+    else {
+      assert(SU->isBottomReady() && "node still has unscheduled dependencies");
+      MachineBasicBlock::iterator priorII =
+        priorNonDebug(CurrentBottom, CurrentTop);
+      if (&*priorII == MI)
+        CurrentBottom = priorII;
+      else {
+        if (&*CurrentTop == MI)
+          CurrentTop = nextIfDebug(++CurrentTop, priorII);
+        moveInstruction(MI, CurrentBottom);
+        CurrentBottom = MI;
+      }
+    }
+    updateQueues(SU, IsTopNode);
+
+    // Notify the scheduling strategy after updating the DAG.
+    SchedImpl->schedNode(SU, IsTopNode);
+  }
+  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+  placeDebugValues();
+
+  DEBUG({
+      unsigned BBNum = begin()->getParent()->getNumber();
+      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
+      dumpSchedule();
+      dbgs() << '\n';
+    });
+}
+
+/// Apply each ScheduleDAGMutation step in order.
+void ScheduleDAGMI::postprocessDAG() {
+  for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
+    Mutations[i]->apply(this);
+  }
+}
+
+void ScheduleDAGMI::
+findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
+                      SmallVectorImpl<SUnit*> &BotRoots) {
+  for (std::vector<SUnit>::iterator
+         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
+    SUnit *SU = &(*I);
+    assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
+
+    // Order predecessors so DFSResult follows the critical path.
+    SU->biasCriticalPath();
+
+    // A SUnit is ready to top schedule if it has no predecessors.
+    if (!I->NumPredsLeft)
+      TopRoots.push_back(SU);
+    // A SUnit is ready to bottom schedule if it has no successors.
+    if (!I->NumSuccsLeft)
+      BotRoots.push_back(SU);
+  }
+  ExitSU.biasCriticalPath();
+}
+
+/// Identify DAG roots and setup scheduler queues.
+void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
+                               ArrayRef<SUnit*> BotRoots) {
+  NextClusterSucc = NULL;
+  NextClusterPred = NULL;
+
+  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
+  //
+  // Nodes with unreleased weak edges can still be roots.
+  // Release top roots in forward order.
+  for (SmallVectorImpl<SUnit*>::const_iterator
+         I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
+    SchedImpl->releaseTopNode(*I);
+  }
+  // Release bottom roots in reverse order so the higher priority nodes appear
+  // first. This is more natural and slightly more efficient.
+  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
+         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
+    SchedImpl->releaseBottomNode(*I);
+  }
+
+  releaseSuccessors(&EntrySU);
+  releasePredecessors(&ExitSU);
+
+  SchedImpl->registerRoots();
+
+  // Advance past initial DebugValues.
+  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
+  CurrentBottom = RegionEnd;
+}
+
+/// Update scheduler queues after scheduling an instruction.
+void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
+  // Release dependent instructions for scheduling.
+  if (IsTopNode)
+    releaseSuccessors(SU);
+  else
+    releasePredecessors(SU);
+
+  SU->isScheduled = true;
+}
+
+/// Reinsert any remaining debug_values, just like the PostRA scheduler.
+void ScheduleDAGMI::placeDebugValues() {
+  // If first instruction was a DBG_VALUE then put it back.
+  if (FirstDbgValue) {
+    BB->splice(RegionBegin, BB, FirstDbgValue);
+    RegionBegin = FirstDbgValue;
+  }
+
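+  // Visit DbgValues in reverse so each DBG_VALUE is spliced back in right
+  // after the instruction it originally followed (the pair's second member).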
+  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
+    std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
+    MachineInstr *DbgValue = P.first;
+    MachineBasicBlock::iterator OrigPrevMI = P.second;
+    if (&*RegionBegin == DbgValue)
+      ++RegionBegin;
+    BB->splice(++OrigPrevMI, BB, DbgValue);
+    if (OrigPrevMI == std::prev(RegionEnd))
+      RegionEnd = DbgValue;
+  }
+  DbgValues.clear();
+  FirstDbgValue = NULL;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ScheduleDAGMI::dumpSchedule() const {
+  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
+    if (SUnit *SU = getSUnit(&(*MI)))
+      SU->dump(this);
+    else
+      dbgs() << "Missing SUnit\n";
+  }
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
+// preservation.
+//===----------------------------------------------------------------------===//
+
+ScheduleDAGMILive::~ScheduleDAGMILive() {
+  delete DFSResult;
+}
+
 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
 /// crossing a scheduling boundary. [begin, end) includes all instructions in
 /// the region, including the boundary itself and single-instruction regions
 /// that don't get scheduled.
-void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
+void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
                                 MachineBasicBlock::iterator begin,
                                 MachineBasicBlock::iterator end,
                                 unsigned regioninstrs)
 {
-  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
+  // ScheduleDAGMI initializes SchedImpl's per-region policy.
+  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
 
   // For convenience remember the end of the liveness region.
-  LiveRegionEnd =
-    (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
+  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
 
   SUPressureDiffs.clear();
 
-  SchedImpl->initPolicy(begin, end, regioninstrs);
-
   ShouldTrackPressure = SchedImpl->shouldTrackPressure();
 }
 
 // Setup the register pressure trackers for the top scheduled top and bottom
 // scheduled regions.
-void ScheduleDAGMI::initRegPressure() {
+void ScheduleDAGMILive::initRegPressure() {
   TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
   BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
 
@@ -567,7 +887,7 @@
         dbgs() << "\n");
 }
 
-void ScheduleDAGMI::
+void ScheduleDAGMILive::
 updateScheduledPressure(const SUnit *SU,
                         const std::vector<unsigned> &NewMaxPressure) {
   const PressureDiff &PDiff = getPressureDiff(SU);
@@ -595,7 +915,7 @@
 
 /// Update the PressureDiff array for liveness after scheduling this
 /// instruction.
-void ScheduleDAGMI::updatePressureDiffs(ArrayRef<unsigned> LiveUses) {
+void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<unsigned> LiveUses) {
   for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) {
     /// FIXME: Currently assuming single-use physregs.
     unsigned Reg = LiveUses[LUIdx];
@@ -644,9 +964,9 @@
 /// so that it can be easily extended by experimental schedulers. Generally,
 /// implementing MachineSchedStrategy should be sufficient to implement a new
 /// scheduling algorithm. However, if a scheduler further subclasses
-/// ScheduleDAGMI then it will want to override this virtual method in order to
-/// update any specialized state.
-void ScheduleDAGMI::schedule() {
+/// ScheduleDAGMILive then it will want to override this virtual method in order
+/// to update any specialized state.
+void ScheduleDAGMILive::schedule() {
   buildDAGWithRegPressure();
 
   Topo.InitDAGTopologicalSorting();
@@ -667,6 +987,11 @@
   // Initialize ready queues now that the DAG and priority data are finalized.
   initQueues(TopRoots, BotRoots);
 
+  if (ShouldTrackPressure) {
+    assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
+    TopRPTracker.setPos(CurrentTop);
+  }
+
   bool IsTopNode = false;
   while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
     assert(!SU->isScheduled && "Node already scheduled");
@@ -676,6 +1001,18 @@
     scheduleMI(SU, IsTopNode);
 
     updateQueues(SU, IsTopNode);
+
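+    // If subtree analysis is enabled, notify the strategy the first time a
+    // node from each subtree is scheduled.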
+    if (DFSResult) {
+      unsigned SubtreeID = DFSResult->getSubtreeID(SU);
+      if (!ScheduledTrees.test(SubtreeID)) {
+        ScheduledTrees.set(SubtreeID);
+        DFSResult->scheduleTree(SubtreeID);
+        SchedImpl->scheduleTree(SubtreeID);
+      }
+    }
+
+    // Notify the scheduling strategy after updating the DAG.
+    SchedImpl->schedNode(SU, IsTopNode);
   }
   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
 
@@ -690,7 +1027,7 @@
 }
 
 /// Build the DAG and setup three register pressure trackers.
-void ScheduleDAGMI::buildDAGWithRegPressure() {
+void ScheduleDAGMILive::buildDAGWithRegPressure() {
   if (!ShouldTrackPressure) {
     RPTracker.reset();
     RegionCriticalPSets.clear();
@@ -713,14 +1050,7 @@
   initRegPressure();
 }
 
-/// Apply each ScheduleDAGMutation step in order.
-void ScheduleDAGMI::postprocessDAG() {
-  for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
-    Mutations[i]->apply(this);
-  }
-}
-
-void ScheduleDAGMI::computeDFSResult() {
+void ScheduleDAGMILive::computeDFSResult() {
   if (!DFSResult)
     DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
   DFSResult->clear();
@@ -730,26 +1060,6 @@
   ScheduledTrees.resize(DFSResult->getNumSubtrees());
 }
 
-void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
-                                          SmallVectorImpl<SUnit*> &BotRoots) {
-  for (std::vector<SUnit>::iterator
-         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
-    SUnit *SU = &(*I);
-    assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
-
-    // Order predecessors so DFSResult follows the critical path.
-    SU->biasCriticalPath();
-
-    // A SUnit is ready to top schedule if it has no predecessors.
-    if (!I->NumPredsLeft)
-      TopRoots.push_back(SU);
-    // A SUnit is ready to bottom schedule if it has no successors.
-    if (!I->NumSuccsLeft)
-      BotRoots.push_back(SU);
-  }
-  ExitSU.biasCriticalPath();
-}
-
 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
 /// only provides the critical path for single block loops. To handle loops that
 /// span blocks, we could use the vreg path latencies provided by
@@ -773,7 +1083,10 @@
 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
 /// CyclicCriticalPath = min(2, 2) = 2
-unsigned ScheduleDAGMI::computeCyclicCriticalPath() {
+///
+/// This could be relevant to PostRA scheduling, but is currently implemented
+/// assuming LiveIntervals.
+unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
   // This only applies to single block loop.
   if (!BB->isSuccessor(BB))
     return 0;
@@ -835,44 +1148,8 @@
   return MaxCyclicLatency;
 }
 
-/// Identify DAG roots and setup scheduler queues.
-void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
-                               ArrayRef<SUnit*> BotRoots) {
-  NextClusterSucc = NULL;
-  NextClusterPred = NULL;
-
-  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
-  //
-  // Nodes with unreleased weak edges can still be roots.
-  // Release top roots in forward order.
-  for (SmallVectorImpl<SUnit*>::const_iterator
-         I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
-    SchedImpl->releaseTopNode(*I);
-  }
-  // Release bottom roots in reverse order so the higher priority nodes appear
-  // first. This is more natural and slightly more efficient.
-  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
-         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
-    SchedImpl->releaseBottomNode(*I);
-  }
-
-  releaseSuccessors(&EntrySU);
-  releasePredecessors(&ExitSU);
-
-  SchedImpl->registerRoots();
-
-  // Advance past initial DebugValues.
-  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
-  CurrentBottom = RegionEnd;
-
-  if (ShouldTrackPressure) {
-    assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
-    TopRPTracker.setPos(CurrentTop);
-  }
-}
-
 /// Move an instruction and update register pressure.
-void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
+void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
   // Move the instruction to its new location in the instruction stream.
   MachineInstr *MI = SU->getInstr();
 
@@ -917,63 +1194,6 @@
   }
 }
 
-/// Update scheduler queues after scheduling an instruction.
-void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
-  // Release dependent instructions for scheduling.
-  if (IsTopNode)
-    releaseSuccessors(SU);
-  else
-    releasePredecessors(SU);
-
-  SU->isScheduled = true;
-
-  if (DFSResult) {
-    unsigned SubtreeID = DFSResult->getSubtreeID(SU);
-    if (!ScheduledTrees.test(SubtreeID)) {
-      ScheduledTrees.set(SubtreeID);
-      DFSResult->scheduleTree(SubtreeID);
-      SchedImpl->scheduleTree(SubtreeID);
-    }
-  }
-
-  // Notify the scheduling strategy after updating the DAG.
-  SchedImpl->schedNode(SU, IsTopNode);
-}
-
-/// Reinsert any remaining debug_values, just like the PostRA scheduler.
-void ScheduleDAGMI::placeDebugValues() {
-  // If first instruction was a DBG_VALUE then put it back.
-  if (FirstDbgValue) {
-    BB->splice(RegionBegin, BB, FirstDbgValue);
-    RegionBegin = FirstDbgValue;
-  }
-
-  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
-         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
-    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
-    MachineInstr *DbgValue = P.first;
-    MachineBasicBlock::iterator OrigPrevMI = P.second;
-    if (&*RegionBegin == DbgValue)
-      ++RegionBegin;
-    BB->splice(++OrigPrevMI, BB, DbgValue);
-    if (OrigPrevMI == llvm::prior(RegionEnd))
-      RegionEnd = DbgValue;
-  }
-  DbgValues.clear();
-  FirstDbgValue = NULL;
-}
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void ScheduleDAGMI::dumpSchedule() const {
-  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
-    if (SUnit *SU = getSUnit(&(*MI)))
-      SU->dump(this);
-    else
-      dbgs() << "Missing SUnit\n";
-  }
-}
-#endif
-
 //===----------------------------------------------------------------------===//
 // LoadClusterMutation - DAG post-processing to cluster loads.
 //===----------------------------------------------------------------------===//
@@ -988,9 +1208,11 @@
     unsigned Offset;
     LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
       : SU(su), BaseReg(reg), Offset(ofs) {}
+
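+    // Sort by base register, then offset, so loads that can be clustered
+    // become adjacent after std::sort.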
+    bool operator<(const LoadInfo &RHS) const {
+      return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset);
+    }
   };
-  static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
-                           const LoadClusterMutation::LoadInfo &RHS);
 
   const TargetInstrInfo *TII;
   const TargetRegisterInfo *TRI;
@@ -999,20 +1221,12 @@
                       const TargetRegisterInfo *tri)
     : TII(tii), TRI(tri) {}
 
-  virtual void apply(ScheduleDAGMI *DAG);
+  void apply(ScheduleDAGMI *DAG) override;
 protected:
   void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
 };
 } // anonymous
 
-bool LoadClusterMutation::LoadInfoLess(
-  const LoadClusterMutation::LoadInfo &LHS,
-  const LoadClusterMutation::LoadInfo &RHS) {
-  if (LHS.BaseReg != RHS.BaseReg)
-    return LHS.BaseReg < RHS.BaseReg;
-  return LHS.Offset < RHS.Offset;
-}
-
 void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
                                                   ScheduleDAGMI *DAG) {
   SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
@@ -1025,7 +1239,7 @@
   }
   if (LoadRecords.size() < 2)
     return;
-  std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
+  std::sort(LoadRecords.begin(), LoadRecords.end());
   unsigned ClusterLength = 1;
   for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
     if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
@@ -1102,7 +1316,7 @@
 public:
   MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
 
-  virtual void apply(ScheduleDAGMI *DAG);
+  void apply(ScheduleDAGMI *DAG) override;
 };
 } // anonymous
 
@@ -1151,10 +1365,10 @@
 public:
   CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
 
-  virtual void apply(ScheduleDAGMI *DAG);
+  void apply(ScheduleDAGMI *DAG) override;
 
 protected:
-  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG);
+  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
 };
 } // anonymous
 
@@ -1177,7 +1391,7 @@
 /// this algorithm should handle extended blocks. An EBB is a set of
 /// contiguously numbered blocks such that the previous block in the EBB is
 /// always the single predecessor.
-void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) {
+void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
   LiveIntervals *LIS = DAG->getLIS();
   MachineInstr *Copy = CopySU->getInstr();
 
@@ -1227,19 +1441,19 @@
   // Check if GlobalLI contains a hole in the vicinity of LocalLI.
   if (GlobalSegment != GlobalLI->begin()) {
     // Two address defs have no hole.
-    if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->end,
+    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
                                GlobalSegment->start)) {
       return;
     }
     // If the prior global segment may be defined by the same two-address
     // instruction that also defines LocalLI, then can't make a hole here.
-    if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->start,
+    if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
                                LocalLI->beginIndex())) {
       return;
     }
     // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
     // it would be a disconnected component in the live range.
-    assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() &&
+    assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
            "Disconnected LRG within the scheduling region.");
   }
   MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
@@ -1302,6 +1516,8 @@
 /// \brief Callback from DAG postProcessing to create weak edges to encourage
 /// copy elimination.
 void CopyConstrain::apply(ScheduleDAGMI *DAG) {
+  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
+
   MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
   if (FirstPos == DAG->end())
     return;
@@ -1314,28 +1530,582 @@
     if (!SU->getInstr()->isCopy())
       continue;
 
-    constrainLocalCopy(SU, DAG);
+    constrainLocalCopy(SU, static_cast<ScheduleDAGMILive*>(DAG));
   }
 }
 
 //===----------------------------------------------------------------------===//
-// GenericScheduler - Implementation of the generic MachineSchedStrategy.
+// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
+// and possibly other custom schedulers.
+//===----------------------------------------------------------------------===//
+
+static const unsigned InvalidCycle = ~0U;
+
+SchedBoundary::~SchedBoundary() { delete HazardRec; }
+
+void SchedBoundary::reset() {
+  // A new HazardRec is created for each DAG and owned by SchedBoundary.
+  // Destroying and reconstructing it is very expensive though, so keep an
+  // invalid, placeholder HazardRec.
+  if (HazardRec && HazardRec->isEnabled()) {
+    delete HazardRec;
+    HazardRec = 0;
+  }
+  Available.clear();
+  Pending.clear();
+  CheckPending = false;
+  NextSUs.clear();
+  CurrCycle = 0;
+  CurrMOps = 0;
+  MinReadyCycle = UINT_MAX;
+  ExpectedLatency = 0;
+  DependentLatency = 0;
+  RetiredMOps = 0;
+  MaxExecutedResCount = 0;
+  ZoneCritResIdx = 0;
+  IsResourceLimited = false;
+  ReservedCycles.clear();
+#ifndef NDEBUG
+  // Track the maximum number of stall cycles that could arise either from the
+  // latency of a DAG edge or the number of cycles that a processor resource is
+  // reserved (SchedBoundary::ReservedCycles).
+  MaxObservedLatency = 0;
+#endif
+  // Reserve a zero-count for invalid CritResIdx.
+  ExecutedResCounts.resize(1);
+  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
+}
+
+void SchedRemainder::
+init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
+  reset();
+  if (!SchedModel->hasInstrSchedModel())
+    return;
+  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
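+  // Sum each unscheduled SUnit's micro-ops and per-resource cycles, scaled
+  // by the model's factors for uniform comparison across resource kinds.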
+  for (std::vector<SUnit>::iterator
+         I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
+    const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
+    RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC)
+      * SchedModel->getMicroOpFactor();
+    for (TargetSchedModel::ProcResIter
+           PI = SchedModel->getWriteProcResBegin(SC),
+           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+      unsigned PIdx = PI->ProcResourceIdx;
+      unsigned Factor = SchedModel->getResourceFactor(PIdx);
+      RemainingCounts[PIdx] += (Factor * PI->Cycles);
+    }
+  }
+}
+
+void SchedBoundary::
+init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
+  reset();
+  DAG = dag;
+  SchedModel = smodel;
+  Rem = rem;
+  if (SchedModel->hasInstrSchedModel()) {
+    ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
+    ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
+  }
+}
+
+/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
+/// these "soft stalls" differently than the hard stall cycles based on CPU
+/// resources and computed by checkHazard(). A fully in-order model
+/// (MicroOpBufferSize==0) will not make use of this since instructions are not
+/// available for scheduling until they are ready. However, a weaker in-order
+/// model may use this for heuristics. For example, if a processor has in-order
+/// behavior when reading certain resources, this may come into play.
+unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
+  if (!SU->isUnbuffered)
+    return 0;
+
+  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
+  if (ReadyCycle > CurrCycle)
+    return ReadyCycle - CurrCycle;
+  return 0;
+}
+
+/// Compute the next cycle at which the given processor resource can be
+/// scheduled.
+unsigned SchedBoundary::
+getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
+  unsigned NextUnreserved = ReservedCycles[PIdx];
+  // If this resource has never been used, always return cycle zero.
+  if (NextUnreserved == InvalidCycle)
+    return 0;
+  // For bottom-up scheduling add the cycles needed for the current operation.
+  if (!isTop())
+    NextUnreserved += Cycles;
+  return NextUnreserved;
+}
+
+/// Does this SU have a hazard within the current instruction group.
+///
+/// The scheduler supports two modes of hazard recognition. The first is the
+/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
+/// supports highly complicated in-order reservation tables
+/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
+///
+/// The second is a streamlined mechanism that checks for hazards based on
+/// simple counters that the scheduler itself maintains. It explicitly checks
+/// for instruction dispatch limitations, including the number of micro-ops that
+/// can dispatch per cycle.
+///
+/// TODO: Also check whether the SU must start a new group.
+bool SchedBoundary::checkHazard(SUnit *SU) {
+  if (HazardRec->isEnabled()
+      && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
+    return true;
+  }
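+  // Next, use the scheduler's own counters: adding this SU's micro-ops to
+  // the current instruction group must not exceed the issue width.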
+  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
+  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
+    DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
+          << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
+    return true;
+  }
+  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
+    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+    for (TargetSchedModel::ProcResIter
+           PI = SchedModel->getWriteProcResBegin(SC),
+           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+      if (getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles) > CurrCycle)
+        return true;
+    }
+  }
+  return false;
+}
+
+// Find the unscheduled node in ReadySUs with the highest latency.
+unsigned SchedBoundary::
+findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
+  SUnit *LateSU = 0;
+  unsigned RemLatency = 0;
+  for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
+       I != E; ++I) {
+    unsigned L = getUnscheduledLatency(*I);
+    if (L > RemLatency) {
+      RemLatency = L;
+      LateSU = *I;
+    }
+  }
+  if (LateSU) {
+    DEBUG(dbgs() << Available.getName() << " RemLatency SU("
+          << LateSU->NodeNum << ") " << RemLatency << "c\n");
+  }
+  return RemLatency;
+}
+
+// Count resources in this zone and the remaining unscheduled
+// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
+// resource index, or zero if the zone is issue limited.
+unsigned SchedBoundary::
+getOtherResourceCount(unsigned &OtherCritIdx) {
+  OtherCritIdx = 0;
+  if (!SchedModel->hasInstrSchedModel())
+    return 0;
+
+  unsigned OtherCritCount = Rem->RemIssueCount
+    + (RetiredMOps * SchedModel->getMicroOpFactor());
+  DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
+        << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
+  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
+       PIdx != PEnd; ++PIdx) {
+    unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
+    if (OtherCount > OtherCritCount) {
+      OtherCritCount = OtherCount;
+      OtherCritIdx = PIdx;
+    }
+  }
+  if (OtherCritIdx) {
+    DEBUG(dbgs() << "  " << Available.getName() << " + Remain CritRes: "
+          << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
+          << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
+  }
+  return OtherCritCount;
+}
+
+void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
+  if (ReadyCycle < MinReadyCycle)
+    MinReadyCycle = ReadyCycle;
+
+  // Check for interlocks first. For the purpose of other heuristics, an
+  // instruction that cannot issue appears as if it's not in the ReadyQueue.
+  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
+  if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU))
+    Pending.push(SU);
+  else
+    Available.push(SU);
+
+  // Record this node as an immediate dependent of the scheduled node.
+  NextSUs.insert(SU);
+}
+
+void SchedBoundary::releaseTopNode(SUnit *SU) {
+  if (SU->isScheduled)
+    return;
+
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isWeak())
+      continue;
+    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
+    unsigned Latency = I->getLatency();
+#ifndef NDEBUG
+    MaxObservedLatency = std::max(Latency, MaxObservedLatency);
+#endif
+    if (SU->TopReadyCycle < PredReadyCycle + Latency)
+      SU->TopReadyCycle = PredReadyCycle + Latency;
+  }
+  releaseNode(SU, SU->TopReadyCycle);
+}
+
+void SchedBoundary::releaseBottomNode(SUnit *SU) {
+  if (SU->isScheduled)
+    return;
+
+  assert(SU->getInstr() && "Scheduled SUnit must have instr");
+
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isWeak())
+      continue;
+    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
+    unsigned Latency = I->getLatency();
+#ifndef NDEBUG
+    MaxObservedLatency = std::max(Latency, MaxObservedLatency);
+#endif
+    if (SU->BotReadyCycle < SuccReadyCycle + Latency)
+      SU->BotReadyCycle = SuccReadyCycle + Latency;
+  }
+  releaseNode(SU, SU->BotReadyCycle);
+}
+
+/// Move the boundary of scheduled code by one cycle.
+void SchedBoundary::bumpCycle(unsigned NextCycle) {
+  if (SchedModel->getMicroOpBufferSize() == 0) {
+    assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
+    if (MinReadyCycle > NextCycle)
+      NextCycle = MinReadyCycle;
+  }
+  // Update the current micro-ops, which will issue in the next cycle.
+  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
+  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
+
+  // Decrement DependentLatency based on the next cycle.
+  if ((NextCycle - CurrCycle) > DependentLatency)
+    DependentLatency = 0;
+  else
+    DependentLatency -= (NextCycle - CurrCycle);
+
+  if (!HazardRec->isEnabled()) {
+    // Bypass HazardRec virtual calls.
+    CurrCycle = NextCycle;
+  }
+  else {
+    // Bypass getHazardType calls in case of long latency.
+    for (; CurrCycle != NextCycle; ++CurrCycle) {
+      if (isTop())
+        HazardRec->AdvanceCycle();
+      else
+        HazardRec->RecedeCycle();
+    }
+  }
+  CheckPending = true;
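+  // Re-evaluate whether the zone is now resource limited by comparing the
+  // scaled critical-resource count against the scheduled latency.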
+  unsigned LFactor = SchedModel->getLatencyFactor();
+  IsResourceLimited =
+    (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
+    > (int)LFactor;
+
+  DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
+}
+
+void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
+  ExecutedResCounts[PIdx] += Count;
+  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
+    MaxExecutedResCount = ExecutedResCounts[PIdx];
+}
+
+/// Add the given processor resource to this scheduled zone.
+///
+/// \param Cycles indicates the number of consecutive (non-pipelined) cycles
+/// during which this resource is consumed.
+///
+/// \return the next cycle at which the instruction may execute without
+/// oversubscribing resources.
+unsigned SchedBoundary::
+countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
+  unsigned Factor = SchedModel->getResourceFactor(PIdx);
+  unsigned Count = Factor * Cycles;
+  DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx)
+        << " +" << Cycles << "x" << Factor << "u\n");
+
+  // Update Executed resources counts.
+  incExecutedResources(PIdx, Count);
+  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
+  Rem->RemainingCounts[PIdx] -= Count;
+
+  // Check if this resource exceeds the current critical resource. If so, it
+  // becomes the critical resource.
+  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
+    ZoneCritResIdx = PIdx;
+    DEBUG(dbgs() << "  *** Critical resource "
+          << SchedModel->getResourceName(PIdx) << ": "
+          << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
+  }
+  // For reserved resources, record the highest cycle using the resource.
+  unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
+  if (NextAvailable > CurrCycle) {
+    DEBUG(dbgs() << "  Resource conflict: "
+          << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
+          << NextAvailable << "\n");
+  }
+  return NextAvailable;
+}
+
+/// Move the boundary of scheduled code by one SUnit.
+void SchedBoundary::bumpNode(SUnit *SU) {
+  // Update the reservation table.
+  if (HazardRec->isEnabled()) {
+    if (!isTop() && SU->isCall) {
+      // Calls are scheduled with their preceding instructions. For bottom-up
+      // scheduling, clear the pipeline state before emitting.
+      HazardRec->Reset();
+    }
+    HazardRec->EmitInstruction(SU);
+  }
+  // checkHazard should prevent scheduling multiple instructions per cycle that
+  // exceed the issue width.
+  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
+  assert(
+      (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
+      "Cannot schedule this instruction's MicroOps in the current cycle.");
+
+  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
+  DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
+
+  unsigned NextCycle = CurrCycle;
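+  // MicroOpBufferSize selects the stall model: 0 is strictly in-order
+  // (unready instructions never reach here), 1 stalls the cycle until the
+  // instruction is ready, and larger sizes treat micro-ops as buffered,
+  // stalling only for unbuffered resources.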
+  switch (SchedModel->getMicroOpBufferSize()) {
+  case 0:
+    assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
+    break;
+  case 1:
+    if (ReadyCycle > NextCycle) {
+      NextCycle = ReadyCycle;
+      DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
+    }
+    break;
+  default:
+    // We don't currently model the OOO reorder buffer, so consider all
+    // scheduled MOps to be "retired". We do loosely model in-order resource
+    // latency. If this instruction uses an in-order resource, account for any
+    // likely stall cycles.
+    if (SU->isUnbuffered && ReadyCycle > NextCycle)
+      NextCycle = ReadyCycle;
+    break;
+  }
+  RetiredMOps += IncMOps;
+
+  // Update resource counts and critical resource.
+  if (SchedModel->hasInstrSchedModel()) {
+    unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
+    assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
+    Rem->RemIssueCount -= DecRemIssue;
+    if (ZoneCritResIdx) {
+      // Scale scheduled micro-ops for comparing with the critical resource.
+      unsigned ScaledMOps =
+        RetiredMOps * SchedModel->getMicroOpFactor();
+
+      // If scaled micro-ops are now more than the previous critical resource by
+      // a full cycle, then micro-ops issue becomes critical.
+      if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
+          >= (int)SchedModel->getLatencyFactor()) {
+        ZoneCritResIdx = 0;
+        DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
+              << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
+      }
+    }
+    for (TargetSchedModel::ProcResIter
+           PI = SchedModel->getWriteProcResBegin(SC),
+           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+      unsigned RCycle =
+        countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
+      if (RCycle > NextCycle)
+        NextCycle = RCycle;
+    }
+    if (SU->hasReservedResource) {
+      // For reserved resources, record the highest cycle using the resource.
+      // For top-down scheduling, this is the cycle in which we schedule this
+      // instruction plus the number of cycles the operation reserves the
+      // resource. For bottom-up scheduling, it is simply the instruction's
+      // cycle.
+      for (TargetSchedModel::ProcResIter
+             PI = SchedModel->getWriteProcResBegin(SC),
+             PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+        unsigned PIdx = PI->ProcResourceIdx;
+        if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
+          ReservedCycles[PIdx] = isTop() ? NextCycle + PI->Cycles : NextCycle;
+#ifndef NDEBUG
+          MaxObservedLatency = std::max(PI->Cycles, MaxObservedLatency);
+#endif
+        }
+      }
+    }
+  }
+  // Update ExpectedLatency and DependentLatency.
+  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
+  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
+  if (SU->getDepth() > TopLatency) {
+    TopLatency = SU->getDepth();
+    DEBUG(dbgs() << "  " << Available.getName()
+          << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
+  }
+  if (SU->getHeight() > BotLatency) {
+    BotLatency = SU->getHeight();
+    DEBUG(dbgs() << "  " << Available.getName()
+          << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
+  }
+  // If we stall for any reason, bump the cycle.
+  if (NextCycle > CurrCycle) {
+    bumpCycle(NextCycle);
+  }
+  else {
+    // After updating ZoneCritResIdx and ExpectedLatency, check if we're
+    // resource limited. If a stall occurred, bumpCycle does this.
+    unsigned LFactor = SchedModel->getLatencyFactor();
+    IsResourceLimited =
+      (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
+      > (int)LFactor;
+  }
+  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
+  // resets CurrMOps. Loop to handle instructions with more MOps than can
+  // issue in one cycle. Since we commonly reach the max MOps here,
+  // opportunistically bump the cycle to avoid uselessly checking everything
+  // in the readyQ.
+  CurrMOps += IncMOps;
+  while (CurrMOps >= SchedModel->getIssueWidth()) {
+    DEBUG(dbgs() << "  *** Max MOps " << CurrMOps
+          << " at cycle " << CurrCycle << '\n');
+    bumpCycle(++NextCycle);
+  }
+  DEBUG(dumpScheduledState());
+}
+
+/// Release pending ready nodes into the available queue. This makes them
+/// visible to heuristics.
+void SchedBoundary::releasePending() {
+  // If the available queue is empty, it is safe to reset MinReadyCycle.
+  if (Available.empty())
+    MinReadyCycle = UINT_MAX;
+
+  // Check to see if any of the pending instructions are ready to issue.  If
+  // so, add them to the available queue.
+  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
+  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
+    SUnit *SU = *(Pending.begin()+i);
+    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
+
+    if (ReadyCycle < MinReadyCycle)
+      MinReadyCycle = ReadyCycle;
+
+    if (!IsBuffered && ReadyCycle > CurrCycle)
+      continue;
+
+    if (checkHazard(SU))
+      continue;
+
+    Available.push(SU);
+    Pending.remove(Pending.begin()+i);
+    --i; --e;
+  }
+  DEBUG(if (!Pending.empty()) Pending.dump());
+  CheckPending = false;
+}
+
+/// Remove SU from the ready set for this boundary.
+void SchedBoundary::removeReady(SUnit *SU) {
+  if (Available.isInQueue(SU))
+    Available.remove(Available.find(SU));
+  else {
+    assert(Pending.isInQueue(SU) && "bad ready count");
+    Pending.remove(Pending.find(SU));
+  }
+}
+
+/// If this queue only has one ready candidate, return it. As a side effect,
+/// defer any nodes that now hit a hazard, and advance the cycle until at least
+/// one node is ready. If multiple instructions are ready, return NULL.
+SUnit *SchedBoundary::pickOnlyChoice() {
+  if (CheckPending)
+    releasePending();
+
+  if (CurrMOps > 0) {
+    // Defer any ready instrs that now have a hazard.
+    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
+      if (checkHazard(*I)) {
+        Pending.push(*I);
+        I = Available.remove(I);
+        continue;
+      }
+      ++I;
+    }
+  }
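+  // No instruction is available, so advance the cycle (releasing pending
+  // nodes) until one becomes ready; the assert bounds the retries.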
+  for (unsigned i = 0; Available.empty(); ++i) {
+    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) &&
+           "permanent hazard"); (void)i;
+    bumpCycle(CurrCycle + 1);
+    releasePending();
+  }
+  if (Available.size() == 1)
+    return *Available.begin();
+  return NULL;
+}
+
+#ifndef NDEBUG
+// This is useful information to dump after bumpNode.
+// Note that the Queue contents are more useful before pickNodeFromQueue.
+void SchedBoundary::dumpScheduledState() {
+  unsigned ResFactor;
+  unsigned ResCount;
+  if (ZoneCritResIdx) {
+    ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
+    ResCount = getResourceCount(ZoneCritResIdx);
+  }
+  else {
+    ResFactor = SchedModel->getMicroOpFactor();
+    ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
+  }
+  unsigned LFactor = SchedModel->getLatencyFactor();
+  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
+         << "  Retired: " << RetiredMOps;
+  dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
+  dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
+         << ResCount / ResFactor << " "
+         << SchedModel->getResourceName(ZoneCritResIdx)
+         << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
+         << (IsResourceLimited ? "  - Resource" : "  - Latency")
+         << " limited.\n";
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// GenericScheduler - Generic implementation of MachineSchedStrategy.
 //===----------------------------------------------------------------------===//
 
 namespace {
-/// GenericScheduler shrinks the unscheduled zone using heuristics to balance
-/// the schedule.
-class GenericScheduler : public MachineSchedStrategy {
+/// Base class for GenericScheduler. This class maintains information about
+/// scheduling candidates based on TargetSchedModel making it easy to implement
+/// heuristics for either preRA or postRA scheduling.
+class GenericSchedulerBase : public MachineSchedStrategy {
 public:
   /// Represent the type of SchedCandidate found within a single queue.
   /// pickNodeBidirectional depends on these listed by decreasing priority.
   enum CandReason {
-    NoCand, PhysRegCopy, RegExcess, RegCritical, Cluster, Weak, RegMax,
+    NoCand, PhysRegCopy, RegExcess, RegCritical, Stall, Cluster, Weak, RegMax,
     ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
     TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};
 
 #ifndef NDEBUG
-  static const char *getReasonStr(GenericScheduler::CandReason Reason);
+  static const char *getReasonStr(GenericSchedulerBase::CandReason Reason);
 #endif
 
   /// Policy for scheduling the next instruction in the candidate's zone.
@@ -1407,252 +2177,299 @@
                            const TargetSchedModel *SchedModel);
   };
 
-  /// Summarize the unscheduled region.
-  struct SchedRemainder {
-    // Critical path through the DAG in expected latency.
-    unsigned CriticalPath;
-    unsigned CyclicCritPath;
-
-    // Scaled count of micro-ops left to schedule.
-    unsigned RemIssueCount;
-
-    bool IsAcyclicLatencyLimited;
-
-    // Unscheduled resources
-    SmallVector<unsigned, 16> RemainingCounts;
-
-    void reset() {
-      CriticalPath = 0;
-      CyclicCritPath = 0;
-      RemIssueCount = 0;
-      IsAcyclicLatencyLimited = false;
-      RemainingCounts.clear();
-    }
-
-    SchedRemainder() { reset(); }
-
-    void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
-  };
-
-  /// Each Scheduling boundary is associated with ready queues. It tracks the
-  /// current cycle in the direction of movement, and maintains the state
-  /// of "hazards" and other interlocks at the current cycle.
-  struct SchedBoundary {
-    ScheduleDAGMI *DAG;
-    const TargetSchedModel *SchedModel;
-    SchedRemainder *Rem;
-
-    ReadyQueue Available;
-    ReadyQueue Pending;
-    bool CheckPending;
-
-    // For heuristics, keep a list of the nodes that immediately depend on the
-    // most recently scheduled node.
-    SmallPtrSet<const SUnit*, 8> NextSUs;
-
-    ScheduleHazardRecognizer *HazardRec;
-
-    /// Number of cycles it takes to issue the instructions scheduled in this
-    /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls.
-    /// See getStalls().
-    unsigned CurrCycle;
-
-    /// Micro-ops issued in the current cycle
-    unsigned CurrMOps;
-
-    /// MinReadyCycle - Cycle of the soonest available instruction.
-    unsigned MinReadyCycle;
-
-    // The expected latency of the critical path in this scheduled zone.
-    unsigned ExpectedLatency;
-
-    // The latency of dependence chains leading into this zone.
-    // For each node scheduled bottom-up: DLat = max DLat, N.Depth.
-    // For each cycle scheduled: DLat -= 1.
-    unsigned DependentLatency;
-
-    /// Count the scheduled (issued) micro-ops that can be retired by
-    /// time=CurrCycle assuming the first scheduled instr is retired at time=0.
-    unsigned RetiredMOps;
-
-    // Count scheduled resources that have been executed. Resources are
-    // considered executed if they become ready in the time that it takes to
-    // saturate any resource including the one in question. Counts are scaled
-    // for direct comparison with other resources. Counts can be compared with
-    // MOps * getMicroOpFactor and Latency * getLatencyFactor.
-    SmallVector<unsigned, 16> ExecutedResCounts;
-
-    /// Cache the max count for a single resource.
-    unsigned MaxExecutedResCount;
-
-    // Cache the critical resources ID in this scheduled zone.
-    unsigned ZoneCritResIdx;
-
-    // Is the scheduled region resource limited vs. latency limited.
-    bool IsResourceLimited;
-
-#ifndef NDEBUG
-    // Remember the greatest operand latency as an upper bound on the number of
-    // times we should retry the pending queue because of a hazard.
-    unsigned MaxObservedLatency;
-#endif
-
-    void reset() {
-      // A new HazardRec is created for each DAG and owned by SchedBoundary.
-      // Destroying and reconstructing it is very expensive though. So keep
-      // invalid, placeholder HazardRecs.
-      if (HazardRec && HazardRec->isEnabled()) {
-        delete HazardRec;
-        HazardRec = 0;
-      }
-      Available.clear();
-      Pending.clear();
-      CheckPending = false;
-      NextSUs.clear();
-      CurrCycle = 0;
-      CurrMOps = 0;
-      MinReadyCycle = UINT_MAX;
-      ExpectedLatency = 0;
-      DependentLatency = 0;
-      RetiredMOps = 0;
-      MaxExecutedResCount = 0;
-      ZoneCritResIdx = 0;
-      IsResourceLimited = false;
-#ifndef NDEBUG
-      MaxObservedLatency = 0;
-#endif
-      // Reserve a zero-count for invalid CritResIdx.
-      ExecutedResCounts.resize(1);
-      assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
-    }
-
-    /// Pending queues extend the ready queues with the same ID and the
-    /// PendingFlag set.
-    SchedBoundary(unsigned ID, const Twine &Name):
-      DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
-      Pending(ID << GenericScheduler::LogMaxQID, Name+".P"),
-      HazardRec(0) {
-      reset();
-    }
-
-    ~SchedBoundary() { delete HazardRec; }
-
-    void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
-              SchedRemainder *rem);
-
-    bool isTop() const {
-      return Available.getID() == GenericScheduler::TopQID;
-    }
-
-#ifndef NDEBUG
-    const char *getResourceName(unsigned PIdx) {
-      if (!PIdx)
-        return "MOps";
-      return SchedModel->getProcResource(PIdx)->Name;
-    }
-#endif
-
-    /// Get the number of latency cycles "covered" by the scheduled
-    /// instructions. This is the larger of the critical path within the zone
-    /// and the number of cycles required to issue the instructions.
-    unsigned getScheduledLatency() const {
-      return std::max(ExpectedLatency, CurrCycle);
-    }
-
-    unsigned getUnscheduledLatency(SUnit *SU) const {
-      return isTop() ? SU->getHeight() : SU->getDepth();
-    }
-
-    unsigned getResourceCount(unsigned ResIdx) const {
-      return ExecutedResCounts[ResIdx];
-    }
-
-    /// Get the scaled count of scheduled micro-ops and resources, including
-    /// executed resources.
-    unsigned getCriticalCount() const {
-      if (!ZoneCritResIdx)
-        return RetiredMOps * SchedModel->getMicroOpFactor();
-      return getResourceCount(ZoneCritResIdx);
-    }
-
-    /// Get a scaled count for the minimum execution time of the scheduled
-    /// micro-ops that are ready to execute by getExecutedCount. Notice the
-    /// feedback loop.
-    unsigned getExecutedCount() const {
-      return std::max(CurrCycle * SchedModel->getLatencyFactor(),
-                      MaxExecutedResCount);
-    }
-
-    bool checkHazard(SUnit *SU);
-
-    unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs);
-
-    unsigned getOtherResourceCount(unsigned &OtherCritIdx);
-
-    void setPolicy(CandPolicy &Policy, SchedBoundary &OtherZone);
-
-    void releaseNode(SUnit *SU, unsigned ReadyCycle);
-
-    void bumpCycle(unsigned NextCycle);
-
-    void incExecutedResources(unsigned PIdx, unsigned Count);
-
-    unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle);
-
-    void bumpNode(SUnit *SU);
-
-    void releasePending();
-
-    void removeReady(SUnit *SU);
-
-    SUnit *pickOnlyChoice();
-
-#ifndef NDEBUG
-    void dumpScheduledState();
-#endif
-  };
-
-private:
+protected:
   const MachineSchedContext *Context;
-  ScheduleDAGMI *DAG;
   const TargetSchedModel *SchedModel;
   const TargetRegisterInfo *TRI;
 
-  // State of the top and bottom scheduled instruction boundaries.
   SchedRemainder Rem;
+protected:
+  GenericSchedulerBase(const MachineSchedContext *C):
+    Context(C), SchedModel(0), TRI(0) {}
+
+  void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone,
+                 SchedBoundary *OtherZone);
+
+#ifndef NDEBUG
+  void traceCandidate(const SchedCandidate &Cand);
+#endif
+};
+} // namespace
+
+void GenericSchedulerBase::SchedCandidate::
+initResourceDelta(const ScheduleDAGMI *DAG,
+                  const TargetSchedModel *SchedModel) {
+  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
+    return;
+
+  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+  for (TargetSchedModel::ProcResIter
+         PI = SchedModel->getWriteProcResBegin(SC),
+         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
+      ResDelta.CritResources += PI->Cycles;
+    if (PI->ProcResourceIdx == Policy.DemandResIdx)
+      ResDelta.DemandedResources += PI->Cycles;
+  }
+}
+
+/// Set the CandPolicy for a scheduling zone, given the current resources and
+/// latencies inside and outside the zone.
+void GenericSchedulerBase::setPolicy(CandPolicy &Policy,
+                                     bool IsPostRA,
+                                     SchedBoundary &CurrZone,
+                                     SchedBoundary *OtherZone) {
+  // Apply preemptive heuristics based on the total latency and resources
+  // inside and outside this zone. Potential stalls should be considered before
+  // following this policy.
+
+  // Compute remaining latency. We need this both to determine whether the
+  // overall schedule has become latency-limited and whether the instructions
+  // outside this zone are resource or latency limited.
+  //
+  // The "dependent" latency is updated incrementally during scheduling as the
+  // max height/depth of scheduled nodes minus the cycles since it was
+  // scheduled:
+  //   DLat = max (N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
+  //
+  // The "independent" latency is the max ready queue depth:
+  //   ILat = max N.depth for N in Available|Pending
+  //
+  // RemainingLatency is the greater of independent and dependent latency.
+  unsigned RemLatency = CurrZone.getDependentLatency();
+  RemLatency = std::max(RemLatency,
+                        CurrZone.findMaxLatency(CurrZone.Available.elements()));
+  RemLatency = std::max(RemLatency,
+                        CurrZone.findMaxLatency(CurrZone.Pending.elements()));
+
+  // Compute the critical resource outside the zone.
+  unsigned OtherCritIdx = 0;
+  unsigned OtherCount =
+    OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
+
+  bool OtherResLimited = false;
+  if (SchedModel->hasInstrSchedModel()) {
+    unsigned LFactor = SchedModel->getLatencyFactor();
+    OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
+  }
+  // Schedule aggressively for latency in PostRA mode. We don't check for
+  // acyclic latency during PostRA, and highly out-of-order processors will
+  // skip PostRA scheduling.
+  if (!OtherResLimited) {
+    if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
+      Policy.ReduceLatency |= true;
+      DEBUG(dbgs() << "  " << CurrZone.Available.getName()
+            << " RemainingLatency " << RemLatency << " + "
+            << CurrZone.getCurrCycle() << "c > CritPath "
+            << Rem.CriticalPath << "\n");
+    }
+  }
+  // If the same resource is limiting inside and outside the zone, do nothing.
+  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
+    return;
+
+  DEBUG(
+    if (CurrZone.isResourceLimited()) {
+      dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: "
+             << SchedModel->getResourceName(CurrZone.getZoneCritResIdx())
+             << "\n";
+    }
+    if (OtherResLimited)
+      dbgs() << "  RemainingLimit: "
+             << SchedModel->getResourceName(OtherCritIdx) << "\n";
+    if (!CurrZone.isResourceLimited() && !OtherResLimited)
+      dbgs() << "  Latency limited both directions.\n");
+
+  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
+    Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
+
+  if (OtherResLimited)
+    Policy.DemandResIdx = OtherCritIdx;
+}
+
+#ifndef NDEBUG
+const char *GenericSchedulerBase::getReasonStr(
+  GenericSchedulerBase::CandReason Reason) {
+  switch (Reason) {
+  case NoCand:         return "NOCAND    ";
+  case PhysRegCopy:    return "PREG-COPY ";
+  case RegExcess:      return "REG-EXCESS";
+  case RegCritical:    return "REG-CRIT  ";
+  case Stall:          return "STALL     ";
+  case Cluster:        return "CLUSTER   ";
+  case Weak:           return "WEAK      ";
+  case RegMax:         return "REG-MAX   ";
+  case ResourceReduce: return "RES-REDUCE";
+  case ResourceDemand: return "RES-DEMAND";
+  case TopDepthReduce: return "TOP-DEPTH ";
+  case TopPathReduce:  return "TOP-PATH  ";
+  case BotHeightReduce:return "BOT-HEIGHT";
+  case BotPathReduce:  return "BOT-PATH  ";
+  case NextDefUse:     return "DEF-USE   ";
+  case NodeOrder:      return "ORDER     ";
+  }
+  llvm_unreachable("Unknown reason!");
+}
+
+void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
+  PressureChange P;
+  unsigned ResIdx = 0;
+  unsigned Latency = 0;
+  switch (Cand.Reason) {
+  default:
+    break;
+  case RegExcess:
+    P = Cand.RPDelta.Excess;
+    break;
+  case RegCritical:
+    P = Cand.RPDelta.CriticalMax;
+    break;
+  case RegMax:
+    P = Cand.RPDelta.CurrentMax;
+    break;
+  case ResourceReduce:
+    ResIdx = Cand.Policy.ReduceResIdx;
+    break;
+  case ResourceDemand:
+    ResIdx = Cand.Policy.DemandResIdx;
+    break;
+  case TopDepthReduce:
+    Latency = Cand.SU->getDepth();
+    break;
+  case TopPathReduce:
+    Latency = Cand.SU->getHeight();
+    break;
+  case BotHeightReduce:
+    Latency = Cand.SU->getHeight();
+    break;
+  case BotPathReduce:
+    Latency = Cand.SU->getDepth();
+    break;
+  }
+  dbgs() << "  SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
+  if (P.isValid())
+    dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
+           << ":" << P.getUnitInc() << " ";
+  else
+    dbgs() << "      ";
+  if (ResIdx)
+    dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
+  else
+    dbgs() << "         ";
+  if (Latency)
+    dbgs() << " " << Latency << " cycles ";
+  else
+    dbgs() << "          ";
+  dbgs() << '\n';
+}
+#endif
+
+/// Return true if this heuristic determines order.
+static bool tryLess(int TryVal, int CandVal,
+                    GenericSchedulerBase::SchedCandidate &TryCand,
+                    GenericSchedulerBase::SchedCandidate &Cand,
+                    GenericSchedulerBase::CandReason Reason) {
+  if (TryVal < CandVal) {
+    TryCand.Reason = Reason;
+    return true;
+  }
+  if (TryVal > CandVal) {
+    if (Cand.Reason > Reason)
+      Cand.Reason = Reason;
+    return true;
+  }
+  Cand.setRepeat(Reason);
+  return false;
+}
+
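+/// Return true if this heuristic determines order, preferring the greater
+/// value.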
+static bool tryGreater(int TryVal, int CandVal,
+                       GenericSchedulerBase::SchedCandidate &TryCand,
+                       GenericSchedulerBase::SchedCandidate &Cand,
+                       GenericSchedulerBase::CandReason Reason) {
+  if (TryVal > CandVal) {
+    TryCand.Reason = Reason;
+    return true;
+  }
+  if (TryVal < CandVal) {
+    if (Cand.Reason > Reason)
+      Cand.Reason = Reason;
+    return true;
+  }
+  Cand.setRepeat(Reason);
+  return false;
+}
+
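+/// Prefer the candidate that keeps latency in check: reduce depth (top-down)
+/// or height (bottom-up) when a candidate's path exceeds the zone's scheduled
+/// latency; otherwise extend the longer remaining critical path first.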
+static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
+                       GenericSchedulerBase::SchedCandidate &Cand,
+                       SchedBoundary &Zone) {
+  if (Zone.isTop()) {
+    if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
+      if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+                  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
+        return true;
+    }
+    if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+                   TryCand, Cand, GenericSchedulerBase::TopPathReduce))
+      return true;
+  }
+  else {
+    if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
+      if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+                  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
+        return true;
+    }
+    if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+                   TryCand, Cand, GenericSchedulerBase::BotPathReduce))
+      return true;
+  }
+  return false;
+}
+
+static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand,
+                      bool IsTop) {
+  DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
+        << GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n');
+}
+
+namespace {
+/// GenericScheduler shrinks the unscheduled zone using heuristics to balance
+/// the schedule.
+class GenericScheduler : public GenericSchedulerBase {
+  ScheduleDAGMILive *DAG;
+
+  // State of the top and bottom scheduled instruction boundaries.
   SchedBoundary Top;
   SchedBoundary Bot;
 
   MachineSchedPolicy RegionPolicy;
 public:
-  /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
-  enum {
-    TopQID = 1,
-    BotQID = 2,
-    LogMaxQID = 2
-  };
-
   GenericScheduler(const MachineSchedContext *C):
-    Context(C), DAG(0), SchedModel(0), TRI(0),
-    Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
+    GenericSchedulerBase(C), DAG(0), Top(SchedBoundary::TopQID, "TopQ"),
+    Bot(SchedBoundary::BotQID, "BotQ") {}
 
-  virtual void initPolicy(MachineBasicBlock::iterator Begin,
-                          MachineBasicBlock::iterator End,
-                          unsigned NumRegionInstrs);
+  void initPolicy(MachineBasicBlock::iterator Begin,
+                  MachineBasicBlock::iterator End,
+                  unsigned NumRegionInstrs) override;
 
-  bool shouldTrackPressure() const { return RegionPolicy.ShouldTrackPressure; }
+  bool shouldTrackPressure() const override {
+    return RegionPolicy.ShouldTrackPressure;
+  }
 
-  virtual void initialize(ScheduleDAGMI *dag);
+  void initialize(ScheduleDAGMI *dag) override;
 
-  virtual SUnit *pickNode(bool &IsTopNode);
+  SUnit *pickNode(bool &IsTopNode) override;
 
-  virtual void schedNode(SUnit *SU, bool IsTopNode);
+  void schedNode(SUnit *SU, bool IsTopNode) override;
 
-  virtual void releaseTopNode(SUnit *SU);
+  void releaseTopNode(SUnit *SU) override {
+    Top.releaseTopNode(SU);
+  }
 
-  virtual void releaseBottomNode(SUnit *SU);
+  void releaseBottomNode(SUnit *SU) override {
+    Bot.releaseBottomNode(SU);
+  }
 
-  virtual void registerRoots();
+  void registerRoots() override;
 
 protected:
   void checkAcyclicLatency();
@@ -1670,57 +2487,55 @@
                          SchedCandidate &Candidate);
 
   void reschedulePhysRegCopies(SUnit *SU, bool isTop);
-
-#ifndef NDEBUG
-  void traceCandidate(const SchedCandidate &Cand);
-#endif
 };
 } // namespace
 
-void GenericScheduler::SchedRemainder::
-init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
-  reset();
-  if (!SchedModel->hasInstrSchedModel())
-    return;
-  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
-  for (std::vector<SUnit>::iterator
-         I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
-    const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
-    RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC)
-      * SchedModel->getMicroOpFactor();
-    for (TargetSchedModel::ProcResIter
-           PI = SchedModel->getWriteProcResBegin(SC),
-           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
-      unsigned PIdx = PI->ProcResourceIdx;
-      unsigned Factor = SchedModel->getResourceFactor(PIdx);
-      RemainingCounts[PIdx] += (Factor * PI->Cycles);
-    }
-  }
-}
+void GenericScheduler::initialize(ScheduleDAGMI *dag) {
+  assert(dag->hasVRegLiveness() &&
+         "(PreRA)GenericScheduler needs vreg liveness");
+  DAG = static_cast<ScheduleDAGMILive*>(dag);
+  SchedModel = DAG->getSchedModel();
+  TRI = DAG->TRI;
 
-void GenericScheduler::SchedBoundary::
-init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
-  reset();
-  DAG = dag;
-  SchedModel = smodel;
-  Rem = rem;
-  if (SchedModel->hasInstrSchedModel())
-    ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
+  Rem.init(DAG, SchedModel);
+  Top.init(DAG, SchedModel, &Rem);
+  Bot.init(DAG, SchedModel, &Rem);
+
+  // Initialize resource counts.
+
+  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
+  // are disabled, then these HazardRecs will be disabled.
+  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
+  const TargetMachine &TM = DAG->MF.getTarget();
+  if (!Top.HazardRec) {
+    Top.HazardRec =
+      TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+  }
+  if (!Bot.HazardRec) {
+    Bot.HazardRec =
+      TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+  }
 }
 
 /// Initialize the per-region scheduling policy.
 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
-                                     MachineBasicBlock::iterator End,
-                                     unsigned NumRegionInstrs) {
+                                  MachineBasicBlock::iterator End,
+                                  unsigned NumRegionInstrs) {
   const TargetMachine &TM = Context->MF->getTarget();
+  const TargetLowering *TLI = TM.getTargetLowering();
 
   // Avoid setting up the register pressure tracker for small regions to save
   // compile time. As a rough heuristic, only track pressure when the number of
   // schedulable instructions exceeds half the integer register file.
-  unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
-    TM.getTargetLowering()->getRegClassFor(MVT::i32));
-
-  RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
+  RegionPolicy.ShouldTrackPressure = true;
+  for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
+    MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
+    if (TLI->isTypeLegal(LegalIntVT)) {
+      unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
+        TLI->getRegClassFor(LegalIntVT));
+      RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
+    }
+  }
 
   // For generic targets, we default to bottom-up, because it's simpler and more
   // compile-time optimizations have been implemented in that direction.
@@ -1750,71 +2565,6 @@
   }
 }
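
The loop above replaces a threshold computed from i32 alone. Note the overwrite semantics: the scan runs from MVT::i32 down to just above MVT::i1 and reassigns ShouldTrackPressure for every legal type, so the surviving decision comes from the smallest legal integer type, and the default of true only holds when no integer type is legal at all. A stand-alone sketch with hypothetical target numbers:

#include <cstdio>

int main() {
  // Pretend i32 and i16 are legal with 16 allocatable registers each; i8 is not.
  struct { const char *Name; bool Legal; unsigned NumRegs; } Types[] = {
    {"i32", true, 16}, {"i16", true, 16}, {"i8", false, 0},
  };
  unsigned NumRegionInstrs = 10;
  bool ShouldTrackPressure = true; // default when no legal integer type exists
  for (auto &T : Types)            // i32 first, mirroring the descending VT scan
    if (T.Legal)
      ShouldTrackPressure = NumRegionInstrs > (T.NumRegs / 2);
  std::printf("track pressure: %s\n", ShouldTrackPressure ? "yes" : "no");
}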
 
-void GenericScheduler::initialize(ScheduleDAGMI *dag) {
-  DAG = dag;
-  SchedModel = DAG->getSchedModel();
-  TRI = DAG->TRI;
-
-  Rem.init(DAG, SchedModel);
-  Top.init(DAG, SchedModel, &Rem);
-  Bot.init(DAG, SchedModel, &Rem);
-
-  // Initialize resource counts.
-
-  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
-  // are disabled, then these HazardRecs will be disabled.
-  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
-  const TargetMachine &TM = DAG->MF.getTarget();
-  if (!Top.HazardRec) {
-    Top.HazardRec =
-      TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
-  }
-  if (!Bot.HazardRec) {
-    Bot.HazardRec =
-      TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
-  }
-}
-
-void GenericScheduler::releaseTopNode(SUnit *SU) {
-  if (SU->isScheduled)
-    return;
-
-  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
-       I != E; ++I) {
-    if (I->isWeak())
-      continue;
-    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
-    unsigned Latency = I->getLatency();
-#ifndef NDEBUG
-    Top.MaxObservedLatency = std::max(Latency, Top.MaxObservedLatency);
-#endif
-    if (SU->TopReadyCycle < PredReadyCycle + Latency)
-      SU->TopReadyCycle = PredReadyCycle + Latency;
-  }
-  Top.releaseNode(SU, SU->TopReadyCycle);
-}
-
-void GenericScheduler::releaseBottomNode(SUnit *SU) {
-  if (SU->isScheduled)
-    return;
-
-  assert(SU->getInstr() && "Scheduled SUnit must have instr");
-
-  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
-       I != E; ++I) {
-    if (I->isWeak())
-      continue;
-    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
-    unsigned Latency = I->getLatency();
-#ifndef NDEBUG
-    Bot.MaxObservedLatency = std::max(Latency, Bot.MaxObservedLatency);
-#endif
-    if (SU->BotReadyCycle < SuccReadyCycle + Latency)
-      SU->BotReadyCycle = SuccReadyCycle + Latency;
-  }
-  Bot.releaseNode(SU, SU->BotReadyCycle);
-}
-
 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
 /// critical path by more cycles than it takes to drain the instruction buffer.
 /// We estimate an upper bounds on in-flight instructions as:
@@ -1869,493 +2619,11 @@
   }
 }
 
-/// Does this SU have a hazard within the current instruction group.
-///
-/// The scheduler supports two modes of hazard recognition. The first is the
-/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
-/// supports highly complicated in-order reservation tables
-/// (ScoreboardHazardRecognizer) and arbitraty target-specific logic.
-///
-/// The second is a streamlined mechanism that checks for hazards based on
-/// simple counters that the scheduler itself maintains. It explicitly checks
-/// for instruction dispatch limitations, including the number of micro-ops that
-/// can dispatch per cycle.
-///
-/// TODO: Also check whether the SU must start a new group.
-bool GenericScheduler::SchedBoundary::checkHazard(SUnit *SU) {
-  if (HazardRec->isEnabled())
-    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
-
-  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
-  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
-    DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
-          << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
-    return true;
-  }
-  return false;
-}
-
-// Find the unscheduled node in ReadySUs with the highest latency.
-unsigned GenericScheduler::SchedBoundary::
-findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
-  SUnit *LateSU = 0;
-  unsigned RemLatency = 0;
-  for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
-       I != E; ++I) {
-    unsigned L = getUnscheduledLatency(*I);
-    if (L > RemLatency) {
-      RemLatency = L;
-      LateSU = *I;
-    }
-  }
-  if (LateSU) {
-    DEBUG(dbgs() << Available.getName() << " RemLatency SU("
-          << LateSU->NodeNum << ") " << RemLatency << "c\n");
-  }
-  return RemLatency;
-}
-
-// Count resources in this zone and the remaining unscheduled
-// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
-// resource index, or zero if the zone is issue limited.
-unsigned GenericScheduler::SchedBoundary::
-getOtherResourceCount(unsigned &OtherCritIdx) {
-  OtherCritIdx = 0;
-  if (!SchedModel->hasInstrSchedModel())
-    return 0;
-
-  unsigned OtherCritCount = Rem->RemIssueCount
-    + (RetiredMOps * SchedModel->getMicroOpFactor());
-  DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
-        << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
-  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
-       PIdx != PEnd; ++PIdx) {
-    unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
-    if (OtherCount > OtherCritCount) {
-      OtherCritCount = OtherCount;
-      OtherCritIdx = PIdx;
-    }
-  }
-  if (OtherCritIdx) {
-    DEBUG(dbgs() << "  " << Available.getName() << " + Remain CritRes: "
-          << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
-          << " " << getResourceName(OtherCritIdx) << "\n");
-  }
-  return OtherCritCount;
-}
-
-/// Set the CandPolicy for this zone given the current resources and latencies
-/// inside and outside the zone.
-void GenericScheduler::SchedBoundary::setPolicy(CandPolicy &Policy,
-                                                   SchedBoundary &OtherZone) {
-  // Now that potential stalls have been considered, apply preemptive heuristics
-  // based on the the total latency and resources inside and outside this
-  // zone.
-
-  // Compute remaining latency. We need this both to determine whether the
-  // overall schedule has become latency-limited and whether the instructions
-  // outside this zone are resource or latency limited.
-  //
-  // The "dependent" latency is updated incrementally during scheduling as the
-  // max height/depth of scheduled nodes minus the cycles since it was
-  // scheduled:
-  //   DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
-  //
-  // The "independent" latency is the max ready queue depth:
-  //   ILat = max N.depth for N in Available|Pending
-  //
-  // RemainingLatency is the greater of independent and dependent latency.
-  unsigned RemLatency = DependentLatency;
-  RemLatency = std::max(RemLatency, findMaxLatency(Available.elements()));
-  RemLatency = std::max(RemLatency, findMaxLatency(Pending.elements()));
-
-  // Compute the critical resource outside the zone.
-  unsigned OtherCritIdx;
-  unsigned OtherCount = OtherZone.getOtherResourceCount(OtherCritIdx);
-
-  bool OtherResLimited = false;
-  if (SchedModel->hasInstrSchedModel()) {
-    unsigned LFactor = SchedModel->getLatencyFactor();
-    OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
-  }
-  if (!OtherResLimited && (RemLatency + CurrCycle > Rem->CriticalPath)) {
-    Policy.ReduceLatency |= true;
-    DEBUG(dbgs() << "  " << Available.getName() << " RemainingLatency "
-          << RemLatency << " + " << CurrCycle << "c > CritPath "
-          << Rem->CriticalPath << "\n");
-  }
-  // If the same resource is limiting inside and outside the zone, do nothing.
-  if (ZoneCritResIdx == OtherCritIdx)
-    return;
-
-  DEBUG(
-    if (IsResourceLimited) {
-      dbgs() << "  " << Available.getName() << " ResourceLimited: "
-             << getResourceName(ZoneCritResIdx) << "\n";
-    }
-    if (OtherResLimited)
-      dbgs() << "  RemainingLimit: " << getResourceName(OtherCritIdx) << "\n";
-    if (!IsResourceLimited && !OtherResLimited)
-      dbgs() << "  Latency limited both directions.\n");
-
-  if (IsResourceLimited && !Policy.ReduceResIdx)
-    Policy.ReduceResIdx = ZoneCritResIdx;
-
-  if (OtherResLimited)
-    Policy.DemandResIdx = OtherCritIdx;
-}
-
-void GenericScheduler::SchedBoundary::releaseNode(SUnit *SU,
-                                                     unsigned ReadyCycle) {
-  if (ReadyCycle < MinReadyCycle)
-    MinReadyCycle = ReadyCycle;
-
-  // Check for interlocks first. For the purpose of other heuristics, an
-  // instruction that cannot issue appears as if it's not in the ReadyQueue.
-  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
-  if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU))
-    Pending.push(SU);
-  else
-    Available.push(SU);
-
-  // Record this node as an immediate dependent of the scheduled node.
-  NextSUs.insert(SU);
-}
-
-/// Move the boundary of scheduled code by one cycle.
-void GenericScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) {
-  if (SchedModel->getMicroOpBufferSize() == 0) {
-    assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
-    if (MinReadyCycle > NextCycle)
-      NextCycle = MinReadyCycle;
-  }
-  // Update the current micro-ops, which will issue in the next cycle.
-  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
-  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
-
-  // Decrement DependentLatency based on the next cycle.
-  if ((NextCycle - CurrCycle) > DependentLatency)
-    DependentLatency = 0;
-  else
-    DependentLatency -= (NextCycle - CurrCycle);
-
-  if (!HazardRec->isEnabled()) {
-    // Bypass HazardRec virtual calls.
-    CurrCycle = NextCycle;
-  }
-  else {
-    // Bypass getHazardType calls in case of long latency.
-    for (; CurrCycle != NextCycle; ++CurrCycle) {
-      if (isTop())
-        HazardRec->AdvanceCycle();
-      else
-        HazardRec->RecedeCycle();
-    }
-  }
-  CheckPending = true;
-  unsigned LFactor = SchedModel->getLatencyFactor();
-  IsResourceLimited =
-    (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
-    > (int)LFactor;
-
-  DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
-}
-
-void GenericScheduler::SchedBoundary::incExecutedResources(unsigned PIdx,
-                                                              unsigned Count) {
-  ExecutedResCounts[PIdx] += Count;
-  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
-    MaxExecutedResCount = ExecutedResCounts[PIdx];
-}
-
-/// Add the given processor resource to this scheduled zone.
-///
-/// \param Cycles indicates the number of consecutive (non-pipelined) cycles
-/// during which this resource is consumed.
-///
-/// \return the next cycle at which the instruction may execute without
-/// oversubscribing resources.
-unsigned GenericScheduler::SchedBoundary::
-countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) {
-  unsigned Factor = SchedModel->getResourceFactor(PIdx);
-  unsigned Count = Factor * Cycles;
-  DEBUG(dbgs() << "  " << getResourceName(PIdx)
-        << " +" << Cycles << "x" << Factor << "u\n");
-
-  // Update Executed resources counts.
-  incExecutedResources(PIdx, Count);
-  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
-  Rem->RemainingCounts[PIdx] -= Count;
-
-  // Check if this resource exceeds the current critical resource. If so, it
-  // becomes the critical resource.
-  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
-    ZoneCritResIdx = PIdx;
-    DEBUG(dbgs() << "  *** Critical resource "
-          << getResourceName(PIdx) << ": "
-          << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
-  }
-  // TODO: We don't yet model reserved resources. It's not hard though.
-  return CurrCycle;
-}
-
-/// Move the boundary of scheduled code by one SUnit.
-void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) {
-  // Update the reservation table.
-  if (HazardRec->isEnabled()) {
-    if (!isTop() && SU->isCall) {
-      // Calls are scheduled with their preceding instructions. For bottom-up
-      // scheduling, clear the pipeline state before emitting.
-      HazardRec->Reset();
-    }
-    HazardRec->EmitInstruction(SU);
-  }
-  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
-  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
-  CurrMOps += IncMOps;
-  // checkHazard prevents scheduling multiple instructions per cycle that exceed
-  // issue width. However, we commonly reach the maximum. In this case
-  // opportunistically bump the cycle to avoid uselessly checking everything in
-  // the readyQ. Furthermore, a single instruction may produce more than one
-  // cycle's worth of micro-ops.
-  //
-  // TODO: Also check if this SU must end a dispatch group.
-  unsigned NextCycle = CurrCycle;
-  if (CurrMOps >= SchedModel->getIssueWidth()) {
-    ++NextCycle;
-    DEBUG(dbgs() << "  *** Max MOps " << CurrMOps
-          << " at cycle " << CurrCycle << '\n');
-  }
-  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
-  DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
-
-  switch (SchedModel->getMicroOpBufferSize()) {
-  case 0:
-    assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
-    break;
-  case 1:
-    if (ReadyCycle > NextCycle) {
-      NextCycle = ReadyCycle;
-      DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
-    }
-    break;
-  default:
-    // We don't currently model the OOO reorder buffer, so consider all
-    // scheduled MOps to be "retired".
-    break;
-  }
-  RetiredMOps += IncMOps;
-
-  // Update resource counts and critical resource.
-  if (SchedModel->hasInstrSchedModel()) {
-    unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
-    assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
-    Rem->RemIssueCount -= DecRemIssue;
-    if (ZoneCritResIdx) {
-      // Scale scheduled micro-ops for comparing with the critical resource.
-      unsigned ScaledMOps =
-        RetiredMOps * SchedModel->getMicroOpFactor();
-
-      // If scaled micro-ops are now more than the previous critical resource by
-      // a full cycle, then micro-ops issue becomes critical.
-      if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
-          >= (int)SchedModel->getLatencyFactor()) {
-        ZoneCritResIdx = 0;
-        DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
-              << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
-      }
-    }
-    for (TargetSchedModel::ProcResIter
-           PI = SchedModel->getWriteProcResBegin(SC),
-           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
-      unsigned RCycle =
-        countResource(PI->ProcResourceIdx, PI->Cycles, ReadyCycle);
-      if (RCycle > NextCycle)
-        NextCycle = RCycle;
-    }
-  }
-  // Update ExpectedLatency and DependentLatency.
-  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
-  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
-  if (SU->getDepth() > TopLatency) {
-    TopLatency = SU->getDepth();
-    DEBUG(dbgs() << "  " << Available.getName()
-          << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
-  }
-  if (SU->getHeight() > BotLatency) {
-    BotLatency = SU->getHeight();
-    DEBUG(dbgs() << "  " << Available.getName()
-          << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
-  }
-  // If we stall for any reason, bump the cycle.
-  if (NextCycle > CurrCycle) {
-    bumpCycle(NextCycle);
-  }
-  else {
-    // After updating ZoneCritResIdx and ExpectedLatency, check if we're
-    // resource limited. If a stall occured, bumpCycle does this.
-    unsigned LFactor = SchedModel->getLatencyFactor();
-    IsResourceLimited =
-      (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
-      > (int)LFactor;
-  }
-  DEBUG(dumpScheduledState());
-}
-
-/// Release pending ready nodes in to the available queue. This makes them
-/// visible to heuristics.
-void GenericScheduler::SchedBoundary::releasePending() {
-  // If the available queue is empty, it is safe to reset MinReadyCycle.
-  if (Available.empty())
-    MinReadyCycle = UINT_MAX;
-
-  // Check to see if any of the pending instructions are ready to issue.  If
-  // so, add them to the available queue.
-  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
-  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
-    SUnit *SU = *(Pending.begin()+i);
-    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
-
-    if (ReadyCycle < MinReadyCycle)
-      MinReadyCycle = ReadyCycle;
-
-    if (!IsBuffered && ReadyCycle > CurrCycle)
-      continue;
-
-    if (checkHazard(SU))
-      continue;
-
-    Available.push(SU);
-    Pending.remove(Pending.begin()+i);
-    --i; --e;
-  }
-  DEBUG(if (!Pending.empty()) Pending.dump());
-  CheckPending = false;
-}
-
-/// Remove SU from the ready set for this boundary.
-void GenericScheduler::SchedBoundary::removeReady(SUnit *SU) {
-  if (Available.isInQueue(SU))
-    Available.remove(Available.find(SU));
-  else {
-    assert(Pending.isInQueue(SU) && "bad ready count");
-    Pending.remove(Pending.find(SU));
-  }
-}
-
-/// If this queue only has one ready candidate, return it. As a side effect,
-/// defer any nodes that now hit a hazard, and advance the cycle until at least
-/// one node is ready. If multiple instructions are ready, return NULL.
-SUnit *GenericScheduler::SchedBoundary::pickOnlyChoice() {
-  if (CheckPending)
-    releasePending();
-
-  if (CurrMOps > 0) {
-    // Defer any ready instrs that now have a hazard.
-    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
-      if (checkHazard(*I)) {
-        Pending.push(*I);
-        I = Available.remove(I);
-        continue;
-      }
-      ++I;
-    }
-  }
-  for (unsigned i = 0; Available.empty(); ++i) {
-    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) &&
-           "permanent hazard"); (void)i;
-    bumpCycle(CurrCycle + 1);
-    releasePending();
-  }
-  if (Available.size() == 1)
-    return *Available.begin();
-  return NULL;
-}
-
-#ifndef NDEBUG
-// This is useful information to dump after bumpNode.
-// Note that the Queue contents are more useful before pickNodeFromQueue.
-void GenericScheduler::SchedBoundary::dumpScheduledState() {
-  unsigned ResFactor;
-  unsigned ResCount;
-  if (ZoneCritResIdx) {
-    ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
-    ResCount = getResourceCount(ZoneCritResIdx);
-  }
-  else {
-    ResFactor = SchedModel->getMicroOpFactor();
-    ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
-  }
-  unsigned LFactor = SchedModel->getLatencyFactor();
-  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
-         << "  Retired: " << RetiredMOps;
-  dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
-  dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
-         << ResCount / ResFactor << " " << getResourceName(ZoneCritResIdx)
-         << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
-         << (IsResourceLimited ? "  - Resource" : "  - Latency")
-         << " limited.\n";
-}
-#endif
-
-void GenericScheduler::SchedCandidate::
-initResourceDelta(const ScheduleDAGMI *DAG,
-                  const TargetSchedModel *SchedModel) {
-  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
-    return;
-
-  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
-  for (TargetSchedModel::ProcResIter
-         PI = SchedModel->getWriteProcResBegin(SC),
-         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
-    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
-      ResDelta.CritResources += PI->Cycles;
-    if (PI->ProcResourceIdx == Policy.DemandResIdx)
-      ResDelta.DemandedResources += PI->Cycles;
-  }
-}
-
-
-/// Return true if this heuristic determines order.
-static bool tryLess(int TryVal, int CandVal,
-                    GenericScheduler::SchedCandidate &TryCand,
-                    GenericScheduler::SchedCandidate &Cand,
-                    GenericScheduler::CandReason Reason) {
-  if (TryVal < CandVal) {
-    TryCand.Reason = Reason;
-    return true;
-  }
-  if (TryVal > CandVal) {
-    if (Cand.Reason > Reason)
-      Cand.Reason = Reason;
-    return true;
-  }
-  Cand.setRepeat(Reason);
-  return false;
-}
-
-static bool tryGreater(int TryVal, int CandVal,
-                       GenericScheduler::SchedCandidate &TryCand,
-                       GenericScheduler::SchedCandidate &Cand,
-                       GenericScheduler::CandReason Reason) {
-  if (TryVal > CandVal) {
-    TryCand.Reason = Reason;
-    return true;
-  }
-  if (TryVal < CandVal) {
-    if (Cand.Reason > Reason)
-      Cand.Reason = Reason;
-    return true;
-  }
-  Cand.setRepeat(Reason);
-  return false;
-}
-
 static bool tryPressure(const PressureChange &TryP,
                         const PressureChange &CandP,
-                        GenericScheduler::SchedCandidate &TryCand,
-                        GenericScheduler::SchedCandidate &Cand,
-                        GenericScheduler::CandReason Reason) {
+                        GenericSchedulerBase::SchedCandidate &TryCand,
+                        GenericSchedulerBase::SchedCandidate &Cand,
+                        GenericSchedulerBase::CandReason Reason) {
   int TryRank = TryP.getPSetOrMax();
   int CandRank = CandP.getPSetOrMax();
   // If both candidates affect the same set, go with the smallest increase.
@@ -2407,32 +2675,6 @@
   return 0;
 }
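
Only the first step of the retyped tryPressure is visible in the hunk above. A toy encoding of just the stated rule (candidates stressing the same pressure set are ordered by smallest unit increase), with everything the hunk elides deliberately left undecided:

#include <cstdio>

struct ToyPressure { int PSet; int UnitInc; };

// -1 prefers Try, +1 prefers Cand, 0 defers to later heuristics.
int compareSameSet(ToyPressure Try, ToyPressure Cand) {
  if (Try.PSet != Cand.PSet)
    return 0; // ranking across different sets is not modeled in this sketch
  if (Try.UnitInc != Cand.UnitInc)
    return Try.UnitInc < Cand.UnitInc ? -1 : +1; // smallest increase wins
  return 0;
}

int main() {
  ToyPressure Try{3, 1}, Cand{3, 4}; // same set, +1 vs +4 pressure units
  std::printf("result: %d\n", compareSameSet(Try, Cand)); // prints -1
}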
 
-static bool tryLatency(GenericScheduler::SchedCandidate &TryCand,
-                       GenericScheduler::SchedCandidate &Cand,
-                       GenericScheduler::SchedBoundary &Zone) {
-  if (Zone.isTop()) {
-    if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
-      if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
-                  TryCand, Cand, GenericScheduler::TopDepthReduce))
-        return true;
-    }
-    if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
-                   TryCand, Cand, GenericScheduler::TopPathReduce))
-      return true;
-  }
-  else {
-    if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
-      if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
-                  TryCand, Cand, GenericScheduler::BotHeightReduce))
-        return true;
-    }
-    if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
-                   TryCand, Cand, GenericScheduler::BotPathReduce))
-      return true;
-  }
-  return false;
-}
-
 /// Apply a set of heursitics to a new candidate. Heuristics are currently
 /// hierarchical. This may be more efficient than a graduated cost model because
 /// we don't need to evaluate all aspects of the model for each node in the
@@ -2445,10 +2687,10 @@
 /// \param RPTracker describes reg pressure within the scheduled zone.
 /// \param TempTracker is a scratch pressure tracker to reuse in queries.
 void GenericScheduler::tryCandidate(SchedCandidate &Cand,
-                                       SchedCandidate &TryCand,
-                                       SchedBoundary &Zone,
-                                       const RegPressureTracker &RPTracker,
-                                       RegPressureTracker &TempTracker) {
+                                    SchedCandidate &TryCand,
+                                    SchedBoundary &Zone,
+                                    const RegPressureTracker &RPTracker,
+                                    RegPressureTracker &TempTracker) {
 
   if (DAG->isTrackingPressure()) {
     // Always initialize TryCand's RPDelta.
@@ -2510,10 +2752,15 @@
   // For loops that are acyclic path limited, aggressively schedule for latency.
   // This can result in very long dependence chains scheduled in sequence, so
   // once every cycle (when CurrMOps == 0), switch to normal heuristics.
-  if (Rem.IsAcyclicLatencyLimited && !Zone.CurrMOps
+  if (Rem.IsAcyclicLatencyLimited && !Zone.getCurrMOps()
       && tryLatency(TryCand, Cand, Zone))
     return;
 
+  // Prioritize instructions that read unbuffered resources by stall cycles.
+  if (tryLess(Zone.getLatencyStallCycles(TryCand.SU),
+              Zone.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
+    return;
+
   // Keep clustered nodes together to encourage downstream peephole
   // optimizations which may reduce resource requirements.
   //
@@ -2558,7 +2805,7 @@
   // Prefer immediate defs/users of the last scheduled instruction. This is a
   // local pressure avoidance strategy that also makes the machine code
   // readable.
-  if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
+  if (tryGreater(Zone.isNextSU(TryCand.SU), Zone.isNextSU(Cand.SU),
                  TryCand, Cand, NextDefUse))
     return;
 
@@ -2569,90 +2816,14 @@
   }
 }
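
The newly inserted Stall heuristic sits between the acyclic-latency check and the cluster heuristic above; because it uses tryLess, fewer stall cycles on unbuffered resources wins outright, and a tie falls through. A toy illustration with hypothetical cycle counts:

#include <cstdio>

int main() {
  // Pretend getLatencyStallCycles() returned these for the two candidates.
  unsigned TryStall = 0, CandStall = 2;
  if (TryStall != CandStall)
    std::printf("decided by Stall: prefer %s\n",
                TryStall < CandStall ? "TryCand" : "Cand");
  else
    std::printf("tie: fall through to the cluster heuristic\n");
}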
 
-#ifndef NDEBUG
-const char *GenericScheduler::getReasonStr(
-  GenericScheduler::CandReason Reason) {
-  switch (Reason) {
-  case NoCand:         return "NOCAND    ";
-  case PhysRegCopy:    return "PREG-COPY";
-  case RegExcess:      return "REG-EXCESS";
-  case RegCritical:    return "REG-CRIT  ";
-  case Cluster:        return "CLUSTER   ";
-  case Weak:           return "WEAK      ";
-  case RegMax:         return "REG-MAX   ";
-  case ResourceReduce: return "RES-REDUCE";
-  case ResourceDemand: return "RES-DEMAND";
-  case TopDepthReduce: return "TOP-DEPTH ";
-  case TopPathReduce:  return "TOP-PATH  ";
-  case BotHeightReduce:return "BOT-HEIGHT";
-  case BotPathReduce:  return "BOT-PATH  ";
-  case NextDefUse:     return "DEF-USE   ";
-  case NodeOrder:      return "ORDER     ";
-  };
-  llvm_unreachable("Unknown reason!");
-}
-
-void GenericScheduler::traceCandidate(const SchedCandidate &Cand) {
-  PressureChange P;
-  unsigned ResIdx = 0;
-  unsigned Latency = 0;
-  switch (Cand.Reason) {
-  default:
-    break;
-  case RegExcess:
-    P = Cand.RPDelta.Excess;
-    break;
-  case RegCritical:
-    P = Cand.RPDelta.CriticalMax;
-    break;
-  case RegMax:
-    P = Cand.RPDelta.CurrentMax;
-    break;
-  case ResourceReduce:
-    ResIdx = Cand.Policy.ReduceResIdx;
-    break;
-  case ResourceDemand:
-    ResIdx = Cand.Policy.DemandResIdx;
-    break;
-  case TopDepthReduce:
-    Latency = Cand.SU->getDepth();
-    break;
-  case TopPathReduce:
-    Latency = Cand.SU->getHeight();
-    break;
-  case BotHeightReduce:
-    Latency = Cand.SU->getHeight();
-    break;
-  case BotPathReduce:
-    Latency = Cand.SU->getDepth();
-    break;
-  }
-  dbgs() << "  SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
-  if (P.isValid())
-    dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
-           << ":" << P.getUnitInc() << " ";
-  else
-    dbgs() << "      ";
-  if (ResIdx)
-    dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
-  else
-    dbgs() << "         ";
-  if (Latency)
-    dbgs() << " " << Latency << " cycles ";
-  else
-    dbgs() << "          ";
-  dbgs() << '\n';
-}
-#endif
-
 /// Pick the best candidate from the queue.
 ///
 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
 /// DAG building. To adjust for the current scheduling location we need to
 /// maintain the number of vreg uses remaining to be top-scheduled.
 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
-                                            const RegPressureTracker &RPTracker,
-                                            SchedCandidate &Cand) {
+                                         const RegPressureTracker &RPTracker,
+                                         SchedCandidate &Cand) {
   ReadyQueue &Q = Zone.Available;
 
   DEBUG(Q.dump());
@@ -2675,12 +2846,6 @@
   }
 }
 
-static void tracePick(const GenericScheduler::SchedCandidate &Cand,
-                      bool IsTop) {
-  DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
-        << GenericScheduler::getReasonStr(Cand.Reason) << '\n');
-}
-
 /// Pick the best candidate node from either the top or bottom queue.
 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
   // Schedule as far as possible in the direction of no choice. This is most
@@ -2698,8 +2863,12 @@
   CandPolicy NoPolicy;
   SchedCandidate BotCand(NoPolicy);
   SchedCandidate TopCand(NoPolicy);
-  Bot.setPolicy(BotCand.Policy, Top);
-  Top.setPolicy(TopCand.Policy, Bot);
+  // Set the bottom-up policy based on the state of the current bottom zone and
+  // the instructions outside the zone, including the top zone.
+  setPolicy(BotCand.Policy, /*IsPostRA=*/false, Bot, &Top);
+  // Set the top-down policy based on the state of the current top zone and
+  // the instructions outside the zone, including the bottom zone.
+  setPolicy(TopCand.Policy, /*IsPostRA=*/false, Top, &Bot);
 
   // Prefer bottom scheduling when heuristics are silent.
   pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
@@ -2809,20 +2978,21 @@
 }
 
 /// Update the scheduler's state after scheduling a node. This is the same node
-/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
-/// it's state based on the current cycle before MachineSchedStrategy does.
+/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
+/// update its state based on the current cycle before MachineSchedStrategy
+/// does.
 ///
 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
 /// them here. See comments in biasPhysRegCopy.
 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
   if (IsTopNode) {
-    SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.CurrCycle);
+    SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
     Top.bumpNode(SU);
     if (SU->hasPhysRegUses)
       reschedulePhysRegCopies(SU, true);
   }
   else {
-    SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.CurrCycle);
+    SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
     Bot.bumpNode(SU);
     if (SU->hasPhysRegDefs)
       reschedulePhysRegCopies(SU, false);
@@ -2831,8 +3001,8 @@
 
 /// Create the standard converging machine scheduler. This will be used as the
 /// default scheduler if the target does not set a default.
-static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C) {
-  ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new GenericScheduler(C));
+static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
+  ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, new GenericScheduler(C));
   // Register DAG post-processors.
   //
   // FIXME: extend the mutation API to allow earlier mutations to instantiate
@@ -2845,9 +3015,191 @@
     DAG->addMutation(new MacroFusion(DAG->TII));
   return DAG;
 }
+
 static MachineSchedRegistry
 GenericSchedRegistry("converge", "Standard converging scheduler.",
-                     createGenericSched);
+                     createGenericSchedLive);
+
+//===----------------------------------------------------------------------===//
+// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// PostGenericScheduler - Interface to the scheduling algorithm used by
+/// ScheduleDAGMI.
+///
+/// Callbacks from ScheduleDAGMI:
+///   initPolicy -> initialize(DAG) -> registerRoots -> pickNode ...
+class PostGenericScheduler : public GenericSchedulerBase {
+  ScheduleDAGMI *DAG;
+  SchedBoundary Top;
+  SmallVector<SUnit*, 8> BotRoots;
+public:
+  PostGenericScheduler(const MachineSchedContext *C):
+    GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {}
+
+  virtual ~PostGenericScheduler() {}
+
+  void initPolicy(MachineBasicBlock::iterator Begin,
+                  MachineBasicBlock::iterator End,
+                  unsigned NumRegionInstrs) override {
+    /* no configurable policy */
+  }
+
+  /// PostRA scheduling does not track pressure.
+  bool shouldTrackPressure() const override { return false; }
+
+  void initialize(ScheduleDAGMI *Dag) override {
+    DAG = Dag;
+    SchedModel = DAG->getSchedModel();
+    TRI = DAG->TRI;
+
+    Rem.init(DAG, SchedModel);
+    Top.init(DAG, SchedModel, &Rem);
+    BotRoots.clear();
+
+    // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
+    // or are disabled, then these HazardRecs will be disabled.
+    const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
+    const TargetMachine &TM = DAG->MF.getTarget();
+    if (!Top.HazardRec) {
+      Top.HazardRec =
+        TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+    }
+  }
+
+  void registerRoots() override;
+
+  SUnit *pickNode(bool &IsTopNode) override;
+
+  void scheduleTree(unsigned SubtreeID) override {
+    llvm_unreachable("PostRA scheduler does not support subtree analysis.");
+  }
+
+  void schedNode(SUnit *SU, bool IsTopNode) override;
+
+  void releaseTopNode(SUnit *SU) override {
+    Top.releaseTopNode(SU);
+  }
+
+  // Only called for roots.
+  void releaseBottomNode(SUnit *SU) override {
+    BotRoots.push_back(SU);
+  }
+
+protected:
+  void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand);
+
+  void pickNodeFromQueue(SchedCandidate &Cand);
+};
+} // namespace
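
The callback order documented in the class comment (initPolicy -> initialize -> registerRoots -> pickNode, with schedNode after each pick) can be mocked up as a toy driver. This sketches the control flow only; the types are stand-ins, not the real ScheduleDAGMI interface:

#include <cstdio>
#include <vector>

struct ToyPostRAStrategy {
  std::vector<int> Ready{2, 0, 1};           // pretend node ids
  void initPolicy()    { std::puts("initPolicy: no per-region knobs"); }
  void initialize()    { std::puts("initialize: bind DAG, model, hazard rec"); }
  void registerRoots() { std::puts("registerRoots: seed the critical path"); }
  int pickNode() {                           // -1 once the region is drained
    if (Ready.empty()) return -1;
    int SU = Ready.back(); Ready.pop_back(); return SU;
  }
  void schedNode(int SU) { std::printf("schedNode: SU(%d)\n", SU); }
};

int main() {
  ToyPostRAStrategy S;
  S.initPolicy();
  S.initialize();
  S.registerRoots();
  for (int SU = S.pickNode(); SU != -1; SU = S.pickNode())
    S.schedNode(SU);
}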
+
+void PostGenericScheduler::registerRoots() {
+  Rem.CriticalPath = DAG->ExitSU.getDepth();
+
+  // Some roots may not feed into ExitSU. Check all of them just in case.
+  for (SmallVectorImpl<SUnit*>::const_iterator
+         I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) {
+    if ((*I)->getDepth() > Rem.CriticalPath)
+      Rem.CriticalPath = (*I)->getDepth();
+  }
+  DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
+}
+
+/// Apply a set of heuristics to a new candidate for PostRA scheduling.
+///
+/// \param Cand provides the policy and current best candidate.
+/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
+void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
+                                        SchedCandidate &TryCand) {
+
+  // Initialize the candidate if needed.
+  if (!Cand.isValid()) {
+    TryCand.Reason = NodeOrder;
+    return;
+  }
+
+  // Prioritize instructions that read unbuffered resources by stall cycles.
+  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
+              Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
+    return;
+
+  // Avoid critical resource consumption and balance the schedule.
+  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+              TryCand, Cand, ResourceReduce))
+    return;
+  if (tryGreater(TryCand.ResDelta.DemandedResources,
+                 Cand.ResDelta.DemandedResources,
+                 TryCand, Cand, ResourceDemand))
+    return;
+
+  // Avoid serializing long latency dependence chains.
+  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
+    return;
+  }
+
+  // Fall through to original instruction order.
+  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
+    TryCand.Reason = NodeOrder;
+}
+
+void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
+  ReadyQueue &Q = Top.Available;
+
+  DEBUG(Q.dump());
+
+  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+    SchedCandidate TryCand(Cand.Policy);
+    TryCand.SU = *I;
+    TryCand.initResourceDelta(DAG, SchedModel);
+    tryCandidate(Cand, TryCand);
+    if (TryCand.Reason != NoCand) {
+      Cand.setBest(TryCand);
+      DEBUG(traceCandidate(Cand));
+    }
+  }
+}
+
+/// Pick the next node to schedule.
+SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
+  if (DAG->top() == DAG->bottom()) {
+    assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
+    return NULL;
+  }
+  SUnit *SU;
+  do {
+    SU = Top.pickOnlyChoice();
+    if (!SU) {
+      CandPolicy NoPolicy;
+      SchedCandidate TopCand(NoPolicy);
+      // Set the top-down policy based on the state of the current top zone and
+      // the instructions outside the zone, including the bottom zone.
+      setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, NULL);
+      pickNodeFromQueue(TopCand);
+      assert(TopCand.Reason != NoCand && "failed to find a candidate");
+      tracePick(TopCand, true);
+      SU = TopCand.SU;
+    }
+  } while (SU->isScheduled);
+
+  IsTopNode = true;
+  Top.removeReady(SU);
+
+  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
+  return SU;
+}
+
+/// Called after ScheduleDAGMI has scheduled an instruction and updated
+/// scheduled/remaining flags in the DAG nodes.
+void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
+  Top.bumpNode(SU);
+}
+
+/// Create a generic scheduler with no vreg liveness or DAG mutation passes.
+static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) {
+  return new ScheduleDAGMI(C, new PostGenericScheduler(C), /*IsPostRA=*/true);
+}
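
Note that createGenericSchedPostRA gets no command-line registration in this hunk. If it were exposed the same way the live scheduler is registered above, the registration would look like the following sketch, where the "postra-generic" flag name and registry variable are hypothetical:

static MachineSchedRegistry
PostGenericSchedRegistry("postra-generic", "Generic PostRA scheduler.",
                         createGenericSchedPostRA);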
 
 //===----------------------------------------------------------------------===//
 // ILP Scheduler. Currently for experimental analysis of heuristics.
@@ -2889,22 +3241,23 @@
 
 /// \brief Schedule based on the ILP metric.
 class ILPScheduler : public MachineSchedStrategy {
-  ScheduleDAGMI *DAG;
+  ScheduleDAGMILive *DAG;
   ILPOrder Cmp;
 
   std::vector<SUnit*> ReadyQ;
 public:
   ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {}
 
-  virtual void initialize(ScheduleDAGMI *dag) {
-    DAG = dag;
+  void initialize(ScheduleDAGMI *dag) override {
+    assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
+    DAG = static_cast<ScheduleDAGMILive*>(dag);
     DAG->computeDFSResult();
     Cmp.DFSResult = DAG->getDFSResult();
     Cmp.ScheduledTrees = &DAG->getScheduledTrees();
     ReadyQ.clear();
   }
 
-  virtual void registerRoots() {
+  void registerRoots() override {
     // Restore the heap in ReadyQ with the updated DFS results.
     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
   }
@@ -2913,7 +3266,7 @@
   /// -----------------------------------------
 
   /// Callback to select the highest priority node from the ready Q.
-  virtual SUnit *pickNode(bool &IsTopNode) {
+  SUnit *pickNode(bool &IsTopNode) override {
     if (ReadyQ.empty()) return NULL;
     std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
     SUnit *SU = ReadyQ.back();
@@ -2929,19 +3282,19 @@
   }
 
   /// \brief Scheduler callback to notify that a new subtree is scheduled.
-  virtual void scheduleTree(unsigned SubtreeID) {
+  void scheduleTree(unsigned SubtreeID) override {
     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
   }
 
   /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
   /// DFSResults, and resort the priority Q.
-  virtual void schedNode(SUnit *SU, bool IsTopNode) {
+  void schedNode(SUnit *SU, bool IsTopNode) override {
     assert(!IsTopNode && "SchedDFSResult needs bottom-up");
   }
 
-  virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
+  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
 
-  virtual void releaseBottomNode(SUnit *SU) {
+  void releaseBottomNode(SUnit *SU) override {
     ReadyQ.push_back(SU);
     std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
   }
@@ -2949,10 +3302,10 @@
 } // namespace
 
 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
-  return new ScheduleDAGMI(C, new ILPScheduler(true));
+  return new ScheduleDAGMILive(C, new ILPScheduler(true));
 }
 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
-  return new ScheduleDAGMI(C, new ILPScheduler(false));
+  return new ScheduleDAGMILive(C, new ILPScheduler(false));
 }
 static MachineSchedRegistry ILPMaxRegistry(
   "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
@@ -2994,7 +3347,7 @@
   InstructionShuffler(bool alternate, bool topdown)
     : IsAlternating(alternate), IsTopDown(topdown) {}
 
-  virtual void initialize(ScheduleDAGMI *) {
+  virtual void initialize(ScheduleDAGMI*) {
     TopQ.clear();
     BottomQ.clear();
   }
@@ -3041,7 +3394,7 @@
   bool TopDown = !ForceBottomUp;
   assert((TopDown || !ForceTopDown) &&
          "-misched-topdown incompatible with -misched-bottomup");
-  return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
+  return new ScheduleDAGMILive(C, new InstructionShuffler(Alternate, TopDown));
 }
 static MachineSchedRegistry ShufflerRegistry(
   "shuffle", "Shuffle machine instructions alternating directions",
@@ -3049,7 +3402,7 @@
 #endif // !NDEBUG
 
 //===----------------------------------------------------------------------===//
-// GraphWriter support for ScheduleDAGMI.
+// GraphWriter support for ScheduleDAGMILive.
 //===----------------------------------------------------------------------===//
 
 #ifndef NDEBUG
@@ -3095,8 +3448,9 @@
   static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
     std::string Str;
     raw_string_ostream SS(Str);
-    const SchedDFSResult *DFS =
-      static_cast<const ScheduleDAGMI*>(G)->getDFSResult();
+    const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
+    const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
+      static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : 0;
     SS << "SU:" << SU->NodeNum;
     if (DFS)
       SS << " I:" << DFS->getNumInstrs(SU);
@@ -3106,11 +3460,11 @@
     return G->getGraphNodeLabel(SU);
   }
 
-  static std::string getNodeAttributes(const SUnit *N,
-                                       const ScheduleDAG *Graph) {
+  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
     std::string Str("shape=Mrecord");
-    const SchedDFSResult *DFS =
-      static_cast<const ScheduleDAGMI*>(Graph)->getDFSResult();
+    const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
+    const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
+      static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : 0;
     if (DFS) {
       Str += ",style=filled,fillcolor=\"#";
       Str += DOT::getColorString(DFS->getSubtreeID(N));
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 105d7c2..dbff1f6 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -60,9 +60,9 @@
       initializeMachineSinkingPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual bool runOnMachineFunction(MachineFunction &MF);
+    bool runOnMachineFunction(MachineFunction &MF) override;
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesCFG();
       MachineFunctionPass::getAnalysisUsage(AU);
       AU.addRequired<AliasAnalysis>();
@@ -72,7 +72,7 @@
       AU.addPreserved<MachineLoopInfo>();
     }
 
-    virtual void releaseMemory() {
+    void releaseMemory() override {
       CEBCandidates.clear();
     }
 
@@ -98,16 +98,6 @@
     bool PerformTrivialForwardCoalescing(MachineInstr *MI,
                                          MachineBasicBlock *MBB);
   };
-
-  // SuccessorSorter - Sort Successors according to their loop depth. 
-  struct SuccessorSorter {
-    SuccessorSorter(MachineLoopInfo *LoopInfo) : LI(LoopInfo) {}
-    bool operator()(const MachineBasicBlock *LHS,
-                    const MachineBasicBlock *RHS) const {
-      return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS);
-    }
-    MachineLoopInfo *LI;
-  };
 } // end anonymous namespace
 
 char MachineSinking::ID = 0;
@@ -181,13 +171,12 @@
   //   Predecessors according to CFG: BB#0 BB#1
   //     %reg16386<def> = PHI %reg16434, <BB#0>, %reg16385, <BB#1>
   BreakPHIEdge = true;
-  for (MachineRegisterInfo::use_nodbg_iterator
-         I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
-       I != E; ++I) {
-    MachineInstr *UseInst = &*I;
+  for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+    MachineInstr *UseInst = MO.getParent();
+    unsigned OpNo = &MO - &UseInst->getOperand(0);
     MachineBasicBlock *UseBlock = UseInst->getParent();
     if (!(UseBlock == MBB && UseInst->isPHI() &&
-          UseInst->getOperand(I.getOperandNo()+1).getMBB() == DefMBB)) {
+          UseInst->getOperand(OpNo+1).getMBB() == DefMBB)) {
       BreakPHIEdge = false;
       break;
     }
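
The OpNo computation above (&MO - &UseInst->getOperand(0)) recovers the operand index that the old use_nodbg_iterator exposed via getOperandNo(): operands sit in contiguous storage, so the pointer distance from operand zero is the index. A stand-alone sketch with toy types:

#include <cstdio>

struct ToyOperand { int Val; };

struct ToyInstr {
  ToyOperand Ops[4];
  ToyOperand &getOperand(unsigned I) { return Ops[I]; }
};

int main() {
  ToyInstr MI = {{{10}, {11}, {12}, {13}}};
  for (ToyOperand &MO : MI.Ops) {
    unsigned OpNo = &MO - &MI.getOperand(0); // contiguous storage assumed
    std::printf("operand %u = %d\n", OpNo, MO.Val);
  }
}
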
@@ -195,16 +184,15 @@
   if (BreakPHIEdge)
     return true;
 
-  for (MachineRegisterInfo::use_nodbg_iterator
-         I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
-       I != E; ++I) {
+  for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
     // Determine the block of the use.
-    MachineInstr *UseInst = &*I;
+    MachineInstr *UseInst = MO.getParent();
+    unsigned OpNo = &MO - &UseInst->getOperand(0);
     MachineBasicBlock *UseBlock = UseInst->getParent();
     if (UseInst->isPHI()) {
       // PHI nodes use the operand in the predecessor block, not the block with
       // the PHI.
-      UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB();
+      UseBlock = UseInst->getOperand(OpNo+1).getMBB();
     } else if (UseBlock == DefMBB) {
       LocalUse = true;
       return false;
@@ -219,6 +207,9 @@
 }
 
 bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
+  if (skipOptnoneFunction(*MF.getFunction()))
+    return false;
+
   DEBUG(dbgs() << "******** Machine Sinking ********\n");
 
   const TargetMachine &TM = MF.getTarget();
@@ -460,12 +451,9 @@
 
   // Check if only use in post dominated block is PHI instruction.
   bool NonPHIUse = false;
-  for (MachineRegisterInfo::use_nodbg_iterator
-         I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
-       I != E; ++I) {
-    MachineInstr *UseInst = &*I;
-    MachineBasicBlock *UseBlock = UseInst->getParent();
-    if (UseBlock == SuccToSinkTo && !UseInst->isPHI())
+  for (MachineInstr &UseInst : MRI->use_nodbg_instructions(Reg)) {
+    MachineBasicBlock *UseBlock = UseInst.getParent();
+    if (UseBlock == SuccToSinkTo && !UseInst.isPHI())
       NonPHIUse = true;
   }
   if (!NonPHIUse)
@@ -553,7 +541,12 @@
       // we should sink to.
       // We give successors with smaller loop depth higher priority.
       SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end());
-      std::stable_sort(Succs.begin(), Succs.end(), SuccessorSorter(LI));
+      // Sort Successors according to their loop depth.
+      std::stable_sort(
+          Succs.begin(), Succs.end(),
+          [this](const MachineBasicBlock *LHS, const MachineBasicBlock *RHS) {
+            return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS);
+          });
       for (SmallVectorImpl<MachineBasicBlock *>::iterator SI = Succs.begin(),
              E = Succs.end(); SI != E; ++SI) {
         MachineBasicBlock *SuccBlock = *SI;
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index 6aa3f67..d07178e 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -302,9 +302,9 @@
 // instructions.
 namespace {
 class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
-  const char *getName() const { return "MinInstr"; }
-  const MachineBasicBlock *pickTracePred(const MachineBasicBlock*);
-  const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*);
+  const char *getName() const override { return "MinInstr"; }
+  const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
+  const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override;
 
 public:
   MinInstrCountEnsemble(MachineTraceMetrics *mtm)
@@ -627,7 +627,7 @@
     assert(TargetRegisterInfo::isVirtualRegister(VirtReg));
     MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
     assert(!DefI.atEnd() && "Register has no defs");
-    DefMI = &*DefI;
+    DefMI = DefI->getParent();
     DefOp = DefI.getOperandNo();
     assert((++DefI).atEnd() && "Register has multiple defs");
   }
@@ -944,7 +944,7 @@
   // Update Heights[DefMI] to be the maximum height seen.
   MIHeightMap::iterator I;
   bool New;
-  tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
+  std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
   if (New)
     return true;
 
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index d61470c..1bd75f7 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -246,12 +246,12 @@
         initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry());
       }
 
-    void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesAll();
       MachineFunctionPass::getAnalysisUsage(AU);
     }
 
-    bool runOnMachineFunction(MachineFunction &MF) {
+    bool runOnMachineFunction(MachineFunction &MF) override {
       MF.verify(this, Banner);
       return false;
     }
@@ -276,7 +276,8 @@
   raw_ostream *OutFile = 0;
   if (OutFileName) {
     std::string ErrorInfo;
-    OutFile = new raw_fd_ostream(OutFileName, ErrorInfo, sys::fs::F_Append);
+    OutFile = new raw_fd_ostream(OutFileName, ErrorInfo,
+                                 sys::fs::F_Append | sys::fs::F_Text);
     if (!ErrorInfo.empty()) {
       errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n';
       exit(1);
@@ -1075,7 +1076,7 @@
 
     // Verify SSA form.
     if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) &&
-        llvm::next(MRI->def_begin(Reg)) != MRI->def_end())
+        std::next(MRI->def_begin(Reg)) != MRI->def_end())
       report("Multiple virtual register defs in SSA form", MO, MONum);
 
     // Check LiveInts for a live segment, but only for virtual registers.
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index 3982612..56cb673 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -37,9 +37,9 @@
       initializeOptimizePHIsPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual bool runOnMachineFunction(MachineFunction &MF);
+    bool runOnMachineFunction(MachineFunction &MF) override;
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesCFG();
       MachineFunctionPass::getAnalysisUsage(AU);
     }
@@ -61,6 +61,9 @@
                 "Optimize machine instruction PHIs", false, false)
 
 bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
+  if (skipOptnoneFunction(*Fn.getFunction()))
+    return false;
+
   MRI = &Fn.getRegInfo();
   TII = Fn.getTarget().getInstrInfo();
 
@@ -139,10 +142,8 @@
   if (PHIsInCycle.size() == 16)
     return false;
 
-  for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DstReg),
-         E = MRI->use_end(); I != E; ++I) {
-    MachineInstr *UseMI = &*I;
-    if (!UseMI->isPHI() || !IsDeadPHICycle(UseMI, PHIsInCycle))
+  for (MachineInstr &UseMI : MRI->use_instructions(DstReg)) {
+    if (!UseMI.isPHI() || !IsDeadPHICycle(&UseMI, PHIsInCycle))
       return false;
   }
 
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index dcd9072..0e9df58 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -57,8 +57,8 @@
       initializePHIEliminationPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual bool runOnMachineFunction(MachineFunction &Fn);
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    bool runOnMachineFunction(MachineFunction &Fn) override;
+    void getAnalysisUsage(AnalysisUsage &AU) const override;
 
   private:
     /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
@@ -186,7 +186,7 @@
   // Get an iterator to the first instruction after the last PHI node (this may
   // also be the end of the basic block).
   MachineBasicBlock::iterator LastPHIIt =
-    prior(MBB.SkipPHIsAndLabels(MBB.begin()));
+    std::prev(MBB.SkipPHIsAndLabels(MBB.begin()));
 
   while (MBB.front().isPHI())
     LowerPHINode(MBB, LastPHIIt);
@@ -198,9 +198,8 @@
 /// This includes registers with no defs.
 static bool isImplicitlyDefined(unsigned VirtReg,
                                 const MachineRegisterInfo *MRI) {
-  for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(VirtReg),
-       DE = MRI->def_end(); DI != DE; ++DI)
-    if (!DI->isImplicitDef())
+  for (MachineInstr &DI : MRI->def_instructions(VirtReg))
+    if (!DI.isImplicitDef())
       return false;
   return true;
 }
@@ -222,7 +221,7 @@
                                   MachineBasicBlock::iterator LastPHIIt) {
   ++NumLowered;
 
-  MachineBasicBlock::iterator AfterPHIsIt = llvm::next(LastPHIIt);
+  MachineBasicBlock::iterator AfterPHIsIt = std::next(LastPHIIt);
 
   // Unlink the PHI node from the basic block, but don't delete the PHI yet.
   MachineInstr *MPhi = MBB.remove(MBB.begin());
@@ -267,7 +266,7 @@
 
   // Update live variable information if there is any.
   if (LV) {
-    MachineInstr *PHICopy = prior(AfterPHIsIt);
+    MachineInstr *PHICopy = std::prev(AfterPHIsIt);
 
     if (IncomingReg) {
       LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);
@@ -306,7 +305,7 @@
 
   // Update LiveIntervals for the new copy or implicit def.
   if (LIS) {
-    MachineInstr *NewInstr = prior(AfterPHIsIt);
+    MachineInstr *NewInstr = std::prev(AfterPHIsIt);
     SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(NewInstr);
 
     SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB);
@@ -444,7 +443,7 @@
           }
         } else {
           // We just inserted this copy.
-          KillInst = prior(InsertPos);
+          KillInst = std::prev(InsertPos);
         }
       }
       assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");
@@ -504,7 +503,7 @@
               }
             } else {
               // We just inserted this copy.
-              KillInst = prior(InsertPos);
+              KillInst = std::prev(InsertPos);
             }
           }
           assert(KillInst->readsRegister(SrcReg) &&
@@ -607,7 +606,7 @@
       if (!ShouldSplit)
         continue;
       if (!PreMBB->SplitCriticalEdge(&MBB, this)) {
-        DEBUG(dbgs() << "Failed to split ciritcal edge.\n");
+        DEBUG(dbgs() << "Failed to split critical edge.\n");
         continue;
       }
       Changed = true;
diff --git a/lib/CodeGen/PHIEliminationUtils.cpp b/lib/CodeGen/PHIEliminationUtils.cpp
index e1b56e9..99bbad1 100644
--- a/lib/CodeGen/PHIEliminationUtils.cpp
+++ b/lib/CodeGen/PHIEliminationUtils.cpp
@@ -34,11 +34,9 @@
   // Discover any defs/uses in this basic block.
   SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
   MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo();
-  for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(SrcReg),
-         RE = MRI.reg_end(); RI != RE; ++RI) {
-    MachineInstr* DefUseMI = &*RI;
-    if (DefUseMI->getParent() == MBB)
-      DefUsesInMBB.insert(DefUseMI);
+  for (MachineInstr &RI : MRI.reg_instructions(SrcReg)) {
+    if (RI.getParent() == MBB)
+      DefUsesInMBB.insert(&RI);
   }
 
   MachineBasicBlock::iterator InsertPoint;
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index f4ffd03..080b20d 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -14,11 +14,11 @@
 
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Assembly/PrintModulePass.h"
 #include "llvm/CodeGen/GCStrategy.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/Verifier.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/PassManager.h"
 #include "llvm/Support/CommandLine.h"
@@ -30,6 +30,11 @@
 
 using namespace llvm;
 
+namespace llvm {
+extern cl::opt<bool> EnableStackMapLiveness;
+extern cl::opt<bool> EnablePatchPointLiveness;
+}
+
 static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
     cl::desc("Disable Post Regalloc"));
 static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
@@ -56,7 +61,7 @@
 OptimizeRegAlloc("optimize-regalloc", cl::Hidden,
     cl::desc("Enable optimized register allocation compilation path."));
 static cl::opt<cl::boolOrDefault>
-EnableMachineSched("enable-misched", cl::Hidden,
+EnableMachineSched("enable-misched",
     cl::desc("Enable the machine instruction scheduling pass."));
 static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",
     cl::Hidden,
@@ -65,6 +70,8 @@
     cl::desc("Disable Machine Sinking"));
 static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden,
     cl::desc("Disable Loop Strength Reduction Pass"));
+static cl::opt<bool> DisableConstantHoisting("disable-constant-hoisting",
+    cl::Hidden, cl::desc("Disable ConstantHoisting"));
 static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden,
     cl::desc("Disable Codegen Prepare"));
 static cl::opt<bool> DisableCopyProp("disable-copyprop", cl::Hidden,
@@ -83,6 +90,14 @@
                    cl::desc("Print machine instrs"),
                    cl::value_desc("pass-name"), cl::init("option-unspecified"));
 
+// Temporary option to allow experimenting with MachineScheduler as a post-RA
+// scheduler. Targets can "properly" enable this with
+// substitutePass(&PostRASchedulerID, &MachineSchedulerID); Ideally it wouldn't
+// be part of the standard pass pipeline, and the target would just add a PostRA
+// scheduling pass wherever it wants.
+static cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden,
+  cl::desc("Run MachineScheduler post regalloc (independent of preRA sched)"));
+
 // Experimental option to run live interval analysis early.
 static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
     cl::desc("Run live interval analysis earlier in the pipeline"));
@@ -376,13 +391,17 @@
   if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
     addPass(createLoopStrengthReducePass());
     if (PrintLSR)
-      addPass(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
+      addPass(createPrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n"));
   }
 
   addPass(createGCLoweringPass());
 
   // Make sure that no unreachable blocks are instruction selected.
   addPass(createUnreachableBlockEliminationPass());
+
+  // Prepare expensive constants for SelectionDAG.
+  if (getOptLevel() != CodeGenOpt::None && !DisableConstantHoisting)
+    addPass(createConstantHoistingPass());
 }
 
 /// Turn exception handling constructs into something the code generators can
@@ -404,7 +423,7 @@
     addPass(createDwarfEHPass(TM));
     break;
   case ExceptionHandling::None:
-    addPass(createLowerInvokePass(TM));
+    addPass(createLowerInvokePass());
 
     // The lower invoke pass may create unreachable code. Remove it.
     addPass(createUnreachableBlockEliminationPass());
@@ -422,14 +441,13 @@
 /// Add common passes that perform LLVM IR to IR transforms in preparation for
 /// instruction selection.
 void TargetPassConfig::addISelPrepare() {
-  addPass(createStackProtectorPass(TM));
-
   addPreISel();
 
+  addPass(createStackProtectorPass(TM));
+
   if (PrintISelInput)
-    addPass(createPrintFunctionPass("\n\n"
-                                    "*** Final LLVM Code input to ISel ***\n",
-                                    &dbgs()));
+    addPass(createPrintFunctionPass(
+        dbgs(), "\n\n*** Final LLVM Code input to ISel ***\n"));
 
   // All passes which modify the LLVM IR are now complete; run the verifier
   // to ensure that the IR is valid.
@@ -520,7 +538,10 @@
 
   // Second pass scheduler.
   if (getOptLevel() != CodeGenOpt::None) {
-    addPass(&PostRASchedulerID);
+    if (MISchedPostRA)
+      addPass(&PostMachineSchedulerID);
+    else
+      addPass(&PostRASchedulerID);
     printAndVerify("After PostRAScheduler");
   }
 
@@ -536,6 +557,9 @@
 
   if (addPreEmitPass())
     printAndVerify("After PreEmit passes");
+
+  if (EnableStackMapLiveness || EnablePatchPointLiveness)
+    addPass(&StackMapLivenessID);
 }
 
 /// Add passes that optimize machine instructions in SSA form.
@@ -725,7 +749,10 @@
     printAndVerify("After BranchFolding");
 
   // Tail duplication.
-  if (addPass(&TailDuplicateID))
+  // Note that tail duplication only increases code size and degrades
+  // performance for targets that require structured control flow.
+  // In addition, it can make the CFG irreducible. Thus we disable it.
+  if (!TM->requiresStructuredCFG() && addPass(&TailDuplicateID))
     printAndVerify("After TailDuplicate");
 
   // Copy propagation.
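// A hedged sketch of the "proper" target-side opt-in mentioned in the comment
// above: a target's TargetPassConfig subclass substitutes a post-RA
// MachineScheduler for the default list scheduler. FooPassConfig is
// hypothetical; substitutePass(), PostRASchedulerID, and
// PostMachineSchedulerID are the in-tree names this patch uses.
//
//   class FooPassConfig : public TargetPassConfig {
//   public:
//     FooPassConfig(TargetMachine *TM, PassManagerBase &PM)
//         : TargetPassConfig(TM, PM) {
//       // Run the MachineScheduler after register allocation instead of the
//       // top-down post-RA list scheduler.
//       substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
//     }
//   };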
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 28f2d2f..e18d9635 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -110,9 +110,9 @@
       initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual bool runOnMachineFunction(MachineFunction &MF);
+    bool runOnMachineFunction(MachineFunction &MF) override;
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesCFG();
       MachineFunctionPass::getAnalysisUsage(AU);
       if (Aggressive) {
@@ -133,7 +133,8 @@
     bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
                        SmallSet<unsigned, 4> &ImmDefRegs,
                        DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
-    bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg);
+    bool isLoadFoldable(MachineInstr *MI,
+                        SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
   };
 }
 
@@ -187,10 +188,8 @@
   // The source has other uses. See if we can replace the other uses with use of
   // the result of the extension.
   SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
-  for (MachineRegisterInfo::use_nodbg_iterator
-       UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end();
-       UI != UE; ++UI)
-    ReachedBBs.insert(UI->getParent());
+  for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
+    ReachedBBs.insert(UI.getParent());
 
   // Uses that are in the same BB of uses of the result of the instruction.
   SmallVector<MachineOperand*, 8> Uses;
@@ -199,11 +198,8 @@
   SmallVector<MachineOperand*, 8> ExtendedUses;
 
   bool ExtendLife = true;
-  for (MachineRegisterInfo::use_nodbg_iterator
-       UI = MRI->use_nodbg_begin(SrcReg), UE = MRI->use_nodbg_end();
-       UI != UE; ++UI) {
-    MachineOperand &UseMO = UI.getOperand();
-    MachineInstr *UseMI = &*UI;
+  for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
+    MachineInstr *UseMI = UseMO.getParent();
     if (UseMI == MI)
       continue;
 
@@ -270,11 +266,9 @@
     // Look for PHI uses of the extended result, we don't want to extend the
     // liveness of a PHI input. It breaks all kinds of assumptions down
     // stream. A PHI use is expected to be the kill of its source values.
-    for (MachineRegisterInfo::use_nodbg_iterator
-         UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end();
-         UI != UE; ++UI)
-      if (UI->isPHI())
-        PHIBBs.insert(UI->getParent());
+    for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
+      if (UI.isPHI())
+        PHIBBs.insert(UI.getParent());
 
     const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
     for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
@@ -496,8 +490,9 @@
 /// isLoadFoldable - Check whether MI is a candidate for folding into a later
 /// instruction. We only fold loads into virtual registers, and only when the
 /// defined virtual register has a single use.
-bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI,
-                                       unsigned &FoldAsLoadDefReg) {
+bool PeepholeOptimizer::isLoadFoldable(
+                              MachineInstr *MI,
+                              SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {
   if (!MI->canFoldAsLoad() || !MI->mayLoad())
     return false;
   const MCInstrDesc &MCID = MI->getDesc();
@@ -505,13 +500,13 @@
     return false;
 
   unsigned Reg = MI->getOperand(0).getReg();
-  // To reduce compilation time, we check MRI->hasOneUse when inserting
+  // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting
   // loads. It should be checked when processing uses of the load, since
   // uses can be removed during peephole.
   if (!MI->getOperand(0).getSubReg() &&
       TargetRegisterInfo::isVirtualRegister(Reg) &&
-      MRI->hasOneUse(Reg)) {
-    FoldAsLoadDefReg = Reg;
+      MRI->hasOneNonDBGUse(Reg)) {
+    FoldAsLoadDefCandidates.insert(Reg);
     return true;
   }
   return false;
@@ -561,6 +556,9 @@
 }
 
 bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
+  if (skipOptnoneFunction(*MF.getFunction()))
+    return false;
+
   DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
   DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');
 
@@ -574,18 +572,14 @@
 
   bool Changed = false;
 
-  SmallPtrSet<MachineInstr*, 8> LocalMIs;
-  SmallSet<unsigned, 4> ImmDefRegs;
-  DenseMap<unsigned, MachineInstr*> ImmDefMIs;
-  unsigned FoldAsLoadDefReg;
   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
     MachineBasicBlock *MBB = &*I;
 
     bool SeenMoveImm = false;
-    LocalMIs.clear();
-    ImmDefRegs.clear();
-    ImmDefMIs.clear();
-    FoldAsLoadDefReg = 0;
+    SmallPtrSet<MachineInstr*, 8> LocalMIs;
+    SmallSet<unsigned, 4> ImmDefRegs;
+    DenseMap<unsigned, MachineInstr*> ImmDefMIs;
+    SmallSet<unsigned, 16> FoldAsLoadDefCandidates;
 
     for (MachineBasicBlock::iterator
            MII = I->begin(), MIE = I->end(); MII != MIE; ) {
@@ -594,16 +588,20 @@
       ++MII;
       LocalMIs.insert(MI);
 
+      // Skip debug values. They should not affect this peephole optimization.
+      if (MI->isDebugValue())
+        continue;
+
       // If there exists an instruction which belongs to the following
-      // categories, we will discard the load candidate.
-      if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
-          MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
+      // categories, we will discard the load candidates.
+      if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
+          MI->isKill() || MI->isInlineAsm() ||
           MI->hasUnmodeledSideEffects()) {
-        FoldAsLoadDefReg = 0;
+        FoldAsLoadDefCandidates.clear();
         continue;
       }
       if (MI->mayStore() || MI->isCall())
-        FoldAsLoadDefReg = 0;
+        FoldAsLoadDefCandidates.clear();
 
       if (((MI->isBitcast() || MI->isCopy()) && optimizeCopyOrBitcast(MI)) ||
           (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
@@ -630,26 +628,43 @@
       // Check whether MI is a load candidate for folding into a later
       // instruction. If MI is not a candidate, check whether we can fold an
       // earlier load into MI.
-      if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) {
-        // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr
-        // can enable folding by converting SUB to CMP.
-        MachineInstr *DefMI = 0;
-        MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
-                                                      FoldAsLoadDefReg, DefMI);
-        if (FoldMI) {
-          // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.
-          DEBUG(dbgs() << "Replacing: " << *MI);
-          DEBUG(dbgs() << "     With: " << *FoldMI);
-          LocalMIs.erase(MI);
-          LocalMIs.erase(DefMI);
-          LocalMIs.insert(FoldMI);
-          MI->eraseFromParent();
-          DefMI->eraseFromParent();
-          ++NumLoadFold;
-
-          // MI is replaced with FoldMI.
-          Changed = true;
-          continue;
+      if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) &&
+          !FoldAsLoadDefCandidates.empty()) {
+        const MCInstrDesc &MIDesc = MI->getDesc();
+        for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands();
+             ++i) {
+          const MachineOperand &MOp = MI->getOperand(i);
+          if (!MOp.isReg())
+            continue;
+          unsigned FoldAsLoadDefReg = MOp.getReg();
+          if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
+            // We need to fold load after optimizeCmpInstr, since
+            // optimizeCmpInstr can enable folding by converting SUB to CMP.
+            // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
+            // we need it for markUsesInDebugValueAsUndef().
+            unsigned FoldedReg = FoldAsLoadDefReg;
+            MachineInstr *DefMI = 0;
+            MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
+                                                          FoldAsLoadDefReg,
+                                                          DefMI);
+            if (FoldMI) {
+              // Update LocalMIs since we replaced MI with FoldMI and deleted
+              // DefMI.
+              DEBUG(dbgs() << "Replacing: " << *MI);
+              DEBUG(dbgs() << "     With: " << *FoldMI);
+              LocalMIs.erase(MI);
+              LocalMIs.erase(DefMI);
+              LocalMIs.insert(FoldMI);
+              MI->eraseFromParent();
+              DefMI->eraseFromParent();
+              MRI->markUsesInDebugValueAsUndef(FoldedReg);
+              FoldAsLoadDefCandidates.erase(FoldedReg);
+              ++NumLoadFold;
+              // MI is replaced with FoldMI.
+              Changed = true;
+              break;
+            }
+          }
         }
       }
     }
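// A condensed sketch of the candidate-set pattern introduced above: remember
// single-use load results in a set, then scan each later instruction's
// register operands for a member of the set and fold the first match (the
// caller erases the register once folded, as FoldAsLoadDefCandidates.erase()
// does). Op and the helper are simplified, hypothetical stand-ins:
//
//   #include <set>
//   #include <vector>
//
//   struct Op { bool IsReg; unsigned Reg; };
//
//   // Return the first operand register that is a pending fold candidate,
//   // or 0 if there is none.
//   unsigned findFoldCandidate(const std::vector<Op> &Ops,
//                              const std::set<unsigned> &Candidates) {
//     for (const Op &O : Ops) {
//       if (O.IsReg && Candidates.count(O.Reg))
//         return O.Reg;
//     }
//     return 0;
//   }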
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 1afc1ec..a13e51f 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -30,7 +30,6 @@
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
@@ -86,7 +85,7 @@
     static char ID;
     PostRAScheduler() : MachineFunctionPass(ID) {}
 
-    void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesCFG();
       AU.addRequired<AliasAnalysis>();
       AU.addRequired<TargetPassConfig>();
@@ -97,7 +96,7 @@
       MachineFunctionPass::getAnalysisUsage(AU);
     }
 
-    bool runOnMachineFunction(MachineFunction &Fn);
+    bool runOnMachineFunction(MachineFunction &Fn) override;
   };
   char PostRAScheduler::ID = 0;
 
@@ -121,9 +120,6 @@
     /// AA - AliasAnalysis for making memory reference queries.
     AliasAnalysis *AA;
 
-    /// LiveRegs - true if the register is live.
-    BitVector LiveRegs;
-
     /// The schedule. Null SUnit*'s represent noop instructions.
     std::vector<SUnit*> Sequence;
 
@@ -145,23 +141,23 @@
     /// startBlock - Initialize register live-range state for scheduling in
     /// this block.
     ///
-    void startBlock(MachineBasicBlock *BB);
+    void startBlock(MachineBasicBlock *BB) override;
 
     // Set the index of RegionEnd within the current BB.
     void setEndIndex(unsigned EndIdx) { EndIndex = EndIdx; }
 
     /// Initialize the scheduler state for the next scheduling region.
-    virtual void enterRegion(MachineBasicBlock *bb,
-                             MachineBasicBlock::iterator begin,
-                             MachineBasicBlock::iterator end,
-                             unsigned regioninstrs);
+    void enterRegion(MachineBasicBlock *bb,
+                     MachineBasicBlock::iterator begin,
+                     MachineBasicBlock::iterator end,
+                     unsigned regioninstrs) override;
 
     /// Notify that the scheduler has finished scheduling the current region.
-    virtual void exitRegion();
+    void exitRegion() override;
 
     /// Schedule - Schedule the instruction range using list scheduling.
     ///
-    void schedule();
+    void schedule() override;
 
     void EmitSchedule();
 
@@ -172,26 +168,16 @@
 
     /// finishBlock - Clean up register live-range state.
     ///
-    void finishBlock();
-
-    /// FixupKills - Fix register kill flags that have been made
-    /// invalid due to scheduling
-    ///
-    void FixupKills(MachineBasicBlock *MBB);
+    void finishBlock() override;
 
   private:
     void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
     void ReleaseSuccessors(SUnit *SU);
     void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
     void ListScheduleTopDown();
-    void StartBlockForKills(MachineBasicBlock *BB);
-
-    // ToggleKillFlag - Toggle a register operand kill flag. Other
-    // adjustments may be made to the instruction if necessary. Return
-    // true if the operand has been deleted, false if not.
-    bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO);
 
     void dumpSchedule() const;
+    void emitNoop(unsigned CurCycle);
   };
 }
 
@@ -205,9 +191,8 @@
   AliasAnalysis *AA, const RegisterClassInfo &RCI,
   TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
   SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs)
-  : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA),
-    LiveRegs(TRI->getNumRegs()), EndIndex(0)
-{
+  : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA), EndIndex(0) {
+
   const TargetMachine &TM = MF.getTarget();
   const InstrItineraryData *InstrItins = TM.getInstrItineraryData();
   HazardRec =
@@ -260,6 +245,9 @@
 #endif
 
 bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
+  if (skipOptnoneFunction(*Fn.getFunction()))
+    return false;
+
   TII = Fn.getTarget().getInstrInfo();
   MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
   MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
@@ -320,7 +308,7 @@
     MachineBasicBlock::iterator Current = MBB->end();
     unsigned Count = MBB->size(), CurrentCount = Count;
     for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
-      MachineInstr *MI = llvm::prior(I);
+      MachineInstr *MI = std::prev(I);
       --Count;
       // Calls are not scheduling boundaries before register allocation, but
       // post-ra we don't gain anything by scheduling across calls since we
@@ -352,7 +340,7 @@
     Scheduler.finishBlock();
 
     // Update register kills
-    Scheduler.FixupKills(MBB);
+    Scheduler.fixupKills(MBB);
   }
 
   return true;
@@ -423,148 +411,6 @@
   ScheduleDAGInstrs::finishBlock();
 }
 
-/// StartBlockForKills - Initialize register live-range state for updating kills
-///
-void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
-  // Start with no live registers.
-  LiveRegs.reset();
-
-  // Examine the live-in regs of all successors.
-  for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
-       SE = BB->succ_end(); SI != SE; ++SI) {
-    for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
-         E = (*SI)->livein_end(); I != E; ++I) {
-      unsigned Reg = *I;
-      // Repeat, for reg and all subregs.
-      for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
-           SubRegs.isValid(); ++SubRegs)
-        LiveRegs.set(*SubRegs);
-    }
-  }
-}
-
-bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
-                                          MachineOperand &MO) {
-  // Setting kill flag...
-  if (!MO.isKill()) {
-    MO.setIsKill(true);
-    return false;
-  }
-
-  // If MO itself is live, clear the kill flag...
-  if (LiveRegs.test(MO.getReg())) {
-    MO.setIsKill(false);
-    return false;
-  }
-
-  // If any subreg of MO is live, then create an imp-def for that
-  // subreg and keep MO marked as killed.
-  MO.setIsKill(false);
-  bool AllDead = true;
-  const unsigned SuperReg = MO.getReg();
-  MachineInstrBuilder MIB(MF, MI);
-  for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) {
-    if (LiveRegs.test(*SubRegs)) {
-      MIB.addReg(*SubRegs, RegState::ImplicitDefine);
-      AllDead = false;
-    }
-  }
-
-  if(AllDead)
-    MO.setIsKill(true);
-  return false;
-}
-
-/// FixupKills - Fix the register kill flags, they may have been made
-/// incorrect by instruction reordering.
-///
-void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
-  DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
-
-  BitVector killedRegs(TRI->getNumRegs());
-
-  StartBlockForKills(MBB);
-
-  // Examine block from end to start...
-  unsigned Count = MBB->size();
-  for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
-       I != E; --Count) {
-    MachineInstr *MI = --I;
-    if (MI->isDebugValue())
-      continue;
-
-    // Update liveness.  Registers that are defed but not used in this
-    // instruction are now dead. Mark register and all subregs as they
-    // are completely defined.
-    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-      MachineOperand &MO = MI->getOperand(i);
-      if (MO.isRegMask())
-        LiveRegs.clearBitsNotInMask(MO.getRegMask());
-      if (!MO.isReg()) continue;
-      unsigned Reg = MO.getReg();
-      if (Reg == 0) continue;
-      if (!MO.isDef()) continue;
-      // Ignore two-addr defs.
-      if (MI->isRegTiedToUseOperand(i)) continue;
-
-      // Repeat for reg and all subregs.
-      for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
-           SubRegs.isValid(); ++SubRegs)
-        LiveRegs.reset(*SubRegs);
-    }
-
-    // Examine all used registers and set/clear kill flag. When a
-    // register is used multiple times we only set the kill flag on
-    // the first use. Don't set kill flags on undef operands.
-    killedRegs.reset();
-    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-      MachineOperand &MO = MI->getOperand(i);
-      if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
-      unsigned Reg = MO.getReg();
-      if ((Reg == 0) || MRI.isReserved(Reg)) continue;
-
-      bool kill = false;
-      if (!killedRegs.test(Reg)) {
-        kill = true;
-        // A register is not killed if any subregs are live...
-        for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
-          if (LiveRegs.test(*SubRegs)) {
-            kill = false;
-            break;
-          }
-        }
-
-        // If subreg is not live, then register is killed if it became
-        // live in this instruction
-        if (kill)
-          kill = !LiveRegs.test(Reg);
-      }
-
-      if (MO.isKill() != kill) {
-        DEBUG(dbgs() << "Fixing " << MO << " in ");
-        // Warning: ToggleKillFlag may invalidate MO.
-        ToggleKillFlag(MI, MO);
-        DEBUG(MI->dump());
-      }
-
-      killedRegs.set(Reg);
-    }
-
-    // Mark any used register (that is not using undef) and subregs as
-    // now live...
-    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-      MachineOperand &MO = MI->getOperand(i);
-      if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
-      unsigned Reg = MO.getReg();
-      if ((Reg == 0) || MRI.isReserved(Reg)) continue;
-
-      for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
-           SubRegs.isValid(); ++SubRegs)
-        LiveRegs.set(*SubRegs);
-    }
-  }
-}
-
 //===----------------------------------------------------------------------===//
 //  Top-Down Scheduling
 //===----------------------------------------------------------------------===//
@@ -630,6 +476,14 @@
   AvailableQueue.scheduledNode(SU);
 }
 
+/// emitNoop - Add a noop to the current instruction sequence.
+void SchedulePostRATDList::emitNoop(unsigned CurCycle) {
+  DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n');
+  HazardRec->EmitNoop();
+  Sequence.push_back(0);   // NULL here means noop
+  ++NumNoops;
+}
+
 /// ListScheduleTopDown - The main loop of list scheduling for top-down
 /// schedulers.
 void SchedulePostRATDList::ListScheduleTopDown() {
@@ -678,7 +532,7 @@
 
     DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this));
 
-    SUnit *FoundSUnit = 0;
+    SUnit *FoundSUnit = 0, *NotPreferredSUnit = 0;
     bool HasNoopHazards = false;
     while (!AvailableQueue.empty()) {
       SUnit *CurSUnit = AvailableQueue.pop();
@@ -686,8 +540,19 @@
       ScheduleHazardRecognizer::HazardType HT =
         HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
       if (HT == ScheduleHazardRecognizer::NoHazard) {
-        FoundSUnit = CurSUnit;
-        break;
+        if (HazardRec->ShouldPreferAnother(CurSUnit)) {
+          if (!NotPreferredSUnit) {
+            // If this is the first non-preferred node for this cycle, then
+            // record it and continue searching for a preferred node. If this
+            // is not the first non-preferred node, then treat it as though
+            // there had been a hazard.
+            NotPreferredSUnit = CurSUnit;
+            continue;
+          }
+        } else {
+          FoundSUnit = CurSUnit;
+          break;
+        }
       }
 
       // Remember if this is a noop hazard.
@@ -696,6 +561,20 @@
       NotReady.push_back(CurSUnit);
     }
 
+    // If we have a non-preferred node, push it back onto the available list.
+    // If we did not find a preferred node, then schedule this first
+    // non-preferred node.
+    if (NotPreferredSUnit) {
+      if (!FoundSUnit) {
+        DEBUG(dbgs() << "*** Will schedule a non-preferred instruction...\n");
+        FoundSUnit = NotPreferredSUnit;
+      } else {
+        AvailableQueue.push(NotPreferredSUnit);
+      }
+
+      NotPreferredSUnit = 0;
+    }
+
     // Add the nodes that aren't ready back onto the available list.
     if (!NotReady.empty()) {
       AvailableQueue.push_all(NotReady);
@@ -704,6 +583,11 @@
 
     // If we found a node to schedule...
     if (FoundSUnit) {
+      // If we need to emit noops prior to this instruction, then do so.
+      unsigned NumPreNoops = HazardRec->PreEmitNoops(FoundSUnit);
+      for (unsigned i = 0; i != NumPreNoops; ++i)
+        emitNoop(CurCycle);
+
       // ... schedule the node...
       ScheduleNodeTopDown(FoundSUnit, CurCycle);
       HazardRec->EmitInstruction(FoundSUnit);
@@ -728,10 +612,7 @@
         // Otherwise, we have no instructions to issue and we have instructions
         // that will fault if we don't do this right.  This is the case for
         // processors without pipeline interlocks and other cases.
-        DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n');
-        HazardRec->EmitNoop();
-        Sequence.push_back(0);   // NULL here means noop
-        ++NumNoops;
+        emitNoop(CurCycle);
       }
 
       ++CurCycle;
@@ -769,13 +650,13 @@
     // Update the Begin iterator, as the first instruction in the block
     // may have been scheduled later.
     if (i == 0)
-      RegionBegin = prior(RegionEnd);
+      RegionBegin = std::prev(RegionEnd);
   }
 
   // Reinsert any remaining debug_values.
   for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
          DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
-    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
+    std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
     MachineInstr *DbgValue = P.first;
     MachineBasicBlock::iterator OrigPrivMI = P.second;
     BB->splice(++OrigPrivMI, BB, DbgValue);
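// The new scheduling logic above is driven by two ScheduleHazardRecognizer
// hooks: ShouldPreferAnother() asks the scheduler to keep looking for a better
// node this cycle, and PreEmitNoops() requests padding noops before a node. A
// hedged sketch of a recognizer exercising both (the Foo names and the two
// target queries are hypothetical stubs; the overridden methods are the real
// ScheduleHazardRecognizer interface this scheduler calls):
//
//   struct FooHazardRecognizer : public ScheduleHazardRecognizer {
//     // Hypothetical target properties, stubbed out for the sketch.
//     bool isCrackedOnFoo(const SUnit *) const { return false; }
//     bool needsGroupStart(const SUnit *) const { return false; }
//
//     // If this returns true and a preferred node is available, the
//     // scheduler picks the preferred one; otherwise SU is scheduled anyway.
//     bool ShouldPreferAnother(SUnit *SU) override {
//       return isCrackedOnFoo(SU);
//     }
//     // The main loop calls emitNoop() this many times before SU.
//     unsigned PreEmitNoops(SUnit *SU) override {
//       return needsGroupStart(SU) ? 1 : 0;
//     }
//   };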
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index 0c5173a..360e8d7 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -41,9 +41,9 @@
     initializeProcessImplicitDefsPass(*PassRegistry::getPassRegistry());
   }
 
-  virtual void getAnalysisUsage(AnalysisUsage &au) const;
+  void getAnalysisUsage(AnalysisUsage &au) const override;
 
-  virtual bool runOnMachineFunction(MachineFunction &fn);
+  bool runOnMachineFunction(MachineFunction &fn) override;
 };
 } // end anonymous namespace
 
@@ -80,10 +80,7 @@
   if (TargetRegisterInfo::isVirtualRegister(Reg)) {
     // For virtual registers, mark all uses as <undef>, and convert users to
     // implicit-def when possible.
-    for (MachineRegisterInfo::use_nodbg_iterator UI =
-         MRI->use_nodbg_begin(Reg),
-         UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
-      MachineOperand &MO = UI.getOperand();
+    for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
       MO.setIsUndef();
       MachineInstr *UserMI = MO.getParent();
       if (!canTurnIntoImplicitDef(UserMI))
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index b0e494f..136b1ed 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -20,6 +20,7 @@
 #include "PrologEpilogInserter.h"
 #include "llvm/ADT/IndexedMap.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineDominators.h"
@@ -29,7 +30,10 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
@@ -54,6 +58,7 @@
                 "Prologue/Epilogue Insertion", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(StackProtector)
 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
 INITIALIZE_PASS_END(PEI, "prologepilog",
                     "Prologue/Epilogue Insertion & Frame Finalization",
@@ -67,6 +72,7 @@
   AU.setPreservesCFG();
   AU.addPreserved<MachineLoopInfo>();
   AU.addPreserved<MachineDominatorTree>();
+  AU.addRequired<StackProtector>();
   AU.addRequired<TargetPassConfig>();
   MachineFunctionPass::getAnalysisUsage(AU);
 }
@@ -95,6 +101,9 @@
   return;
 }
 
+/// StackObjSet - A set of stack object indexes
+typedef SmallSetVector<int, 8> StackObjSet;
+
 /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
 /// frame indexes with appropriate references.
 ///
@@ -160,10 +169,11 @@
 
   // Warn on stack size when it exceeds the given limit.
   MachineFrameInfo *MFI = Fn.getFrameInfo();
-  if (WarnStackSize.getNumOccurrences() > 0 &&
-      WarnStackSize < MFI->getStackSize())
-    errs() << "warning: Stack size limit exceeded (" << MFI->getStackSize()
-           << ") in " << Fn.getName()  << ".\n";
+  uint64_t StackSize = MFI->getStackSize();
+  if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) {
+    DiagnosticInfoStackSize DiagStackSize(*F, StackSize);
+    F->getContext().diagnose(DiagStackSize);
+  }
 
   delete RS;
   ReturnBlocks.clear();
@@ -409,11 +419,28 @@
   }
 }
 
+/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e.,
+/// those required to be close to the Stack Protector) to stack offsets.
+static void
+AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
+                      SmallSet<int, 16> &ProtectedObjs,
+                      MachineFrameInfo *MFI, bool StackGrowsDown,
+                      int64_t &Offset, unsigned &MaxAlign) {
+
+  for (StackObjSet::const_iterator I = UnassignedObjs.begin(),
+        E = UnassignedObjs.end(); I != E; ++I) {
+    int i = *I;
+    AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
+    ProtectedObjs.insert(i);
+  }
+}
+
 /// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
 /// abstract stack objects.
 ///
 void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
   const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
+  StackProtector *SP = &getAnalysis<StackProtector>();
 
   bool StackGrowsDown =
     TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
@@ -523,8 +550,12 @@
 
   // Make sure that the stack protector comes before the local variables on the
   // stack.
-  SmallSet<int, 16> LargeStackObjs;
+  SmallSet<int, 16> ProtectedObjs;
   if (MFI->getStackProtectorIndex() >= 0) {
+    StackObjSet LargeArrayObjs;
+    StackObjSet SmallArrayObjs;
+    StackObjSet AddrOfObjs;
+
     AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown,
                       Offset, MaxAlign);
 
@@ -541,12 +572,29 @@
         continue;
       if (MFI->getStackProtectorIndex() == (int)i)
         continue;
-      if (!MFI->MayNeedStackProtector(i))
-        continue;
 
-      AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
-      LargeStackObjs.insert(i);
+      switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) {
+      case StackProtector::SSPLK_None:
+        continue;
+      case StackProtector::SSPLK_SmallArray:
+        SmallArrayObjs.insert(i);
+        continue;
+      case StackProtector::SSPLK_AddrOf:
+        AddrOfObjs.insert(i);
+        continue;
+      case StackProtector::SSPLK_LargeArray:
+        LargeArrayObjs.insert(i);
+        continue;
+      }
+      llvm_unreachable("Unexpected SSPLayoutKind.");
     }
+
+    AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
+                          Offset, MaxAlign);
+    AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
+                          Offset, MaxAlign);
+    AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown,
+                          Offset, MaxAlign);
   }
 
   // Then assign frame offsets to stack objects that are not used to spill
@@ -563,7 +611,7 @@
       continue;
     if (MFI->getStackProtectorIndex() == (int)i)
       continue;
-    if (LargeStackObjs.count(i))
+    if (ProtectedObjs.count(i))
       continue;
 
     AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
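// A compact model of the layout policy implemented above: protected objects
// are bucketed by SSPLayoutKind and then assigned in a fixed order (large
// arrays, then small arrays, then address-taken objects), so the most
// overflow-prone buffers land closest to the stack protector. The types are
// simplified, hypothetical stand-ins for the real frame-index logic:
//
//   #include <vector>
//
//   enum Kind { None, SmallArray, AddrOf, LargeArray };
//
//   void layoutProtected(const std::vector<Kind> &Objs,
//                        std::vector<int> &Order) {
//     std::vector<int> Large, Small, Addr;
//     for (int i = 0, e = (int)Objs.size(); i != e; ++i) {
//       switch (Objs[i]) {
//       case None:       break;
//       case LargeArray: Large.push_back(i); break;
//       case SmallArray: Small.push_back(i); break;
//       case AddrOf:     Addr.push_back(i);  break;
//       }
//     }
//     // Closest to the guard first, mirroring the three
//     // AssignProtectedObjSet calls above.
//     Order.insert(Order.end(), Large.begin(), Large.end());
//     Order.insert(Order.end(), Small.begin(), Small.end());
//     Order.insert(Order.end(), Addr.begin(), Addr.end());
//   }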
@@ -711,14 +759,14 @@
       SPAdj += Size;
 
       MachineBasicBlock::iterator PrevI = BB->end();
-      if (I != BB->begin()) PrevI = prior(I);
+      if (I != BB->begin()) PrevI = std::prev(I);
       TFI->eliminateCallFramePseudoInstr(Fn, *BB, I);
 
       // Visit the instructions created by eliminateCallFramePseudoInstr().
       if (PrevI == BB->end())
         I = BB->begin();     // The replaced instr was the first in the block.
       else
-        I = llvm::next(PrevI);
+        I = std::next(PrevI);
       continue;
     }
 
@@ -801,9 +849,9 @@
         I = BB->begin();
 
       MachineInstr *MI = I;
-      MachineBasicBlock::iterator J = llvm::next(I);
-      MachineBasicBlock::iterator P = I == BB->begin() ?
-        MachineBasicBlock::iterator(NULL) : llvm::prior(I);
+      MachineBasicBlock::iterator J = std::next(I);
+      MachineBasicBlock::iterator P =
+          I == BB->begin() ? MachineBasicBlock::iterator(NULL) : std::prev(I);
 
       // RS should process this instruction before we might scavenge at this
       // location. This is because we might be replacing a virtual register
@@ -846,7 +894,7 @@
       // spill code will have been inserted in between I and J. This is a
       // problem because we need the spill code before I: Move I to just
       // prior to J.
-      if (I != llvm::prior(J)) {
+      if (I != std::prev(J)) {
         BB->splice(J, BB, I);
 
         // Before we move I, we need to prepare the RS to visit I again.
diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h
index 77cfa2b..5a6d39a 100644
--- a/lib/CodeGen/PrologEpilogInserter.h
+++ b/lib/CodeGen/PrologEpilogInserter.h
@@ -37,12 +37,12 @@
       initializePEIPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    void getAnalysisUsage(AnalysisUsage &AU) const override;
 
     /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
     /// frame indexes with appropriate references.
     ///
-    bool runOnMachineFunction(MachineFunction &Fn);
+    bool runOnMachineFunction(MachineFunction &Fn) override;
 
   private:
     RegScavenger *RS;
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index 293e306..33584f8 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines the RegAllocBase class which provides comon functionality
+// This file defines the RegAllocBase class which provides common functionality
 // for LiveIntervalUnion-based register allocators.
 //
 //===----------------------------------------------------------------------===//
@@ -101,8 +101,8 @@
     // register if possible and populate a list of new live intervals that
     // result from splitting.
     DEBUG(dbgs() << "\nselectOrSplit "
-                 << MRI->getRegClass(VirtReg->reg)->getName()
-                 << ':' << *VirtReg << '\n');
+          << MRI->getRegClass(VirtReg->reg)->getName()
+          << ':' << *VirtReg << " w=" << VirtReg->weight << '\n');
     typedef SmallVector<unsigned, 4> VirtRegVec;
     VirtRegVec SplitVRegs;
     unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
@@ -110,11 +110,16 @@
     if (AvailablePhysReg == ~0u) {
       // selectOrSplit failed to find a register!
       // Probably caused by an inline asm.
-      MachineInstr *MI;
-      for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg);
-           (MI = I.skipInstruction());)
-        if (MI->isInlineAsm())
+      MachineInstr *MI = 0;
+      for (MachineRegisterInfo::reg_instr_iterator
+           I = MRI->reg_instr_begin(VirtReg->reg), E = MRI->reg_instr_end();
+           I != E; ) {
+        MachineInstr *TmpMI = &*(I++);
+        if (TmpMI->isInlineAsm()) {
+          MI = TmpMI;
           break;
+        }
+      }
       if (MI)
         MI->emitError("inline assembly requires more registers than available");
       else
diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h
index c17a8d9..68bd4b5 100644
--- a/lib/CodeGen/RegAllocBase.h
+++ b/lib/CodeGen/RegAllocBase.h
@@ -37,7 +37,6 @@
 #ifndef LLVM_CODEGEN_REGALLOCBASE
 #define LLVM_CODEGEN_REGALLOCBASE
 
-#include "llvm/ADT/OwningPtr.h"
 #include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
 
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index 6768e45..b8c04fc 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -64,7 +64,7 @@
   MachineFunction *MF;
 
   // state
-  OwningPtr<Spiller> SpillerInstance;
+  std::unique_ptr<Spiller> SpillerInstance;
   std::priority_queue<LiveInterval*, std::vector<LiveInterval*>,
                       CompSpillWeight> Queue;
 
@@ -76,24 +76,22 @@
   RABasic();
 
   /// Return the pass name.
-  virtual const char* getPassName() const {
+  const char* getPassName() const override {
     return "Basic Register Allocator";
   }
 
   /// RABasic analysis usage.
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
 
-  virtual void releaseMemory();
+  void releaseMemory() override;
 
-  virtual Spiller &spiller() { return *SpillerInstance; }
+  Spiller &spiller() override { return *SpillerInstance; }
 
-  virtual float getPriority(LiveInterval *LI) { return LI->weight; }
-
-  virtual void enqueue(LiveInterval *LI) {
+  void enqueue(LiveInterval *LI) override {
     Queue.push(LI);
   }
 
-  virtual LiveInterval *dequeue() {
+  LiveInterval *dequeue() override {
     if (Queue.empty())
       return 0;
     LiveInterval *LI = Queue.top();
@@ -101,11 +99,11 @@
     return LI;
   }
 
-  virtual unsigned selectOrSplit(LiveInterval &VirtReg,
-                                 SmallVectorImpl<unsigned> &SplitVRegs);
+  unsigned selectOrSplit(LiveInterval &VirtReg,
+                         SmallVectorImpl<unsigned> &SplitVRegs) override;
 
   /// Perform register allocation.
-  virtual bool runOnMachineFunction(MachineFunction &mf);
+  bool runOnMachineFunction(MachineFunction &mf) override;
 
   // Helper for spilling all live virtual registers currently unified under preg
   // that interfere with the most recently queried lvr.  Return true if spilling
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index e92dbd2..8dc44f5 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -144,23 +144,23 @@
     // not be erased.
     bool isBulkSpilling;
 
-    enum LLVM_ENUM_INT_TYPE(unsigned) {
+    enum : unsigned {
       spillClean = 1,
       spillDirty = 100,
       spillImpossible = ~0u
     };
   public:
-    virtual const char *getPassName() const {
+    const char *getPassName() const override {
       return "Fast Register Allocator";
     }
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesCFG();
       MachineFunctionPass::getAnalysisUsage(AU);
     }
 
   private:
-    bool runOnMachineFunction(MachineFunction &Fn);
+    bool runOnMachineFunction(MachineFunction &Fn) override;
     void AllocateBasicBlock();
     void handleThroughOperands(MachineInstr *MI,
                                SmallVectorImpl<unsigned> &VirtDead);
@@ -224,7 +224,7 @@
 
   // Check that the use/def chain has exactly one operand - MO.
   MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg());
-  if (&I.getOperand() != &MO)
+  if (&*I != &MO)
     return false;
   return ++I == MRI->reg_nodbg_end();
 }
@@ -585,12 +585,12 @@
          "Not a virtual register");
   LiveRegMap::iterator LRI;
   bool New;
-  tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
+  std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
   if (New) {
     // If there is no hint, peek at the only use of this register.
     if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) &&
         MRI->hasOneNonDBGUse(VirtReg)) {
-      const MachineInstr &UseMI = *MRI->use_nodbg_begin(VirtReg);
+      const MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(VirtReg);
       // It's a copy, use the destination register as a hint.
       if (UseMI.isCopyLike())
         Hint = UseMI.getOperand(0).getReg();
@@ -618,7 +618,7 @@
          "Not a virtual register");
   LiveRegMap::iterator LRI;
   bool New;
-  tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
+  std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
   MachineOperand &MO = MI->getOperand(OpNum);
   if (New) {
     LRI = allocVirtReg(MI, LRI, Hint);
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index c08d955..6a623b8 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -35,6 +35,7 @@
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/PassAnalysisSupport.h"
 #include "llvm/Support/CommandLine.h"
@@ -59,6 +60,23 @@
              clEnumValEnd),
   cl::init(SplitEditor::SM_Partition));
 
+static cl::opt<unsigned>
+LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
+                             cl::desc("Last chance recoloring max depth"),
+                             cl::init(5));
+
+static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
+    "lcr-max-interf", cl::Hidden,
+    cl::desc("Last chance recoloring maximum number of considered"
+             " interference at a time"),
+    cl::init(8));
+
+// FIXME: Find a good default for this flag and remove the flag.
+static cl::opt<unsigned>
+CSRFirstTimeCost("regalloc-csr-first-time-cost",
+                 cl::desc("Cost for first time use of callee-saved register."),
+                 cl::init(0), cl::Hidden);
+
 static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
                                        createGreedyRegisterAllocator);
 
@@ -66,10 +84,19 @@
 class RAGreedy : public MachineFunctionPass,
                  public RegAllocBase,
                  private LiveRangeEdit::Delegate {
+  // Convenient shortcuts.
+  typedef std::priority_queue<std::pair<unsigned, unsigned> > PQueue;
+  typedef SmallPtrSet<LiveInterval *, 4> SmallLISet;
+  typedef SmallSet<unsigned, 16> SmallVirtRegSet;
 
   // context
   MachineFunction *MF;
 
+  // Shortcuts to some useful interface.
+  const TargetInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  RegisterClassInfo RCI;
+
   // analyses
   SlotIndexes *Indexes;
   MachineBlockFrequencyInfo *MBFI;
@@ -80,8 +107,8 @@
   LiveDebugVariables *DebugVars;
 
   // state
-  OwningPtr<Spiller> SpillerInstance;
-  std::priority_queue<std::pair<unsigned, unsigned> > Queue;
+  std::unique_ptr<Spiller> SpillerInstance;
+  PQueue Queue;
   unsigned NextCascade;
 
   // Live ranges pass through a number of stages as we try to allocate them.
@@ -160,20 +187,23 @@
     unsigned BrokenHints; ///< Total number of broken hints.
     float MaxWeight;      ///< Maximum spill weight evicted.
 
-    EvictionCost(unsigned B = 0) : BrokenHints(B), MaxWeight(0) {}
+    EvictionCost(): BrokenHints(0), MaxWeight(0) {}
 
     bool isMax() const { return BrokenHints == ~0u; }
 
+    void setMax() { BrokenHints = ~0u; }
+
+    void setBrokenHints(unsigned NHints) { BrokenHints = NHints; }
+
     bool operator<(const EvictionCost &O) const {
-      if (BrokenHints != O.BrokenHints)
-        return BrokenHints < O.BrokenHints;
-      return MaxWeight < O.MaxWeight;
+      return std::tie(BrokenHints, MaxWeight) <
+             std::tie(O.BrokenHints, O.MaxWeight);
     }
   };
 
   // splitting state.
-  OwningPtr<SplitAnalysis> SA;
-  OwningPtr<SplitEditor> SE;
+  std::unique_ptr<SplitAnalysis> SA;
+  std::unique_ptr<SplitEditor> SE;
 
   /// Cached per-block interference maps
   InterferenceCache IntfCache;
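// The operator< above uses the std::tie idiom for lexicographic comparison:
// BrokenHints is compared first, and MaxWeight only breaks ties. A minimal
// stand-alone sketch (Cost is a hypothetical stand-in for EvictionCost):
//
//   #include <tuple>
//
//   struct Cost {
//     unsigned BrokenHints;
//     float MaxWeight;
//     bool operator<(const Cost &O) const {
//       // Equivalent to: BrokenHints != O.BrokenHints
//       //                    ? BrokenHints < O.BrokenHints
//       //                    : MaxWeight < O.MaxWeight;
//       return std::tie(BrokenHints, MaxWeight) <
//              std::tie(O.BrokenHints, O.MaxWeight);
//     }
//   };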
@@ -217,12 +247,12 @@
     }
   };
 
-  /// Candidate info for for each PhysReg in AllocationOrder.
+  /// Candidate info for each PhysReg in AllocationOrder.
   /// This vector never shrinks, but grows to the size of the largest register
   /// class.
   SmallVector<GlobalSplitCandidate, 32> GlobalCand;
 
-  enum LLVM_ENUM_INT_TYPE(unsigned) { NoCand = ~0u };
+  enum : unsigned { NoCand = ~0u };
 
   /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to
   /// NoCand which indicates the stack interval.
@@ -232,28 +262,32 @@
   RAGreedy();
 
   /// Return the pass name.
-  virtual const char* getPassName() const {
+  const char* getPassName() const override {
     return "Greedy Register Allocator";
   }
 
   /// RAGreedy analysis usage.
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-  virtual void releaseMemory();
-  virtual Spiller &spiller() { return *SpillerInstance; }
-  virtual void enqueue(LiveInterval *LI);
-  virtual LiveInterval *dequeue();
-  virtual unsigned selectOrSplit(LiveInterval&,
-                                 SmallVectorImpl<unsigned>&);
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  void releaseMemory() override;
+  Spiller &spiller() override { return *SpillerInstance; }
+  void enqueue(LiveInterval *LI) override;
+  LiveInterval *dequeue() override;
+  unsigned selectOrSplit(LiveInterval&, SmallVectorImpl<unsigned>&) override;
 
   /// Perform register allocation.
-  virtual bool runOnMachineFunction(MachineFunction &mf);
+  bool runOnMachineFunction(MachineFunction &mf) override;
 
   static char ID;
 
 private:
-  bool LRE_CanEraseVirtReg(unsigned);
-  void LRE_WillShrinkVirtReg(unsigned);
-  void LRE_DidCloneVirtReg(unsigned, unsigned);
+  unsigned selectOrSplitImpl(LiveInterval &, SmallVectorImpl<unsigned> &,
+                             SmallVirtRegSet &, unsigned = 0);
+
+  bool LRE_CanEraseVirtReg(unsigned) override;
+  void LRE_WillShrinkVirtReg(unsigned) override;
+  void LRE_DidCloneVirtReg(unsigned, unsigned) override;
+  void enqueue(PQueue &CurQueue, LiveInterval *LI);
+  LiveInterval *dequeue(PQueue &CurQueue);
 
   BlockFrequency calcSpillCost();
   bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&);
@@ -268,6 +302,9 @@
   bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
   void evictInterference(LiveInterval&, unsigned,
                          SmallVectorImpl<unsigned>&);
+  bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
+                                  SmallLISet &RecoloringCandidates,
+                                  const SmallVirtRegSet &FixedRegisters);
 
   unsigned tryAssign(LiveInterval&, AllocationOrder&,
                      SmallVectorImpl<unsigned>&);
@@ -275,6 +312,20 @@
                     SmallVectorImpl<unsigned>&, unsigned = ~0u);
   unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
                           SmallVectorImpl<unsigned>&);
+  /// Calculate cost of region splitting.
+  unsigned calculateRegionSplitCost(LiveInterval &VirtReg,
+                                    AllocationOrder &Order,
+                                    BlockFrequency &BestCost,
+                                    unsigned &NumCands, bool IgnoreCSR);
+  /// Perform region splitting.
+  unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
+                         bool HasCompact,
+                         SmallVectorImpl<unsigned> &NewVRegs);
+  /// Check other options before using a callee-saved register for the first
+  /// time.
+  unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
+                                 unsigned PhysReg, unsigned &CostPerUseLimit,
+                                 SmallVectorImpl<unsigned> &NewVRegs);
   unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
                          SmallVectorImpl<unsigned>&);
   unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&,
@@ -283,6 +334,11 @@
     SmallVectorImpl<unsigned>&);
   unsigned trySplit(LiveInterval&, AllocationOrder&,
                     SmallVectorImpl<unsigned>&);
+  unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &,
+                                   SmallVectorImpl<unsigned> &,
+                                   SmallVirtRegSet &, unsigned);
+  bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<unsigned> &,
+                               SmallVirtRegSet &, unsigned);
 };
 } // end anonymous namespace
 
@@ -301,7 +357,7 @@
 
 // Hysteresis to use when comparing floats.
 // This helps stabilize decisions based on float comparisons.
-const float Hysteresis = 0.98f;
+const float Hysteresis = (2007 / 2048.0f); // 0.97998046875
 
 
 FunctionPass* llvm::createGreedyRegisterAllocator() {
@@ -396,7 +452,9 @@
   GlobalCand.clear();
 }
 
-void RAGreedy::enqueue(LiveInterval *LI) {
+void RAGreedy::enqueue(LiveInterval *LI) { enqueue(Queue, LI); }
+
+void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
   // Prioritize live ranges by size, assigning larger ranges first.
   // The queue holds (size, reg) pairs.
   const unsigned Size = LI->getSize();
@@ -414,12 +472,25 @@
     // everything else has been allocated.
     Prio = Size;
   } else {
-    if (ExtraRegInfo[Reg].Stage == RS_Assign && !LI->empty() &&
+    // Giant live ranges fall back to the global assignment heuristic, which
+    // prevents excessive spilling in pathological cases.
+    bool ReverseLocal = TRI->reverseLocalAssignment();
+    bool ForceGlobal = !ReverseLocal && TRI->mayOverrideLocalAssignment() &&
+      (Size / SlotIndex::InstrDist) > (2 * MRI->getRegClass(Reg)->getNumRegs());
+
+    if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
         LIS->intervalIsInOneMBB(*LI)) {
       // Allocate original local ranges in linear instruction order. Since they
       // are singly defined, this produces optimal coloring in the absence of
       // global interference and other constraints.
-      Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex());
+      if (!ReverseLocal)
+        Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex());
+      else {
+        // Allocating bottom up may allow many short LRGs to be assigned first
+        // to one of the cheap registers. This could be much faster for very
+        // large blocks on targets with many physical registers.
+        Prio = Indexes->getZeroIndex().getInstrDistance(LI->beginIndex());
+      }
     }
     else {
       // Allocate global and split ranges in long->short order. Long ranges that
@@ -436,14 +507,16 @@
   }
   // The virtual register number is a tie breaker for same-sized ranges.
   // Give lower vreg numbers higher priority to assign them first.
-  Queue.push(std::make_pair(Prio, ~Reg));
+  CurQueue.push(std::make_pair(Prio, ~Reg));
 }
 
-LiveInterval *RAGreedy::dequeue() {
-  if (Queue.empty())
+LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }
+
+LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
+  if (CurQueue.empty())
     return 0;
-  LiveInterval *LI = &LIS->getInterval(~Queue.top().second);
-  Queue.pop();
+  LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
+  CurQueue.pop();
   return LI;
 }
 
@@ -471,7 +544,8 @@
   if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
     if (Order.isHint(Hint)) {
       DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
-      EvictionCost MaxCost(1);
+      EvictionCost MaxCost;
+      MaxCost.setBrokenHints(1);
       if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
         evictInterference(VirtReg, Hint, NewVRegs);
         return Hint;
@@ -543,11 +617,15 @@
   if (CanSplit && IsHint && !BreaksHint)
     return true;
 
-  return A.weight > B.weight;
+  if (A.weight > B.weight) {
+    DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n');
+    return true;
+  }
+  return false;
 }
 
 /// canEvictInterference - Return true if all interferences between VirtReg and
-/// PhysReg can be evicted.  When OnlyCheap is set, don't do anything
+/// PhysReg can be evicted.
 ///
 /// @param VirtReg Live range that is about to be assigned.
 /// @param PhysReg Desired register for assignment.
@@ -618,6 +696,9 @@
         return false;
       if (Urgent)
         continue;
+      // Apply the eviction policy for non-urgent evictions.
+      if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
+        return false;
       // If !MaxCost.isMax(), then we're just looking for a cheap register.
       // Evicting another local live range in this case could lead to suboptimal
       // coloring.
@@ -625,9 +706,6 @@
           !canReassign(*Intf, PhysReg)) {
         return false;
       }
-      // Finally, apply the eviction policy for non-urgent evictions.
-      if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
-        return false;
     }
   }
   MaxCost = Cost;
@@ -685,7 +763,8 @@
   NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
 
   // Keep track of the cheapest interference seen so far.
-  EvictionCost BestCost(~0u);
+  EvictionCost BestCost;
+  BestCost.setMax();
   unsigned BestPhys = 0;
   unsigned OrderLimit = Order.getOrder().size();
 
@@ -713,7 +792,7 @@
   }
 
   Order.rewind();
-  while (unsigned PhysReg = Order.nextWithDups(OrderLimit)) {
+  while (unsigned PhysReg = Order.next(OrderLimit)) {
     if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
       continue;
     // The first use of a callee-saved register in a function has cost 1.
@@ -1172,9 +1251,7 @@
 unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                   SmallVectorImpl<unsigned> &NewVRegs) {
   unsigned NumCands = 0;
-  unsigned BestCand = NoCand;
   BlockFrequency BestCost;
-  SmallVector<unsigned, 8> UsedCands;
 
   // Check if we can split this live range around a compact region.
   bool HasCompact = calcCompactRegion(GlobalCand.front());
@@ -1186,11 +1263,33 @@
     // No benefit from the compact region, our fallback will be per-block
     // splitting. Make sure we find a solution that is cheaper than spilling.
     BestCost = calcSpillCost();
-    DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n');
+    DEBUG(dbgs() << "Cost of isolating all blocks = ";
+                 MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
   }
 
+  unsigned BestCand =
+      calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands,
+                               false/*IgnoreCSR*/);
+
+  // No solutions found, fall back to single block splitting.
+  if (!HasCompact && BestCand == NoCand)
+    return 0;
+
+  return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
+}
+
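Costing and committing the split are now separate steps, so a caller can price a region split against some other budget before performing it. A sketch of the reuse pattern, mirroring the call shape in tryAssignCSRFirstTime later in this patch:

    // Price the best region split while ignoring CSRs; commit only when it
    // comes in under the budget (here, the CSR first-use cost).
    unsigned NumCands = 0;
    unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, CSRCost,
                                                 NumCands, /*IgnoreCSR=*/true);
    if (BestCand != NoCand)
      doRegionSplit(VirtReg, BestCand, /*HasCompact=*/false, NewVRegs);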
+unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
+                                            AllocationOrder &Order,
+                                            BlockFrequency &BestCost,
+                                            unsigned &NumCands,
+                                            bool IgnoreCSR) {
+  unsigned BestCand = NoCand;
   Order.rewind();
   while (unsigned PhysReg = Order.next()) {
+    if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg))
+      if (IgnoreCSR && !MRI->isPhysRegUsed(CSR))
+        continue;
+
     // Discard bad candidates before we run out of interference cache cursors.
     // This will only affect register classes with a lot of registers (>32).
     if (NumCands == IntfCache.getMaxCursors()) {
@@ -1220,7 +1319,8 @@
       DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
       continue;
     }
-    DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = " << Cost);
+    DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = ";
+                 MBFI->printBlockFreq(dbgs(), Cost));
     if (Cost >= BestCost) {
       DEBUG({
         if (BestCand == NoCand)
@@ -1243,7 +1343,8 @@
 
     Cost += calcGlobalSplitCost(Cand);
     DEBUG({
-      dbgs() << ", total = " << Cost << " with bundles";
+      dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost)
+                                << " with bundles";
       for (int i = Cand.LiveBundles.find_first(); i>=0;
            i = Cand.LiveBundles.find_next(i))
         dbgs() << " EB#" << i;
@@ -1255,11 +1356,13 @@
     }
     ++NumCands;
   }
+  return BestCand;
+}
 
-  // No solutions found, fall back to single block splitting.
-  if (!HasCompact && BestCand == NoCand)
-    return 0;
-
+unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
+                                 bool HasCompact,
+                                 SmallVectorImpl<unsigned> &NewVRegs) {
+  SmallVector<unsigned, 8> UsedCands;
   // Prepare split editor.
   LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
   SE->reset(LREdit, SplitSpillMode);
@@ -1348,6 +1451,22 @@
 //                         Per-Instruction Splitting
 //===----------------------------------------------------------------------===//
 
+/// Get the number of allocatable registers that match the constraints of \p Reg
+/// on \p MI and that are also in \p SuperRC.
+static unsigned getNumAllocatableRegsForConstraints(
+    const MachineInstr *MI, unsigned Reg, const TargetRegisterClass *SuperRC,
+    const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
+    const RegisterClassInfo &RCI) {
+  assert(SuperRC && "Invalid register class");
+
+  const TargetRegisterClass *ConstrainedRC =
+      MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
+                                             /* ExploreBundle */ true);
+  if (!ConstrainedRC)
+    return 0;
+  return RCI.getNumAllocatableRegs(ConstrainedRC);
+}
+
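This helper feeds the skip test in tryInstructionSplit below: splitting around an instruction only pays off when that instruction constrains the virtual register to a strictly smaller allocatable set than the largest legal super-class. Schematically, with the names from the loop that follows:

    // MI imposes no real constraint when the constrained class still offers
    // as many allocatable registers as the super-class; such uses are
    // skipped rather than split around.
    bool NoConstraint =
        SuperRCNumAllocatableRegs ==
        getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII,
                                            TRI, RCI);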
 /// tryInstructionSplit - Split a live range around individual instructions.
 /// This is normally not worthwhile since the spiller is doing essentially the
 /// same thing. However, when the live range is in a constrained register
@@ -1358,8 +1477,9 @@
 unsigned
 RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                               SmallVectorImpl<unsigned> &NewVRegs) {
+  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg);
   // There is no point to this if there are no larger sub-classes.
-  if (!RegClassInfo.isProperSubClass(MRI->getRegClass(VirtReg.reg)))
+  if (!RegClassInfo.isProperSubClass(CurRC))
     return 0;
 
   // Always enable split spill mode, since we're effectively spilling to a
@@ -1373,10 +1493,18 @@
 
   DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n");
 
-  // Split around every non-copy instruction.
+  const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(CurRC);
+  unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC);
+  // Split around every non-copy instruction if this split will relax
+  // the constraints on the virtual register.
+  // Otherwise, splitting just inserts uncoalescable copies that do not help
+  // the allocation.
   for (unsigned i = 0; i != Uses.size(); ++i) {
     if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]))
-      if (MI->isFullCopy()) {
+      if (MI->isFullCopy() ||
+          SuperRCNumAllocatableRegs ==
+              getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII,
+                                                  TRI, RCI)) {
         DEBUG(dbgs() << "    skip:\t" << Uses[i] << '\t' << *MI);
         continue;
       }
@@ -1571,7 +1699,7 @@
 
   const float blockFreq =
     SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
-    (1.0f / BlockFrequency::getEntryFrequency());
+    (1.0f / MBFI->getEntryFreq());
   SmallVector<float, 8> GapWeight;
 
   Order.rewind();
@@ -1759,6 +1887,220 @@
   return tryBlockSplit(VirtReg, Order, NewVRegs);
 }
 
+//===----------------------------------------------------------------------===//
+//                          Last Chance Recoloring
+//===----------------------------------------------------------------------===//
+
+/// mayRecolorAllInterferences - Check if the virtual registers that
+/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
+/// recolored to free \p PhysReg.
+/// When true is returned, \p RecoloringCandidates has been augmented with all
+/// the live intervals that need to be recolored in order to free \p PhysReg
+/// for \p VirtReg.
+/// \p FixedRegisters contains all the virtual registers that cannot be
+/// recolored.
+bool
+RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
+                                     SmallLISet &RecoloringCandidates,
+                                     const SmallVirtRegSet &FixedRegisters) {
+  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg);
+
+  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+    // If there are LastChanceRecoloringMaxInterference or more interferences,
+    // chances are that at least one of them will not be recolorable.
+    if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >=
+        LastChanceRecoloringMaxInterference) {
+      DEBUG(dbgs() << "Early abort: too many interferences.\n");
+      return false;
+    }
+    for (unsigned i = Q.interferingVRegs().size(); i; --i) {
+      LiveInterval *Intf = Q.interferingVRegs()[i - 1];
+      // If Intf is done and sits in the same register class as VirtReg, it
+      // would not be recolorable, as it is in the same state as VirtReg.
+      if ((getStage(*Intf) == RS_Done &&
+           MRI->getRegClass(Intf->reg) == CurRC) ||
+          FixedRegisters.count(Intf->reg)) {
+        DEBUG(dbgs() << "Early abort: the inteference is not recolorable.\n");
+        return false;
+      }
+      RecoloringCandidates.insert(Intf);
+    }
+  }
+  return true;
+}
+
+/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
+/// its interferences.
+/// Last chance recoloring chooses a color for \p VirtReg and recolors every
+/// virtual register that was using it. The recoloring process may recursively
+/// use the last chance recoloring. Therefore, when a virtual register has been
+/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
+/// be last-chance-recolored again during this recoloring "session".
+/// E.g.,
+/// Let
+/// vA can use {R1, R2    }
+/// vB can use {    R2, R3}
+/// vC can use {R1        }
+/// Where vA, vB, and vC cannot be split anymore (they are reloads for
+/// instance) and they all interfere.
+///
+/// vA is assigned R1
+/// vB is assigned R2
+/// vC tries to evict vA but vA is already done.
+/// Regular register allocation fails.
+///
+/// Last chance recoloring kicks in:
+/// vC does as if vA was evicted => vC uses R1.
+/// vC is marked as fixed.
+/// vA needs to find a color.
+/// None are available.
+/// vA cannot evict vC: vC is a fixed virtual register now.
+/// vA does as if vB was evicted => vA uses R2.
+/// vB needs to find a color.
+/// R3 is available.
+/// Recoloring => vC = R1, vA = R2, vB = R3
+///
+/// \p Order defines the preferred allocation order for \p VirtReg.
+/// \p NewRegs will contain any new virtual registers that have been created
+/// (split, spill) during the process and that must be assigned.
+/// \p FixedRegisters contains all the virtual registers that cannot be
+/// recolored.
+/// \p Depth gives the current depth of the last chance recoloring.
+/// \return a physical register that can be used for VirtReg or ~0u if none
+/// exists.
+unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
+                                           AllocationOrder &Order,
+                                           SmallVectorImpl<unsigned> &NewVRegs,
+                                           SmallVirtRegSet &FixedRegisters,
+                                           unsigned Depth) {
+  DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
+  // Ranges must be Done.
+  assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
+         "Last chance recoloring should really be last chance");
+  // Set the max depth to LastChanceRecoloringMaxDepth.
+  // We may want to reconsider that if we end up with too large a search space
+  // for targets with hundreds of registers.
+  // Indeed, in that case we may want to cut the search space earlier.
+  if (Depth >= LastChanceRecoloringMaxDepth) {
+    DEBUG(dbgs() << "Abort because max depth has been reached.\n");
+    return ~0u;
+  }
+
+  // Set of live intervals that will need to be recolored.
+  SmallLISet RecoloringCandidates;
+  // Record the original mapping from virtual register to physical register in
+  // case the recoloring fails.
+  DenseMap<unsigned, unsigned> VirtRegToPhysReg;
+  // Mark VirtReg as fixed, i.e., it will not be recolored past this point in
+  // this recoloring "session".
+  FixedRegisters.insert(VirtReg.reg);
+
+  Order.rewind();
+  while (unsigned PhysReg = Order.next()) {
+    DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
+                 << PrintReg(PhysReg, TRI) << '\n');
+    RecoloringCandidates.clear();
+    VirtRegToPhysReg.clear();
+
+    // It is only possible to recolor virtual register interference.
+    if (Matrix->checkInterference(VirtReg, PhysReg) >
+        LiveRegMatrix::IK_VirtReg) {
+      DEBUG(dbgs() << "Some inteferences are not with virtual registers.\n");
+
+      continue;
+    }
+
+    // Give up early on this PhysReg if it is obvious we cannot recolor all
+    // the interferences.
+    if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
+                                    FixedRegisters)) {
+      DEBUG(dbgs() << "Some inteferences cannot be recolored.\n");
+      continue;
+    }
+
+    // RecoloringCandidates contains all the virtual registers that interfere
+    // with VirtReg on PhysReg (or one of its aliases).
+    // Enqueue them for recoloring and perform the actual recoloring.
+    PQueue RecoloringQueue;
+    for (SmallLISet::iterator It = RecoloringCandidates.begin(),
+                              EndIt = RecoloringCandidates.end();
+         It != EndIt; ++It) {
+      unsigned ItVirtReg = (*It)->reg;
+      enqueue(RecoloringQueue, *It);
+      assert(VRM->hasPhys(ItVirtReg) &&
+             "Interferences are supposed to be with allocated vairables");
+
+      // Record the current allocation.
+      VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg);
+      // Unassign the interfering live interval from the matrix so that
+      // PhysReg becomes free.
+      Matrix->unassign(**It);
+    }
+
+    // Act as if VirtReg was assigned to PhysReg so that the underlying
+    // recoloring has the right information about the interferences and the
+    // available colors.
+    Matrix->assign(VirtReg, PhysReg);
+
+    // Save the current recoloring state.
+    // If we cannot recolor all the interferences, we will have to start again
+    // at this point for the next physical register.
+    SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
+    if (tryRecoloringCandidates(RecoloringQueue, NewVRegs, FixedRegisters,
+                                Depth)) {
+      // Do not mess with the global assignment process; i.e., VirtReg must
+      // be unassigned.
+      Matrix->unassign(VirtReg);
+      return PhysReg;
+    }
+
+    DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
+                 << PrintReg(PhysReg, TRI) << '\n');
+
+    // The recoloring attempt failed, undo the changes.
+    FixedRegisters = SaveFixedRegisters;
+    Matrix->unassign(VirtReg);
+
+    for (SmallLISet::iterator It = RecoloringCandidates.begin(),
+                              EndIt = RecoloringCandidates.end();
+         It != EndIt; ++It) {
+      unsigned ItVirtReg = (*It)->reg;
+      if (VRM->hasPhys(ItVirtReg))
+        Matrix->unassign(**It);
+      Matrix->assign(**It, VirtRegToPhysReg[ItVirtReg]);
+    }
+  }
+
+  // Last chance recoloring did not work either; give up.
+  return ~0u;
+}
+
+/// tryRecoloringCandidates - Try to assign a new color to every register
+/// in \p RecoloringQueue.
+/// \p NewRegs will contain any new virtual register created during the
+/// recoloring process.
+/// \p FixedRegisters[in/out] contains all the registers that have been
+/// recolored.
+/// \return true if all virtual registers in RecoloringQueue were successfully
+/// recolored, false otherwise.
+bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
+                                       SmallVectorImpl<unsigned> &NewVRegs,
+                                       SmallVirtRegSet &FixedRegisters,
+                                       unsigned Depth) {
+  while (!RecoloringQueue.empty()) {
+    LiveInterval *LI = dequeue(RecoloringQueue);
+    DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
+    unsigned PhysReg =
+        selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1);
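+    // A zero result means LI was split or spilled instead of being assigned;
+    // ~0u means even a recursive last-chance attempt failed. Either way this
+    // recoloring session cannot complete.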
+    if (PhysReg == ~0u || !PhysReg)
+      return false;
+    DEBUG(dbgs() << "Recoloring of " << *LI
+                 << " succeeded with: " << PrintReg(PhysReg, TRI) << '\n');
+    Matrix->assign(*LI, PhysReg);
+    FixedRegisters.insert(LI->reg);
+  }
+  return true;
+}
 
 //===----------------------------------------------------------------------===//
 //                            Main Entry Point
@@ -1766,10 +2108,84 @@
 
 unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
                                  SmallVectorImpl<unsigned> &NewVRegs) {
+  SmallVirtRegSet FixedRegisters;
+  return selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
+}
+
+/// Using a CSR for the first time has a cost because it causes push|pop
+/// to be added to prologue|epilogue. Splitting a cold section of the live
+/// range, or spilling it along the cold path, can be cheaper than using the
+/// CSR for the first time. Returns the physical register if we decide to use
+/// the CSR; otherwise returns 0.
+unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg,
+                                         AllocationOrder &Order,
+                                         unsigned PhysReg,
+                                         unsigned &CostPerUseLimit,
+                                         SmallVectorImpl<unsigned> &NewVRegs) {
+  // We use the larger of the command-line option and the value reported by
+  // TRI.
+  BlockFrequency CSRCost(std::max((unsigned)CSRFirstTimeCost,
+                                  TRI->getCSRFirstUseCost()));
+  if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
+    // We choose spill over using the CSR for the first time if the spill cost
+    // is lower than CSRCost.
+    SA->analyze(&VirtReg);
+    if (calcSpillCost() >= CSRCost)
+      return PhysReg;
+
+    // We are going to spill, set CostPerUseLimit to 1 to make sure that
+    // we will not use a callee-saved register in tryEvict.
+    CostPerUseLimit = 1;
+    return 0;
+  }
+  if (getStage(VirtReg) < RS_Split) {
+    // We choose pre-splitting over using the CSR for the first time if
+    // the cost of splitting is lower than CSRCost.
+    SA->analyze(&VirtReg);
+    unsigned NumCands = 0;
+    unsigned BestCand =
+      calculateRegionSplitCost(VirtReg, Order, CSRCost, NumCands,
+                               true/*IgnoreCSR*/);
+    if (BestCand == NoCand)
+      // Use the CSR if we can't find a region split below CSRCost.
+      return PhysReg;
+
+    // Perform the actual pre-splitting.
+    doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
+    return 0;
+  }
+  return PhysReg;
+}
+
+unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
+                                     SmallVectorImpl<unsigned> &NewVRegs,
+                                     SmallVirtRegSet &FixedRegisters,
+                                     unsigned Depth) {
+  unsigned CostPerUseLimit = ~0u;
   // First try assigning a free register.
   AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
-  if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
-    return PhysReg;
+  if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) {
+    // We check other options if we are using a CSR for the first time.
+    bool CSRFirstUse = false;
+    if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg))
+      if (!MRI->isPhysRegUsed(CSR))
+        CSRFirstUse = true;
+
+    // When NewVRegs is not empty, we may have made decisions such as evicting
+    // a virtual register. In that case, go with the earlier decisions and use
+    // the physical register.
+    if ((CSRFirstTimeCost || TRI->getCSRFirstUseCost()) &&
+        CSRFirstUse && NewVRegs.empty()) {
+      unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
+                                              CostPerUseLimit, NewVRegs);
+      if (CSRReg || !NewVRegs.empty())
+        // Return now if we decide to use a CSR or create new vregs due to
+        // pre-splitting.
+        return CSRReg;
+    } else
+      return PhysReg;
+  }
 
   LiveRangeStage Stage = getStage(VirtReg);
   DEBUG(dbgs() << StageName[Stage]
@@ -1779,7 +2195,7 @@
   // queue. The RS_Split ranges already failed to do this, and they should not
   // get a second chance until they have been split.
   if (Stage != RS_Split)
-    if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs))
+    if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit))
       return PhysReg;
 
   assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");
@@ -1797,7 +2213,8 @@
   // If we couldn't allocate a register from spilling, there is probably some
   // invalid inline assembly. The base class will report it.
   if (Stage >= RS_Done || !VirtReg.isSpillable())
-    return ~0u;
+    return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
+                                   Depth);
 
   // Try splitting VirtReg or interferences.
   unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
@@ -1823,6 +2240,9 @@
                << "********** Function: " << mf.getName() << '\n');
 
   MF = &mf;
+  TRI = MF->getTarget().getRegisterInfo();
+  TII = MF->getTarget().getInstrInfo();
+  RCI.runOnMachineFunction(mf);
   if (VerifyEnabled)
     MF->verify(this, "Before greedy register allocator");
 
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 88c8201..96dbd9a 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -34,7 +34,6 @@
 #include "llvm/CodeGen/RegAllocPBQP.h"
 #include "RegisterCoalescer.h"
 #include "Spiller.h"
-#include "llvm/ADT/OwningPtr.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -45,9 +44,6 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PBQP/Graph.h"
-#include "llvm/CodeGen/PBQP/HeuristicSolver.h"
-#include "llvm/CodeGen/PBQP/Heuristics/Briggs.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/IR/Module.h"
@@ -91,8 +87,8 @@
   static char ID;
 
   /// Construct a PBQP register allocator.
-  RegAllocPBQP(OwningPtr<PBQPBuilder> &b, char *cPassID=0)
-      : MachineFunctionPass(ID), builder(b.take()), customPassID(cPassID) {
+  RegAllocPBQP(std::unique_ptr<PBQPBuilder> &b, char *cPassID=0)
+      : MachineFunctionPass(ID), builder(b.release()), customPassID(cPassID) {
     initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
     initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
     initializeLiveStacksPass(*PassRegistry::getPassRegistry());
@@ -100,15 +96,15 @@
   }
 
   /// Return the pass name.
-  virtual const char* getPassName() const {
+  const char* getPassName() const override {
     return "PBQP Register Allocator";
   }
 
   /// PBQP analysis usage.
-  virtual void getAnalysisUsage(AnalysisUsage &au) const;
+  void getAnalysisUsage(AnalysisUsage &au) const override;
 
   /// Perform register allocation
-  virtual bool runOnMachineFunction(MachineFunction &MF);
+  bool runOnMachineFunction(MachineFunction &MF) override;
 
 private:
 
@@ -120,8 +116,7 @@
   typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
   typedef std::set<unsigned> RegSet;
 
-
-  OwningPtr<PBQPBuilder> builder;
+  std::unique_ptr<PBQPBuilder> builder;
 
   char *customPassID;
 
@@ -132,7 +127,7 @@
   MachineRegisterInfo *mri;
   const MachineBlockFrequencyInfo *mbfi;
 
-  OwningPtr<Spiller> spiller;
+  std::unique_ptr<Spiller> spiller;
   LiveIntervals *lis;
   LiveStacks *lss;
   VirtRegMap *vrm;
@@ -157,13 +152,13 @@
 
 } // End anonymous namespace.
 
-unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::NodeId node) const {
+unsigned PBQPRAProblem::getVRegForNode(PBQPRAGraph::NodeId node) const {
   Node2VReg::const_iterator vregItr = node2VReg.find(node);
   assert(vregItr != node2VReg.end() && "No vreg for node.");
   return vregItr->second;
 }
 
-PBQP::Graph::NodeId PBQPRAProblem::getNodeForVReg(unsigned vreg) const {
+PBQPRAGraph::NodeId PBQPRAProblem::getNodeForVReg(unsigned vreg) const {
   VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg);
   assert(nodeItr != vreg2Node.end() && "No node for vreg.");
   return nodeItr->second;
@@ -194,8 +189,8 @@
   MachineRegisterInfo *mri = &mf->getRegInfo();
   const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
 
-  OwningPtr<PBQPRAProblem> p(new PBQPRAProblem());
-  PBQP::Graph &g = p->getGraph();
+  std::unique_ptr<PBQPRAProblem> p(new PBQPRAProblem());
+  PBQPRAGraph &g = p->getGraph();
   RegSet pregs;
 
   // Collect the set of preg intervals, record that they're used in the MF.
@@ -245,17 +240,19 @@
       vrAllowed.push_back(preg);
     }
 
-    // Construct the node.
-    PBQP::Graph::NodeId node =
-      g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0));
-
-    // Record the mapping and allowed set in the problem.
-    p->recordVReg(vreg, node, vrAllowed.begin(), vrAllowed.end());
+    PBQP::Vector nodeCosts(vrAllowed.size() + 1, 0);
 
     PBQP::PBQPNum spillCost = (vregLI->weight != 0.0) ?
         vregLI->weight : std::numeric_limits<PBQP::PBQPNum>::min();
 
-    addSpillCosts(g.getNodeCosts(node), spillCost);
+    addSpillCosts(nodeCosts, spillCost);
+
+    // Construct the node.
+    PBQPRAGraph::NodeId nId = g.addNode(std::move(nodeCosts));
+
+    // Record the mapping and allowed set in the problem.
+    p->recordVReg(vreg, nId, vrAllowed.begin(), vrAllowed.end());
   }
 
   for (RegSet::const_iterator vr1Itr = vregs.begin(), vrEnd = vregs.end();
@@ -264,24 +261,24 @@
     const LiveInterval &l1 = lis->getInterval(vr1);
     const PBQPRAProblem::AllowedSet &vr1Allowed = p->getAllowedSet(vr1);
 
-    for (RegSet::const_iterator vr2Itr = llvm::next(vr1Itr);
-         vr2Itr != vrEnd; ++vr2Itr) {
+    for (RegSet::const_iterator vr2Itr = std::next(vr1Itr); vr2Itr != vrEnd;
+         ++vr2Itr) {
       unsigned vr2 = *vr2Itr;
       const LiveInterval &l2 = lis->getInterval(vr2);
       const PBQPRAProblem::AllowedSet &vr2Allowed = p->getAllowedSet(vr2);
 
       assert(!l2.empty() && "Empty interval in vreg set?");
       if (l1.overlaps(l2)) {
-        PBQP::Graph::EdgeId edge =
-          g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2),
-                    PBQP::Matrix(vr1Allowed.size()+1, vr2Allowed.size()+1, 0));
+        PBQP::Matrix edgeCosts(vr1Allowed.size()+1, vr2Allowed.size()+1, 0);
+        addInterferenceCosts(edgeCosts, vr1Allowed, vr2Allowed, tri);
 
-        addInterferenceCosts(g.getEdgeCosts(edge), vr1Allowed, vr2Allowed, tri);
+        g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2),
+                  std::move(edgeCosts));
       }
     }
   }
 
-  return p.take();
+  return p.release();
 }
 
 void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec,
@@ -315,8 +312,8 @@
                                                 const MachineBlockFrequencyInfo *mbfi,
                                                 const RegSet &vregs) {
 
-  OwningPtr<PBQPRAProblem> p(PBQPBuilder::build(mf, lis, mbfi, vregs));
-  PBQP::Graph &g = p->getGraph();
+  std::unique_ptr<PBQPRAProblem> p(PBQPBuilder::build(mf, lis, mbfi, vregs));
+  PBQPRAGraph &g = p->getGraph();
 
   const TargetMachine &tm = mf->getTarget();
   CoalescerPair cp(*tm.getRegisterInfo());
@@ -348,8 +345,7 @@
       // value plucked randomly out of the air.
 
       PBQP::PBQPNum cBenefit =
-        copyFactor * LiveIntervals::getSpillWeight(false, true,
-                                                   mbfi->getBlockFreq(mbb));
+        copyFactor * LiveIntervals::getSpillWeight(false, true, mbfi, mi);
 
       if (cp.isPhys()) {
         if (!mf->getRegInfo().isAllocatable(dst)) {
@@ -363,33 +359,37 @@
         }
         if (pregOpt < allowed.size()) {
           ++pregOpt; // +1 to account for spill option.
-          PBQP::Graph::NodeId node = p->getNodeForVReg(src);
-          addPhysRegCoalesce(g.getNodeCosts(node), pregOpt, cBenefit);
+          PBQPRAGraph::NodeId node = p->getNodeForVReg(src);
+          llvm::dbgs() << "Reading node costs for node " << node << "\n";
+          llvm::dbgs() << "Source node: " << &g.getNodeCosts(node) << "\n";
+          PBQP::Vector newCosts(g.getNodeCosts(node));
+          addPhysRegCoalesce(newCosts, pregOpt, cBenefit);
+          g.setNodeCosts(node, newCosts);
         }
       } else {
         const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst);
         const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src);
-        PBQP::Graph::NodeId node1 = p->getNodeForVReg(dst);
-        PBQP::Graph::NodeId node2 = p->getNodeForVReg(src);
-        PBQP::Graph::EdgeId edge = g.findEdge(node1, node2);
+        PBQPRAGraph::NodeId node1 = p->getNodeForVReg(dst);
+        PBQPRAGraph::NodeId node2 = p->getNodeForVReg(src);
+        PBQPRAGraph::EdgeId edge = g.findEdge(node1, node2);
         if (edge == g.invalidEdgeId()) {
-          edge = g.addEdge(node1, node2, PBQP::Matrix(allowed1->size() + 1,
-                                                      allowed2->size() + 1,
-                                                      0));
+          PBQP::Matrix costs(allowed1->size() + 1, allowed2->size() + 1, 0);
+          addVirtRegCoalesce(costs, *allowed1, *allowed2, cBenefit);
+          g.addEdge(node1, node2, costs);
         } else {
-          if (g.getEdgeNode1(edge) == node2) {
+          if (g.getEdgeNode1Id(edge) == node2) {
             std::swap(node1, node2);
             std::swap(allowed1, allowed2);
           }
+          PBQP::Matrix costs(g.getEdgeCosts(edge));
+          addVirtRegCoalesce(costs, *allowed1, *allowed2, cBenefit);
+          g.setEdgeCosts(edge, costs);
         }
-
-        addVirtRegCoalesce(g.getEdgeCosts(edge), *allowed1, *allowed2,
-                           cBenefit);
       }
     }
   }
 
-  return p.take();
+  return p.release();
 }
 
 void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec,
@@ -472,14 +472,12 @@
   // Clear the existing allocation.
   vrm->clearAllVirt();
 
-  const PBQP::Graph &g = problem.getGraph();
+  const PBQPRAGraph &g = problem.getGraph();
   // Iterate over the nodes mapping the PBQP solution to a register
   // assignment.
-  for (PBQP::Graph::NodeItr nodeItr = g.nodesBegin(),
-                            nodeEnd = g.nodesEnd();
-       nodeItr != nodeEnd; ++nodeItr) {
-    unsigned vreg = problem.getVRegForNode(*nodeItr);
-    unsigned alloc = solution.getSelection(*nodeItr);
+  for (auto NId : g.nodeIds()) {
+    unsigned vreg = problem.getVRegForNode(NId);
+    unsigned alloc = solution.getSelection(NId);
 
     if (problem.isPRegOption(vreg, alloc)) {
       unsigned preg = problem.getPRegForOption(vreg, alloc);
@@ -587,8 +585,8 @@
     while (!pbqpAllocComplete) {
       DEBUG(dbgs() << "  PBQP Regalloc round " << round << ":\n");
 
-      OwningPtr<PBQPRAProblem> problem(
-        builder->build(mf, lis, mbfi, vregsToAlloc));
+      std::unique_ptr<PBQPRAProblem> problem(
+          builder->build(mf, lis, mbfi, vregsToAlloc));
 
 #ifndef NDEBUG
       if (pbqpDumpGraphs) {
@@ -596,7 +594,7 @@
         rs << round;
         std::string graphFileName(fqn + "." + rs.str() + ".pbqpgraph");
         std::string tmp;
-        raw_fd_ostream os(graphFileName.c_str(), tmp);
+        raw_fd_ostream os(graphFileName.c_str(), tmp, sys::fs::F_Text);
         DEBUG(dbgs() << "Dumping graph for round " << round << " to \""
               << graphFileName << "\"\n");
         problem->getGraph().dump(os);
@@ -604,8 +602,7 @@
 #endif
 
       PBQP::Solution solution =
-        PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(
-          problem->getGraph());
+        PBQP::RegAlloc::solve(problem->getGraph());
 
       pbqpAllocComplete = mapPBQPToRegAlloc(*problem, solution);
 
@@ -623,14 +620,14 @@
   return true;
 }
 
-FunctionPass* llvm::createPBQPRegisterAllocator(
-                                           OwningPtr<PBQPBuilder> &builder,
-                                           char *customPassID) {
+FunctionPass *
+llvm::createPBQPRegisterAllocator(std::unique_ptr<PBQPBuilder> &builder,
+                                  char *customPassID) {
   return new RegAllocPBQP(builder, customPassID);
 }
 
 FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
-  OwningPtr<PBQPBuilder> Builder;
+  std::unique_ptr<PBQPBuilder> Builder;
   if (pbqpCoalescing)
     Builder.reset(new PBQPBuilderWithCoalescing());
   else
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index cacd7de..aa84446 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -8,9 +8,9 @@
 //===----------------------------------------------------------------------===//
 //
 // This file implements the RegisterClassInfo class which provides dynamic
-// information about target register classes. Callee saved and reserved
-// registers depends on calling conventions and other dynamic information, so
-// some things cannot be determined statically.
+// information about target register classes. Callee-saved vs. caller-saved and
+// reserved registers depend on calling conventions and other dynamic
+// information, so some things cannot be determined statically.
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index dd86c1f..682c26c 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -15,7 +15,6 @@
 
 #define DEBUG_TYPE "regalloc"
 #include "RegisterCoalescer.h"
-#include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
@@ -112,7 +111,7 @@
     void eliminateDeadDefs();
 
     /// LiveRangeEdit callback.
-    void LRE_WillEraseInstruction(MachineInstr *MI);
+    void LRE_WillEraseInstruction(MachineInstr *MI) override;
 
     /// coalesceLocals - coalesce the LocalWorkList.
     void coalesceLocals();
@@ -188,15 +187,15 @@
       initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    void getAnalysisUsage(AnalysisUsage &AU) const override;
 
-    virtual void releaseMemory();
+    void releaseMemory() override;
 
     /// runOnMachineFunction - pass entry point
-    virtual bool runOnMachineFunction(MachineFunction&);
+    bool runOnMachineFunction(MachineFunction&) override;
 
     /// print - Implement the dump method.
-    virtual void print(raw_ostream &O, const Module* = 0) const;
+    void print(raw_ostream &O, const Module* = 0) const override;
   };
 } /// end anonymous namespace
 
@@ -283,7 +282,6 @@
     if (SrcSub) {
       Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
       if (!Dst) return false;
-      SrcSub = 0;
     } else if (!MRI.getRegClass(Src)->contains(Dst)) {
       return false;
     }
@@ -622,16 +620,15 @@
 
   // If some of the uses of IntA.reg is already coalesced away, return false.
   // It's not possible to determine whether it's safe to perform the coalescing.
-  for (MachineRegisterInfo::use_nodbg_iterator UI =
-         MRI->use_nodbg_begin(IntA.reg),
-       UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
-    MachineInstr *UseMI = &*UI;
+  for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg)) {
+    MachineInstr *UseMI = MO.getParent();
+    unsigned OpNo = &MO - &UseMI->getOperand(0);
     SlotIndex UseIdx = LIS->getInstructionIndex(UseMI);
     LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
     if (US == IntA.end() || US->valno != AValNo)
       continue;
     // If this use is tied to a def, we can't rewrite the register.
-    if (UseMI->isRegTiedToDefOperand(UI.getOperandNo()))
+    if (UseMI->isRegTiedToDefOperand(OpNo))
       return false;
   }
 
@@ -669,8 +666,8 @@
   // Update uses of IntA of the specific Val# with IntB.
   for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg),
          UE = MRI->use_end(); UI != UE;) {
-    MachineOperand &UseMO = UI.getOperand();
-    MachineInstr *UseMI = &*UI;
+    MachineOperand &UseMO = *UI;
+    MachineInstr *UseMI = UseMO.getParent();
     ++UI;
     if (UseMI->isDebugValue()) {
       // FIXME These don't have an instruction index.  Not clear we have enough
@@ -769,6 +766,14 @@
   if (DstOperand.getSubReg() && !DstOperand.isUndef())
     return false;
 
+  // If both SrcIdx and DstIdx are set, correct rematerialization would widen
+  // the register substantially (beyond both source and dest size). This is bad
+  // for performance since it can cascade through a function, introducing many
+  // extra spills and fills (e.g. ARM can easily end up copying QQQQPR registers
+  // around after a few subreg copies).
+  if (SrcIdx && DstIdx)
+    return false;
+
   const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
   if (!DefMI->isImplicitDef()) {
     if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
@@ -793,9 +798,9 @@
 
   MachineBasicBlock *MBB = CopyMI->getParent();
   MachineBasicBlock::iterator MII =
-    llvm::next(MachineBasicBlock::iterator(CopyMI));
+    std::next(MachineBasicBlock::iterator(CopyMI));
   TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, DefMI, *TRI);
-  MachineInstr *NewMI = prior(MII);
+  MachineInstr *NewMI = std::prev(MII);
 
   LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI);
   CopyMI->eraseFromParent();
@@ -816,31 +821,19 @@
   }
 
   if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
+    const TargetRegisterClass *NewRC = CP.getNewRC();
     unsigned NewIdx = NewMI->getOperand(0).getSubReg();
-    const TargetRegisterClass *RCForInst;
-    if (NewIdx)
-      RCForInst = TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg), DefRC,
-                                                NewIdx);
 
-    if (MRI->constrainRegClass(DstReg, DefRC)) {
-      // The materialized instruction is quite capable of setting DstReg
-      // directly, but it may still have a now-trivial subregister index which
-      // we should clear.
-      NewMI->getOperand(0).setSubReg(0);
-    } else if (NewIdx && RCForInst) {
-      // The subreg index on NewMI is essential; we still have to make sure
-      // DstReg:idx is in a class that NewMI can use.
-      MRI->constrainRegClass(DstReg, RCForInst);
-    } else {
-      // DstReg is actually incompatible with NewMI, we have to move to a
-      // super-reg's class. This could come from a sequence like:
-      //     GR32 = MOV32r0
-      //     GR8 = COPY GR32:sub_8
-      MRI->setRegClass(DstReg, CP.getNewRC());
-      updateRegDefsUses(DstReg, DstReg, DstIdx);
-      NewMI->getOperand(0).setSubReg(
-          TRI->composeSubRegIndices(SrcIdx, DefMI->getOperand(0).getSubReg()));
-    }
+    if (NewIdx)
+      NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx);
+    else
+      NewRC = TRI->getCommonSubClass(NewRC, DefRC);
+
+    assert(NewRC && "subreg chosen for remat incompatible with instruction");
+    MRI->setRegClass(DstReg, NewRC);
+
+    updateRegDefsUses(DstReg, DstReg, DstIdx);
+    NewMI->getOperand(0).setSubReg(NewIdx);
   } else if (NewMI->getOperand(0).getReg() != CopyDstReg) {
     // The New instruction may be defining a sub-register of what's actually
     // been asked for. If so it must implicitly define the whole thing.
@@ -916,10 +909,7 @@
   DstInt->removeValNo(DeadVNI);
 
   // Find new undef uses.
-  for (MachineRegisterInfo::reg_nodbg_iterator
-         I = MRI->reg_nodbg_begin(DstInt->reg), E = MRI->reg_nodbg_end();
-       I != E; ++I) {
-    MachineOperand &MO = I.getOperand();
+  for (MachineOperand &MO : MRI->reg_nodbg_operands(DstInt->reg)) {
     if (MO.isDef() || MO.isUndef())
       continue;
     MachineInstr *MI = MO.getParent();
@@ -944,8 +934,11 @@
   LiveInterval *DstInt = DstIsPhys ? 0 : &LIS->getInterval(DstReg);
 
   SmallPtrSet<MachineInstr*, 8> Visited;
-  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg);
-       MachineInstr *UseMI = I.skipInstruction();) {
+  for (MachineRegisterInfo::reg_instr_iterator
+       I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end();
+       I != E; ) {
+    MachineInstr *UseMI = &*(I++);
+
     // Each instruction can only be rewritten once because sub-register
     // composition is not always idempotent. When SrcReg != DstReg, rewriting
     // the UseMI operands removes them from the SrcReg use-def chain, but when
@@ -956,7 +949,7 @@
 
     SmallVector<unsigned,8> Ops;
     bool Reads, Writes;
-    tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
+    std::tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
 
     // If SrcReg wasn't read, it may still be the case that DstReg is live-in
     // because SrcReg is a sub-register.
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index 092ecdd..97817da 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -506,7 +506,13 @@
         DeadDef = LRQ.isDeadDef();
       }
     }
-    if (!DeadDef) {
+    if (DeadDef) {
+      // LiveIntervals knows this def is dead even though its MachineOperand is
+      // not flagged as such. Since this register will not be recorded as
+      // live-out, increase its PDiff value to avoid underflowing pressure.
+      if (PDiff)
+        PDiff->addPressureChange(Reg, false, MRI);
+    } else {
       if (LiveRegs.erase(Reg))
         decreaseRegPressure(Reg);
       else
@@ -876,9 +882,9 @@
                            SlotIndex PriorUseIdx, SlotIndex NextUseIdx,
                            const MachineRegisterInfo *MRI,
                            const LiveIntervals *LIS) {
-  for (MachineRegisterInfo::use_nodbg_iterator
-         UI = MRI->use_nodbg_begin(Reg), UE = MRI->use_nodbg_end();
-         UI != UE; UI.skipInstruction()) {
+  for (MachineRegisterInfo::use_instr_nodbg_iterator
+       UI = MRI->use_instr_nodbg_begin(Reg),
+       UE = MRI->use_instr_nodbg_end(); UI != UE; ++UI) {
       const MachineInstr* MI = &*UI;
       if (MI->isDebugValue())
         continue;
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 75ebdaa..bfd26dc 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -175,7 +175,7 @@
     Tracking = true;
   } else {
     assert(MBBI != MBB->end() && "Already past the end of the basic block!");
-    MBBI = llvm::next(MBBI);
+    MBBI = std::next(MBBI);
   }
   assert(MBBI != MBB->end() && "Already at the end of the basic block!");
 
@@ -415,7 +415,7 @@
            "Cannot scavenge register without an emergency spill slot!");
     TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex,
                              RC, TRI);
-    MachineBasicBlock::iterator II = prior(I);
+    MachineBasicBlock::iterator II = std::prev(I);
 
     unsigned FIOperandNum = getFrameIndexOperandNum(II);
     TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
@@ -423,13 +423,13 @@
     // Restore the scavenged register before its use (or first terminator).
     TII->loadRegFromStackSlot(*MBB, UseMI, SReg, Scavenged[SI].FrameIndex,
                               RC, TRI);
-    II = prior(UseMI);
+    II = std::prev(UseMI);
 
     FIOperandNum = getFrameIndexOperandNum(II);
     TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
   }
 
-  Scavenged[SI].Restore = prior(UseMI);
+  Scavenged[SI].Restore = std::prev(UseMI);
 
   // Doing this here leads to infinite regress.
   // Scavenged[SI].Reg = SReg;
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 75e3790..d08eb65 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -63,7 +63,7 @@
 /// not already.  It also adds the current node as a successor of the
 /// specified node.
 bool SUnit::addPred(const SDep &D, bool Required) {
-  // If this node already has this depenence, don't add a redundant one.
+  // If this node already has this dependence, don't add a redundant one.
   for (SmallVectorImpl<SDep>::iterator I = Preds.begin(), E = Preds.end();
          I != E; ++I) {
     // Zero-latency weak edges may be added purely for heuristic ordering. Don't
@@ -301,8 +301,8 @@
 
   SUnit::pred_iterator BestI = Preds.begin();
   unsigned MaxDepth = BestI->getSUnit()->getDepth();
-  for (SUnit::pred_iterator
-         I = llvm::next(BestI), E = Preds.end(); I != E; ++I) {
+  for (SUnit::pred_iterator I = std::next(BestI), E = Preds.end(); I != E;
+       ++I) {
     if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth)
       BestI = I;
   }
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 7f1f9c4..c8328ad 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
@@ -44,13 +45,24 @@
     cl::ZeroOrMore, cl::init(false),
     cl::desc("Enable use of AA during MI GAD construction"));
 
+// FIXME: Enable the use of TBAA. There are two known issues preventing this:
+//   1. Stack coloring does not update TBAA when merging allocas
+//   2. CGP inserts ptrtoint/inttoptr pairs when sinking address computations.
+//      Because BasicAA does not handle inttoptr, we'll often miss basic type
+//      punning idioms that we need to catch so we don't miscompile real-world
+//      code.
+static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
+    cl::init(false), cl::desc("Enable use of TBAA during MI GAD construction"));
+
 ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                      const MachineLoopInfo &mli,
                                      const MachineDominatorTree &mdt,
                                      bool IsPostRAFlag,
+                                     bool RemoveKillFlags,
                                      LiveIntervals *lis)
   : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis),
-    IsPostRA(IsPostRAFlag), CanHandleTerminators(false), FirstDbgValue(0) {
+    IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags),
+    CanHandleTerminators(false), FirstDbgValue(0) {
   assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
   DbgValues.clear();
   assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
@@ -136,31 +148,32 @@
   if (!V)
     return;
 
+  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+    // For now, ignore PseudoSourceValues which may alias LLVM IR values
+    // because the code that uses this function has no way to cope with
+    // such aliases.
+    if (!PSV->isAliased(MFI)) {
+      bool MayAlias = PSV->mayAlias(MFI);
+      Objects.push_back(UnderlyingObjectsVector::value_type(V, MayAlias));
+    }
+    return;
+  }
+
   SmallVector<Value *, 4> Objs;
   getUnderlyingObjects(V, Objs);
 
   for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end();
          I != IE; ++I) {
-    bool MayAlias = true;
     V = *I;
 
-    if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
-      // For now, ignore PseudoSourceValues which may alias LLVM IR values
-      // because the code that uses this function has no way to cope with
-      // such aliases.
+    assert(!isa<PseudoSourceValue>(V) && "Underlying value is a stack slot!");
 
-      if (PSV->isAliased(MFI)) {
-        Objects.clear();
-        return;
-      }
-
-      MayAlias = PSV->mayAlias(MFI);
-    } else if (!isIdentifiedObject(V)) {
+    if (!isIdentifiedObject(V)) {
       Objects.clear();
       return;
     }
 
-    Objects.push_back(UnderlyingObjectsVector::value_type(V, MayAlias));
+    Objects.push_back(UnderlyingObjectsVector::value_type(V, true));
   }
 }
 
@@ -284,8 +297,8 @@
 /// this SUnit to following instructions in the same scheduling region that
 /// depend the physical register referenced at OperIdx.
 void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
-  const MachineInstr *MI = SU->getInstr();
-  const MachineOperand &MO = MI->getOperand(OperIdx);
+  MachineInstr *MI = SU->getInstr();
+  MachineOperand &MO = MI->getOperand(OperIdx);
 
   // Optionally add output and anti dependencies. For anti
   // dependencies we use a latency of 0 because for a multi-issue
@@ -323,6 +336,8 @@
     // retrieve the existing SUnits list for this register's uses.
     // Push this SUnit on the use list.
     Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg()));
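+    // When the caller requested it, conservatively drop kill flags on uses:
+    // scheduling may move instructions past the old kill point, leaving the
+    // flags stale.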
+    if (RemoveKillFlags)
+      MO.setIsKill(false);
   }
   else {
     addPhysRegDataDeps(SU, OperIdx);
@@ -507,6 +522,10 @@
   if (MIa == MIb)
     return false;
 
+  // FIXME: Need to handle multiple memory operands to support all targets.
+  if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
+    return true;
+
   if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI))
     return true;
 
@@ -522,10 +541,6 @@
   MachineMemOperand *MMOa = *MIa->memoperands_begin();
   MachineMemOperand *MMOb = *MIb->memoperands_begin();
 
-  // FIXME: Need to handle multiple memory operands to support all targets.
-  if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
-    llvm_unreachable("Multiple memory operands.");
-
   // The following interface to AA is fashioned after DAGCombiner::isAlias
   // and operates with MachineMemOperand offset with some important
   // assumptions:
@@ -550,10 +565,10 @@
   int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
 
   AliasAnalysis::AliasResult AAResult = AA->alias(
-  AliasAnalysis::Location(MMOa->getValue(), Overlapa,
-                          MMOa->getTBAAInfo()),
-  AliasAnalysis::Location(MMOb->getValue(), Overlapb,
-                          MMOb->getTBAAInfo()));
+      AliasAnalysis::Location(MMOa->getValue(), Overlapa,
+                              UseTBAA ? MMOa->getTBAAInfo() : 0),
+      AliasAnalysis::Location(MMOb->getValue(), Overlapb,
+                              UseTBAA ? MMOb->getTBAAInfo() : 0));
 
   return (AAResult != AliasAnalysis::NoAlias);
 }
@@ -687,10 +702,32 @@
 
     // Assign the Latency field of SU using target-provided information.
     SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
+
+    // If this SUnit uses a reserved or unbuffered resource, mark it as such.
+    // Reserved resources (BufferSize=0) block issue and stall the whole
+    // pipeline; unbuffered resources (BufferSize=1) model in-order execution
+    // pipelines within an out-of-order core.
+    if (SchedModel.hasInstrSchedModel()) {
+      const MCSchedClassDesc *SC = getSchedClass(SU);
+      for (TargetSchedModel::ProcResIter
+             PI = SchedModel.getWriteProcResBegin(SC),
+             PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
+        switch (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize) {
+        case 0:
+          SU->hasReservedResource = true;
+          break;
+        case 1:
+          SU->isUnbuffered = true;
+          break;
+        default:
+          break;
+        }
+      }
+    }
   }
 }
 
-/// If RegPressure is non null, compute register pressure as a side effect. The
+/// If RegPressure is non-null, compute register pressure as a side effect. The
 /// DAG builder is an efficient place to do it because it already visits
 /// operands.
 void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
@@ -720,7 +757,7 @@
   // so that they can be given more precise dependencies. We track
   // separately the known memory locations that may alias and those
   // that are known not to alias
-  MapVector<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
+  MapVector<const Value *, std::vector<SUnit *> > AliasMemDefs, NonAliasMemDefs;
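+  // Each underlying object now maps to all of its pending defs: with AA-based
+  // dependencies, several stores to the same object may each need chain
+  // edges. Without AA the lists are trimmed to a single store (see below).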
   MapVector<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
   std::set<SUnit*> RejectMemNodes;
 
@@ -747,7 +784,7 @@
   MachineInstr *DbgMI = NULL;
   for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
        MII != MIE; --MII) {
-    MachineInstr *MI = prior(MII);
+    MachineInstr *MI = std::prev(MII);
     if (MI && DbgMI) {
       DbgValues.push_back(std::make_pair(DbgMI, MI));
       DbgMI = NULL;
@@ -763,11 +800,13 @@
     if (RPTracker) {
       PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : 0;
       RPTracker->recede(/*LiveUses=*/0, PDiff);
-      assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI");
+      assert(RPTracker->getPos() == std::prev(MII) &&
+             "RPTracker can't find MI");
     }
 
-    assert((CanHandleTerminators || (!MI->isTerminator() && !MI->isLabel())) &&
-           "Cannot schedule terminators or labels!");
+    assert(
+        (CanHandleTerminators || (!MI->isTerminator() && !MI->isPosition())) &&
+        "Cannot schedule terminators or labels!");
 
     // Add register-based dependencies (data, anti, and output).
     bool HasVRegDef = false;
@@ -815,9 +854,11 @@
     if (isGlobalMemoryObject(AA, MI)) {
       // Be conservative with these and add dependencies on all memory
       // references, even those that are known to not alias.
-      for (MapVector<const Value *, SUnit *>::iterator I =
+      for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
              NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
-        I->second->addPred(SDep(SU, SDep::Barrier));
+        for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
+          I->second[i]->addPred(SDep(SU, SDep::Barrier));
+        }
       }
       for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
              NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
@@ -853,9 +894,11 @@
       for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
         addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes,
                            TrueMemOrderLatency);
-      for (MapVector<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
-           E = AliasMemDefs.end(); I != E; ++I)
-        addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes);
+      for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
+           AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) {
+        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+          addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes);
+      }
       for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
            AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
         for (unsigned i = 0, e = I->second.size(); i != e; ++i)
@@ -887,19 +930,29 @@
         // A store to a specific PseudoSourceValue. Add precise dependencies.
         // Record the def in MemDefs, first adding a dep if there is
         // an existing def.
-        MapVector<const Value *, SUnit *>::iterator I =
+        MapVector<const Value *, std::vector<SUnit *> >::iterator I =
           ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
-        MapVector<const Value *, SUnit *>::iterator IE =
+        MapVector<const Value *, std::vector<SUnit *> >::iterator IE =
           ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
         if (I != IE) {
-          addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes,
-                             0, true);
-          I->second = SU;
+          for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+            addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes,
+                               0, true);
+
+          // If we're not using AA, then we only need one store per object.
+          if (!AAForDep)
+            I->second.clear();
+          I->second.push_back(SU);
         } else {
-          if (ThisMayAlias)
-            AliasMemDefs[V] = SU;
-          else
-            NonAliasMemDefs[V] = SU;
+          if (ThisMayAlias) {
+            if (!AAForDep)
+              AliasMemDefs[V].clear();
+            AliasMemDefs[V].push_back(SU);
+          } else {
+            if (!AAForDep)
+              NonAliasMemDefs[V].clear();
+            NonAliasMemDefs[V].push_back(SU);
+          }
         }
         // Handle the uses in MemUses, if there are any.
         MapVector<const Value *, std::vector<SUnit *> >::iterator J =
@@ -949,9 +1002,11 @@
         if (Objs.empty()) {
           // A load with no underlying object. Depend on all
           // potentially aliasing stores.
-          for (MapVector<const Value *, SUnit *>::iterator I =
+          for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
                  AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
-            addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes);
+            for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+              addChainDependency(AAForDep, MFI, SU, I->second[i],
+                                 RejectMemNodes);
 
           PendingLoads.push_back(SU);
           MayAlias = true;
@@ -968,13 +1023,14 @@
             MayAlias = true;
 
           // A load from a specific PseudoSourceValue. Add precise dependencies.
-          MapVector<const Value *, SUnit *>::iterator I =
+          MapVector<const Value *, std::vector<SUnit *> >::iterator I =
             ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
-          MapVector<const Value *, SUnit *>::iterator IE =
+          MapVector<const Value *, std::vector<SUnit *> >::iterator IE =
             ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
           if (I != IE)
-            addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes,
-                               0, true);
+            for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+              addChainDependency(AAForDep, MFI, SU, I->second[i],
+                                 RejectMemNodes, 0, true);
           if (ThisMayAlias)
             AliasMemUses[V].push_back(SU);
           else
@@ -999,6 +1055,145 @@
   PendingLoads.clear();
 }
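
A hedged source-level illustration of why the def maps above now hold a vector
of SUnits per underlying object: with AA available, per-store chain edges can
be filtered individually, which a single-entry map cannot support.

#include <cstdint>
void twoStores(int32_t *p) {
  p[0] = 1;   // store A to underlying object "p"
  p[1] = 2;   // store B: same object, but AA proves it is disjoint from A
  // A later load of p[0] needs a chain dependency only on store A. If store B
  // had simply replaced A in the map, AA would reject the B->load edge and
  // the required A->load edge would already have been lost.
  (void)p[0];
}
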
 
+/// \brief Initialize register live-range state for updating kills.
+void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) {
+  // Start with no live registers.
+  LiveRegs.reset();
+
+  // Examine the live-in regs of all successors.
+  for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+       SE = BB->succ_end(); SI != SE; ++SI) {
+    for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+         E = (*SI)->livein_end(); I != E; ++I) {
+      unsigned Reg = *I;
+      // Repeat, for reg and all subregs.
+      for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+           SubRegs.isValid(); ++SubRegs)
+        LiveRegs.set(*SubRegs);
+    }
+  }
+}
+
+bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) {
+  // Setting kill flag...
+  if (!MO.isKill()) {
+    MO.setIsKill(true);
+    return false;
+  }
+
+  // If MO itself is live, clear the kill flag...
+  if (LiveRegs.test(MO.getReg())) {
+    MO.setIsKill(false);
+    return false;
+  }
+
+  // If any subreg of MO is live, then create an imp-def for that
+  // subreg and keep MO marked as killed.
+  MO.setIsKill(false);
+  bool AllDead = true;
+  const unsigned SuperReg = MO.getReg();
+  MachineInstrBuilder MIB(MF, MI);
+  for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) {
+    if (LiveRegs.test(*SubRegs)) {
+      MIB.addReg(*SubRegs, RegState::ImplicitDefine);
+      AllDead = false;
+    }
+  }
+
+  if (AllDead)
+    MO.setIsKill(true);
+  return false;
+}
+
+// FIXME: Reuse the LivePhysRegs utility for this.
+void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
+  DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
+
+  LiveRegs.resize(TRI->getNumRegs());
+  BitVector killedRegs(TRI->getNumRegs());
+
+  startBlockForKills(MBB);
+
+  // Examine block from end to start...
+  unsigned Count = MBB->size();
+  for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
+       I != E; --Count) {
+    MachineInstr *MI = --I;
+    if (MI->isDebugValue())
+      continue;
+
+    // Update liveness. Registers that are defined but not used in this
+    // instruction are now dead. Clear the register and all of its subregs,
+    // since they are completely redefined here.
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (MO.isRegMask())
+        LiveRegs.clearBitsNotInMask(MO.getRegMask());
+      if (!MO.isReg()) continue;
+      unsigned Reg = MO.getReg();
+      if (Reg == 0) continue;
+      if (!MO.isDef()) continue;
+      // Ignore two-addr defs.
+      if (MI->isRegTiedToUseOperand(i)) continue;
+
+      // Repeat for reg and all subregs.
+      for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+           SubRegs.isValid(); ++SubRegs)
+        LiveRegs.reset(*SubRegs);
+    }
+
+    // Examine all used registers and set/clear kill flag. When a
+    // register is used multiple times we only set the kill flag on
+    // the first use. Don't set kill flags on undef operands.
+    killedRegs.reset();
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
+      unsigned Reg = MO.getReg();
+      if ((Reg == 0) || MRI.isReserved(Reg)) continue;
+
+      bool kill = false;
+      if (!killedRegs.test(Reg)) {
+        kill = true;
+        // A register is not killed if any subregs are live...
+        for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
+          if (LiveRegs.test(*SubRegs)) {
+            kill = false;
+            break;
+          }
+        }
+
+        // If no subreg is live, then the register is killed here exactly
+        // when it is not live below this instruction.
+        if (kill)
+          kill = !LiveRegs.test(Reg);
+      }
+
+      if (MO.isKill() != kill) {
+        DEBUG(dbgs() << "Fixing " << MO << " in ");
+        // Warning: toggleKillFlag may invalidate MO.
+        toggleKillFlag(MI, MO);
+        DEBUG(MI->dump());
+      }
+
+      killedRegs.set(Reg);
+    }
+
+    // Mark any used register (except undef uses) and its subregs as
+    // now live...
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
+      unsigned Reg = MO.getReg();
+      if ((Reg == 0) || MRI.isReserved(Reg)) continue;
+
+      for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+           SubRegs.isValid(); ++SubRegs)
+        LiveRegs.set(*SubRegs);
+    }
+  }
+}
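
The rule enforced above can be seen in a minimal, self-contained sketch. This
toy version assumes plain integer register ids with no subregisters, reserved
registers, or tied operands; only the bottom-up kill logic carries over:
walking in reverse, a def ends liveness, and a use is the killing use exactly
when the register is not live below that point.

#include <bitset>
#include <cstdio>
#include <vector>

struct Inst { std::vector<int> Defs, Uses; };

int main() {
  // Toy block:  r1 = ...;  r2 = r1 + r1;  use(r2)
  std::vector<Inst> Block = {{{1}, {}}, {{2}, {1, 1}}, {{}, {2}}};
  std::bitset<8> Live; // live-out set; empty in this toy example
  for (auto I = Block.rbegin(); I != Block.rend(); ++I) {
    for (int D : I->Defs)
      Live.reset(D);                       // fully defined => dead above
    for (int U : I->Uses) {
      bool Kill = !Live.test(U);           // first use seen bottom-up kills
      std::printf("use r%d%s\n", U, Kill ? " <kill>" : "");
      Live.set(U);                         // live above this use
    }
  }
  return 0;
}
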
+
 void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   SU->getInstr()->dump();
@@ -1234,7 +1429,7 @@
 
   const SDep *backtrack() {
     DFSStack.pop_back();
-    return DFSStack.empty() ? 0 : llvm::prior(DFSStack.back().second);
+    return DFSStack.empty() ? 0 : std::prev(DFSStack.back().second);
   }
 
   const SUnit *getCurr() const { return DFSStack.back().first; }
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index 8ddb3e8..f59c6cf 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -14,7 +14,6 @@
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
diff --git a/lib/CodeGen/SelectionDAG/Android.mk b/lib/CodeGen/SelectionDAG/Android.mk
index 3f28e08..0e52ee3 100644
--- a/lib/CodeGen/SelectionDAG/Android.mk
+++ b/lib/CodeGen/SelectionDAG/Android.mk
@@ -41,6 +41,7 @@
 
 # For the device
 # =====================================================
+ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
 include $(CLEAR_VARS)
 
 LOCAL_SRC_FILES := $(codegen_selectiondag_SRC_FILES)
@@ -52,3 +53,4 @@
 include $(LLVM_DEVICE_BUILD_MK)
 include $(LLVM_GEN_INTRINSICS_MK)
 include $(BUILD_STATIC_LIBRARY)
+endif
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 43f72c5..cc0c5fa 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -50,11 +50,28 @@
 namespace {
   static cl::opt<bool>
     CombinerAA("combiner-alias-analysis", cl::Hidden,
-               cl::desc("Turn on alias analysis during testing"));
+               cl::desc("Enable DAG combiner alias-analysis heuristics"));
 
   static cl::opt<bool>
     CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
-               cl::desc("Include global information in alias analysis"));
+               cl::desc("Enable DAG combiner's use of IR alias analysis"));
+
+// FIXME: Enable the use of TBAA. There are two known issues preventing this:
+//   1. Stack coloring does not update TBAA when merging allocas
+//   2. CGP inserts ptrtoint/inttoptr pairs when sinking address computations.
+//      Because BasicAA does not handle inttoptr, we'll often miss basic type
+//      punning idioms that we need to catch so we don't miscompile real-world
+//      code.
+  static cl::opt<bool>
+    UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(false),
+               cl::desc("Enable DAG combiner's use of TBAA"));
+
+#ifndef NDEBUG
+  static cl::opt<std::string>
+    CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
+               cl::desc("Only use DAG-combiner alias analysis in this"
+                        " function"));
+#endif
 
   /// Hidden option to stress test load slicing, i.e., when this option
   /// is enabled, load slicing bypasses most of its profitability guards.
@@ -212,6 +229,7 @@
     SDValue visitSHL(SDNode *N);
     SDValue visitSRA(SDNode *N);
     SDValue visitSRL(SDNode *N);
+    SDValue visitRotate(SDNode *N);
     SDValue visitCTLZ(SDNode *N);
     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
     SDValue visitCTTZ(SDNode *N);
@@ -257,11 +275,12 @@
     SDValue visitCONCAT_VECTORS(SDNode *N);
     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
     SDValue visitVECTOR_SHUFFLE(SDNode *N);
+    SDValue visitINSERT_SUBVECTOR(SDNode *N);
 
     SDValue XformToShuffleWithZero(SDNode *N);
     SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
 
-    SDValue visitShiftByConstant(SDNode *N, unsigned Amt);
+    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
 
     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
     SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
@@ -271,6 +290,11 @@
                              bool NotExtCompare = false);
     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                           SDLoc DL, bool foldBooleans = true);
+
+    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
+                           SDValue &CC) const;
+    bool isOneUseSetCC(SDValue N) const;
+
     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                          unsigned HiOp);
     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
@@ -280,6 +304,10 @@
     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                bool DemandHighBits = true);
     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
+    SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
+                              SDValue InnerPos, SDValue InnerNeg,
+                              unsigned PosOpcode, unsigned NegOpcode,
+                              SDLoc DL);
     SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL);
     SDValue ReduceLoadWidth(SDNode *N);
     SDValue ReduceLoadOpStoreWidth(SDNode *N);
@@ -326,6 +354,14 @@
     /// \return True if some memory operations were changed.
     bool MergeConsecutiveStores(StoreSDNode *N);
 
+    /// \brief Try to transform a truncation where C is a constant:
+    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
+    ///
+    /// \p N needs to be a truncation and its first operand an AND. Other
+    /// requirements are checked by the function (e.g. that trunc is
+    /// single-use); if they are not met, an empty SDValue is returned.
+    SDValue distributeTruncateThroughAnd(SDNode *N);
+
   public:
     DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
         : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
@@ -378,7 +414,7 @@
   explicit WorkListRemover(DAGCombiner &dc)
     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
 
-  virtual void NodeDeleted(SDNode *N, SDNode *E) {
+  void NodeDeleted(SDNode *N, SDNode *E) override {
     DC.removeFromWorkList(N);
   }
 };
@@ -566,79 +602,121 @@
   }
 }
 
-
 // isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
-// that selects between the values 1 and 0, making it equivalent to a setcc.
-// Also, set the incoming LHS, RHS, and CC references to the appropriate
-// nodes based on the type of node we are checking.  This simplifies life a
-// bit for the callers.
-static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
-                              SDValue &CC) {
+// that selects between the target values used for true and false, making it
+// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
+// the appropriate nodes based on the type of node we are checking. This
+// simplifies life a bit for the callers.
+bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
+                                    SDValue &CC) const {
   if (N.getOpcode() == ISD::SETCC) {
     LHS = N.getOperand(0);
     RHS = N.getOperand(1);
     CC  = N.getOperand(2);
     return true;
   }
-  if (N.getOpcode() == ISD::SELECT_CC &&
-      N.getOperand(2).getOpcode() == ISD::Constant &&
-      N.getOperand(3).getOpcode() == ISD::Constant &&
-      cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 &&
-      cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) {
-    LHS = N.getOperand(0);
-    RHS = N.getOperand(1);
-    CC  = N.getOperand(4);
-    return true;
-  }
-  return false;
+
+  if (N.getOpcode() != ISD::SELECT_CC ||
+      !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
+      !TLI.isConstFalseVal(N.getOperand(3).getNode()))
+    return false;
+
+  LHS = N.getOperand(0);
+  RHS = N.getOperand(1);
+  CC  = N.getOperand(4);
+  return true;
 }
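
To see the generalized equivalence concretely, here is a hedged scalar sketch
assuming ZeroOrNegativeOneBooleanContent (true is all-ones, false is zero):
selecting between those two target values on a comparison *is* the comparison
result itself.

#include <cstdint>
// setcc-style result: 0 or all-ones.
int32_t setcc_lt(int32_t a, int32_t b) { return -(int32_t)(a < b); }
// select_cc between the target's true (-1) and false (0) values: same value.
int32_t selectcc_lt(int32_t a, int32_t b) { return a < b ? -1 : 0; }
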
 
 // isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
 // one use.  If this is true, it allows the users to invert the operation for
 // free when it is profitable to do so.
-static bool isOneUseSetCC(SDValue N) {
+bool DAGCombiner::isOneUseSetCC(SDValue N) const {
   SDValue N0, N1, N2;
   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
     return true;
   return false;
 }
 
+/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose
+/// elements are all the same constant or undefined.
+static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
+  BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
+  if (!C)
+    return false;
+
+  APInt SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+  EVT EltVT = N->getValueType(0).getVectorElementType();
+  return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+                             HasAnyUndefs) &&
+          EltVT.getSizeInBits() >= SplatBitSize);
+}
+
+// \brief Returns the SDNode if it is a constant BuildVector or constant int.
+static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) {
+  if (isa<ConstantSDNode>(N))
+    return N.getNode();
+  BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
+  if (BV && BV->isConstant())
+    return BV;
+  return NULL;
+}
+
+// \brief Returns the SDNode if it is a constant splat BuildVector or constant
+// int.
+static ConstantSDNode *isConstOrConstSplat(SDValue N) {
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
+    return CN;
+
+  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N))
+    return BV->getConstantSplatValue();
+
+  return nullptr;
+}
+
 SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
                                     SDValue N0, SDValue N1) {
   EVT VT = N0.getValueType();
-  if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) {
-    if (isa<ConstantSDNode>(N1)) {
-      // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
-      SDValue OpNode =
-        DAG.FoldConstantArithmetic(Opc, VT,
-                                   cast<ConstantSDNode>(N0.getOperand(1)),
-                                   cast<ConstantSDNode>(N1));
-      return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
-    }
-    if (N0.hasOneUse()) {
-      // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use
-      SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT,
-                                   N0.getOperand(0), N1);
-      AddToWorkList(OpNode.getNode());
-      return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
+  if (N0.getOpcode() == Opc) {
+    if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) {
+      if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) {
+        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
+        SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R);
+        if (!OpNode.getNode())
+          return SDValue();
+        return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+      }
+      if (N0.hasOneUse()) {
+        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
+        // use
+        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
+        if (!OpNode.getNode())
+          return SDValue();
+        AddToWorkList(OpNode.getNode());
+        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
+      }
     }
   }
 
-  if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) {
-    if (isa<ConstantSDNode>(N0)) {
-      // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
-      SDValue OpNode =
-        DAG.FoldConstantArithmetic(Opc, VT,
-                                   cast<ConstantSDNode>(N1.getOperand(1)),
-                                   cast<ConstantSDNode>(N0));
-      return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
-    }
-    if (N1.hasOneUse()) {
-      // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use
-      SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT,
-                                   N1.getOperand(0), N0);
-      AddToWorkList(OpNode.getNode());
-      return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
+  if (N1.getOpcode() == Opc) {
+    if (SDNode *R = isConstantBuildVectorOrConstantInt(N1.getOperand(1))) {
+      if (SDNode *L = isConstantBuildVectorOrConstantInt(N0)) {
+        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
+        SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L);
+        if (!OpNode.getNode())
+          return SDValue();
+        return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
+      }
+      if (N1.hasOneUse()) {
+        // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one
+        // use
+        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0);
+        if (!OpNode.getNode())
+          return SDValue();
+        AddToWorkList(OpNode.getNode());
+        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
+      }
     }
   }
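
A hedged scalar instance of the first reassociation pattern; with this change
the same fold also fires when c1 and c2 are constant build vectors rather
than just ConstantSDNodes.

#include <cstdint>
// (op (op x, c1), c2) -> (op x, (op c1, c2))
uint32_t reassocBefore(uint32_t x) { return (x + 2u) + 3u; }
uint32_t reassocAfter(uint32_t x)  { return x + 5u; } // 2 + 3 folded
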
 
@@ -1148,6 +1226,8 @@
   case ISD::SHL:                return visitSHL(N);
   case ISD::SRA:                return visitSRA(N);
   case ISD::SRL:                return visitSRL(N);
+  case ISD::ROTR:
+  case ISD::ROTL:               return visitRotate(N);
   case ISD::CTLZ:               return visitCTLZ(N);
   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
   case ISD::CTTZ:               return visitCTTZ(N);
@@ -1193,6 +1273,7 @@
   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
+  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
   }
   return SDValue();
 }
@@ -1507,8 +1588,10 @@
 
       // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
       // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
-      if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
-        return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
+      if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){
+        if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT))
+          return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
+      }
     }
   }
 
@@ -1778,22 +1861,6 @@
   return SDValue();
 }
 
-/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose
-/// elements are all the same constant or undefined.
-static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
-  BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
-  if (!C)
-    return false;
-
-  APInt SplatUndef;
-  unsigned SplatBitSize;
-  bool HasAnyUndefs;
-  EVT EltVT = N->getValueType(0).getVectorElementType();
-  return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
-                             HasAnyUndefs) &&
-          EltVT.getSizeInBits() >= SplatBitSize);
-}
-
 SDValue DAGCombiner::visitMUL(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -2229,7 +2296,7 @@
   bool HiExists = N->hasAnyUseOfValue(1);
   if (!HiExists &&
       (!LegalOperations ||
-       TLI.isOperationLegal(LoOp, N->getValueType(0)))) {
+       TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0),
                               N->op_begin(), N->getNumOperands());
     return CombineTo(N, Res, Res);
@@ -2454,35 +2521,66 @@
   // The type-legalizer generates this pattern when loading illegal
   // vector types from memory. In many cases this allows additional shuffle
   // optimizations.
-  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
-      N0.getOperand(1).getOpcode() == ISD::UNDEF &&
-      N1.getOperand(1).getOpcode() == ISD::UNDEF) {
+  // There are other cases where moving the shuffle after the xor/and/or
+  // is profitable even if shuffles don't perform a swizzle.
+  // If both shuffles use the same mask, and both shuffles have the same first
+  // or second operand, then it might still be profitable to move the shuffle
+  // after the xor/and/or operation.
+  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
 
-    assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() &&
+    assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
            "Inputs to shuffles are not the same type");
-
-    unsigned NumElts = VT.getVectorNumElements();
-
+
     // Check that both shuffles use the same mask. The masks are known to be of
     // the same length because the result vector type is the same.
-    bool SameMask = true;
-    for (unsigned i = 0; i != NumElts; ++i) {
-      int Idx0 = SVN0->getMaskElt(i);
-      int Idx1 = SVN1->getMaskElt(i);
-      if (Idx0 != Idx1) {
-        SameMask = false;
-        break;
-      }
-    }
+    // Check also that shuffles have only one use to avoid introducing extra
+    // instructions.
+    if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
+        SVN0->getMask().equals(SVN1->getMask())) {
+      SDValue ShOp = N0->getOperand(1);
 
-    if (SameMask) {
-      SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
-                               N0.getOperand(0), N1.getOperand(0));
-      AddToWorkList(Op.getNode());
-      return DAG.getVectorShuffle(VT, SDLoc(N), Op,
-                                  DAG.getUNDEF(VT), &SVN0->getMask()[0]);
+      // Don't try to fold this node if it requires introducing a
+      // build vector of all zeros that might be illegal at this stage.
+      if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
+        if (!LegalTypes)
+          ShOp = DAG.getConstant(0, VT);
+        else
+          ShOp = SDValue();
+      }
+
+      // (AND (shuf A, C), (shuf B, C)) -> (shuf (AND A, B), C)
+      // (OR  (shuf A, C), (shuf B, C)) -> (shuf (OR  A, B), C)
+      // (XOR (shuf A, C), (shuf B, C)) -> (shuf (XOR A, B), V_0)
+      if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
+        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
+                                      N0->getOperand(0), N1->getOperand(0));
+        AddToWorkList(NewNode.getNode());
+        return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
+                                    &SVN0->getMask()[0]);
+      }
+
+      // Don't try to fold this node if it requires introducing a
+      // build vector of all zeros that might be illegal at this stage.
+      ShOp = N0->getOperand(0);
+      if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
+        if (!LegalTypes)
+          ShOp = DAG.getConstant(0, VT);
+        else
+          ShOp = SDValue();
+      }
+
+      // (AND (shuf C, A), (shuf C, B)) -> (shuf C, (AND A, B))
+      // (OR  (shuf C, A), (shuf C, B)) -> (shuf C, (OR  A, B))
+      // (XOR (shuf C, A), (shuf C, B)) -> (shuf V_0, (XOR A, B))
+      if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
+        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
+                                      N0->getOperand(1), N1->getOperand(1));
+        AddToWorkList(NewNode.getNode());
+        return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
+                                    &SVN0->getMask()[0]);
+      }
     }
   }
 
@@ -3151,6 +3249,60 @@
       return N0;
     if (ISD::isBuildVectorAllOnes(N1.getNode()))
       return N1;
+
+    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1)
+    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2)
+    // Do this only if the resulting shuffle is legal.
+    if (isa<ShuffleVectorSDNode>(N0) &&
+        isa<ShuffleVectorSDNode>(N1) &&
+        N0->getOperand(1) == N1->getOperand(1) &&
+        ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) {
+      bool CanFold = true;
+      unsigned NumElts = VT.getVectorNumElements();
+      const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
+      const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
+      // We construct two shuffle masks:
+      // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand
+      // and N1 as the second operand.
+      // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand
+      // and N0 as the second operand.
+      // We do this because OR is commutable and therefore there might be
+      // two ways to fold this node into a shuffle.
+      SmallVector<int,4> Mask1;
+      SmallVector<int,4> Mask2;
+
+      for (unsigned i = 0; i != NumElts && CanFold; ++i) {
+        int M0 = SV0->getMaskElt(i);
+        int M1 = SV1->getMaskElt(i);
+
+        // Both shuffle indexes are undef. Propagate Undef.
+        if (M0 < 0 && M1 < 0) {
+          Mask1.push_back(M0);
+          Mask2.push_back(M0);
+          continue;
+        }
+
+        if (M0 < 0 || M1 < 0 ||
+            (M0 < (int)NumElts && M1 < (int)NumElts) ||
+            (M0 >= (int)NumElts && M1 >= (int)NumElts)) {
+          CanFold = false;
+          break;
+        }
+
+        Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts);
+        Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts);
+      }
+
+      if (CanFold) {
+        // Fold this sequence only if the resulting shuffle is 'legal'.
+        if (TLI.isShuffleMaskLegal(Mask1, VT))
+          return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0),
+                                      N1->getOperand(0), &Mask1[0]);
+        if (TLI.isShuffleMaskLegal(Mask2, VT))
+          return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0),
+                                      N0->getOperand(0), &Mask2[0]);
+      }
+    }
   }
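
A self-contained simulation of the fold above, with illustrative values and
NumElts == 2: MA = <0,3> and MB = <2,1> combine into Mask1 = <0,3> over
(A, B), replacing two shuffles plus an OR with a single shuffle.

#include <array>
#include <cassert>
int main() {
  std::array<int, 2> A = {0xA0, 0xA1}, B = {0xB0, 0xB1}, Z = {0, 0};
  auto Shuf = [](std::array<int, 2> X, std::array<int, 2> Y,
                 std::array<int, 2> M) {
    std::array<int, 2> R;
    for (int i = 0; i != 2; ++i)
      R[i] = M[i] < 2 ? X[M[i]] : Y[M[i] - 2]; // indices >= NumElts pick Y
    return R;
  };
  std::array<int, 2> N0 = Shuf(A, Z, {0, 3}); // <A[0], 0>
  std::array<int, 2> N1 = Shuf(B, Z, {2, 1}); // <0, B[1]>
  std::array<int, 2> Or = {N0[0] | N1[0], N0[1] | N1[1]};
  assert(Or == Shuf(A, B, {0, 3})); // Mask1 = <0, 3>
  return 0;
}
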
 
   // fold (or x, undef) -> -1
@@ -3192,11 +3344,14 @@
   if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
              isa<ConstantSDNode>(N0.getOperand(1))) {
     ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
-    if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0)
+    if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
+      SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1);
+      if (!COR.getNode())
+        return SDValue();
       return DAG.getNode(ISD::AND, SDLoc(N), VT,
                          DAG.getNode(ISD::OR, SDLoc(N0), VT,
-                                     N0.getOperand(0), N1),
-                         DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
+                                     N0.getOperand(0), N1), COR);
+    }
   }
   // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
   if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
@@ -3302,6 +3457,155 @@
   return false;
 }
 
+// Return true if we can prove that, whenever Neg and Pos are both in the
+// range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos).  This means that
+// for two opposing shifts shift1 and shift2 and a value X with OpSize bits:
+//
+//     (or (shift1 X, Neg), (shift2 X, Pos))
+//
+// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
+// in direction shift1 by Neg.  The range [0, OpSize) means that we only need
+// to consider shift amounts with defined behavior.
+static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) {
+  // If OpSize is a power of 2 then:
+  //
+  //  (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1)
+  //  (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize).
+  //
+  // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check
+  // for the stronger condition:
+  //
+  //     Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1)    [A]
+  //
+  // for all Neg and Pos.  Since Neg & (OpSize - 1) == Neg' & (OpSize - 1)
+  // we can just replace Neg with Neg' for the rest of the function.
+  //
+  // In other cases we check for the even stronger condition:
+  //
+  //     Neg == OpSize - Pos                                    [B]
+  //
+  // for all Neg and Pos.  Note that the (or ...) then invokes undefined
+  // behavior if Pos == 0 (and consequently Neg == OpSize).
+  //
+  // We could actually use [A] whenever OpSize is a power of 2, but the
+  // only extra cases that it would match are those uninteresting ones
+  // where Neg and Pos are never in range at the same time.  E.g. for
+  // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
+  // as well as (sub 32, Pos), but:
+  //
+  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
+  //
+  // always invokes undefined behavior for 32-bit X.
+  //
+  // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise.
+  unsigned MaskLoBits = 0;
+  if (Neg.getOpcode() == ISD::AND &&
+      isPowerOf2_64(OpSize) &&
+      Neg.getOperand(1).getOpcode() == ISD::Constant &&
+      cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) {
+    Neg = Neg.getOperand(0);
+    MaskLoBits = Log2_64(OpSize);
+  }
+
+  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
+  if (Neg.getOpcode() != ISD::SUB)
+    return false;
+  ConstantSDNode *NegC = dyn_cast<ConstantSDNode>(Neg.getOperand(0));
+  if (!NegC)
+    return false;
+  SDValue NegOp1 = Neg.getOperand(1);
+
+  // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with
+  // Pos'.  The truncation is redundant for the purpose of the equality.
+  if (MaskLoBits &&
+      Pos.getOpcode() == ISD::AND &&
+      Pos.getOperand(1).getOpcode() == ISD::Constant &&
+      cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1)
+    Pos = Pos.getOperand(0);
+
+  // The condition we need is now:
+  //
+  //     (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask
+  //
+  // If NegOp1 == Pos then we need:
+  //
+  //              OpSize & Mask == NegC & Mask
+  //
+  // (because "x & Mask" is a truncation and distributes through subtraction).
+  APInt Width;
+  if (Pos == NegOp1)
+    Width = NegC->getAPIntValue();
+  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
+  // Then the condition we want to prove becomes:
+  //
+  //     (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask
+  //
+  // which, again because "x & Mask" is a truncation, becomes:
+  //
+  //                NegC & Mask == (OpSize - PosC) & Mask
+  //              OpSize & Mask == (NegC + PosC) & Mask
+  else if (Pos.getOpcode() == ISD::ADD &&
+           Pos.getOperand(0) == NegOp1 &&
+           Pos.getOperand(1).getOpcode() == ISD::Constant)
+    Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() +
+             NegC->getAPIntValue());
+  else
+    return false;
+
+  // Now we just need to check that OpSize & Mask == Width & Mask.
+  if (MaskLoBits)
+    // OpSize & Mask is 0 since Mask is OpSize - 1.
+    return Width.getLoBits(MaskLoBits) == 0;
+  return Width == OpSize;
+}
+
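
At the source level, the two conditions correspond to the following hedged
sketches for a 32-bit value (so OpSize == 32; the helper names are
illustrative only).

#include <cstdint>
// Condition [A]: Neg and Pos are masked with OpSize - 1; defined for all n.
// matchRotateSub strips these redundant masks before comparing.
uint32_t rot_masked(uint32_t x, unsigned n) {
  return (x << (n & 31)) | (x >> ((32 - n) & 31));
}
// Condition [B]: the plain form; it has undefined behavior at n == 0
// (x >> 32), exactly the Pos == 0 caveat in the comment above.
uint32_t rot_plain(uint32_t x, unsigned n) {
  return (x << n) | (x >> (32 - n));
}
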
+// A subroutine of MatchRotate used once we have found an OR of two opposite
+// shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
+// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
+// former being preferred if supported.  InnerPos and InnerNeg are Pos and
+// Neg with outer conversions stripped away.
+SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
+                                       SDValue Neg, SDValue InnerPos,
+                                       SDValue InnerNeg, unsigned PosOpcode,
+                                       unsigned NegOpcode, SDLoc DL) {
+  // fold (or (shl x, (*ext y)),
+  //          (srl x, (*ext (sub 32, y)))) ->
+  //   (rotl x, y) or (rotr x, (sub 32, y))
+  //
+  // fold (or (shl x, (*ext (sub 32, y))),
+  //          (srl x, (*ext y))) ->
+  //   (rotr x, y) or (rotl x, (sub 32, y))
+  EVT VT = Shifted.getValueType();
+  if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) {
+    bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
+    return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
+                       HasPos ? Pos : Neg).getNode();
+  }
+
+  // fold (or (shl (*ext x), (*ext y)),
+  //          (srl (*ext x), (*ext (sub 32, y)))) ->
+  //   (*ext (rotl x, y)) or (*ext (rotr x, (sub 32, y)))
+  //
+  // fold (or (shl (*ext x), (*ext (sub 32, y))),
+  //          (srl (*ext x), (*ext y))) ->
+  //   (*ext (rotr x, y)) or (*ext (rotl x, (sub 32, y)))
+  if (Shifted.getOpcode() == ISD::ZERO_EXTEND ||
+      Shifted.getOpcode() == ISD::ANY_EXTEND) {
+    SDValue InnerShifted = Shifted.getOperand(0);
+    EVT InnerVT = InnerShifted.getValueType();
+    bool HasPosInner = TLI.isOperationLegalOrCustom(PosOpcode, InnerVT);
+    if (HasPosInner || TLI.isOperationLegalOrCustom(NegOpcode, InnerVT)) {
+      if (matchRotateSub(InnerPos, InnerNeg, InnerVT.getSizeInBits())) {
+        SDValue V = DAG.getNode(HasPosInner ? PosOpcode : NegOpcode, DL,
+                                InnerVT, InnerShifted, HasPosInner ? Pos : Neg);
+        return DAG.getNode(Shifted.getOpcode(), DL, VT, V).getNode();
+      }
+    }
+  }
+
+  return 0;
+}
+
 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
 // idioms for rotate, and if the target supports rotation instructions, generate
 // a rot[lr].
@@ -3342,6 +3646,7 @@
   unsigned OpSizeInBits = VT.getSizeInBits();
   SDValue LHSShiftArg = LHSShift.getOperand(0);
   SDValue LHSShiftAmt = LHSShift.getOperand(1);
+  SDValue RHSShiftArg = RHSShift.getOperand(0);
   SDValue RHSShiftAmt = RHSShift.getOperand(1);
 
   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
@@ -3395,28 +3700,15 @@
     RExtOp0 = RHSShiftAmt.getOperand(0);
   }
 
-  if (RExtOp0.getOpcode() == ISD::SUB && RExtOp0.getOperand(1) == LExtOp0) {
-    // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
-    //   (rotl x, y)
-    // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
-    //   (rotr x, (sub 32, y))
-    if (ConstantSDNode *SUBC =
-            dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0)))
-      if (SUBC->getAPIntValue() == OpSizeInBits)
-        return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
-                           HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
-  } else if (LExtOp0.getOpcode() == ISD::SUB &&
-             RExtOp0 == LExtOp0.getOperand(1)) {
-    // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
-    //   (rotr x, y)
-    // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
-    //   (rotl x, (sub 32, y))
-    if (ConstantSDNode *SUBC =
-            dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0)))
-      if (SUBC->getAPIntValue() == OpSizeInBits)
-        return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
-                           HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
-  }
+  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
+                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
+  if (TryL)
+    return TryL;
+
+  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
+                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
+  if (TryR)
+    return TryR;
 
   return 0;
 }
@@ -3559,7 +3851,11 @@
 
 /// visitShiftByConstant - Handle transforms common to the three shifts, when
 /// the shift amount is a constant.
-SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
+SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
+  // We can't and shouldn't fold opaque constants.
+  if (Amt->isOpaque())
+    return SDValue();
+
   SDNode *LHS = N->getOperand(0).getNode();
   if (!LHS->hasOneUse()) return SDValue();
 
@@ -3585,9 +3881,9 @@
     break;
   }
 
-  // We require the RHS of the binop to be a constant as well.
+  // We require the RHS of the binop to be a constant and not opaque as well.
   ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
-  if (!BinOpCst) return SDValue();
+  if (!BinOpCst || BinOpCst->isOpaque()) return SDValue();
 
   // FIXME: disable this unless the input to the binop is a shift by a constant.
   // If it is not a shift, it pessimizes some common cases like:
@@ -3617,6 +3913,7 @@
   SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
                                N->getValueType(0),
                                LHS->getOperand(1), N->getOperand(1));
+  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
 
   // Create the new shift.
   SDValue NewShift = DAG.getNode(N->getOpcode(),
@@ -3627,18 +3924,74 @@
   return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
 }
 
+SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
+  assert(N->getOpcode() == ISD::TRUNCATE);
+  assert(N->getOperand(0).getOpcode() == ISD::AND);
+
+  // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
+  if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
+    SDValue N01 = N->getOperand(0).getOperand(1);
+
+    if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
+      EVT TruncVT = N->getValueType(0);
+      SDValue N00 = N->getOperand(0).getOperand(0);
+      APInt TruncC = N01C->getAPIntValue();
+      TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
+
+      return DAG.getNode(ISD::AND, SDLoc(N), TruncVT,
+                         DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00),
+                         DAG.getConstant(TruncC, TruncVT));
+    }
+  }
+
+  return SDValue();
+}
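
A hedged arithmetic instance of the fold: truncation keeps only the low bits,
so the AND mask can be truncated along with the value.

#include <cstdint>
// (trunc (and X, C)) -> (and (trunc X), (trunc C)) with C = 0x00FF00FF
uint16_t truncAndBefore(uint32_t x) { return (uint16_t)(x & 0x00FF00FFu); }
uint16_t truncAndAfter(uint32_t x)  { return (uint16_t)x & 0x00FFu; } // same
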
+
+SDValue DAGCombiner::visitRotate(SDNode *N) {
+  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
+  if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
+      N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
+    SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode());
+    if (NewOp1.getNode())
+      return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+                         N->getOperand(0), NewOp1);
+  }
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitSHL(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   EVT VT = N0.getValueType();
-  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+  unsigned OpSizeInBits = VT.getScalarSizeInBits();
 
   // fold vector ops
   if (VT.isVector()) {
     SDValue FoldedVOp = SimplifyVBinOp(N);
     if (FoldedVOp.getNode()) return FoldedVOp;
+
+    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
+    // If setcc produces an all-ones true value then:
+    // (shl (and (setcc), N01CV), N1CV) -> (and (setcc), (shl N01CV, N1CV))
+    if (N1CV && N1CV->isConstant()) {
+      if (N0.getOpcode() == ISD::AND &&
+          TLI.getBooleanContents(true) ==
+          TargetLowering::ZeroOrNegativeOneBooleanContent) {
+        SDValue N00 = N0->getOperand(0);
+        SDValue N01 = N0->getOperand(1);
+        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
+
+        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC) {
+          SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV);
+          if (C.getNode())
+            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
+        }
+      } else {
+        N1C = isConstOrConstSplat(N1);
+      }
+    }
   }
 
   // fold (shl c1, c2) -> c1<<c2
@@ -3662,35 +4015,25 @@
     return DAG.getConstant(0, VT);
   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
   if (N1.getOpcode() == ISD::TRUNCATE &&
-      N1.getOperand(0).getOpcode() == ISD::AND &&
-      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
-    SDValue N101 = N1.getOperand(0).getOperand(1);
-    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
-      EVT TruncVT = N1.getValueType();
-      SDValue N100 = N1.getOperand(0).getOperand(0);
-      APInt TruncC = N101C->getAPIntValue();
-      TruncC = TruncC.trunc(TruncVT.getSizeInBits());
-      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
-                         DAG.getNode(ISD::AND, SDLoc(N), TruncVT,
-                                     DAG.getNode(ISD::TRUNCATE,
-                                                 SDLoc(N),
-                                                 TruncVT, N100),
-                                     DAG.getConstant(TruncC, TruncVT)));
-    }
+      N1.getOperand(0).getOpcode() == ISD::AND) {
+    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
+    if (NewOp1.getNode())
+      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
   }
 
   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
 
   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
-  if (N1C && N0.getOpcode() == ISD::SHL &&
-      N0.getOperand(1).getOpcode() == ISD::Constant) {
-    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
-    uint64_t c2 = N1C->getZExtValue();
-    if (c1 + c2 >= OpSizeInBits)
-      return DAG.getConstant(0, VT);
-    return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
-                       DAG.getConstant(c1 + c2, N1.getValueType()));
+  if (N1C && N0.getOpcode() == ISD::SHL) {
+    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+      uint64_t c1 = N0C1->getZExtValue();
+      uint64_t c2 = N1C->getZExtValue();
+      if (c1 + c2 >= OpSizeInBits)
+        return DAG.getConstant(0, VT);
+      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
+                         DAG.getConstant(c1 + c2, N1.getValueType()));
+    }
   }
 
   // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
@@ -3701,20 +4044,21 @@
   if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
               N0.getOpcode() == ISD::ANY_EXTEND ||
               N0.getOpcode() == ISD::SIGN_EXTEND) &&
-      N0.getOperand(0).getOpcode() == ISD::SHL &&
-      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
-    uint64_t c1 =
-      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
-    uint64_t c2 = N1C->getZExtValue();
-    EVT InnerShiftVT = N0.getOperand(0).getValueType();
-    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
-    if (c2 >= OpSizeInBits - InnerShiftSize) {
-      if (c1 + c2 >= OpSizeInBits)
-        return DAG.getConstant(0, VT);
-      return DAG.getNode(ISD::SHL, SDLoc(N0), VT,
-                         DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
-                                     N0.getOperand(0)->getOperand(0)),
-                         DAG.getConstant(c1 + c2, N1.getValueType()));
+      N0.getOperand(0).getOpcode() == ISD::SHL) {
+    SDValue N0Op0 = N0.getOperand(0);
+    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
+      uint64_t c1 = N0Op0C1->getZExtValue();
+      uint64_t c2 = N1C->getZExtValue();
+      EVT InnerShiftVT = N0Op0.getValueType();
+      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
+      if (c2 >= OpSizeInBits - InnerShiftSize) {
+        if (c1 + c2 >= OpSizeInBits)
+          return DAG.getConstant(0, VT);
+        return DAG.getNode(ISD::SHL, SDLoc(N0), VT,
+                           DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
+                                       N0Op0->getOperand(0)),
+                           DAG.getConstant(c1 + c2, N1.getValueType()));
+      }
     }
   }
 
@@ -3722,19 +4066,20 @@
   // Only fold this if the inner zext has no other uses to avoid increasing
   // the total number of instructions.
   if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
-      N0.getOperand(0).getOpcode() == ISD::SRL &&
-      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
-    uint64_t c1 =
-      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
-    if (c1 < VT.getSizeInBits()) {
-      uint64_t c2 = N1C->getZExtValue();
-      if (c1 == c2) {
-        SDValue NewOp0 = N0.getOperand(0);
-        EVT CountVT = NewOp0.getOperand(1).getValueType();
-        SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
-                                     NewOp0, DAG.getConstant(c2, CountVT));
-        AddToWorkList(NewSHL.getNode());
-        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
+      N0.getOperand(0).getOpcode() == ISD::SRL) {
+    SDValue N0Op0 = N0.getOperand(0);
+    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
+      uint64_t c1 = N0Op0C1->getZExtValue();
+      if (c1 < VT.getScalarSizeInBits()) {
+        uint64_t c2 = N1C->getZExtValue();
+        if (c1 == c2) {
+          SDValue NewOp0 = N0.getOperand(0);
+          EVT CountVT = NewOp0.getOperand(1).getValueType();
+          SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
+                                       NewOp0, DAG.getConstant(c2, CountVT));
+          AddToWorkList(NewSHL.getNode());
+          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
+        }
       }
     }
   }
@@ -3743,40 +4088,39 @@
   //                               (and (srl x, (sub c1, c2)), MASK)
   // Only fold this if the inner shift has no other uses -- if it does, folding
   // this will increase the total number of instructions.
-  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
-      N0.getOperand(1).getOpcode() == ISD::Constant) {
-    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
-    if (c1 < VT.getSizeInBits()) {
-      uint64_t c2 = N1C->getZExtValue();
-      APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
-                                         VT.getSizeInBits() - c1);
-      SDValue Shift;
-      if (c2 > c1) {
-        Mask = Mask.shl(c2-c1);
-        Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
-                            DAG.getConstant(c2-c1, N1.getValueType()));
-      } else {
-        Mask = Mask.lshr(c1-c2);
-        Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
-                            DAG.getConstant(c1-c2, N1.getValueType()));
+  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+      uint64_t c1 = N0C1->getZExtValue();
+      if (c1 < OpSizeInBits) {
+        uint64_t c2 = N1C->getZExtValue();
+        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
+        SDValue Shift;
+        if (c2 > c1) {
+          Mask = Mask.shl(c2 - c1);
+          Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
+                              DAG.getConstant(c2 - c1, N1.getValueType()));
+        } else {
+          Mask = Mask.lshr(c1 - c2);
+          Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
+                              DAG.getConstant(c1 - c2, N1.getValueType()));
+        }
+        return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift,
+                           DAG.getConstant(Mask, VT));
       }
-      return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift,
-                         DAG.getConstant(Mask, VT));
     }
   }
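
A hedged instance of this fold for the c2 > c1 case: shifting right by 4 and
then left by 6 equals shifting left by 2 and masking, where the mask is the
high 28 bits shifted left by 2 (0xFFFFFFC0).

#include <cstdint>
uint32_t shlSrlBefore(uint32_t x) { return (x >> 4) << 6; }
uint32_t shlSrlAfter(uint32_t x)  { return (x << 2) & 0xFFFFFFC0u; } // same
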
   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
   if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
+    unsigned BitSize = VT.getScalarSizeInBits();
     SDValue HiBitsMask =
-      DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
-                                            VT.getSizeInBits() -
-                                              N1C->getZExtValue()),
-                      VT);
+      DAG.getConstant(APInt::getHighBitsSet(BitSize,
+                                            BitSize - N1C->getZExtValue()), VT);
     return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
                        HiBitsMask);
   }
 
   if (N1C) {
-    SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue());
+    SDValue NewSHL = visitShiftByConstant(N, N1C);
     if (NewSHL.getNode())
       return NewSHL;
   }
@@ -3796,6 +4140,8 @@
   if (VT.isVector()) {
     SDValue FoldedVOp = SimplifyVBinOp(N);
     if (FoldedVOp.getNode()) return FoldedVOp;
+
+    N1C = isConstOrConstSplat(N1);
   }
 
   // fold (sra c1, c2) -> (sra c1, c2)
@@ -3829,11 +4175,12 @@
 
   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
   if (N1C && N0.getOpcode() == ISD::SRA) {
-    if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+    if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) {
       unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
-      if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
+      if (Sum >= OpSizeInBits)
+        Sum = OpSizeInBits - 1;
       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
-                         DAG.getConstant(Sum, N1C->getValueType(0)));
+                         DAG.getConstant(Sum, N1.getValueType()));
     }
   }
 
@@ -3842,14 +4189,17 @@
   // result_size - n != m.
   // If truncate is free for the target sext(shl) is likely to result in better
   // code.
-  if (N0.getOpcode() == ISD::SHL) {
+  if (N0.getOpcode() == ISD::SHL && N1C) {
     // Get the two constants of the shifts, CN0 = m, CN = n.
-    const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
-    if (N01C && N1C) {
+    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
+    if (N01C) {
+      LLVMContext &Ctx = *DAG.getContext();
       // Determine what the truncate's result bitsize and type would be.
-      EVT TruncVT =
-        EVT::getIntegerVT(*DAG.getContext(),
-                          OpSizeInBits - N1C->getZExtValue());
+      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
+
+      if (VT.isVector())
+        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
+
       // Determine the residual right-shift amount.
       signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
 
@@ -3876,44 +4226,33 @@
 
   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
   if (N1.getOpcode() == ISD::TRUNCATE &&
-      N1.getOperand(0).getOpcode() == ISD::AND &&
-      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
-    SDValue N101 = N1.getOperand(0).getOperand(1);
-    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
-      EVT TruncVT = N1.getValueType();
-      SDValue N100 = N1.getOperand(0).getOperand(0);
-      APInt TruncC = N101C->getAPIntValue();
-      TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
-      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
-                         DAG.getNode(ISD::AND, SDLoc(N),
-                                     TruncVT,
-                                     DAG.getNode(ISD::TRUNCATE,
-                                                 SDLoc(N),
-                                                 TruncVT, N100),
-                                     DAG.getConstant(TruncC, TruncVT)));
-    }
+      N1.getOperand(0).getOpcode() == ISD::AND) {
+    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
+    if (NewOp1.getNode())
+      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
   }
 
-  // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2))
+  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
   //      if c1 is equal to the number of bits the trunc removes
   if (N0.getOpcode() == ISD::TRUNCATE &&
       (N0.getOperand(0).getOpcode() == ISD::SRL ||
        N0.getOperand(0).getOpcode() == ISD::SRA) &&
       N0.getOperand(0).hasOneUse() &&
       N0.getOperand(0).getOperand(1).hasOneUse() &&
-      N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
-    EVT LargeVT = N0.getOperand(0).getValueType();
-    ConstantSDNode *LargeShiftAmt =
-      cast<ConstantSDNode>(N0.getOperand(0).getOperand(1));
+      N1C) {
+    SDValue N0Op0 = N0.getOperand(0);
+    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
+      unsigned LargeShiftVal = LargeShift->getZExtValue();
+      EVT LargeVT = N0Op0.getValueType();
 
-    if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits ==
-        LargeShiftAmt->getZExtValue()) {
-      SDValue Amt =
-        DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
-              getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType()));
-      SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
-                                N0.getOperand(0).getOperand(0), Amt);
-      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
+      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
+        SDValue Amt =
+          DAG.getConstant(LargeShiftVal + N1C->getZExtValue(),
+                          getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
+        SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
+                                  N0Op0.getOperand(0), Amt);
+        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
+      }
     }
   }
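
A scalar sanity check of the fold above, for the i64 -> i32 case (so c1 must be 32, the number of bits the truncate removes); it assumes two's-complement narrowing and arithmetic '>>' on negative values, as on mainstream targets:

```c++
#include <cassert>
#include <cstdint>

int main() {
  uint64_t x = 0xFEDCBA9876543210ULL;
  unsigned c2 = 5;
  // LHS: (sra (trunc (srl x, 32)), c2) -- the truncate keeps the high half.
  int32_t lhs = (int32_t)(x >> 32) >> c2;
  // RHS: (trunc (sra x, 32 + c2)) -- one wide arithmetic shift, then truncate.
  int32_t rhs = (int32_t)((int64_t)x >> (32 + c2));
  assert(lhs == rhs);
  return 0;
}
```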
 
@@ -3927,7 +4266,7 @@
     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
 
   if (N1C) {
-    SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue());
+    SDValue NewSRA = visitShiftByConstant(N, N1C);
     if (NewSRA.getNode())
       return NewSRA;
   }
@@ -3947,6 +4286,8 @@
   if (VT.isVector()) {
     SDValue FoldedVOp = SimplifyVBinOp(N);
     if (FoldedVOp.getNode()) return FoldedVOp;
+
+    N1C = isConstOrConstSplat(N1);
   }
 
   // fold (srl c1, c2) -> c1 >>u c2
@@ -3967,14 +4308,15 @@
     return DAG.getConstant(0, VT);
 
   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
-  if (N1C && N0.getOpcode() == ISD::SRL &&
-      N0.getOperand(1).getOpcode() == ISD::Constant) {
-    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
-    uint64_t c2 = N1C->getZExtValue();
-    if (c1 + c2 >= OpSizeInBits)
-      return DAG.getConstant(0, VT);
-    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
-                       DAG.getConstant(c1 + c2, N1.getValueType()));
+  if (N1C && N0.getOpcode() == ISD::SRL) {
+    if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) {
+      uint64_t c1 = N01C->getZExtValue();
+      uint64_t c2 = N1C->getZExtValue();
+      if (c1 + c2 >= OpSizeInBits)
+        return DAG.getConstant(0, VT);
+      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
+                         DAG.getConstant(c1 + c2, N1.getValueType()));
+    }
   }
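
The srl-of-srl fold in scalar form: the shift amounts simply add, and once their sum covers the bit width every bit has been shifted out, so the result is known to be zero. A small exhaustive check, with each individual shift kept in range:

```c++
#include <cassert>
#include <cstdint>

// (x >>u c1) >>u c2  ==  (c1 + c2 >= 32) ? 0 : x >>u (c1 + c2)
int main() {
  uint32_t x = 0xDEADBEEFu;
  for (unsigned c1 = 0; c1 < 32; ++c1)
    for (unsigned c2 = 0; c2 < 32; ++c2) {
      uint32_t twoShifts = (x >> c1) >> c2;
      uint32_t folded = (c1 + c2 >= 32) ? 0 : x >> (c1 + c2);
      assert(twoShifts == folded);
    }
  return 0;
}
```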
 
   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
@@ -3999,18 +4341,21 @@
   }
 
   // fold (srl (shl x, c), c) -> (and x, cst2)
-  if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
-      N0.getValueSizeInBits() <= 64) {
-    uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits();
-    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
-                       DAG.getConstant(~0ULL >> ShAmt, VT));
+  if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) {
+    unsigned BitSize = N0.getScalarValueSizeInBits();
+    if (BitSize <= 64) {
+      uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
+      return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
+                         DAG.getConstant(~0ULL >> ShAmt, VT));
+    }
   }
 
   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
     // Shifting in all undef bits?
     EVT SmallVT = N0.getOperand(0).getValueType();
-    if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
+    unsigned BitSize = SmallVT.getScalarSizeInBits();
+    if (N1C->getZExtValue() >= BitSize)
       return DAG.getUNDEF(VT);
 
     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
@@ -4019,7 +4364,7 @@
                                        N0.getOperand(0),
                           DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
       AddToWorkList(SmallShift.getNode());
-      APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()).lshr(ShiftAmt);
+      APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
       return DAG.getNode(ISD::AND, SDLoc(N), VT,
                          DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift),
                          DAG.getConstant(Mask, VT));
@@ -4028,14 +4373,14 @@
 
   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
   // bit, which is unmodified by sra.
-  if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
+  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
     if (N0.getOpcode() == ISD::SRA)
       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
   }
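
The sign-bit fold above can be checked the same way: an arithmetic shift replicates the sign bit, so a subsequent logical shift by 31 observes the same top bit whether or not the sra is there (again assuming arithmetic '>>' on negative int, as on mainstream targets):

```c++
#include <cassert>
#include <cstdint>

int main() {
  const int32_t vals[] = {-123456789, 123456789};
  for (int32_t x : vals)
    for (unsigned s = 0; s < 32; ++s) {
      uint32_t viaSra = (uint32_t)(x >> s) >> 31; // (srl (sra x, s), 31)
      uint32_t direct = (uint32_t)x >> 31;        // (srl x, 31)
      assert(viaSra == direct);
    }
  return 0;
}
```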
 
   // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
   if (N1C && N0.getOpcode() == ISD::CTLZ &&
-      N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
+      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
     APInt KnownZero, KnownOne;
     DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne);
 
@@ -4070,22 +4415,10 @@
 
   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
   if (N1.getOpcode() == ISD::TRUNCATE &&
-      N1.getOperand(0).getOpcode() == ISD::AND &&
-      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
-    SDValue N101 = N1.getOperand(0).getOperand(1);
-    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
-      EVT TruncVT = N1.getValueType();
-      SDValue N100 = N1.getOperand(0).getOperand(0);
-      APInt TruncC = N101C->getAPIntValue();
-      TruncC = TruncC.trunc(TruncVT.getSizeInBits());
-      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0,
-                         DAG.getNode(ISD::AND, SDLoc(N),
-                                     TruncVT,
-                                     DAG.getNode(ISD::TRUNCATE,
-                                                 SDLoc(N),
-                                                 TruncVT, N100),
-                                     DAG.getConstant(TruncC, TruncVT)));
-    }
+      N1.getOperand(0).getOpcode() == ISD::AND) {
+    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
+    if (NewOp1.getNode())
+      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
   }
 
   // fold operands of srl based on knowledge that the low bits are not
@@ -4094,7 +4427,7 @@
     return SDValue(N, 0);
 
   if (N1C) {
-    SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
+    SDValue NewSRL = visitShiftByConstant(N, N1C);
     if (NewSRL.getNode())
       return NewSRL;
   }
@@ -4275,12 +4608,12 @@
 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
   SDLoc DL(N);
   EVT LoVT, HiVT;
-  llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
 
   // Split the inputs.
   SDValue Lo, Hi, LL, LH, RL, RH;
-  llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
-  llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
+  std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
+  std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
 
   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
@@ -4338,9 +4671,9 @@
       return SDValue();
 
     SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
-    llvm::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
-    llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
-    llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
+    std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
+    std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
+    std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
 
     Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
     Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);
@@ -4353,6 +4686,13 @@
     return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
   }
 
+  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
+  if (ISD::isBuildVectorAllOnes(N0.getNode()))
+    return N1;
+  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
+  if (ISD::isBuildVectorAllZeros(N0.getNode()))
+    return N2;
+
   return SDValue();
 }
 
@@ -4402,6 +4742,65 @@
                        SDLoc(N));
 }
 
+// tryToFoldExtendOfConstant - Try to fold a sext/zext/aext
+// dag node into a ConstantSDNode or a build_vector of constants.
+// This function is called by the DAGCombiner when visiting sext/zext/aext
+// dag nodes (see, for example, DAGCombiner::visitSIGN_EXTEND).
+// Vector extends are not folded if operations are legal; this is to
+// avoid introducing illegal build_vector dag nodes.
+static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
+                                         SelectionDAG &DAG, bool LegalTypes,
+                                         bool LegalOperations) {
+  unsigned Opcode = N->getOpcode();
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
+         Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!");
+
+  // fold (sext c1) -> c1
+  // fold (zext c1) -> c1
+  // fold (aext c1) -> c1
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
+
+  // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
+  // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
+  // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
+  EVT SVT = VT.getScalarType();
+  if (!(VT.isVector() &&
+      (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
+      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
+    return 0;
+
+  // We can fold this node into a build_vector.
+  unsigned VTBits = SVT.getSizeInBits();
+  unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits();
+  unsigned ShAmt = VTBits - EVTBits;
+  SmallVector<SDValue, 8> Elts;
+  unsigned NumElts = N0->getNumOperands();
+  SDLoc DL(N);
+
+  for (unsigned i = 0; i != NumElts; ++i) {
+    SDValue Op = N0->getOperand(i);
+    if (Op->getOpcode() == ISD::UNDEF) {
+      Elts.push_back(DAG.getUNDEF(SVT));
+      continue;
+    }
+
+    ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
+    const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
+    if (Opcode == ISD::SIGN_EXTEND)
+      Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
+                                     SVT));
+    else
+      Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(),
+                                     SVT));
+  }
+
+  return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], NumElts).getNode();
+}
+
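
The per-element math in the loop above is the usual shift trick, performed at the destination width: move the narrow constant to the top, then shift back down arithmetically for sext or logically for zext. A minimal scalar sketch for an i8 element widened to i32 (the cast back from an out-of-range unsigned value assumes two's-complement wraparound, as on mainstream targets):

```c++
#include <cassert>
#include <cstdint>

int main() {
  uint32_t c = 0x80;        // the i8 constant -128, held at destination width
  unsigned shAmt = 32 - 8;  // VTBits - EVTBits, as in the loop above
  uint32_t sext = (uint32_t)((int32_t)(c << shAmt) >> shAmt);
  uint32_t zext = (c << shAmt) >> shAmt;
  assert(sext == 0xFFFFFF80u); // the i8 sign bit is replicated upward
  assert(zext == 0x00000080u); // the high bits are cleared
  return 0;
}
```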
 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
 // transformation. Returns true if extension are possible and the above
@@ -4492,9 +4891,9 @@
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
 
-  // fold (sext c1) -> c1
-  if (isa<ConstantSDNode>(N0))
-    return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N0);
+  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
+                                              LegalOperations))
+    return SDValue(Res, 0);
 
   // fold (sext (sext x)) -> (sext x)
   // fold (sext (aext x)) -> (sext x)
@@ -4671,7 +5070,7 @@
       }
     }
 
-    // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
+    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
     unsigned ElementWidth = VT.getScalarType().getSizeInBits();
     SDValue NegOne =
       DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
@@ -4680,15 +5079,21 @@
                        NegOne, DAG.getConstant(0, VT),
                        cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
     if (SCC.getNode()) return SCC;
-    if (!VT.isVector() &&
-        (!LegalOperations ||
-         TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(VT)))) {
-      return DAG.getSelect(SDLoc(N), VT,
-                           DAG.getSetCC(SDLoc(N),
-                           getSetCCResultType(VT),
-                           N0.getOperand(0), N0.getOperand(1),
-                           cast<CondCodeSDNode>(N0.getOperand(2))->get()),
-                           NegOne, DAG.getConstant(0, VT));
+
+    if (!VT.isVector()) {
+      EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
+      if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) {
+        SDLoc DL(N);
+        ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+        SDValue SetCC = DAG.getSetCC(DL,
+                                     SetCCVT,
+                                     N0.getOperand(0), N0.getOperand(1), CC);
+        EVT SelectVT = getSetCCResultType(VT);
+        return DAG.getSelect(DL, VT,
+                             DAG.getSExtOrTrunc(SetCC, DL, SelectVT),
+                             NegOne, DAG.getConstant(0, VT));
+      }
     }
   }
 
@@ -4742,9 +5147,10 @@
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
 
-  // fold (zext c1) -> c1
-  if (isa<ConstantSDNode>(N0))
-    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);
+  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
+                                              LegalOperations))
+    return SDValue(Res, 0);
+
   // fold (zext (zext x)) -> (zext x)
   // fold (zext (aext x)) -> (zext x)
   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
@@ -4925,10 +5331,14 @@
   }
 
   if (N0.getOpcode() == ISD::SETCC) {
-    if (!LegalOperations && VT.isVector()) {
+    if (!LegalOperations && VT.isVector() &&
+        N0.getValueType().getVectorElementType() == MVT::i1) {
+      EVT N0VT = N0.getOperand(0).getValueType();
+      if (getSetCCResultType(N0VT) == N0.getValueType())
+        return SDValue();
+
       // zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
       // Only do this before legalize for now.
-      EVT N0VT = N0.getOperand(0).getValueType();
       EVT EltVT = VT.getVectorElementType();
       SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
                                     DAG.getConstant(1, EltVT));
@@ -5007,9 +5417,10 @@
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
 
-  // fold (aext c1) -> c1
-  if (isa<ConstantSDNode>(N0))
-    return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, N0);
+  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
+                                              LegalOperations))
+    return SDValue(Res, 0);
+
   // fold (aext (aext x)) -> (aext x)
   // fold (aext (zext x)) -> (zext x)
   // fold (aext (sext x)) -> (sext x)
@@ -5466,6 +5877,29 @@
                          BSwap, N1);
   }
 
+  // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs
+  // into a build_vector.
+  if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
+    SmallVector<SDValue, 8> Elts;
+    unsigned NumElts = N0->getNumOperands();
+    unsigned ShAmt = VTBits - EVTBits;
+
+    for (unsigned i = 0; i != NumElts; ++i) {
+      SDValue Op = N0->getOperand(i);
+      if (Op->getOpcode() == ISD::UNDEF) {
+        Elts.push_back(Op);
+        continue;
+      }
+
+      ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
+      const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
+      Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
+                                     Op.getValueType()));
+    }
+
+    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Elts[0], NumElts);
+  }
+
   return SDValue();
 }
 
@@ -5510,7 +5944,7 @@
   // creates this pattern) and before operation legalization after which
   // we need to be more careful about the vector instructions that we generate.
   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
-      LegalTypes && !LegalOperations && N0->hasOneUse()) {
+      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
 
     EVT VecTy = N0.getOperand(0).getValueType();
     EVT ExTy = N0.getValueType();
@@ -5587,6 +6021,20 @@
     SDValue Reduced = ReduceLoadWidth(N);
     if (Reduced.getNode())
       return Reduced;
+    // Handle the case where the load remains an extending load even
+    // after truncation.
+    if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
+      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+      if (!LN0->isVolatile() &&
+          LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
+        SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
+                                         VT, LN0->getChain(), LN0->getBasePtr(),
+                                         LN0->getMemoryVT(),
+                                         LN0->getMemOperand());
+        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
+        return NewLoad;
+      }
+    }
   }
   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
   // where ... are all 'undef'.
@@ -5654,8 +6102,7 @@
   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
-      LD1->getPointerInfo().getAddrSpace() !=
-         LD2->getPointerInfo().getAddrSpace())
+      LD1->getAddressSpace() != LD2->getAddressSpace())
     return SDValue();
   EVT LD1VT = LD1->getValueType(0);
 
@@ -5691,14 +6138,7 @@
   if (!LegalTypes &&
       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
       VT.isVector()) {
-    bool isSimple = true;
-    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i)
-      if (N0.getOperand(i).getOpcode() != ISD::UNDEF &&
-          N0.getOperand(i).getOpcode() != ISD::Constant &&
-          N0.getOperand(i).getOpcode() != ISD::ConstantFP) {
-        isSimple = false;
-        break;
-      }
+    bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
 
     EVT DestEltVT = N->getValueType(0).getVectorElementType();
     assert(!DestEltVT.isVector() &&
@@ -6551,7 +6991,7 @@
       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
   }
 
-  // The next optimizations are desireable only if SELECT_CC can be lowered.
+  // The next optimizations are desirable only if SELECT_CC can be lowered.
   // Check against MVT::Other for SELECT_CC, which is a workaround for targets
   // having to say they don't support SELECT_CC on every type the DAG knows
   // about, since there is no way to mark an opcode illegal at all value types
@@ -6608,7 +7048,7 @@
       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
   }
 
-  // The next optimizations are desireable only if SELECT_CC can be lowered.
+  // The next optimizations are desirable only if SELECT_CC can be lowered.
   // Check against MVT::Other for SELECT_CC, which is a workaround for targets
   // having to say they don't support SELECT_CC on every type the DAG knows
   // about, since there is no way to mark an opcode illegal at all value types
@@ -7537,7 +7977,12 @@
 
   bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
     TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
-  if (UseAA) {
+#ifndef NDEBUG
+  if (CombinerAAOnlyFunc.getNumOccurrences() &&
+      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
+    UseAA = false;
+#endif
+  if (UseAA && LD->isUnindexed()) {
     // Walk up chain skipping non-aliasing memory nodes.
     SDValue BetterChain = FindBetterChain(N, Chain);
 
@@ -7888,14 +8333,6 @@
 };
 }
 
-/// \brief Sorts LoadedSlice according to their offset.
-struct LoadedSliceSorter {
-  bool operator()(const LoadedSlice &LHS, const LoadedSlice &RHS) {
-    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
-    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
-  }
-};
-
 /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
 /// \p UsedBits looks like 0..0 1..1 0..0.
 static bool areUsedBitsDense(const APInt &UsedBits) {
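
For intuition, the same denseness predicate over a plain 64-bit mask: strip the trailing zeros, and the remaining bits form one contiguous run of ones iff adding one clears them all. A sketch only (the real check operates on APInt), using the GCC/Clang __builtin_ctzll builtin:

```c++
#include <cassert>
#include <cstdint>

// True iff bits looks like 0..0 1..1 0..0 (and is nonzero).
static bool areUsedBitsDense64(uint64_t bits) {
  if (bits == 0)
    return false;
  bits >>= __builtin_ctzll(bits);  // drop the trailing zeros
  return (bits & (bits + 1)) == 0; // 1..1 iff adding one carries all the way out
}

int main() {
  assert(areUsedBitsDense64(0x0000FF00ULL));  // one dense run
  assert(!areUsedBitsDense64(0x0000F0F0ULL)); // two separate runs
  return 0;
}
```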
@@ -7939,7 +8376,11 @@
 
   // Sort the slices so that elements that are likely to be next to each
   // other in memory are next to each other in the list.
-  std::sort(LoadedSlices.begin(), LoadedSlices.end(), LoadedSliceSorter());
+  std::sort(LoadedSlices.begin(), LoadedSlices.end(),
+            [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
+    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
+    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
+  });
   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
   // First (resp. Second) is the first (resp. Second) potentially candidate
   // to be placed in a paired load.
@@ -8075,8 +8516,8 @@
 
     // The width of the type must be a power of 2 and greater than 8-bits.
     // Otherwise the load cannot be represented in LLVM IR.
-    // Moreover, if we shifted with a non 8-bits multiple, the slice
-    // will be accross several bytes. We do not support that.
+    // Moreover, if we shifted with a non-8-bits multiple, the slice
+    // will be across several bytes. We do not support that.
     unsigned Width = User->getValueSizeInBits(0);
     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
       return 0;
@@ -8543,14 +8984,6 @@
   unsigned SequenceNum;
 };
 
-/// Sorts store nodes in a link according to their offset from a shared
-// base ptr.
-struct ConsecutiveMemoryChainSorter {
-  bool operator()(MemOpLink LHS, MemOpLink RHS) {
-    return LHS.OffsetFromBase < RHS.OffsetFromBase;
-  }
-};
-
 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
   EVT MemVT = St->getMemoryVT();
   int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
@@ -8669,7 +9102,11 @@
 
   // Sort the memory operands according to their distance from the base pointer.
   std::sort(StoreNodes.begin(), StoreNodes.end(),
-            ConsecutiveMemoryChainSorter());
+            [](MemOpLink LHS, MemOpLink RHS) {
+    return LHS.OffsetFromBase < RHS.OffsetFromBase ||
+           (LHS.OffsetFromBase == RHS.OffsetFromBase &&
+            LHS.SequenceNum > RHS.SequenceNum);
+  });
 
   // Scan the memory operations on the chain and find the first non-consecutive
   // store memory address.
@@ -8717,7 +9154,7 @@
       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
         NonZero |= !C->getConstantFPValue()->isNullValue();
       } else {
-        // Non constant.
+        // Non-constant.
         break;
       }
 
@@ -9125,7 +9562,12 @@
 
   bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
     TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
-  if (UseAA) {
+#ifndef NDEBUG
+  if (CombinerAAOnlyFunc.getNumOccurrences() &&
+      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
+    UseAA = false;
+#endif
+  if (UseAA && ST->isUnindexed()) {
     // Walk up chain skipping non-aliasing memory nodes.
     SDValue BetterChain = FindBetterChain(N, Chain);
 
@@ -9306,9 +9748,10 @@
   // We only perform this optimization before the op legalization phase because
   // we may introduce new vector instructions which are not backed by TD
   // patterns. For example on AVX, extracting elements from a wide vector
-  // without using extract_subvector.
+  // without using extract_subvector. However, if we can find an underlying
+  // scalar value, then we can always use that.
   if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
-      && ConstEltNo && !LegalOperations) {
+      && ConstEltNo) {
     int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
     int NumElem = VT.getVectorNumElements();
     ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
@@ -9320,16 +9763,32 @@
       return DAG.getUNDEF(NVT);
 
     // Select the right vector half to extract from.
+    SDValue SVInVec;
     if (OrigElt < NumElem) {
-      InVec = InVec->getOperand(0);
+      SVInVec = InVec->getOperand(0);
     } else {
-      InVec = InVec->getOperand(1);
+      SVInVec = InVec->getOperand(1);
       OrigElt -= NumElem;
     }
 
-    EVT IndexTy = TLI.getVectorIdxTy();
-    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT,
-                       InVec, DAG.getConstant(OrigElt, IndexTy));
+    if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
+      SDValue InOp = SVInVec.getOperand(OrigElt);
+      if (InOp.getValueType() != NVT) {
+        assert(InOp.getValueType().isInteger() && NVT.isInteger());
+        InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
+      }
+
+      return InOp;
+    }
+
+    // FIXME: We should handle recursing on other vector shuffles and
+    // scalar_to_vector here as well.
+
+    if (!LegalOperations) {
+      EVT IndexTy = TLI.getVectorIdxTy();
+      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT,
+                         SVInVec, DAG.getConstant(OrigElt, IndexTy));
+    }
   }
 
   // Perform only after legalization to ensure build_vector / vector_shuffle
@@ -9836,6 +10295,26 @@
     }
   }
 
+  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
+  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
+  if (N->getNumOperands() == 2 &&
+      N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
+      N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) {
+    EVT VT = N->getValueType(0);
+    SDValue N0 = N->getOperand(0);
+    SDValue N1 = N->getOperand(1);
+    SmallVector<SDValue, 8> Opnds;
+    unsigned BuildVecNumElts = N0.getNumOperands();
+
+    for (unsigned i = 0; i != BuildVecNumElts; ++i)
+      Opnds.push_back(N0.getOperand(i));
+    for (unsigned i = 0; i != BuildVecNumElts; ++i)
+      Opnds.push_back(N1.getOperand(i));
+
+    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Opnds[0],
+                       Opnds.size());
+  }
+
   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
   // nodes often generate nop CONCAT_VECTOR nodes.
   // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
@@ -10142,6 +10621,33 @@
   return SDValue();
 }
 
+SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N2 = N->getOperand(2);
+
+  // If the input vector is a concatenation, and the insert replaces
+  // one of the halves, we can optimize into a single concat_vectors.
+  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
+      N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) {
+    APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();
+    EVT VT = N->getValueType(0);
+
+    // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
+    // (concat_vectors Z, Y)
+    if (InsIdx == 0)
+      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
+                         N->getOperand(1), N0.getOperand(1));
+
+    // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) ->
+    // (concat_vectors X, Z)
+    if (InsIdx == VT.getVectorNumElements()/2)
+      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
+                         N0.getOperand(0), N->getOperand(1));
+  }
+
+  return SDValue();
+}
+
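
The same fold on plain arrays, for intuition: overwriting the lower or upper half of a two-part concatenation yields just another concatenation with the new half (the element counts here are illustrative only):

```c++
#include <array>
#include <cassert>

int main() {
  std::array<int, 2> X = {1, 2}, Y = {3, 4}, Z = {9, 9};
  // V = concat_vectors(X, Y)
  std::array<int, 4> V = {X[0], X[1], Y[0], Y[1]};
  // insert_subvector(V, Z, 0) == concat_vectors(Z, Y)
  std::array<int, 4> Lower = {Z[0], Z[1], V[2], V[3]};
  assert((Lower == std::array<int, 4>{Z[0], Z[1], Y[0], Y[1]}));
  // insert_subvector(V, Z, 2) == concat_vectors(X, Z)
  std::array<int, 4> Upper = {V[0], V[1], Z[0], Z[1]};
  assert((Upper == std::array<int, 4>{X[0], X[1], Z[0], Z[1]}));
  return 0;
}
```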
 /// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform
 /// an AND to a vector_shuffle with the destination vector and a zero vector.
 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
@@ -10204,18 +10710,15 @@
   // this operation.
   if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
       RHS.getOpcode() == ISD::BUILD_VECTOR) {
+    // Check if both vectors are constants. If not, bail out.
+    if (!(cast<BuildVectorSDNode>(LHS)->isConstant() &&
+          cast<BuildVectorSDNode>(RHS)->isConstant()))
+      return SDValue();
+
     SmallVector<SDValue, 8> Ops;
     for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
       SDValue LHSOp = LHS.getOperand(i);
       SDValue RHSOp = RHS.getOperand(i);
-      // If these two elements can't be folded, bail out.
-      if ((LHSOp.getOpcode() != ISD::UNDEF &&
-           LHSOp.getOpcode() != ISD::Constant &&
-           LHSOp.getOpcode() != ISD::ConstantFP) ||
-          (RHSOp.getOpcode() != ISD::UNDEF &&
-           RHSOp.getOpcode() != ISD::Constant &&
-           RHSOp.getOpcode() != ISD::ConstantFP))
-        break;
 
       // Can't fold divide by zero.
       if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
@@ -10862,14 +11365,21 @@
 
   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA :
     TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
+#ifndef NDEBUG
+  if (CombinerAAOnlyFunc.getNumOccurrences() &&
+      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
+    UseAA = false;
+#endif
   if (UseAA && SrcValue1 && SrcValue2) {
     // Use alias analysis information.
     int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
     int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
     int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
     AliasAnalysis::AliasResult AAResult =
-      AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1),
-               AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2));
+      AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1,
+                                       UseTBAA ? TBAAInfo1 : 0),
+               AliasAnalysis::Location(SrcValue2, Overlap2,
+                                       UseTBAA ? TBAAInfo2 : 0));
     if (AAResult == AliasAnalysis::NoAlias)
       return false;
   }
@@ -10956,7 +11466,7 @@
     if (Depth > 6 || Aliases.size() == 2) {
       Aliases.clear();
       Aliases.push_back(OriginalChain);
-      break;
+      return;
     }
 
     // Don't bother if we've been before.
@@ -11018,6 +11528,63 @@
       break;
     }
   }
+
+  // We need to be careful here to also search for aliases through the
+  // value operand of a store, etc. Consider the following situation:
+  //   Token1 = ...
+  //   L1 = load Token1, %52
+  //   S1 = store Token1, L1, %51
+  //   L2 = load Token1, %52+8
+  //   S2 = store Token1, L2, %51+8
+  //   Token2 = Token(S1, S2)
+  //   L3 = load Token2, %53
+  //   S3 = store Token2, L3, %52
+  //   L4 = load Token2, %53+8
+  //   S4 = store Token2, L4, %52+8
+  // If we search for aliases of S3 (which loads address %52), and we look
+  // only through the chain, then we'll miss the trivial dependence on L1
+  // (which also loads from %52). We then might change all loads and
+  // stores to use Token1 as their chain operand, which could result in
+  // copying %53 into %52 before copying %52 into %51 (which should
+  // happen first).
+  //
+  // The problem is, however, that searching for such data dependencies
+  // can become expensive, and the cost is not directly related to the
+  // chain depth. Instead, we'll rule out such configurations here by
+  // insisting that we've visited all chain users (except for users
+  // of the original chain, which is not necessary). When doing this,
+  // we need to look through nodes we don't care about (otherwise, things
+  // like register copies will interfere with trivial cases).
+
+  SmallVector<const SDNode *, 16> Worklist;
+  for (SmallPtrSet<SDNode *, 16>::iterator I = Visited.begin(),
+       IE = Visited.end(); I != IE; ++I)
+    if (*I != OriginalChain.getNode())
+      Worklist.push_back(*I);
+
+  while (!Worklist.empty()) {
+    const SDNode *M = Worklist.pop_back_val();
+
+    // We have already visited M, and want to make sure we've visited any uses
+  // of M that we care about. For uses that we've not visited, and don't
+    // care about, queue them to the worklist.
+
+    for (SDNode::use_iterator UI = M->use_begin(),
+         UIE = M->use_end(); UI != UIE; ++UI)
+      if (UI.getUse().getValueType() == MVT::Other && Visited.insert(*UI)) {
+        if (isa<MemIntrinsicSDNode>(*UI) || isa<MemSDNode>(*UI)) {
+          // We've not visited this use, and we care about it (it could have an
+          // ordering dependency with the original node).
+          Aliases.clear();
+          Aliases.push_back(OriginalChain);
+          return;
+        }
+
+        // We've not visited this use, but we don't care about it. Mark it as
+        // visited and enqueue it to the worklist.
+        Worklist.push_back(*UI);
+      }
+  }
 }
 
 /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index a6f7461..baba51e 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -49,8 +49,8 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DebugInfo.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Instructions.h"
@@ -118,7 +118,7 @@
 
   // No-op casts are trivially coalesced by fast-isel.
   if (const CastInst *Cast = dyn_cast<CastInst>(I))
-    if (Cast->isNoopCast(TD.getIntPtrType(Cast->getContext())) &&
+    if (Cast->isNoopCast(DL.getIntPtrType(Cast->getContext())) &&
         !hasTrivialKill(Cast->getOperand(0)))
       return false;
 
@@ -133,7 +133,7 @@
          !(I->getOpcode() == Instruction::BitCast ||
            I->getOpcode() == Instruction::PtrToInt ||
            I->getOpcode() == Instruction::IntToPtr) &&
-         cast<Instruction>(*I->use_begin())->getParent() == I->getParent();
+         cast<Instruction>(*I->user_begin())->getParent() == I->getParent();
 }
 
 unsigned FastISel::getRegForValue(const Value *V) {
@@ -192,7 +192,7 @@
     // Translate this as an integer zero so that it can be
     // local-CSE'd with actual integer zeros.
     Reg =
-      getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext())));
+      getRegForValue(Constant::getNullValue(DL.getIntPtrType(V->getContext())));
   } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
     if (CF->isNullValue()) {
       Reg = TargetMaterializeFloatZero(CF);
@@ -229,7 +229,7 @@
     Reg = lookUpRegForValue(Op);
   } else if (isa<UndefValue>(V)) {
     Reg = createResultReg(TLI.getRegClassFor(VT));
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
             TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
   }
 
@@ -335,20 +335,20 @@
 
 FastISel::SavePoint FastISel::enterLocalValueArea() {
   MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
-  DebugLoc OldDL = DL;
+  DebugLoc OldDL = DbgLoc;
   recomputeInsertPt();
-  DL = DebugLoc();
+  DbgLoc = DebugLoc();
   SavePoint SP = { OldInsertPt, OldDL };
   return SP;
 }
 
 void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
   if (FuncInfo.InsertPt != FuncInfo.MBB->begin())
-    LastLocalValue = llvm::prior(FuncInfo.InsertPt);
+    LastLocalValue = std::prev(FuncInfo.InsertPt);
 
   // Restore the previous insert position.
   FuncInfo.InsertPt = OldInsertPt.InsertPt;
-  DL = OldInsertPt.DL;
+  DbgLoc = OldInsertPt.DL;
 }
 
 /// SelectBinaryOp - Select and emit code for a binary operator instruction,
@@ -484,7 +484,7 @@
       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
       if (Field) {
         // N = N + Offset
-        TotalOffs += TD.getStructLayout(StTy)->getElementOffset(Field);
+        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
         if (TotalOffs >= MaxOffs) {
           N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
           if (N == 0)
@@ -503,7 +503,7 @@
         if (CI->isZero()) continue;
         // N = N + Offset
         TotalOffs +=
-          TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+          DL.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
         if (TotalOffs >= MaxOffs) {
           N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
           if (N == 0)
@@ -524,7 +524,7 @@
       }
 
       // N = N + Idx * ElementSize;
-      uint64_t ElementSize = TD.getTypeAllocSize(Ty);
+      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
       std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
       unsigned IdxN = Pair.first;
       bool IdxNIsKill = Pair.second;
@@ -572,7 +572,7 @@
     if (IA->isAlignStack())
       ExtraInfo |= InlineAsm::Extra_IsAlignStack;
 
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
             TII.get(TargetOpcode::INLINEASM))
       .addExternalSymbol(IA->getAsmString().c_str())
       .addImm(ExtraInfo);
@@ -643,11 +643,11 @@
     if (Op) {
       if (Op->isReg()) {
         Op->setIsDebug(true);
-        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                 TII.get(TargetOpcode::DBG_VALUE), false, Op->getReg(), 0,
                 DI->getVariable());
       } else
-        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                 TII.get(TargetOpcode::DBG_VALUE))
             .addOperand(*Op)
             .addImm(0)
@@ -667,26 +667,26 @@
     if (!V) {
       // Currently the optimizer can produce this; insert an undef to
       // help debugging.  Probably the optimizer should not do this.
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
         .addReg(0U).addImm(DI->getOffset())
         .addMetadata(DI->getVariable());
     } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
       if (CI->getBitWidth() > 64)
-        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
           .addCImm(CI).addImm(DI->getOffset())
           .addMetadata(DI->getVariable());
       else
-        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
           .addImm(CI->getZExtValue()).addImm(DI->getOffset())
           .addMetadata(DI->getVariable());
     } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
         .addFPImm(CF).addImm(DI->getOffset())
         .addMetadata(DI->getVariable());
     } else if (unsigned Reg = lookUpRegForValue(V)) {
       // FIXME: This does not handle register-indirect values at offset 0.
       bool IsIndirect = DI->getOffset() != 0;
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, IsIndirect,
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect,
               Reg, DI->getOffset(), DI->getVariable());
     } else {
       // We can't yet handle anything else here because it would require
@@ -798,8 +798,8 @@
     // Don't attempt a cross-class copy. It will likely fail.
     if (SrcClass == DstClass) {
       ResultReg = createResultReg(DstClass);
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
-              ResultReg).addReg(Op0);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::COPY), ResultReg).addReg(Op0);
     }
   }
 
@@ -822,7 +822,7 @@
     if (!HandlePHINodesInSuccessorBlocks(I->getParent()))
       return false;
 
-  DL = I->getDebugLoc();
+  DbgLoc = I->getDebugLoc();
 
   MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt;
 
@@ -840,7 +840,7 @@
   // First, try doing target-independent selection.
   if (SelectOperator(I, I->getOpcode())) {
     ++NumFastIselSuccessIndependent;
-    DL = DebugLoc();
+    DbgLoc = DebugLoc();
     return true;
   }
   // Remove dead code.  However, ignore call instructions since we've flushed
@@ -855,7 +855,7 @@
   SavedInsertPt = FuncInfo.InsertPt;
   if (TargetSelectInstruction(I)) {
     ++NumFastIselSuccessTarget;
-    DL = DebugLoc();
+    DbgLoc = DebugLoc();
     return true;
   }
   // Check for dead code and remove as necessary.
@@ -863,7 +863,7 @@
   if (SavedInsertPt != FuncInfo.InsertPt)
     removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
 
-  DL = DebugLoc();
+  DbgLoc = DebugLoc();
   return false;
 }
 
@@ -871,7 +871,7 @@
 /// unless it is the immediate (fall-through) successor, and update
 /// the CFG.
 void
-FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) {
+FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) {
 
   if (FuncInfo.MBB->getBasicBlock()->size() > 1 &&
       FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
@@ -881,7 +881,7 @@
   } else {
     // The unconditional branch case.
     TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL,
-                     SmallVector<MachineOperand, 0>(), DL);
+                     SmallVector<MachineOperand, 0>(), DbgLoc);
   }
   FuncInfo.MBB->addSuccessor(MSucc);
 }
@@ -1096,7 +1096,7 @@
     MFI(*FuncInfo.MF->getFrameInfo()),
     MCP(*FuncInfo.MF->getConstantPool()),
     TM(FuncInfo.MF->getTarget()),
-    TD(*TM.getDataLayout()),
+    DL(*TM.getDataLayout()),
     TII(*TM.getInstrInfo()),
     TLI(*TM.getTargetLowering()),
     TRI(*TM.getRegisterInfo()),
@@ -1209,7 +1209,7 @@
   unsigned ResultReg = createResultReg(RC);
   const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
-  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg);
   return ResultReg;
 }
 
@@ -1220,13 +1220,13 @@
   const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
       .addReg(Op0, Op0IsKill * RegState::Kill);
   else {
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
       .addReg(Op0, Op0IsKill * RegState::Kill);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
-            ResultReg).addReg(II.ImplicitDefs[0]);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
   }
 
   return ResultReg;
@@ -1240,15 +1240,15 @@
   const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
       .addReg(Op0, Op0IsKill * RegState::Kill)
       .addReg(Op1, Op1IsKill * RegState::Kill);
   else {
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
       .addReg(Op0, Op0IsKill * RegState::Kill)
       .addReg(Op1, Op1IsKill * RegState::Kill);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
-            ResultReg).addReg(II.ImplicitDefs[0]);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
   }
   return ResultReg;
 }
@@ -1262,17 +1262,17 @@
   const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
       .addReg(Op0, Op0IsKill * RegState::Kill)
       .addReg(Op1, Op1IsKill * RegState::Kill)
       .addReg(Op2, Op2IsKill * RegState::Kill);
   else {
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
       .addReg(Op0, Op0IsKill * RegState::Kill)
       .addReg(Op1, Op1IsKill * RegState::Kill)
       .addReg(Op2, Op2IsKill * RegState::Kill);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
-            ResultReg).addReg(II.ImplicitDefs[0]);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
   }
   return ResultReg;
 }
@@ -1285,15 +1285,15 @@
   const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
       .addReg(Op0, Op0IsKill * RegState::Kill)
       .addImm(Imm);
   else {
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
       .addReg(Op0, Op0IsKill * RegState::Kill)
       .addImm(Imm);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
-            ResultReg).addReg(II.ImplicitDefs[0]);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
   }
   return ResultReg;
 }
@@ -1306,17 +1306,17 @@
   const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
       .addReg(Op0, Op0IsKill * RegState::Kill)
       .addImm(Imm1)
       .addImm(Imm2);
   else {
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
       .addReg(Op0, Op0IsKill * RegState::Kill)
       .addImm(Imm1)
       .addImm(Imm2);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
-            ResultReg).addReg(II.ImplicitDefs[0]);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
   }
   return ResultReg;
 }
@@ -1329,15 +1329,15 @@
   const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
       .addReg(Op0, Op0IsKill * RegState::Kill)
       .addFPImm(FPImm);
   else {
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
       .addReg(Op0, Op0IsKill * RegState::Kill)
       .addFPImm(FPImm);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
-            ResultReg).addReg(II.ImplicitDefs[0]);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
   }
   return ResultReg;
 }
@@ -1351,17 +1351,17 @@
   const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
       .addReg(Op0, Op0IsKill * RegState::Kill)
       .addReg(Op1, Op1IsKill * RegState::Kill)
       .addImm(Imm);
   else {
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
       .addReg(Op0, Op0IsKill * RegState::Kill)
       .addReg(Op1, Op1IsKill * RegState::Kill)
       .addImm(Imm);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
-            ResultReg).addReg(II.ImplicitDefs[0]);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
   }
   return ResultReg;
 }
@@ -1375,17 +1375,17 @@
   const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
       .addReg(Op0, Op0IsKill * RegState::Kill)
       .addReg(Op1, Op1IsKill * RegState::Kill)
       .addImm(Imm1).addImm(Imm2);
   else {
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
       .addReg(Op0, Op0IsKill * RegState::Kill)
       .addReg(Op1, Op1IsKill * RegState::Kill)
       .addImm(Imm1).addImm(Imm2);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
-            ResultReg).addReg(II.ImplicitDefs[0]);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
   }
   return ResultReg;
 }
@@ -1397,11 +1397,11 @@
   const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg).addImm(Imm);
   else {
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
-            ResultReg).addReg(II.ImplicitDefs[0]);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
   }
   return ResultReg;
 }
@@ -1413,12 +1413,12 @@
   const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
   if (II.getNumDefs() >= 1)
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
       .addImm(Imm1).addImm(Imm2);
   else {
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm1).addImm(Imm2);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
-            ResultReg).addReg(II.ImplicitDefs[0]);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm1).addImm(Imm2);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
   }
   return ResultReg;
 }
@@ -1432,7 +1432,7 @@
   const TargetRegisterClass *RC = MRI.getRegClass(Op0);
   MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx));
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
-          DL, TII.get(TargetOpcode::COPY), ResultReg)
+          DbgLoc, TII.get(TargetOpcode::COPY), ResultReg)
     .addReg(Op0, getKillRegState(Op0IsKill), Idx);
   return ResultReg;
 }
@@ -1498,9 +1498,9 @@
 
       // Set the DebugLoc for the copy. Prefer the location of the operand
       // if there is one; use the location of the PHI otherwise.
-      DL = PN->getDebugLoc();
+      DbgLoc = PN->getDebugLoc();
       if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp))
-        DL = Inst->getDebugLoc();
+        DbgLoc = Inst->getDebugLoc();
 
       unsigned Reg = getRegForValue(PHIOp);
       if (Reg == 0) {
@@ -1508,7 +1508,7 @@
         return false;
       }
       FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
-      DL = DebugLoc();
+      DbgLoc = DebugLoc();
     }
   }
 
@@ -1523,7 +1523,7 @@
   // this by scanning the single-use users of the load until we get to FoldInst.
   unsigned MaxUsers = 6;  // Don't scan down huge single-use chains of instrs.
 
-  const Instruction *TheUser = LI->use_back();
+  const Instruction *TheUser = LI->user_back();
   while (TheUser != FoldInst &&   // Scan up until we find FoldInst.
          // Stay in the right block.
          TheUser->getParent() == FoldInst->getParent() &&
@@ -1532,7 +1532,7 @@
     if (!TheUser->hasOneUse())
       return false;
 
-    TheUser = TheUser->use_back();
+    TheUser = TheUser->user_back();
   }
 
   // If we didn't find the fold instruction, then we failed to collapse the
@@ -1559,7 +1559,7 @@
     return false;
 
   MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg);
-  MachineInstr *User = &*RI;
+  MachineInstr *User = RI->getParent();
 
   // Set the insertion point properly.  Folding the load can cause generation of
   // other random instructions (like sign extends) for addressing modes; make
@@ -1576,8 +1576,8 @@
   if (!isa<AddOperator>(Add))
     return false;
   // Type size needs to match.
-  if (TD.getTypeSizeInBits(GEP->getType()) !=
-      TD.getTypeSizeInBits(Add->getType()))
+  if (DL.getTypeSizeInBits(GEP->getType()) !=
+      DL.getTypeSizeInBits(Add->getType()))
     return false;
   // Must be in the same basic block.
   if (isa<Instruction>(Add) &&
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 4309dc1..5f0006e 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -21,8 +21,8 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/DebugInfo.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
@@ -32,6 +32,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetOptions.h"
@@ -46,16 +47,15 @@
   if (I->use_empty()) return false;
   if (isa<PHINode>(I)) return true;
   const BasicBlock *BB = I->getParent();
-  for (Value::const_use_iterator UI = I->use_begin(), E = I->use_end();
-        UI != E; ++UI) {
-    const User *U = *UI;
+  for (const User *U : I->users())
     if (cast<Instruction>(U)->getParent() != BB || isa<PHINode>(U))
       return true;
-  }
+
   return false;
 }
 
-void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
+void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
+                               SelectionDAG *DAG) {
   const TargetLowering *TLI = TM.getTargetLowering();
 
   Fn = &fn;
@@ -74,7 +74,12 @@
   // them.
   Function::const_iterator BB = Fn->begin(), EB = Fn->end();
   for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
-    if (const AllocaInst *AI = dyn_cast<AllocaInst>(I))
+    if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+      // Don't fold inalloca allocas or other dynamic allocas into the initial
+      // stack frame allocation, even if they are in the entry block.
+      if (!AI->isStaticAlloca())
+        continue;
+
       if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
         Type *Ty = AI->getAllocatedType();
         uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty);
@@ -85,21 +90,51 @@
         TySize *= CUI->getZExtValue();   // Get total allocated size.
         if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
 
-        // The object may need to be placed onto the stack near the stack
-        // protector if one exists. Determine here if this object is a suitable
-        // candidate. I.e., it would trigger the creation of a stack protector.
-        bool MayNeedSP =
-          (AI->isArrayAllocation() ||
-           (TySize >= 8 && isa<ArrayType>(Ty) &&
-            cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8)));
         StaticAllocaMap[AI] =
-          MF->getFrameInfo()->CreateStackObject(TySize, Align, false,
-                                                MayNeedSP, AI);
+          MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI);
       }
+    }
 
   for (; BB != EB; ++BB)
     for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
          I != E; ++I) {
+      // Look for dynamic allocas.
+      if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+        if (!AI->isStaticAlloca()) {
+          unsigned Align = std::max(
+              (unsigned)TLI->getDataLayout()->getPrefTypeAlignment(
+                AI->getAllocatedType()),
+              AI->getAlignment());
+          unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+          if (Align <= StackAlign)
+            Align = 0;
+          // Inform the Frame Information that we have variable-sized objects.
+          MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1, AI);
+        }
+      }
+
+      // Look for inline asm that clobbers the SP register.
+      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+        ImmutableCallSite CS(I);
+        if (isa<InlineAsm>(CS.getCalledValue())) {
+          unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+          std::vector<TargetLowering::AsmOperandInfo> Ops =
+            TLI->ParseConstraints(CS);
+          for (size_t I = 0, E = Ops.size(); I != E; ++I) {
+            TargetLowering::AsmOperandInfo &Op = Ops[I];
+            if (Op.Type == InlineAsm::isClobber) {
+              // Clobbers don't have SDValue operands, hence SDValue().
+              TLI->ComputeConstraintToUse(Op, SDValue(), DAG);
+              std::pair<unsigned, const TargetRegisterClass*> PhysReg =
+                TLI->getRegForInlineAsmConstraint(Op.ConstraintCode,
+                                                  Op.ConstraintVT);
+              if (PhysReg.first == SP)
+                MF->getFrameInfo()->setHasInlineAsmWithSPAdjust(true);
+            }
+          }
+        }
+      }
+
       // Mark values used outside their block as exported, by allocating
       // a virtual register for them.
       if (isUsedOutsideOfDefiningBlock(I))
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 3a8fb85..1c596b8 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -220,10 +220,19 @@
     unsigned VRBase = 0;
     const TargetRegisterClass *RC =
       TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF));
-    // If the register class is unknown for the given definition, then try to
-    // infer one from the value type.
-    if (!RC && i < NumResults)
-      RC = TLI->getRegClassFor(Node->getSimpleValueType(i));
+    // Always let the value type influence the used register class. The
+    // constraints on the instruction may be too lax to represent the value
+    // type correctly. For example, a 64-bit float (X86::FR64) can't live in
+    // the 32-bit float super-class (X86::FR32).
+    if (i < NumResults && TLI->isTypeLegal(Node->getSimpleValueType(i))) {
+      const TargetRegisterClass *VTRC =
+        TLI->getRegClassFor(Node->getSimpleValueType(i));
+      if (RC)
+        VTRC = TRI->getCommonSubClass(RC, VTRC);
+      if (VTRC)
+        RC = VTRC;
+    }
+
     if (II.OpInfo[i].isOptionalDef()) {
       // Optional def must be a physical register.
       unsigned NumResults = CountResults(Node);
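The comment above describes narrowing the instruction's register class by the value type's class. A standalone sketch of the selection rule, with register classes modelled as a toy enum and a hypothetical commonSubClass() standing in for TargetRegisterInfo::getCommonSubClass:

  #include <cstdio>

  enum RC { NoRC, GR32, GR32_ABCD };  // toy classes; GR32_ABCD is a subclass of GR32

  // Hypothetical stand-in: a class and its subclass share the subclass;
  // unrelated classes share nothing.
  static RC commonSubClass(RC A, RC B) {
    if (A == B) return A;
    if ((A == GR32 && B == GR32_ABCD) || (A == GR32_ABCD && B == GR32))
      return GR32_ABCD;
    return NoRC;
  }

  // Mirrors the hunk: start from the value type's class, intersect with the
  // instruction's class if present, and keep the instruction's class when no
  // common subclass exists.
  static RC pickRegClass(RC FromInstr, RC FromVT) {
    RC Refined = FromVT;
    if (FromInstr != NoRC)
      Refined = commonSubClass(FromInstr, FromVT);
    return Refined != NoRC ? Refined : FromInstr;
  }

  int main() { std::printf("%d\n", pickRegClass(GR32, GR32_ABCD)); }  // GR32_ABCD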
@@ -731,10 +740,16 @@
   unsigned NumDefs = II.getNumDefs();
   const uint16_t *ScratchRegs = NULL;
 
-  // Handle PATCHPOINT specially and then use the generic code.
-  if (Opc == TargetOpcode::PATCHPOINT) {
-    unsigned CC = Node->getConstantOperandVal(PatchPointOpers::CCPos);
-    NumDefs = NumResults;
+  // Handle STACKMAP and PATCHPOINT specially and then use the generic code.
+  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
+    // Stackmaps do not have arguments and do not preserve their calling
+    // convention. However, to simplify runtime support, they clobber the same
+    // scratch registers as AnyRegCC.
+    unsigned CC = CallingConv::AnyReg;
+    if (Opc == TargetOpcode::PATCHPOINT) {
+      CC = Node->getConstantOperandVal(PatchPointOpers::CCPos);
+      NumDefs = NumResults;
+    }
     ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC);
   }
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 9061ae9..20afb3d 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -13,15 +13,16 @@
 
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/DebugInfo.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/LLVMContext.h"
@@ -152,10 +153,10 @@
 
 public:
   // DAGUpdateListener implementation.
-  virtual void NodeDeleted(SDNode *N, SDNode *E) {
+  void NodeDeleted(SDNode *N, SDNode *E) override {
     ForgetNode(N);
   }
-  virtual void NodeUpdated(SDNode *N) {}
+  void NodeUpdated(SDNode *N) override {}
 
   // Node replacement helpers
   void ReplacedNode(SDNode *N) {
@@ -729,10 +730,11 @@
         MVT VT = Value.getSimpleValueType();
         switch (TLI.getOperationAction(ISD::STORE, VT)) {
         default: llvm_unreachable("This action is not supported yet!");
-        case TargetLowering::Legal:
+        case TargetLowering::Legal: {
           // If this is an unaligned store and the target doesn't support it,
           // expand it.
-          if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+          unsigned AS = ST->getAddressSpace();
+          if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) {
             Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
             unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty);
             if (ST->getAlignment() < ABIAlignment)
@@ -740,6 +742,7 @@
                                    DAG, TLI, this);
           }
           break;
+        }
         case TargetLowering::Custom: {
           SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
           if (Res.getNode())
@@ -807,7 +810,7 @@
                             DAG.getConstant(IncrementSize, Ptr.getValueType()));
           Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value,
                            DAG.getConstant(RoundWidth,
-                                    TLI.getShiftAmountTy(Value.getValueType())));
+                                   TLI.getShiftAmountTy(Value.getValueType())));
           Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr,
                              ST->getPointerInfo().getWithOffset(IncrementSize),
                                  ExtraVT, isVolatile, isNonTemporal,
@@ -818,7 +821,7 @@
           // Store the top RoundWidth bits.
           Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value,
                            DAG.getConstant(ExtraWidth,
-                                    TLI.getShiftAmountTy(Value.getValueType())));
+                                   TLI.getShiftAmountTy(Value.getValueType())));
           Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
                                  RoundVT, isVolatile, isNonTemporal, Alignment,
                                  TBAAInfo);
@@ -826,7 +829,7 @@
           // Store the remaining ExtraWidth bits.
           IncrementSize = RoundWidth / 8;
           Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
-                             DAG.getConstant(IncrementSize, Ptr.getValueType()));
+                            DAG.getConstant(IncrementSize, Ptr.getValueType()));
           Lo = DAG.getTruncStore(Chain, dl, Value, Ptr,
                               ST->getPointerInfo().getWithOffset(IncrementSize),
                                  ExtraVT, isVolatile, isNonTemporal,
@@ -840,16 +843,18 @@
         switch (TLI.getTruncStoreAction(ST->getValue().getSimpleValueType(),
                                         StVT.getSimpleVT())) {
         default: llvm_unreachable("This action is not supported yet!");
-        case TargetLowering::Legal:
+        case TargetLowering::Legal: {
+          unsigned AS = ST->getAddressSpace();
           // If this is an unaligned store and the target doesn't support it,
           // expand it.
-          if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+          if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) {
             Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
             unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty);
             if (ST->getAlignment() < ABIAlignment)
               ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
           }
           break;
+        }
         case TargetLowering::Custom: {
           SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
           if (Res.getNode())
@@ -889,10 +894,11 @@
 
     switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
     default: llvm_unreachable("This action is not supported yet!");
-    case TargetLowering::Legal:
+    case TargetLowering::Legal: {
+      unsigned AS = LD->getAddressSpace();
       // If this is an unaligned load and the target doesn't support it,
       // expand it.
-      if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+      if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT(), AS)) {
         Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
         unsigned ABIAlignment =
           TLI.getDataLayout()->getABITypeAlignment(Ty);
@@ -901,6 +907,7 @@
         }
       }
       break;
+    }
     case TargetLowering::Custom: {
       SDValue Res = TLI.LowerOperation(RVal, DAG);
       if (Res.getNode()) {
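The Legal cases above now thread the access's address space into allowsUnalignedMemoryAccesses before deciding to expand. A standalone sketch of that decision, with the target query reduced to a hypothetical predicate:

  #include <cstdio>

  // Hypothetical target: unaligned accesses are fine in address space 0,
  // unsupported elsewhere.
  static bool allowsUnaligned(unsigned AddrSpace) { return AddrSpace == 0; }

  // Mirrors the Legal cases: expand only when the target rejects unaligned
  // accesses for this address space AND the access is under-aligned relative
  // to the type's ABI alignment.
  static bool needsExpansion(unsigned AddrSpace, unsigned Align,
                             unsigned ABIAlign) {
    return !allowsUnaligned(AddrSpace) && Align < ABIAlign;
  }

  int main() {
    std::printf("%d %d\n", needsExpansion(1, 4, 8),   // 1: expand
                           needsExpansion(0, 4, 8));  // 0: target copes
  }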
@@ -1017,7 +1024,7 @@
       // Move the top bits to the right place.
       Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
                        DAG.getConstant(RoundWidth,
-                                       TLI.getShiftAmountTy(Hi.getValueType())));
+                                      TLI.getShiftAmountTy(Hi.getValueType())));
 
       // Join the hi and lo parts.
       Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
@@ -1047,7 +1054,7 @@
       // Move the top bits to the right place.
       Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
                        DAG.getConstant(ExtraWidth,
-                                       TLI.getShiftAmountTy(Hi.getValueType())));
+                                      TLI.getShiftAmountTy(Hi.getValueType())));
 
       // Join the hi and lo parts.
       Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
@@ -1059,77 +1066,82 @@
     switch (TLI.getLoadExtAction(ExtType, SrcVT.getSimpleVT())) {
     default: llvm_unreachable("This action is not supported yet!");
     case TargetLowering::Custom:
-             isCustom = true;
-             // FALLTHROUGH
+      isCustom = true;
+      // FALLTHROUGH
     case TargetLowering::Legal: {
-             Value = SDValue(Node, 0);
-             Chain = SDValue(Node, 1);
+      Value = SDValue(Node, 0);
+      Chain = SDValue(Node, 1);
 
-             if (isCustom) {
-               SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
-               if (Res.getNode()) {
-                 Value = Res;
-                 Chain = Res.getValue(1);
-               }
-             } else {
-               // If this is an unaligned load and the target doesn't support it,
-               // expand it.
-               if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
-                 Type *Ty =
-                   LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
-                 unsigned ABIAlignment =
-                   TLI.getDataLayout()->getABITypeAlignment(Ty);
-                 if (LD->getAlignment() < ABIAlignment){
-                   ExpandUnalignedLoad(cast<LoadSDNode>(Node),
-                                       DAG, TLI, Value, Chain);
-                 }
-               }
-             }
-             break;
+      if (isCustom) {
+        SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+        if (Res.getNode()) {
+          Value = Res;
+          Chain = Res.getValue(1);
+        }
+      } else {
+        // If this is an unaligned load and the target doesn't support
+        // it, expand it.
+        EVT MemVT = LD->getMemoryVT();
+        unsigned AS = LD->getAddressSpace();
+        if (!TLI.allowsUnalignedMemoryAccesses(MemVT, AS)) {
+          Type *Ty =
+            LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+          unsigned ABIAlignment =
+            TLI.getDataLayout()->getABITypeAlignment(Ty);
+          if (LD->getAlignment() < ABIAlignment){
+            ExpandUnalignedLoad(cast<LoadSDNode>(Node),
+                                DAG, TLI, Value, Chain);
+          }
+        }
+      }
+      break;
     }
     case TargetLowering::Expand:
-             if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) {
-               SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr,
-                                          LD->getMemOperand());
-               unsigned ExtendOp;
-               switch (ExtType) {
-               case ISD::EXTLOAD:
-                 ExtendOp = (SrcVT.isFloatingPoint() ?
-                             ISD::FP_EXTEND : ISD::ANY_EXTEND);
-                 break;
-               case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
-               case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
-               default: llvm_unreachable("Unexpected extend load type!");
-               }
-               Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
-               Chain = Load.getValue(1);
-               break;
-             }
+      if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) &&
+          TLI.isTypeLegal(SrcVT)) {
+        SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr,
+                                   LD->getMemOperand());
+        unsigned ExtendOp;
+        switch (ExtType) {
+        case ISD::EXTLOAD:
+          ExtendOp = (SrcVT.isFloatingPoint() ?
+                      ISD::FP_EXTEND : ISD::ANY_EXTEND);
+          break;
+        case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
+        case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
+        default: llvm_unreachable("Unexpected extend load type!");
+        }
+        Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+        Chain = Load.getValue(1);
+        break;
+      }
 
-             assert(!SrcVT.isVector() &&
-                    "Vector Loads are handled in LegalizeVectorOps");
+      assert(!SrcVT.isVector() &&
+             "Vector Loads are handled in LegalizeVectorOps");
 
-             // FIXME: This does not work for vectors on most targets.  Sign- and
-             // zero-extend operations are currently folded into extending loads,
-             // whether they are legal or not, and then we end up here without any
-             // support for legalizing them.
-             assert(ExtType != ISD::EXTLOAD &&
-                    "EXTLOAD should always be supported!");
-             // Turn the unsupported load into an EXTLOAD followed by an explicit
-             // zero/sign extend inreg.
-             SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
-                                             Chain, Ptr, SrcVT,
-                                             LD->getMemOperand());
-             SDValue ValRes;
-             if (ExtType == ISD::SEXTLOAD)
-               ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
-                                    Result.getValueType(),
-                                    Result, DAG.getValueType(SrcVT));
-             else
-               ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
-             Value = ValRes;
-             Chain = Result.getValue(1);
-             break;
+      // FIXME: This does not work for vectors on most targets.  Sign-
+      // and zero-extend operations are currently folded into extending
+      // loads, whether they are legal or not, and then we end up here
+      // without any support for legalizing them.
+      assert(ExtType != ISD::EXTLOAD &&
+             "EXTLOAD should always be supported!");
+      // Turn the unsupported load into an EXTLOAD followed by an
+      // explicit zero/sign extend inreg.
+      SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl,
+                                      Node->getValueType(0),
+                                      Chain, Ptr, SrcVT,
+                                      LD->getMemOperand());
+      SDValue ValRes;
+      if (ExtType == ISD::SEXTLOAD)
+        ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+                             Result.getValueType(),
+                             Result, DAG.getValueType(SrcVT));
+      else
+        ValRes = DAG.getZeroExtendInReg(Result, dl,
+                                        SrcVT.getScalarType());
+      Value = ValRes;
+      Chain = Result.getValue(1);
+      break;
     }
   }
 
@@ -1383,10 +1395,39 @@
   SDValue Vec = Op.getOperand(0);
   SDValue Idx = Op.getOperand(1);
   SDLoc dl(Op);
-  // Store the value to a temporary stack slot, then LOAD the returned part.
-  SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
-  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
-                            MachinePointerInfo(), false, false, 0);
+
+  // Before we generate a new store to a temporary stack slot, see if there is
+  // already one that we can use. There often is because when we scalarize
+  // vector operations (using SelectionDAG::UnrollVectorOp for example) a whole
+  // series of EXTRACT_VECTOR_ELT nodes are generated, one for each element in
+  // the vector. If all are expanded here, we don't want one store per vector
+  // element.
+  SDValue StackPtr, Ch;
+  for (SDNode::use_iterator UI = Vec.getNode()->use_begin(),
+       UE = Vec.getNode()->use_end(); UI != UE; ++UI) {
+    SDNode *User = *UI;
+    if (StoreSDNode *ST = dyn_cast<StoreSDNode>(User)) {
+      if (ST->isIndexed() || ST->isTruncatingStore() ||
+          ST->getValue() != Vec)
+        continue;
+
+      // Make sure that nothing else could have stored into the destination of
+      // this store.
+      if (!ST->getChain().reachesChainWithoutSideEffects(DAG.getEntryNode()))
+        continue;
+
+      StackPtr = ST->getBasePtr();
+      Ch = SDValue(ST, 0);
+      break;
+    }
+  }
+
+  if (!Ch.getNode()) {
+    // Store the value to a temporary stack slot, then LOAD the returned part.
+    StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+    Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+                      MachinePointerInfo(), false, false, 0);
+  }
 
   // Add the offset to the index.
   unsigned EltSize =
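The scan above reuses an existing store of the source vector instead of spilling once per extracted element. A standalone sketch of the same filtering, with stores reduced to plain records and chain safety to a precomputed flag:

  #include <cstdio>
  #include <vector>

  struct Store {
    int StoredValue;      // which value this store writes
    bool Indexed;         // pre/post-indexed store
    bool Truncating;      // truncating store
    bool ChainSafe;       // stands in for reachesChainWithoutSideEffects()
    const char *BasePtr;  // reusable stack address
  };

  // Mirrors the loop over Vec's users: pick the first plain, full-width
  // store of Vec whose chain provably has no interposing side effects.
  static const char *findReusableSlot(const std::vector<Store> &Users, int Vec) {
    for (const Store &S : Users) {
      if (S.StoredValue != Vec || S.Indexed || S.Truncating || !S.ChainSafe)
        continue;
      return S.BasePtr;
    }
    return nullptr;  // caller creates a fresh stack temporary
  }

  int main() {
    std::vector<Store> Users = {{7, false, true, true, "slot0"},
                                {7, false, false, true, "slot1"}};
    std::printf("%s\n", findReusableSlot(Users, 7));  // slot1
  }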
@@ -1530,9 +1571,8 @@
       // the pointer so that the loaded integer will contain the sign bit.
       unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits();
       unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8;
-      LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(),
-                            LoadPtr,
-                            DAG.getConstant(ByteOffset, LoadPtr.getValueType()));
+      LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), LoadPtr,
+                           DAG.getConstant(ByteOffset, LoadPtr.getValueType()));
       // Load a legal integer containing the sign bit.
       SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
                             false, false, false, 0);
@@ -1555,8 +1595,8 @@
   // Select between the nabs and abs value based on the sign bit of
   // the input.
   return DAG.getSelect(dl, AbsVal.getValueType(), SignBit,
-                       DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal),
-                       AbsVal);
+                      DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal),
+                      AbsVal);
 }
 
 void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
@@ -1776,6 +1816,98 @@
                      false, false, false, 0);
 }
 
+static bool
+ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG,
+                     const TargetLowering &TLI, SDValue &Res) {
+  unsigned NumElems = Node->getNumOperands();
+  SDLoc dl(Node);
+  EVT VT = Node->getValueType(0);
+
+  // Try to group the scalars into pairs, shuffle the pairs together, then
+  // shuffle the pairs of pairs together, etc. until the vector has
+  // been built. This will work only if all of the necessary shuffle masks
+  // are legal.
+
+  // We do this in two phases; first to check the legality of the shuffles,
+  // and next, assuming that all shuffles are legal, to create the new nodes.
+  for (int Phase = 0; Phase < 2; ++Phase) {
+    SmallVector<std::pair<SDValue, SmallVector<int, 16> >, 16> IntermedVals,
+                                                               NewIntermedVals;
+    for (unsigned i = 0; i < NumElems; ++i) {
+      SDValue V = Node->getOperand(i);
+      if (V.getOpcode() == ISD::UNDEF)
+        continue;
+
+      SDValue Vec;
+      if (Phase)
+        Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, V);
+      IntermedVals.push_back(std::make_pair(Vec, SmallVector<int, 16>(1, i)));
+    }
+
+    while (IntermedVals.size() > 2) {
+      NewIntermedVals.clear();
+      for (unsigned i = 0, e = (IntermedVals.size() & ~1u); i < e; i += 2) {
+        // This vector and the next vector are shuffled together (simply to
+        // append the one to the other).
+        SmallVector<int, 16> ShuffleVec(NumElems, -1);
+
+        SmallVector<int, 16> FinalIndices;
+        FinalIndices.reserve(IntermedVals[i].second.size() +
+                             IntermedVals[i+1].second.size());
+
+        int k = 0;
+        for (unsigned j = 0, f = IntermedVals[i].second.size(); j != f;
+             ++j, ++k) {
+          ShuffleVec[k] = j;
+          FinalIndices.push_back(IntermedVals[i].second[j]);
+        }
+        for (unsigned j = 0, f = IntermedVals[i+1].second.size(); j != f;
+             ++j, ++k) {
+          ShuffleVec[k] = NumElems + j;
+          FinalIndices.push_back(IntermedVals[i+1].second[j]);
+        }
+
+        SDValue Shuffle;
+        if (Phase)
+          Shuffle = DAG.getVectorShuffle(VT, dl, IntermedVals[i].first,
+                                         IntermedVals[i+1].first,
+                                         ShuffleVec.data());
+        else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT))
+          return false;
+        NewIntermedVals.push_back(std::make_pair(Shuffle, FinalIndices));
+      }
+
+      // If we had an odd number of defined values, then append the last
+      // element to the array of new vectors.
+      if ((IntermedVals.size() & 1) != 0)
+        NewIntermedVals.push_back(IntermedVals.back());
+
+      IntermedVals.swap(NewIntermedVals);
+    }
+
+    assert(IntermedVals.size() <= 2 && IntermedVals.size() > 0 &&
+           "Invalid number of intermediate vectors");
+    SDValue Vec1 = IntermedVals[0].first;
+    SDValue Vec2;
+    if (IntermedVals.size() > 1)
+      Vec2 = IntermedVals[1].first;
+    else if (Phase)
+      Vec2 = DAG.getUNDEF(VT);
+
+    SmallVector<int, 16> ShuffleVec(NumElems, -1);
+    for (unsigned i = 0, e = IntermedVals[0].second.size(); i != e; ++i)
+      ShuffleVec[IntermedVals[0].second[i]] = i;
+    for (unsigned i = 0, e = IntermedVals[1].second.size(); i != e; ++i)
+      ShuffleVec[IntermedVals[1].second[i]] = NumElems + i;
+
+    if (Phase)
+      Res = DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data());
+    else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT))
+      return false;
+  }
+
+  return true;
+}
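The two-phase merge above first dry-runs the shuffle masks for legality, then builds the nodes. A standalone sketch of just the index bookkeeping — how the per-intermediate element lists are paired round by round until at most two remain (with at least three defined scalars, exactly two survive for the final shuffle, whose mask scatters these indices back to their BUILD_VECTOR positions) — assuming every mask is legal:

  #include <cstdio>
  #include <utility>
  #include <vector>

  using IndexList = std::vector<int>;

  // One merge round: append neighbour lists pairwise; an odd leftover is
  // carried forward unchanged, as in the NewIntermedVals loop above.
  static std::vector<IndexList> mergeRound(const std::vector<IndexList> &In) {
    std::vector<IndexList> Out;
    for (size_t i = 0, e = In.size() & ~size_t(1); i < e; i += 2) {
      IndexList Merged = In[i];
      Merged.insert(Merged.end(), In[i + 1].begin(), In[i + 1].end());
      Out.push_back(std::move(Merged));
    }
    if (In.size() & 1)
      Out.push_back(In.back());
    return Out;
  }

  int main() {
    // Five defined scalars -> singleton groups {0}..{4}.
    std::vector<IndexList> Vals = {{0}, {1}, {2}, {3}, {4}};
    while (Vals.size() > 2)
      Vals = mergeRound(Vals);
    for (const IndexList &L : Vals) {      // prints: 0 1 2 3 / 4
      for (int I : L) std::printf("%d ", I);
      std::printf("\n");
    }
  }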
 
 /// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't
 /// support the operation, but do support the resultant vector type.
@@ -1850,25 +1982,38 @@
                        false, false, false, Alignment);
   }
 
-  if (!MoreThanTwoValues) {
-    SmallVector<int, 8> ShuffleVec(NumElems, -1);
-    for (unsigned i = 0; i < NumElems; ++i) {
-      SDValue V = Node->getOperand(i);
-      if (V.getOpcode() == ISD::UNDEF)
-        continue;
-      ShuffleVec[i] = V == Value1 ? 0 : NumElems;
-    }
-    if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) {
-      // Get the splatted value into the low element of a vector register.
-      SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1);
-      SDValue Vec2;
-      if (Value2.getNode())
-        Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2);
-      else
-        Vec2 = DAG.getUNDEF(VT);
+  SmallSet<SDValue, 16> DefinedValues;
+  for (unsigned i = 0; i < NumElems; ++i) {
+    if (Node->getOperand(i).getOpcode() == ISD::UNDEF)
+      continue;
+    DefinedValues.insert(Node->getOperand(i));
+  }
 
-      // Return shuffle(LowValVec, undef, <0,0,0,0>)
-      return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data());
+  if (TLI.shouldExpandBuildVectorWithShuffles(VT, DefinedValues.size())) {
+    if (!MoreThanTwoValues) {
+      SmallVector<int, 8> ShuffleVec(NumElems, -1);
+      for (unsigned i = 0; i < NumElems; ++i) {
+        SDValue V = Node->getOperand(i);
+        if (V.getOpcode() == ISD::UNDEF)
+          continue;
+        ShuffleVec[i] = V == Value1 ? 0 : NumElems;
+      }
+      if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) {
+        // Get the splatted value into the low element of a vector register.
+        SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1);
+        SDValue Vec2;
+        if (Value2.getNode())
+          Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2);
+        else
+          Vec2 = DAG.getUNDEF(VT);
+
+        // Return shuffle(LowValVec, undef, <0,0,0,0>)
+        return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data());
+      }
+    } else {
+      SDValue Res;
+      if (ExpandBVWithShuffles(Node, DAG, TLI, Res))
+        return Res;
     }
   }
 
@@ -2868,6 +3013,7 @@
                                  Node->getOperand(1), Zero, Zero,
                                  cast<AtomicSDNode>(Node)->getMemOperand(),
                                  cast<AtomicSDNode>(Node)->getOrdering(),
+                                 cast<AtomicSDNode>(Node)->getOrdering(),
                                  cast<AtomicSDNode>(Node)->getSynchScope());
     Results.push_back(Swap.getValue(0));
     Results.push_back(Swap.getValue(1));
@@ -3099,7 +3245,8 @@
       EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
 
       // BUILD_VECTOR operands are allowed to be wider than the element type.
-      // But if NewEltVT is smaller that EltVT the BUILD_VECTOR does not accept it
+      // But if NewEltVT is smaller than EltVT the BUILD_VECTOR does not accept
+      // it.
       if (NewEltVT.bitsLT(EltVT)) {
 
         // Convert shuffle node.
@@ -3107,8 +3254,9 @@
         // cast operands to v8i32 and re-build the mask.
 
         // Calculate new VT, the size of the new VT should be equal to original.
-        EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT,
-                                      VT.getSizeInBits()/NewEltVT.getSizeInBits());
+        EVT NewVT =
+            EVT::getVectorVT(*DAG.getContext(), NewEltVT,
+                             VT.getSizeInBits() / NewEltVT.getSizeInBits());
         assert(NewVT.bitsEq(VT));
 
         // cast operands to new VT
@@ -3116,7 +3264,8 @@
         Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1);
 
         // Convert the shuffle mask
-        unsigned int factor = NewVT.getVectorNumElements()/VT.getVectorNumElements();
+        unsigned int factor =
+                         NewVT.getVectorNumElements()/VT.getVectorNumElements();
 
         // EltVT gets smaller
         assert(factor > 0);
@@ -3782,8 +3931,8 @@
       } else {
         Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
         CC = DAG.getCondCode(ISD::SETNE);
-        Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
-                           Tmp3, Tmp4, CC);
+        Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1,
+                           Tmp2, Tmp3, Tmp4, CC);
       }
     }
     Results.push_back(Tmp1);
@@ -3813,8 +3962,8 @@
     } else {
       Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
       Tmp4 = DAG.getCondCode(ISD::SETNE);
-      Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2,
-                         Tmp3, Node->getOperand(4));
+      Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4,
+                         Tmp2, Tmp3, Node->getOperand(4));
     }
     Results.push_back(Tmp1);
     break;
@@ -3976,7 +4125,8 @@
   }
   case ISD::SELECT: {
     unsigned ExtOp, TruncOp;
-    if (Node->getValueType(0).isVector()) {
+    if (Node->getValueType(0).isVector() ||
+        Node->getValueType(0).getSizeInBits() == NVT.getSizeInBits()) {
       ExtOp   = ISD::BITCAST;
       TruncOp = ISD::BITCAST;
     } else if (Node->getValueType(0).isInteger()) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 4255948..18b2376 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -193,10 +193,10 @@
 SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) {
   SDValue Op2 = GetPromotedInteger(N->getOperand(2));
   SDValue Op3 = GetPromotedInteger(N->getOperand(3));
-  SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N),
-                              N->getMemoryVT(), N->getChain(), N->getBasePtr(),
-                              Op2, Op3, N->getMemOperand(), N->getOrdering(),
-                              N->getSynchScope());
+  SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(),
+                              N->getChain(), N->getBasePtr(), Op2, Op3,
+                              N->getMemOperand(), N->getSuccessOrdering(),
+                              N->getFailureOrdering(), N->getSynchScope());
   // Legalized the chain result - switch anything that used the old chain to
   // use the new one.
   ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -2448,6 +2448,7 @@
                                N->getOperand(1), Zero, Zero,
                                cast<AtomicSDNode>(N)->getMemOperand(),
                                cast<AtomicSDNode>(N)->getOrdering(),
+                               cast<AtomicSDNode>(N)->getOrdering(),
                                cast<AtomicSDNode>(N)->getSynchScope());
   ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
   ReplaceValueWith(SDValue(N, 1), Swap.getValue(1));
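The extra ordering argument splits an atomic operation's success and failure orderings; nodes that conceptually carry a single ordering just pass it twice, as both hunks above do. The split mirrors C++11 atomics, where compare-exchange already takes two memory orders:

  #include <atomic>
  #include <cstdio>

  int main() {
    std::atomic<int> A{1};
    int Expected = 1;
    // Distinct orderings: acq_rel applies if the exchange succeeds,
    // the weaker acquire if it fails.
    bool OK = A.compare_exchange_strong(Expected, 2,
                                        std::memory_order_acq_rel,
                                        std::memory_order_acquire);
    std::printf("%d %d\n", OK, A.load());  // 1 2
  }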
@@ -2577,13 +2578,17 @@
   // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3)
   TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true, NULL);
   SDValue Tmp1, Tmp2;
-  Tmp1 = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()),
-                           LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl);
+  if (TLI.isTypeLegal(LHSLo.getValueType()) &&
+      TLI.isTypeLegal(RHSLo.getValueType()))
+    Tmp1 = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()),
+                             LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl);
   if (!Tmp1.getNode())
     Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()),
                         LHSLo, RHSLo, LowCC);
-  Tmp2 = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()),
-                           LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl);
+  if (TLI.isTypeLegal(LHSHi.getValueType()) &&
+      TLI.isTypeLegal(RHSHi.getValueType()))
+    Tmp2 = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()),
+                             LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl);
   if (!Tmp2.getNode())
     Tmp2 = DAG.getNode(ISD::SETCC, dl,
                        getSetCCResultType(LHSHi.getValueType()),
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index eb13230..e141883 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -634,7 +634,7 @@
       : SelectionDAG::DAGUpdateListener(dtl.getDAG()),
         DTL(dtl), NodesToAnalyze(nta) {}
 
-    virtual void NodeDeleted(SDNode *N, SDNode *E) {
+    void NodeDeleted(SDNode *N, SDNode *E) override {
       assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
              N->getNodeId() != DAGTypeLegalizer::Processed &&
              "Invalid node ID for RAUW deletion!");
@@ -655,7 +655,7 @@
         NodesToAnalyze.insert(E);
     }
 
-    virtual void NodeUpdated(SDNode *N) {
+    void NodeUpdated(SDNode *N) override {
       // Node updates can mean pretty much anything.  It is possible that an
       // operand was set to something already processed (f.e.) in which case
       // this node could become ready.  Recompute its flags.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 13bb08f..947ea10 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -541,6 +541,7 @@
   SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
   SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+  SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo);
 
   //===--------------------------------------------------------------------===//
   // Vector Splitting Support: LegalizeVectorTypes.cpp
@@ -670,13 +671,13 @@
                                  LoadSDNode *LD, ISD::LoadExtType ExtType);
 
   /// Helper genWidenVectorStores - Helper function to generate a set of
-  /// stores to store a widen vector into non widen memory
+  /// stores to store a widen vector into non-widen memory
   ///   StChain: list of chains for the stores we have generated
   ///   ST:      store of a widen value
   void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);
 
   /// Helper genWidenVectorTruncStores - Helper function to generate a set of
-  /// stores to store a truncate widen vector into non widen memory
+  /// stores to store a truncate widen vector into non-widen memory
   ///   StChain: list of chains for the stores we have generated
   ///   ST:      store of a widen value
   void GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index c749fde..e9424f2 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -78,8 +78,8 @@
       assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST");
       InOp = GetWidenedVector(InOp);
       EVT LoVT, HiVT;
-      llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT);
-      llvm::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT);
+      std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT);
+      std::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT);
       if (TLI.isBigEndian())
         std::swap(Lo, Hi);
       Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
@@ -518,7 +518,7 @@
     if (getTypeAction(Cond.getValueType()) == TargetLowering::TypeSplitVector)
       GetSplitVector(Cond, CL, CH);
     else
-      llvm::tie(CL, CH) = DAG.SplitVector(Cond, dl);
+      std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
   }
 
   Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL);
@@ -540,7 +540,7 @@
 
 void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) {
   EVT LoVT, HiVT;
-  llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
   Lo = DAG.getUNDEF(LoVT);
   Hi = DAG.getUNDEF(HiVT);
 }
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 2c3cdcc..551d054 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -77,6 +77,10 @@
   // Implements [SU]INT_TO_FP vector promotion; this is a [zs]ext of the input
   // operand to the next size up.
   SDValue PromoteVectorOpINT_TO_FP(SDValue Op);
+  // Implements FP_TO_[SU]INT vector promotion of the result type; it is
+  // promoted to the next size up integer type.  The result is then truncated
+  // back to the original type.
+  SDValue PromoteVectorOpFP_TO_INT(SDValue Op, bool isSigned);
 
   public:
   bool Run();
@@ -88,7 +92,7 @@
   // Before we start legalizing vector nodes, check if there are any vectors.
   bool HasVectors = false;
   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
-       E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) {
+       E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
     // Check if the values of the nodes contain vectors. We don't need to check
     // the operands because we are going to check their values at some point.
     for (SDNode::value_iterator J = I->value_begin(), E = I->value_end();
@@ -112,7 +116,7 @@
   // node is only legalized after all of its operands are legalized.
   DAG.AssignTopologicalOrder();
   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
-       E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
+       E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
     LegalizeOp(SDValue(I, 0));
 
   // Finally, it's possible the root changed.  Get the new root.
@@ -210,6 +214,7 @@
   case ISD::SRL:
   case ISD::ROTL:
   case ISD::ROTR:
+  case ISD::BSWAP:
   case ISD::CTLZ:
   case ISD::CTTZ:
   case ISD::CTLZ_ZERO_UNDEF:
@@ -273,6 +278,12 @@
       Result = PromoteVectorOpINT_TO_FP(Op);
       Changed = true;
       break;
+    case ISD::FP_TO_UINT:
+    case ISD::FP_TO_SINT:
+      // Promote the operation by extending the operand.
+      Result = PromoteVectorOpFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT);
+      Changed = true;
+      break;
     }
     break;
   case TargetLowering::Legal: break;
@@ -351,14 +362,9 @@
   //
   // Increase the bitwidth of the element to the next pow-of-two
   // (which is greater than 8 bits).
-  unsigned NumElts = VT.getVectorNumElements();
-  EVT EltVT = VT.getVectorElementType();
-  EltVT = EVT::getIntegerVT(*DAG.getContext(), 2 * EltVT.getSizeInBits());
-  assert(EltVT.isSimple() && "Promoting to a non-simple vector type!");
 
-  // Build a new vector type and check if it is legal.
-  MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
-
+  EVT NVT = VT.widenIntegerVectorElementType(*DAG.getContext());
+  assert(NVT.isSimple() && "Promoting to a non-simple vector type!");
   SDLoc dl(Op);
   SmallVector<SDValue, 4> Operands(Op.getNumOperands());
 
@@ -375,6 +381,35 @@
                      Operands.size());
 }
 
+// For FP_TO_INT we promote the result type to a vector type with wider
+// elements and then truncate the result.  This is different from the default
+// PromoteVector which uses bitcast to promote, thus assuming that the
+// promoted vector type has the same overall size.
+SDValue VectorLegalizer::PromoteVectorOpFP_TO_INT(SDValue Op, bool isSigned) {
+  assert(Op.getNode()->getNumValues() == 1 &&
+         "Can't promote a vector with multiple results!");
+  EVT VT = Op.getValueType();
+
+  EVT NewVT;
+  unsigned NewOpc;
+  while (1) {
+    NewVT = VT.widenIntegerVectorElementType(*DAG.getContext());
+    assert(NewVT.isSimple() && "Promoting to a non-simple vector type!");
+    if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) {
+      NewOpc = ISD::FP_TO_SINT;
+      break;
+    }
+    if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewVT)) {
+      NewOpc = ISD::FP_TO_UINT;
+      break;
+    }
+  }
+
+  SDLoc loc(Op);
+  SDValue promoted = DAG.getNode(NewOpc, loc, NewVT, Op.getOperand(0));
+  return DAG.getNode(ISD::TRUNCATE, loc, VT, promoted);
+}
+
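PromoteVectorOpFP_TO_INT converts at a wider integer element type and truncates back; the while (1) above relies on the target providing a legal FP_TO_SINT (or, for unsigned sources, FP_TO_UINT) at the widened type. A standalone sketch of the promote-then-truncate idea on scalars standing in for vector lanes (the type choices here are illustrative):

  #include <cstdint>
  #include <cstdio>

  // Convert through a wider integer the target supports, then truncate to
  // the requested narrow result type, as the TRUNCATE node does above.
  static int8_t fpToS8ViaS32(float F) {
    int32_t Wide = static_cast<int32_t>(F);  // the legal wide FP_TO_SINT
    return static_cast<int8_t>(Wide);        // ISD::TRUNCATE back to i8
  }

  int main() { std::printf("%d\n", fpToS8ViaS32(42.9f)); }  // 42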
 
 SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
   SDLoc dl(Op);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index f7a3e3d..940a9c9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -65,6 +65,7 @@
   case ISD::UNDEF:             R = ScalarizeVecRes_UNDEF(N); break;
   case ISD::VECTOR_SHUFFLE:    R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
   case ISD::ANY_EXTEND:
+  case ISD::BSWAP:
   case ISD::CTLZ:
   case ISD::CTPOP:
   case ISD::CTTZ:
@@ -384,6 +385,9 @@
     case ISD::STORE:
       Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
       break;
+    case ISD::FP_ROUND:
+      Res = ScalarizeVecOp_FP_ROUND(N, OpNo);
+      break;
     }
   }
 
@@ -467,6 +471,15 @@
                       N->getOriginalAlignment(), N->getTBAAInfo());
 }
 
+/// ScalarizeVecOp_FP_ROUND - If the value to round is a vector that needs
+/// to be scalarized, it must be <1 x ty>.  Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
+  SDValue Elt = GetScalarizedVector(N->getOperand(0));
+  SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N),
+                            N->getValueType(0).getVectorElementType(), Elt,
+                            N->getOperand(1));
+  return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+}
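A standalone illustration of the <1 x ty> scalarization above: rounding a one-element vector is just rounding its only element and re-wrapping it (the array types stand in for the vector types):

  #include <array>
  #include <cstdio>

  static std::array<float, 1> fpRound1(const std::array<double, 1> &V) {
    float Elt = static_cast<float>(V[0]);  // FP_ROUND on the scalar
    return {Elt};                          // SCALAR_TO_VECTOR
  }

  int main() { std::printf("%f\n", fpRound1({1.0 / 3.0})[0]); }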
 
 //===----------------------------------------------------------------------===//
 //  Result Vector Splitting
@@ -521,6 +534,7 @@
     SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
     break;
 
+  case ISD::BSWAP:
   case ISD::CONVERT_RNDSAT:
   case ISD::CTLZ:
   case ISD::CTTZ:
@@ -624,7 +638,7 @@
   // We know the result is a vector.  The input may be either a vector or a
   // scalar value.
   EVT LoVT, HiVT;
-  llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
   SDLoc dl(N);
 
   SDValue InOp = N->getOperand(0);
@@ -679,7 +693,7 @@
                                                 SDValue &Hi) {
   EVT LoVT, HiVT;
   SDLoc dl(N);
-  llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
   unsigned LoNumElts = LoVT.getVectorNumElements();
   SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
   Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size());
@@ -700,7 +714,7 @@
   }
 
   EVT LoVT, HiVT;
-  llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
 
   SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors);
   Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size());
@@ -716,7 +730,7 @@
   SDLoc dl(N);
 
   EVT LoVT, HiVT;
-  llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
 
   Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
   uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
@@ -740,7 +754,7 @@
   SDLoc dl(N);
 
   EVT LoVT, HiVT;
-  llvm::tie(LoVT, HiVT) =
+  std::tie(LoVT, HiVT) =
     DAG.GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT());
 
   Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
@@ -804,7 +818,7 @@
                                                     SDValue &Hi) {
   EVT LoVT, HiVT;
   SDLoc dl(N);
-  llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
   Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0));
   Hi = DAG.getUNDEF(HiVT);
 }
@@ -814,7 +828,7 @@
   assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
   EVT LoVT, HiVT;
   SDLoc dl(LD);
-  llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));
 
   ISD::LoadExtType ExtType = LD->getExtensionType();
   SDValue Ch = LD->getChain();
@@ -828,7 +842,7 @@
   const MDNode *TBAAInfo = LD->getTBAAInfo();
 
   EVT LoMemVT, HiMemVT;
-  llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
 
   Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
                    LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal,
@@ -859,12 +873,12 @@
 
   EVT LoVT, HiVT;
   SDLoc DL(N);
-  llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
 
   // Split the input.
   SDValue LL, LH, RL, RH;
-  llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
-  llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
+  std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
+  std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
 
   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
@@ -875,7 +889,7 @@
   // Get the dest types - they may not match the input types, e.g. int_to_fp.
   EVT LoVT, HiVT;
   SDLoc dl(N);
-  llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
 
   // If the input also splits, handle it directly for a compile time speedup.
   // Otherwise split it by hand.
@@ -883,7 +897,7 @@
   if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
     GetSplitVector(N->getOperand(0), Lo, Hi);
   else
-    llvm::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
+    std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
 
   if (N->getOpcode() == ISD::FP_ROUND) {
     Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
@@ -912,7 +926,7 @@
   EVT SrcVT = N->getOperand(0).getValueType();
   EVT DestVT = N->getValueType(0);
   EVT LoVT, HiVT;
-  llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT);
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT);
 
   // We can do better than a generic split operation if the extend is doing
   // more than just doubling the width of the elements and the following are
@@ -938,7 +952,7 @@
     EVT SplitSrcVT =
         EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2);
     EVT SplitLoVT, SplitHiVT;
-    llvm::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT);
+    std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT);
     if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) &&
         TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) {
       DEBUG(dbgs() << "Split vector extend via incremental extend:";
@@ -947,7 +961,7 @@
       SDValue NewSrc =
           DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0));
       // Get the low and high halves of the new, extended one step, vector.
-      llvm::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl);
+      std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl);
       // Extend those vector halves the rest of the way.
       Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
       Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
@@ -1160,13 +1174,13 @@
          "Lo and Hi have differing types");
 
   EVT LoOpVT, HiOpVT;
-  llvm::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT);
+  std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT);
   assert(LoOpVT == HiOpVT && "Asymmetric vector split?");
 
   SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask;
-  llvm::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL);
-  llvm::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL);
-  llvm::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL);
+  std::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL);
+  std::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL);
+  std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL);
 
   SDValue LoSelect =
     DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1);
@@ -1281,7 +1295,7 @@
   GetSplitVector(N->getOperand(1), Lo, Hi);
 
   EVT LoMemVT, HiMemVT;
-  llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
 
   unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
 
@@ -1370,7 +1384,7 @@
 
   // Extract the halves of the input via extract_subvector.
   SDValue InLoVec, InHiVec;
-  llvm::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL);
+  std::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL);
   // Truncate them to 1/2 the element size.
   EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
   EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
@@ -2180,6 +2194,17 @@
     if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector)
       Cond1 = GetWidenedVector(Cond1);
 
+    // If we have to split the condition there is no point in widening the
+    // select. This would result in a cycle of widening the select ->
+    // widening the condition operand -> splitting the condition operand ->
+    // splitting the select -> widening the select. Instead split this select
+    // further and widen the resulting type.
+    if (getTypeAction(CondVT) == TargetLowering::TypeSplitVector) {
+      SDValue SplitSelect = SplitVecOp_VSELECT(N, 0);
+      SDValue Res = ModifyToType(SplitSelect, WidenVT);
+      return Res;
+    }
+
     if (Cond1.getValueType() != CondWidenVT)
       Cond1 = ModifyToType(Cond1, CondWidenVT);
   }
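The guard above breaks a would-be legalization cycle by splitting first whenever the condition operand must be split anyway. A toy decision table capturing the rule, with type actions reduced to an enum:

  #include <cstdio>

  enum Action { Legal, Widen, Split };

  // Mirrors the rule in the hunk: a VSELECT being widened whose condition
  // would be split must itself be split first (then the halves are widened);
  // otherwise the condition is widened to the select's width.
  static const char *widenVSelect(Action CondAction) {
    return CondAction == Split ? "split select, widen result"
                               : "widen condition";
  }

  int main() {
    std::printf("%s\n%s\n", widenVSelect(Split), widenVSelect(Widen));
  }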
@@ -2251,7 +2276,7 @@
 
   SDValue InOp1 = N->getOperand(0);
   EVT InVT = InOp1.getValueType();
-  assert(InVT.isVector() && "can not widen non vector type");
+  assert(InVT.isVector() && "can not widen non-vector type");
   EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(),
                                    InVT.getVectorElementType(), WidenNumElts);
   InOp1 = GetWidenedVector(InOp1);
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 1dd2128..3b3424d 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -603,7 +603,7 @@
   std::vector<SUnit *>::iterator Best = Queue.begin();
   if (!DisableDFASched) {
     signed BestCost = SUSchedulingCost(*Best);
-    for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
+    for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()),
            E = Queue.end(); I != E; ++I) {
 
       if (SUSchedulingCost(*I) > BestCost) {
@@ -614,14 +614,14 @@
   }
   // Use default TD scheduling mechanism.
   else {
-    for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
+    for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()),
        E = Queue.end(); I != E; ++I)
       if (Picker(*Best, *I))
         Best = I;
   }
 
   SUnit *V = *Best;
-  if (Best != prior(Queue.end()))
+  if (Best != std::prev(Queue.end()))
     std::swap(*Best, Queue.back());
 
   Queue.pop_back();
@@ -633,7 +633,7 @@
 void ResourcePriorityQueue::remove(SUnit *SU) {
   assert(!Queue.empty() && "Queue is empty!");
   std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
-  if (I != prior(Queue.end()))
+  if (I != std::prev(Queue.end()))
     std::swap(*I, Queue.back());
 
   Queue.pop_back();
diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index 4af7172..b62bd62 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -15,8 +15,8 @@
 #define LLVM_CODEGEN_SDNODEDBGVALUE_H
 
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/DebugLoc.h"
 #include "llvm/Support/DataTypes.h"
-#include "llvm/Support/DebugLoc.h"
 
 namespace llvm {
 
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 6c5e0ab..0687392 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -80,7 +80,7 @@
   ScheduleDAGFast(MachineFunction &mf)
     : ScheduleDAGSDNodes(mf) {}
 
-  void Schedule();
+  void Schedule() override;
 
   /// AddPred - adds a predecessor edge to SUnit SU.
   /// This returns true if this is a new predecessor.
@@ -107,7 +107,7 @@
   void ListScheduleBottomUp();
 
   /// forceUnitLatencies - The fast scheduler doesn't care about real latencies.
-  bool forceUnitLatencies() const { return true; }
+  bool forceUnitLatencies() const override { return true; }
 };
 }  // end anonymous namespace
 
@@ -646,9 +646,10 @@
 public:
   ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {}
 
-  void Schedule();
+  void Schedule() override;
 
-  MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+  MachineBasicBlock *
+    EmitSchedule(MachineBasicBlock::iterator &InsertPos) override;
 
 private:
   std::vector<SDNode*> Sequence;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 1a562d7..c283664 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -177,7 +177,7 @@
     delete AvailableQueue;
   }
 
-  void Schedule();
+  void Schedule() override;
 
   ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
 
@@ -261,7 +261,7 @@
 
   /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
   /// need actual latency information but the hybrid scheduler does.
-  bool forceUnitLatencies() const {
+  bool forceUnitLatencies() const override {
     return !NeedLatency;
   }
 };
@@ -1539,7 +1539,6 @@
 struct reverse_sort : public queue_sort {
   SF &SortFunc;
   reverse_sort(SF &sf) : SortFunc(sf) {}
-  reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {}
 
   bool operator()(SUnit* left, SUnit* right) const {
     // reverse left/right rather than simply !SortFunc(left, right)
@@ -1559,7 +1558,6 @@
 
   RegReductionPQBase *SPQ;
   bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
-  bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
 
   bool operator()(SUnit* left, SUnit* right) const;
 };
@@ -1574,8 +1572,6 @@
   RegReductionPQBase *SPQ;
   src_ls_rr_sort(RegReductionPQBase *spq)
     : SPQ(spq) {}
-  src_ls_rr_sort(const src_ls_rr_sort &RHS)
-    : SPQ(RHS.SPQ) {}
 
   bool operator()(SUnit* left, SUnit* right) const;
 };
@@ -1590,8 +1586,6 @@
   RegReductionPQBase *SPQ;
   hybrid_ls_rr_sort(RegReductionPQBase *spq)
     : SPQ(spq) {}
-  hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
-    : SPQ(RHS.SPQ) {}
 
   bool isReady(SUnit *SU, unsigned CurCycle) const;
 
@@ -1609,8 +1603,6 @@
   RegReductionPQBase *SPQ;
   ilp_ls_rr_sort(RegReductionPQBase *spq)
     : SPQ(spq) {}
-  ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
-    : SPQ(RHS.SPQ) {}
 
   bool isReady(SUnit *SU, unsigned CurCycle) const;
 
@@ -1675,13 +1667,13 @@
     return scheduleDAG->getHazardRec();
   }
 
-  void initNodes(std::vector<SUnit> &sunits);
+  void initNodes(std::vector<SUnit> &sunits) override;
 
-  void addNode(const SUnit *SU);
+  void addNode(const SUnit *SU) override;
 
-  void updateNode(const SUnit *SU);
+  void updateNode(const SUnit *SU) override;
 
-  void releaseState() {
+  void releaseState() override {
     SUnits = 0;
     SethiUllmanNumbers.clear();
     std::fill(RegPressure.begin(), RegPressure.end(), 0);
@@ -1695,26 +1687,26 @@
     return SU->getNode()->getIROrder();
   }
 
-  bool empty() const { return Queue.empty(); }
+  bool empty() const override { return Queue.empty(); }
 
-  void push(SUnit *U) {
+  void push(SUnit *U) override {
     assert(!U->NodeQueueId && "Node in the queue already");
     U->NodeQueueId = ++CurQueueId;
     Queue.push_back(U);
   }
 
-  void remove(SUnit *SU) {
+  void remove(SUnit *SU) override {
     assert(!Queue.empty() && "Queue is empty!");
     assert(SU->NodeQueueId != 0 && "Not in queue!");
     std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
                                                  SU);
-    if (I != prior(Queue.end()))
+    if (I != std::prev(Queue.end()))
       std::swap(*I, Queue.back());
     Queue.pop_back();
     SU->NodeQueueId = 0;
   }
 
-  bool tracksRegPressure() const { return TracksRegPressure; }
+  bool tracksRegPressure() const override { return TracksRegPressure; }
 
   void dumpRegPressure() const;
 
@@ -1724,9 +1716,9 @@
 
   int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
 
-  void scheduledNode(SUnit *SU);
+  void scheduledNode(SUnit *SU) override;
 
-  void unscheduledNode(SUnit *SU);
+  void unscheduledNode(SUnit *SU) override;
 
 protected:
   bool canClobber(const SUnit *SU, const SUnit *Op);
@@ -1738,12 +1730,12 @@
 template<class SF>
 static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
   std::vector<SUnit *>::iterator Best = Q.begin();
-  for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
+  for (std::vector<SUnit *>::iterator I = std::next(Q.begin()),
          E = Q.end(); I != E; ++I)
     if (Picker(*Best, *I))
       Best = I;
   SUnit *V = *Best;
-  if (Best != prior(Q.end()))
+  if (Best != std::prev(Q.end()))
     std::swap(*Best, Q.back());
   Q.pop_back();
   return V;
@@ -1776,13 +1768,13 @@
                          tii, tri, tli),
       Picker(this) {}
 
-  bool isBottomUp() const { return SF::IsBottomUp; }
+  bool isBottomUp() const override { return SF::IsBottomUp; }
 
-  bool isReady(SUnit *U) const {
+  bool isReady(SUnit *U) const override {
     return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
   }
 
-  SUnit *pop() {
+  SUnit *pop() override {
     if (Queue.empty()) return NULL;
 
     SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 054e3dd..5639894 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -738,13 +738,13 @@
   if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI() ||
       // Fast-isel may have inserted some instructions, in which case the
       // BB->back().isPHI() test will not fire when we want it to.
-      prior(Emitter.getInsertPos())->isPHI()) {
+      std::prev(Emitter.getInsertPos())->isPHI()) {
     // Did not insert any instruction.
     Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
     return;
   }
 
-  Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos())));
+  Orders.push_back(std::make_pair(Order, std::prev(Emitter.getInsertPos())));
   ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
 }
 
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 2ff37e0..5e11dbb 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -117,13 +117,13 @@
     virtual MachineBasicBlock*
     EmitSchedule(MachineBasicBlock::iterator &InsertPos);
 
-    virtual void dumpNode(const SUnit *SU) const;
+    void dumpNode(const SUnit *SU) const override;
 
     void dumpSchedule() const;
 
-    virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+    std::string getGraphNodeLabel(const SUnit *SU) const override;
 
-    virtual std::string getDAGName() const;
+    std::string getDAGName() const override;
 
     virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
 
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 58aa1fe..fb86103 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -80,7 +80,7 @@
     delete AvailableQueue;
   }
 
-  void Schedule();
+  void Schedule() override;
 
 private:
   void releaseSucc(SUnit *SU, const SDep &D);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 45d5a4f..d11ce80 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -18,17 +18,15 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/DebugInfo.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalAlias.h"
@@ -179,6 +177,22 @@
   return true;
 }
 
+/// \brief Return true if the specified node is a BUILD_VECTOR node whose
+/// operands are all ConstantSDNodes or undef.
+bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) {
+  if (N->getOpcode() != ISD::BUILD_VECTOR)
+    return false;
+
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    SDValue Op = N->getOperand(i);
+    if (Op.getOpcode() == ISD::UNDEF)
+      continue;
+    if (!isa<ConstantSDNode>(Op))
+      return false;
+  }
+  return true;
+}
+
 /// isScalarToVector - Return true if the specified node is a
 /// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
 /// element is not an undef.
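
For reference, a hedged sketch of how the new predicate is meant to be used; the combine around it and the SDValue V are hypothetical, not part of this patch:

    // Fold only when every lane of the BUILD_VECTOR is an immediate or undef.
    if (ISD::isBuildVectorOfConstantSDNodes(V.getNode())) {
      for (unsigned i = 0, e = V.getNumOperands(); i != e; ++i) {
        SDValue Lane = V.getOperand(i);
        if (Lane.getOpcode() == ISD::UNDEF)
          continue; // undef lanes carry no value to fold
        const APInt &C = cast<ConstantSDNode>(Lane)->getAPIntValue();
        // ... fold this lane using C ...
      }
    }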
@@ -217,6 +231,21 @@
   return true;
 }
 
+ISD::NodeType ISD::getExtForLoadExtType(ISD::LoadExtType ExtType) {
+  switch (ExtType) {
+  case ISD::EXTLOAD:
+    return ISD::ANY_EXTEND;
+  case ISD::SEXTLOAD:
+    return ISD::SIGN_EXTEND;
+  case ISD::ZEXTLOAD:
+    return ISD::ZERO_EXTEND;
+  default:
+    break;
+  }
+
+  llvm_unreachable("Invalid LoadExtType");
+}
+
 /// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
 /// when given the operation for (X op Y).
 ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
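
One plausible use of the getExtForLoadExtType helper added above (a sketch only; DAG, DL, VT, NarrowValue and the LoadSDNode *LD are assumed to be in scope):

    // Re-extend a narrowed value with the extension kind matching the load:
    // EXTLOAD -> ANY_EXTEND, SEXTLOAD -> SIGN_EXTEND, ZEXTLOAD -> ZERO_EXTEND.
    ISD::NodeType ExtOpc = ISD::getExtForLoadExtType(LD->getExtensionType());
    SDValue Ext = DAG.getNode(ExtOpc, DL, VT, NarrowValue);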
@@ -369,9 +398,12 @@
     llvm_unreachable("Should only be used on nodes with operands");
   default: break;  // Normal nodes don't need extra info.
   case ISD::TargetConstant:
-  case ISD::Constant:
-    ID.AddPointer(cast<ConstantSDNode>(N)->getConstantIntValue());
+  case ISD::Constant: {
+    const ConstantSDNode *C = cast<ConstantSDNode>(N);
+    ID.AddPointer(C->getConstantIntValue());
+    ID.AddBoolean(C->isOpaque());
     break;
+  }
   case ISD::TargetConstantFP:
   case ISD::ConstantFP: {
     ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue());
@@ -869,7 +901,7 @@
 
 // EntryNode could meaningfully have debug info if we can find it...
 SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
-  : TM(tm), TSI(*tm.getSelectionDAGInfo()), TTI(0), TLI(0), OptLevel(OL),
+  : TM(tm), TSI(*tm.getSelectionDAGInfo()), TLI(0), OptLevel(OL),
     EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)),
     Root(getEntryNode()), NewNodesMustHaveLegalTypes(false),
     UpdateListeners(0) {
@@ -877,10 +909,8 @@
   DbgInfo = new SDDbgInfo();
 }
 
-void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti,
-                        const TargetLowering *tli) {
+void SelectionDAG::init(MachineFunction &mf, const TargetLowering *tli) {
   MF = &mf;
-  TTI = tti;
   TLI = tli;
   Context = &mf.getFunction()->getContext();
 }
@@ -956,19 +986,21 @@
   return getNode(ISD::XOR, DL, VT, Val, NegOne);
 }
 
-SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT) {
+SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT, bool isO) {
   EVT EltVT = VT.getScalarType();
   assert((EltVT.getSizeInBits() >= 64 ||
          (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
          "getConstant with a uint64_t value that doesn't fit in the type!");
-  return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT);
+  return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT, isO);
 }
 
-SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT) {
-  return getConstant(*ConstantInt::get(*Context, Val), VT, isT);
+SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT,
+                                  bool isO) {
+  return getConstant(*ConstantInt::get(*Context, Val), VT, isT, isO);
 }
 
-SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
+SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT,
+                                  bool isO) {
   assert(VT.isInteger() && "Cannot create FP integer constant!");
 
   EVT EltVT = VT.getScalarType();
@@ -1010,7 +1042,7 @@
     for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) {
       EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits)
                                            .trunc(ViaEltSizeInBits),
-                                     ViaEltVT, isT));
+                                     ViaEltVT, isT, isO));
     }
 
     // EltParts is currently in little endian order. If we actually want
@@ -1041,6 +1073,7 @@
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
   ID.AddPointer(Elt);
+  ID.AddBoolean(isO);
   void *IP = 0;
   SDNode *N = NULL;
   if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
@@ -1048,7 +1081,7 @@
       return SDValue(N, 0);
 
   if (!N) {
-    N = new (NodeAllocator) ConstantSDNode(isT, Elt, EltVT);
+    N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, EltVT);
     CSEMap.InsertNode(N, IP);
     AllNodes.push_back(N);
   }
@@ -1139,7 +1172,7 @@
   if (!GVar) {
     // If GV is an alias then use the aliasee for determining thread-localness.
     if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
-      GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
+      GVar = dyn_cast_or_null<GlobalVariable>(GA->getAliasedGlobal());
   }
 
   unsigned Opc;
@@ -2502,17 +2535,23 @@
 
 SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
                               EVT VT, SDValue Operand) {
-  // Constant fold unary operations with an integer constant operand.
+  // Constant fold unary operations with an integer constant operand. Even
+  // opaque constants will be folded, because folding unary operations
+  // doesn't create new constants with different values. Nevertheless, the
+  // opaque flag is preserved during folding to prevent future folding with
+  // other constants.
   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) {
     const APInt &Val = C->getAPIntValue();
     switch (Opcode) {
     default: break;
     case ISD::SIGN_EXTEND:
-      return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT);
+      return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT,
+                         C->isTargetOpcode(), C->isOpaque());
     case ISD::ANY_EXTEND:
     case ISD::ZERO_EXTEND:
     case ISD::TRUNCATE:
-      return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT);
+      return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT,
+                         C->isTargetOpcode(), C->isOpaque());
     case ISD::UINT_TO_FP:
     case ISD::SINT_TO_FP: {
       APFloat apf(EVTToAPFloatSemantics(VT),
@@ -2529,15 +2568,19 @@
         return getConstantFP(APFloat(APFloat::IEEEdouble, Val), VT);
       break;
     case ISD::BSWAP:
-      return getConstant(Val.byteSwap(), VT);
+      return getConstant(Val.byteSwap(), VT, C->isTargetOpcode(),
+                         C->isOpaque());
     case ISD::CTPOP:
-      return getConstant(Val.countPopulation(), VT);
+      return getConstant(Val.countPopulation(), VT, C->isTargetOpcode(),
+                         C->isOpaque());
     case ISD::CTLZ:
     case ISD::CTLZ_ZERO_UNDEF:
-      return getConstant(Val.countLeadingZeros(), VT);
+      return getConstant(Val.countLeadingZeros(), VT, C->isTargetOpcode(),
+                         C->isOpaque());
     case ISD::CTTZ:
     case ISD::CTTZ_ZERO_UNDEF:
-      return getConstant(Val.countTrailingZeros(), VT);
+      return getConstant(Val.countTrailingZeros(), VT, C->isTargetOpcode(),
+                         C->isOpaque());
     }
   }
 
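
To make the opaque bit concrete, here is a minimal sketch of the behavior these hunks establish (assumes a SelectionDAG &DAG and an SDLoc DL in scope; the values are illustrative):

    // Two nodes for the same value 42: the opaque one hashes differently in
    // the CSEMap (ID.AddBoolean(isO) above), so the two are never unified.
    SDValue Plain  = DAG.getConstant(42, MVT::i32);
    SDValue Opaque = DAG.getConstant(42, MVT::i32, /*isT=*/false, /*isO=*/true);
    // Unary folds still fire on the opaque node but preserve the flag, so the
    // result below is again an opaque constant; binary constant folding
    // (FoldConstantArithmetic below) bails out on opaque operands instead.
    SDValue Trunc  = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Opaque);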
@@ -2774,10 +2817,13 @@
 
   ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1);
   ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2);
-  if (Scalar1 && Scalar2) {
+  if (Scalar1 && Scalar2 && (Scalar1->isOpaque() || Scalar2->isOpaque()))
+    return SDValue();
+
+  if (Scalar1 && Scalar2)
     // Scalar instruction.
     Inputs.push_back(std::make_pair(Scalar1, Scalar2));
-  } else {
+  else {
     // For vectors extract each constant element into Inputs so we can constant
     // fold them individually.
     BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
@@ -2793,6 +2839,9 @@
       if (!V1 || !V2) // Not a constant, bail.
         return SDValue();
 
+      if (V1->isOpaque() || V2->isOpaque())
+        return SDValue();
+
       // Avoid BUILD_VECTOR nodes that perform implicit truncation.
       // FIXME: This is valid and could be handled by truncating the APInts.
       if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
@@ -3546,10 +3595,10 @@
       Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8;
   }
 
-  // If the "cost" of materializing the integer immediate is 1 or free, then
-  // it is cost effective to turn the load into the immediate.
-  const TargetTransformInfo *TTI = DAG.getTargetTransformInfo();
-  if (TTI->getIntImmCost(Val, VT.getTypeForEVT(*DAG.getContext())) < 2)
+  // If the "cost" of materializing the integer immediate is less than the cost
+  // of a load, then it is cost effective to turn the load into the immediate.
+  Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+  if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty))
     return DAG.getConstant(Val, VT);
   return SDValue(0, 0);
 }
@@ -3609,8 +3658,9 @@
                                    DAG.getMachineFunction());
 
   if (VT == MVT::Other) {
-    if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment() ||
-        TLI.allowsUnalignedMemoryAccesses(VT)) {
+    unsigned AS = 0;
+    if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment(AS) ||
+        TLI.allowsUnalignedMemoryAccesses(VT, AS)) {
       VT = TLI.getPointerTy();
     } else {
       switch (DstAlign & 7) {
@@ -3667,9 +3717,10 @@
       // FIXME: Only does this for 64-bit or more since we don't have proper
       // cost model for unaligned load / store.
       bool Fast;
+      unsigned AS = 0;
       if (NumMemOps && AllowOverlap &&
           VTSize >= 8 && NewVTSize < Size &&
-          TLI.allowsUnalignedMemoryAccesses(VT, &Fast) && Fast)
+          TLI.allowsUnalignedMemoryAccesses(VT, AS, &Fast) && Fast)
         VTSize = Size;
       else {
         VT = NewVT;
@@ -4182,9 +4233,10 @@
 }
 
 SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
-                                SDVTList VTList, SDValue* Ops, unsigned NumOps,
+                                SDVTList VTList, SDValue *Ops, unsigned NumOps,
                                 MachineMemOperand *MMO,
-                                AtomicOrdering Ordering,
+                                AtomicOrdering SuccessOrdering,
+                                AtomicOrdering FailureOrdering,
                                 SynchronizationScope SynchScope) {
   FoldingSetNodeID ID;
   ID.AddInteger(MemVT.getRawBits());
@@ -4206,17 +4258,28 @@
   SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(),
                                                dl.getDebugLoc(), VTList, MemVT,
                                                Ops, DynOps, NumOps, MMO,
-                                               Ordering, SynchScope);
+                                               SuccessOrdering, FailureOrdering,
+                                               SynchScope);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
 }
 
 SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
+                                SDVTList VTList, SDValue *Ops, unsigned NumOps,
+                                MachineMemOperand *MMO,
+                                AtomicOrdering Ordering,
+                                SynchronizationScope SynchScope) {
+  return getAtomic(Opcode, dl, MemVT, VTList, Ops, NumOps, MMO, Ordering,
+                   Ordering, SynchScope);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
                                 SDValue Chain, SDValue Ptr, SDValue Cmp,
                                 SDValue Swp, MachinePointerInfo PtrInfo,
                                 unsigned Alignment,
-                                AtomicOrdering Ordering,
+                                AtomicOrdering SuccessOrdering,
+                                AtomicOrdering FailureOrdering,
                                 SynchronizationScope SynchScope) {
   if (Alignment == 0)  // Ensure that codegen never sees alignment 0
     Alignment = getEVTAlignment(MemVT);
@@ -4237,14 +4300,15 @@
     MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment);
 
   return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO,
-                   Ordering, SynchScope);
+                   SuccessOrdering, FailureOrdering, SynchScope);
 }
 
 SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
                                 SDValue Chain,
                                 SDValue Ptr, SDValue Cmp,
                                 SDValue Swp, MachineMemOperand *MMO,
-                                AtomicOrdering Ordering,
+                                AtomicOrdering SuccessOrdering,
+                                AtomicOrdering FailureOrdering,
                                 SynchronizationScope SynchScope) {
   assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op");
   assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
@@ -4253,7 +4317,8 @@
 
   SDVTList VTs = getVTList(VT, MVT::Other);
   SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
-  return getAtomic(Opcode, dl, MemVT, VTs, Ops, 4, MMO, Ordering, SynchScope);
+  return getAtomic(Opcode, dl, MemVT, VTs, Ops, 4, MMO, SuccessOrdering,
+                   FailureOrdering, SynchScope);
 }
 
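
A hedged sketch of what the split orderings enable (dl, MemVT, Chain, Ptr, Cmp, Swp, MMO and Scope are assumed to be in scope): a strong compare-and-swap may now relax its failure case instead of forcing one ordering on both paths:

    // acq_rel on success, monotonic on failure; previously both sides were
    // forced to the single Ordering argument.
    SDValue L = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, MemVT, Chain, Ptr,
                              Cmp, Swp, MMO, /*SuccessOrdering=*/AcquireRelease,
                              /*FailureOrdering=*/Monotonic, Scope);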
 SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
@@ -5633,7 +5698,7 @@
   SDNode::use_iterator &UI;
   SDNode::use_iterator &UE;
 
-  virtual void NodeDeleted(SDNode *N, SDNode *E) {
+  void NodeDeleted(SDNode *N, SDNode *E) override {
     // Increment the iterator as needed.
     while (UI != UE && N == *UI)
       ++UI;
@@ -6457,7 +6522,7 @@
                                         unsigned &SplatBitSize,
                                         bool &HasAnyUndefs,
                                         unsigned MinSplatBits,
-                                        bool isBigEndian) {
+                                        bool isBigEndian) const {
   EVT VT = getValueType(0);
   assert(VT.isVector() && "Expected a vector type");
   unsigned sz = VT.getSizeInBits();
@@ -6518,6 +6583,27 @@
   return true;
 }
 
+ConstantSDNode *BuildVectorSDNode::getConstantSplatValue() const {
+  SDValue Op0 = getOperand(0);
+  if (Op0.getOpcode() != ISD::Constant)
+    return nullptr;
+
+  for (unsigned i = 1, e = getNumOperands(); i != e; ++i)
+    if (getOperand(i) != Op0)
+      return nullptr;
+
+  return cast<ConstantSDNode>(Op0);
+}
+
+bool BuildVectorSDNode::isConstant() const {
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    unsigned Opc = getOperand(i).getOpcode();
+    if (Opc != ISD::UNDEF && Opc != ISD::Constant && Opc != ISD::ConstantFP)
+      return false;
+  }
+  return true;
+}
+
 bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
   // Find the first non-undef value in the shuffle mask.
   unsigned i, e;
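
For reference, a hedged sketch of how the two BuildVectorSDNode helpers added above compose; N (an SDNode*) and processSplat are hypothetical:

    if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
      // isConstant(): every lane is Constant, ConstantFP, or undef.
      if (BV->isConstant()) {
        // getConstantSplatValue(): non-null only when every lane equals lane
        // 0, which must itself be an ISD::Constant (undef lanes do not match).
        if (ConstantSDNode *Splat = BV->getConstantSplatValue())
          processSplat(Splat->getAPIntValue()); // hypothetical splat handler
      }
    }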
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 2b2713d..4a6e5cf 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -34,10 +34,10 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/StackMaps.h"
-#include "llvm/DebugInfo.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalVariable.h"
@@ -214,6 +214,20 @@
   llvm_unreachable("Unknown mismatch!");
 }
 
+static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
+                                              const Twine &ErrMsg) {
+  const Instruction *I = dyn_cast_or_null<Instruction>(V);
+  if (!I)
+    return Ctx.emitError(ErrMsg);
+
+  const char *AsmError = ", possible invalid constraint for vector type";
+  if (const CallInst *CI = dyn_cast<CallInst>(I))
+    if (isa<InlineAsm>(CI->getCalledValue()))
+      return Ctx.emitError(I, ErrMsg + AsmError);
+
+  return Ctx.emitError(I, ErrMsg);
+}
+
 /// getCopyFromPartsVector - Create a value that contains the specified legal
 /// parts combined into the value they represent.  If the parts combine to a
 /// type larger then ValueVT then AssertOp can be used to specify whether the
@@ -306,16 +320,8 @@
 
   // Handle cases such as i8 -> <1 x i1>
   if (ValueVT.getVectorNumElements() != 1) {
-    LLVMContext &Ctx = *DAG.getContext();
-    Twine ErrMsg("non-trivial scalar-to-vector conversion");
-    if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) {
-      if (const CallInst *CI = dyn_cast<CallInst>(I))
-        if (isa<InlineAsm>(CI->getCalledValue()))
-          ErrMsg = ErrMsg + ", possible invalid constraint for vector type";
-      Ctx.emitError(I, ErrMsg);
-    } else {
-      Ctx.emitError(ErrMsg);
-    }
+    diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
+                                      "non-trivial scalar-to-vector conversion");
     return DAG.getUNDEF(ValueVT);
   }
 
@@ -397,18 +403,9 @@
          "Failed to tile the value with PartVT!");
 
   if (NumParts == 1) {
-    if (PartEVT != ValueVT) {
-      LLVMContext &Ctx = *DAG.getContext();
-      Twine ErrMsg("scalar-to-vector conversion failed");
-      if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) {
-        if (const CallInst *CI = dyn_cast<CallInst>(I))
-          if (isa<InlineAsm>(CI->getCalledValue()))
-            ErrMsg = ErrMsg + ", possible invalid constraint for vector type";
-        Ctx.emitError(I, ErrMsg);
-      } else {
-        Ctx.emitError(ErrMsg);
-      }
-    }
+    if (PartEVT != ValueVT)
+      diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
+                                        "scalar-to-vector conversion failed");
 
     Parts[0] = Val;
     return;
@@ -627,16 +624,6 @@
       }
     }
 
-    /// areValueTypesLegal - Return true if types of all the values are legal.
-    bool areValueTypesLegal(const TargetLowering &TLI) {
-      for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
-        MVT RegisterVT = RegVTs[Value];
-        if (!TLI.isTypeLegal(RegisterVT))
-          return false;
-      }
-      return true;
-    }
-
     /// append - Add the specified values to this one.
     void append(const RegsForValue &RHS) {
       ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
@@ -851,12 +838,20 @@
   SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
   Ops.push_back(Res);
 
+  unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
   for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
     unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
     MVT RegisterVT = RegVTs[Value];
     for (unsigned i = 0; i != NumRegs; ++i) {
       assert(Reg < Regs.size() && "Mismatch in # registers expected");
-      Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
+      unsigned TheReg = Regs[Reg++];
+      Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
+
+      if (TheReg == SP && Code == InlineAsm::Kind_Clobber) {
+        // If we clobbered the stack pointer, MFI should know about it.
+        assert(DAG.getMachineFunction().getFrameInfo()->
+            hasInlineAsmWithSPAdjust());
+      }
     }
   }
 }
@@ -866,7 +861,7 @@
   AA = &aa;
   GFI = gfi;
   LibInfo = li;
-  TD = DAG.getTarget().getDataLayout();
+  DL = DAG.getTarget().getDataLayout();
   Context = DAG.getContext();
   LPadToCallSiteMap.clear();
 }
@@ -884,6 +879,7 @@
   PendingExports.clear();
   CurInst = NULL;
   HasTailCall = false;
+  SDNodeOrder = LowestSDNodeOrder;
 }
 
 /// clearDanglingDebugInfo - Clear the dangling debug information
@@ -1384,7 +1380,9 @@
                                                   MachineBasicBlock *TBB,
                                                   MachineBasicBlock *FBB,
                                                   MachineBasicBlock *CurBB,
-                                                  MachineBasicBlock *SwitchBB) {
+                                                  MachineBasicBlock *SwitchBB,
+                                                  uint32_t TWeight,
+                                                  uint32_t FWeight) {
   const BasicBlock *BB = CurBB->getBasicBlock();
 
   // If the leaf of the tree is a comparison, merge the condition into
@@ -1409,7 +1407,7 @@
       }
 
       CaseBlock CB(Condition, BOp->getOperand(0),
-                   BOp->getOperand(1), NULL, TBB, FBB, CurBB);
+                   BOp->getOperand(1), NULL, TBB, FBB, CurBB, TWeight, FWeight);
       SwitchCases.push_back(CB);
       return;
     }
@@ -1417,17 +1415,26 @@
 
   // Create a CaseBlock record representing this branch.
   CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
-               NULL, TBB, FBB, CurBB);
+               NULL, TBB, FBB, CurBB, TWeight, FWeight);
   SwitchCases.push_back(CB);
 }
 
+/// Scale down both weights to fit into uint32_t.
+static void ScaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
+  uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
+  uint32_t Scale = (NewMax / UINT32_MAX) + 1;
+  NewTrue = NewTrue / Scale;
+  NewFalse = NewFalse / Scale;
+}
+
 /// FindMergedConditions - If Cond is an expression like
 void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
                                                MachineBasicBlock *TBB,
                                                MachineBasicBlock *FBB,
                                                MachineBasicBlock *CurBB,
                                                MachineBasicBlock *SwitchBB,
-                                               unsigned Opc) {
+                                               unsigned Opc, uint32_t TWeight,
+                                               uint32_t FWeight) {
   // If this node is not part of the or/and tree, emit it as a branch.
   const Instruction *BOp = dyn_cast<Instruction>(Cond);
   if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
@@ -1435,7 +1442,8 @@
       BOp->getParent() != CurBB->getBasicBlock() ||
       !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
       !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
-    EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB);
+    EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
+                                 TWeight, FWeight);
     return;
   }
 
@@ -1447,6 +1455,7 @@
 
   if (Opc == Instruction::Or) {
     // Codegen X | Y as:
+    // BB1:
     //   jmp_if_X TBB
     //   jmp TmpBB
     // TmpBB:
@@ -1454,14 +1463,34 @@
     //   jmp FBB
     //
 
-    // Emit the LHS condition.
-    FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc);
+    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+    // The requirement is that
+    //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
+    //     = TrueProb for original BB.
+    // Assuming the original weights are A and B, one choice is to set BB1's
+    // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
+    // assumes that
+    //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
+    // Another choice is to assume that TrueProb for BB1 equals TrueProb for
+    // TmpBB, but the math is more complicated.
 
+    uint64_t NewTrueWeight = TWeight;
+    uint64_t NewFalseWeight = (uint64_t)TWeight + 2 * (uint64_t)FWeight;
+    ScaleWeights(NewTrueWeight, NewFalseWeight);
+    // Emit the LHS condition.
+    FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
+                         NewTrueWeight, NewFalseWeight);
+
+    NewTrueWeight = TWeight;
+    NewFalseWeight = 2 * (uint64_t)FWeight;
+    ScaleWeights(NewTrueWeight, NewFalseWeight);
     // Emit the RHS condition into TmpBB.
-    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
+    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
+                         NewTrueWeight, NewFalseWeight);
   } else {
     assert(Opc == Instruction::And && "Unknown merge op!");
     // Codegen X & Y as:
+    // BB1:
     //   jmp_if_X TmpBB
     //   jmp FBB
     // TmpBB:
@@ -1470,11 +1499,28 @@
     //
     //  This requires creation of TmpBB after CurBB.
 
-    // Emit the LHS condition.
-    FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc);
+    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+    // The requirement is that
+    //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
+    //     = FalseProb for orignal BB.
+    // Assuming the orignal weights are A and B, one choice is to set BB1's
+    // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
+    // assumes that
+    //   FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
 
+    uint64_t NewTrueWeight = 2 * (uint64_t)TWeight + (uint64_t)FWeight;
+    uint64_t NewFalseWeight = FWeight;
+    ScaleWeights(NewTrueWeight, NewFalseWeight);
+    // Emit the LHS condition.
+    FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
+                         NewTrueWeight, NewFalseWeight);
+
+    NewTrueWeight = 2 * (uint64_t)TWeight;
+    NewFalseWeight = FWeight;
+    ScaleWeights(NewTrueWeight, NewFalseWeight);
     // Emit the RHS condition into TmpBB.
-    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
+    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
+                         NewTrueWeight, NewFalseWeight);
   }
 }
 
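
The weight choices above can be sanity-checked numerically. This standalone sketch (not part of the patch) verifies, for the Or case, that TrueProb for BB1 plus FalseProb for BB1 times TrueProb for TmpBB reproduces the original A/(A+B):

    #include <cassert>
    #include <cmath>

    int main() {
      const double A = 7, B = 3;        // original true/false weights
      // BB1 gets weights (A, A + 2B); TmpBB gets (A, 2B).
      double TrueBB1  = A / (2 * A + 2 * B);
      double FalseBB1 = (A + 2 * B) / (2 * A + 2 * B);
      double TrueTmp  = A / (A + 2 * B);
      // TrueProb(BB1) + FalseProb(BB1) * TrueProb(TmpBB) == A / (A + B).
      assert(std::fabs(TrueBB1 + FalseBB1 * TrueTmp - A / (A + B)) < 1e-12);
      return 0;
    }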
@@ -1525,8 +1571,9 @@
     // Update machine-CFG edges.
     BrMBB->addSuccessor(Succ0MBB);
 
-    // If this is not a fall-through branch, emit the branch.
-    if (Succ0MBB != NextBlock)
+    // If this is not a fall-through branch or optimizations are switched off,
+    // emit the branch.
+    if (Succ0MBB != NextBlock || TM.getOptLevel() == CodeGenOpt::None)
       DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
                               MVT::Other, getControlRoot(),
                               DAG.getBasicBlock(Succ0MBB)));
@@ -1561,7 +1608,8 @@
         (BOp->getOpcode() == Instruction::And ||
          BOp->getOpcode() == Instruction::Or)) {
       FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
-                           BOp->getOpcode());
+                           BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB),
+                           getEdgeWeight(BrMBB, Succ1MBB));
       // If the compares in later blocks need to use values not currently
       // exported from this block, export them now.  This block should always
       // be the first entry.
@@ -2351,7 +2399,7 @@
     volatile double RDensity =
       (double)RSize.roundToDouble() /
                            (Last - RBegin + 1ULL).roundToDouble();
-    double Metric = Range.logBase2()*(LDensity+RDensity);
+    volatile double Metric = Range.logBase2()*(LDensity+RDensity);
     // Should always split in some non-trivial place
     DEBUG(dbgs() <<"=>Step\n"
                  << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
@@ -2590,7 +2638,7 @@
   if (Cases.size() >= 2)
     // Must recompute end() each iteration because it may be
     // invalidated by erase if we hold on to it
-    for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin());
+    for (CaseItr I = Cases.begin(), J = std::next(Cases.begin());
          J != Cases.end(); ) {
       const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
       const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
@@ -2936,6 +2984,13 @@
   if (DestVT != N.getValueType())
     setValue(&I, DAG.getNode(ISD::BITCAST, getCurSDLoc(),
                              DestVT, N)); // convert types.
+  // Check if the original LLVM IR operand was a ConstantInt, because
+  // getValue() might fold any kind of constant expression to an integer
+  // constant and that is not what we are looking for. Only recognize a
+  // bitcast of a genuine constant integer as an opaque constant.
+  else if (ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
+    setValue(&I, DAG.getConstant(C->getValue(), DestVT, /*isTarget=*/false,
+                                 /*isOpaque=*/true));
   else
     setValue(&I, N);            // noop cast.
 }
@@ -3261,7 +3316,7 @@
       unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
       if (Field) {
         // N = N + Offset
-        uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
+        uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
         N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N,
                         DAG.getConstant(Offset, N.getValueType()));
       }
@@ -3275,7 +3330,7 @@
       if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
         if (CI->isZero()) continue;
         uint64_t Offs =
-            TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+            DL->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
         SDValue OffsVal;
         EVT PTy = TLI->getPointerTy(AS);
         unsigned PtrBits = PTy.getSizeInBits();
@@ -3292,7 +3347,7 @@
 
       // N = N + Idx * ElementSize;
       APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS),
-                                TD->getTypeAllocSize(Ty));
+                                DL->getTypeAllocSize(Ty));
       SDValue IdxN = getValue(Idx);
 
       // If the index is smaller or larger than intptr_t, truncate or extend
@@ -3370,9 +3425,7 @@
   setValue(&I, DSA);
   DAG.setRoot(DSA.getValue(1));
 
-  // Inform the Frame Information that we have just allocated a variable-sized
-  // object.
-  FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1);
+  assert(FuncInfo.MF->getFrameInfo()->hasVarSizedObjects());
 }
 
 void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
@@ -3400,7 +3453,7 @@
 
   SDValue Root;
   bool ConstantMemory = false;
-  if (I.isVolatile() || NumValues > MaxParallelChains)
+  if (isVolatile || NumValues > MaxParallelChains)
     // Serialize volatile loads with other side effects.
     Root = getRoot();
   else if (AA->pointsToConstantMemory(
@@ -3413,6 +3466,10 @@
     Root = DAG.getRoot();
   }
 
+  const TargetLowering *TLI = TM.getTargetLowering();
+  if (isVolatile)
+    Root = TLI->prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG);
+
   SmallVector<SDValue, 4> Values(NumValues);
   SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
                                           NumValues));
@@ -3536,14 +3593,15 @@
 
 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
   SDLoc dl = getCurSDLoc();
-  AtomicOrdering Order = I.getOrdering();
+  AtomicOrdering SuccessOrder = I.getSuccessOrdering();
+  AtomicOrdering FailureOrder = I.getFailureOrdering();
   SynchronizationScope Scope = I.getSynchScope();
 
   SDValue InChain = getRoot();
 
   const TargetLowering *TLI = TM.getTargetLowering();
   if (TLI->getInsertFencesForAtomic())
-    InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
+    InChain = InsertFenceForAtomic(InChain, SuccessOrder, Scope, true, dl,
                                    DAG, *TLI);
 
   SDValue L =
@@ -3554,13 +3612,14 @@
                   getValue(I.getCompareOperand()),
                   getValue(I.getNewValOperand()),
                   MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */,
-                  TLI->getInsertFencesForAtomic() ? Monotonic : Order,
+                  TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder,
+                  TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder,
                   Scope);
 
   SDValue OutChain = L.getValue(1);
 
   if (TLI->getInsertFencesForAtomic())
-    OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
+    OutChain = InsertFenceForAtomic(OutChain, SuccessOrder, Scope, false, dl,
                                     DAG, *TLI);
 
   setValue(&I, L);
@@ -3637,6 +3696,7 @@
   if (I.getAlignment() < VT.getSizeInBits() / 8)
     report_fatal_error("Cannot generate unaligned atomic load");
 
+  InChain = TLI->prepareVolatileOrAtomicLoad(InChain, dl, DAG);
   SDValue L =
     DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
                   getValue(I.getPointerOperand()),
@@ -5283,7 +5343,7 @@
       return 0;
 
     SmallVector<Value *, 4> Allocas;
-    GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD);
+    GetUnderlyingObjects(I.getArgOperand(1), Allocas, DL);
 
     for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(),
            E = Allocas.end(); Object != E; ++Object) {
@@ -5324,6 +5384,8 @@
     (void)getControlRoot();
     return 0;
   }
+  case Intrinsic::clear_cache:
+    return TLI->getClearCacheBuiltinName();
   case Intrinsic::donothing:
     // ignore
     return 0;
@@ -5366,6 +5428,8 @@
   int DemoteStackIdx = -100;
 
   if (!CanLowerReturn) {
+    assert(!CS.hasInAllocaArgument() &&
+           "sret demotion is incompatible with inalloca");
     uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(
                       FTy->getReturnType());
     unsigned Align  = TLI->getDataLayout()->getPrefTypeAlignment(
@@ -5508,9 +5572,8 @@
 /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
 /// value is equal or not-equal to zero.
 static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
-  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
-       UI != E; ++UI) {
-    if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+  for (const User *U : V->users()) {
+    if (const ICmpInst *IC = dyn_cast<ICmpInst>(U))
       if (IC->isEquality())
         if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
           if (C->isNullValue())
@@ -5534,7 +5597,7 @@
 
     if (const Constant *LoadCst =
           ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
-                                       Builder.TD))
+                                       Builder.DL))
       return Builder.getValue(LoadCst);
   }
 
@@ -5653,9 +5716,13 @@
     // bloat the code.
     const TargetLowering *TLI = TM.getTargetLowering();
     if (ActuallyDoIt && CSize->getZExtValue() > 4) {
+      unsigned DstAS = LHS->getType()->getPointerAddressSpace();
+      unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
       // TODO: Handle 5 byte compare as 4-byte + 1 byte.
       // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
-      if (!TLI->isTypeLegal(LoadVT) ||!TLI->allowsUnalignedMemoryAccesses(LoadVT))
+      if (!TLI->isTypeLegal(LoadVT) ||
+          !TLI->allowsUnalignedMemoryAccesses(LoadVT, SrcAS) ||
+          !TLI->allowsUnalignedMemoryAccesses(LoadVT, DstAS))
         ActuallyDoIt = false;
     }
 
@@ -6026,7 +6093,7 @@
   /// MVT::Other.
   EVT getCallOperandValEVT(LLVMContext &Context,
                            const TargetLowering &TLI,
-                           const DataLayout *TD) const {
+                           const DataLayout *DL) const {
     if (CallOperandVal == 0) return MVT::Other;
 
     if (isa<BasicBlock>(CallOperandVal))
@@ -6052,7 +6119,7 @@
     // If OpTy is not a single value, it may be a struct/union that we
     // can tile with integers.
     if (!OpTy->isSingleValueType() && OpTy->isSized()) {
-      unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+      unsigned BitSize = DL->getTypeSizeInBits(OpTy);
       switch (BitSize) {
       default: break;
       case 1:
@@ -6108,7 +6175,7 @@
       // types are identical size, use a bitcast to convert (e.g. two differing
       // vector types).
       MVT RegVT = *PhysReg.second->vt_begin();
-      if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
+      if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) {
         OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
                                          RegVT, OpInfo.CallOperand);
         OpInfo.ConstraintVT = RegVT;
@@ -6241,7 +6308,7 @@
         OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
       }
 
-      OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, TD).
+      OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, DL).
         getSimpleVT();
     }
 
@@ -6716,11 +6783,11 @@
 
 void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
   const TargetLowering *TLI = TM.getTargetLowering();
-  const DataLayout &TD = *TLI->getDataLayout();
+  const DataLayout &DL = *TLI->getDataLayout();
   SDValue V = DAG.getVAArg(TLI->getValueType(I.getType()), getCurSDLoc(),
                            getRoot(), getValue(I.getOperand(0)),
                            DAG.getSrcValue(I.getOperand(0)),
-                           TD.getABITypeAlignment(I.getType()));
+                           DL.getABITypeAlignment(I.getType()));
   setValue(&I, V);
   DAG.setRoot(V.getValue(1));
 }
@@ -6781,6 +6848,42 @@
   return TLI->LowerCallTo(CLI);
 }
 
+/// \brief Add a stack map intrinsic call's live variable operands to a stackmap
+/// or patchpoint target node's operand list.
+///
+/// Constants are converted to TargetConstants purely as an optimization to
+/// avoid constant materialization and register allocation.
+///
+/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
+/// generate address computation nodes, and so ExpandISelPseudo can convert the
+/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
+/// address materialization and register allocation, but may also be required
+/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
+/// alloca in the entry block, then the runtime may assume that the alloca's
+/// StackMap location can be read immediately after compilation and that the
+/// location is valid at any point during execution (this is similar to the
+/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
+/// only available in a register, then the runtime would need to trap when
+/// execution reaches the StackMap in order to read the alloca's location.
+static void addStackMapLiveVars(const CallInst &CI, unsigned StartIdx,
+                                SmallVectorImpl<SDValue> &Ops,
+                                SelectionDAGBuilder &Builder) {
+  for (unsigned i = StartIdx, e = CI.getNumArgOperands(); i != e; ++i) {
+    SDValue OpVal = Builder.getValue(CI.getArgOperand(i));
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
+      Ops.push_back(
+        Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
+      Ops.push_back(
+        Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64));
+    } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
+      const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
+      Ops.push_back(
+        Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy()));
+    } else
+      Ops.push_back(OpVal);
+  }
+}
+
 /// \brief Lower llvm.experimental.stackmap directly to its target opcode.
 void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
   // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>,
@@ -6788,61 +6891,64 @@
 
   assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
 
-  SDValue Callee = getValue(CI.getCalledValue());
+  SDValue Chain, InFlag, Callee, NullPtr;
+  SmallVector<SDValue, 32> Ops;
 
-  // Lower into a call sequence with no args and no return value.
-  std::pair<SDValue, SDValue> Result = LowerCallOperands(CI, 0, 0, Callee);
+  SDLoc DL = getCurSDLoc();
+  Callee = getValue(CI.getCalledValue());
+  NullPtr = DAG.getIntPtrConstant(0, true);
+
+  // The stackmap intrinsic only records the live variables (the arguments
+  // passed to it) and emits NOPs (if requested). Unlike the patchpoint
+  // intrinsic, this won't be lowered to a function call. This means we don't
+  // have to worry about calling conventions and target specific lowering code.
+  // Instead we perform the call lowering right here.
+  //
+  // chain, flag = CALLSEQ_START(chain, 0)
+  // chain, flag = STACKMAP(id, nbytes, ..., chain, flag)
+  // chain, flag = CALLSEQ_END(chain, 0, 0, flag)
+  //
+  Chain = DAG.getCALLSEQ_START(getRoot(), NullPtr, DL);
+  InFlag = Chain.getValue(1);
+
+  // Add the <id> and <numBytes> constants.
+  SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
+  Ops.push_back(DAG.getTargetConstant(
+                  cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64));
+  SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
+  Ops.push_back(DAG.getTargetConstant(
+                  cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32));
+
+  // Push live variables for the stack map.
+  addStackMapLiveVars(CI, 2, Ops, *this);
+
+  // We are not pushing any register mask info here on the operands list,
+  // because the stackmap doesn't clobber anything.
+
+  // Push the chain and the glue flag.
+  Ops.push_back(Chain);
+  Ops.push_back(InFlag);
+
+  // Create the STACKMAP node.
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+  SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops);
+  Chain = SDValue(SM, 0);
+  InFlag = Chain.getValue(1);
+
+  Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL);
+
+  // Stackmaps don't generate values, so nothing goes into the NodeMap.
+
   // Set the root to the target-lowered call chain.
-  SDValue Chain = Result.second;
   DAG.setRoot(Chain);
 
-  /// Get a call instruction from the call sequence chain.
-  /// Tail calls are not allowed.
-  SDNode *CallEnd = Chain.getNode();
-  assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
-         "Expected a callseq node.");
-  SDNode *Call = CallEnd->getOperand(0).getNode();
-  bool hasGlue = Call->getGluedNode();
-
-  // Replace the target specific call node with the stackmap intrinsic.
-  SmallVector<SDValue, 8> Ops;
-
-  // Add the <id> and <numShadowBytes> constants.
-  for (unsigned i = 0; i < 2; ++i) {
-    SDValue tmp = getValue(CI.getOperand(i));
-    Ops.push_back(DAG.getTargetConstant(
-        cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32));
-  }
-  // Push live variables for the stack map.
-  for (unsigned i = 2, e = CI.getNumArgOperands(); i != e; ++i)
-    Ops.push_back(getValue(CI.getArgOperand(i)));
-
-  // Push the chain (this is originally the first operand of the call, but
-  // becomes now the last or second to last operand).
-  Ops.push_back(*(Call->op_begin()));
-
-    // Push the glue flag (last operand).
-  if (hasGlue)
-    Ops.push_back(*(Call->op_end()-1));
-
-  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
-
-  // Replace the target specific call node with a STACKMAP node.
-  MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::STACKMAP, getCurSDLoc(),
-                                         NodeTys, Ops);
-
-  // StackMap generates no value, so nothing goes in the NodeMap.
-
-  // Fixup the consumers of the intrinsic. The chain and glue may be used in the
-  // call sequence.
-  DAG.ReplaceAllUsesWith(Call, MN);
-
-  DAG.DeleteNode(Call);
+  // Inform the Frame Information that we have a stackmap in this function.
+  FuncInfo.MF->getFrameInfo()->setHasStackMap();
 }
 
 /// \brief Lower llvm.experimental.patchpoint directly to its target opcode.
 void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
-  // void|i64 @llvm.experimental.patchpoint.void|i64(i32 <id>,
+  // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
   //                                                 i32 <numBytes>,
   //                                                 i8* <target>,
   //                                                 i32 <numArgs>,
@@ -6855,17 +6961,19 @@
   SDValue Callee = getValue(CI.getOperand(2)); // <target>
 
   // Get the real number of arguments participating in the call <numArgs>
-  unsigned NumArgs =
-    cast<ConstantSDNode>(getValue(CI.getArgOperand(3)))->getZExtValue();
+  SDValue NArgVal = getValue(CI.getArgOperand(PatchPointOpers::NArgPos));
+  unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue();
 
   // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
-  assert(CI.getNumArgOperands() >= NumArgs + 4 &&
+  // Intrinsics include all meta-operands up to but not including CC.
+  unsigned NumMetaOpers = PatchPointOpers::CCPos;
+  assert(CI.getNumArgOperands() >= NumMetaOpers + NumArgs &&
          "Not enough arguments provided to the patchpoint intrinsic");
 
   // For AnyRegCC the arguments are lowered later on manually.
   unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs;
   std::pair<SDValue, SDValue> Result =
-    LowerCallOperands(CI, 4, NumCallArgs, Callee, isAnyRegCC);
+    LowerCallOperands(CI, NumMetaOpers, NumCallArgs, Callee, isAnyRegCC);
 
   // Set the root to the target-lowered call chain.
   SDValue Chain = Result.second;
@@ -6885,13 +6993,16 @@
   // Replace the target specific call node with the patchable intrinsic.
   SmallVector<SDValue, 8> Ops;
 
-  // Add the <id> and <numNopBytes> constants.
-  for (unsigned i = 0; i < 2; ++i) {
-    SDValue tmp = getValue(CI.getOperand(i));
-    Ops.push_back(DAG.getTargetConstant(
-        cast<ConstantSDNode>(tmp)->getZExtValue(), MVT::i32));
-  }
+  // Add the <id> and <numBytes> constants.
+  SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
+  Ops.push_back(DAG.getTargetConstant(
+                  cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64));
+  SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
+  Ops.push_back(DAG.getTargetConstant(
+                  cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32));
+
   // Assume that the Callee is a constant address.
+  // FIXME: handle function symbols in the future.
   Ops.push_back(
     DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue(),
                           /*isTarget=*/true));
@@ -6909,25 +7020,16 @@
   // Add the arguments we omitted previously. The register allocator should
   // place these in any free register.
   if (isAnyRegCC)
-    for (unsigned i = 4, e = NumArgs + 4; i != e; ++i)
+    for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i)
       Ops.push_back(getValue(CI.getArgOperand(i)));
 
-  // Push the arguments from the call instruction.
+  // Push the arguments from the call instruction up to the register mask.
   SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1;
   for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i)
     Ops.push_back(*i);
 
   // Push live variables for the stack map.
-  for (unsigned i = NumArgs + 4, e = CI.getNumArgOperands(); i != e; ++i) {
-    SDValue OpVal = getValue(CI.getArgOperand(i));
-    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
-      Ops.push_back(
-        DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
-      Ops.push_back(
-        DAG.getTargetConstant(C->getSExtValue(), MVT::i64));
-    } else
-      Ops.push_back(OpVal);
-  }
+  addStackMapLiveVars(CI, NumMetaOpers + NumArgs, Ops, *this);
 
   // Push the register mask info.
   if (hasGlue)
@@ -6981,6 +7083,9 @@
   } else
     DAG.ReplaceAllUsesWith(Call, MN);
   DAG.DeleteNode(Call);
+
+  // Inform the Frame Information that we have a patchpoint in this function.
+  FuncInfo.MF->getFrameInfo()->setHasPatchPoint();
 }
 
 /// TargetLowering::LowerCallTo - This is the default LowerCallTo
@@ -7037,8 +7142,18 @@
         Flags.setInReg();
       if (Args[i].isSRet)
         Flags.setSRet();
-      if (Args[i].isByVal) {
+      if (Args[i].isByVal)
         Flags.setByVal();
+      if (Args[i].isInAlloca) {
+        Flags.setInAlloca();
+        // Set the byval flag for CCAssignFn callbacks that don't know about
+        // inalloca.  This way we can know how many bytes we should've allocated
+        // and how many bytes a callee cleanup function will pop.  If we port
+        // inalloca to more targets, we'll have to add custom inalloca handling
+        // in the various CC lowering callbacks.
+        Flags.setByVal();
+      }
+      if (Args[i].isByVal || Args[i].isInAlloca) {
         PointerType *Ty = cast<PointerType>(Args[i].Ty);
         Type *ElementTy = Ty->getElementType();
         Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy));
@@ -7202,12 +7317,10 @@
     return A->use_empty();
 
   const BasicBlock *Entry = A->getParent()->begin();
-  for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end();
-       UI != E; ++UI) {
-    const User *U = *UI;
+  for (const User *U : A->users())
     if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U))
       return false;  // Use not in entry block.
-  }
+
   return true;
 }
 
@@ -7215,7 +7328,7 @@
   SelectionDAG &DAG = SDB->DAG;
   SDLoc dl = SDB->getCurSDLoc();
   const TargetLowering *TLI = getTargetLowering();
-  const DataLayout *TD = TLI->getDataLayout();
+  const DataLayout *DL = TLI->getDataLayout();
   SmallVector<ISD::InputArg, 16> Ins;
 
   if (!FuncInfo->CanLowerReturn) {
@@ -7247,7 +7360,7 @@
       Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
       ISD::ArgFlagsTy Flags;
       unsigned OriginalAlignment =
-        TD->getABITypeAlignment(ArgTy);
+        DL->getABITypeAlignment(ArgTy);
 
       if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
         Flags.setZExt();
@@ -7257,11 +7370,21 @@
         Flags.setInReg();
       if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet))
         Flags.setSRet();
-      if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) {
+      if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal))
         Flags.setByVal();
+      if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) {
+        Flags.setInAlloca();
+        // Set the byval flag for CCAssignFn callbacks that don't know about
+        // inalloca.  This way we can know how many bytes we should've allocated
+        // and how many bytes a callee cleanup function will pop.  If we port
+        // inalloca to more targets, we'll have to add custom inalloca handling
+        // in the various CC lowering callbacks.
+        Flags.setByVal();
+      }
+      if (Flags.isByVal() || Flags.isInAlloca()) {
         PointerType *Ty = cast<PointerType>(I->getType());
         Type *ElementTy = Ty->getElementType();
-        Flags.setByValSize(TD->getTypeAllocSize(ElementTy));
+        Flags.setByValSize(DL->getTypeAllocSize(ElementTy));
         // For ByVal, alignment should be passed from FE.  BE will guess if
         // this info is not there but there are cases it cannot get right.
         unsigned FrameAlign;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 835f643..66835bf 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -18,9 +18,8 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constants.h"
-#include "llvm/Support/CallSite.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <vector>
 
@@ -57,6 +56,7 @@
 class MachineInstr;
 class MachineRegisterInfo;
 class MDNode;
+class MVT;
 class PHINode;
 class PtrToIntInst;
 class ReturnInst;
@@ -488,8 +488,12 @@
 private:
   const TargetMachine &TM;
 public:
+  /// Lowest valid SDNodeOrder. The special case 0 is reserved for scheduling
+  /// nodes without a corresponding SDNode.
+  static const unsigned LowestSDNodeOrder = 1;
+
   SelectionDAG &DAG;
-  const DataLayout *TD;
+  const DataLayout *DL;
   AliasAnalysis *AA;
   const TargetLibraryInfo *LibInfo;
 
@@ -534,7 +538,7 @@
 
   SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
                       CodeGenOpt::Level ol)
-    : CurInst(NULL), SDNodeOrder(0), TM(dag.getTarget()),
+    : CurInst(NULL), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()),
       DAG(dag), FuncInfo(funcinfo), OptLevel(ol),
       HasTailCall(false) {
   }
@@ -608,11 +612,13 @@
 
   void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
                             MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
-                            MachineBasicBlock *SwitchBB, unsigned Opc);
+                            MachineBasicBlock *SwitchBB, unsigned Opc,
+                            uint32_t TW, uint32_t FW);
   void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
                                     MachineBasicBlock *FBB,
                                     MachineBasicBlock *CurBB,
-                                    MachineBasicBlock *SwitchBB);
+                                    MachineBasicBlock *SwitchBB,
+                                    uint32_t TW, uint32_t FW);
   bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
   bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
   void CopyToExportRegsIfNeeded(const Value *V);
@@ -627,7 +633,7 @@
                                                 bool useVoidTy = false);
 
   /// UpdateSplitBlock - When an MBB was split during scheduling, update the
-  /// references that ned to refer to the last resulting block.
+  /// references that need to refer to the last resulting block.
   void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
 
 private:
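
LowestSDNodeOrder above documents a sentinel convention: order 0 means "no
corresponding source position", so real orders begin at 1. A tiny sketch under
that reading (Node and hasSourceOrder are illustrative):

    #include <cassert>

    static const unsigned LowestOrder = 1; // 0 reserved: "no source order"

    struct Node { unsigned IROrder; };

    static bool hasSourceOrder(const Node &N) { return N.IROrder != 0; }

    int main() {
      unsigned NextOrder = LowestOrder;
      Node A = {NextOrder++}; // built while visiting an IR instruction
      Node B = {0};           // synthesized later, e.g. by the scheduler
      assert(hasSourceOrder(A) && !hasSourceOrder(B));
    }
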
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index c04a08d..535feba 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -14,11 +14,10 @@
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "ScheduleDAGSDNodes.h"
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/DebugInfo.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/Support/Debug.h"
@@ -82,7 +81,10 @@
   case ISD::VALUETYPE:                  return "ValueType";
   case ISD::Register:                   return "Register";
   case ISD::RegisterMask:               return "RegisterMask";
-  case ISD::Constant:                   return "Constant";
+  case ISD::Constant:
+    if (cast<ConstantSDNode>(this)->isOpaque())
+      return "OpaqueConstant";
+    return "Constant";
   case ISD::ConstantFP:                 return "ConstantFP";
   case ISD::GlobalAddress:              return "GlobalAddress";
   case ISD::GlobalTLSAddress:           return "GlobalTLSAddress";
@@ -112,7 +114,10 @@
   }
 
   case ISD::BUILD_VECTOR:               return "BUILD_VECTOR";
-  case ISD::TargetConstant:             return "TargetConstant";
+  case ISD::TargetConstant:
+    if (cast<ConstantSDNode>(this)->isOpaque())
+      return "OpaqueTargetConstant";
+    return "TargetConstant";
   case ISD::TargetConstantFP:           return "TargetConstantFP";
   case ISD::TargetGlobalAddress:        return "TargetGlobalAddress";
   case ISD::TargetGlobalTLSAddress:     return "TargetGlobalTLSAddress";
@@ -352,7 +357,7 @@
       for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
            e = MN->memoperands_end(); i != e; ++i) {
         OS << **i;
-        if (llvm::next(i) != e)
+        if (std::next(i) != e)
           OS << " ";
       }
       OS << ">";
@@ -385,7 +390,7 @@
              dyn_cast<GlobalAddressSDNode>(this)) {
     int64_t offset = GADN->getOffset();
     OS << '<';
-    WriteAsOperand(OS, GADN->getGlobal());
+    GADN->getGlobal()->printAsOperand(OS);
     OS << '>';
     if (offset > 0)
       OS << " + " << offset;
@@ -476,9 +481,9 @@
                dyn_cast<BlockAddressSDNode>(this)) {
     int64_t offset = BA->getOffset();
     OS << "<";
-    WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false);
+    BA->getBlockAddress()->getFunction()->printAsOperand(OS, false);
     OS << ", ";
-    WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
+    BA->getBlockAddress()->getBasicBlock()->printAsOperand(OS, false);
     OS << ">";
     if (offset > 0)
       OS << " + " << offset;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 3a0cfa1..5d0e2b9 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -20,7 +20,6 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/GCMetadata.h"
@@ -33,8 +32,8 @@
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/DebugInfo.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
@@ -213,7 +212,7 @@
 static cl::opt<RegisterScheduler::FunctionPassCtor, false,
                RegisterPassParser<RegisterScheduler> >
 ISHeuristic("pre-RA-sched",
-            cl::init(&createDefaultScheduler),
+            cl::init(&createDefaultScheduler), cl::Hidden,
             cl::desc("Instruction schedulers available (before register"
                      " allocation):"));
 
@@ -400,7 +399,6 @@
   RegInfo = &MF->getRegInfo();
   AA = &getAnalysis<AliasAnalysis>();
   LibInfo = &getAnalysis<TargetLibraryInfo>();
-  TTI = getAnalysisIfAvailable<TargetTransformInfo>();
   GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0;
 
   TargetSubtargetInfo &ST =
@@ -418,8 +416,8 @@
 
   SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this);
 
-  CurDAG->init(*MF, TTI, TLI);
-  FuncInfo->set(Fn, *MF);
+  CurDAG->init(*MF, TLI);
+  FuncInfo->set(Fn, *MF, CurDAG);
 
   if (UseMBPI && OptLevel != CodeGenOpt::None)
     FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>();
@@ -428,7 +426,8 @@
 
   SDB->init(GFI, *AA, LibInfo);
 
-  MF->setHasMSInlineAsm(false);
+  MF->setHasInlineAsm(false);
+
   SelectAllBasicBlocks(Fn);
 
   // If the first basic block in the function has live ins that need to be
@@ -448,7 +447,8 @@
   for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
     MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1];
     bool hasFI = MI->getOperand(0).isFI();
-    unsigned Reg = hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg();
+    unsigned Reg =
+        hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg();
     if (TargetRegisterInfo::isPhysicalRegister(Reg))
       EntryMBB->insert(EntryMBB->begin(), MI);
     else {
@@ -456,7 +456,7 @@
       if (Def) {
         MachineBasicBlock::iterator InsertPos = Def;
         // FIXME: VR def may not be in entry block.
-        Def->getParent()->insert(llvm::next(InsertPos), MI);
+        Def->getParent()->insert(std::next(InsertPos), MI);
       } else
         DEBUG(dbgs() << "Dropping debug info for dead vreg"
               << TargetRegisterInfo::virtReg2Index(Reg) << "\n");
@@ -483,9 +483,10 @@
       // that COPY instructions also need DBG_VALUE, if it is the only
       // user of LDI->second.
       MachineInstr *CopyUseMI = NULL;
-      for (MachineRegisterInfo::use_iterator
-             UI = RegInfo->use_begin(LDI->second);
-           MachineInstr *UseMI = UI.skipInstruction();) {
+      for (MachineRegisterInfo::use_instr_iterator
+           UI = RegInfo->use_instr_begin(LDI->second),
+           E = RegInfo->use_instr_end(); UI != E; ) {
+        MachineInstr *UseMI = &*(UI++);
         if (UseMI->isDebugValue()) continue;
         if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) {
           CopyUseMI = UseMI; continue;
@@ -511,7 +512,7 @@
   for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
        ++I) {
 
-    if (MFI->hasCalls() && MF->hasMSInlineAsm())
+    if (MFI->hasCalls() && MF->hasInlineAsm())
       break;
 
     const MachineBasicBlock *MBB = I;
@@ -522,8 +523,8 @@
           II->isStackAligningInlineAsm()) {
         MFI->setHasCalls(true);
       }
-      if (II->isMSInlineAsm()) {
-        MF->setHasMSInlineAsm(true);
+      if (II->isInlineAsm()) {
+        MF->setHasInlineAsm(true);
       }
     }
   }
@@ -563,6 +564,9 @@
   // at this point.
   FuncInfo->clear();
 
+  DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n");
+  DEBUG(MF->print(dbgs()));
+
   return true;
 }
 
@@ -800,7 +804,7 @@
   /// NodeDeleted - Handle nodes deleted from the graph. If the node being
   /// deleted is the current ISelPosition node, update ISelPosition.
   ///
-  virtual void NodeDeleted(SDNode *N, SDNode *E) {
+  void NodeDeleted(SDNode *N, SDNode *E) override {
     if (ISelPosition == SelectionDAG::allnodes_iterator(N))
       ++ISelPosition;
   }
@@ -1063,7 +1067,7 @@
         // where they are, so we can be sure to emit subsequent instructions
         // after them.
         if (FuncInfo->InsertPt != FuncInfo->MBB->begin())
-          FastIS->setLastLocalValue(llvm::prior(FuncInfo->InsertPt));
+          FastIS->setLastLocalValue(std::prev(FuncInfo->InsertPt));
         else
           FastIS->setLastLocalValue(0);
       }
@@ -1071,7 +1075,7 @@
       unsigned NumFastIselRemaining = std::distance(Begin, End);
       // Do FastISel on as many instructions as possible.
       for (; BI != Begin; --BI) {
-        const Instruction *Inst = llvm::prior(BI);
+        const Instruction *Inst = std::prev(BI);
 
         // If we no longer require this instruction, skip it.
         if (isFoldedOrDeadInstruction(Inst, FuncInfo)) {
@@ -1092,7 +1096,7 @@
           // Try to fold the load if so.
           const Instruction *BeforeInst = Inst;
           while (BeforeInst != Begin) {
-            BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst));
+            BeforeInst = std::prev(BasicBlock::const_iterator(BeforeInst));
             if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo))
               break;
           }
@@ -1100,7 +1104,7 @@
               BeforeInst->hasOneUse() &&
               FastIS->tryToFoldLoad(cast<LoadInst>(BeforeInst), Inst)) {
             // If we succeeded, don't re-select the load.
-            BI = llvm::next(BasicBlock::const_iterator(BeforeInst));
+            BI = std::next(BasicBlock::const_iterator(BeforeInst));
             --NumFastIselRemaining;
             ++NumFastIselSuccess;
           }
@@ -2194,8 +2198,7 @@
 
 LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
 CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
-               SDValue N, const TargetLowering *TLI,
-               unsigned ChildNo) {
+               SDValue N, const TargetLowering *TLI, unsigned ChildNo) {
   if (ChildNo >= N.getNumOperands())
     return false;  // Match fails if out of range child #.
   return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI);
@@ -2231,6 +2234,14 @@
 }
 
 LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+                  SDValue N, unsigned ChildNo) {
+  if (ChildNo >= N.getNumOperands())
+    return false;  // Match fails if out of range child #.
+  return ::CheckInteger(MatcherTable, MatcherIndex, N.getOperand(ChildNo));
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
 CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
             SDValue N, const SelectionDAGISel &SDISel) {
   int64_t Val = MatcherTable[MatcherIndex++];
@@ -2313,6 +2324,14 @@
   case SelectionDAGISel::OPC_CheckInteger:
     Result = !::CheckInteger(Table, Index, N);
     return Index;
+  case SelectionDAGISel::OPC_CheckChild0Integer:
+  case SelectionDAGISel::OPC_CheckChild1Integer:
+  case SelectionDAGISel::OPC_CheckChild2Integer:
+  case SelectionDAGISel::OPC_CheckChild3Integer:
+  case SelectionDAGISel::OPC_CheckChild4Integer:
+    Result = !::CheckChildInteger(Table, Index, N,
+                     Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Integer);
+    return Index;
   case SelectionDAGISel::OPC_CheckAndImm:
     Result = !::CheckAndImm(Table, Index, N, SDISel);
     return Index;
@@ -2693,6 +2712,12 @@
     case OPC_CheckInteger:
       if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break;
       continue;
+    case OPC_CheckChild0Integer: case OPC_CheckChild1Integer:
+    case OPC_CheckChild2Integer: case OPC_CheckChild3Integer:
+    case OPC_CheckChild4Integer:
+      if (!::CheckChildInteger(MatcherTable, MatcherIndex, N,
+                               Opcode-OPC_CheckChild0Integer)) break;
+      continue;
     case OPC_CheckAndImm:
       if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break;
       continue;
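
The new OPC_CheckChild0Integer..OPC_CheckChild4Integer cases depend on the
opcodes being numbered consecutively: subtracting the base opcode recovers the
child index, so one handler serves all five. The dispatch trick in isolation
(enum values are illustrative):

    #include <cassert>

    enum Opcode {
      CheckChild0Integer, CheckChild1Integer, CheckChild2Integer,
      CheckChild3Integer, CheckChild4Integer
    };

    int main() {
      Opcode Opc = CheckChild3Integer;
      unsigned ChildNo = Opc - CheckChild0Integer; // delta = child number
      assert(ChildNo == 3);
    }
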
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index b752b48..1483fdd 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -15,12 +15,11 @@
 #include "ScheduleDAGSDNodes.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/DebugInfo.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 82b068d..5de0b03 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -22,6 +22,7 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/Support/CommandLine.h"
@@ -74,6 +75,7 @@
   isSRet     = CS->paramHasAttr(AttrIdx, Attribute::StructRet);
   isNest     = CS->paramHasAttr(AttrIdx, Attribute::Nest);
   isByVal    = CS->paramHasAttr(AttrIdx, Attribute::ByVal);
+  isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca);
   isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned);
   Alignment  = CS->getParamAlignment(AttrIdx);
 }
@@ -1115,6 +1117,54 @@
          (KnownOne.countPopulation() == 1);
 }
 
+bool TargetLowering::isConstTrueVal(const SDNode *N) const {
+  if (!N)
+    return false;
+
+  bool IsVec = false;
+  const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+  if (!CN) {
+    const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
+    if (!BV)
+      return false;
+
+    IsVec = true;
+    CN = BV->getConstantSplatValue();
+  }
+
+  switch (getBooleanContents(IsVec)) {
+  case UndefinedBooleanContent:
+    return CN->getAPIntValue()[0];
+  case ZeroOrOneBooleanContent:
+    return CN->isOne();
+  case ZeroOrNegativeOneBooleanContent:
+    return CN->isAllOnesValue();
+  }
+
+  llvm_unreachable("Invalid boolean contents");
+}
+
+bool TargetLowering::isConstFalseVal(const SDNode *N) const {
+  if (!N)
+    return false;
+
+  bool IsVec = false;
+  const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
+  if (!CN) {
+    const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
+    if (!BV)
+      return false;
+
+    IsVec = true;
+    CN = BV->getConstantSplatValue();
+  }
+
+  if (getBooleanContents(IsVec) == UndefinedBooleanContent)
+    return !CN->getAPIntValue()[0];
+
+  return CN->isNullValue();
+}
+
 /// SimplifySetCC - Try to simplify a setcc built with the specified operands
 /// and cc. If it is unable to simplify it, return a null SDValue.
 SDValue
@@ -1468,18 +1518,32 @@
     // Canonicalize GE/LE comparisons to use GT/LT comparisons.
     if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
       if (C1 == MinVal) return DAG.getConstant(1, VT);   // X >= MIN --> true
-      // X >= C0 --> X > (C0-1)
-      return DAG.getSetCC(dl, VT, N0,
-                          DAG.getConstant(C1-1, N1.getValueType()),
-                          (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
+      // X >= C0 --> X > (C0 - 1)
+      APInt C = C1 - 1;
+      ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
+      if ((DCI.isBeforeLegalizeOps() ||
+           isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
+          (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 &&
+                                isLegalICmpImmediate(C.getSExtValue())))) {
+        return DAG.getSetCC(dl, VT, N0,
+                            DAG.getConstant(C, N1.getValueType()),
+                            NewCC);
+      }
     }
 
     if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
       if (C1 == MaxVal) return DAG.getConstant(1, VT);   // X <= MAX --> true
-      // X <= C0 --> X < (C0+1)
-      return DAG.getSetCC(dl, VT, N0,
-                          DAG.getConstant(C1+1, N1.getValueType()),
-                          (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
+      // X <= C0 --> X < (C0 + 1)
+      APInt C = C1 + 1;
+      ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
+      if ((DCI.isBeforeLegalizeOps() ||
+           isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
+          (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 &&
+                                isLegalICmpImmediate(C.getSExtValue())))) {
+        return DAG.getSetCC(dl, VT, N0,
+                            DAG.getConstant(C, N1.getValueType()),
+                            NewCC);
+      }
     }
 
     if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
@@ -1535,7 +1599,7 @@
         N0.getOpcode() == ISD::AND)
       if (ConstantSDNode *AndRHS =
                   dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
-        EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
+        EVT ShiftTy = DCI.isBeforeLegalize() ?
           getPointerTy() : getShiftAmountTy(N0.getValueType());
         if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
           // Perform the xform if the AND RHS is a single bit.
@@ -1565,7 +1629,7 @@
           const APInt &AndRHSC = AndRHS->getAPIntValue();
           if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
             unsigned ShiftBits = AndRHSC.countTrailingZeros();
-            EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
+            EVT ShiftTy = DCI.isBeforeLegalize() ?
               getPointerTy() : getShiftAmountTy(N0.getValueType());
             EVT CmpTy = N0.getValueType();
             SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
@@ -1593,7 +1657,7 @@
         }
         NewC = NewC.lshr(ShiftBits);
         if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) {
-          EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
+          EVT ShiftTy = DCI.isBeforeLegalize() ?
             getPointerTy() : getShiftAmountTy(N0.getValueType());
           EVT CmpTy = N0.getValueType();
           SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
@@ -2663,3 +2727,14 @@
              DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType())));
   }
 }
+
+bool TargetLowering::
+verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
+  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
+    DAG.getContext()->emitError("argument to '__builtin_return_address' must "
+                                "be a constant integer");
+    return true;
+  }
+
+  return false;
+}
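
isConstTrueVal/isConstFalseVal above read a constant according to the target's
boolean contents. A standalone sketch of the three conventions, using a 64-bit
value as a stand-in for the APInt logic:

    #include <cassert>

    enum BooleanContent { Undefined, ZeroOrOne, ZeroOrNegOne };

    static bool isTrueVal(BooleanContent BC, long long V) {
      switch (BC) {
      case Undefined:    return (V & 1) != 0; // only bit 0 is meaningful
      case ZeroOrOne:    return V == 1;
      case ZeroOrNegOne: return V == -1;
      }
      return false;
    }

    int main() {
      assert(isTrueVal(ZeroOrNegOne, -1) && !isTrueVal(ZeroOrOne, -1));
      assert(isTrueVal(Undefined, 3)); // bit 0 set; upper bits ignored
    }
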
diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
index f769b44..1120be8 100644
--- a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
@@ -16,7 +16,7 @@
 using namespace llvm;
 
 TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM)
-  : TD(TM.getDataLayout()) {
+  : DL(TM.getDataLayout()) {
 }
 
 TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
index 10f64c7..adb3ef9 100644
--- a/lib/CodeGen/ShadowStackGC.cpp
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -29,10 +29,10 @@
 #include "llvm/CodeGen/GCs.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IR/CallSite.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
-#include "llvm/Support/CallSite.h"
 
 using namespace llvm;
 
@@ -55,8 +55,8 @@
   public:
     ShadowStackGC();
 
-    bool initializeCustomLowering(Module &M);
-    bool performCustomLowering(Function &F);
+    bool initializeCustomLowering(Module &M) override;
+    bool performCustomLowering(Function &F) override;
 
   private:
     bool IsNullValue(Value *V);
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index da2e710..dc7ca2b 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -60,11 +60,11 @@
 public:
   static char ID; // Pass identification, replacement for typeid
   explicit SjLjEHPrepare(const TargetMachine *TM) : FunctionPass(ID), TM(TM) {}
-  bool doInitialization(Module &M);
-  bool runOnFunction(Function &F);
+  bool doInitialization(Module &M) override;
+  bool runOnFunction(Function &F) override;
 
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const {}
-  const char *getPassName() const {
+  void getAnalysisUsage(AnalysisUsage &AU) const override {}
+  const char *getPassName() const override {
     return "SJLJ Exception Handling preparation";
   }
 
@@ -149,7 +149,7 @@
 /// instruction with those returned by the personality function.
 void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
                                          Value *SelVal) {
-  SmallVector<Value *, 8> UseWorkList(LPI->use_begin(), LPI->use_end());
+  SmallVector<Value *, 8> UseWorkList(LPI->user_begin(), LPI->user_end());
   while (!UseWorkList.empty()) {
     Value *Val = UseWorkList.pop_back_val();
     ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Val);
@@ -173,7 +173,7 @@
   Type *LPadType = LPI->getType();
   Value *LPadVal = UndefValue::get(LPadType);
   IRBuilder<> Builder(
-      llvm::next(BasicBlock::iterator(cast<Instruction>(SelVal))));
+      std::next(BasicBlock::iterator(cast<Instruction>(SelVal))));
   LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val");
   LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val");
 
@@ -251,7 +251,7 @@
     // Aggregate types can't be cast, but are legal argument types, so we have
     // to handle them differently. We use an extract/insert pair as a
     // lightweight method to achieve the same goal.
-    if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
+    if (isa<StructType>(Ty) || isa<ArrayType>(Ty)) {
       Instruction *EI = ExtractValueInst::Create(AI, 0, "", AfterAllocaInsPt);
       Instruction *NI = InsertValueInst::Create(AI, EI, 0);
       NI->insertAfter(EI);
@@ -294,8 +294,8 @@
       if (Inst->use_empty())
         continue;
       if (Inst->hasOneUse() &&
-          cast<Instruction>(Inst->use_back())->getParent() == BB &&
-          !isa<PHINode>(Inst->use_back()))
+          cast<Instruction>(Inst->user_back())->getParent() == BB &&
+          !isa<PHINode>(Inst->user_back()))
         continue;
 
       // If this is an alloca in the entry block, it's not a real register
@@ -306,11 +306,10 @@
 
       // Avoid iterator invalidation by copying users to a temporary vector.
       SmallVector<Instruction *, 16> Users;
-      for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
-           UI != E; ++UI) {
-        Instruction *User = cast<Instruction>(*UI);
-        if (User->getParent() != BB || isa<PHINode>(User))
-          Users.push_back(User);
+      for (User *U : Inst->users()) {
+        Instruction *UI = cast<Instruction>(U);
+        if (UI->getParent() != BB || isa<PHINode>(UI))
+          Users.push_back(UI);
       }
 
       // Find all of the blocks that this value is live in.
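
The loop above snapshots users into a temporary vector before rewriting them,
because mutating a use list while iterating it invalidates the iterators. The
same discipline in a self-contained form (std::vector stands in for the use
list):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    int main() {
      std::vector<int> users = {1, 2, 3, 4};
      // Snapshot first, then mutate the real container.
      std::vector<int> worklist(users.begin(), users.end());
      for (int U : worklist)
        if (U % 2 == 0)
          users.erase(std::find(users.begin(), users.end(), U));
      assert(users.size() == 2); // 1 and 3 remain
    }
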
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index 20049a8..a6c6261 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -129,7 +129,7 @@
   const unsigned Space = SlotIndex::InstrDist/2;
   assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM");
 
-  IndexList::iterator startItr = prior(curItr);
+  IndexList::iterator startItr = std::prev(curItr);
   unsigned index = startItr->getIndex();
   do {
     curItr->setIndex(index += Space);
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
index 10a93b7..5f73469 100644
--- a/lib/CodeGen/SpillPlacement.cpp
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -188,10 +188,10 @@
 
   // Compute total ingoing and outgoing block frequencies for all bundles.
   BlockFrequencies.resize(mf.getNumBlockIDs());
-  MachineBlockFrequencyInfo &MBFI = getAnalysis<MachineBlockFrequencyInfo>();
+  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
   for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) {
     unsigned Num = I->getNumber();
-    BlockFrequencies[Num] = MBFI.getBlockFreq(I);
+    BlockFrequencies[Num] = MBFI->getBlockFreq(I);
   }
 
   // We never change the function.
@@ -221,7 +221,7 @@
   // Hopfield network.
   if (bundles->getBlocks(n).size() > 100) {
     nodes[n].BiasP = 0;
-    nodes[n].BiasN = (BlockFrequency::getEntryFrequency() / 16);
+    nodes[n].BiasN = (MBFI->getEntryFreq() / 16);
   }
 }
 
@@ -323,10 +323,12 @@
   // affect the entire network in a single iteration. That means very fast
   // convergence, usually in a single iteration.
   for (unsigned iteration = 0; iteration != 10; ++iteration) {
-    // Scan backwards, skipping the last node which was just updated.
+    // Scan backwards, skipping the last node on all but the first iteration;
+    // on later iterations it was just updated by the previous forward scan.
     bool Changed = false;
     for (SmallVectorImpl<unsigned>::const_reverse_iterator I =
-           llvm::next(Linked.rbegin()), E = Linked.rend(); I != E; ++I) {
+           iteration == 0 ? Linked.rbegin() : std::next(Linked.rbegin()),
+           E = Linked.rend(); I != E; ++I) {
       unsigned n = *I;
       if (nodes[n].update(nodes)) {
         Changed = true;
@@ -340,7 +342,7 @@
     // Scan forwards, skipping the first node which was just updated.
     Changed = false;
     for (SmallVectorImpl<unsigned>::const_iterator I =
-           llvm::next(Linked.begin()), E = Linked.end(); I != E; ++I) {
+           std::next(Linked.begin()), E = Linked.end(); I != E; ++I) {
       unsigned n = *I;
       if (nodes[n].update(nodes)) {
         Changed = true;
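
The fix above lets the very first backward sweep visit every node; only later
sweeps skip the node the preceding forward sweep just updated. The control
shape, reduced to a standalone sketch:

    #include <iterator>
    #include <vector>

    int main() {
      std::vector<int> linked = {10, 20, 30};
      for (unsigned iteration = 0; iteration != 3; ++iteration) {
        std::vector<int>::reverse_iterator I =
            iteration == 0 ? linked.rbegin() : std::next(linked.rbegin());
        for (std::vector<int>::reverse_iterator E = linked.rend(); I != E;
             ++I)
          ++*I; // stand-in for nodes[n].update(nodes)
      }
      return linked[2] == 31 ? 0 : 1; // the last node was swept only once
    }
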
diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h
index 105516b..a88d7ac 100644
--- a/lib/CodeGen/SpillPlacement.h
+++ b/lib/CodeGen/SpillPlacement.h
@@ -38,12 +38,14 @@
 class EdgeBundles;
 class MachineBasicBlock;
 class MachineLoopInfo;
+class MachineBlockFrequencyInfo;
 
 class SpillPlacement  : public MachineFunctionPass {
   struct Node;
   const MachineFunction *MF;
   const EdgeBundles *bundles;
   const MachineLoopInfo *loops;
+  const MachineBlockFrequencyInfo *MBFI;
   Node *nodes;
 
   // Nodes that are active in the current computation. Owned by the prepare()
@@ -145,9 +147,9 @@
   }
 
 private:
-  virtual bool runOnMachineFunction(MachineFunction&);
-  virtual void getAnalysisUsage(AnalysisUsage&) const;
-  virtual void releaseMemory();
+  bool runOnMachineFunction(MachineFunction&) override;
+  void getAnalysisUsage(AnalysisUsage&) const override;
+  void releaseMemory() override;
 
   void activate(unsigned);
 };
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index d5b3a4a..094641c 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -89,8 +89,9 @@
     unsigned ss = vrm->assignVirt2StackSlot(li->reg);
 
     // Iterate over reg uses/defs.
-    for (MachineRegisterInfo::reg_iterator
-         regItr = mri->reg_begin(li->reg); regItr != mri->reg_end();) {
+    for (MachineRegisterInfo::reg_instr_iterator
+         regItr = mri->reg_instr_begin(li->reg);
+         regItr != mri->reg_instr_end();) {
 
       // Grab the use/def instr.
       MachineInstr *mi = &*regItr;
@@ -98,9 +99,7 @@
       DEBUG(dbgs() << "  Processing " << *mi);
 
       // Step regItr to the next use/def instr.
-      do {
-        ++regItr;
-      } while (regItr != mri->reg_end() && (&*regItr == mi));
+      ++regItr;
 
       // Collect uses & defs for this instr.
       SmallVector<unsigned, 2> indices;
@@ -143,9 +142,9 @@
       if (hasDef) {
         MachineInstrSpan MIS(miItr);
 
-        tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), NewVReg,
+        tii->storeRegToStackSlot(*mi->getParent(), std::next(miItr), NewVReg,
                                  true, ss, trc, tri);
-        lis->InsertMachineInstrRangeInMaps(llvm::next(miItr), MIS.end());
+        lis->InsertMachineInstrRangeInMaps(std::next(miItr), MIS.end());
       }
     }
   }
@@ -164,7 +163,7 @@
                  VirtRegMap &vrm)
     : SpillerBase(pass, mf, vrm) {}
 
-  void spill(LiveRangeEdit &LRE) {
+  void spill(LiveRangeEdit &LRE) override {
     // Ignore spillIs - we don't use it.
     trivialSpillEverywhere(LRE);
   }
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 68a15f7..16fe979 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -131,11 +131,9 @@
 
   // Get use slots form the use-def chain.
   const MachineRegisterInfo &MRI = MF.getRegInfo();
-  for (MachineRegisterInfo::use_nodbg_iterator
-       I = MRI.use_nodbg_begin(CurLI->reg), E = MRI.use_nodbg_end(); I != E;
-       ++I)
-    if (!I.getOperand().isUndef())
-      UseSlots.push_back(LIS.getInstructionIndex(&*I).getRegSlot());
+  for (MachineOperand &MO : MRI.use_nodbg_operands(CurLI->reg))
+    if (!MO.isUndef())
+      UseSlots.push_back(LIS.getInstructionIndex(MO.getParent()).getRegSlot());
 
   array_pod_sort(UseSlots.begin(), UseSlots.end());
 
@@ -188,7 +186,7 @@
     BlockInfo BI;
     BI.MBB = MFI;
     SlotIndex Start, Stop;
-    tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+    std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
 
     // If the block contains no uses, the range must be live through. At one
     // point, RegisterCoalescer could create dangling ranges that ended
@@ -509,7 +507,7 @@
   assert(MI && "enterIntvAfter called with invalid index");
 
   VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(),
-                              llvm::next(MachineBasicBlock::iterator(MI)));
+                              std::next(MachineBasicBlock::iterator(MI)));
   return VNI->def;
 }
 
@@ -570,7 +568,7 @@
   }
 
   VNInfo *VNI = defFromParent(0, ParentVNI, Boundary, *MI->getParent(),
-                              llvm::next(MachineBasicBlock::iterator(MI)));
+                              std::next(MachineBasicBlock::iterator(MI)));
   return VNI->def;
 }
 
@@ -888,7 +886,7 @@
       // LiveInBlocks.
       MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start);
       SlotIndex BlockStart, BlockEnd;
-      tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(MBB);
+      std::tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(MBB);
 
       // The first block may be live-in, or it may have its own def.
       if (Start != BlockStart) {
@@ -972,7 +970,7 @@
 void SplitEditor::rewriteAssigned(bool ExtendRanges) {
   for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()),
        RE = MRI.reg_end(); RI != RE;) {
-    MachineOperand &MO = RI.getOperand();
+    MachineOperand &MO = *RI;
     MachineInstr *MI = MO.getParent();
     ++RI;
     // LiveDebugVariables should have handled all DBG_VALUE instructions.
@@ -1183,7 +1181,7 @@
                                         unsigned IntvIn, SlotIndex LeaveBefore,
                                         unsigned IntvOut, SlotIndex EnterAfter){
   SlotIndex Start, Stop;
-  tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum);
+  std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum);
 
   DEBUG(dbgs() << "BB#" << MBBNum << " [" << Start << ';' << Stop
                << ") intf " << LeaveBefore << '-' << EnterAfter
@@ -1286,7 +1284,7 @@
 void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
                                   unsigned IntvIn, SlotIndex LeaveBefore) {
   SlotIndex Start, Stop;
-  tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+  std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
 
   DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop
                << "), uses " << BI.FirstInstr << '-' << BI.LastInstr
@@ -1378,7 +1376,7 @@
 void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
                                    unsigned IntvOut, SlotIndex EnterAfter) {
   SlotIndex Start, Stop;
-  tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+  std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
 
   DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop
                << "), uses " << BI.FirstInstr << '-' << BI.LastInstr
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index 3dbc050..7b1de85 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -30,7 +30,6 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SparseSet.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -44,7 +43,9 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/DebugInfo.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
@@ -112,37 +113,25 @@
   SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering;
 
   /// Maps liveness intervals for each slot.
-  SmallVector<LiveInterval*, 16> Intervals;
+  SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals;
   /// VNInfo is used for the construction of LiveIntervals.
   VNInfo::Allocator VNInfoAllocator;
   /// SlotIndex analysis object.
   SlotIndexes *Indexes;
+  /// The stack protector object.
+  StackProtector *SP;
 
   /// The list of lifetime markers found. These markers are to be removed
   /// once the coloring is done.
   SmallVector<MachineInstr*, 8> Markers;
 
-  /// SlotSizeSorter - A Sort utility for arranging stack slots according
-  /// to their size.
-  struct SlotSizeSorter {
-    MachineFrameInfo *MFI;
-    SlotSizeSorter(MachineFrameInfo *mfi) : MFI(mfi) { }
-    bool operator()(int LHS, int RHS) {
-      // We use -1 to denote a uninteresting slot. Place these slots at the end.
-      if (LHS == -1) return false;
-      if (RHS == -1) return true;
-      // Sort according to size.
-      return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
-  }
-};
-
 public:
   static char ID;
   StackColoring() : MachineFunctionPass(ID) {
     initializeStackColoringPass(*PassRegistry::getPassRegistry());
   }
-  void getAnalysisUsage(AnalysisUsage &AU) const;
-  bool runOnMachineFunction(MachineFunction &MF);
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  bool runOnMachineFunction(MachineFunction &MF) override;
 
 private:
   /// Debug.
@@ -191,6 +180,7 @@
                    "stack-coloring", "Merge disjoint stack slots", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(StackProtector)
 INITIALIZE_PASS_END(StackColoring,
                    "stack-coloring", "Merge disjoint stack slots", false, false)
 
@@ -198,6 +188,7 @@
   AU.addRequired<MachineDominatorTree>();
   AU.addPreserved<MachineDominatorTree>();
   AU.addRequired<SlotIndexes>();
+  AU.addRequired<StackProtector>();
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
@@ -253,18 +244,16 @@
     BlockInfo.Begin.resize(NumSlot);
     BlockInfo.End.resize(NumSlot);
 
-    for (MachineBasicBlock::iterator BI = (*FI)->begin(), BE = (*FI)->end();
-         BI != BE; ++BI) {
-
-      if (BI->getOpcode() != TargetOpcode::LIFETIME_START &&
-          BI->getOpcode() != TargetOpcode::LIFETIME_END)
+    for (MachineInstr &MI : **FI) {
+      if (MI.getOpcode() != TargetOpcode::LIFETIME_START &&
+          MI.getOpcode() != TargetOpcode::LIFETIME_END)
         continue;
 
-      Markers.push_back(BI);
+      Markers.push_back(&MI);
 
-      bool IsStart = BI->getOpcode() == TargetOpcode::LIFETIME_START;
-      const MachineOperand &MI = BI->getOperand(0);
-      unsigned Slot = MI.getIndex();
+      bool IsStart = MI.getOpcode() == TargetOpcode::LIFETIME_START;
+      const MachineOperand &MO = MI.getOperand(0);
+      unsigned Slot = MO.getIndex();
 
       MarkersFound++;
 
@@ -310,11 +299,7 @@
 
     SmallPtrSet<const MachineBasicBlock*, 8> NextBBSet;
 
-    for (SmallVectorImpl<const MachineBasicBlock *>::iterator
-           PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end();
-           PI != PE; ++PI) {
-
-      const MachineBasicBlock *BB = *PI;
+    for (const MachineBasicBlock *BB : BasicBlockNumbering) {
       if (!BBSet.count(BB)) continue;
 
       // Use an iterator to avoid repeated lookups.
@@ -369,18 +354,14 @@
         changed = true;
         BlockInfo.LiveIn |= LocalLiveIn;
 
-        for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(),
-             PE = BB->pred_end(); PI != PE; ++PI)
-          NextBBSet.insert(*PI);
+        NextBBSet.insert(BB->pred_begin(), BB->pred_end());
       }
 
       if (LocalLiveOut.test(BlockInfo.LiveOut)) {
         changed = true;
         BlockInfo.LiveOut |= LocalLiveOut;
 
-        for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(),
-             SE = BB->succ_end(); SI != SE; ++SI)
-          NextBBSet.insert(*SI);
+        NextBBSet.insert(BB->succ_begin(), BB->succ_end());
       }
     }
 
@@ -394,18 +375,15 @@
 
   // For each block, find which slots are active within this block
   // and update the live intervals.
-  for (MachineFunction::iterator MBB = MF->begin(), MBBe = MF->end();
-       MBB != MBBe; ++MBB) {
+  for (const MachineBasicBlock &MBB : *MF) {
     Starts.clear();
     Starts.resize(NumSlots);
     Finishes.clear();
     Finishes.resize(NumSlots);
 
     // Create the interval for the basic blocks with lifetime markers in them.
-    for (SmallVectorImpl<MachineInstr*>::const_iterator it = Markers.begin(),
-         e = Markers.end(); it != e; ++it) {
-      const MachineInstr *MI = *it;
-      if (MI->getParent() != MBB)
+    for (const MachineInstr *MI : Markers) {
+      if (MI->getParent() != &MBB)
         continue;
 
       assert((MI->getOpcode() == TargetOpcode::LIFETIME_START ||
@@ -429,14 +407,14 @@
     }
 
     // Create the interval of the blocks that we previously found to be 'alive'.
-    BlockLifetimeInfo &MBBLiveness = BlockLiveness[MBB];
+    BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
     for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
          pos = MBBLiveness.LiveIn.find_next(pos)) {
-      Starts[pos] = Indexes->getMBBStartIdx(MBB);
+      Starts[pos] = Indexes->getMBBStartIdx(&MBB);
     }
     for (int pos = MBBLiveness.LiveOut.find_first(); pos != -1;
          pos = MBBLiveness.LiveOut.find_next(pos)) {
-      Finishes[pos] = Indexes->getMBBEndIdx(MBB);
+      Finishes[pos] = Indexes->getMBBEndIdx(&MBB);
     }
 
     for (unsigned i = 0; i < NumSlots; ++i) {
@@ -452,10 +430,10 @@
         // We have a single consecutive region.
         Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum));
       } else {
-        // We have two non consecutive regions. This happens when
+        // We have two non-consecutive regions. This happens when
         // LIFETIME_START appears after the LIFETIME_END marker.
-        SlotIndex NewStart = Indexes->getMBBStartIdx(MBB);
-        SlotIndex NewFin = Indexes->getMBBEndIdx(MBB);
+        SlotIndex NewStart = Indexes->getMBBStartIdx(&MBB);
+        SlotIndex NewFin = Indexes->getMBBEndIdx(&MBB);
         Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum));
         Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum));
       }
@@ -465,8 +443,8 @@
 
 bool StackColoring::removeAllMarkers() {
   unsigned Count = 0;
-  for (unsigned i = 0; i < Markers.size(); ++i) {
-    Markers[i]->eraseFromParent();
+  for (MachineInstr *MI : Markers) {
+    MI->eraseFromParent();
     Count++;
   }
   Markers.clear();
@@ -482,64 +460,74 @@
   MachineModuleInfo *MMI = &MF->getMMI();
 
   // Remap debug information that refers to stack slots.
-  MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
-  for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
-       VE = VMap.end(); VI != VE; ++VI) {
-    const MDNode *Var = VI->first;
-    if (!Var) continue;
-    std::pair<unsigned, DebugLoc> &VP = VI->second;
-    if (SlotRemap.count(VP.first)) {
-      DEBUG(dbgs()<<"Remapping debug info for ["<<Var->getName()<<"].\n");
-      VP.first = SlotRemap[VP.first];
+  for (auto &VI : MMI->getVariableDbgInfo()) {
+    if (!VI.Var)
+      continue;
+    if (SlotRemap.count(VI.Slot)) {
+      DEBUG(dbgs()<<"Remapping debug info for ["<<VI.Var->getName()<<"].\n");
+      VI.Slot = SlotRemap[VI.Slot];
       FixedDbg++;
     }
   }
 
   // Keep a list of *allocas* which need to be remapped.
   DenseMap<const AllocaInst*, const AllocaInst*> Allocas;
-  for (DenseMap<int, int>::const_iterator it = SlotRemap.begin(),
-       e = SlotRemap.end(); it != e; ++it) {
-    const AllocaInst *From = MFI->getObjectAllocation(it->first);
-    const AllocaInst *To = MFI->getObjectAllocation(it->second);
+  for (const std::pair<int, int> &SI : SlotRemap) {
+    const AllocaInst *From = MFI->getObjectAllocation(SI.first);
+    const AllocaInst *To = MFI->getObjectAllocation(SI.second);
     assert(To && From && "Invalid allocation object");
     Allocas[From] = To;
+
+    // AA might be used later for instruction scheduling, and we need it to be
+    // able to deduce the correct aliasing relationships between pointers
+    // derived from the alloca being remapped and the target of that remapping.
+    // The only safe way, without directly informing AA about the remapping
+    // somehow, is to directly update the IR to reflect the change being made
+    // here.
+    Instruction *Inst = const_cast<AllocaInst *>(To);
+    if (From->getType() != To->getType()) {
+      BitCastInst *Cast = new BitCastInst(Inst, From->getType());
+      Cast->insertAfter(Inst);
+      Inst = Cast;
+    }
+
+    // Allow the stack protector to adjust its value map to account for the
+    // upcoming replacement.
+    SP->adjustForColoring(From, To);
+
+    // Note that this will not replace uses in MMOs (which we'll update below),
+    // or anywhere else (which is why we won't delete the original
+    // instruction).
+    const_cast<AllocaInst *>(From)->replaceAllUsesWith(Inst);
   }
 
   // Remap all instructions to the new stack slots.
-  MachineFunction::iterator BB, BBE;
-  MachineBasicBlock::iterator I, IE;
-  for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB)
-    for (I = BB->begin(), IE = BB->end(); I != IE; ++I) {
-
+  for (MachineBasicBlock &BB : *MF)
+    for (MachineInstr &I : BB) {
       // Skip lifetime markers. We'll remove them soon.
-      if (I->getOpcode() == TargetOpcode::LIFETIME_START ||
-          I->getOpcode() == TargetOpcode::LIFETIME_END)
+      if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
+          I.getOpcode() == TargetOpcode::LIFETIME_END)
         continue;
 
       // Update the MachineMemOperand to use the new alloca.
-      for (MachineInstr::mmo_iterator MM = I->memoperands_begin(),
-           E = I->memoperands_end(); MM != E; ++MM) {
-        MachineMemOperand *MMO = *MM;
-
+      for (MachineMemOperand *MMO : I.memoperands()) {
         const Value *V = MMO->getValue();
 
         if (!V)
           continue;
 
-        const PseudoSourceValue *PSV = dyn_cast<const PseudoSourceValue>(V);
-        if (PSV && PSV->isConstant(MFI))
+        // FIXME: In order to enable the use of TBAA when using AA in CodeGen,
+        // we'll also need to update the TBAA nodes in MMOs with values
+        // derived from the merged allocas. When doing this, we'll need to use
+        // the same variant of GetUnderlyingObjects that is used by the
+        // instruction scheduler (that can look through ptrtoint/inttoptr
+        // pairs).
+
+        // We've replaced IR-level uses of the remapped allocas, so we only
+        // need to replace direct uses here.
+        if (!isa<AllocaInst>(V))
           continue;
 
-        // Climb up and find the original alloca.
-        V = GetUnderlyingObject(V);
-        // If we did not find one, or if the one that we found is not in our
-        // map, then move on.
-        if (!V || !isa<AllocaInst>(V)) {
-          // Clear mem operand since we don't know for sure that it doesn't
-          // alias a merged alloca.
-          MMO->setValue(0);
-          continue;
-        }
         const AllocaInst *AI= cast<AllocaInst>(V);
         if (!Allocas.count(AI))
           continue;
@@ -549,9 +537,7 @@
       }
 
       // Update all of the machine instruction operands.
-      for (unsigned i = 0 ; i <  I->getNumOperands(); ++i) {
-        MachineOperand &MO = I->getOperand(i);
-
+      for (MachineOperand &MO : I.operands()) {
         if (!MO.isFI())
           continue;
         int FromSlot = MO.getIndex();
@@ -572,12 +558,12 @@
         // zone are are okay, despite the fact that we don't have a good way
         // for validating all of the usages of the calculation.
 #ifndef NDEBUG
-        bool TouchesMemory = I->mayLoad() || I->mayStore();
+        bool TouchesMemory = I.mayLoad() || I.mayStore();
         // If we *don't* protect the user from escaped allocas, don't bother
         // validating the instructions.
-        if (!I->isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) {
-          SlotIndex Index = Indexes->getInstructionIndex(I);
-          LiveInterval *Interval = Intervals[FromSlot];
+        if (!I.isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) {
+          SlotIndex Index = Indexes->getInstructionIndex(&I);
+          const LiveInterval *Interval = &*Intervals[FromSlot];
           assert(Interval->find(Index) != Interval->end() &&
                  "Found instruction usage outside of live range.");
         }
@@ -596,13 +582,10 @@
 }
 
 void StackColoring::removeInvalidSlotRanges() {
-  MachineFunction::const_iterator BB, BBE;
-  MachineBasicBlock::const_iterator I, IE;
-  for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB)
-    for (I = BB->begin(), IE = BB->end(); I != IE; ++I) {
-
-      if (I->getOpcode() == TargetOpcode::LIFETIME_START ||
-          I->getOpcode() == TargetOpcode::LIFETIME_END || I->isDebugValue())
+  for (MachineBasicBlock &BB : *MF)
+    for (MachineInstr &I : BB) {
+      if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
+          I.getOpcode() == TargetOpcode::LIFETIME_END || I.isDebugValue())
         continue;
 
       // Some intervals are suspicious! In some cases we find address
@@ -611,13 +594,11 @@
       // violation, but address calculations are okay. This can happen when
       // GEPs are hoisted outside of the lifetime zone.
       // So, in here we only check instructions which can read or write memory.
-      if (!I->mayLoad() && !I->mayStore())
+      if (!I.mayLoad() && !I.mayStore())
         continue;
 
       // Check all of the machine operands.
-      for (unsigned i = 0 ; i <  I->getNumOperands(); ++i) {
-        const MachineOperand &MO = I->getOperand(i);
-
+      for (const MachineOperand &MO : I.operands()) {
         if (!MO.isFI())
           continue;
 
@@ -631,10 +612,10 @@
 
         // Check that the used slot is inside the calculated lifetime range.
         // If it is not, warn about it and invalidate the range.
-        LiveInterval *Interval = Intervals[Slot];
-        SlotIndex Index = Indexes->getInstructionIndex(I);
+        LiveInterval *Interval = &*Intervals[Slot];
+        SlotIndex Index = Indexes->getInstructionIndex(&I);
         if (Interval->find(Index) == Interval->end()) {
-          Intervals[Slot]->clear();
+          Interval->clear();
           DEBUG(dbgs()<<"Invalidating range #"<<Slot<<"\n");
           EscapedAllocas++;
         }
@@ -659,12 +640,16 @@
 }
 
 bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
+  if (skipOptnoneFunction(*Func.getFunction()))
+    return false;
+
   DEBUG(dbgs() << "********** Stack Coloring **********\n"
                << "********** Function: "
                << ((const Value*)Func.getFunction())->getName() << '\n');
   MF = &Func;
   MFI = MF->getFrameInfo();
   Indexes = &getAnalysis<SlotIndexes>();
+  SP = &getAnalysis<StackProtector>();
   BlockLiveness.clear();
   BasicBlocks.clear();
   BasicBlockNumbering.clear();
@@ -704,9 +689,9 @@
   }
 
   for (unsigned i=0; i < NumSlots; ++i) {
-    LiveInterval *LI = new LiveInterval(i, 0);
-    Intervals.push_back(LI);
+    std::unique_ptr<LiveInterval> LI(new LiveInterval(i, 0));
     LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
+    Intervals.push_back(std::move(LI));
     SortedSlots.push_back(i);
   }
 
@@ -741,7 +726,13 @@
   // Sort the slots according to their size. Place unused slots at the end.
   // Use stable sort to guarantee deterministic code generation.
   std::stable_sort(SortedSlots.begin(), SortedSlots.end(),
-                   SlotSizeSorter(MFI));
+                   [this](int LHS, int RHS) {
+    // We use -1 to denote an uninteresting slot. Place these slots at the end.
+    if (LHS == -1) return false;
+    if (RHS == -1) return true;
+    // Sort according to size.
+    return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
+  });
 
   bool Changed = true;
   while (Changed) {
@@ -756,8 +747,8 @@
 
         int FirstSlot = SortedSlots[I];
         int SecondSlot = SortedSlots[J];
-        LiveInterval *First = Intervals[FirstSlot];
-        LiveInterval *Second = Intervals[SecondSlot];
+        LiveInterval *First = &*Intervals[FirstSlot];
+        LiveInterval *Second = &*Intervals[SecondSlot];
         assert (!First->empty() && !Second->empty() && "Found an empty range");
 
         // Merge disjoint slots.
@@ -795,10 +786,5 @@
   expungeSlotMap(SlotRemap, NumSlots);
   remapInstructions(SlotRemap);
 
-  // Release the intervals.
-  for (unsigned I = 0; I < NumSlots; ++I) {
-    delete Intervals[I];
-  }
-
   return removeAllMarkers();
 }
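
The old SlotSizeSorter functor becomes a capturing lambda above. Its comparator
contract in a standalone sketch: unused slots (-1) sink to the end, the rest
sort by descending size, and stable_sort keeps ties in a deterministic order
(the size vector is an illustrative stand-in for MFI->getObjectSize):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    int main() {
      std::vector<int> size = {16, 8, 32}; // slot number -> object size
      std::vector<int> slots = {0, -1, 1, 2};
      std::stable_sort(slots.begin(), slots.end(), [&](int L, int R) {
        if (L == -1) return false; // unused slots go last
        if (R == -1) return true;
        return size[L] > size[R];  // larger slots first
      });
      assert(slots[0] == 2 && slots[3] == -1); // 32-byte slot first
    }
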
diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp
new file mode 100644
index 0000000..a374417
--- /dev/null
+++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -0,0 +1,128 @@
+//===-- StackMapLivenessAnalysis.cpp - StackMap Live Out Analysis ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the StackMap Liveness analysis pass. The pass calculates
+// the liveness for each basic block in a function and attaches the register
+// live-out information to a stackmap or patchpoint intrinsic if present.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stackmaps"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/StackMapLivenessAnalysis.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+
+using namespace llvm;
+
+namespace llvm {
+cl::opt<bool> EnableStackMapLiveness("enable-stackmap-liveness",
+  cl::Hidden, cl::desc("Enable StackMap Liveness Analysis Pass"));
+cl::opt<bool> EnablePatchPointLiveness("enable-patchpoint-liveness",
+  cl::Hidden, cl::desc("Enable PatchPoint Liveness Analysis Pass"));
+}
+
+STATISTIC(NumStackMapFuncVisited, "Number of functions visited");
+STATISTIC(NumStackMapFuncSkipped, "Number of functions skipped");
+STATISTIC(NumBBsVisited,          "Number of basic blocks visited");
+STATISTIC(NumBBsHaveNoStackmap,   "Number of basic blocks with no stackmap");
+STATISTIC(NumStackMaps,           "Number of StackMaps visited");
+
+char StackMapLiveness::ID = 0;
+char &llvm::StackMapLivenessID = StackMapLiveness::ID;
+INITIALIZE_PASS(StackMapLiveness, "stackmap-liveness",
+                "StackMap Liveness Analysis", false, false)
+
+/// Default construct and initialize the pass.
+StackMapLiveness::StackMapLiveness() : MachineFunctionPass(ID) {
+  initializeStackMapLivenessPass(*PassRegistry::getPassRegistry());
+}
+
+/// Tell the pass manager which passes we depend on and what information we
+/// preserve.
+void StackMapLiveness::getAnalysisUsage(AnalysisUsage &AU) const {
+  // We preserve all information.
+  AU.setPreservesAll();
+  AU.setPreservesCFG();
+  // Default dependencies for all MachineFunction passes.
+  AU.addRequired<MachineFunctionAnalysis>();
+}
+
+/// Calculate the liveness information for the given machine function.
+bool StackMapLiveness::runOnMachineFunction(MachineFunction &_MF) {
+  DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: "
+               << _MF.getName() << " **********\n");
+  MF = &_MF;
+  TRI = MF->getTarget().getRegisterInfo();
+  ++NumStackMapFuncVisited;
+
+  // Skip this function if there are no stackmaps or patchpoints to process.
+  if (!((MF->getFrameInfo()->hasStackMap() && EnableStackMapLiveness) ||
+        (MF->getFrameInfo()->hasPatchPoint() && EnablePatchPointLiveness))) {
+    ++NumStackMapFuncSkipped;
+    return false;
+  }
+  return calculateLiveness();
+}
+
+/// Performs the actual liveness calculation for the function.
+bool StackMapLiveness::calculateLiveness() {
+  bool HasChanged = false;
+  // For all basic blocks in the function.
+  for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+       MBBI != MBBE; ++MBBI) {
+    DEBUG(dbgs() << "****** BB " << MBBI->getName() << " ******\n");
+    LiveRegs.init(TRI);
+    LiveRegs.addLiveOuts(MBBI);
+    bool HasStackMap = false;
+    // Reverse iterate over all instructions and add the current live register
+    // set to an instruction if we encounter a stackmap or patchpoint
+    // instruction.
+    for (MachineBasicBlock::reverse_iterator I = MBBI->rbegin(),
+         E = MBBI->rend(); I != E; ++I) {
+      int Opc = I->getOpcode();
+      if ((EnableStackMapLiveness && (Opc == TargetOpcode::STACKMAP)) ||
+          (EnablePatchPointLiveness && (Opc == TargetOpcode::PATCHPOINT))) {
+        addLiveOutSetToMI(*I);
+        HasChanged = true;
+        HasStackMap = true;
+        ++NumStackMaps;
+      }
+      DEBUG(dbgs() << "   " << *I << "   " << LiveRegs);
+      LiveRegs.stepBackward(*I);
+    }
+    ++NumBBsVisited;
+    if (!HasStackMap)
+      ++NumBBsHaveNoStackmap;
+  }
+  return HasChanged;
+}
+
+/// Add the current register live set to the instruction.
+void StackMapLiveness::addLiveOutSetToMI(MachineInstr &MI) {
+  uint32_t *Mask = createRegisterMask();
+  MachineOperand MO = MachineOperand::CreateRegLiveOut(Mask);
+  MI.addOperand(*MF, MO);
+}
+
+/// Create a register mask and initialize it with the registers from the
+/// register live set.
+uint32_t *StackMapLiveness::createRegisterMask() const {
+  // The mask is owned and cleaned up by the MachineFunction.
+  uint32_t *Mask = MF->allocateRegisterMask(TRI->getNumRegs());
+  for (LivePhysRegs::const_iterator RI = LiveRegs.begin(), RE = LiveRegs.end();
+       RI != RE; ++RI)
+    Mask[*RI / 32] |= 1U << (*RI % 32);
+  return Mask;
+}
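
createRegisterMask above packs one bit per physical register into 32-bit words:
word Reg / 32, bit Reg % 32. The encoding in isolation:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Mask[4] = {}; // room for 128 registers, all clear initially
      unsigned Reg = 37;     // some live physical register number
      Mask[Reg / 32] |= 1U << (Reg % 32);
      assert(Mask[1] == (1U << 5));               // 37 = 1*32 + 5
      assert((Mask[Reg / 32] >> (Reg % 32)) & 1); // read the bit back
    }
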
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
index 40893ea..a6522dc 100644
--- a/lib/CodeGen/StackMaps.cpp
+++ b/lib/CodeGen/StackMaps.cpp
@@ -10,9 +10,11 @@
 #define DEBUG_TYPE "stackmaps"
 
 #include "llvm/CodeGen/StackMaps.h"
-
 #include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCObjectFileInfo.h"
@@ -20,22 +22,20 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetOpcodes.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOpcodes.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-
 #include <iterator>
 
 using namespace llvm;
 
-PatchPointOpers::PatchPointOpers(const MachineInstr *MI):
-  MI(MI),
-  HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
-         !MI->getOperand(0).isImplicit()),
-  IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() == CallingConv::AnyReg) {
-
+PatchPointOpers::PatchPointOpers(const MachineInstr *MI)
+  : MI(MI),
+    HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
+           !MI->getOperand(0).isImplicit()),
+    IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() == CallingConv::AnyReg)
+{
 #ifndef NDEBUG
-  {
   unsigned CheckStartIdx = 0, e = MI->getNumOperands();
   while (CheckStartIdx < e && MI->getOperand(CheckStartIdx).isReg() &&
          MI->getOperand(CheckStartIdx).isDef() &&
@@ -43,8 +43,7 @@
     ++CheckStartIdx;
 
   assert(getMetaIdx() == CheckStartIdx &&
-         "Unexpected additonal definition in Patchpoint intrinsic.");
-  }
+         "Unexpected additional definition in Patchpoint intrinsic.");
 #endif
 }
 
@@ -65,7 +64,121 @@
   return ScratchIdx;
 }
 
-void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint32_t ID,
+MachineInstr::const_mop_iterator
+StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
+                        MachineInstr::const_mop_iterator MOE,
+                        LocationVec &Locs, LiveOutVec &LiveOuts) const {
+  if (MOI->isImm()) {
+    switch (MOI->getImm()) {
+    default: llvm_unreachable("Unrecognized operand type.");
+    case StackMaps::DirectMemRefOp: {
+      unsigned Size = AP.TM.getDataLayout()->getPointerSizeInBits();
+      assert((Size % 8) == 0 && "Need pointer size in bytes.");
+      Size /= 8;
+      unsigned Reg = (++MOI)->getReg();
+      int64_t Imm = (++MOI)->getImm();
+      Locs.push_back(Location(StackMaps::Location::Direct, Size, Reg, Imm));
+      break;
+    }
+    case StackMaps::IndirectMemRefOp: {
+      int64_t Size = (++MOI)->getImm();
+      assert(Size > 0 && "Need a valid size for indirect memory locations.");
+      unsigned Reg = (++MOI)->getReg();
+      int64_t Imm = (++MOI)->getImm();
+      Locs.push_back(Location(StackMaps::Location::Indirect, Size, Reg, Imm));
+      break;
+    }
+    case StackMaps::ConstantOp: {
+      ++MOI;
+      assert(MOI->isImm() && "Expected constant operand.");
+      int64_t Imm = MOI->getImm();
+      Locs.push_back(Location(Location::Constant, sizeof(int64_t), 0, Imm));
+      break;
+    }
+    }
+    return ++MOI;
+  }
+
+  // The physical register number will ultimately be encoded as a DWARF regno.
+  // The stack map also records the size of a spill slot that can hold the
+  // register content. (The runtime can track the actual size of the data type
+  // if it needs to.)
+  if (MOI->isReg()) {
+    // Skip implicit registers (this includes our scratch registers).
+    if (MOI->isImplicit())
+      return ++MOI;
+
+    assert(TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) &&
+           "Virtreg operands should have been rewritten before now.");
+    const TargetRegisterClass *RC =
+      AP.TM.getRegisterInfo()->getMinimalPhysRegClass(MOI->getReg());
+    assert(!MOI->getSubReg() && "Physical subreg still around.");
+    Locs.push_back(
+      Location(Location::Register, RC->getSize(), MOI->getReg(), 0));
+    return ++MOI;
+  }
+
+  if (MOI->isRegLiveOut())
+    LiveOuts = parseRegisterLiveOutMask(MOI->getRegLiveOut());
+
+  return ++MOI;
+}
+
+/// Go up the super-register chain until we hit a valid dwarf register number.
+static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) {
+  int RegNo = TRI->getDwarfRegNum(Reg, false);
+  for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNo < 0; ++SR)
+    RegNo = TRI->getDwarfRegNum(*SR, false);
+
+  assert(RegNo >= 0 && "Invalid Dwarf register number.");
+  return (unsigned) RegNo;
+}
+
+/// Create a live-out register record for the given register Reg.
+StackMaps::LiveOutReg
+StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const {
+  unsigned RegNo = getDwarfRegNum(Reg, TRI);
+  unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
+  return LiveOutReg(Reg, RegNo, Size);
+}
+
+/// Parse the register live-out mask and return a vector of live-out registers
+/// that need to be recorded in the stackmap.
+StackMaps::LiveOutVec
+StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
+  assert(Mask && "No register mask specified");
+  const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo();
+  LiveOutVec LiveOuts;
+
+  // Create a LiveOutReg for each bit that is set in the register mask.
+  for (unsigned Reg = 0, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg)
+    if ((Mask[Reg / 32] >> (Reg % 32)) & 1)
+      LiveOuts.push_back(createLiveOutReg(Reg, TRI));
+
+  // We don't need to keep track of a register if its super-register is already
+  // in the list. Merge entries that refer to the same DWARF register and use
+  // the maximum size that needs to be spilled.
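+  // For example (assuming x86-64 DWARF numbering), %ax and %eax resolve to
+  // the same DWARF register number; after sorting, the two entries merge,
+  // keeping the super-register and the larger 4-byte size.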
+  std::sort(LiveOuts.begin(), LiveOuts.end());
+  for (LiveOutVec::iterator I = LiveOuts.begin(), E = LiveOuts.end();
+       I != E; ++I) {
+    for (LiveOutVec::iterator II = std::next(I); II != E; ++II) {
+      if (I->RegNo != II->RegNo) {
+        // Skip all the now invalid entries.
+        I = --II;
+        break;
+      }
+      I->Size = std::max(I->Size, II->Size);
+      if (TRI->isSuperRegister(I->Reg, II->Reg))
+        I->Reg = II->Reg;
+      II->MarkInvalid();
+    }
+  }
+  LiveOuts.erase(std::remove_if(LiveOuts.begin(), LiveOuts.end(),
+                                LiveOutReg::IsInvalid), LiveOuts.end());
+  return LiveOuts;
+}
+
+void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
                                     MachineInstr::const_mop_iterator MOI,
                                     MachineInstr::const_mop_iterator MOE,
                                     bool recordResult) {
@@ -74,71 +187,64 @@
   MCSymbol *MILabel = OutContext.CreateTempSymbol();
   AP.OutStreamer.EmitLabel(MILabel);
 
-  LocationVec CallsiteLocs;
+  LocationVec Locations;
+  LiveOutVec LiveOuts;
 
   if (recordResult) {
-    std::pair<Location, MachineInstr::const_mop_iterator> ParseResult =
-      OpParser(MI.operands_begin(), llvm::next(MI.operands_begin()), AP.TM);
-
-    Location &Loc = ParseResult.first;
-    assert(Loc.LocType == Location::Register &&
-           "Stackmap return location must be a register.");
-    CallsiteLocs.push_back(Loc);
+    assert(PatchPointOpers(&MI).hasDef() && "Stackmap has no return value.");
+    parseOperand(MI.operands_begin(), std::next(MI.operands_begin()),
+                 Locations, LiveOuts);
   }
 
+  // Parse operands.
   while (MOI != MOE) {
-    std::pair<Location, MachineInstr::const_mop_iterator> ParseResult =
-      OpParser(MOI, MOE, AP.TM);
-
-    Location &Loc = ParseResult.first;
-
-    // Move large constants into the constant pool.
-    if (Loc.LocType == Location::Constant && (Loc.Offset & ~0xFFFFFFFFULL)) {
-      Loc.LocType = Location::ConstantIndex;
-      Loc.Offset = ConstPool.getConstantIndex(Loc.Offset);
-    }
-
-    CallsiteLocs.push_back(Loc);
-    MOI = ParseResult.second;
+    MOI = parseOperand(MOI, MOE, Locations, LiveOuts);
   }
 
+  // Move large constants into the constant pool.
+  for (LocationVec::iterator I = Locations.begin(), E = Locations.end();
+       I != E; ++I) {
+    // Constants are encoded as sign-extended integers.
+    // -1 is directly encoded as .long 0xFFFFFFFF with no constant pool.
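+    // I.e. a constant goes out of line only when it does not fit in a
+    // sign-extended 32-bit immediate (Offset outside [INT32_MIN, INT32_MAX]).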
+    if (I->LocType == Location::Constant &&
+        ((I->Offset + (int64_t(1)<<31)) >> 32) != 0) {
+      I->LocType = Location::ConstantIndex;
+      I->Offset = ConstPool.getConstantIndex(I->Offset);
+    }
+  }
+
+  // Create an expression to calculate the offset of the callsite from function
+  // entry.
   const MCExpr *CSOffsetExpr = MCBinaryExpr::CreateSub(
     MCSymbolRefExpr::Create(MILabel, OutContext),
     MCSymbolRefExpr::Create(AP.CurrentFnSym, OutContext),
     OutContext);
 
-  CSInfos.push_back(CallsiteInfo(CSOffsetExpr, ID, CallsiteLocs));
-}
+  CSInfos.push_back(CallsiteInfo(CSOffsetExpr, ID, Locations, LiveOuts));
 
-static MachineInstr::const_mop_iterator
-getStackMapEndMOP(MachineInstr::const_mop_iterator MOI,
-                  MachineInstr::const_mop_iterator MOE) {
-  for (; MOI != MOE; ++MOI)
-    if (MOI->isRegMask() || (MOI->isReg() && MOI->isImplicit()))
-      break;
-
-  return MOI;
+  // Record the stack size of the current function.
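+  // UINT64_MAX serves as a sentinel for frames whose size is not a
+  // compile-time constant (i.e. the function has variable-sized objects).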
+  const MachineFrameInfo *MFI = AP.MF->getFrameInfo();
+  FnStackSize[AP.CurrentFnSym] =
+    MFI->hasVarSizedObjects() ? UINT64_MAX : MFI->getStackSize();
 }
 
 void StackMaps::recordStackMap(const MachineInstr &MI) {
-  assert(MI.getOpcode() == TargetOpcode::STACKMAP && "exected stackmap");
+  assert(MI.getOpcode() == TargetOpcode::STACKMAP && "expected stackmap");
 
   int64_t ID = MI.getOperand(0).getImm();
-  assert((int32_t)ID == ID && "Stack maps hold 32-bit IDs");
-  recordStackMapOpers(MI, ID, llvm::next(MI.operands_begin(), 2),
-                      getStackMapEndMOP(MI.operands_begin(),
-                                        MI.operands_end()));
+  recordStackMapOpers(MI, ID, std::next(MI.operands_begin(), 2),
+                      MI.operands_end());
 }
 
 void StackMaps::recordPatchPoint(const MachineInstr &MI) {
-  assert(MI.getOpcode() == TargetOpcode::PATCHPOINT && "exected stackmap");
+  assert(MI.getOpcode() == TargetOpcode::PATCHPOINT && "expected patchpoint");
 
   PatchPointOpers opers(&MI);
   int64_t ID = opers.getMetaOper(PatchPointOpers::IDPos).getImm();
-  assert((int32_t)ID == ID && "Stack maps hold 32-bit IDs");
+
   MachineInstr::const_mop_iterator MOI =
-    llvm::next(MI.operands_begin(), opers.getStackMapStartIdx());
-  recordStackMapOpers(MI, ID, MOI, getStackMapEndMOP(MOI, MI.operands_end()),
+    std::next(MI.operands_begin(), opers.getStackMapStartIdx());
+  recordStackMapOpers(MI, ID, MOI, MI.operands_end(),
                       opers.isAnyReg() && opers.hasDef());
 
 #ifndef NDEBUG
@@ -155,12 +261,21 @@
 
 /// serializeToStackMapSection conceptually populates the following fields:
 ///
-/// uint32 : Reserved (header)
+/// Header {
+///   uint8  : Stack Map Version (currently 1)
+///   uint8  : Reserved (expected to be 0)
+///   uint16 : Reserved (expected to be 0)
+/// }
+/// uint32 : NumFunctions
 /// uint32 : NumConstants
-/// int64  : Constants[NumConstants]
 /// uint32 : NumRecords
+/// StkSizeRecord[NumFunctions] {
+///   uint64 : Function Address
+///   uint64 : Stack Size
+/// }
+/// int64  : Constants[NumConstants]
 /// StkMapRecord[NumRecords] {
-///   uint32 : PatchPoint ID
+///   uint64 : PatchPoint ID
 ///   uint32 : Instruction Offset
 ///   uint16 : Reserved (record flags)
 ///   uint16 : NumLocations
@@ -170,6 +285,14 @@
 ///     uint16 : Dwarf RegNum
 ///     int32  : Offset
 ///   }
+///   uint16 : Padding
+///   uint16 : NumLiveOuts
+///   LiveOuts[NumLiveOuts] {
+///     uint16 : Dwarf RegNum
+///     uint8  : Reserved
+///     uint8  : Size in Bytes
+///   }
+///   uint32 : Padding (only if required to align to 8 bytes)
 /// }
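+///
+/// For example, a record with one location and no live-outs occupies
+/// 8 + 4 + 2 + 2 + 8 + 2 + 2 = 28 bytes and is then padded with 4 bytes to
+/// reach 8-byte alignment.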
 ///
 /// Location Encoding, Type, Value:
@@ -199,29 +322,42 @@
   // Serialize data.
   const char *WSMP = "Stack Maps: ";
   (void)WSMP;
-  const MCRegisterInfo &MCRI = *OutContext.getRegisterInfo();
 
   DEBUG(dbgs() << "********** Stack Map Output **********\n");
 
   // Header.
-  AP.OutStreamer.EmitIntValue(0, 4);
+  AP.OutStreamer.EmitIntValue(1, 1); // Version.
+  AP.OutStreamer.EmitIntValue(0, 1); // Reserved.
+  AP.OutStreamer.EmitIntValue(0, 2); // Reserved.
 
+  // Num functions.
+  DEBUG(dbgs() << WSMP << "#functions = " << FnStackSize.size() << '\n');
+  AP.OutStreamer.EmitIntValue(FnStackSize.size(), 4);
   // Num constants.
+  DEBUG(dbgs() << WSMP << "#constants = " << ConstPool.getNumConstants()
+               << '\n');
   AP.OutStreamer.EmitIntValue(ConstPool.getNumConstants(), 4);
+  // Num callsites.
+  DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << '\n');
+  AP.OutStreamer.EmitIntValue(CSInfos.size(), 4);
+
+  // Function stack size entries.
+  for (FnStackSizeMap::iterator I = FnStackSize.begin(), E = FnStackSize.end();
+       I != E; ++I) {
+    AP.OutStreamer.EmitSymbolValue(I->first, 8);
+    AP.OutStreamer.EmitIntValue(I->second, 8);
+  }
 
   // Constant pool entries.
   for (unsigned i = 0; i < ConstPool.getNumConstants(); ++i)
     AP.OutStreamer.EmitIntValue(ConstPool.getConstant(i), 8);
 
-  DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << "\n");
-  AP.OutStreamer.EmitIntValue(CSInfos.size(), 4);
-
+  // Callsite entries.
   for (CallsiteInfoList::const_iterator CSII = CSInfos.begin(),
-                                        CSIE = CSInfos.end();
-       CSII != CSIE; ++CSII) {
-
-    unsigned CallsiteID = CSII->ID;
+       CSIE = CSInfos.end(); CSII != CSIE; ++CSII) {
+    uint64_t CallsiteID = CSII->ID;
     const LocationVec &CSLocs = CSII->Locations;
+    const LiveOutVec &LiveOuts = CSII->LiveOuts;
 
     DEBUG(dbgs() << WSMP << "callsite " << CallsiteID << "\n");
 
@@ -229,15 +365,18 @@
     // runtime than crash in case of in-process compilation. Currently, we do
     // simple overflow checks, but we may eventually communicate other
     // compilation errors this way.
-    if (CSLocs.size() > UINT16_MAX) {
-      AP.OutStreamer.EmitIntValue(UINT32_MAX, 4); // Invalid ID.
+    if (CSLocs.size() > UINT16_MAX || LiveOuts.size() > UINT16_MAX) {
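+      // Emit a dummy record that preserves the fixed layout, so a consumer
+      // can still skip it by reading the zero location and live-out counts.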
+      AP.OutStreamer.EmitIntValue(UINT64_MAX, 8); // Invalid ID.
       AP.OutStreamer.EmitValue(CSII->CSOffsetExpr, 4);
       AP.OutStreamer.EmitIntValue(0, 2); // Reserved.
       AP.OutStreamer.EmitIntValue(0, 2); // 0 locations.
+      AP.OutStreamer.EmitIntValue(0, 2); // padding.
+      AP.OutStreamer.EmitIntValue(0, 2); // 0 live-out registers.
+      AP.OutStreamer.EmitIntValue(0, 4); // padding.
       continue;
     }
 
-    AP.OutStreamer.EmitIntValue(CallsiteID, 4);
+    AP.OutStreamer.EmitIntValue(CallsiteID, 8);
     AP.OutStreamer.EmitValue(CSII->CSOffsetExpr, 4);
 
     // Reserved for flags.
@@ -251,6 +390,26 @@
     for (LocationVec::const_iterator LocI = CSLocs.begin(), LocE = CSLocs.end();
          LocI != LocE; ++LocI, ++operIdx) {
       const Location &Loc = *LocI;
+      unsigned RegNo = 0;
+      int Offset = Loc.Offset;
+      if (Loc.Reg) {
+        RegNo = getDwarfRegNum(Loc.Reg, TRI);
+
+        // If this is a register location, put the subregister byte offset in
+        // the location offset.
+        if (Loc.LocType == Location::Register) {
+          assert(!Loc.Offset && "Register location should have zero offset");
+          unsigned LLVMRegNo = TRI->getLLVMRegNum(RegNo, false);
+          unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNo, Loc.Reg);
+          if (SubRegIdx)
+            Offset = TRI->getSubRegIdxOffset(SubRegIdx);
+        }
+      } else {
+        assert(Loc.LocType != Location::Register &&
+               "Missing location register");
+      }
+
       DEBUG(
         dbgs() << WSMP << "  Loc " << operIdx << ": ";
         switch (Loc.LocType) {
@@ -258,15 +417,15 @@
           dbgs() << "<Unprocessed operand>";
           break;
         case Location::Register:
-          dbgs() << "Register " << MCRI.getName(Loc.Reg);
+          dbgs() << "Register " << TRI->getName(Loc.Reg);
           break;
         case Location::Direct:
-          dbgs() << "Direct " << MCRI.getName(Loc.Reg);
+          dbgs() << "Direct " << TRI->getName(Loc.Reg);
           if (Loc.Offset)
             dbgs() << " + " << Loc.Offset;
           break;
         case Location::Indirect:
-          dbgs() << "Indirect " << MCRI.getName(Loc.Reg)
+          dbgs() << "Indirect " << TRI->getName(Loc.Reg)
                  << " + " << Loc.Offset;
           break;
         case Location::Constant:
@@ -276,36 +435,39 @@
           dbgs() << "Constant Index " << Loc.Offset;
           break;
         }
-        dbgs() << "\n";
+        dbgs() << "     [encoding: .byte " << Loc.LocType
+               << ", .byte " << Loc.Size
+               << ", .short " << RegNo
+               << ", .int " << Offset << "]\n";
       );
 
-      unsigned RegNo = 0;
-      int Offset = Loc.Offset;
-      if(Loc.Reg) {
-        RegNo = MCRI.getDwarfRegNum(Loc.Reg, false);
-        for (MCSuperRegIterator SR(Loc.Reg, TRI);
-             SR.isValid() && (int)RegNo < 0; ++SR) {
-          RegNo = TRI->getDwarfRegNum(*SR, false);
-        }
-        // If this is a register location, put the subregister byte offset in
-        // the location offset.
-        if (Loc.LocType == Location::Register) {
-          assert(!Loc.Offset && "Register location should have zero offset");
-          unsigned LLVMRegNo = MCRI.getLLVMRegNum(RegNo, false);
-          unsigned SubRegIdx = MCRI.getSubRegIndex(LLVMRegNo, Loc.Reg);
-          if (SubRegIdx)
-            Offset = MCRI.getSubRegIdxOffset(SubRegIdx);
-        }
-      }
-      else {
-        assert(Loc.LocType != Location::Register &&
-               "Missing location register");
-      }
       AP.OutStreamer.EmitIntValue(Loc.LocType, 1);
       AP.OutStreamer.EmitIntValue(Loc.Size, 1);
       AP.OutStreamer.EmitIntValue(RegNo, 2);
       AP.OutStreamer.EmitIntValue(Offset, 4);
     }
+
+    DEBUG(dbgs() << WSMP << "  has " << LiveOuts.size()
+                 << " live-out registers\n");
+
+    // Padding to align to 4 bytes, then the number of live-out registers.
+    AP.OutStreamer.EmitIntValue(0, 2);
+    AP.OutStreamer.EmitIntValue(LiveOuts.size(), 2);
+
+    operIdx = 0;
+    for (LiveOutVec::const_iterator LI = LiveOuts.begin(), LE = LiveOuts.end();
+         LI != LE; ++LI, ++operIdx) {
+      DEBUG(dbgs() << WSMP << "  LO " << operIdx << ": "
+                   << TRI->getName(LI->Reg)
+                   << "     [encoding: .short " << LI->RegNo
+                   << ", .byte 0, .byte " << LI->Size << "]\n");
+
+      AP.OutStreamer.EmitIntValue(LI->RegNo, 2);
+      AP.OutStreamer.EmitIntValue(0, 1);
+      AP.OutStreamer.EmitIntValue(LI->Size, 1);
+    }
+    // Pad the record to an 8-byte boundary.
+    AP.OutStreamer.EmitValueToAlignment(8);
   }
 
   AP.OutStreamer.AddBlankLine();
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 9020449..f3749e5 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -16,12 +16,11 @@
 
 #define DEBUG_TYPE "stack-protector"
 #include "llvm/CodeGen/StackProtector.h"
-#include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
@@ -58,10 +57,33 @@
   return AI ? Layout.lookup(AI) : SSPLK_None;
 }
 
+void StackProtector::adjustForColoring(const AllocaInst *From,
+                                       const AllocaInst *To) {
+  // When coloring replaces one alloca with another, transfer the SSPLayoutKind
+  // tag from the remapped alloca to the target alloca. The remapped alloca
+  // should be no larger than the replacement alloca.
+  SSPLayoutMap::iterator I = Layout.find(From);
+  if (I != Layout.end()) {
+    SSPLayoutKind Kind = I->second;
+    Layout.erase(I);
+
+    // Transfer the tag, but make sure that SSPLK_AddrOf does not overwrite
+    // SSPLK_SmallArray or SSPLK_LargeArray, and make sure that
+    // SSPLK_SmallArray does not overwrite SSPLK_LargeArray.
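+    // E.g. if To is already tagged SSPLK_LargeArray the incoming tag is
+    // ignored, and an SSPLK_AddrOf tag never downgrades an existing kind.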
+    I = Layout.find(To);
+    if (I == Layout.end())
+      Layout.insert(std::make_pair(To, Kind));
+    else if (I->second != SSPLK_LargeArray && Kind != SSPLK_AddrOf)
+      I->second = Kind;
+  }
+}
+
 bool StackProtector::runOnFunction(Function &Fn) {
   F = &Fn;
   M = F->getParent();
-  DT = getAnalysisIfAvailable<DominatorTree>();
+  DominatorTreeWrapperPass *DTWP =
+      getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+  DT = DTWP ? &DTWP->getDomTree() : 0;
   TLI = TM->getTargetLowering();
 
   if (!RequiresStackProtector())
@@ -69,8 +91,9 @@
 
   Attribute Attr = Fn.getAttributes().getAttribute(
       AttributeSet::FunctionIndex, "stack-protector-buffer-size");
-  if (Attr.isStringAttribute())
-    Attr.getValueAsString().getAsInteger(10, SSPBufferSize);
+  if (Attr.isStringAttribute() &&
+      Attr.getValueAsString().getAsInteger(10, SSPBufferSize))
+    return false; // Invalid integer string
 
   ++NumFunProtected;
   return InsertStackProtectors();
@@ -127,9 +150,7 @@
 }
 
 bool StackProtector::HasAddressTaken(const Instruction *AI) {
-  for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
-       UI != UE; ++UI) {
-    const User *U = *UI;
+  for (const User *U : AI->users()) {
     if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
       if (AI == SI->getValueOperand())
         return true;
@@ -261,8 +282,7 @@
   const unsigned MaxSearch = 4;
   bool NoInterposingChain = true;
 
-  for (BasicBlock::reverse_iterator I = llvm::next(BB->rbegin()),
-                                    E = BB->rend();
+  for (BasicBlock::reverse_iterator I = std::next(BB->rbegin()), E = BB->rend();
        I != E && SearchCounter < MaxSearch; ++I) {
     Instruction *Inst = &*I;
 
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 9f44df8..2717f4c 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -87,7 +87,7 @@
         initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
       }
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesCFG();
       AU.addRequired<SlotIndexes>();
       AU.addPreserved<SlotIndexes>();
@@ -98,7 +98,7 @@
       MachineFunctionPass::getAnalysisUsage(AU);
     }
 
-    virtual bool runOnMachineFunction(MachineFunction &MF);
+    bool runOnMachineFunction(MachineFunction &MF) override;
 
   private:
     void InitializeSlots();
@@ -142,7 +142,6 @@
   for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
        MBBI != E; ++MBBI) {
     MachineBasicBlock *MBB = &*MBBI;
-    BlockFrequency Freq = MBFI->getBlockFreq(MBB);
     for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end();
          MII != EE; ++MII) {
       MachineInstr *MI = &*MII;
@@ -157,7 +156,7 @@
           continue;
         LiveInterval &li = LS->getInterval(FI);
         if (!MI->isDebugValue())
-          li.weight += LiveIntervals::getSpillWeight(false, true, Freq);
+          li.weight += LiveIntervals::getSpillWeight(false, true, MBFI, MI);
       }
       for (MachineInstr::mmo_iterator MMOI = MI->memoperands_begin(),
            EE = MI->memoperands_end(); MMOI != EE; ++MMOI) {
@@ -386,8 +385,8 @@
       toErase.push_back(I);
       continue;
     }
-        
-    MachineBasicBlock::iterator NextMI = llvm::next(I);
+
+    MachineBasicBlock::iterator NextMI = std::next(I);
     if (NextMI == MBB->end()) continue;
 
     unsigned LoadReg = 0;
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index ff0181e..3b7a04c 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -15,10 +15,10 @@
 #define DEBUG_TYPE "tailduplication"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
@@ -61,9 +61,10 @@
   class TailDuplicatePass : public MachineFunctionPass {
     const TargetInstrInfo *TII;
     const TargetRegisterInfo *TRI;
+    const MachineBranchProbabilityInfo *MBPI;
     MachineModuleInfo *MMI;
     MachineRegisterInfo *MRI;
-    OwningPtr<RegScavenger> RS;
+    std::unique_ptr<RegScavenger> RS;
     bool PreRegAlloc;
 
     // SSAUpdateVRs - A list of virtual registers for which to update SSA form.
@@ -78,7 +79,9 @@
     explicit TailDuplicatePass() :
       MachineFunctionPass(ID), PreRegAlloc(false) {}
 
-    virtual bool runOnMachineFunction(MachineFunction &MF);
+    bool runOnMachineFunction(MachineFunction &MF) override;
+
+    void getAnalysisUsage(AnalysisUsage &AU) const override;
 
   private:
     void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
@@ -128,10 +131,15 @@
                 false, false)
 
 bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
+  if (skipOptnoneFunction(*MF.getFunction()))
+    return false;
+
   TII = MF.getTarget().getInstrInfo();
   TRI = MF.getTarget().getRegisterInfo();
   MRI = &MF.getRegInfo();
   MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+
   PreRegAlloc = MRI->isSSA();
   RS.reset();
   if (MRI->tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF))
@@ -144,6 +152,11 @@
   return MadeChange;
 }
 
+void TailDuplicatePass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<MachineBranchProbabilityInfo>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
 static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
   for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) {
     MachineBasicBlock *MBB = I;
@@ -252,8 +265,8 @@
       // Rewrite uses that are outside of the original def's block.
       MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
       while (UI != MRI->use_end()) {
-        MachineOperand &UseMO = UI.getOperand();
-        MachineInstr *UseMI = &*UI;
+        MachineOperand &UseMO = *UI;
+        MachineInstr *UseMI = UseMO.getParent();
         ++UI;
         if (UseMI->isDebugValue()) {
           // SSAUpdate can replace the use with an undef. That creates
@@ -328,12 +341,10 @@
 
 static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB,
                          const MachineRegisterInfo *MRI) {
-  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
-         UE = MRI->use_end(); UI != UE; ++UI) {
-    MachineInstr *UseMI = &*UI;
-    if (UseMI->isDebugValue())
+  for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
+    if (UseMI.isDebugValue())
       continue;
-    if (UseMI->getParent() != BB)
+    if (UseMI.getParent() != BB)
       return true;
   }
   return false;
@@ -686,7 +697,7 @@
                  << "From simple Succ: " << *TailBB);
 
     MachineBasicBlock *NewTarget = *TailBB->succ_begin();
-    MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(PredBB));
+    MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(PredBB));
 
     // Make PredFBB explicit.
     if (PredCond.empty())
@@ -721,11 +732,12 @@
     if (PredTBB)
       TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
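+    // Capture the edge weight of PredBB -> TailBB before the edge is removed.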
 
+    uint32_t Weight = MBPI->getEdgeWeight(PredBB, TailBB);
     PredBB->removeSuccessor(TailBB);
     unsigned NumSuccessors = PredBB->succ_size();
     assert(NumSuccessors <= 1);
     if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget)
-      PredBB->addSuccessor(NewTarget);
+      PredBB->addSuccessor(NewTarget, Weight);
 
     TDBBs.push_back(PredBB);
   }
@@ -786,7 +798,7 @@
       // Update PredBB livein.
       RS->enterBasicBlock(PredBB);
       if (!PredBB->empty())
-        RS->forward(prior(PredBB->end()));
+        RS->forward(std::prev(PredBB->end()));
       BitVector RegsLiveAtExit(TRI->getNumRegs());
       RS->getRegsUsed(RegsLiveAtExit, false);
       for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(),
@@ -836,7 +848,7 @@
            "TailDuplicate called on block with multiple successors!");
     for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
            E = TailBB->succ_end(); I != E; ++I)
-      PredBB->addSuccessor(*I);
+      PredBB->addSuccessor(*I, MBPI->getEdgeWeight(TailBB, I));
 
     Changed = true;
     ++NumTailDups;
@@ -845,7 +857,7 @@
   // If TailBB was duplicated into all its predecessors except for the prior
   // block, which falls through unconditionally, move the contents of this
   // block into the prior block.
-  MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB));
+  MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(TailBB));
   MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
   SmallVector<MachineOperand, 4> PriorCond;
   // This has to check PrevBB->succ_size() because EH edges are ignored by
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index bf4fd65..cae3ccd 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -13,10 +13,12 @@
 
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/CodeGen/StackMaps.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCInstrItineraries.h"
@@ -372,6 +374,65 @@
   return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]);
 }
 
+static MachineInstr* foldPatchpoint(MachineFunction &MF,
+                                    MachineInstr *MI,
+                                    const SmallVectorImpl<unsigned> &Ops,
+                                    int FrameIndex,
+                                    const TargetInstrInfo &TII) {
+  unsigned StartIdx = 0;
+  switch (MI->getOpcode()) {
+  case TargetOpcode::STACKMAP:
+    StartIdx = 2; // Skip ID, nShadowBytes.
+    break;
+  case TargetOpcode::PATCHPOINT: {
+    // For PatchPoint, the call args are not foldable.
+    PatchPointOpers opers(MI);
+    StartIdx = opers.getVarIdx();
+    break;
+  }
+  default:
+    llvm_unreachable("unexpected stackmap opcode");
+  }
+
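+  // Everything below StartIdx is fixed meta data: the ID and shadow-byte
+  // count for STACKMAP, plus the call target and call arguments for
+  // PATCHPOINT.
+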
+  // Bail out (return null) if any operand requested for folding is not
+  // foldable (not part of the stackmap's live values).
+  for (SmallVectorImpl<unsigned>::const_iterator I = Ops.begin(), E = Ops.end();
+       I != E; ++I) {
+    if (*I < StartIdx)
+      return 0;
+  }
+
+  MachineInstr *NewMI =
+    MF.CreateMachineInstr(TII.get(MI->getOpcode()), MI->getDebugLoc(), true);
+  MachineInstrBuilder MIB(MF, NewMI);
+
+  // No need to fold the return value, the metadata, or the function arguments.
+  for (unsigned i = 0; i < StartIdx; ++i)
+    MIB.addOperand(MI->getOperand(i));
+
+  for (unsigned i = StartIdx; i < MI->getNumOperands(); ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (std::find(Ops.begin(), Ops.end(), i) != Ops.end()) {
+      unsigned SpillSize;
+      unsigned SpillOffset;
+      // Compute the spill slot size and offset.
+      const TargetRegisterClass *RC =
+        MF.getRegInfo().getRegClass(MO.getReg());
+      bool Valid = TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize,
+                                         SpillOffset, &MF.getTarget());
+      if (!Valid)
+        report_fatal_error("cannot spill patchpoint subregister operand");
+      MIB.addImm(StackMaps::IndirectMemRefOp);
+      MIB.addImm(SpillSize);
+      MIB.addFrameIndex(FrameIndex);
+      MIB.addImm(SpillOffset);
+    } else
+      MIB.addOperand(MO);
+  }
+  return NewMI;
+}
+
 /// foldMemoryOperand - Attempt to fold a load or store of the specified stack
 /// slot into the specified machine instruction for the specified operand(s).
 /// If this is possible, a new instruction is returned with the specified
@@ -393,8 +454,18 @@
   assert(MBB && "foldMemoryOperand needs an inserted instruction");
   MachineFunction &MF = *MBB->getParent();
 
-  // Ask the target to do the actual folding.
-  if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) {
+  MachineInstr *NewMI = 0;
+
+  if (MI->getOpcode() == TargetOpcode::STACKMAP ||
+      MI->getOpcode() == TargetOpcode::PATCHPOINT) {
+    // Fold stackmap/patchpoint.
+    NewMI = foldPatchpoint(MF, MI, Ops, FI, *this);
+  } else {
+    // Ask the target to do the actual folding.
+    NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI);
+  }
+
+  if (NewMI) {
     NewMI->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
     // Add a memory operand, foldMemoryOperandImpl doesn't do that.
     assert((!(Flags & MachineMemOperand::MOStore) ||
@@ -450,7 +521,19 @@
   MachineFunction &MF = *MBB.getParent();
 
   // Ask the target to do the actual folding.
-  MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI);
+  MachineInstr *NewMI = 0;
+  int FrameIndex = 0;
+
+  if ((MI->getOpcode() == TargetOpcode::STACKMAP ||
+       MI->getOpcode() == TargetOpcode::PATCHPOINT) &&
+      isLoadFromStackSlot(LoadMI, FrameIndex)) {
+    // Fold stackmap/patchpoint.
+    NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this);
+  } else {
+    // Ask the target to do the actual folding.
+    NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI);
+  }
+
   if (!NewMI) return 0;
 
   NewMI = MBB.insert(MI, NewMI);
@@ -562,7 +645,7 @@
                                            const MachineBasicBlock *MBB,
                                            const MachineFunction &MF) const {
   // Terminators and labels can't be scheduled around.
-  if (MI->isTerminator() || MI->isLabel())
+  if (MI->isTerminator() || MI->isPosition())
     return true;
 
   // Don't attempt to schedule around any instruction that defines
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 30305af..870370b 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -18,11 +18,15 @@
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Mangler.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -201,6 +205,11 @@
   Names[RTLIB::FLOOR_F80] = "floorl";
   Names[RTLIB::FLOOR_F128] = "floorl";
   Names[RTLIB::FLOOR_PPCF128] = "floorl";
+  Names[RTLIB::ROUND_F32] = "roundf";
+  Names[RTLIB::ROUND_F64] = "round";
+  Names[RTLIB::ROUND_F80] = "roundl";
+  Names[RTLIB::ROUND_F128] = "roundl";
+  Names[RTLIB::ROUND_PPCF128] = "roundl";
   Names[RTLIB::COPYSIGN_F32] = "copysignf";
   Names[RTLIB::COPYSIGN_F64] = "copysign";
   Names[RTLIB::COPYSIGN_F80] = "copysignl";
@@ -659,21 +668,23 @@
 /// NOTE: The constructor takes ownership of TLOF.
 TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
                                        const TargetLoweringObjectFile *tlof)
-  : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) {
+  : TM(tm), DL(TM.getDataLayout()), TLOF(*tlof) {
   initActions();
 
   // Perform these initializations only once.
-  IsLittleEndian = TD->isLittleEndian();
+  IsLittleEndian = DL->isLittleEndian();
   MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8;
   MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize
     = MaxStoresPerMemmoveOptSize = 4;
   UseUnderscoreSetJmp = false;
   UseUnderscoreLongJmp = false;
   SelectIsExpensive = false;
+  HasMultipleConditionRegisters = false;
   IntDivIsCheap = false;
   Pow2DivIsCheap = false;
   JumpIsExpensive = false;
   PredictableSelectIsExpensive = false;
+  MaskAndBranchFoldingIsLegal = false;
   StackPointerRegisterToSaveRestore = 0;
   ExceptionPointerRegister = 0;
   ExceptionSelectorRegister = 0;
@@ -754,6 +765,7 @@
   setOperationAction(ISD::FCEIL,  MVT::f16, Expand);
   setOperationAction(ISD::FRINT,  MVT::f16, Expand);
   setOperationAction(ISD::FTRUNC, MVT::f16, Expand);
+  setOperationAction(ISD::FROUND, MVT::f16, Expand);
   setOperationAction(ISD::FLOG ,  MVT::f32, Expand);
   setOperationAction(ISD::FLOG2,  MVT::f32, Expand);
   setOperationAction(ISD::FLOG10, MVT::f32, Expand);
@@ -764,6 +776,7 @@
   setOperationAction(ISD::FCEIL,  MVT::f32, Expand);
   setOperationAction(ISD::FRINT,  MVT::f32, Expand);
   setOperationAction(ISD::FTRUNC, MVT::f32, Expand);
+  setOperationAction(ISD::FROUND, MVT::f32, Expand);
   setOperationAction(ISD::FLOG ,  MVT::f64, Expand);
   setOperationAction(ISD::FLOG2,  MVT::f64, Expand);
   setOperationAction(ISD::FLOG10, MVT::f64, Expand);
@@ -774,6 +787,7 @@
   setOperationAction(ISD::FCEIL,  MVT::f64, Expand);
   setOperationAction(ISD::FRINT,  MVT::f64, Expand);
   setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
+  setOperationAction(ISD::FROUND, MVT::f64, Expand);
   setOperationAction(ISD::FLOG ,  MVT::f128, Expand);
   setOperationAction(ISD::FLOG2,  MVT::f128, Expand);
   setOperationAction(ISD::FLOG10, MVT::f128, Expand);
@@ -784,6 +798,7 @@
   setOperationAction(ISD::FCEIL,  MVT::f128, Expand);
   setOperationAction(ISD::FRINT,  MVT::f128, Expand);
   setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
+  setOperationAction(ISD::FROUND, MVT::f128, Expand);
 
   // Default ISD::TRAP to expand (which turns it into abort).
   setOperationAction(ISD::TRAP, MVT::Other, Expand);
@@ -799,7 +814,7 @@
 }
 
 unsigned TargetLoweringBase::getPointerSizeInBits(uint32_t AS) const {
-  return TD->getPointerSizeInBits(AS);
+  return DL->getPointerSizeInBits(AS);
 }
 
 unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const {
@@ -808,7 +823,7 @@
 }
 
 MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const {
-  return MVT::getIntegerVT(8*TD->getPointerSize(0));
+  return MVT::getIntegerVT(8*DL->getPointerSize(0));
 }
 
 EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const {
@@ -894,6 +909,59 @@
   return false;
 }
 
+/// Replace/modify any TargetFrameIndex operands with a target-dependent
+/// sequence of memory operands that is recognized by PrologEpilogInserter.
+MachineBasicBlock*
+TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
+                                   MachineBasicBlock *MBB) const {
+  const TargetMachine &TM = getTargetMachine();
+  MachineFunction &MF = *MI->getParent()->getParent();
+
+  // MI changes inside this loop as we grow operands.
+  for (unsigned OperIdx = 0; OperIdx != MI->getNumOperands(); ++OperIdx) {
+    MachineOperand &MO = MI->getOperand(OperIdx);
+    if (!MO.isFI())
+      continue;
+
+    // foldMemoryOperand builds a new MI after replacing a single FI operand
+    // with the canonical set of five x86 addressing-mode operands.
+    int FI = MO.getIndex();
+    MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), MI->getDesc());
+
+    // Copy operands before the frame-index.
+    for (unsigned i = 0; i < OperIdx; ++i)
+      MIB.addOperand(MI->getOperand(i));
+    // Add frame index operands: direct-mem-ref tag, #FI, offset.
+    MIB.addImm(StackMaps::DirectMemRefOp);
+    MIB.addOperand(MI->getOperand(OperIdx));
+    MIB.addImm(0);
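+    // The single frame-index operand thus becomes the triple
+    // (DirectMemRefOp, <fi#N>, 0), which PrologEpilogInserter later rewrites
+    // into a real address.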
+    // Copy the operands after the frame index.
+    for (unsigned i = OperIdx + 1; i != MI->getNumOperands(); ++i)
+      MIB.addOperand(MI->getOperand(i));
+
+    // Inherit previous memory operands.
+    MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+    assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!");
+
+    // Add a new memory operand for this FI.
+    const MachineFrameInfo &MFI = *MF.getFrameInfo();
+    assert(MFI.getObjectOffset(FI) != -1);
+    MachineMemOperand *MMO =
+      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+                              MachineMemOperand::MOLoad,
+                              TM.getDataLayout()->getPointerSize(),
+                              MFI.getObjectAlignment(FI));
+    MIB->addMemOperand(MF, MMO);
+
+    // Replace the instruction and update the operand index.
+    MBB->insert(MachineBasicBlock::iterator(MI), MIB);
+    OperIdx += (MIB->getNumOperands() - MI->getNumOperands()) - 1;
+    MI->eraseFromParent();
+    MI = MIB;
+  }
+  return MBB;
+}
+
 /// findRepresentativeClass - Return the largest legal super-reg register class
 /// of the register class for the specified type and its associated "cost".
 std::pair<const TargetRegisterClass*, uint8_t>
@@ -1019,7 +1087,7 @@
     // that wider vector type.
     MVT EltVT = VT.getVectorElementType();
     unsigned NElts = VT.getVectorNumElements();
-    if (NElts != 1 && !shouldSplitVectorElementType(EltVT)) {
+    if (NElts != 1 && !shouldSplitVectorType(VT)) {
       bool IsLegalWiderType = false;
       // First try to promote the elements of integer vectors. If no legal
       // promotion was found, fallback to the widen-vector method.
@@ -1087,7 +1155,7 @@
   for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
     const TargetRegisterClass* RRC;
     uint8_t Cost;
-    tie(RRC, Cost) =  findRepresentativeClass((MVT::SimpleValueType)i);
+    std::tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i);
     RepRegClassForVT[i] = RRC;
     RepRegClassCostForVT[i] = Cost;
   }
@@ -1230,7 +1298,7 @@
 /// function arguments in the caller parameter area.  This is the actual
 /// alignment, not its logarithm.
 unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty) const {
-  return TD->getCallFrameTypeAlignment(Ty);
+  return DL->getABITypeAlignment(Ty);
 }
 
 //===----------------------------------------------------------------------===//
@@ -1364,6 +1432,8 @@
       return false;
     // Allow 2*r as r+r.
     break;
+  default: // Don't allow n * r
+    return false;
   }
 
   return true;
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 59d7b57..e41fbfc 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -22,6 +22,7 @@
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Mangler.h"
 #include "llvm/IR/Module.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
@@ -34,7 +35,7 @@
 #include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 using namespace dwarf;
@@ -43,19 +44,18 @@
 //                                  ELF
 //===----------------------------------------------------------------------===//
 
-MCSymbol *
-TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV,
-                                                     Mangler *Mang,
-                                                MachineModuleInfo *MMI) const {
+MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol(
+    const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM,
+    MachineModuleInfo *MMI) const {
   unsigned Encoding = getPersonalityEncoding();
   switch (Encoding & 0x70) {
   default:
     report_fatal_error("We do not support this DWARF encoding yet!");
   case dwarf::DW_EH_PE_absptr:
-    return  getSymbol(*Mang, GV);
+    return TM.getSymbol(GV, Mang);
   case dwarf::DW_EH_PE_pcrel: {
     return getContext().GetOrCreateSymbol(StringRef("DW.ref.") +
-                                          getSymbol(*Mang, GV)->getName());
+                                          TM.getSymbol(GV, Mang)->getName());
   }
   }
 }
@@ -87,24 +87,21 @@
   Streamer.EmitSymbolValue(Sym, Size);
 }
 
-const MCExpr *TargetLoweringObjectFileELF::
-getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
-                        MachineModuleInfo *MMI, unsigned Encoding,
-                        MCStreamer &Streamer) const {
+const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference(
+    const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
+    const TargetMachine &TM, MachineModuleInfo *MMI,
+    MCStreamer &Streamer) const {
 
   if (Encoding & dwarf::DW_EH_PE_indirect) {
     MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>();
 
-    SmallString<128> Name;
-    Mang->getNameWithPrefix(Name, GV, true);
-    Name += ".DW.stub";
+    MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, ".DW.stub", Mang, TM);
 
     // Add information about the stub reference to ELFMMI so that the stub
     // gets emitted by the asmprinter.
-    MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
     MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
     if (StubSym.getPointer() == 0) {
-      MCSymbol *Sym = getSymbol(*Mang, GV);
+      MCSymbol *Sym = TM.getSymbol(GV, Mang);
       StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
     }
 
@@ -114,7 +111,7 @@
   }
 
   return TargetLoweringObjectFile::
-    getTTypeGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+    getTTypeGlobalReference(GV, Encoding, Mang, TM, MMI, Streamer);
 }
 
 static SectionKind
@@ -199,10 +196,9 @@
   return Flags;
 }
 
-
-const MCSection *TargetLoweringObjectFileELF::
-getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
-                         Mangler *Mang, const TargetMachine &TM) const {
+const MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
+    const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+    const TargetMachine &TM) const {
   StringRef SectionName = GV->getSection();
 
   // Infer section flags from the section name if we can.
@@ -235,7 +231,7 @@
 
 const MCSection *TargetLoweringObjectFileELF::
 SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
-                       Mangler *Mang, const TargetMachine &TM) const {
+                       Mangler &Mang, const TargetMachine &TM) const {
   // If we have -ffunction-section or -fdata-section then we should emit the
   // global value to a uniqued section specifically for it.
   bool EmitUniquedSection;
@@ -252,12 +248,12 @@
     Prefix = getSectionPrefixForGlobal(Kind);
 
     SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
-    MCSymbol *Sym = getSymbol(*Mang, GV);
-    Name.append(Sym->getName().begin(), Sym->getName().end());
+    TM.getNameWithPrefix(Name, GV, Mang, true);
+
     StringRef Group = "";
     unsigned Flags = getELFSectionFlags(Kind);
     if (GV->isWeakForLinker()) {
-      Group = Sym->getName();
+      Group = Name.substr(strlen(Prefix));
       Flags |= ELF::SHF_GROUP;
     }
 
@@ -405,11 +401,21 @@
 //                                 MachO
 //===----------------------------------------------------------------------===//
 
+/// getDepLibFromLinkerOpt - Extract the dependent library name from a linker
+/// option string. Returns StringRef() if the option does not specify a library.
+StringRef TargetLoweringObjectFileMachO::
+getDepLibFromLinkerOpt(StringRef LinkerOption) const {
+  const char *LibCmd = "-l";
+  if (LinkerOption.startswith(LibCmd))
+    return LinkerOption.substr(strlen(LibCmd));
+  return StringRef();
+}
+
 /// emitModuleFlags - Perform code emission for module flags.
 void TargetLoweringObjectFileMachO::
 emitModuleFlags(MCStreamer &Streamer,
                 ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
-                Mangler *Mang, const TargetMachine &TM) const {
+                Mangler &Mang, const TargetMachine &TM) const {
   unsigned VersionVal = 0;
   unsigned ImageInfoFlags = 0;
   MDNode *LinkerOptions = 0;
@@ -481,9 +487,9 @@
   Streamer.AddBlankLine();
 }
 
-const MCSection *TargetLoweringObjectFileMachO::
-getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
-                         Mangler *Mang, const TargetMachine &TM) const {
+const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
+    const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+    const TargetMachine &TM) const {
   // Parse the section specifier and create it if valid.
   StringRef Segment, Section;
   unsigned TAA = 0, StubSize = 0;
@@ -520,9 +526,44 @@
   return S;
 }
 
+bool TargetLoweringObjectFileMachO::isSectionAtomizableBySymbols(
+    const MCSection &Section) const {
+  const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
+
+  // Sections holding 1-byte strings are atomized based on the data they
+  // contain. Sections holding 2-byte strings require symbols in order to be
+  // atomized. There is no dedicated section for 4-byte strings.
+  if (SMO.getKind().isMergeable1ByteCString())
+    return false;
+
+  if (SMO.getSegmentName() == "__DATA" &&
+      SMO.getSectionName() == "__cfstring")
+    return false;
+
+  switch (SMO.getType()) {
+  default:
+    return true;
+
+  // These sections are atomized at element boundaries without using symbols.
+  case MachO::S_4BYTE_LITERALS:
+  case MachO::S_8BYTE_LITERALS:
+  case MachO::S_16BYTE_LITERALS:
+  case MachO::S_LITERAL_POINTERS:
+  case MachO::S_NON_LAZY_SYMBOL_POINTERS:
+  case MachO::S_LAZY_SYMBOL_POINTERS:
+  case MachO::S_MOD_INIT_FUNC_POINTERS:
+  case MachO::S_MOD_TERM_FUNC_POINTERS:
+  case MachO::S_INTERPOSING:
+    return false;
+  }
+}
+
 const MCSection *TargetLoweringObjectFileMachO::
 SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
-                       Mangler *Mang, const TargetMachine &TM) const {
+                       Mangler &Mang, const TargetMachine &TM) const {
 
   // Handle thread local data.
   if (Kind.isThreadBSS()) return TLSBSSSection;
@@ -556,7 +597,7 @@
       return FourByteConstantSection;
     if (Kind.isMergeableConst8())
       return EightByteConstantSection;
-    if (Kind.isMergeableConst16() && SixteenByteConstantSection)
+    if (Kind.isMergeableConst16())
       return SixteenByteConstantSection;
   }
 
@@ -595,55 +636,31 @@
     return FourByteConstantSection;
   if (Kind.isMergeableConst8())
     return EightByteConstantSection;
-  if (Kind.isMergeableConst16() && SixteenByteConstantSection)
+  if (Kind.isMergeableConst16())
     return SixteenByteConstantSection;
   return ReadOnlySection;  // .const
 }
 
-/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively decide
-/// not to emit the UsedDirective for some symbols in llvm.used.
-// FIXME: REMOVE this (rdar://7071300)
-bool TargetLoweringObjectFileMachO::
-shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const {
-  /// On Darwin, internally linked data beginning with "L" or "l" does not have
-  /// the directive emitted (this occurs in ObjC metadata).
-  if (!GV) return false;
-
-  // Check whether the mangled name has the "Private" or "LinkerPrivate" prefix.
-  if (GV->hasLocalLinkage() && !isa<Function>(GV)) {
-    // FIXME: ObjC metadata is currently emitted as internal symbols that have
-    // \1L and \0l prefixes on them.  Fix them to be Private/LinkerPrivate and
-    // this horrible hack can go away.
-    MCSymbol *Sym = getSymbol(*Mang, GV);
-    if (Sym->getName()[0] == 'L' || Sym->getName()[0] == 'l')
-      return false;
-  }
-
-  return true;
-}
-
-const MCExpr *TargetLoweringObjectFileMachO::
-getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
-                        MachineModuleInfo *MMI, unsigned Encoding,
-                        MCStreamer &Streamer) const {
+const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference(
+    const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
+    const TargetMachine &TM, MachineModuleInfo *MMI,
+    MCStreamer &Streamer) const {
   // The mach-o version of this method defaults to returning a stub reference.
 
   if (Encoding & DW_EH_PE_indirect) {
     MachineModuleInfoMachO &MachOMMI =
       MMI->getObjFileInfo<MachineModuleInfoMachO>();
 
-    SmallString<128> Name;
-    Mang->getNameWithPrefix(Name, GV, true);
-    Name += "$non_lazy_ptr";
+    MCSymbol *SSym =
+        getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", Mang, TM);
 
     // Add information about the stub reference to MachOMMI so that the stub
     // gets emitted by the asmprinter.
-    MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
     MachineModuleInfoImpl::StubValueTy &StubSym =
       GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) :
                                   MachOMMI.getGVStubEntry(SSym);
     if (StubSym.getPointer() == 0) {
-      MCSymbol *Sym = getSymbol(*Mang, GV);
+      MCSymbol *Sym = TM.getSymbol(GV, Mang);
       StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
     }
 
@@ -652,27 +669,24 @@
                         Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
   }
 
-  return TargetLoweringObjectFile::
-    getTTypeGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+  return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, Mang,
+                                                           TM, MMI, Streamer);
 }
 
-MCSymbol *TargetLoweringObjectFileMachO::
-getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
-                        MachineModuleInfo *MMI) const {
+MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol(
+    const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM,
+    MachineModuleInfo *MMI) const {
   // The mach-o version of this method defaults to returning a stub reference.
   MachineModuleInfoMachO &MachOMMI =
     MMI->getObjFileInfo<MachineModuleInfoMachO>();
 
-  SmallString<128> Name;
-  Mang->getNameWithPrefix(Name, GV, true);
-  Name += "$non_lazy_ptr";
+  MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", Mang, TM);
 
   // Add information about the stub reference to MachOMMI so that the stub
   // gets emitted by the asmprinter.
-  MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
   MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
   if (StubSym.getPointer() == 0) {
-    MCSymbol *Sym = getSymbol(*Mang, GV);
+    MCSymbol *Sym = TM.getSymbol(GV, Mang);
     StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
   }
 
@@ -718,58 +732,65 @@
   return Flags;
 }
 
-const MCSection *TargetLoweringObjectFileCOFF::
-getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
-                         Mangler *Mang, const TargetMachine &TM) const {
+const MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal(
+    const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+    const TargetMachine &TM) const {
   int Selection = 0;
   unsigned Characteristics = getCOFFSectionFlags(Kind);
-  SmallString<128> Name(GV->getSection().c_str());
+  StringRef Name = GV->getSection();
+  StringRef COMDATSymName = "";
   if (GV->isWeakForLinker()) {
     Selection = COFF::IMAGE_COMDAT_SELECT_ANY;
     Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
-    Name.append("$");
-    Mang->getNameWithPrefix(Name, GV, false, false);
+    MCSymbol *Sym = TM.getSymbol(GV, Mang);
+    COMDATSymName = Sym->getName();
   }
   return getContext().getCOFFSection(Name,
                                      Characteristics,
                                      Kind,
-                                     "",
+                                     COMDATSymName,
                                      Selection);
 }
 
-static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
+static const char *getCOFFSectionNameForUniqueGlobal(SectionKind Kind) {
   if (Kind.isText())
-    return ".text$";
+    return ".text";
   if (Kind.isBSS ())
-    return ".bss$";
-  if (Kind.isThreadLocal()) {
-    // 'LLVM' is just an arbitary string to ensure that the section name gets
-    // sorted in between '.tls$AAA' and '.tls$ZZZ' by the linker.
-    return ".tls$LLVM";
-  }
+    return ".bss";
+  if (Kind.isThreadLocal())
+    return ".tls$";
   if (Kind.isWriteable())
-    return ".data$";
-  return ".rdata$";
+    return ".data";
+  return ".rdata";
 }
 
 
 const MCSection *TargetLoweringObjectFileCOFF::
 SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
-                       Mangler *Mang, const TargetMachine &TM) const {
+                       Mangler &Mang, const TargetMachine &TM) const {
+  // If we have -ffunction-sections or -fdata-sections then we should emit the
+  // global value into a uniqued section specifically for it.
+  bool EmitUniquedSection;
+  if (Kind.isText())
+    EmitUniquedSection = TM.getFunctionSections();
+  else
+    EmitUniquedSection = TM.getDataSections();
 
   // If this global is linkonce/weak and the target handles this by emitting it
   // into a 'uniqued' section name, create and return the section now.
-  if (GV->isWeakForLinker()) {
-    const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind);
-    SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
-    Mang->getNameWithPrefix(Name, GV, false, false);
-
+  // The section name is based on the name of the symbol, which is not
+  // feasible if the symbol has private linkage.
+  if ((GV->isWeakForLinker() || EmitUniquedSection) &&
+      !GV->hasPrivateLinkage()) {
+    const char *Name = getCOFFSectionNameForUniqueGlobal(Kind);
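+    // e.g. a weak function "foo" is now placed in a plain ".text" section
+    // keyed by its mangled name, rather than in a ".text$foo"-style section
+    // (illustrative name).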
     unsigned Characteristics = getCOFFSectionFlags(Kind);
 
     Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
-
-    return getContext().getCOFFSection(Name.str(), Characteristics,
-                                       Kind, "", COFF::IMAGE_COMDAT_SELECT_ANY);
+    MCSymbol *Sym = TM.getSymbol(GV, Mang);
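+    // Weak globals may legitimately have other definitions, so allow the
+    // linker to pick any one (SELECT_ANY); sections uniqued only because of
+    // -ffunction/-fdata-sections must not collide (SELECT_NODUPLICATES).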
+    return getContext().getCOFFSection(
+        Name, Characteristics, Kind, Sym->getName(),
+        GV->isWeakForLinker() ? COFF::IMAGE_COMDAT_SELECT_ANY
+                              : COFF::IMAGE_COMDAT_SELECT_NODUPLICATES);
   }
 
   if (Kind.isText())
@@ -787,10 +808,18 @@
   return DataSection;
 }
 
+StringRef TargetLoweringObjectFileCOFF::
+getDepLibFromLinkerOpt(StringRef LinkerOption) const {
+  const char *LibCmd = "/DEFAULTLIB:";
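+  // e.g. "/DEFAULTLIB:msvcrt.lib" yields "msvcrt.lib"; anything else yields
+  // an empty StringRef.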
+  if (LinkerOption.startswith(LibCmd))
+    return LinkerOption.substr(strlen(LibCmd));
+  return StringRef();
+}
+
 void TargetLoweringObjectFileCOFF::
 emitModuleFlags(MCStreamer &Streamer,
                 ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
-                Mangler *Mang, const TargetMachine &TM) const {
+                Mangler &Mang, const TargetMachine &TM) const {
   MDNode *LinkerOptions = 0;
 
   // Look for the "Linker Options" flag, since it's the only one we support.
diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp
index f7bf86b..3ca2017 100644
--- a/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/lib/CodeGen/TargetOptionsImpl.cpp
@@ -12,8 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/IR/Function.h"
-#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/Target/TargetOptions.h"
 using namespace llvm;
 
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index b9a6b47..d9e5aae 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -144,7 +144,7 @@
     initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
   }
 
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
     AU.addRequired<AliasAnalysis>();
     AU.addPreserved<LiveVariables>();
@@ -156,7 +156,7 @@
   }
 
   /// runOnMachineFunction - Pass entry point.
-  bool runOnMachineFunction(MachineFunction&);
+  bool runOnMachineFunction(MachineFunction&) override;
 };
 } // end anonymous namespace
 
@@ -229,7 +229,7 @@
     for (MachineRegisterInfo::use_nodbg_iterator
            UI = MRI->use_nodbg_begin(SavedReg),
            UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
-      MachineOperand &UseMO = UI.getOperand();
+      MachineOperand &UseMO = *UI;
       if (!UseMO.isKill())
         continue;
       KillMI = UseMO.getParent();
@@ -255,7 +255,7 @@
   ++KillPos;
 
   unsigned NumVisited = 0;
-  for (MachineBasicBlock::iterator I = llvm::next(OldPos); I != KillPos; ++I) {
+  for (MachineBasicBlock::iterator I = std::next(OldPos); I != KillPos; ++I) {
     MachineInstr *OtherMI = I;
     // DBG_VALUE cannot be counted against the limit.
     if (OtherMI->isDebugValue())
@@ -315,9 +315,7 @@
                                                   unsigned &LastDef) {
   LastDef = 0;
   unsigned LastUse = Dist;
-  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
-         E = MRI->reg_end(); I != E; ++I) {
-    MachineOperand &MO = I.getOperand();
+  for (MachineOperand &MO : MRI->reg_operands(Reg)) {
     MachineInstr *MI = MO.getParent();
     if (MI->getParent() != MBB || MI->isDebugValue())
       continue;
@@ -417,9 +415,9 @@
     MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg);
     // If there are multiple defs, we can't do a simple analysis, so just
     // go with what the kill flag says.
-    if (llvm::next(Begin) != MRI->def_end())
+    if (std::next(Begin) != MRI->def_end())
       return true;
-    DefMI = &*Begin;
+    DefMI = Begin->getParent();
     bool IsSrcPhys, IsDstPhys;
     unsigned SrcReg,  DstReg;
     // If the def is something other than a copy, then it isn't going to
@@ -457,7 +455,7 @@
   if (!MRI->hasOneNonDBGUse(Reg))
     // None or more than one use.
     return 0;
-  MachineInstr &UseMI = *MRI->use_nodbg_begin(Reg);
+  MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(Reg);
   if (UseMI.getParent() != MBB)
     return 0;
   unsigned SrcReg;
@@ -647,7 +645,7 @@
   if (!Sunk) {
     DistanceMap.insert(std::make_pair(NewMI, Dist));
     mi = NewMI;
-    nmi = llvm::next(mi);
+    nmi = std::next(mi);
   }
 
   // Update source and destination register maps.
@@ -816,7 +814,7 @@
 
   // Move the copies connected to MI down as well.
   MachineBasicBlock::iterator Begin = MI;
-  MachineBasicBlock::iterator AfterMI = llvm::next(Begin);
+  MachineBasicBlock::iterator AfterMI = std::next(Begin);
 
   MachineBasicBlock::iterator End = AfterMI;
   while (End->isCopy() && Defs.count(End->getOperand(1).getReg())) {
@@ -876,7 +874,7 @@
   }
 
   // Move debug info as well.
-  while (Begin != MBB->begin() && llvm::prior(Begin)->isDebugValue())
+  while (Begin != MBB->begin() && std::prev(Begin)->isDebugValue())
     --Begin;
 
   nmi = End;
@@ -891,7 +889,7 @@
       LIS->handleMove(CopyMI);
       InsertPos = CopyMI;
     }
-    End = llvm::next(MachineBasicBlock::iterator(MI));
+    End = std::next(MachineBasicBlock::iterator(MI));
   }
 
   // Copies following MI may have been moved as well.
@@ -914,19 +912,17 @@
 /// instruction too close to the defs of its register dependencies.
 bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
                                               MachineInstr *MI) {
-  for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg),
-         DE = MRI->def_end(); DI != DE; ++DI) {
-    MachineInstr *DefMI = &*DI;
-    if (DefMI->getParent() != MBB || DefMI->isCopy() || DefMI->isCopyLike())
+  for (MachineInstr &DefMI : MRI->def_instructions(Reg)) {
+    if (DefMI.getParent() != MBB || DefMI.isCopy() || DefMI.isCopyLike())
       continue;
-    if (DefMI == MI)
+    if (&DefMI == MI)
       return true; // MI is defining something KillMI uses
-    DenseMap<MachineInstr*, unsigned>::iterator DDI = DistanceMap.find(DefMI);
+    DenseMap<MachineInstr*, unsigned>::iterator DDI = DistanceMap.find(&DefMI);
     if (DDI == DistanceMap.end())
       return true;  // Below MI
     unsigned DefDist = DDI->second;
     assert(Dist > DefDist && "Visited def already?");
-    if (TII->getInstrLatency(InstrItins, DefMI) > (Dist - DefDist))
+    if (TII->getInstrLatency(InstrItins, &DefMI) > (Dist - DefDist))
       return true;
   }
   return false;
@@ -1060,15 +1056,15 @@
 
   // Move the old kill above MI, don't forget to move debug info as well.
   MachineBasicBlock::iterator InsertPos = mi;
-  while (InsertPos != MBB->begin() && llvm::prior(InsertPos)->isDebugValue())
+  while (InsertPos != MBB->begin() && std::prev(InsertPos)->isDebugValue())
     --InsertPos;
   MachineBasicBlock::iterator From = KillMI;
-  MachineBasicBlock::iterator To = llvm::next(From);
-  while (llvm::prior(From)->isDebugValue())
+  MachineBasicBlock::iterator To = std::next(From);
+  while (std::prev(From)->isDebugValue())
     --From;
   MBB->splice(InsertPos, MBB, From, To);
 
-  nmi = llvm::prior(InsertPos); // Backtrack so we process the moved instr.
+  nmi = std::prev(InsertPos); // Backtrack so we process the moved instr.
   DistanceMap.erase(DI);
 
   // Update live variables
@@ -1317,13 +1313,14 @@
     assert(SrcReg && SrcMO.isUse() && "two address instruction invalid");
 
     // Deal with <undef> uses immediately - simply rewrite the src operand.
-    if (SrcMO.isUndef()) {
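+    // Only take this shortcut when the def does not write a subregister;
+    // otherwise fall through to the normal tied-pair processing below.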
+    if (SrcMO.isUndef() && !DstMO.getSubReg()) {
       // Constrain the DstReg register class if required.
       if (TargetRegisterInfo::isVirtualRegister(DstReg))
         if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx,
                                                              TRI, *MF))
           MRI->constrainRegClass(DstReg, RC);
       SrcMO.setReg(DstReg);
+      SrcMO.setSubReg(0);
       DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI);
       continue;
     }
@@ -1349,6 +1346,7 @@
   unsigned LastCopiedReg = 0;
   SlotIndex LastCopyIdx;
   unsigned RegB = 0;
+  unsigned SubRegB = 0;
   for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) {
     unsigned SrcIdx = TiedPairs[tpi].first;
     unsigned DstIdx = TiedPairs[tpi].second;
@@ -1359,6 +1357,7 @@
     // Grab RegB from the instruction because it may have changed if the
     // instruction was commuted.
     RegB = MI->getOperand(SrcIdx).getReg();
+    SubRegB = MI->getOperand(SrcIdx).getSubReg();
 
     if (RegA == RegB) {
       // The register is tied to multiple destinations (or else we would
@@ -1383,8 +1382,25 @@
 #endif
 
     // Emit a copy.
-    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
-            TII->get(TargetOpcode::COPY), RegA).addReg(RegB);
+    MachineInstrBuilder MIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+                                      TII->get(TargetOpcode::COPY), RegA);
+    // If this operand is folding a truncation, the truncation now moves to the
+    // copy so that the register classes remain valid for the operands.
+    MIB.addReg(RegB, 0, SubRegB);
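+    // e.g. for a tied pair "%dst = OP %src:sub32" this emits
+    // "%dst = COPY %src:sub32" (illustrative operands).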
+    const TargetRegisterClass *RC = MRI->getRegClass(RegB);
+    if (SubRegB) {
+      if (TargetRegisterInfo::isVirtualRegister(RegA)) {
+        assert(TRI->getMatchingSuperRegClass(RC, MRI->getRegClass(RegA),
+                                             SubRegB) &&
+               "tied subregister must be a truncation");
+        // The superreg class will not be used to constrain the subreg class.
+        RC = 0;
+      } else {
+        assert(TRI->getMatchingSuperReg(RegA, SubRegB, MRI->getRegClass(RegB))
+               && "tied subregister must be a truncation");
+      }
+    }
 
     // Update DistanceMap.
     MachineBasicBlock::iterator PrevMI = MI;
@@ -1404,7 +1420,7 @@
       }
     }
 
-    DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI);
+    DEBUG(dbgs() << "\t\tprepend:\t" << *MIB);
 
     MachineOperand &MO = MI->getOperand(SrcIdx);
     assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() &&
@@ -1417,9 +1433,12 @@
     // Make sure regA is a legal regclass for the SrcIdx operand.
     if (TargetRegisterInfo::isVirtualRegister(RegA) &&
         TargetRegisterInfo::isVirtualRegister(RegB))
-      MRI->constrainRegClass(RegA, MRI->getRegClass(RegB));
-
+      MRI->constrainRegClass(RegA, RC);
     MO.setReg(RegA);
+    // The getMatchingSuper* asserts above guarantee that the register class
+    // projected by SubRegB is compatible with RegA with no subregister. So
+    // regardless of whether the destination operand writes a subreg, the
+    // source operand should not.
+    MO.setSubReg(0);
 
     // Propagate SrcRegMap.
     SrcRegMap[RegA] = RegB;
@@ -1431,12 +1450,14 @@
       // Replace other (un-tied) uses of regB with LastCopiedReg.
       for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
         MachineOperand &MO = MI->getOperand(i);
-        if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
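+        // Only rewrite uses that read the same subregister of RegB; those are
+        // exactly the values the new copy defines.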
+        if (MO.isReg() && MO.getReg() == RegB && MO.getSubReg() == SubRegB &&
+            MO.isUse()) {
           if (MO.isKill()) {
             MO.setIsKill(false);
             RemovedKillFlag = true;
           }
           MO.setReg(LastCopiedReg);
+          MO.setSubReg(0);
         }
       }
     }
@@ -1509,7 +1530,7 @@
     Processed.clear();
     for (MachineBasicBlock::iterator mi = MBB->begin(), me = MBB->end();
          mi != me; ) {
-      MachineBasicBlock::iterator nmi = llvm::next(mi);
+      MachineBasicBlock::iterator nmi = std::next(mi);
       if (mi->isDebugValue()) {
         mi = nmi;
         continue;
@@ -1664,7 +1685,7 @@
   }
 
   MachineBasicBlock::iterator EndMBBI =
-      llvm::next(MachineBasicBlock::iterator(MI));
+      std::next(MachineBasicBlock::iterator(MI));
 
   if (!DefEmitted) {
     DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF");
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index f735ef2..2e22082 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -23,32 +23,32 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/CFG.h"
 #include "llvm/IR/Constant.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
 #include "llvm/Target/TargetInstrInfo.h"
 using namespace llvm;
 
 namespace {
   class UnreachableBlockElim : public FunctionPass {
-    virtual bool runOnFunction(Function &F);
+    bool runOnFunction(Function &F) override;
   public:
     static char ID; // Pass identification, replacement for typeid
     UnreachableBlockElim() : FunctionPass(ID) {
       initializeUnreachableBlockElimPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addPreserved<DominatorTree>();
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.addPreserved<DominatorTreeWrapperPass>();
     }
   };
 }
@@ -95,8 +95,8 @@
 
 namespace {
   class UnreachableMachineBlockElim : public MachineFunctionPass {
-    virtual bool runOnMachineFunction(MachineFunction &F);
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    bool runOnMachineFunction(MachineFunction &F) override;
+    void getAnalysisUsage(AnalysisUsage &AU) const override;
     MachineModuleInfo *MMI;
   public:
     static char ID; // Pass identification, replacement for typeid
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index e0aa405..f892e94 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -20,6 +20,7 @@
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "LiveDebugVariables.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SparseSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
@@ -160,6 +161,7 @@
   SlotIndexes *Indexes;
   LiveIntervals *LIS;
   VirtRegMap *VRM;
+  SparseSet<unsigned> PhysRegs;
 
   void rewrite();
   void addMBBLiveIns();
@@ -167,9 +169,9 @@
   static char ID;
   VirtRegRewriter() : MachineFunctionPass(ID) {}
 
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
 
-  virtual bool runOnMachineFunction(MachineFunction&);
+  bool runOnMachineFunction(MachineFunction&) override;
 };
 } // end anonymous namespace
 
@@ -267,6 +269,20 @@
   SmallVector<unsigned, 8> SuperKills;
   SmallPtrSet<const MachineInstr *, 4> NoReturnInsts;
 
+  // Use a SparseSet to record which physregs are actually encountered in the
+  // MF we are about to iterate over, so that setPhysRegUsed below is only
+  // called for physregs that actually appear in the program, not for every
+  // physreg the target defines. For targets with many physregs and small
+  // programs this is a significant compile-time saving.
+  PhysRegs.clear();
+  PhysRegs.setUniverse(TRI->getNumRegs());
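+  // (SparseSet gives constant-time insert and membership tests over the
+  // physreg universe set here.)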
+
+  // A function with the uwtable attribute guarantees that the stack unwinder
+  // can unwind to the previous frame, so we cannot apply the noreturn
+  // optimization when the calling function has that attribute.
+  bool HasUWTable = MF->getFunction()->hasFnAttribute(Attribute::UWTable);
+
   for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
        MBBI != MBBE; ++MBBI) {
     DEBUG(MBBI->print(dbgs(), Indexes));
@@ -276,9 +292,12 @@
       MachineInstr *MI = MII;
       ++MII;
 
-      // Check if this instruction is a call to a noreturn function.
-      // If so, all the definitions set by this instruction can be ignored.
-      if (IsExitBB && MI->isCall())
+      // Check if this instruction is a call to a noreturn function.  If it is
+      // and the stack unwinding functionality is not needed (i.e. this
+      // function does not have the uwtable attribute and the callee has the
+      // nounwind attribute), then the definitions set by this instruction can
+      // be ignored.
+      if (!HasUWTable && IsExitBB && MI->isCall()) {
         for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
                MOE = MI->operands_end(); MOI != MOE; ++MOI) {
           MachineOperand &MO = *MOI;
@@ -294,6 +313,7 @@
           NoReturnInsts.insert(MI);
           break;
         }
+      }
 
       for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
            MOE = MI->operands_end(); MOI != MOE; ++MOI) {
@@ -303,6 +323,15 @@
         if (MO.isRegMask())
           MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
 
+        // If we encounter a virtual or physical register operand, map it to
+        // its physreg and add that to the set.  Later we use only the physregs
+        // that were actually encountered in the MF to populate MRI's used
+        // physregs.
+        if (MO.isReg() && MO.getReg())
+          PhysRegs.insert(
+              TargetRegisterInfo::isVirtualRegister(MO.getReg()) ?
+              VRM->getPhys(MO.getReg()) :
+              MO.getReg());
+
         if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
           continue;
         unsigned VirtReg = MO.getReg();
@@ -376,20 +405,21 @@
 
   // Tell MRI about physical registers in use.
   if (NoReturnInsts.empty()) {
-    for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg)
-      if (!MRI->reg_nodbg_empty(Reg))
-        MRI->setPhysRegUsed(Reg);
+    for (SparseSet<unsigned>::iterator
+        RegI = PhysRegs.begin(), E = PhysRegs.end(); RegI != E; ++RegI)
+      if (!MRI->reg_nodbg_empty(*RegI))
+        MRI->setPhysRegUsed(*RegI);
   } else {
-    for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg) {
+    for (SparseSet<unsigned>::iterator
+        I = PhysRegs.begin(), E = PhysRegs.end(); I != E; ++I) {
+      unsigned Reg = *I;
       if (MRI->reg_nodbg_empty(Reg))
         continue;
       // Check if this register has a use that will impact the rest of the
       // code. Uses in debug and noreturn instructions do not impact the
       // generated code.
-      for (MachineRegisterInfo::reg_nodbg_iterator It =
-             MRI->reg_nodbg_begin(Reg),
-             EndIt = MRI->reg_nodbg_end(); It != EndIt; ++It) {
-        if (!NoReturnInsts.count(&(*It))) {
+      for (MachineInstr &It : MRI->reg_nodbg_instructions(Reg)) {
+        if (!NoReturnInsts.count(&It)) {
           MRI->setPhysRegUsed(Reg);
           break;
         }
@@ -397,3 +427,4 @@
     }
   }
 }
+