Update aosp/master clang for rebase to r230699.

Change-Id: I6a546ab3d4ae37119eebb735e102cca4f80ab520
diff --git a/lib/CodeGen/ABIInfo.h b/lib/CodeGen/ABIInfo.h
index 2976b60..7e7f7fa 100644
--- a/lib/CodeGen/ABIInfo.h
+++ b/lib/CodeGen/ABIInfo.h
@@ -44,9 +44,12 @@
     CodeGen::CodeGenTypes &CGT;
   protected:
     llvm::CallingConv::ID RuntimeCC;
+    llvm::CallingConv::ID BuiltinCC;
   public:
     ABIInfo(CodeGen::CodeGenTypes &cgt)
-      : CGT(cgt), RuntimeCC(llvm::CallingConv::C) {}
+      : CGT(cgt),
+        RuntimeCC(llvm::CallingConv::C),
+        BuiltinCC(llvm::CallingConv::C) {}
 
     virtual ~ABIInfo();
 
@@ -62,6 +65,11 @@
       return RuntimeCC;
     }
 
+    /// Return the calling convention to use for compiler builtins.
+    llvm::CallingConv::ID getBuiltinCC() const {
+      return BuiltinCC;
+    }
+
     virtual void computeInfo(CodeGen::CGFunctionInfo &FI) const = 0;
 
     /// EmitVAArg - Emit the target dependent code to load a value of
diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp
index 41f2131..da3e5ce 100644
--- a/lib/CodeGen/BackendUtil.cpp
+++ b/lib/CodeGen/BackendUtil.cpp
@@ -15,22 +15,23 @@
 #include "clang/Frontend/FrontendDiagnostic.h"
 #include "clang/Frontend/Utils.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Bitcode/BitcodeWriterPass.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/MC/SubtargetFeature.h"
-#include "llvm/PassManager.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
@@ -39,6 +40,7 @@
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/ObjCARC.h"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/SymbolRewriter.h"
 #include <memory>
 using namespace clang;
 using namespace llvm;
@@ -54,37 +56,44 @@
 
   Timer CodeGenerationTime;
 
-  mutable PassManager *CodeGenPasses;
-  mutable PassManager *PerModulePasses;
-  mutable FunctionPassManager *PerFunctionPasses;
+  mutable legacy::PassManager *CodeGenPasses;
+  mutable legacy::PassManager *PerModulePasses;
+  mutable legacy::FunctionPassManager *PerFunctionPasses;
 
 private:
-  PassManager *getCodeGenPasses() const {
+  TargetIRAnalysis getTargetIRAnalysis() const {
+    if (TM)
+      return TM->getTargetIRAnalysis();
+
+    return TargetIRAnalysis();
+  }
+
+  legacy::PassManager *getCodeGenPasses() const {
     if (!CodeGenPasses) {
-      CodeGenPasses = new PassManager();
+      CodeGenPasses = new legacy::PassManager();
       CodeGenPasses->add(new DataLayoutPass());
-      if (TM)
-        TM->addAnalysisPasses(*CodeGenPasses);
+      CodeGenPasses->add(
+          createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
     }
     return CodeGenPasses;
   }
 
-  PassManager *getPerModulePasses() const {
+  legacy::PassManager *getPerModulePasses() const {
     if (!PerModulePasses) {
-      PerModulePasses = new PassManager();
+      PerModulePasses = new legacy::PassManager();
       PerModulePasses->add(new DataLayoutPass());
-      if (TM)
-        TM->addAnalysisPasses(*PerModulePasses);
+      PerModulePasses->add(
+          createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
     }
     return PerModulePasses;
   }
 
-  FunctionPassManager *getPerFunctionPasses() const {
+  legacy::FunctionPassManager *getPerFunctionPasses() const {
     if (!PerFunctionPasses) {
-      PerFunctionPasses = new FunctionPassManager(TheModule);
+      PerFunctionPasses = new legacy::FunctionPassManager(TheModule);
       PerFunctionPasses->add(new DataLayoutPass());
-      if (TM)
-        TM->addAnalysisPasses(*PerFunctionPasses);
+      PerFunctionPasses->add(
+          createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
     }
     return PerFunctionPasses;
   }
@@ -162,7 +171,7 @@
 }
 
 static void addSampleProfileLoaderPass(const PassManagerBuilder &Builder,
-                                       PassManagerBase &PM) {
+                                       legacy::PassManagerBase &PM) {
   const PassManagerBuilderWrapper &BuilderWrapper =
       static_cast<const PassManagerBuilderWrapper &>(Builder);
   const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts();
@@ -170,17 +179,17 @@
 }
 
 static void addAddDiscriminatorsPass(const PassManagerBuilder &Builder,
-                                     PassManagerBase &PM) {
+                                     legacy::PassManagerBase &PM) {
   PM.add(createAddDiscriminatorsPass());
 }
 
 static void addBoundsCheckingPass(const PassManagerBuilder &Builder,
-                                    PassManagerBase &PM) {
+                                    legacy::PassManagerBase &PM) {
   PM.add(createBoundsCheckingPass());
 }
 
 static void addSanitizerCoveragePass(const PassManagerBuilder &Builder,
-                                     PassManagerBase &PM) {
+                                     legacy::PassManagerBase &PM) {
   const PassManagerBuilderWrapper &BuilderWrapper =
       static_cast<const PassManagerBuilderWrapper&>(Builder);
   const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts();
@@ -188,13 +197,13 @@
 }
 
 static void addAddressSanitizerPasses(const PassManagerBuilder &Builder,
-                                      PassManagerBase &PM) {
+                                      legacy::PassManagerBase &PM) {
   PM.add(createAddressSanitizerFunctionPass());
   PM.add(createAddressSanitizerModulePass());
 }
 
 static void addMemorySanitizerPass(const PassManagerBuilder &Builder,
-                                   PassManagerBase &PM) {
+                                   legacy::PassManagerBase &PM) {
   const PassManagerBuilderWrapper &BuilderWrapper =
       static_cast<const PassManagerBuilderWrapper&>(Builder);
   const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts();
@@ -214,24 +223,35 @@
 }
 
 static void addThreadSanitizerPass(const PassManagerBuilder &Builder,
-                                   PassManagerBase &PM) {
+                                   legacy::PassManagerBase &PM) {
   PM.add(createThreadSanitizerPass());
 }
 
 static void addDataFlowSanitizerPass(const PassManagerBuilder &Builder,
-                                     PassManagerBase &PM) {
+                                     legacy::PassManagerBase &PM) {
   const PassManagerBuilderWrapper &BuilderWrapper =
       static_cast<const PassManagerBuilderWrapper&>(Builder);
   const LangOptions &LangOpts = BuilderWrapper.getLangOpts();
-  PM.add(createDataFlowSanitizerPass(LangOpts.SanitizerBlacklistFile));
+  PM.add(createDataFlowSanitizerPass(LangOpts.SanitizerBlacklistFiles));
 }
 
-static TargetLibraryInfo *createTLI(llvm::Triple &TargetTriple,
-                                    const CodeGenOptions &CodeGenOpts) {
-  TargetLibraryInfo *TLI = new TargetLibraryInfo(TargetTriple);
+static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple,
+                                         const CodeGenOptions &CodeGenOpts) {
+  TargetLibraryInfoImpl *TLII = new TargetLibraryInfoImpl(TargetTriple);
   if (!CodeGenOpts.SimplifyLibCalls)
-    TLI->disableAllFunctions();
-  return TLI;
+    TLII->disableAllFunctions();
+  return TLII;
+}
+
+static void addSymbolRewriterPass(const CodeGenOptions &Opts,
+                                  legacy::PassManager *MPM) {
+  llvm::SymbolRewriter::RewriteDescriptorList DL;
+
+  llvm::SymbolRewriter::RewriteMapParser MapParser;
+  for (const auto &MapFile : Opts.RewriteMapFiles)
+    MapParser.parse(MapFile, &DL);
+
+  MPM->add(createRewriteSymbolsPass(DL));
 }
 
 void EmitAssemblyHelper::CreatePasses() {
@@ -319,7 +339,7 @@
 
   // Figure out TargetLibraryInfo.
   Triple TargetTriple(TheModule->getTargetTriple());
-  PMBuilder.LibraryInfo = createTLI(TargetTriple, CodeGenOpts);
+  PMBuilder.LibraryInfo = createTLII(TargetTriple, CodeGenOpts);
 
   switch (Inlining) {
   case CodeGenOptions::NoInlining: break;
@@ -339,13 +359,15 @@
   }
 
   // Set up the per-function pass manager.
-  FunctionPassManager *FPM = getPerFunctionPasses();
+  legacy::FunctionPassManager *FPM = getPerFunctionPasses();
   if (CodeGenOpts.VerifyModule)
     FPM->add(createVerifierPass());
   PMBuilder.populateFunctionPassManager(*FPM);
 
   // Set up the per-module pass manager.
-  PassManager *MPM = getPerModulePasses();
+  legacy::PassManager *MPM = getPerModulePasses();
+  if (!CodeGenOpts.RewriteMapFiles.empty())
+    addSymbolRewriterPass(CodeGenOpts, MPM);
   if (CodeGenOpts.VerifyModule)
     MPM->add(createDebugInfoVerifierPass());
 
@@ -366,6 +388,12 @@
       MPM->add(createStripSymbolsPass(true));
   }
 
+  if (CodeGenOpts.ProfileInstrGenerate) {
+    InstrProfOptions Options;
+    Options.NoRedZone = CodeGenOpts.DisableRedZone;
+    MPM->add(createInstrProfilingPass(Options));
+  }
+
   PMBuilder.populateModulePassManager(*MPM);
 }
 
@@ -412,7 +440,7 @@
                                     BackendArgs.data());
 
   std::string FeaturesStr;
-  if (TargetOpts.Features.size()) {
+  if (!TargetOpts.Features.empty()) {
     SubtargetFeatures Features;
     for (std::vector<std::string>::const_iterator
            it = TargetOpts.Features.begin(),
@@ -499,6 +527,7 @@
   Options.PositionIndependentExecutable = LangOpts.PIELevel != 0;
   Options.FunctionSections = CodeGenOpts.FunctionSections;
   Options.DataSections = CodeGenOpts.DataSections;
+  Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames;
 
   Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll;
   Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels;
@@ -506,6 +535,7 @@
   Options.MCOptions.MCNoExecStack = CodeGenOpts.NoExecStack;
   Options.MCOptions.MCFatalWarnings = CodeGenOpts.FatalWarnings;
   Options.MCOptions.AsmVerbose = CodeGenOpts.AsmVerbose;
+  Options.MCOptions.ABIName = TargetOpts.ABI;
 
   TargetMachine *TM = TheTarget->createTargetMachine(Triple, TargetOpts.CPU,
                                                      FeaturesStr, Options,
@@ -518,14 +548,13 @@
                                        formatted_raw_ostream &OS) {
 
   // Create the code generator passes.
-  PassManager *PM = getCodeGenPasses();
+  legacy::PassManager *PM = getCodeGenPasses();
 
   // Add LibraryInfo.
   llvm::Triple TargetTriple(TheModule->getTargetTriple());
-  PM->add(createTLI(TargetTriple, CodeGenOpts));
-
-  // Add Target specific analysis passes.
-  TM->addAnalysisPasses(*PM);
+  std::unique_ptr<TargetLibraryInfoImpl> TLII(
+      createTLII(TargetTriple, CodeGenOpts));
+  PM->add(new TargetLibraryInfoWrapperPass(*TLII));
 
   // Normal mode, emit a .s or .o file by running the code generator. Note,
   // this also adds codegenerator level optimization passes.
@@ -626,9 +655,8 @@
   // If an optional clang TargetInfo description string was passed in, use it to
   // verify the LLVM TargetMachine's DataLayout.
   if (AsmHelper.TM && !TDesc.empty()) {
-    std::string DLDesc = AsmHelper.TM->getSubtargetImpl()
-                             ->getDataLayout()
-                             ->getStringRepresentation();
+    std::string DLDesc =
+        AsmHelper.TM->getDataLayout()->getStringRepresentation();
     if (DLDesc != TDesc) {
       unsigned DiagID = Diags.getCustomDiagID(
           DiagnosticsEngine::Error, "backend data layout '%0' does not match "
diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp
index f4d90a6..2af2264 100644
--- a/lib/CodeGen/CGAtomic.cpp
+++ b/lib/CodeGen/CGAtomic.cpp
@@ -13,6 +13,7 @@
 
 #include "CodeGenFunction.h"
 #include "CGCall.h"
+#include "CGRecordLayout.h"
 #include "CodeGenModule.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
@@ -36,37 +37,72 @@
     CharUnits LValueAlign;
     TypeEvaluationKind EvaluationKind;
     bool UseLibcall;
+    LValue LVal;
+    CGBitFieldInfo BFI;
   public:
-    AtomicInfo(CodeGenFunction &CGF, LValue &lvalue) : CGF(CGF) {
-      assert(lvalue.isSimple());
-
-      AtomicTy = lvalue.getType();
-      ValueTy = AtomicTy->castAs<AtomicType>()->getValueType();
-      EvaluationKind = CGF.getEvaluationKind(ValueTy);
-
+    AtomicInfo(CodeGenFunction &CGF, LValue &lvalue)
+        : CGF(CGF), AtomicSizeInBits(0), ValueSizeInBits(0),
+          EvaluationKind(TEK_Scalar), UseLibcall(true) {
+      assert(!lvalue.isGlobalReg());
       ASTContext &C = CGF.getContext();
+      if (lvalue.isSimple()) {
+        AtomicTy = lvalue.getType();
+        if (auto *ATy = AtomicTy->getAs<AtomicType>())
+          ValueTy = ATy->getValueType();
+        else
+          ValueTy = AtomicTy;
+        EvaluationKind = CGF.getEvaluationKind(ValueTy);
 
-      uint64_t ValueAlignInBits;
-      uint64_t AtomicAlignInBits;
-      TypeInfo ValueTI = C.getTypeInfo(ValueTy);
-      ValueSizeInBits = ValueTI.Width;
-      ValueAlignInBits = ValueTI.Align;
+        uint64_t ValueAlignInBits;
+        uint64_t AtomicAlignInBits;
+        TypeInfo ValueTI = C.getTypeInfo(ValueTy);
+        ValueSizeInBits = ValueTI.Width;
+        ValueAlignInBits = ValueTI.Align;
 
-      TypeInfo AtomicTI = C.getTypeInfo(AtomicTy);
-      AtomicSizeInBits = AtomicTI.Width;
-      AtomicAlignInBits = AtomicTI.Align;
+        TypeInfo AtomicTI = C.getTypeInfo(AtomicTy);
+        AtomicSizeInBits = AtomicTI.Width;
+        AtomicAlignInBits = AtomicTI.Align;
 
-      assert(ValueSizeInBits <= AtomicSizeInBits);
-      assert(ValueAlignInBits <= AtomicAlignInBits);
+        assert(ValueSizeInBits <= AtomicSizeInBits);
+        assert(ValueAlignInBits <= AtomicAlignInBits);
 
-      AtomicAlign = C.toCharUnitsFromBits(AtomicAlignInBits);
-      ValueAlign = C.toCharUnitsFromBits(ValueAlignInBits);
-      if (lvalue.getAlignment().isZero())
-        lvalue.setAlignment(AtomicAlign);
+        AtomicAlign = C.toCharUnitsFromBits(AtomicAlignInBits);
+        ValueAlign = C.toCharUnitsFromBits(ValueAlignInBits);
+        if (lvalue.getAlignment().isZero())
+          lvalue.setAlignment(AtomicAlign);
 
-      UseLibcall =
-        (AtomicSizeInBits > uint64_t(C.toBits(lvalue.getAlignment())) ||
-         AtomicSizeInBits > C.getTargetInfo().getMaxAtomicInlineWidth());
+        LVal = lvalue;
+      } else if (lvalue.isBitField()) {
+        auto &OrigBFI = lvalue.getBitFieldInfo();
+        auto Offset = OrigBFI.Offset % C.toBits(lvalue.getAlignment());
+        AtomicSizeInBits = C.toBits(
+            C.toCharUnitsFromBits(Offset + OrigBFI.Size + C.getCharWidth() - 1)
+                .RoundUpToAlignment(lvalue.getAlignment()));
+        auto VoidPtrAddr = CGF.EmitCastToVoidPtr(lvalue.getBitFieldAddr());
+        auto OffsetInChars =
+            (C.toCharUnitsFromBits(OrigBFI.Offset) / lvalue.getAlignment()) *
+            lvalue.getAlignment();
+        VoidPtrAddr = CGF.Builder.CreateConstGEP1_64(
+            VoidPtrAddr, OffsetInChars.getQuantity());
+        auto Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+            VoidPtrAddr,
+            CGF.Builder.getIntNTy(AtomicSizeInBits)->getPointerTo(),
+            "atomic_bitfield_base");
+        BFI = OrigBFI;
+        BFI.Offset = Offset;
+        BFI.StorageSize = AtomicSizeInBits;
+        LVal = LValue::MakeBitfield(Addr, BFI, lvalue.getType(),
+                                    lvalue.getAlignment());
+      } else if (lvalue.isVectorElt()) {
+        AtomicSizeInBits = C.getTypeSize(lvalue.getType());
+        LVal = lvalue;
+      } else {
+        assert(lvalue.isExtVectorElt());
+        AtomicSizeInBits = C.getTypeSize(lvalue.getType());
+        LVal = lvalue;
+      }
+      UseLibcall = !C.getTargetInfo().hasBuiltinAtomic(
+          AtomicSizeInBits, C.toBits(lvalue.getAlignment()));
     }
 
     QualType getAtomicType() const { return AtomicTy; }
@@ -74,9 +110,10 @@
     CharUnits getAtomicAlignment() const { return AtomicAlign; }
     CharUnits getValueAlignment() const { return ValueAlign; }
     uint64_t getAtomicSizeInBits() const { return AtomicSizeInBits; }
-    uint64_t getValueSizeInBits() const { return AtomicSizeInBits; }
+    uint64_t getValueSizeInBits() const { return ValueSizeInBits; }
     TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; }
     bool shouldUseLibcall() const { return UseLibcall; }
+    const LValue &getAtomicLValue() const { return LVal; }
 
     /// Is the atomic size larger than the underlying value type?
     ///
@@ -88,7 +125,7 @@
       return (ValueSizeInBits != AtomicSizeInBits);
     }
 
-    bool emitMemSetZeroIfNecessary(LValue dest) const;
+    bool emitMemSetZeroIfNecessary() const;
 
     llvm::Value *getAtomicSizeValue() const {
       CharUnits size = CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits);
@@ -104,17 +141,24 @@
                                AggValueSlot resultSlot,
                                SourceLocation loc) const;
 
+    /// \brief Converts an rvalue to an integer value.
+    llvm::Value *convertRValueToInt(RValue RVal) const;
+
+    RValue convertIntToValue(llvm::Value *IntVal, AggValueSlot ResultSlot,
+                             SourceLocation Loc) const;
+
     /// Copy an atomic r-value into atomic-layout memory.
-    void emitCopyIntoMemory(RValue rvalue, LValue lvalue) const;
+    void emitCopyIntoMemory(RValue rvalue) const;
 
     /// Project an l-value down to the value field.
-    LValue projectValue(LValue lvalue) const {
-      llvm::Value *addr = lvalue.getAddress();
+    LValue projectValue() const {
+      assert(LVal.isSimple());
+      llvm::Value *addr = LVal.getAddress();
       if (hasPadding())
         addr = CGF.Builder.CreateStructGEP(addr, 0);
 
-      return LValue::MakeAddr(addr, getValueType(), lvalue.getAlignment(),
-                              CGF.getContext(), lvalue.getTBAAInfo());
+      return LValue::MakeAddr(addr, getValueType(), LVal.getAlignment(),
+                              CGF.getContext(), LVal.getTBAAInfo());
     }
 
     /// Materialize an atomic r-value in atomic-layout memory.
@@ -167,14 +211,15 @@
   llvm_unreachable("bad evaluation kind");
 }
 
-bool AtomicInfo::emitMemSetZeroIfNecessary(LValue dest) const {
-  llvm::Value *addr = dest.getAddress();
+bool AtomicInfo::emitMemSetZeroIfNecessary() const {
+  assert(LVal.isSimple());
+  llvm::Value *addr = LVal.getAddress();
   if (!requiresMemSetZero(addr->getType()->getPointerElementType()))
     return false;
 
   CGF.Builder.CreateMemSet(addr, llvm::ConstantInt::get(CGF.Int8Ty, 0),
                            AtomicSizeInBits / 8,
-                           dest.getAlignment().getQuantity());
+                           LVal.getAlignment().getQuantity());
   return true;
 }
 
@@ -588,8 +633,14 @@
     break;
   }
 
-  if (!E->getType()->isVoidType() && !Dest)
-    Dest = CreateMemTemp(E->getType(), ".atomicdst");
+  QualType RValTy = E->getType().getUnqualifiedType();
+
+  auto GetDest = [&] {
+    if (!RValTy->isVoidType() && !Dest) {
+      Dest = CreateMemTemp(RValTy, ".atomicdst");
+    }
+    return Dest;
+  };
 
   // Use a library call.  See: http://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary .
   if (UseLibcall) {
@@ -729,33 +780,30 @@
       } else {
         // Value is returned through parameter before the order.
         RetTy = getContext().VoidTy;
-        Args.add(RValue::get(EmitCastToVoidPtr(Dest)),
-                 getContext().VoidPtrTy);
+        Args.add(RValue::get(EmitCastToVoidPtr(Dest)), getContext().VoidPtrTy);
       }
     }
     // order is always the last parameter
     Args.add(RValue::get(Order),
              getContext().IntTy);
 
-    const CGFunctionInfo &FuncInfo =
-        CGM.getTypes().arrangeFreeFunctionCall(RetTy, Args,
-            FunctionType::ExtInfo(), RequiredArgs::All);
-    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
-    llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
-    RValue Res = EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
-    if (!RetTy->isVoidType()) {
-      if (UseOptimizedLibcall) {
-        if (HaveRetTy)
-          return Res;
-        llvm::StoreInst *StoreDest = Builder.CreateStore(
-            Res.getScalarVal(),
-            Builder.CreateBitCast(Dest, FTy->getReturnType()->getPointerTo()));
-        StoreDest->setAlignment(Align);
-      }
-    }
-    if (E->getType()->isVoidType())
+    RValue Res = emitAtomicLibcall(*this, LibCallName, RetTy, Args);
+    // The value is returned directly from the libcall.
+    if (HaveRetTy && !RetTy->isVoidType())
+      return Res;
+    // The value is returned via an explicit out param.
+    if (RetTy->isVoidType())
       return RValue::get(nullptr);
-    return convertTempToRValue(Dest, E->getType(), E->getExprLoc());
+    // The value is returned directly for optimized libcalls but the caller is
+    // expecting an out-param.
+    if (UseOptimizedLibcall) {
+      llvm::Value *ResVal = Res.getScalarVal();
+      llvm::StoreInst *StoreDest = Builder.CreateStore(
+          ResVal,
+          Builder.CreateBitCast(GetDest(), ResVal->getType()->getPointerTo()));
+      StoreDest->setAlignment(Align);
+    }
+    return convertTempToRValue(Dest, RValTy, E->getExprLoc());
   }
 
   bool IsStore = E->getOp() == AtomicExpr::AO__c11_atomic_store ||
@@ -765,13 +813,15 @@
                 E->getOp() == AtomicExpr::AO__atomic_load ||
                 E->getOp() == AtomicExpr::AO__atomic_load_n;
 
-  llvm::Type *IPtrTy =
-      llvm::IntegerType::get(getLLVMContext(), Size * 8)->getPointerTo();
-  llvm::Value *OrigDest = Dest;
-  Ptr = Builder.CreateBitCast(Ptr, IPtrTy);
-  if (Val1) Val1 = Builder.CreateBitCast(Val1, IPtrTy);
-  if (Val2) Val2 = Builder.CreateBitCast(Val2, IPtrTy);
-  if (Dest && !E->isCmpXChg()) Dest = Builder.CreateBitCast(Dest, IPtrTy);
+  llvm::Type *ITy =
+      llvm::IntegerType::get(getLLVMContext(), Size * 8);
+  llvm::Value *OrigDest = GetDest();
+  Ptr = Builder.CreateBitCast(
+      Ptr, ITy->getPointerTo(Ptr->getType()->getPointerAddressSpace()));
+  if (Val1) Val1 = Builder.CreateBitCast(Val1, ITy->getPointerTo());
+  if (Val2) Val2 = Builder.CreateBitCast(Val2, ITy->getPointerTo());
+  if (Dest && !E->isCmpXChg())
+    Dest = Builder.CreateBitCast(Dest, ITy->getPointerTo());
 
   if (isa<llvm::ConstantInt>(Order)) {
     int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
@@ -808,9 +858,9 @@
       // enforce that in general.
       break;
     }
-    if (E->getType()->isVoidType())
+    if (RValTy->isVoidType())
       return RValue::get(nullptr);
-    return convertTempToRValue(OrigDest, E->getType(), E->getExprLoc());
+    return convertTempToRValue(OrigDest, RValTy, E->getExprLoc());
   }
 
   // Long case, when Order isn't obviously constant.
@@ -876,9 +926,9 @@
 
   // Cleanup and return
   Builder.SetInsertPoint(ContBB);
-  if (E->getType()->isVoidType())
+  if (RValTy->isVoidType())
     return RValue::get(nullptr);
-  return convertTempToRValue(OrigDest, E->getType(), E->getExprLoc());
+  return convertTempToRValue(OrigDest, RValTy, E->getExprLoc());
 }
 
 llvm::Value *AtomicInfo::emitCastToAtomicIntPointer(llvm::Value *addr) const {
@@ -892,42 +942,148 @@
 RValue AtomicInfo::convertTempToRValue(llvm::Value *addr,
                                        AggValueSlot resultSlot,
                                        SourceLocation loc) const {
-  if (EvaluationKind == TEK_Aggregate)
-    return resultSlot.asRValue();
+  if (LVal.isSimple()) {
+    if (EvaluationKind == TEK_Aggregate)
+      return resultSlot.asRValue();
 
-  // Drill into the padding structure if we have one.
-  if (hasPadding())
-    addr = CGF.Builder.CreateStructGEP(addr, 0);
+    // Drill into the padding structure if we have one.
+    if (hasPadding())
+      addr = CGF.Builder.CreateStructGEP(addr, 0);
 
-  // Otherwise, just convert the temporary to an r-value using the
-  // normal conversion routine.
-  return CGF.convertTempToRValue(addr, getValueType(), loc);
+    // Otherwise, just convert the temporary to an r-value using the
+    // normal conversion routine.
+    return CGF.convertTempToRValue(addr, getValueType(), loc);
+  } else if (LVal.isBitField())
+    return CGF.EmitLoadOfBitfieldLValue(LValue::MakeBitfield(
+        addr, LVal.getBitFieldInfo(), LVal.getType(), LVal.getAlignment()));
+  else if (LVal.isVectorElt())
+    return CGF.EmitLoadOfLValue(LValue::MakeVectorElt(addr, LVal.getVectorIdx(),
+                                                      LVal.getType(),
+                                                      LVal.getAlignment()),
+                                loc);
+  assert(LVal.isExtVectorElt());
+  return CGF.EmitLoadOfExtVectorElementLValue(LValue::MakeExtVectorElt(
+      addr, LVal.getExtVectorElts(), LVal.getType(), LVal.getAlignment()));
+}
+
+RValue AtomicInfo::convertIntToValue(llvm::Value *IntVal,
+                                     AggValueSlot ResultSlot,
+                                     SourceLocation Loc) const {
+  assert(LVal.isSimple());
+  // Try not to go through memory in some easy cases.
+  assert(IntVal->getType()->isIntegerTy() && "Expected integer value");
+  if (getEvaluationKind() == TEK_Scalar && !hasPadding()) {
+    auto *ValTy = CGF.ConvertTypeForMem(ValueTy);
+    if (ValTy->isIntegerTy()) {
+      assert(IntVal->getType() == ValTy && "Different integer types.");
+      return RValue::get(CGF.EmitFromMemory(IntVal, ValueTy));
+    } else if (ValTy->isPointerTy())
+      return RValue::get(CGF.Builder.CreateIntToPtr(IntVal, ValTy));
+    else if (llvm::CastInst::isBitCastable(IntVal->getType(), ValTy))
+      return RValue::get(CGF.Builder.CreateBitCast(IntVal, ValTy));
+  }
+
+  // Create a temporary.  This needs to be big enough to hold the
+  // atomic integer.
+  llvm::Value *Temp;
+  bool TempIsVolatile = false;
+  CharUnits TempAlignment;
+  if (getEvaluationKind() == TEK_Aggregate) {
+    assert(!ResultSlot.isIgnored());
+    Temp = ResultSlot.getAddr();
+    TempAlignment = getValueAlignment();
+    TempIsVolatile = ResultSlot.isVolatile();
+  } else {
+    Temp = CGF.CreateMemTemp(getAtomicType(), "atomic-temp");
+    TempAlignment = getAtomicAlignment();
+  }
+
+  // Slam the integer into the temporary.
+  llvm::Value *CastTemp = emitCastToAtomicIntPointer(Temp);
+  CGF.Builder.CreateAlignedStore(IntVal, CastTemp, TempAlignment.getQuantity())
+      ->setVolatile(TempIsVolatile);
+
+  return convertTempToRValue(Temp, ResultSlot, Loc);
+}
+
+/// An LValue is a candidate for having its loads and stores be made atomic if
+/// we are operating under /volatile:ms *and* the LValue itself is volatile and
+/// such an operation can be performed without a libcall.
+bool CodeGenFunction::LValueIsSuitableForInlineAtomic(LValue LV) {
+  AtomicInfo AI(*this, LV);
+  bool IsVolatile = LV.isVolatile() || hasVolatileMember(LV.getType());
+  // An atomic is inline if we don't need to use a libcall.
+  bool AtomicIsInline = !AI.shouldUseLibcall();
+  return CGM.getCodeGenOpts().MSVolatile && IsVolatile && AtomicIsInline;
+}
+
+/// A type is a candidate for having its loads and stores be made atomic if
+/// we are operating under /volatile:ms *and* we know the access is volatile and
+/// such an operation can be performed without a libcall.
+bool CodeGenFunction::typeIsSuitableForInlineAtomic(QualType Ty,
+                                                    bool IsVolatile) const {
+  // An atomic is inline if we don't need to use a libcall (e.g. it is builtin).
+  bool AtomicIsInline = getContext().getTargetInfo().hasBuiltinAtomic(
+      getContext().getTypeSize(Ty), getContext().getTypeAlign(Ty));
+  return CGM.getCodeGenOpts().MSVolatile && IsVolatile && AtomicIsInline;
+}
+
+RValue CodeGenFunction::EmitAtomicLoad(LValue LV, SourceLocation SL,
+                                       AggValueSlot Slot) {
+  llvm::AtomicOrdering AO;
+  bool IsVolatile = LV.isVolatileQualified();
+  if (LV.getType()->isAtomicType()) {
+    AO = llvm::SequentiallyConsistent;
+  } else {
+    AO = llvm::Acquire;
+    IsVolatile = true;
+  }
+  return EmitAtomicLoad(LV, SL, AO, IsVolatile, Slot);
 }
 
 /// Emit a load from an l-value of atomic type.  Note that the r-value
 /// we produce is an r-value of the atomic *value* type.
 RValue CodeGenFunction::EmitAtomicLoad(LValue src, SourceLocation loc,
+                                       llvm::AtomicOrdering AO, bool IsVolatile,
                                        AggValueSlot resultSlot) {
   AtomicInfo atomics(*this, src);
+  LValue LVal = atomics.getAtomicLValue();
+  llvm::Value *SrcAddr = nullptr;
+  llvm::AllocaInst *NonSimpleTempAlloca = nullptr;
+  if (LVal.isSimple())
+    SrcAddr = LVal.getAddress();
+  else {
+    if (LVal.isBitField())
+      SrcAddr = LVal.getBitFieldAddr();
+    else if (LVal.isVectorElt())
+      SrcAddr = LVal.getVectorAddr();
+    else {
+      assert(LVal.isExtVectorElt());
+      SrcAddr = LVal.getExtVectorAddr();
+    }
+    NonSimpleTempAlloca = CreateTempAlloca(
+        SrcAddr->getType()->getPointerElementType(), "atomic-load-temp");
+    NonSimpleTempAlloca->setAlignment(getContext().toBits(src.getAlignment()));
+  }
 
   // Check whether we should use a library call.
   if (atomics.shouldUseLibcall()) {
     llvm::Value *tempAddr;
-    if (!resultSlot.isIgnored()) {
-      assert(atomics.getEvaluationKind() == TEK_Aggregate);
-      tempAddr = resultSlot.getAddr();
-    } else {
-      tempAddr = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp");
-    }
+    if (LVal.isSimple()) {
+      if (!resultSlot.isIgnored()) {
+        assert(atomics.getEvaluationKind() == TEK_Aggregate);
+        tempAddr = resultSlot.getAddr();
+      } else
+        tempAddr = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp");
+    } else
+      tempAddr = NonSimpleTempAlloca;
 
     // void __atomic_load(size_t size, void *mem, void *return, int order);
     CallArgList args;
     args.add(RValue::get(atomics.getAtomicSizeValue()),
              getContext().getSizeType());
-    args.add(RValue::get(EmitCastToVoidPtr(src.getAddress())),
-             getContext().VoidPtrTy);
-    args.add(RValue::get(EmitCastToVoidPtr(tempAddr)),
-             getContext().VoidPtrTy);
+    args.add(RValue::get(EmitCastToVoidPtr(SrcAddr)), getContext().VoidPtrTy);
+    args.add(RValue::get(EmitCastToVoidPtr(tempAddr)), getContext().VoidPtrTy);
     args.add(RValue::get(llvm::ConstantInt::get(
                  IntTy, AtomicExpr::AO_ABI_memory_order_seq_cst)),
              getContext().IntTy);
@@ -938,94 +1094,62 @@
   }
 
   // Okay, we're doing this natively.
-  llvm::Value *addr = atomics.emitCastToAtomicIntPointer(src.getAddress());
+  llvm::Value *addr = atomics.emitCastToAtomicIntPointer(SrcAddr);
   llvm::LoadInst *load = Builder.CreateLoad(addr, "atomic-load");
-  load->setAtomic(llvm::SequentiallyConsistent);
+  load->setAtomic(AO);
 
   // Other decoration.
   load->setAlignment(src.getAlignment().getQuantity());
-  if (src.isVolatileQualified())
+  if (IsVolatile)
     load->setVolatile(true);
   if (src.getTBAAInfo())
     CGM.DecorateInstruction(load, src.getTBAAInfo());
 
-  // Okay, turn that back into the original value type.
-  QualType valueType = atomics.getValueType();
-  llvm::Value *result = load;
-
   // If we're ignoring an aggregate return, don't do anything.
   if (atomics.getEvaluationKind() == TEK_Aggregate && resultSlot.isIgnored())
     return RValue::getAggregate(nullptr, false);
 
-  // The easiest way to do this this is to go through memory, but we
-  // try not to in some easy cases.
-  if (atomics.getEvaluationKind() == TEK_Scalar && !atomics.hasPadding()) {
-    llvm::Type *resultTy = CGM.getTypes().ConvertTypeForMem(valueType);
-    if (isa<llvm::IntegerType>(resultTy)) {
-      assert(result->getType() == resultTy);
-      result = EmitFromMemory(result, valueType);
-    } else if (isa<llvm::PointerType>(resultTy)) {
-      result = Builder.CreateIntToPtr(result, resultTy);
-    } else {
-      result = Builder.CreateBitCast(result, resultTy);
-    }
-    return RValue::get(result);
-  }
+  // Okay, turn that back into the original value type.
+  if (src.isSimple())
+    return atomics.convertIntToValue(load, resultSlot, loc);
 
-  // Create a temporary.  This needs to be big enough to hold the
-  // atomic integer.
-  llvm::Value *temp;
-  bool tempIsVolatile = false;
-  CharUnits tempAlignment;
-  if (atomics.getEvaluationKind() == TEK_Aggregate) {
-    assert(!resultSlot.isIgnored());
-    temp = resultSlot.getAddr();
-    tempAlignment = atomics.getValueAlignment();
-    tempIsVolatile = resultSlot.isVolatile();
-  } else {
-    temp = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp");
-    tempAlignment = atomics.getAtomicAlignment();
-  }
-
-  // Slam the integer into the temporary.
-  llvm::Value *castTemp = atomics.emitCastToAtomicIntPointer(temp);
-  Builder.CreateAlignedStore(result, castTemp, tempAlignment.getQuantity())
-    ->setVolatile(tempIsVolatile);
-
-  return atomics.convertTempToRValue(temp, resultSlot, loc);
+  auto *IntAddr = atomics.emitCastToAtomicIntPointer(NonSimpleTempAlloca);
+  Builder.CreateAlignedStore(load, IntAddr, src.getAlignment().getQuantity());
+  return atomics.convertTempToRValue(NonSimpleTempAlloca, resultSlot, loc);
 }
 
 
 
 /// Copy an r-value into memory as part of storing to an atomic type.
 /// This needs to create a bit-pattern suitable for atomic operations.
-void AtomicInfo::emitCopyIntoMemory(RValue rvalue, LValue dest) const {
+void AtomicInfo::emitCopyIntoMemory(RValue rvalue) const {
+  assert(LVal.isSimple());
   // If we have an r-value, the rvalue should be of the atomic type,
   // which means that the caller is responsible for having zeroed
   // any padding.  Just do an aggregate copy of that type.
   if (rvalue.isAggregate()) {
-    CGF.EmitAggregateCopy(dest.getAddress(),
+    CGF.EmitAggregateCopy(LVal.getAddress(),
                           rvalue.getAggregateAddr(),
                           getAtomicType(),
                           (rvalue.isVolatileQualified()
-                           || dest.isVolatileQualified()),
-                          dest.getAlignment());
+                           || LVal.isVolatileQualified()),
+                          LVal.getAlignment());
     return;
   }
 
   // Okay, otherwise we're copying stuff.
 
   // Zero out the buffer if necessary.
-  emitMemSetZeroIfNecessary(dest);
+  emitMemSetZeroIfNecessary();
 
   // Drill past the padding if present.
-  dest = projectValue(dest);
+  LValue TempLVal = projectValue();
 
   // Okay, store the rvalue in.
   if (rvalue.isScalar()) {
-    CGF.EmitStoreOfScalar(rvalue.getScalarVal(), dest, /*init*/ true);
+    CGF.EmitStoreOfScalar(rvalue.getScalarVal(), TempLVal, /*init*/ true);
   } else {
-    CGF.EmitStoreOfComplex(rvalue.getComplexVal(), dest, /*init*/ true);
+    CGF.EmitStoreOfComplex(rvalue.getComplexVal(), TempLVal, /*init*/ true);
   }
 }
 
@@ -1040,17 +1164,60 @@
 
   // Otherwise, make a temporary and materialize into it.
   llvm::Value *temp = CGF.CreateMemTemp(getAtomicType(), "atomic-store-temp");
-  LValue tempLV = CGF.MakeAddrLValue(temp, getAtomicType(), getAtomicAlignment());
-  emitCopyIntoMemory(rvalue, tempLV);
+  LValue tempLV =
+      CGF.MakeAddrLValue(temp, getAtomicType(), getAtomicAlignment());
+  AtomicInfo Atomics(CGF, tempLV);
+  Atomics.emitCopyIntoMemory(rvalue);
   return temp;
 }
 
+llvm::Value *AtomicInfo::convertRValueToInt(RValue RVal) const {
+  // Fast path: an unpadded scalar is returned as-is (integer) or converted
+  // with ptrtoint/bitcast, so it never has to go through memory.
+  if (RVal.isScalar() && !hasPadding()) {
+    llvm::Value *Value = RVal.getScalarVal();
+    if (isa<llvm::IntegerType>(Value->getType()))
+      return Value;
+    else {
+      llvm::IntegerType *InputIntTy =
+          llvm::IntegerType::get(CGF.getLLVMContext(), getValueSizeInBits());
+      if (isa<llvm::PointerType>(Value->getType()))
+        return CGF.Builder.CreatePtrToInt(Value, InputIntTy);
+      else if (llvm::BitCastInst::isBitCastable(Value->getType(), InputIntTy))
+        return CGF.Builder.CreateBitCast(Value, InputIntTy);
+    }
+  }
+  // Otherwise (non-scalar, padded, or not bit-castable) go through memory:
+  // put the r-value in a temporary.
+  llvm::Value *Addr = materializeRValue(RVal);
+
+  // Cast the temporary to the atomic int type and load the value from it.
+  Addr = emitCastToAtomicIntPointer(Addr);
+  return CGF.Builder.CreateAlignedLoad(Addr,
+                                       getAtomicAlignment().getQuantity());
+}
+
+void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue lvalue,
+                                      bool isInit) {
+  bool IsVolatile = lvalue.isVolatileQualified();
+  llvm::AtomicOrdering AO;
+  if (lvalue.getType()->isAtomicType()) {
+    AO = llvm::SequentiallyConsistent;
+  } else {
+    AO = llvm::Release;
+    IsVolatile = true;
+  }
+  return EmitAtomicStore(rvalue, lvalue, AO, IsVolatile, isInit);
+}
+
 /// Emit a store to an l-value of atomic type.
 ///
 /// Note that the r-value is expected to be an r-value *of the atomic
 /// type*; this means that for aggregate r-values, it should include
 /// storage for any padding that was necessary.
-void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest, bool isInit) {
+void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest,
+                                      llvm::AtomicOrdering AO, bool IsVolatile,
+                                      bool isInit) {
   // If this is an aggregate r-value, it should agree in type except
   // maybe for address-space qualification.
   assert(!rvalue.isAggregate() ||
@@ -1061,7 +1228,7 @@
 
   // If this is an initialization, just put the value there normally.
   if (isInit) {
-    atomics.emitCopyIntoMemory(rvalue, dest);
+    atomics.emitCopyIntoMemory(rvalue);
     return;
   }
 
@@ -1086,63 +1253,104 @@
   }
 
   // Okay, we're doing this natively.
-  llvm::Value *intValue;
-
-  // If we've got a scalar value of the right size, try to avoid going
-  // through memory.
-  if (rvalue.isScalar() && !atomics.hasPadding()) {
-    llvm::Value *value = rvalue.getScalarVal();
-    if (isa<llvm::IntegerType>(value->getType())) {
-      intValue = value;
-    } else {
-      llvm::IntegerType *inputIntTy =
-        llvm::IntegerType::get(getLLVMContext(), atomics.getValueSizeInBits());
-      if (isa<llvm::PointerType>(value->getType())) {
-        intValue = Builder.CreatePtrToInt(value, inputIntTy);
-      } else {
-        intValue = Builder.CreateBitCast(value, inputIntTy);
-      }
-    }
-
-  // Otherwise, we need to go through memory.
-  } else {
-    // Put the r-value in memory.
-    llvm::Value *addr = atomics.materializeRValue(rvalue);
-
-    // Cast the temporary to the atomic int type and pull a value out.
-    addr = atomics.emitCastToAtomicIntPointer(addr);
-    intValue = Builder.CreateAlignedLoad(addr,
-                                 atomics.getAtomicAlignment().getQuantity());
-  }
+  llvm::Value *intValue = atomics.convertRValueToInt(rvalue);
 
   // Do the atomic store.
   llvm::Value *addr = atomics.emitCastToAtomicIntPointer(dest.getAddress());
   llvm::StoreInst *store = Builder.CreateStore(intValue, addr);
 
   // Initializations don't need to be atomic.
-  if (!isInit) store->setAtomic(llvm::SequentiallyConsistent);
+  if (!isInit) store->setAtomic(AO);
 
   // Other decoration.
   store->setAlignment(dest.getAlignment().getQuantity());
-  if (dest.isVolatileQualified())
+  if (IsVolatile)
     store->setVolatile(true);
   if (dest.getTBAAInfo())
     CGM.DecorateInstruction(store, dest.getTBAAInfo());
 }
 
+/// Emit a compare-and-exchange op on an l-value of atomic type.
+/// Returns the pair (previous value, success/failure flag) as r-values.
+std::pair<RValue, RValue> CodeGenFunction::EmitAtomicCompareExchange(
+    LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc,
+    llvm::AtomicOrdering Success, llvm::AtomicOrdering Failure, bool IsWeak,
+    AggValueSlot Slot) {
+  // If this is an aggregate r-value, it should agree in type except
+  // maybe for address-space qualification.
+  assert(!Expected.isAggregate() ||
+         Expected.getAggregateAddr()->getType()->getPointerElementType() ==
+             Obj.getAddress()->getType()->getPointerElementType());
+  assert(!Desired.isAggregate() ||
+         Desired.getAggregateAddr()->getType()->getPointerElementType() ==
+             Obj.getAddress()->getType()->getPointerElementType());
+  AtomicInfo Atomics(*this, Obj);
+
+  if (Failure >= Success)
+    // Don't assert on undefined behavior; clamp the failure ordering.
+    Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(Success);
+
+  auto Alignment = Atomics.getValueAlignment();
+  // Check whether we should use a library call.
+  if (Atomics.shouldUseLibcall()) {
+    auto *ExpectedAddr = Atomics.materializeRValue(Expected);
+    // Likewise materialize the desired value to get its address.
+    auto *DesiredAddr = Atomics.materializeRValue(Desired);
+    // bool __atomic_compare_exchange(size_t size, void *obj, void *expected,
+    // void *desired, int success, int failure);
+    CallArgList Args;
+    Args.add(RValue::get(Atomics.getAtomicSizeValue()),
+             getContext().getSizeType());
+    Args.add(RValue::get(EmitCastToVoidPtr(Obj.getAddress())),
+             getContext().VoidPtrTy);
+    Args.add(RValue::get(EmitCastToVoidPtr(ExpectedAddr)),
+             getContext().VoidPtrTy);
+    Args.add(RValue::get(EmitCastToVoidPtr(DesiredAddr)),
+             getContext().VoidPtrTy);
+    Args.add(RValue::get(llvm::ConstantInt::get(IntTy, Success)),
+             getContext().IntTy);
+    Args.add(RValue::get(llvm::ConstantInt::get(IntTy, Failure)),
+             getContext().IntTy);
+    auto SuccessFailureRVal = emitAtomicLibcall(
+        *this, "__atomic_compare_exchange", getContext().BoolTy, Args);
+    auto *PreviousVal =
+        Builder.CreateAlignedLoad(ExpectedAddr, Alignment.getQuantity());
+    return std::make_pair(RValue::get(PreviousVal), SuccessFailureRVal);
+  }
+
+  // Native path: convert both operands to the atomic integer type
+  // (convertRValueToInt avoids going through memory where it can).
+  auto *ExpectedIntVal = Atomics.convertRValueToInt(Expected);
+  auto *DesiredIntVal = Atomics.convertRValueToInt(Desired);
+
+  // Do the atomic compare-and-exchange.
+  auto *Addr = Atomics.emitCastToAtomicIntPointer(Obj.getAddress());
+  auto *Inst = Builder.CreateAtomicCmpXchg(Addr, ExpectedIntVal, DesiredIntVal,
+                                          Success, Failure);
+  // Other decoration.
+  Inst->setVolatile(Obj.isVolatileQualified());
+  Inst->setWeak(IsWeak);
+
+  // Okay, turn that back into the original value type.
+  auto *PreviousVal = Builder.CreateExtractValue(Inst, /*Idxs=*/0);
+  auto *SuccessFailureVal = Builder.CreateExtractValue(Inst, /*Idxs=*/1);
+  return std::make_pair(Atomics.convertIntToValue(PreviousVal, Slot, Loc),
+                        RValue::get(SuccessFailureVal));
+}
+
 void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) {
   AtomicInfo atomics(*this, dest);
 
   switch (atomics.getEvaluationKind()) {
   case TEK_Scalar: {
     llvm::Value *value = EmitScalarExpr(init);
-    atomics.emitCopyIntoMemory(RValue::get(value), dest);
+    atomics.emitCopyIntoMemory(RValue::get(value));
     return;
   }
 
   case TEK_Complex: {
     ComplexPairTy value = EmitComplexExpr(init);
-    atomics.emitCopyIntoMemory(RValue::getComplex(value), dest);
+    atomics.emitCopyIntoMemory(RValue::getComplex(value));
     return;
   }
 
@@ -1151,8 +1359,8 @@
     // of atomic type.
     bool Zeroed = false;
     if (!init->getType()->isAtomicType()) {
-      Zeroed = atomics.emitMemSetZeroIfNecessary(dest);
-      dest = atomics.projectValue(dest);
+      Zeroed = atomics.emitMemSetZeroIfNecessary();
+      dest = atomics.projectValue();
     }
 
     // Evaluate the expression directly into the destination.
diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp
index f088cd6..7b8e839 100644
--- a/lib/CodeGen/CGBlocks.cpp
+++ b/lib/CodeGen/CGBlocks.cpp
@@ -792,9 +792,10 @@
       // emission.
       src = LocalDeclMap.lookup(variable);
       if (!src) {
-        DeclRefExpr declRef(const_cast<VarDecl *>(variable),
-                            /*refersToEnclosing*/ CI.isNested(), type,
-                            VK_LValue, SourceLocation());
+        DeclRefExpr declRef(
+            const_cast<VarDecl *>(variable),
+            /*RefersToEnclosingVariableOrCapture*/ CI.isNested(), type,
+            VK_LValue, SourceLocation());
         src = EmitDeclRefLValue(&declRef).getAddress();
       }
     }
@@ -863,12 +864,15 @@
 
       // We use one of these or the other depending on whether the
       // reference is nested.
-      DeclRefExpr declRef(const_cast<VarDecl*>(variable),
-                          /*refersToEnclosing*/ CI.isNested(), type,
-                          VK_LValue, SourceLocation());
+      DeclRefExpr declRef(const_cast<VarDecl *>(variable),
+                          /*RefersToEnclosingVariableOrCapture*/ CI.isNested(),
+                          type, VK_LValue, SourceLocation());
 
       ImplicitCastExpr l2r(ImplicitCastExpr::OnStack, type, CK_LValueToRValue,
                            &declRef, VK_RValue);
+      // FIXME: Pass a specific location for the expr init so that the store is
+      // attributed to a reasonable location - otherwise it may be attributed to
+      // locations of subexpressions in the initialization.
       EmitExprAsInit(&l2r, &blockFieldPseudoVar,
                      MakeAddrLValue(blockField, type, align),
                      /*captured by init*/ false);
@@ -915,7 +919,7 @@
   // };
   BlockDescriptorType =
     llvm::StructType::create("struct.__block_descriptor",
-                             UnsignedLongTy, UnsignedLongTy, NULL);
+                             UnsignedLongTy, UnsignedLongTy, nullptr);
 
   // Now form a pointer to that.
   BlockDescriptorType = llvm::PointerType::getUnqual(BlockDescriptorType);
@@ -938,7 +942,7 @@
   GenericBlockLiteralType =
     llvm::StructType::create("struct.__block_literal_generic",
                              VoidPtrTy, IntTy, IntTy, VoidPtrTy,
-                             BlockDescPtrTy, NULL);
+                             BlockDescPtrTy, nullptr);
 
   return GenericBlockLiteralType;
 }
@@ -1103,6 +1107,8 @@
   const BlockDecl *blockDecl = blockInfo.getBlockDecl();
 
   CurGD = GD;
+
+  CurEHLocation = blockInfo.getBlockExpr()->getLocEnd();
   
   BlockInfo = &blockInfo;
 
@@ -1130,8 +1136,7 @@
   args.push_back(&selfDecl);
 
   // Now add the rest of the parameters.
-  for (auto i : blockDecl->params())
-    args.push_back(i);
+  args.append(blockDecl->param_begin(), blockDecl->param_end());
 
   // Create the function declaration.
   const FunctionProtoType *fnType = blockInfo.getBlockExpr()->getFunctionType();
@@ -1172,7 +1177,7 @@
     Alloca->setAlignment(Align);
     // Set the DebugLocation to empty, so the store is recognized as a
     // frame setup instruction by llvm::DwarfDebug::beginFunction().
-    NoLocation NL(*this, Builder);
+    auto NL = ApplyDebugLocation::CreateEmpty(*this);
     Builder.CreateAlignedStore(BlockPointer, Alloca, Align);
     BlockPointerDbgLoc = Alloca;
   }
@@ -1215,8 +1220,6 @@
     RegionCounter Cnt = getPGORegionCounter(blockDecl->getBody());
     Cnt.beginRegion(Builder);
     EmitStmt(blockDecl->getBody());
-    PGO.emitInstrumentationData();
-    PGO.destroyRegionCounters();
   }
 
   // Remember where we were...
@@ -1324,11 +1327,10 @@
                                           nullptr, SC_Static,
                                           false,
                                           false);
-  // Create a scope with an artificial location for the body of this function.
-  ArtificialLocation AL(*this, Builder);
+  auto NL = ApplyDebugLocation::CreateEmpty(*this);
   StartFunction(FD, C.VoidTy, Fn, FI, args);
-  AL.Emit();
-
+  // Create a scope with an artificial location for the body of this function.
+  auto AL = ApplyDebugLocation::CreateArtificial(*this);
   llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo();
 
   llvm::Value *src = GetAddrOfLocalVar(&srcDecl);
@@ -1496,9 +1498,9 @@
                                           nullptr, SC_Static,
                                           false, false);
   // Create a scope with an artificial location for the body of this function.
-  ArtificialLocation AL(*this, Builder);
+  auto NL = ApplyDebugLocation::CreateEmpty(*this);
   StartFunction(FD, C.VoidTy, Fn, FI, args);
-  AL.Emit();
+  auto AL = ApplyDebugLocation::CreateArtificial(*this);
 
   llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo();
 
diff --git a/lib/CodeGen/CGBuilder.h b/lib/CodeGen/CGBuilder.h
index 72ba4fa..6610659 100644
--- a/lib/CodeGen/CGBuilder.h
+++ b/lib/CodeGen/CGBuilder.h
@@ -33,7 +33,7 @@
                     llvm::BasicBlock *BB,
                     llvm::BasicBlock::iterator InsertPt) const;
 private:
-  void operator=(const CGBuilderInserter &) LLVM_DELETED_FUNCTION;
+  void operator=(const CGBuilderInserter &) = delete;
 
   CodeGenFunction *CGF;
 };
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 494e1ef..bf7d86f 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -20,7 +20,10 @@
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/CallSite.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Intrinsics.h"
 
 using namespace clang;
@@ -185,7 +188,8 @@
 }
 
 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
-                                        unsigned BuiltinID, const CallExpr *E) {
+                                        unsigned BuiltinID, const CallExpr *E,
+                                        ReturnValueSlot ReturnValue) {
   // See if we can constant fold this builtin.  If so, don't emit it at all.
   Expr::EvalResult Result;
   if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
@@ -386,9 +390,14 @@
     Value *ArgValue = EmitScalarExpr(E->getArg(0));
     llvm::Type *ArgType = ArgValue->getType();
 
-    Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
     Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
+    // Don't generate llvm.expect on -O0 as the backend won't use it for
+    // anything.
+    // Note, we still IRGen ExpectedValue because it could have side-effects.
+    if (CGM.getCodeGenOpts().OptimizationLevel == 0)
+      return RValue::get(ArgValue);
 
+    Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
     Value *Result = Builder.CreateCall2(FnExpect, ArgValue, ExpectedValue,
                                         "expval");
     return RValue::get(Result);
@@ -850,6 +859,8 @@
       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
   }
   case Builtin::BI__builtin_setjmp: {
+    if (!getTargetHooks().hasSjLjLowering(*this))
+      break;
     // Buffer is a void**.
     Value *Buf = EmitScalarExpr(E->getArg(0));
 
@@ -872,6 +883,8 @@
     return RValue::get(Builder.CreateCall(F, Buf));
   }
   case Builtin::BI__builtin_longjmp: {
+    if (!getTargetHooks().hasSjLjLowering(*this))
+      break;
     Value *Buf = EmitScalarExpr(E->getArg(0));
     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
 
@@ -1354,6 +1367,9 @@
     return RValue::get(Builder.CreateCall(F, Arg0));
   }
 
+  case Builtin::BI__builtin_pow:
+  case Builtin::BI__builtin_powf:
+  case Builtin::BI__builtin_powl:
   case Builtin::BIpow:
   case Builtin::BIpowf:
   case Builtin::BIpowl: {
@@ -1566,6 +1582,13 @@
   case Builtin::BI__noop:
     // __noop always evaluates to an integer literal zero.
     return RValue::get(ConstantInt::get(IntTy, 0));
+  case Builtin::BI__builtin_call_with_static_chain: {
+    const CallExpr *Call = cast<CallExpr>(E->getArg(0));
+    const Expr *Chain = E->getArg(1);
+    return EmitCall(Call->getCallee()->getType(),
+                    EmitScalarExpr(Call->getCallee()), Call, ReturnValue,
+                    Call->getCalleeDecl(), EmitScalarExpr(Chain));
+  }
   case Builtin::BI_InterlockedExchange:
   case Builtin::BI_InterlockedExchangePointer:
     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
@@ -1640,6 +1663,67 @@
         Builder.CreateAlignedLoad(IntToPtr, /*Align=*/4, /*isVolatile=*/true);
     return RValue::get(Load);
   }
+
+  case Builtin::BI__exception_code:
+  case Builtin::BI_exception_code:
+    return RValue::get(EmitSEHExceptionCode());
+  case Builtin::BI__exception_info:
+  case Builtin::BI_exception_info:
+    return RValue::get(EmitSEHExceptionInfo());
+  case Builtin::BI__abnormal_termination:
+  case Builtin::BI_abnormal_termination:
+    return RValue::get(EmitSEHAbnormalTermination());
+  case Builtin::BI_setjmpex: {
+    if (getTarget().getTriple().isOSMSVCRT()) {
+      llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
+      llvm::AttributeSet ReturnsTwiceAttr =
+          AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
+                            llvm::Attribute::ReturnsTwice);
+      llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
+          llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
+          "_setjmpex", ReturnsTwiceAttr);
+      llvm::Value *Buf =
+          Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
+      llvm::Value *FrameAddr =
+          Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
+                             ConstantInt::get(Int32Ty, 0));
+      llvm::Value *Args[] = {Buf, FrameAddr};
+      llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
+      CS.setAttributes(ReturnsTwiceAttr);
+      return RValue::get(CS.getInstruction());
+    }
+  }
+  case Builtin::BI_setjmp: {
+    if (getTarget().getTriple().isOSMSVCRT()) {
+      llvm::AttributeSet ReturnsTwiceAttr =
+          AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
+                            llvm::Attribute::ReturnsTwice);
+      llvm::Value *Buf =
+          Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
+      llvm::CallSite CS;
+      if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
+        llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
+        llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
+            llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
+            "_setjmp3", ReturnsTwiceAttr);
+        llvm::Value *Count = ConstantInt::get(IntTy, 0);
+        llvm::Value *Args[] = {Buf, Count};
+        CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
+      } else {
+        llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
+        llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
+            llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
+            "_setjmp", ReturnsTwiceAttr);
+        llvm::Value *FrameAddr =
+            Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
+                               ConstantInt::get(Int32Ty, 0));
+        llvm::Value *Args[] = {Buf, FrameAddr};
+        CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
+      }
+      CS.setAttributes(ReturnsTwiceAttr);
+      return RValue::get(CS.getInstruction());
+    }
+  }
   }
 
   // If this is an alias for a lib function (e.g. __builtin_sin), emit
@@ -1752,6 +1836,7 @@
   case llvm::Triple::ppc64le:
     return EmitPPCBuiltinExpr(BuiltinID, E);
   case llvm::Triple::r600:
+  case llvm::Triple::amdgcn:
     return EmitR600BuiltinExpr(BuiltinID, E);
   default:
     return nullptr;
@@ -2523,7 +2608,7 @@
   // Return type.
   SmallVector<llvm::Type *, 3> Tys;
   if (Modifier & AddRetType) {
-    llvm::Type *Ty = ConvertType(E->getCallReturnType());
+    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
     if (Modifier & VectorizeRetType)
       Ty = llvm::VectorType::get(
           Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
@@ -3122,39 +3207,59 @@
   return CGF.EmitNeonCall(TblF, TblOps, Name);
 }
 
-Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
-                                           const CallExpr *E) {
-  unsigned HintID = static_cast<unsigned>(-1);
+Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
   switch (BuiltinID) {
-  default: break;
+  default:
+    return nullptr;
   case ARM::BI__builtin_arm_nop:
-    HintID = 0;
-    break;
+    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
+                              llvm::ConstantInt::get(Int32Ty, 0));
   case ARM::BI__builtin_arm_yield:
   case ARM::BI__yield:
-    HintID = 1;
-    break;
+    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
+                              llvm::ConstantInt::get(Int32Ty, 1));
   case ARM::BI__builtin_arm_wfe:
   case ARM::BI__wfe:
-    HintID = 2;
-    break;
+    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
+                              llvm::ConstantInt::get(Int32Ty, 2));
   case ARM::BI__builtin_arm_wfi:
   case ARM::BI__wfi:
-    HintID = 3;
-    break;
+    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
+                              llvm::ConstantInt::get(Int32Ty, 3));
   case ARM::BI__builtin_arm_sev:
   case ARM::BI__sev:
-    HintID = 4;
-    break;
+    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
+                              llvm::ConstantInt::get(Int32Ty, 4));
   case ARM::BI__builtin_arm_sevl:
   case ARM::BI__sevl:
-    HintID = 5;
-    break;
+    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
+                              llvm::ConstantInt::get(Int32Ty, 5));
   }
+}
 
-  if (HintID != static_cast<unsigned>(-1)) {
-    Function *F = CGM.getIntrinsic(Intrinsic::arm_hint);
-    return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
+Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
+                                           const CallExpr *E) {
+  if (auto Hint = GetValueForARMHint(BuiltinID))
+    return Hint;
+
+  if (BuiltinID == ARM::BI__emit) {
+    bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
+    llvm::FunctionType *FTy =
+        llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
+
+    APSInt Value;
+    if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
+      llvm_unreachable("Sema will ensure that the parameter is constant");
+
+    uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
+
+    llvm::InlineAsm *Emit =
+        IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
+                                 /*SideEffects=*/true)
+                : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
+                                 /*SideEffects=*/true);
+
+    return Builder.CreateCall(Emit);
   }
 
   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
@@ -3257,7 +3362,7 @@
     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
                                        ? Intrinsic::arm_stlexd
                                        : Intrinsic::arm_strexd);
-    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, NULL);
+    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr);
 
     Value *Tmp = CreateMemTemp(E->getArg(0)->getType());
     Value *Val = EmitScalarExpr(E->getArg(0));
@@ -4028,7 +4133,7 @@
     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
                                        ? Intrinsic::aarch64_stlxp
                                        : Intrinsic::aarch64_stxp);
-    llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, NULL);
+    llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr);
 
     Value *One = llvm::ConstantInt::get(Int32Ty, 1);
     Value *Tmp = Builder.CreateAlloca(ConvertType(E->getArg(0)->getType()),
@@ -4217,36 +4322,36 @@
   case NEON::BI__builtin_neon_vceqzs_f32:
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
     return EmitAArch64CompareBuiltinExpr(
-        Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OEQ,
-        ICmpInst::ICMP_EQ, "vceqz");
+        Ops[0], ConvertType(E->getCallReturnType(getContext())),
+        ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
   case NEON::BI__builtin_neon_vcgezd_s64:
   case NEON::BI__builtin_neon_vcgezd_f64:
   case NEON::BI__builtin_neon_vcgezs_f32:
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
     return EmitAArch64CompareBuiltinExpr(
-        Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OGE,
-        ICmpInst::ICMP_SGE, "vcgez");
+        Ops[0], ConvertType(E->getCallReturnType(getContext())),
+        ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
   case NEON::BI__builtin_neon_vclezd_s64:
   case NEON::BI__builtin_neon_vclezd_f64:
   case NEON::BI__builtin_neon_vclezs_f32:
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
     return EmitAArch64CompareBuiltinExpr(
-        Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OLE,
-        ICmpInst::ICMP_SLE, "vclez");
+        Ops[0], ConvertType(E->getCallReturnType(getContext())),
+        ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
   case NEON::BI__builtin_neon_vcgtzd_s64:
   case NEON::BI__builtin_neon_vcgtzd_f64:
   case NEON::BI__builtin_neon_vcgtzs_f32:
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
     return EmitAArch64CompareBuiltinExpr(
-        Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OGT,
-        ICmpInst::ICMP_SGT, "vcgtz");
+        Ops[0], ConvertType(E->getCallReturnType(getContext())),
+        ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
   case NEON::BI__builtin_neon_vcltzd_s64:
   case NEON::BI__builtin_neon_vcltzd_f64:
   case NEON::BI__builtin_neon_vcltzs_f32:
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
     return EmitAArch64CompareBuiltinExpr(
-        Ops[0], ConvertType(E->getCallReturnType()), ICmpInst::FCMP_OLT,
-        ICmpInst::ICMP_SLT, "vcltz");
+        Ops[0], ConvertType(E->getCallReturnType(getContext())),
+        ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
 
   case NEON::BI__builtin_neon_vceqzd_u64: {
     llvm::Type *Ty = llvm::Type::getInt64Ty(getLLVMContext());
@@ -4698,7 +4803,7 @@
   case NEON::BI__builtin_neon_vfmad_lane_f64:
   case NEON::BI__builtin_neon_vfmad_laneq_f64: {
     Ops.push_back(EmitScalarExpr(E->getArg(3)));
-    llvm::Type *Ty = ConvertType(E->getCallReturnType());
+    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
     return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
@@ -5825,104 +5930,95 @@
     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
     return Builder.CreateStore(Ops[1], Ops[0]);
   }
-  case X86::BI__builtin_ia32_palignr: {
-    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
-
-    // If palignr is shifting the pair of input vectors less than 9 bytes,
-    // emit a shuffle instruction.
-    if (shiftVal <= 8) {
-      SmallVector<llvm::Constant*, 8> Indices;
-      for (unsigned i = 0; i != 8; ++i)
-        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
-
-      Value* SV = llvm::ConstantVector::get(Indices);
-      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
-    }
-
-    // If palignr is shifting the pair of input vectors more than 8 but less
-    // than 16 bytes, emit a logical right shift of the destination.
-    if (shiftVal < 16) {
-      // MMX has these as 1 x i64 vectors for some odd optimization reasons.
-      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1);
-
-      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
-      Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8);
-
-      // create i32 constant
-      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q);
-      return Builder.CreateCall(F, makeArrayRef(Ops.data(), 2), "palignr");
-    }
-
-    // If palignr is shifting the pair of vectors more than 16 bytes, emit zero.
-    return llvm::Constant::getNullValue(ConvertType(E->getType()));
-  }
-  case X86::BI__builtin_ia32_palignr128: {
-    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
-
-    // If palignr is shifting the pair of input vectors less than 17 bytes,
-    // emit a shuffle instruction.
-    if (shiftVal <= 16) {
-      SmallVector<llvm::Constant*, 16> Indices;
-      for (unsigned i = 0; i != 16; ++i)
-        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
-
-      Value* SV = llvm::ConstantVector::get(Indices);
-      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
-    }
-
-    // If palignr is shifting the pair of input vectors more than 16 but less
-    // than 32 bytes, emit a logical right shift of the destination.
-    if (shiftVal < 32) {
-      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
-
-      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
-      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);
-
-      // create i32 constant
-      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq);
-      return Builder.CreateCall(F, makeArrayRef(Ops.data(), 2), "palignr");
-    }
-
-    // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
-    return llvm::Constant::getNullValue(ConvertType(E->getType()));
-  }
+  case X86::BI__builtin_ia32_palignr128:
   case X86::BI__builtin_ia32_palignr256: {
-    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
 
-    // If palignr is shifting the pair of input vectors less than 17 bytes,
-    // emit a shuffle instruction.
-    if (shiftVal <= 16) {
-      SmallVector<llvm::Constant*, 32> Indices;
-      // 256-bit palignr operates on 128-bit lanes so we need to handle that
-      for (unsigned l = 0; l != 2; ++l) {
-        unsigned LaneStart = l * 16;
-        unsigned LaneEnd = (l+1) * 16;
-        for (unsigned i = 0; i != 16; ++i) {
-          unsigned Idx = shiftVal + i + LaneStart;
-          if (Idx >= LaneEnd) Idx += 16; // end of lane, switch operand
-          Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx));
-        }
+    unsigned NumElts =
+      cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
+    assert(NumElts % 16 == 0);
+    unsigned NumLanes = NumElts / 16;
+    unsigned NumLaneElts = NumElts / NumLanes;
+
+    // If palignr is shifting the pair of vectors by at least the size of two
+    // lanes, emit zero.
+    if (ShiftVal >= (2 * NumLaneElts))
+      return llvm::Constant::getNullValue(ConvertType(E->getType()));
+
+    // More than one lane but less than two: shift Ops[0] alone, zero-filled.
+    if (ShiftVal > NumLaneElts) {
+      ShiftVal -= NumLaneElts;
+      Ops[1] = Ops[0]; // Ops[1] is the low input of the shuffle built below.
+      Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
+    }
+
+    SmallVector<llvm::Constant*, 32> Indices;
+    // 256-bit palignr operates on 128-bit lanes so we need to handle that
+    for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+      for (unsigned i = 0; i != NumLaneElts; ++i) {
+        unsigned Idx = ShiftVal + i;
+        if (Idx >= NumLaneElts)
+          Idx += NumElts - NumLaneElts; // End of lane, switch operand.
+        Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l));
       }
-
-      Value* SV = llvm::ConstantVector::get(Indices);
-      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
     }
 
-    // If palignr is shifting the pair of input vectors more than 16 but less
-    // than 32 bytes, emit a logical right shift of the destination.
-    if (shiftVal < 32) {
-      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 4);
+    Value* SV = llvm::ConstantVector::get(Indices);
+    return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
+  }
+  case X86::BI__builtin_ia32_pslldqi256: {
+    // Shift value is in bits so divide by 8.
+    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
 
-      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
-      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);
+    // If pslldq is shifting the vector more than 15 bytes, emit zero.
+    if (shiftVal >= 16)
+      return llvm::Constant::getNullValue(ConvertType(E->getType()));
 
-      // create i32 constant
-      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_avx2_psrl_dq);
-      return Builder.CreateCall(F, makeArrayRef(Ops.data(), 2), "palignr");
+    SmallVector<llvm::Constant*, 32> Indices;
+    // 256-bit pslldq operates on 128-bit lanes so we need to handle that
+    for (unsigned l = 0; l != 32; l += 16) {
+      for (unsigned i = 0; i != 16; ++i) {
+        unsigned Idx = 32 + i - shiftVal;
+        if (Idx < 32) Idx -= 16; // end of lane, switch operand.
+        Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l));
+      }
     }
 
-    // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
-    return llvm::Constant::getNullValue(ConvertType(E->getType()));
+    llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32);
+    Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
+    Value *Zero = llvm::Constant::getNullValue(VecTy);
+
+    Value *SV = llvm::ConstantVector::get(Indices);
+    SV = Builder.CreateShuffleVector(Zero, Ops[0], SV, "pslldq");
+    llvm::Type *ResultType = ConvertType(E->getType());
+    return Builder.CreateBitCast(SV, ResultType, "cast");
+  }
+  case X86::BI__builtin_ia32_psrldqi256: {
+    // Shift value is in bits so divide by 8.
+    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;
+
+    // If psrldq is shifting the vector more than 15 bytes, emit zero.
+    if (shiftVal >= 16)
+      return llvm::Constant::getNullValue(ConvertType(E->getType()));
+
+    SmallVector<llvm::Constant*, 32> Indices;
+    // 256-bit psrldq operates on 128-bit lanes so we need to handle that
+    for (unsigned l = 0; l != 32; l += 16) {
+      for (unsigned i = 0; i != 16; ++i) {
+        unsigned Idx = i + shiftVal;
+        if (Idx >= 16) Idx += 16; // end of lane, switch operand.
+        Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l));
+      }
+    }
+
+    llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32);
+    Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
+    Value *Zero = llvm::Constant::getNullValue(VecTy);
+
+    Value *SV = llvm::ConstantVector::get(Indices);
+    SV = Builder.CreateShuffleVector(Ops[0], Zero, SV, "psrldq");
+    llvm::Type *ResultType = ConvertType(E->getType());
+    return Builder.CreateBitCast(SV, ResultType, "cast");
   }
   case X86::BI__builtin_ia32_movntps:
   case X86::BI__builtin_ia32_movntps256:
@@ -5932,8 +6028,8 @@
   case X86::BI__builtin_ia32_movntdq256:
   case X86::BI__builtin_ia32_movnti:
   case X86::BI__builtin_ia32_movnti64: {
-    llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(),
-                                           Builder.getInt32(1));
+    llvm::MDNode *Node = llvm::MDNode::get(
+        getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
 
     // Convert the type of the pointer to a pointer to the stored type.
     Value *BC = Builder.CreateBitCast(Ops[0],
@@ -5956,20 +6052,10 @@
   // 3DNow!
   case X86::BI__builtin_ia32_pswapdsf:
   case X86::BI__builtin_ia32_pswapdsi: {
-    const char *name = nullptr;
-    Intrinsic::ID ID = Intrinsic::not_intrinsic;
-    switch(BuiltinID) {
-    default: llvm_unreachable("Unsupported intrinsic!");
-    case X86::BI__builtin_ia32_pswapdsf:
-    case X86::BI__builtin_ia32_pswapdsi:
-      name = "pswapd";
-      ID = Intrinsic::x86_3dnowa_pswapd;
-      break;
-    }
     llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
     Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
-    llvm::Function *F = CGM.getIntrinsic(ID);
-    return Builder.CreateCall(F, Ops, name);
+    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
+    return Builder.CreateCall(F, Ops, "pswapd");
   }
   case X86::BI__builtin_ia32_rdrand16_step:
   case X86::BI__builtin_ia32_rdrand32_step:
@@ -6011,6 +6097,154 @@
     Value *F = CGM.getIntrinsic(Intrinsic::x86_avx2_vbroadcasti128);
     return Builder.CreateCall(F, Builder.CreateBitCast(VecTmp, Int8PtrTy));
   }
+  // SSE comparison intrinsics
+  case X86::BI__builtin_ia32_cmpeqps:
+  case X86::BI__builtin_ia32_cmpltps:
+  case X86::BI__builtin_ia32_cmpleps:
+  case X86::BI__builtin_ia32_cmpunordps:
+  case X86::BI__builtin_ia32_cmpneqps:
+  case X86::BI__builtin_ia32_cmpnltps:
+  case X86::BI__builtin_ia32_cmpnleps:
+  case X86::BI__builtin_ia32_cmpordps:
+  case X86::BI__builtin_ia32_cmpeqss:
+  case X86::BI__builtin_ia32_cmpltss:
+  case X86::BI__builtin_ia32_cmpless:
+  case X86::BI__builtin_ia32_cmpunordss:
+  case X86::BI__builtin_ia32_cmpneqss:
+  case X86::BI__builtin_ia32_cmpnltss:
+  case X86::BI__builtin_ia32_cmpnless:
+  case X86::BI__builtin_ia32_cmpordss:
+  case X86::BI__builtin_ia32_cmpeqpd:
+  case X86::BI__builtin_ia32_cmpltpd:
+  case X86::BI__builtin_ia32_cmplepd:
+  case X86::BI__builtin_ia32_cmpunordpd:
+  case X86::BI__builtin_ia32_cmpneqpd:
+  case X86::BI__builtin_ia32_cmpnltpd:
+  case X86::BI__builtin_ia32_cmpnlepd:
+  case X86::BI__builtin_ia32_cmpordpd:
+  case X86::BI__builtin_ia32_cmpeqsd:
+  case X86::BI__builtin_ia32_cmpltsd:
+  case X86::BI__builtin_ia32_cmplesd:
+  case X86::BI__builtin_ia32_cmpunordsd:
+  case X86::BI__builtin_ia32_cmpneqsd:
+  case X86::BI__builtin_ia32_cmpnltsd:
+  case X86::BI__builtin_ia32_cmpnlesd:
+  case X86::BI__builtin_ia32_cmpordsd:
+    // These exist so that the builtin that takes an immediate can be bounds
+    // checked by clang to avoid passing bad immediates to the backend. Since
+    // AVX has a larger immediate than SSE we would need separate builtins to
+    // do the different bounds checking. Rather than create a clang specific
+    // SSE only builtin, this implements eight separate builtins to match gcc
+    // implementation.
+
+    // Choose the immediate.
+    unsigned Imm;
+    switch (BuiltinID) {
+    default: llvm_unreachable("Unsupported intrinsic!");
+    case X86::BI__builtin_ia32_cmpeqps:
+    case X86::BI__builtin_ia32_cmpeqss:
+    case X86::BI__builtin_ia32_cmpeqpd:
+    case X86::BI__builtin_ia32_cmpeqsd:
+      Imm = 0;
+      break;
+    case X86::BI__builtin_ia32_cmpltps:
+    case X86::BI__builtin_ia32_cmpltss:
+    case X86::BI__builtin_ia32_cmpltpd:
+    case X86::BI__builtin_ia32_cmpltsd:
+      Imm = 1;
+      break;
+    case X86::BI__builtin_ia32_cmpleps:
+    case X86::BI__builtin_ia32_cmpless:
+    case X86::BI__builtin_ia32_cmplepd:
+    case X86::BI__builtin_ia32_cmplesd:
+      Imm = 2;
+      break;
+    case X86::BI__builtin_ia32_cmpunordps:
+    case X86::BI__builtin_ia32_cmpunordss:
+    case X86::BI__builtin_ia32_cmpunordpd:
+    case X86::BI__builtin_ia32_cmpunordsd:
+      Imm = 3;
+      break;
+    case X86::BI__builtin_ia32_cmpneqps:
+    case X86::BI__builtin_ia32_cmpneqss:
+    case X86::BI__builtin_ia32_cmpneqpd:
+    case X86::BI__builtin_ia32_cmpneqsd:
+      Imm = 4;
+      break;
+    case X86::BI__builtin_ia32_cmpnltps:
+    case X86::BI__builtin_ia32_cmpnltss:
+    case X86::BI__builtin_ia32_cmpnltpd:
+    case X86::BI__builtin_ia32_cmpnltsd:
+      Imm = 5;
+      break;
+    case X86::BI__builtin_ia32_cmpnleps:
+    case X86::BI__builtin_ia32_cmpnless:
+    case X86::BI__builtin_ia32_cmpnlepd:
+    case X86::BI__builtin_ia32_cmpnlesd:
+      Imm = 6;
+      break;
+    case X86::BI__builtin_ia32_cmpordps:
+    case X86::BI__builtin_ia32_cmpordss:
+    case X86::BI__builtin_ia32_cmpordpd:
+    case X86::BI__builtin_ia32_cmpordsd:
+      Imm = 7;
+      break;
+    }
+
+    // Choose the intrinsic ID.
+    const char *name;
+    Intrinsic::ID ID;
+    switch (BuiltinID) {
+    default: llvm_unreachable("Unsupported intrinsic!");
+    case X86::BI__builtin_ia32_cmpeqps:
+    case X86::BI__builtin_ia32_cmpltps:
+    case X86::BI__builtin_ia32_cmpleps:
+    case X86::BI__builtin_ia32_cmpunordps:
+    case X86::BI__builtin_ia32_cmpneqps:
+    case X86::BI__builtin_ia32_cmpnltps:
+    case X86::BI__builtin_ia32_cmpnleps:
+    case X86::BI__builtin_ia32_cmpordps:
+      name = "cmpps";
+      ID = Intrinsic::x86_sse_cmp_ps;
+      break;
+    case X86::BI__builtin_ia32_cmpeqss:
+    case X86::BI__builtin_ia32_cmpltss:
+    case X86::BI__builtin_ia32_cmpless:
+    case X86::BI__builtin_ia32_cmpunordss:
+    case X86::BI__builtin_ia32_cmpneqss:
+    case X86::BI__builtin_ia32_cmpnltss:
+    case X86::BI__builtin_ia32_cmpnless:
+    case X86::BI__builtin_ia32_cmpordss:
+      name = "cmpss";
+      ID = Intrinsic::x86_sse_cmp_ss;
+      break;
+    case X86::BI__builtin_ia32_cmpeqpd:
+    case X86::BI__builtin_ia32_cmpltpd:
+    case X86::BI__builtin_ia32_cmplepd:
+    case X86::BI__builtin_ia32_cmpunordpd:
+    case X86::BI__builtin_ia32_cmpneqpd:
+    case X86::BI__builtin_ia32_cmpnltpd:
+    case X86::BI__builtin_ia32_cmpnlepd:
+    case X86::BI__builtin_ia32_cmpordpd:
+      name = "cmppd";
+      ID = Intrinsic::x86_sse2_cmp_pd;
+      break;
+    case X86::BI__builtin_ia32_cmpeqsd:
+    case X86::BI__builtin_ia32_cmpltsd:
+    case X86::BI__builtin_ia32_cmplesd:
+    case X86::BI__builtin_ia32_cmpunordsd:
+    case X86::BI__builtin_ia32_cmpneqsd:
+    case X86::BI__builtin_ia32_cmpnltsd:
+    case X86::BI__builtin_ia32_cmpnlesd:
+    case X86::BI__builtin_ia32_cmpordsd:
+      name = "cmpsd";
+      ID = Intrinsic::x86_sse2_cmp_sd;
+      break;
+    }
+
+    Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
+    llvm::Function *F = CGM.getIntrinsic(ID);
+    return Builder.CreateCall(F, Ops, name);
   }
 }
 
@@ -6214,6 +6448,9 @@
   case R600::BI__builtin_amdgpu_ldexp:
   case R600::BI__builtin_amdgpu_ldexpf:
     return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp);
+  case R600::BI__builtin_amdgpu_class:
+  case R600::BI__builtin_amdgpu_classf:
+    return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_class);
    default:
     return nullptr;
   }
diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp
index e467891..9f0e67e 100644
--- a/lib/CodeGen/CGCXX.cpp
+++ b/lib/CodeGen/CGCXX.cpp
@@ -167,9 +167,9 @@
   }
 
   if (!InEveryTU) {
-    /// If we don't have a definition for the destructor yet, don't
-    /// emit.  We can't emit aliases to declarations; that's just not
-    /// how aliases work.
+    // If we don't have a definition for the destructor yet, don't
+    // emit.  We can't emit aliases to declarations; that's just not
+    // how aliases work.
     if (Ref->isDeclaration())
       return true;
   }
diff --git a/lib/CodeGen/CGCXXABI.h b/lib/CodeGen/CGCXXABI.h
index c52b8e2..cc5c1b2 100644
--- a/lib/CodeGen/CGCXXABI.h
+++ b/lib/CodeGen/CGCXXABI.h
@@ -213,6 +213,7 @@
                                        const CXXDeleteExpr *DE,
                                        llvm::Value *Ptr, QualType ElementType,
                                        const CXXDestructorDecl *Dtor) = 0;
+  virtual void emitRethrow(CodeGenFunction &CGF, bool isNoReturn) = 0;
 
   virtual llvm::Constant *getAddrOfRTTIDescriptor(QualType Ty) = 0;
 
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index 2ced44d..7e6fef9 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -51,6 +51,8 @@
   case CC_X86Pascal: return llvm::CallingConv::C;
   // TODO: Add support for __vectorcall to LLVM.
   case CC_X86VectorCall: return llvm::CallingConv::X86_VectorCall;
+  case CC_SpirFunction: return llvm::CallingConv::SPIR_FUNC;
+  case CC_SpirKernel: return llvm::CallingConv::SPIR_KERNEL;
   }
 }
 
@@ -83,22 +85,23 @@
   // When translating an unprototyped function type, always use a
   // variadic type.
   return arrangeLLVMFunctionInfo(FTNP->getReturnType().getUnqualifiedType(),
-                                 false, None, FTNP->getExtInfo(),
-                                 RequiredArgs(0));
+                                 /*instanceMethod=*/false,
+                                 /*chainCall=*/false, None,
+                                 FTNP->getExtInfo(), RequiredArgs(0));
 }
 
 /// Arrange the LLVM function layout for a value of the given function
 /// type, on top of any implicit parameters already stored.
 static const CGFunctionInfo &
-arrangeLLVMFunctionInfo(CodeGenTypes &CGT, bool IsInstanceMethod,
+arrangeLLVMFunctionInfo(CodeGenTypes &CGT, bool instanceMethod,
                         SmallVectorImpl<CanQualType> &prefix,
                         CanQual<FunctionProtoType> FTP) {
   RequiredArgs required = RequiredArgs::forPrototypePlus(FTP, prefix.size());
   // FIXME: Kill copy.
-  for (unsigned i = 0, e = FTP->getNumParams(); i != e; ++i)
-    prefix.push_back(FTP->getParamType(i));
+  prefix.append(FTP->param_type_begin(), FTP->param_type_end());
   CanQualType resultType = FTP->getReturnType().getUnqualifiedType();
-  return CGT.arrangeLLVMFunctionInfo(resultType, IsInstanceMethod, prefix,
+  return CGT.arrangeLLVMFunctionInfo(resultType, instanceMethod,
+                                     /*chainCall=*/false, prefix,
                                      FTP->getExtInfo(), required);
 }
 
@@ -107,7 +110,8 @@
 const CGFunctionInfo &
 CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionProtoType> FTP) {
   SmallVector<CanQualType, 16> argTypes;
-  return ::arrangeLLVMFunctionInfo(*this, false, argTypes, FTP);
+  return ::arrangeLLVMFunctionInfo(*this, /*instanceMethod=*/false, argTypes,
+                                   FTP);
 }
 
 static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) {
@@ -130,9 +134,6 @@
   if (PcsAttr *PCS = D->getAttr<PcsAttr>())
     return (PCS->getPCS() == PcsAttr::AAPCS ? CC_AAPCS : CC_AAPCS_VFP);
 
-  if (D->hasAttr<PnaclCallAttr>())
-    return CC_PnaclCall;
-
   if (D->hasAttr<IntelOclBiccAttr>())
     return CC_IntelOclBicc;
 
@@ -205,8 +206,7 @@
   CanQual<FunctionProtoType> FTP = GetFormalType(MD);
 
   // Add the formal parameters.
-  for (unsigned i = 0, e = FTP->getNumParams(); i != e; ++i)
-    argTypes.push_back(FTP->getParamType(i));
+  argTypes.append(FTP->param_type_begin(), FTP->param_type_end());
 
   TheCXXABI.buildStructorSignature(MD, Type, argTypes);
 
@@ -219,7 +219,9 @@
                                : TheCXXABI.hasMostDerivedReturn(GD)
                                      ? CGM.getContext().VoidPtrTy
                                      : Context.VoidTy;
-  return arrangeLLVMFunctionInfo(resultType, true, argTypes, extInfo, required);
+  return arrangeLLVMFunctionInfo(resultType, /*instanceMethod=*/true,
+                                 /*chainCall=*/false, argTypes, extInfo,
+                                 required);
 }
 
 /// Arrange a call to a C++ method, passing the given arguments.
@@ -243,7 +245,9 @@
                                      : Context.VoidTy;
 
   FunctionType::ExtInfo Info = FPT->getExtInfo();
-  return arrangeLLVMFunctionInfo(ResultType, true, ArgTypes, Info, Required);
+  return arrangeLLVMFunctionInfo(ResultType, /*instanceMethod=*/true,
+                                 /*chainCall=*/false, ArgTypes, Info,
+                                 Required);
 }
 
 /// Arrange the argument and result information for the declaration or
@@ -262,8 +266,9 @@
   // non-variadic type.
   if (isa<FunctionNoProtoType>(FTy)) {
     CanQual<FunctionNoProtoType> noProto = FTy.getAs<FunctionNoProtoType>();
-    return arrangeLLVMFunctionInfo(noProto->getReturnType(), false, None,
-                                   noProto->getExtInfo(), RequiredArgs::All);
+    return arrangeLLVMFunctionInfo(
+        noProto->getReturnType(), /*instanceMethod=*/false,
+        /*chainCall=*/false, None, noProto->getExtInfo(), RequiredArgs::All);
   }
 
   assert(isa<FunctionProtoType>(FTy));
@@ -307,8 +312,9 @@
   RequiredArgs required =
     (MD->isVariadic() ? RequiredArgs(argTys.size()) : RequiredArgs::All);
 
-  return arrangeLLVMFunctionInfo(GetReturnType(MD->getReturnType()), false,
-                                 argTys, einfo, required);
+  return arrangeLLVMFunctionInfo(
+      GetReturnType(MD->getReturnType()), /*instanceMethod=*/false,
+      /*chainCall=*/false, argTys, einfo, required);
 }
 
 const CGFunctionInfo &
@@ -335,7 +341,8 @@
   assert(MD->isVirtual() && "only virtual memptrs have thunks");
   CanQual<FunctionProtoType> FTP = GetFormalType(MD);
   CanQualType ArgTys[] = { GetThisType(Context, MD->getParent()) };
-  return arrangeLLVMFunctionInfo(Context.VoidTy, false, ArgTys,
+  return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/false,
+                                 /*chainCall=*/false, ArgTys,
                                  FTP->getExtInfo(), RequiredArgs(1));
 }
 
@@ -346,7 +353,8 @@
                             CodeGenModule &CGM,
                             const CallArgList &args,
                             const FunctionType *fnType,
-                            unsigned numExtraRequiredArgs) {
+                            unsigned numExtraRequiredArgs,
+                            bool chainCall) {
   assert(args.size() >= numExtraRequiredArgs);
 
   // In most cases, there are no optional arguments.
@@ -368,8 +376,13 @@
     required = RequiredArgs(args.size());
   }
 
-  return CGT.arrangeFreeFunctionCall(fnType->getReturnType(), args,
-                                     fnType->getExtInfo(), required);
+  // FIXME: Kill copy.
+  SmallVector<CanQualType, 16> argTypes;
+  for (const auto &arg : args)
+    argTypes.push_back(CGT.getContext().getCanonicalParamType(arg.Ty));
+  return CGT.arrangeLLVMFunctionInfo(GetReturnType(fnType->getReturnType()),
+                                     /*instanceMethod=*/false, chainCall,
+                                     argTypes, fnType->getExtInfo(), required);
 }
 
 /// Figure out the rules for calling a function with the given formal
@@ -378,8 +391,10 @@
 /// target-dependent in crazy ways.
 const CGFunctionInfo &
 CodeGenTypes::arrangeFreeFunctionCall(const CallArgList &args,
-                                      const FunctionType *fnType) {
-  return arrangeFreeFunctionLikeCall(*this, CGM, args, fnType, 0);
+                                      const FunctionType *fnType,
+                                      bool chainCall) {
+  return arrangeFreeFunctionLikeCall(*this, CGM, args, fnType,
+                                     chainCall ? 1 : 0, chainCall);
 }
 
 /// A block function call is essentially a free-function call with an
@@ -387,7 +402,8 @@
 const CGFunctionInfo &
 CodeGenTypes::arrangeBlockFunctionCall(const CallArgList &args,
                                        const FunctionType *fnType) {
-  return arrangeFreeFunctionLikeCall(*this, CGM, args, fnType, 1);
+  return arrangeFreeFunctionLikeCall(*this, CGM, args, fnType, 1,
+                                     /*chainCall=*/false);
 }
 
 const CGFunctionInfo &
@@ -399,8 +415,9 @@
   SmallVector<CanQualType, 16> argTypes;
   for (const auto &Arg : args)
     argTypes.push_back(Context.getCanonicalParamType(Arg.Ty));
-  return arrangeLLVMFunctionInfo(GetReturnType(resultType), false, argTypes,
-                                 info, required);
+  return arrangeLLVMFunctionInfo(
+      GetReturnType(resultType), /*instanceMethod=*/false,
+      /*chainCall=*/false, argTypes, info, required);
 }
 
 /// Arrange a call to a C++ method, passing the given arguments.
@@ -414,8 +431,9 @@
     argTypes.push_back(Context.getCanonicalParamType(Arg.Ty));
 
   FunctionType::ExtInfo info = FPT->getExtInfo();
-  return arrangeLLVMFunctionInfo(GetReturnType(FPT->getReturnType()), true,
-                                 argTypes, info, required);
+  return arrangeLLVMFunctionInfo(
+      GetReturnType(FPT->getReturnType()), /*instanceMethod=*/true,
+      /*chainCall=*/false, argTypes, info, required);
 }
 
 const CGFunctionInfo &CodeGenTypes::arrangeFreeFunctionDeclaration(
@@ -428,13 +446,15 @@
 
   RequiredArgs required =
     (isVariadic ? RequiredArgs(args.size()) : RequiredArgs::All);
-  return arrangeLLVMFunctionInfo(GetReturnType(resultType), false, argTypes, info,
-                                 required);
+  return arrangeLLVMFunctionInfo(
+      GetReturnType(resultType), /*instanceMethod=*/false,
+      /*chainCall=*/false, argTypes, info, required);
 }
 
 const CGFunctionInfo &CodeGenTypes::arrangeNullaryFunction() {
-  return arrangeLLVMFunctionInfo(getContext().VoidTy, false, None,
-                                 FunctionType::ExtInfo(), RequiredArgs::All);
+  return arrangeLLVMFunctionInfo(
+      getContext().VoidTy, /*instanceMethod=*/false, /*chainCall=*/false,
+      None, FunctionType::ExtInfo(), RequiredArgs::All);
 }
 
 /// Arrange the argument and result information for an abstract value
@@ -442,22 +462,20 @@
 /// above functions ultimately defer to.
 const CGFunctionInfo &
 CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
-                                      bool IsInstanceMethod,
+                                      bool instanceMethod,
+                                      bool chainCall,
                                       ArrayRef<CanQualType> argTypes,
                                       FunctionType::ExtInfo info,
                                       RequiredArgs required) {
-#ifndef NDEBUG
-  for (ArrayRef<CanQualType>::const_iterator
-         I = argTypes.begin(), E = argTypes.end(); I != E; ++I)
-    assert(I->isCanonicalAsParam());
-#endif
+  assert(std::all_of(argTypes.begin(), argTypes.end(),
+                     std::mem_fun_ref(&CanQualType::isCanonicalAsParam)));
 
   unsigned CC = ClangCallConvToLLVMCallConv(info.getCC());
 
   // Lookup or create unique function info.
   llvm::FoldingSetNodeID ID;
-  CGFunctionInfo::Profile(ID, IsInstanceMethod, info, required, resultType,
-                          argTypes);
+  CGFunctionInfo::Profile(ID, instanceMethod, chainCall, info, required,
+                          resultType, argTypes);
 
   void *insertPos = nullptr;
   CGFunctionInfo *FI = FunctionInfos.FindNodeOrInsertPos(ID, insertPos);
@@ -465,8 +483,8 @@
     return *FI;
 
   // Construct the function info.  We co-allocate the ArgInfos.
-  FI = CGFunctionInfo::create(CC, IsInstanceMethod, info, resultType, argTypes,
-                              required);
+  FI = CGFunctionInfo::create(CC, instanceMethod, chainCall, info,
+                              resultType, argTypes, required);
   FunctionInfos.InsertNode(FI, insertPos);
 
   bool inserted = FunctionsBeingProcessed.insert(FI).second;
@@ -494,7 +512,8 @@
 }
 
 CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC,
-                                       bool IsInstanceMethod,
+                                       bool instanceMethod,
+                                       bool chainCall,
                                        const FunctionType::ExtInfo &info,
                                        CanQualType resultType,
                                        ArrayRef<CanQualType> argTypes,
@@ -505,7 +524,8 @@
   FI->CallingConvention = llvmCC;
   FI->EffectiveCallingConvention = llvmCC;
   FI->ASTCallingConvention = info.getCC();
-  FI->InstanceMethod = IsInstanceMethod;
+  FI->InstanceMethod = instanceMethod;
+  FI->ChainCall = chainCall;
   FI->NoReturn = info.getNoReturn();
   FI->ReturnsRetained = info.getProducesResult();
   FI->Required = required;
@@ -1343,6 +1363,7 @@
                                            bool AttrOnCallSite) {
   llvm::AttrBuilder FuncAttrs;
   llvm::AttrBuilder RetAttrs;
+  bool HasOptnone = false;
 
   CallingConv = FI.getEffectiveCallingConvention();
 
@@ -1379,16 +1400,22 @@
       FuncAttrs.addAttribute(llvm::Attribute::ReadOnly);
       FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
     }
-    if (TargetDecl->hasAttr<MallocAttr>())
+    if (TargetDecl->hasAttr<RestrictAttr>())
       RetAttrs.addAttribute(llvm::Attribute::NoAlias);
     if (TargetDecl->hasAttr<ReturnsNonNullAttr>())
       RetAttrs.addAttribute(llvm::Attribute::NonNull);
+
+    HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>();
   }
 
-  if (CodeGenOpts.OptimizeSize)
-    FuncAttrs.addAttribute(llvm::Attribute::OptimizeForSize);
-  if (CodeGenOpts.OptimizeSize == 2)
-    FuncAttrs.addAttribute(llvm::Attribute::MinSize);
+  // OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed.
+  if (!HasOptnone) {
+    if (CodeGenOpts.OptimizeSize)
+      FuncAttrs.addAttribute(llvm::Attribute::OptimizeForSize);
+    if (CodeGenOpts.OptimizeSize == 2)
+      FuncAttrs.addAttribute(llvm::Attribute::MinSize);
+  }
+
   if (CodeGenOpts.DisableRedZone)
     FuncAttrs.addAttribute(llvm::Attribute::NoRedZone);
   if (CodeGenOpts.NoImplicitFloat)
@@ -1493,7 +1520,6 @@
         getLLVMContext(), IRFunctionArgs.getInallocaArgNo() + 1, Attrs));
   }
 
-
   unsigned ArgNo = 0;
   for (CGFunctionInfo::const_arg_iterator I = FI.arg_begin(),
                                           E = FI.arg_end();
@@ -1521,7 +1547,9 @@
         Attrs.addAttribute(llvm::Attribute::ZExt);
       // FALL THROUGH
     case ABIArgInfo::Direct:
-      if (AI.getInReg())
+      if (ArgNo == 0 && FI.isChainCall())
+        Attrs.addAttribute(llvm::Attribute::Nest);
+      else if (AI.getInReg())
         Attrs.addAttribute(llvm::Attribute::InReg);
       break;
 
@@ -2312,7 +2340,7 @@
   }
 
   if (!RetDbgLoc.isUnknown())
-    Ret->setDebugLoc(RetDbgLoc);
+    Ret->setDebugLoc(std::move(RetDbgLoc));
 }
 
 static bool isInAllocaArgument(CGCXXABI &ABI, QualType type) {
@@ -2647,12 +2675,7 @@
                                    CallExpr::const_arg_iterator ArgBeg,
                                    CallExpr::const_arg_iterator ArgEnd,
                                    const FunctionDecl *CalleeDecl,
-                                   unsigned ParamsToSkip,
-                                   bool ForceColumnInfo) {
-  CGDebugInfo *DI = getDebugInfo();
-  SourceLocation CallLoc;
-  if (DI) CallLoc = DI->getLocation();
-
+                                   unsigned ParamsToSkip) {
   // We *have* to evaluate arguments from right to left in the MS C++ ABI,
   // because arguments are destroyed left to right in the callee.
   if (CGM.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) {
@@ -2673,8 +2696,6 @@
       EmitCallArg(Args, *Arg, ArgTypes[I]);
       emitNonNullArgCheck(*this, Args.back().RV, ArgTypes[I], Arg->getExprLoc(),
                           CalleeDecl, ParamsToSkip + I);
-      // Restore the debug location.
-      if (DI) DI->EmitLocation(Builder, CallLoc, ForceColumnInfo);
     }
 
     // Un-reverse the arguments we just evaluated so they match up with the LLVM
@@ -2689,8 +2710,6 @@
     EmitCallArg(Args, *Arg, ArgTypes[I]);
     emitNonNullArgCheck(*this, Args.back().RV, ArgTypes[I], Arg->getExprLoc(),
                         CalleeDecl, ParamsToSkip + I);
-    // Restore the debug location.
-    if (DI) DI->EmitLocation(Builder, CallLoc, ForceColumnInfo);
   }
 }
 
@@ -2713,8 +2732,22 @@
 
 }
 
+struct DisableDebugLocationUpdates { // scoped guard used by EmitCallArg
+  CodeGenFunction &CGF;
+  bool disabledDebugInfo; // true iff the constructor called disableDebugInfo()
+  DisableDebugLocationUpdates(CodeGenFunction &CGF, const Expr *E) : CGF(CGF) {
+    if ((disabledDebugInfo = isa<CXXDefaultArgExpr>(E) && CGF.getDebugInfo()))
+      CGF.disableDebugInfo(); // only for default args, only with debug info
+  }
+  ~DisableDebugLocationUpdates() {
+    if (disabledDebugInfo)
+      CGF.enableDebugInfo(); // undo the constructor's disable
+  }
+};
+
 void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
                                   QualType type) {
+  DisableDebugLocationUpdates Dis(*this, E);
   if (const ObjCIndirectCopyRestoreExpr *CRE
         = dyn_cast<ObjCIndirectCopyRestoreExpr>(E)) {
     assert(getLangOpts().ObjCAutoRefCount);
@@ -3269,7 +3302,8 @@
 
   llvm::BasicBlock *InvokeDest = nullptr;
   if (!Attrs.hasAttribute(llvm::AttributeSet::FunctionIndex,
-                          llvm::Attribute::NoUnwind))
+                          llvm::Attribute::NoUnwind) ||
+      currentFunctionUsesSEHTry())
     InvokeDest = getInvokeDest();
 
   llvm::CallSite CS;
@@ -3289,6 +3323,12 @@
         Attrs.addAttribute(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
                            llvm::Attribute::AlwaysInline);
 
+  // Disable inlining inside SEH __try blocks.
+  if (isSEHTryScope())
+    Attrs =
+        Attrs.addAttribute(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
+                           llvm::Attribute::NoInline);
+
   CS.setAttributes(Attrs);
   CS.setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
 
diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp
index 8138c6f..5649708 100644
--- a/lib/CodeGen/CGClass.cpp
+++ b/lib/CodeGen/CGClass.cpp
@@ -24,6 +24,7 @@
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "clang/Frontend/CodeGenOptions.h"
+#include "llvm/IR/Intrinsics.h"
 
 using namespace clang;
 using namespace CodeGen;
@@ -544,6 +545,7 @@
                                   CXXCtorInitializer *MemberInit,
                                   const CXXConstructorDecl *Constructor,
                                   FunctionArgList &Args) {
+  ApplyDebugLocation Loc(CGF, MemberInit->getSourceLocation());
   assert(MemberInit->isAnyMemberInitializer() &&
          "Must have member initializer!");
   assert(MemberInit->getInit() && "Must have initializer!");
@@ -600,9 +602,9 @@
   CGF.EmitInitializerForField(Field, LHS, MemberInit->getInit(), ArrayIndexes);
 }
 
-void CodeGenFunction::EmitInitializerForField(FieldDecl *Field,
-                                              LValue LHS, Expr *Init,
-                                             ArrayRef<VarDecl *> ArrayIndexes) {
+void CodeGenFunction::EmitInitializerForField(
+    FieldDecl *Field, LValue LHS, Expr *Init,
+    ArrayRef<VarDecl *> ArrayIndexes) {
   QualType FieldType = Field->getType();
   switch (getEvaluationKind(FieldType)) {
   case TEK_Scalar:
@@ -781,8 +783,6 @@
   // delegation optimization.
   if (CtorType == Ctor_Complete && IsConstructorDelegationValid(Ctor) &&
       CGM.getTarget().getCXXABI().hasConstructorVariants()) {
-    if (CGDebugInfo *DI = getDebugInfo()) 
-      DI->EmitLocation(Builder, Ctor->getLocEnd());
     EmitDelegateCXXConstructorCall(Ctor, Ctor_Base, Args, Ctor->getLocEnd());
     return;
   }
@@ -1735,7 +1735,7 @@
                                              bool Delegating, llvm::Value *This,
                                              const CXXConstructExpr *E) {
   // If this is a trivial constructor, just emit what's needed.
-  if (D->isTrivial()) {
+  if (D->isTrivial() && !D->getParent()->mayInsertExtraPadding()) {
     if (E->getNumArgs() == 0) {
       // Trivial default constructor, no codegen required.
       assert(D->isDefaultConstructor() &&
@@ -1748,9 +1748,10 @@
            "trivial 1-arg ctor not a copy/move ctor");
 
     const Expr *Arg = E->getArg(0);
-    QualType Ty = Arg->getType();
+    QualType SrcTy = Arg->getType();
     llvm::Value *Src = EmitLValue(Arg).getAddress();
-    EmitAggregateCopy(This, Src, Ty);
+    QualType DestTy = getContext().getTypeDeclType(D->getParent());
+    EmitAggregateCopyCtor(This, Src, DestTy, SrcTy);
     return;
   }
 
@@ -1785,11 +1786,14 @@
 CodeGenFunction::EmitSynthesizedCXXCopyCtorCall(const CXXConstructorDecl *D,
                                         llvm::Value *This, llvm::Value *Src,
                                         const CXXConstructExpr *E) {
-  if (D->isTrivial()) {
+  if (D->isTrivial() &&
+      !D->getParent()->mayInsertExtraPadding()) {
     assert(E->getNumArgs() == 1 && "unexpected argcount for trivial ctor");
     assert(D->isCopyOrMoveConstructor() &&
            "trivial 1-arg ctor not a copy/move ctor");
-    EmitAggregateCopy(This, Src, E->arg_begin()->getType());
+    EmitAggregateCopyCtor(This, Src,
+                          getContext().getTypeDeclType(D->getParent()),
+                          E->arg_begin()->getType());
     return;
   }
   llvm::Value *Callee = CGM.getAddrOfCXXStructor(D, StructorType::Complete);
@@ -1949,6 +1953,14 @@
                                          const CXXRecordDecl *NearestVBase,
                                          CharUnits OffsetFromNearestVBase,
                                          const CXXRecordDecl *VTableClass) {
+  const CXXRecordDecl *RD = Base.getBase();
+
+  // Don't initialize the vtable pointer if the class is marked with the
+  // 'novtable' attribute.
+  if ((RD == VTableClass || RD == NearestVBase) &&
+      VTableClass->hasAttr<MSNoVTableAttr>())
+    return;
+
   // Compute the address point.
   bool NeedsVirtualOffset;
   llvm::Value *VTableAddressPoint =
@@ -1982,10 +1994,14 @@
                                                   NonVirtualOffset,
                                                   VirtualOffset);
 
-  // Finally, store the address point.
-  llvm::Type *AddressPointPtrTy =
-    VTableAddressPoint->getType()->getPointerTo();
-  VTableField = Builder.CreateBitCast(VTableField, AddressPointPtrTy);
+  // Finally, store the address point. Use the same LLVM types as the field to
+  // support optimization.
+  llvm::Type *VTablePtrTy =
+      llvm::FunctionType::get(CGM.Int32Ty, /*isVarArg=*/true)
+          ->getPointerTo()
+          ->getPointerTo();
+  VTableField = Builder.CreateBitCast(VTableField, VTablePtrTy->getPointerTo());
+  VTableAddressPoint = Builder.CreateBitCast(VTableAddressPoint, VTablePtrTy);
   llvm::StoreInst *Store = Builder.CreateStore(VTableAddressPoint, VTableField);
   CGM.DecorateInstruction(Store, CGM.getTBAAInfoForVTablePtr());
 }
@@ -2072,6 +2088,38 @@
   return VTable;
 }
 
+void CodeGenFunction::EmitVTablePtrCheckForCall(const CXXMethodDecl *MD,
+                                                llvm::Value *VTable) {
+  if (!SanOpts.has(SanitizerKind::CFIVptr)) // no check unless CFIVptr is on
+    return;
+
+  const CXXRecordDecl *RD = MD->getParent();
+  // FIXME: Add blacklisting scheme.
+  if (RD->isInStdNamespace()) // classes in namespace std are not checked
+    return;
+
+  std::string OutName; // receives the mangled vtable bit set name for RD
+  llvm::raw_string_ostream Out(OutName);
+  CGM.getCXXABI().getMangleContext().mangleCXXVTableBitSet(RD, Out);
+
+  llvm::Value *BitSetName = llvm::MetadataAsValue::get(
+      getLLVMContext(), llvm::MDString::get(getLLVMContext(), Out.str()));
+  // Test the vtable pointer (cast to Int8PtrTy) against the named bit set.
+  llvm::Value *BitSetTest = Builder.CreateCall2(
+      CGM.getIntrinsic(llvm::Intrinsic::bitset_test),
+      Builder.CreateBitCast(VTable, CGM.Int8PtrTy), BitSetName);
+
+  llvm::BasicBlock *ContBlock = createBasicBlock("vtable.check.cont");
+  llvm::BasicBlock *TrapBlock = createBasicBlock("vtable.check.trap");
+  // A true test result continues normally; a false one goes to the trap block.
+  Builder.CreateCondBr(BitSetTest, ContBlock, TrapBlock);
+
+  EmitBlock(TrapBlock);
+  Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::trap));
+  Builder.CreateUnreachable();
+
+  EmitBlock(ContBlock);
+}
 
 // FIXME: Ideally Expr::IgnoreParenNoopCasts should do this, but it doesn't do
 // quite what we want.
@@ -2157,26 +2205,12 @@
   
   // Check if this is a call expr that returns a record type.
   if (const CallExpr *CE = dyn_cast<CallExpr>(Base))
-    return CE->getCallReturnType()->isRecordType();
+    return CE->getCallReturnType(getContext())->isRecordType();
 
   // We can't devirtualize the call.
   return false;
 }
 
-llvm::Value *
-CodeGenFunction::EmitCXXOperatorMemberCallee(const CXXOperatorCallExpr *E,
-                                             const CXXMethodDecl *MD,
-                                             llvm::Value *This) {
-  llvm::FunctionType *fnType =
-    CGM.getTypes().GetFunctionType(
-                             CGM.getTypes().arrangeCXXMethodDeclaration(MD));
-
-  if (MD->isVirtual() && !CanDevirtualizeMemberFunctionCall(E->getArg(0), MD))
-    return CGM.getCXXABI().getVirtualFunctionPointer(*this, MD, This, fnType);
-
-  return CGM.GetAddrOfFunction(MD, fnType);
-}
-
 void CodeGenFunction::EmitForwardingCallToLambda(
                                       const CXXMethodDecl *callOperator,
                                       CallArgList &callArgs) {
diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp
index d81e3a5..566befc 100644
--- a/lib/CodeGen/CGCleanup.cpp
+++ b/lib/CodeGen/CGCleanup.cpp
@@ -861,11 +861,6 @@
 
   // Emit the EH cleanup if required.
   if (RequiresEHCleanup) {
-    CGDebugInfo *DI = getDebugInfo();
-    SaveAndRestoreLocation AutoRestoreLocation(*this, Builder);
-    if (DI)
-      DI->EmitLocation(Builder, CurEHLocation);
-
     CGBuilderTy::InsertPoint SavedIP = Builder.saveAndClearIP();
 
     EmitBlock(EHEntry);
diff --git a/lib/CodeGen/CGCleanup.h b/lib/CodeGen/CGCleanup.h
index dd156c6..5f94aec 100644
--- a/lib/CodeGen/CGCleanup.h
+++ b/lib/CodeGen/CGCleanup.h
@@ -284,15 +284,13 @@
     delete ExtInfo;
   }
   // Objects of EHCleanupScope are not destructed. Use Destroy().
-  ~EHCleanupScope() LLVM_DELETED_FUNCTION;
+  ~EHCleanupScope() = delete;
 
   bool isNormalCleanup() const { return CleanupBits.IsNormalCleanup; }
   llvm::BasicBlock *getNormalBlock() const { return NormalBlock; }
   void setNormalBlock(llvm::BasicBlock *BB) { NormalBlock = BB; }
 
   bool isEHCleanup() const { return CleanupBits.IsEHCleanup; }
-  llvm::BasicBlock *getEHBlock() const { return getCachedEHDispatchBlock(); }
-  void setEHBlock(llvm::BasicBlock *BB) { setCachedEHDispatchBlock(BB); }
 
   bool isActive() const { return CleanupBits.IsActive; }
   void setActive(bool A) { CleanupBits.IsActive = A; }
diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp
index 416f69e..6d78ee8 100644
--- a/lib/CodeGen/CGDebugInfo.cpp
+++ b/lib/CodeGen/CGDebugInfo.cpp
@@ -52,54 +52,60 @@
          "Region stack mismatch, stack not empty!");
 }
 
-SaveAndRestoreLocation::SaveAndRestoreLocation(CodeGenFunction &CGF,
-                                               CGBuilderTy &B)
-    : DI(CGF.getDebugInfo()), Builder(B) {
-  if (DI) {
-    SavedLoc = DI->getLocation();
-    DI->CurLoc = SourceLocation();
+ApplyDebugLocation::ApplyDebugLocation(CodeGenFunction &CGF,
+                                       SourceLocation TemporaryLocation)
+    : CGF(CGF) {
+  init(TemporaryLocation); // DefaultToEmpty is left at init()'s default
+}
+
+ApplyDebugLocation::ApplyDebugLocation(CodeGenFunction &CGF,
+                                       bool DefaultToEmpty,
+                                       SourceLocation TemporaryLocation)
+    : CGF(CGF) {
+  init(TemporaryLocation, DefaultToEmpty); // note the swapped argument order
+}
+
+void ApplyDebugLocation::init(SourceLocation TemporaryLocation,
+                              bool DefaultToEmpty) {
+  if (auto *DI = CGF.getDebugInfo()) { // no-op without debug info
+    OriginalLocation = CGF.Builder.getCurrentDebugLocation(); // saved for dtor
+    if (TemporaryLocation.isInvalid()) { // no source location to apply
+      if (DefaultToEmpty) // caller asked for an empty DebugLoc
+        CGF.Builder.SetCurrentDebugLocation(llvm::DebugLoc());
+      else {
+        // Construct a location that has a valid scope, but no line info.
+        assert(!DI->LexicalBlockStack.empty());
+        llvm::DIDescriptor Scope(DI->LexicalBlockStack.back());
+        CGF.Builder.SetCurrentDebugLocation(llvm::DebugLoc::get(0, 0, Scope));
+      }
+    } else // valid location: emit it through CGDebugInfo
+      DI->EmitLocation(CGF.Builder, TemporaryLocation);
   }
 }
 
-SaveAndRestoreLocation::~SaveAndRestoreLocation() {
-  if (DI)
-    DI->EmitLocation(Builder, SavedLoc);
+ApplyDebugLocation::ApplyDebugLocation(CodeGenFunction &CGF, const Expr *E)
+    : CGF(CGF) {
+  init(E->getExprLoc()); // E is dereferenced unconditionally
 }
 
-NoLocation::NoLocation(CodeGenFunction &CGF, CGBuilderTy &B)
-    : SaveAndRestoreLocation(CGF, B) {
-  if (DI)
-    Builder.SetCurrentDebugLocation(llvm::DebugLoc());
-}
-
-NoLocation::~NoLocation() {
-  if (DI)
-    assert(Builder.getCurrentDebugLocation().isUnknown());
-}
-
-ArtificialLocation::ArtificialLocation(CodeGenFunction &CGF, CGBuilderTy &B)
-    : SaveAndRestoreLocation(CGF, B) {
-  if (DI)
-    Builder.SetCurrentDebugLocation(llvm::DebugLoc());
-}
-
-void ArtificialLocation::Emit() {
-  if (DI) {
-    // Sync the Builder.
-    DI->EmitLocation(Builder, SavedLoc);
-    DI->CurLoc = SourceLocation();
-    // Construct a location that has a valid scope, but no line info.
-    assert(!DI->LexicalBlockStack.empty());
-    llvm::DIDescriptor Scope(DI->LexicalBlockStack.back());
-    Builder.SetCurrentDebugLocation(llvm::DebugLoc::get(0, 0, Scope));
+ApplyDebugLocation::ApplyDebugLocation(CodeGenFunction &CGF, llvm::DebugLoc Loc)
+    : CGF(CGF) {
+  if (CGF.getDebugInfo()) { // no-op without debug info
+    OriginalLocation = CGF.Builder.getCurrentDebugLocation(); // saved for dtor
+    if (!Loc.isUnknown()) // an unknown Loc leaves the builder location as is
+      CGF.Builder.SetCurrentDebugLocation(std::move(Loc));
   }
 }
 
-ArtificialLocation::~ArtificialLocation() {
-  if (DI)
-    assert(Builder.getCurrentDebugLocation().getLine() == 0);
+ApplyDebugLocation::~ApplyDebugLocation() {
+  // Restore the saved location. CGF is queried so that nothing is overwritten
+  // while location updates are temporarily disabled (C++ default arguments).
+  if (CGF.getDebugInfo())
+    CGF.Builder.SetCurrentDebugLocation(std::move(OriginalLocation));
 }
 
+/// setLocation - Update the current source location. An invalid \p Loc is
+/// ignored and leaves the current location unchanged.
 void CGDebugInfo::setLocation(SourceLocation Loc) {
   // If the new location isn't valid return.
   if (Loc.isInvalid())
@@ -126,13 +132,13 @@
         LBF.getScope(), getOrCreateFile(CurLoc));
     llvm::MDNode *N = D;
     LexicalBlockStack.pop_back();
-    LexicalBlockStack.push_back(N);
+    LexicalBlockStack.emplace_back(N);
   } else if (Scope.isLexicalBlock() || Scope.isSubprogram()) {
     llvm::DIDescriptor D =
         DBuilder.createLexicalBlockFile(Scope, getOrCreateFile(CurLoc));
     llvm::MDNode *N = D;
     LexicalBlockStack.pop_back();
-    LexicalBlockStack.push_back(N);
+    LexicalBlockStack.emplace_back(N);
   }
 }
 
@@ -141,10 +147,9 @@
   if (!Context)
     return TheCU;
 
-  llvm::DenseMap<const Decl *, llvm::WeakVH>::iterator I =
-      RegionMap.find(Context);
+  auto I = RegionMap.find(Context);
   if (I != RegionMap.end()) {
-    llvm::Value *V = I->second;
+    llvm::Metadata *V = I->second;
     return llvm::DIScope(dyn_cast_or_null<llvm::MDNode>(V));
   }
 
@@ -256,18 +261,17 @@
 
   // Cache the results.
   const char *fname = PLoc.getFilename();
-  llvm::DenseMap<const char *, llvm::WeakVH>::iterator it =
-      DIFileCache.find(fname);
+  auto it = DIFileCache.find(fname);
 
   if (it != DIFileCache.end()) {
     // Verify that the information still exists.
-    if (llvm::Value *V = it->second)
+    if (llvm::Metadata *V = it->second)
       return llvm::DIFile(cast<llvm::MDNode>(V));
   }
 
   llvm::DIFile F = DBuilder.createFile(PLoc.getFilename(), getCurrentDirname());
 
-  DIFileCache[fname] = F;
+  DIFileCache[fname].reset(F);
   return F;
 }
 
@@ -424,9 +428,10 @@
         DBuilder.createStructType(TheCU, "objc_object", getOrCreateMainFile(),
                                   0, 0, 0, 0, llvm::DIType(), llvm::DIArray());
 
-    ObjTy.setArrays(DBuilder.getOrCreateArray(
-        &*DBuilder.createMemberType(ObjTy, "isa", getOrCreateMainFile(), 0,
-                                    Size, 0, 0, 0, ISATy)));
+    DBuilder.replaceArrays(
+        ObjTy,
+        DBuilder.getOrCreateArray(&*DBuilder.createMemberType(
+            ObjTy, "isa", getOrCreateMainFile(), 0, Size, 0, 0, 0, ISATy)));
     return ObjTy;
   }
   case BuiltinType::ObjCSel: {
@@ -616,6 +621,21 @@
   return FullName;
 }
 
+/// Select the DWARF tag for \p RD: structs and interfaces map to
+/// DW_TAG_structure_type, unions to DW_TAG_union_type, and anything else is
+/// asserted to be a class and maps to DW_TAG_class_type.
+static llvm::dwarf::Tag getTagForRecord(const RecordDecl *RD) {
+  if (RD->isStruct() || RD->isInterface())
+    return llvm::dwarf::DW_TAG_structure_type;
+  if (RD->isUnion())
+    return llvm::dwarf::DW_TAG_union_type;
+
+  // FIXME: This could be a struct type giving a default visibility different
+  // than C++ class type, but needs llvm metadata changes first.
+  assert(RD->isClass());
+  return llvm::dwarf::DW_TAG_class_type;
+}
+
 // Creates a forward declaration for a RecordDecl in the given context.
 llvm::DICompositeType
 CGDebugInfo::getOrCreateRecordFwdDecl(const RecordType *Ty,
@@ -627,21 +647,15 @@
   unsigned Line = getLineNumber(RD->getLocation());
   StringRef RDName = getClassName(RD);
 
-  llvm::dwarf::Tag Tag;
-  if (RD->isStruct() || RD->isInterface())
-    Tag = llvm::dwarf::DW_TAG_structure_type;
-  else if (RD->isUnion())
-    Tag = llvm::dwarf::DW_TAG_union_type;
-  else {
-    assert(RD->isClass());
-    Tag = llvm::dwarf::DW_TAG_class_type;
-  }
 
   // Create the type.
   SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU);
-  llvm::DICompositeType RetTy = DBuilder.createReplaceableForwardDecl(
-      Tag, RDName, Ctx, DefUnit, Line, 0, 0, 0, FullName);
-  ReplaceMap.push_back(std::make_pair(Ty, static_cast<llvm::Value *>(RetTy)));
+  llvm::DICompositeType RetTy = DBuilder.createReplaceableCompositeType(
+      getTagForRecord(RD), RDName, Ctx, DefUnit, Line, 0, 0, 0,
+      llvm::DIDescriptor::FlagFwdDecl, FullName);
+  ReplaceMap.emplace_back(
+      std::piecewise_construct, std::make_tuple(Ty),
+      std::make_tuple(static_cast<llvm::Metadata *>(RetTy)));
   return RetTy;
 }
 
@@ -680,7 +694,7 @@
   if (BlockLiteralGeneric)
     return BlockLiteralGeneric;
 
-  SmallVector<llvm::Value *, 8> EltTys;
+  SmallVector<llvm::Metadata *, 8> EltTys;
   llvm::DIType FieldTy;
   QualType FType;
   uint64_t FieldSize, FieldOffset;
@@ -784,7 +798,7 @@
 
 llvm::DIType CGDebugInfo::CreateType(const FunctionType *Ty,
                                      llvm::DIFile Unit) {
-  SmallVector<llvm::Value *, 16> EltTys;
+  SmallVector<llvm::Metadata *, 16> EltTys;
 
   // Add the result type at least.
   EltTys.push_back(getOrCreateType(Ty->getReturnType(), Unit));
@@ -857,10 +871,9 @@
 }
 
 /// CollectRecordLambdaFields - Helper for CollectRecordFields.
-void
-CGDebugInfo::CollectRecordLambdaFields(const CXXRecordDecl *CXXDecl,
-                                       SmallVectorImpl<llvm::Value *> &elements,
-                                       llvm::DIType RecordTy) {
+void CGDebugInfo::CollectRecordLambdaFields(
+    const CXXRecordDecl *CXXDecl, SmallVectorImpl<llvm::Metadata *> &elements,
+    llvm::DIType RecordTy) {
   // For C++11 Lambdas a Field will be the same as a Capture, but the Capture
   // has the name and the location of the variable so we should iterate over
   // both concurrently.
@@ -928,14 +941,14 @@
   unsigned Flags = getAccessFlag(Var->getAccess(), RD);
   llvm::DIDerivedType GV = DBuilder.createStaticMemberType(
       RecordTy, VName, VUnit, LineNumber, VTy, Flags, C);
-  StaticDataMemberCache[Var->getCanonicalDecl()] = llvm::WeakVH(GV);
+  StaticDataMemberCache[Var->getCanonicalDecl()].reset(GV);
   return GV;
 }
 
 /// CollectRecordNormalField - Helper for CollectRecordFields.
 void CGDebugInfo::CollectRecordNormalField(
     const FieldDecl *field, uint64_t OffsetInBits, llvm::DIFile tunit,
-    SmallVectorImpl<llvm::Value *> &elements, llvm::DIType RecordTy,
+    SmallVectorImpl<llvm::Metadata *> &elements, llvm::DIType RecordTy,
     const RecordDecl *RD) {
   StringRef name = field->getName();
   QualType type = field->getType();
@@ -959,10 +972,10 @@
 
 /// CollectRecordFields - A helper function to collect debug info for
 /// record fields. This is used while creating debug info entry for a Record.
-void CGDebugInfo::CollectRecordFields(const RecordDecl *record,
-                                      llvm::DIFile tunit,
-                                      SmallVectorImpl<llvm::Value *> &elements,
-                                      llvm::DICompositeType RecordTy) {
+void CGDebugInfo::CollectRecordFields(
+    const RecordDecl *record, llvm::DIFile tunit,
+    SmallVectorImpl<llvm::Metadata *> &elements,
+    llvm::DICompositeType RecordTy) {
   const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(record);
 
   if (CXXDecl && CXXDecl->isLambda())
@@ -978,8 +991,7 @@
     for (const auto *I : record->decls())
       if (const auto *V = dyn_cast<VarDecl>(I)) {
         // Reuse the existing static member declaration if one exists
-        llvm::DenseMap<const Decl *, llvm::WeakVH>::iterator MI =
-            StaticDataMemberCache.find(V->getCanonicalDecl());
+        auto MI = StaticDataMemberCache.find(V->getCanonicalDecl());
         if (MI != StaticDataMemberCache.end()) {
           assert(MI->second &&
                  "Static data member declaration should still exist");
@@ -1019,7 +1031,7 @@
       getOrCreateType(QualType(Func, 0), Unit)).getTypeArray();
   assert(Args.getNumElements() && "Invalid number of arguments!");
 
-  SmallVector<llvm::Value *, 16> Elts;
+  SmallVector<llvm::Metadata *, 16> Elts;
 
   // First element is always return type. For 'void' functions it is NULL.
   Elts.push_back(Args.getElement(0));
@@ -1036,7 +1048,7 @@
     llvm::DIType PointeeType = getOrCreateType(PointeeTy, Unit);
     llvm::DIType ThisPtrType =
         DBuilder.createPointerType(PointeeType, Size, Align);
-    TypeCache[ThisPtr.getAsOpaquePtr()] = ThisPtrType;
+    TypeCache[ThisPtr.getAsOpaquePtr()].reset(ThisPtrType);
     // TODO: This and the artificial type below are misleading, the
     // types aren't artificial the argument is, but the current
     // metadata doesn't represent that.
@@ -1044,7 +1056,7 @@
     Elts.push_back(ThisPtrType);
   } else {
     llvm::DIType ThisPtrType = getOrCreateType(ThisPtr, Unit);
-    TypeCache[ThisPtr.getAsOpaquePtr()] = ThisPtrType;
+    TypeCache[ThisPtr.getAsOpaquePtr()].reset(ThisPtrType);
     ThisPtrType = DBuilder.createObjectPointerType(ThisPtrType);
     Elts.push_back(ThisPtrType);
   }
@@ -1147,7 +1159,7 @@
       /* isDefinition=*/false, Virtuality, VIndex, ContainingType, Flags,
       CGM.getLangOpts().Optimize, nullptr, TParamsArray);
 
-  SPCache[Method->getCanonicalDecl()] = llvm::WeakVH(SP);
+  SPCache[Method->getCanonicalDecl()].reset(SP);
 
   return SP;
 }
@@ -1157,7 +1169,7 @@
 /// a Record.
 void CGDebugInfo::CollectCXXMemberFunctions(
     const CXXRecordDecl *RD, llvm::DIFile Unit,
-    SmallVectorImpl<llvm::Value *> &EltTys, llvm::DIType RecordTy) {
+    SmallVectorImpl<llvm::Metadata *> &EltTys, llvm::DIType RecordTy) {
 
   // Since we want more than just the individual member decls if we
   // have templated functions iterate over every declaration to gather
@@ -1188,7 +1200,7 @@
     auto MI = SPCache.find(Method->getCanonicalDecl());
     EltTys.push_back(MI == SPCache.end()
                          ? CreateCXXMemberFunction(Method, Unit, RecordTy)
-                         : static_cast<llvm::Value *>(MI->second));
+                         : static_cast<llvm::Metadata *>(MI->second));
   }
 }
 
@@ -1196,7 +1208,7 @@
 /// C++ base classes. This is used while creating debug info entry for
 /// a Record.
 void CGDebugInfo::CollectCXXBases(const CXXRecordDecl *RD, llvm::DIFile Unit,
-                                  SmallVectorImpl<llvm::Value *> &EltTys,
+                                  SmallVectorImpl<llvm::Metadata *> &EltTys,
                                   llvm::DIType RecordTy) {
 
   const ASTRecordLayout &RL = CGM.getContext().getASTRecordLayout(RD);
@@ -1238,7 +1250,7 @@
 CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList,
                                    ArrayRef<TemplateArgument> TAList,
                                    llvm::DIFile Unit) {
-  SmallVector<llvm::Value *, 16> TemplateParams;
+  SmallVector<llvm::Metadata *, 16> TemplateParams;
   for (unsigned i = 0, e = TAList.size(); i != e; ++i) {
     const TemplateArgument &TA = TAList[i];
     StringRef Name;
@@ -1263,7 +1275,7 @@
       const ValueDecl *D = TA.getAsDecl();
       QualType T = TA.getParamTypeForDecl().getDesugaredType(CGM.getContext());
       llvm::DIType TTy = getOrCreateType(T, Unit);
-      llvm::Value *V = nullptr;
+      llvm::Constant *V = nullptr;
       const CXXMethodDecl *MD;
       // Variable pointer template parameters have a value that is the address
       // of the variable.
@@ -1295,7 +1307,7 @@
     case TemplateArgument::NullPtr: {
       QualType T = TA.getNullPtrType();
       llvm::DIType TTy = getOrCreateType(T, Unit);
-      llvm::Value *V = nullptr;
+      llvm::Constant *V = nullptr;
       // Special case member data pointer null values since they're actually -1
       // instead of zero.
       if (const MemberPointerType *MPT =
@@ -1332,7 +1344,7 @@
       QualType T = E->getType();
       if (E->isGLValue())
         T = CGM.getContext().getLValueReferenceType(T);
-      llvm::Value *V = CGM.EmitConstantExpr(E, T);
+      llvm::Constant *V = CGM.EmitConstantExpr(E, T);
       assert(V && "Expression in template argument isn't constant");
       llvm::DIType TTy = getOrCreateType(T, Unit);
       llvm::DITemplateValueParameter TVP =
@@ -1385,7 +1397,7 @@
   ASTContext &Context = CGM.getContext();
 
   /* Function type */
-  llvm::Value *STy = getOrCreateType(Context.IntTy, Unit);
+  llvm::Metadata *STy = getOrCreateType(Context.IntTy, Unit);
   llvm::DITypeArray SElements = DBuilder.getOrCreateTypeArray(STy);
   llvm::DIType SubTy = DBuilder.createSubroutineType(Unit, SElements);
   unsigned Size = Context.getTypeSize(Context.VoidPtrTy);
@@ -1404,7 +1416,7 @@
 /// CollectVTableInfo - If the C++ class has vtable info then insert appropriate
 /// debug info entry in EltTys vector.
 void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile Unit,
-                                    SmallVectorImpl<llvm::Value *> &EltTys) {
+                                    SmallVectorImpl<llvm::Metadata *> &EltTys) {
   const ASTRecordLayout &RL = CGM.getContext().getASTRecordLayout(RD);
 
   // If there is a primary base then it will hold vtable info.
@@ -1447,12 +1459,11 @@
   void *TyPtr = Ty.getAsOpaquePtr();
   auto I = TypeCache.find(TyPtr);
   if (I == TypeCache.end() ||
-      !llvm::DIType(cast<llvm::MDNode>(static_cast<llvm::Value *>(I->second)))
-           .isForwardDecl())
+      !llvm::DIType(cast<llvm::MDNode>(I->second)).isForwardDecl())
     return;
   llvm::DIType Res = CreateTypeDefinition(Ty->castAs<EnumType>());
   assert(!Res.isForwardDecl());
-  TypeCache[TyPtr] = Res;
+  TypeCache[TyPtr].reset(Res);
 }
 
 void CGDebugInfo::completeType(const RecordDecl *RD) {
@@ -1482,12 +1493,11 @@
   void *TyPtr = Ty.getAsOpaquePtr();
   auto I = TypeCache.find(TyPtr);
   if (I != TypeCache.end() &&
-      !llvm::DIType(cast<llvm::MDNode>(static_cast<llvm::Value *>(I->second)))
-           .isForwardDecl())
+      !llvm::DIType(cast<llvm::MDNode>(I->second)).isForwardDecl())
     return;
   llvm::DIType Res = CreateTypeDefinition(Ty->castAs<RecordType>());
   assert(!Res.isForwardDecl());
-  TypeCache[TyPtr] = Res;
+  TypeCache[TyPtr].reset(Res);
 }
 
 static bool hasExplicitMemberDefinition(CXXRecordDecl::method_iterator I,
@@ -1564,18 +1574,19 @@
   assert(FwdDecl.isCompositeType() &&
          "The debug type of a RecordType should be a llvm::DICompositeType");
 
-  if (FwdDecl.isForwardDecl())
+  const RecordDecl *D = RD->getDefinition();
+  if (!D || !D->isCompleteDefinition())
     return FwdDecl;
 
   if (const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(RD))
     CollectContainingType(CXXDecl, FwdDecl);
 
   // Push the struct on region stack.
-  LexicalBlockStack.push_back(&*FwdDecl);
-  RegionMap[Ty->getDecl()] = llvm::WeakVH(FwdDecl);
+  LexicalBlockStack.emplace_back(&*FwdDecl);
+  RegionMap[Ty->getDecl()].reset(FwdDecl);
 
   // Convert all the elements.
-  SmallVector<llvm::Value *, 16> EltTys;
+  SmallVector<llvm::Metadata *, 16> EltTys;
   // what about nested types?
 
   // Note: The split of CXXDecl information here is intentional, the
@@ -1597,9 +1608,13 @@
   RegionMap.erase(Ty->getDecl());
 
   llvm::DIArray Elements = DBuilder.getOrCreateArray(EltTys);
-  FwdDecl.setArrays(Elements);
+  DBuilder.replaceArrays(FwdDecl, Elements);
 
-  RegionMap[Ty->getDecl()] = llvm::WeakVH(FwdDecl);
+  if (FwdDecl->isTemporary())
+    FwdDecl = llvm::DICompositeType(llvm::MDNode::replaceWithPermanent(
+      llvm::TempMDNode(FwdDecl.get())));
+
+  RegionMap[Ty->getDecl()].reset(FwdDecl);
   return FwdDecl;
 }
 
@@ -1650,7 +1665,7 @@
   // debug type since we won't be able to lay out the entire type.
   ObjCInterfaceDecl *Def = ID->getDefinition();
   if (!Def || !Def->getImplementation()) {
-    llvm::DIType FwdDecl = DBuilder.createReplaceableForwardDecl(
+    llvm::DIType FwdDecl = DBuilder.createReplaceableCompositeType(
         llvm::dwarf::DW_TAG_structure_type, ID->getName(), TheCU, DefUnit, Line,
         RuntimeLang);
     ObjCInterfaceCache.push_back(ObjCInterfaceCacheEntry(Ty, FwdDecl, Unit));
@@ -1680,14 +1695,14 @@
       llvm::DIArray(), RuntimeLang);
 
   QualType QTy(Ty, 0);
-  TypeCache[QTy.getAsOpaquePtr()] = RealDecl;
+  TypeCache[QTy.getAsOpaquePtr()].reset(RealDecl);
 
   // Push the struct on region stack.
-  LexicalBlockStack.push_back(static_cast<llvm::MDNode *>(RealDecl));
-  RegionMap[Ty->getDecl()] = llvm::WeakVH(RealDecl);
+  LexicalBlockStack.emplace_back(static_cast<llvm::MDNode *>(RealDecl));
+  RegionMap[Ty->getDecl()].reset(RealDecl);
 
   // Convert all the elements.
-  SmallVector<llvm::Value *, 16> EltTys;
+  SmallVector<llvm::Metadata *, 16> EltTys;
 
   ObjCInterfaceDecl *SClass = ID->getSuperClass();
   if (SClass) {
@@ -1799,7 +1814,7 @@
   }
 
   llvm::DIArray Elements = DBuilder.getOrCreateArray(EltTys);
-  RealDecl.setArrays(Elements);
+  DBuilder.replaceArrays(RealDecl, Elements);
 
   LexicalBlockStack.pop_back();
   return RealDecl;
@@ -1813,7 +1828,7 @@
     // Use Count == -1 to express such arrays.
     Count = -1;
 
-  llvm::Value *Subscript = DBuilder.getOrCreateSubrange(0, Count);
+  llvm::Metadata *Subscript = DBuilder.getOrCreateSubrange(0, Count);
   llvm::DIArray SubscriptArray = DBuilder.getOrCreateArray(Subscript);
 
   uint64_t Size = CGM.getContext().getTypeSize(Ty);
@@ -1849,7 +1864,7 @@
   // Add the dimensions of the array.  FIXME: This loses CV qualifiers from
   // interior arrays, do we care?  Why aren't nested arrays represented the
   // obvious/recursive way?
-  SmallVector<llvm::Value *, 8> Subscripts;
+  SmallVector<llvm::Metadata *, 8> Subscripts;
   QualType EltTy(Ty, 0);
   while ((Ty = dyn_cast<ArrayType>(EltTy))) {
     // If the number of elements is known, then count is that number. Otherwise,
@@ -1892,7 +1907,8 @@
   llvm::DIType ClassType = getOrCreateType(QualType(Ty->getClass(), 0), U);
   if (!Ty->getPointeeType()->isFunctionType())
     return DBuilder.createMemberPointerType(
-        getOrCreateType(Ty->getPointeeType(), U), ClassType);
+      getOrCreateType(Ty->getPointeeType(), U), ClassType,
+      CGM.getContext().getTypeSize(Ty));
 
   const FunctionProtoType *FPT =
       Ty->getPointeeType()->getAs<FunctionProtoType>();
@@ -1900,7 +1916,7 @@
       getOrCreateInstanceMethodType(CGM.getContext().getPointerType(QualType(
                                         Ty->getClass(), FPT->getTypeQuals())),
                                     FPT, U),
-      ClassType);
+      ClassType, CGM.getContext().getTypeSize(Ty));
 }
 
 llvm::DIType CGDebugInfo::CreateType(const AtomicType *Ty, llvm::DIFile U) {
@@ -1929,10 +1945,12 @@
     llvm::DIFile DefUnit = getOrCreateFile(ED->getLocation());
     unsigned Line = getLineNumber(ED->getLocation());
     StringRef EDName = ED->getName();
-    llvm::DIType RetTy = DBuilder.createReplaceableForwardDecl(
+    llvm::DIType RetTy = DBuilder.createReplaceableCompositeType(
         llvm::dwarf::DW_TAG_enumeration_type, EDName, EDContext, DefUnit, Line,
-        0, Size, Align, FullName);
-    ReplaceMap.push_back(std::make_pair(Ty, static_cast<llvm::Value *>(RetTy)));
+        0, Size, Align, llvm::DIDescriptor::FlagFwdDecl, FullName);
+    ReplaceMap.emplace_back(
+        std::piecewise_construct, std::make_tuple(Ty),
+        std::make_tuple(static_cast<llvm::Metadata *>(RetTy)));
     return RetTy;
   }
 
@@ -1951,7 +1969,7 @@
   SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU);
 
   // Create DIEnumerator elements for each enumerator.
-  SmallVector<llvm::Value *, 16> Enumerators;
+  SmallVector<llvm::Metadata *, 16> Enumerators;
   ED = ED->getDefinition();
   for (const auto *Enum : ED->enumerators()) {
     Enumerators.push_back(DBuilder.createEnumerator(
@@ -2039,7 +2057,7 @@
   auto it = TypeCache.find(Ty.getAsOpaquePtr());
   if (it != TypeCache.end()) {
     // Verify that the debug info still exists.
-    if (llvm::Value *V = it->second)
+    if (llvm::Metadata *V = it->second)
       return llvm::DIType(cast<llvm::MDNode>(V));
   }
 
@@ -2074,7 +2092,7 @@
   void *TyPtr = Ty.getAsOpaquePtr();
 
   // And update the type cache.
-  TypeCache[TyPtr] = Res;
+  TypeCache[TyPtr].reset(Res);
 
   return Res;
 }
@@ -2206,10 +2224,10 @@
   // Propagate members from the declaration to the definition
   // CreateType(const RecordType*) will overwrite this with the members in the
   // correct order if the full type is needed.
-  Res.setArrays(T.getElements());
+  DBuilder.replaceArrays(Res, T.getElements());
 
   // And update the type cache.
-  TypeCache[QTy.getAsOpaquePtr()] = Res;
+  TypeCache[QTy.getAsOpaquePtr()].reset(Res);
   return Res;
 }
 
@@ -2243,27 +2261,16 @@
 
   SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU);
 
-  if (RD->isUnion())
-    RealDecl = DBuilder.createUnionType(RDContext, RDName, DefUnit, Line, Size,
-                                        Align, 0, llvm::DIArray(), 0, FullName);
-  else if (RD->isClass()) {
-    // FIXME: This could be a struct type giving a default visibility different
-    // than C++ class type, but needs llvm metadata changes first.
-    RealDecl = DBuilder.createClassType(
-        RDContext, RDName, DefUnit, Line, Size, Align, 0, 0, llvm::DIType(),
-        llvm::DIArray(), llvm::DIType(), llvm::DIArray(), FullName);
-  } else
-    RealDecl = DBuilder.createStructType(
-        RDContext, RDName, DefUnit, Line, Size, Align, 0, llvm::DIType(),
-        llvm::DIArray(), 0, llvm::DIType(), FullName);
+  RealDecl = DBuilder.createReplaceableCompositeType(getTagForRecord(RD),
+      RDName, RDContext, DefUnit, Line, 0, Size, Align, 0, FullName);
 
-  RegionMap[Ty->getDecl()] = llvm::WeakVH(RealDecl);
-  TypeCache[QualType(Ty, 0).getAsOpaquePtr()] = RealDecl;
+  RegionMap[Ty->getDecl()].reset(RealDecl);
+  TypeCache[QualType(Ty, 0).getAsOpaquePtr()].reset(RealDecl);
 
   if (const ClassTemplateSpecializationDecl *TSpecial =
           dyn_cast<ClassTemplateSpecializationDecl>(RD))
-    RealDecl.setArrays(llvm::DIArray(),
-                       CollectCXXTemplateParams(TSpecial, DefUnit));
+    DBuilder.replaceArrays(RealDecl, llvm::DIArray(),
+                           CollectCXXTemplateParams(TSpecial, DefUnit));
   return RealDecl;
 }
 
@@ -2288,7 +2295,7 @@
   } else if (RD->isDynamicClass())
     ContainingType = RealDecl;
 
-  RealDecl.setContainingType(ContainingType);
+  DBuilder.replaceVTableHolder(RealDecl, ContainingType);
 }
 
 /// CreateMemberType - Create new member and increase Offset by FType's size.
@@ -2401,8 +2408,9 @@
                                        CGM.getLangOpts().Optimize, nullptr,
                                        TParamsArray, getFunctionDeclaration(FD));
   const FunctionDecl *CanonDecl = cast<FunctionDecl>(FD->getCanonicalDecl());
-  FwdDeclReplaceMap.push_back(std::make_pair(CanonDecl,
-                                             static_cast<llvm::Value *>(SP)));
+  FwdDeclReplaceMap.emplace_back(
+      std::piecewise_construct, std::make_tuple(CanonDecl),
+      std::make_tuple(static_cast<llvm::Metadata *>(SP)));
   return SP;
 }
 
@@ -2421,8 +2429,10 @@
                                              Line, getOrCreateType(T, Unit),
                                              !VD->isExternallyVisible(),
                                              nullptr, nullptr);
-  FwdDeclReplaceMap.push_back(std::make_pair(cast<VarDecl>(VD->getCanonicalDecl()),
-                                             static_cast<llvm::Value *>(GV)));
+  FwdDeclReplaceMap.emplace_back(
+      std::piecewise_construct,
+      std::make_tuple(cast<VarDecl>(VD->getCanonicalDecl())),
+      std::make_tuple(static_cast<llvm::Metadata *>(GV)));
   return GV;
 }
 
@@ -2434,13 +2444,10 @@
   if (const TypeDecl *TD = dyn_cast<TypeDecl>(D))
     return getOrCreateType(CGM.getContext().getTypeDeclType(TD),
                            getOrCreateFile(TD->getLocation()));
-  llvm::DenseMap<const Decl *, llvm::WeakVH>::iterator I =
-      DeclCache.find(D->getCanonicalDecl());
+  auto I = DeclCache.find(D->getCanonicalDecl());
 
-  if (I != DeclCache.end()) {
-    llvm::Value *V = I->second;
-    return llvm::DIDescriptor(dyn_cast_or_null<llvm::MDNode>(V));
-  }
+  if (I != DeclCache.end())
+    return llvm::DIDescriptor(dyn_cast_or_null<llvm::MDNode>(I->second));
 
   // No definition for now. Emit a forward definition that might be
   // merged with a potential upcoming definition.
@@ -2465,8 +2472,7 @@
   // Setup context.
   llvm::DIScope S = getContextDescriptor(cast<Decl>(D->getDeclContext()));
 
-  llvm::DenseMap<const FunctionDecl *, llvm::WeakVH>::iterator MI =
-      SPCache.find(FD->getCanonicalDecl());
+  auto MI = SPCache.find(FD->getCanonicalDecl());
   if (MI == SPCache.end()) {
     if (const CXXMethodDecl *MD =
             dyn_cast<CXXMethodDecl>(FD->getCanonicalDecl())) {
@@ -2477,18 +2483,15 @@
     }
   }
   if (MI != SPCache.end()) {
-    llvm::Value *V = MI->second;
-    llvm::DISubprogram SP(dyn_cast_or_null<llvm::MDNode>(V));
+    llvm::DISubprogram SP(dyn_cast_or_null<llvm::MDNode>(MI->second));
     if (SP.isSubprogram() && !SP.isDefinition())
       return SP;
   }
 
   for (auto NextFD : FD->redecls()) {
-    llvm::DenseMap<const FunctionDecl *, llvm::WeakVH>::iterator MI =
-        SPCache.find(NextFD->getCanonicalDecl());
+    auto MI = SPCache.find(NextFD->getCanonicalDecl());
     if (MI != SPCache.end()) {
-      llvm::Value *V = MI->second;
-      llvm::DISubprogram SP(dyn_cast_or_null<llvm::MDNode>(V));
+      llvm::DISubprogram SP(dyn_cast_or_null<llvm::MDNode>(MI->second));
       if (SP.isSubprogram() && !SP.isDefinition())
         return SP;
     }
@@ -2513,7 +2516,7 @@
     return getOrCreateMethodType(Method, F);
   if (const ObjCMethodDecl *OMethod = dyn_cast<ObjCMethodDecl>(D)) {
     // Add "self" and "_cmd"
-    SmallVector<llvm::Value *, 16> Elts;
+    SmallVector<llvm::Metadata *, 16> Elts;
 
     // First element is always return type. For 'void' functions it is NULL.
     QualType ResultTy = OMethod->getReturnType();
@@ -2546,7 +2549,7 @@
   // unspecified parameter.
   if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D))
     if (FD->isVariadic()) {
-      SmallVector<llvm::Value *, 16> EltTys;
+      SmallVector<llvm::Metadata *, 16> EltTys;
       EltTys.push_back(getOrCreateType(FD->getReturnType(), F));
       if (const FunctionProtoType *FPT = dyn_cast<FunctionProtoType>(FnType))
         for (unsigned i = 0, e = FPT->getNumParams(); i != e; ++i)
@@ -2581,15 +2584,13 @@
     LinkageName = Fn->getName();
   } else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
     // If there is a DISubprogram for this function available then use it.
-    llvm::DenseMap<const FunctionDecl *, llvm::WeakVH>::iterator FI =
-        SPCache.find(FD->getCanonicalDecl());
+    auto FI = SPCache.find(FD->getCanonicalDecl());
     if (FI != SPCache.end()) {
-      llvm::Value *V = FI->second;
-      llvm::DIDescriptor SP(dyn_cast_or_null<llvm::MDNode>(V));
+      llvm::DIDescriptor SP(dyn_cast_or_null<llvm::MDNode>(FI->second));
       if (SP.isSubprogram() && llvm::DISubprogram(SP).isDefinition()) {
         llvm::MDNode *SPN = SP;
-        LexicalBlockStack.push_back(SPN);
-        RegionMap[D] = llvm::WeakVH(SP);
+        LexicalBlockStack.emplace_back(SPN);
+        RegionMap[D].reset(SP);
         return;
       }
     }
@@ -2629,54 +2630,42 @@
   // code for the initialization of globals. Do not record these decls
   // as they will overwrite the actual VarDecl Decl in the cache.
   if (HasDecl && isa<FunctionDecl>(D))
-    DeclCache.insert(std::make_pair(D->getCanonicalDecl(), llvm::WeakVH(SP)));
+    DeclCache[D->getCanonicalDecl()].reset(static_cast<llvm::Metadata *>(SP));
 
   // Push the function onto the lexical block stack.
   llvm::MDNode *SPN = SP;
-  LexicalBlockStack.push_back(SPN);
+  LexicalBlockStack.emplace_back(SPN);
 
   if (HasDecl)
-    RegionMap[D] = llvm::WeakVH(SP);
+    RegionMap[D].reset(SP);
 }
 
 /// EmitLocation - Emit metadata to indicate a change in line/column
 /// information in the source file. If the location is invalid, the
 /// previous location will be reused.
-void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc,
-                               bool ForceColumnInfo) {
+void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) {
   // Update our current location
   setLocation(Loc);
 
   if (CurLoc.isInvalid() || CurLoc.isMacroID())
     return;
 
-  // Don't bother if things are the same as last time.
-  SourceManager &SM = CGM.getContext().getSourceManager();
-  if (CurLoc == PrevLoc ||
-      SM.getExpansionLoc(CurLoc) == SM.getExpansionLoc(PrevLoc))
-    // New Builder may not be in sync with CGDebugInfo.
-    if (!Builder.getCurrentDebugLocation().isUnknown() &&
-        Builder.getCurrentDebugLocation().getScope(CGM.getLLVMContext()) ==
-            LexicalBlockStack.back())
-      return;
-
-  // Update last state.
-  PrevLoc = CurLoc;
-
   llvm::MDNode *Scope = LexicalBlockStack.back();
   Builder.SetCurrentDebugLocation(llvm::DebugLoc::get(
-      getLineNumber(CurLoc), getColumnNumber(CurLoc, ForceColumnInfo), Scope));
+      getLineNumber(CurLoc), getColumnNumber(CurLoc), Scope));
 }
 
 /// CreateLexicalBlock - Creates a new lexical block node and pushes it on
 /// the stack.
 void CGDebugInfo::CreateLexicalBlock(SourceLocation Loc) {
+  llvm::MDNode *Back = nullptr;
+  if (!LexicalBlockStack.empty())
+    Back = LexicalBlockStack.back().get();
   llvm::DIDescriptor D = DBuilder.createLexicalBlock(
-      llvm::DIDescriptor(LexicalBlockStack.empty() ? nullptr
-                                                   : LexicalBlockStack.back()),
-      getOrCreateFile(CurLoc), getLineNumber(CurLoc), getColumnNumber(CurLoc));
+      llvm::DIDescriptor(Back), getOrCreateFile(CurLoc), getLineNumber(CurLoc),
+      getColumnNumber(CurLoc));
   llvm::MDNode *DN = D;
-  LexicalBlockStack.push_back(DN);
+  LexicalBlockStack.emplace_back(DN);
 }
 
 /// EmitLexicalBlockStart - Constructs the debug code for entering a declarative
@@ -2732,7 +2721,7 @@
 llvm::DIType CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
                                                        uint64_t *XOffset) {
 
-  SmallVector<llvm::Value *, 5> EltTys;
+  SmallVector<llvm::Metadata *, 5> EltTys;
   QualType FType;
   uint64_t FieldSize, FieldOffset;
   unsigned FieldAlign;
@@ -2803,7 +2792,7 @@
 }
 
 /// EmitDeclare - Emit local variable declaration debug info.
-void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::dwarf::LLVMConstants Tag,
+void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::dwarf::Tag Tag,
                               llvm::Value *Storage, unsigned ArgNo,
                               CGBuilderTy &Builder) {
   assert(DebugKind >= CodeGenOptions::LimitedDebugInfo);
@@ -2834,6 +2823,7 @@
     Line = getLineNumber(VD->getLocation());
     Column = getColumnNumber(VD->getLocation());
   }
+  SmallVector<int64_t, 9> Expr;
   unsigned Flags = 0;
   if (VD->isImplicit())
     Flags |= llvm::DIDescriptor::FlagArtificial;
@@ -2847,7 +2837,7 @@
   if (llvm::Argument *Arg = dyn_cast<llvm::Argument>(Storage))
     if (Arg->getType()->isPointerTy() && !Arg->hasByValAttr() &&
         !VD->getType()->isPointerType())
-      Flags |= llvm::DIDescriptor::FlagIndirectVariable;
+      Expr.push_back(llvm::dwarf::DW_OP_deref);
 
   llvm::MDNode *Scope = LexicalBlockStack.back();
 
@@ -2855,17 +2845,16 @@
   if (!Name.empty()) {
     if (VD->hasAttr<BlocksAttr>()) {
       CharUnits offset = CharUnits::fromQuantity(32);
-      SmallVector<int64_t, 9> addr;
-      addr.push_back(llvm::dwarf::DW_OP_plus);
+      Expr.push_back(llvm::dwarf::DW_OP_plus);
       // offset of __forwarding field
       offset = CGM.getContext().toCharUnitsFromBits(
           CGM.getTarget().getPointerWidth(0));
-      addr.push_back(offset.getQuantity());
-      addr.push_back(llvm::dwarf::DW_OP_deref);
-      addr.push_back(llvm::dwarf::DW_OP_plus);
+      Expr.push_back(offset.getQuantity());
+      Expr.push_back(llvm::dwarf::DW_OP_deref);
+      Expr.push_back(llvm::dwarf::DW_OP_plus);
       // offset of x field
       offset = CGM.getContext().toCharUnitsFromBits(XOffset);
-      addr.push_back(offset.getQuantity());
+      Expr.push_back(offset.getQuantity());
 
       // Create the descriptor for the variable.
       llvm::DIVariable D = DBuilder.createLocalVariable(
@@ -2873,12 +2862,12 @@
 
       // Insert an llvm.dbg.declare into the current block.
       llvm::Instruction *Call =
-          DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(addr),
+          DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr),
                                  Builder.GetInsertBlock());
       Call->setDebugLoc(llvm::DebugLoc::get(Line, Column, Scope));
       return;
     } else if (isa<VariableArrayType>(VD->getType()))
-      Flags |= llvm::DIDescriptor::FlagIndirectVariable;
+      Expr.push_back(llvm::dwarf::DW_OP_deref);
   } else if (const RecordType *RT = dyn_cast<RecordType>(VD->getType())) {
     // If VD is an anonymous union then Storage represents value for
     // all union fields.
@@ -2899,7 +2888,8 @@
 
         // Insert an llvm.dbg.declare into the current block.
         llvm::Instruction *Call = DBuilder.insertDeclare(
-            Storage, D, DBuilder.createExpression(), Builder.GetInsertBlock());
+            Storage, D, DBuilder.createExpression(Expr),
+            Builder.GetInsertBlock());
         Call->setDebugLoc(llvm::DebugLoc::get(Line, Column, Scope));
       }
       return;
@@ -2913,7 +2903,7 @@
 
   // Insert an llvm.dbg.declare into the current block.
   llvm::Instruction *Call = DBuilder.insertDeclare(
-      Storage, D, DBuilder.createExpression(), Builder.GetInsertBlock());
+      Storage, D, DBuilder.createExpression(Expr), Builder.GetInsertBlock());
   Call->setDebugLoc(llvm::DebugLoc::get(Line, Column, Scope));
 }
 
@@ -3047,7 +3037,7 @@
   const llvm::StructLayout *blockLayout =
       CGM.getDataLayout().getStructLayout(block.StructureType);
 
-  SmallVector<llvm::Value *, 16> fields;
+  SmallVector<llvm::Metadata *, 16> fields;
   fields.push_back(createFieldType("__isa", C.VoidPtrTy, 0, loc, AS_public,
                                    blockLayout->getElementOffsetInBits(0),
                                    tunit, tunit));
@@ -3181,8 +3171,7 @@
 CGDebugInfo::getOrCreateStaticDataMemberDeclarationOrNull(const VarDecl *D) {
   if (!D->isStaticDataMember())
     return llvm::DIDerivedType();
-  llvm::DenseMap<const Decl *, llvm::WeakVH>::iterator MI =
-      StaticDataMemberCache.find(D->getCanonicalDecl());
+  auto MI = StaticDataMemberCache.find(D->getCanonicalDecl());
   if (MI != StaticDataMemberCache.end()) {
     assert(MI->second && "Static data member declaration should still exist");
     return llvm::DIDerivedType(cast<llvm::MDNode>(MI->second));
@@ -3255,7 +3244,7 @@
         Var->hasInternalLinkage(), Var,
         getOrCreateStaticDataMemberDeclarationOrNull(D));
   }
-  DeclCache.insert(std::make_pair(D->getCanonicalDecl(), llvm::WeakVH(GV)));
+  DeclCache[D->getCanonicalDecl()].reset(static_cast<llvm::Metadata *>(GV));
 }
 
 /// EmitGlobalVariable - Emit global variable's debug info.
@@ -3291,13 +3280,12 @@
   llvm::DIDescriptor DContext =
       getContextDescriptor(dyn_cast<Decl>(VD->getDeclContext()));
 
-  auto pair = DeclCache.insert(std::make_pair(VD, llvm::WeakVH()));
-  if (!pair.second)
+  auto &GV = DeclCache[VD];
+  if (GV)
     return;
-  llvm::DIGlobalVariable GV = DBuilder.createGlobalVariable(
+  GV.reset(DBuilder.createGlobalVariable(
       DContext, Name, StringRef(), Unit, getLineNumber(VD->getLocation()), Ty,
-      true, Init, getOrCreateStaticDataMemberDeclarationOrNull(VarD));
-  pair.first->second = llvm::WeakVH(GV);
+      true, Init, getOrCreateStaticDataMemberDeclarationOrNull(VarD)));
 }
 
 llvm::DIScope CGDebugInfo::getCurrentContextDescriptor(const Decl *D) {
@@ -3333,11 +3321,11 @@
 llvm::DIImportedEntity
 CGDebugInfo::EmitNamespaceAlias(const NamespaceAliasDecl &NA) {
   if (CGM.getCodeGenOpts().getDebugInfo() < CodeGenOptions::LimitedDebugInfo)
-    return llvm::DIImportedEntity(nullptr);
-  llvm::WeakVH &VH = NamespaceAliasCache[&NA];
+    return llvm::DIImportedEntity();
+  auto &VH = NamespaceAliasCache[&NA];
   if (VH)
     return llvm::DIImportedEntity(cast<llvm::MDNode>(VH));
-  llvm::DIImportedEntity R(nullptr);
+  llvm::DIImportedEntity R;
   if (const NamespaceAliasDecl *Underlying =
           dyn_cast<NamespaceAliasDecl>(NA.getAliasedNamespace()))
     // This could cache & dedup here rather than relying on metadata deduping.
@@ -3350,7 +3338,7 @@
         getCurrentContextDescriptor(cast<Decl>(NA.getDeclContext())),
         getOrCreateNameSpace(cast<NamespaceDecl>(NA.getAliasedNamespace())),
         getLineNumber(NA.getLocation()), NA.getName());
-  VH = R;
+  VH.reset(R);
   return R;
 }
 
@@ -3359,8 +3347,7 @@
 llvm::DINameSpace
 CGDebugInfo::getOrCreateNameSpace(const NamespaceDecl *NSDecl) {
   NSDecl = NSDecl->getCanonicalDecl();
-  llvm::DenseMap<const NamespaceDecl *, llvm::WeakVH>::iterator I =
-    NameSpaceCache.find(NSDecl);
+  auto I = NameSpaceCache.find(NSDecl);
   if (I != NameSpaceCache.end())
     return llvm::DINameSpace(cast<llvm::MDNode>(I->second));
 
@@ -3370,7 +3357,7 @@
     getContextDescriptor(dyn_cast<Decl>(NSDecl->getDeclContext()));
   llvm::DINameSpace NS =
     DBuilder.createNameSpace(Context, NSDecl->getName(), FileD, LineNo);
-  NameSpaceCache[NSDecl] = llvm::WeakVH(NS);
+  NameSpaceCache[NSDecl].reset(NS);
   return NS;
 }
 
@@ -3401,19 +3388,19 @@
   for (const auto &p : FwdDeclReplaceMap) {
     assert(p.second);
     llvm::DIDescriptor FwdDecl(cast<llvm::MDNode>(p.second));
-    llvm::WeakVH VH;
+    llvm::Metadata *Repl;
 
     auto it = DeclCache.find(p.first);
-    // If there has been no definition for the declaration, call RAUV
+    // If there has been no definition for the declaration, call RAUW
     // with ourselves, that will destroy the temporary MDNode and
     // replace it with a standard one, avoiding leaking memory.
     if (it == DeclCache.end())
-      VH = p.second;
+      Repl = p.second;
     else
-      VH = it->second;
+      Repl = it->second;
 
     FwdDecl.replaceAllUsesWith(CGM.getLLVMContext(),
-                               llvm::DIDescriptor(cast<llvm::MDNode>(VH)));
+                               llvm::DIDescriptor(cast<llvm::MDNode>(Repl)));
   }
 
   // We keep our own list of retained types, because we need to look
diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h
index 89d592e..62ba801 100644
--- a/lib/CodeGen/CGDebugInfo.h
+++ b/lib/CodeGen/CGDebugInfo.h
@@ -43,17 +43,17 @@
   class CodeGenFunction;
   class CGBlockInfo;
 
-/// CGDebugInfo - This class gathers all debug information during compilation
+/// \brief This class gathers all debug information during compilation
 /// and is responsible for emitting to llvm globals or pass directly to
 /// the backend.
 class CGDebugInfo {
-  friend class ArtificialLocation;
+  friend class ApplyDebugLocation;
   friend class SaveAndRestoreLocation;
   CodeGenModule &CGM;
   const CodeGenOptions::DebugInfoKind DebugKind;
   llvm::DIBuilder DBuilder;
   llvm::DICompileUnit TheCU;
-  SourceLocation CurLoc, PrevLoc;
+  SourceLocation CurLoc;
   llvm::DIType VTablePtrType;
   llvm::DIType ClassTy;
   llvm::DICompositeType ObjTy;
@@ -64,8 +64,8 @@
   llvm::DIType OCLEventDITy;
   llvm::DIType BlockLiteralGeneric;
 
-  /// TypeCache - Cache of previously constructed Types.
-  llvm::DenseMap<const void *, llvm::WeakVH> TypeCache;
+  /// \brief Cache of previously constructed Types.
+  llvm::DenseMap<const void *, llvm::TrackingMDRef> TypeCache;
 
   struct ObjCInterfaceCacheEntry {
     const ObjCInterfaceType *Type;
@@ -76,42 +76,44 @@
         : Type(Type), Decl(Decl), Unit(Unit) {}
   };
 
-  /// ObjCInterfaceCache - Cache of previously constructed interfaces
+  /// \brief Cache of previously constructed interfaces
   /// which may change.
   llvm::SmallVector<ObjCInterfaceCacheEntry, 32> ObjCInterfaceCache;
 
-  /// RetainedTypes - list of interfaces we want to keep even if orphaned.
+  /// \brief List of interfaces we want to keep even if orphaned.
   std::vector<void *> RetainedTypes;
 
-  /// ReplaceMap - Cache of forward declared types to RAUW at the end of
+  /// \brief Cache of forward declared types to RAUW at the end of
   /// compilation.
-  std::vector<std::pair<const TagType *, llvm::WeakVH>> ReplaceMap;
+  std::vector<std::pair<const TagType *, llvm::TrackingMDRef>> ReplaceMap;
 
   /// \brief Cache of replaceable forward declarartions (functions and
   /// variables) to RAUW at the end of compilation.
-  std::vector<std::pair<const DeclaratorDecl *, llvm::WeakVH>> FwdDeclReplaceMap;
+  std::vector<std::pair<const DeclaratorDecl *, llvm::TrackingMDRef>>
+      FwdDeclReplaceMap;
 
   // LexicalBlockStack - Keep track of our current nested lexical block.
-  std::vector<llvm::TrackingVH<llvm::MDNode> > LexicalBlockStack;
-  llvm::DenseMap<const Decl *, llvm::WeakVH> RegionMap;
+  std::vector<llvm::TrackingMDNodeRef> LexicalBlockStack;
+  llvm::DenseMap<const Decl *, llvm::TrackingMDRef> RegionMap;
   // FnBeginRegionCount - Keep track of LexicalBlockStack counter at the
   // beginning of a function. This is used to pop unbalanced regions at
   // the end of a function.
   std::vector<unsigned> FnBeginRegionCount;
 
-  /// DebugInfoNames - This is a storage for names that are
+  /// \brief This is a storage for names that are
   /// constructed on demand. For example, C++ destructors, C++ operators etc..
   llvm::BumpPtrAllocator DebugInfoNames;
   StringRef CWDName;
 
-  llvm::DenseMap<const char *, llvm::WeakVH> DIFileCache;
-  llvm::DenseMap<const FunctionDecl *, llvm::WeakVH> SPCache;
+  llvm::DenseMap<const char *, llvm::TrackingMDRef> DIFileCache;
+  llvm::DenseMap<const FunctionDecl *, llvm::TrackingMDRef> SPCache;
   /// \brief Cache declarations relevant to DW_TAG_imported_declarations (C++
   /// using declarations) that aren't covered by other more specific caches.
-  llvm::DenseMap<const Decl *, llvm::WeakVH> DeclCache;
-  llvm::DenseMap<const NamespaceDecl *, llvm::WeakVH> NameSpaceCache;
-  llvm::DenseMap<const NamespaceAliasDecl *, llvm::WeakVH> NamespaceAliasCache;
-  llvm::DenseMap<const Decl *, llvm::WeakVH> StaticDataMemberCache;
+  llvm::DenseMap<const Decl *, llvm::TrackingMDRef> DeclCache;
+  llvm::DenseMap<const NamespaceDecl *, llvm::TrackingMDRef> NameSpaceCache;
+  llvm::DenseMap<const NamespaceAliasDecl *, llvm::TrackingMDRef>
+      NamespaceAliasCache;
+  llvm::DenseMap<const Decl *, llvm::TrackingMDRef> StaticDataMemberCache;
 
   /// Helper functions for getOrCreateType.
   unsigned Checksum(const ObjCInterfaceDecl *InterfaceDecl);
@@ -162,14 +164,12 @@
                                              llvm::DIFile F,
                                              llvm::DIType RecordTy);
 
-  void CollectCXXMemberFunctions(const CXXRecordDecl *Decl,
-                                 llvm::DIFile F,
-                                 SmallVectorImpl<llvm::Value *> &E,
+  void CollectCXXMemberFunctions(const CXXRecordDecl *Decl, llvm::DIFile F,
+                                 SmallVectorImpl<llvm::Metadata *> &E,
                                  llvm::DIType T);
 
-  void CollectCXXBases(const CXXRecordDecl *Decl,
-                       llvm::DIFile F,
-                       SmallVectorImpl<llvm::Value *> &EltTys,
+  void CollectCXXBases(const CXXRecordDecl *Decl, llvm::DIFile F,
+                       SmallVectorImpl<llvm::Metadata *> &EltTys,
                        llvm::DIType RecordTy);
 
   llvm::DIArray
@@ -192,23 +192,21 @@
 
   // Helpers for collecting fields of a record.
   void CollectRecordLambdaFields(const CXXRecordDecl *CXXDecl,
-                                 SmallVectorImpl<llvm::Value *> &E,
+                                 SmallVectorImpl<llvm::Metadata *> &E,
                                  llvm::DIType RecordTy);
   llvm::DIDerivedType CreateRecordStaticField(const VarDecl *Var,
                                               llvm::DIType RecordTy,
                                               const RecordDecl* RD);
   void CollectRecordNormalField(const FieldDecl *Field, uint64_t OffsetInBits,
                                 llvm::DIFile F,
-                                SmallVectorImpl<llvm::Value *> &E,
-                                llvm::DIType RecordTy,
-                                const RecordDecl* RD);
+                                SmallVectorImpl<llvm::Metadata *> &E,
+                                llvm::DIType RecordTy, const RecordDecl *RD);
   void CollectRecordFields(const RecordDecl *Decl, llvm::DIFile F,
-                           SmallVectorImpl<llvm::Value *> &E,
+                           SmallVectorImpl<llvm::Metadata *> &E,
                            llvm::DICompositeType RecordTy);
 
-  void CollectVTableInfo(const CXXRecordDecl *Decl,
-                         llvm::DIFile F,
-                         SmallVectorImpl<llvm::Value *> &EltTys);
+  void CollectVTableInfo(const CXXRecordDecl *Decl, llvm::DIFile F,
+                         SmallVectorImpl<llvm::Metadata *> &EltTys);
 
   // CreateLexicalBlock - Create a new lexical block node and push it on
   // the stack.
@@ -220,20 +218,15 @@
 
   void finalize();
 
-  /// setLocation - Update the current source location. If \arg loc is
+  /// \brief Update the current source location. If \arg loc is
   /// invalid it is ignored.
   void setLocation(SourceLocation Loc);
 
-  /// getLocation - Return the current source location.
-  SourceLocation getLocation() const { return CurLoc; }
-
-  /// EmitLocation - Emit metadata to indicate a change in line/column
+  /// \brief Emit metadata to indicate a change in line/column
   /// information in the source file.
-  /// \param ForceColumnInfo  Assume DebugColumnInfo option is true.
-  void EmitLocation(CGBuilderTy &Builder, SourceLocation Loc,
-                    bool ForceColumnInfo = false);
+  void EmitLocation(CGBuilderTy &Builder, SourceLocation Loc);
 
-  /// EmitFunctionStart - Emit a call to llvm.dbg.function.start to indicate
+  /// \brief Emit a call to llvm.dbg.function.start to indicate
   /// start of a new function.
   /// \param Loc       The location of the function header.
   /// \param ScopeLoc  The location of the function body.
@@ -242,23 +235,23 @@
                          QualType FnType, llvm::Function *Fn,
                          CGBuilderTy &Builder);
 
-  /// EmitFunctionEnd - Constructs the debug code for exiting a function.
+  /// \brief Constructs the debug code for exiting a function.
   void EmitFunctionEnd(CGBuilderTy &Builder);
 
-  /// EmitLexicalBlockStart - Emit metadata to indicate the beginning of a
+  /// \brief Emit metadata to indicate the beginning of a
   /// new lexical block and push the block onto the stack.
   void EmitLexicalBlockStart(CGBuilderTy &Builder, SourceLocation Loc);
 
-  /// EmitLexicalBlockEnd - Emit metadata to indicate the end of a new lexical
+  /// \brief Emit metadata to indicate the end of a new lexical
   /// block and pop the current block.
   void EmitLexicalBlockEnd(CGBuilderTy &Builder, SourceLocation Loc);
 
-  /// EmitDeclareOfAutoVariable - Emit call to llvm.dbg.declare for an automatic
+  /// \brief Emit call to llvm.dbg.declare for an automatic
   /// variable declaration.
   void EmitDeclareOfAutoVariable(const VarDecl *Decl, llvm::Value *AI,
                                  CGBuilderTy &Builder);
 
-  /// EmitDeclareOfBlockDeclRefVariable - Emit call to llvm.dbg.declare for an
+  /// \brief Emit call to llvm.dbg.declare for an
   /// imported variable declaration in a block.
   void EmitDeclareOfBlockDeclRefVariable(const VarDecl *variable,
                                          llvm::Value *storage,
@@ -266,12 +259,12 @@
                                          const CGBlockInfo &blockInfo,
                                          llvm::Instruction *InsertPoint = 0);
 
-  /// EmitDeclareOfArgVariable - Emit call to llvm.dbg.declare for an argument
+  /// \brief Emit call to llvm.dbg.declare for an argument
   /// variable declaration.
   void EmitDeclareOfArgVariable(const VarDecl *Decl, llvm::Value *AI,
                                 unsigned ArgNo, CGBuilderTy &Builder);
 
-  /// EmitDeclareOfBlockLiteralArgVariable - Emit call to
+  /// \brief Emit call to
   /// llvm.dbg.declare for the block-literal argument to a block
   /// invocation function.
   void EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block,
@@ -279,28 +272,28 @@
                                             llvm::Value *LocalAddr,
                                             CGBuilderTy &Builder);
 
-  /// EmitGlobalVariable - Emit information about a global variable.
+  /// \brief Emit information about a global variable.
   void EmitGlobalVariable(llvm::GlobalVariable *GV, const VarDecl *Decl);
 
-  /// EmitGlobalVariable - Emit global variable's debug info.
+  /// \brief Emit global variable's debug info.
   void EmitGlobalVariable(const ValueDecl *VD, llvm::Constant *Init);
 
-  /// \brief - Emit C++ using directive.
+  /// \brief Emit C++ using directive.
   void EmitUsingDirective(const UsingDirectiveDecl &UD);
 
-  /// EmitExplicitCastType - Emit the type explicitly casted to.
+  /// \brief Emit the type that is explicitly cast to.
   void EmitExplicitCastType(QualType Ty);
 
-  /// \brief - Emit C++ using declaration.
+  /// \brief Emit C++ using declaration.
   void EmitUsingDecl(const UsingDecl &UD);
 
-  /// \brief - Emit C++ namespace alias.
+  /// \brief Emit C++ namespace alias.
   llvm::DIImportedEntity EmitNamespaceAlias(const NamespaceAliasDecl &NA);
 
-  /// getOrCreateRecordType - Emit record type's standalone debug info.
+  /// \brief Emit record type's standalone debug info.
   llvm::DIType getOrCreateRecordType(QualType Ty, SourceLocation L);
 
-  /// getOrCreateInterfaceType - Emit an objective c interface type standalone
+  /// \brief Emit an Objective-C interface type standalone
   /// debug info.
   llvm::DIType getOrCreateInterfaceType(QualType Ty,
                                         SourceLocation Loc);
@@ -313,18 +306,18 @@
   void completeTemplateDefinition(const ClassTemplateSpecializationDecl &SD);
 
 private:
-  /// EmitDeclare - Emit call to llvm.dbg.declare for a variable declaration.
+  /// \brief Emit call to llvm.dbg.declare for a variable declaration.
   /// Tag accepts custom types DW_TAG_arg_variable and DW_TAG_auto_variable,
   /// otherwise would be of type llvm::dwarf::Tag.
-  void EmitDeclare(const VarDecl *decl, llvm::dwarf::LLVMConstants Tag,
-                   llvm::Value *AI, unsigned ArgNo, CGBuilderTy &Builder);
+  void EmitDeclare(const VarDecl *decl, llvm::dwarf::Tag Tag, llvm::Value *AI,
+                   unsigned ArgNo, CGBuilderTy &Builder);
 
   // EmitTypeForVarWithBlocksAttr - Build up structure info for the byref.
   // See BuildByRefType.
   llvm::DIType EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
                                             uint64_t *OffSet);
 
-  /// getContextDescriptor - Get context info for the decl.
+  /// \brief Get context info for the decl.
   llvm::DIScope getContextDescriptor(const Decl *Decl);
 
   llvm::DIScope getCurrentContextDescriptor(const Decl *Decl);
@@ -333,38 +326,38 @@
   llvm::DICompositeType getOrCreateRecordFwdDecl(const RecordType *,
                                                  llvm::DIDescriptor);
 
-  /// createContextChain - Create a set of decls for the context chain.
+  /// \brief Create a set of decls for the context chain.
   llvm::DIDescriptor createContextChain(const Decl *Decl);
 
-  /// getCurrentDirname - Return current directory name.
+  /// \brief Return current directory name.
   StringRef getCurrentDirname();
 
-  /// CreateCompileUnit - Create new compile unit.
+  /// \brief Create new compile unit.
   void CreateCompileUnit();
 
-  /// getOrCreateFile - Get the file debug info descriptor for the input
+  /// \brief Get the file debug info descriptor for the input
   /// location.
   llvm::DIFile getOrCreateFile(SourceLocation Loc);
 
-  /// getOrCreateMainFile - Get the file info for main compile unit.
+  /// \brief Get the file info for main compile unit.
   llvm::DIFile getOrCreateMainFile();
 
-  /// getOrCreateType - Get the type from the cache or create a new type if
+  /// \brief Get the type from the cache or create a new type if
   /// necessary.
   llvm::DIType getOrCreateType(QualType Ty, llvm::DIFile Fg);
 
-  /// getOrCreateLimitedType - Get the type from the cache or create a new
+  /// \brief Get the type from the cache or create a new
   /// partial type if necessary.
   llvm::DIType getOrCreateLimitedType(const RecordType *Ty, llvm::DIFile F);
 
-  /// CreateTypeNode - Create type metadata for a source language type.
+  /// \brief Create type metadata for a source language type.
   llvm::DIType CreateTypeNode(QualType Ty, llvm::DIFile Fg);
 
-  /// getObjCInterfaceDecl - return the underlying ObjCInterfaceDecl
+  /// \brief Return the underlying ObjCInterfaceDecl
   /// if Ty is an ObjCInterface or a pointer to one.
   ObjCInterfaceDecl* getObjCInterfaceDecl(QualType Ty);
 
-  /// CreateMemberType - Create new member and increase Offset by FType's size.
+  /// \brief Create new member and increase Offset by FType's size.
   llvm::DIType CreateMemberType(llvm::DIFile Unit, QualType FType,
                                 StringRef Name, uint64_t *Offset);
 
@@ -372,7 +365,7 @@
   /// declaration.
   llvm::DIDescriptor getDeclarationOrDefinition(const Decl *D);
 
-  /// getFunctionDeclaration - Return debug info descriptor to describe method
+  /// \brief Return debug info descriptor to describe method
   /// declaration for the given method definition.
   llvm::DISubprogram getFunctionDeclaration(const Decl *D);
 
@@ -396,30 +389,30 @@
                          StringRef LinkageName, llvm::GlobalVariable *Var,
                          llvm::DIDescriptor DContext);
 
-  /// getFunctionName - Get function name for the given FunctionDecl. If the
+  /// \brief Get function name for the given FunctionDecl. If the
   /// name is constructed on demand (e.g. C++ destructor) then the name
   /// is stored on the side.
   StringRef getFunctionName(const FunctionDecl *FD);
 
-  /// getObjCMethodName - Returns the unmangled name of an Objective-C method.
+  /// \brief Returns the unmangled name of an Objective-C method.
   /// This is the display name for the debugging info.
   StringRef getObjCMethodName(const ObjCMethodDecl *FD);
 
-  /// getSelectorName - Return selector name. This is used for debugging
+  /// \brief Return selector name. This is used for debugging
   /// info.
   StringRef getSelectorName(Selector S);
 
-  /// getClassName - Get class name including template argument list.
+  /// \brief Get class name including template argument list.
   StringRef getClassName(const RecordDecl *RD);
 
-  /// getVTableName - Get vtable name for the given Class.
+  /// \brief Get vtable name for the given Class.
   StringRef getVTableName(const CXXRecordDecl *Decl);
 
-  /// getLineNumber - Get line number for the location. If location is invalid
+  /// \brief Get line number for the location. If location is invalid
   /// then use current location.
   unsigned getLineNumber(SourceLocation Loc);
 
-  /// getColumnNumber - Get column number for the location. If location is
+  /// \brief Get column number for the location. If location is
   /// invalid then use current location.
   /// \param Force  Assume DebugColumnInfo option is true.
   unsigned getColumnNumber(SourceLocation Loc, bool Force=false);
@@ -439,7 +432,7 @@
                            StringRef &Name, StringRef &LinkageName,
                            llvm::DIDescriptor &VDContext);
 
-  /// internString - Allocate a copy of \p A using the DebugInfoNames allocator
+  /// \brief Allocate a copy of \p A using the DebugInfoNames allocator
   /// and return a reference to it. If multiple arguments are given the strings
   /// are concatenated.
   StringRef internString(StringRef A, StringRef B = StringRef()) {
@@ -450,50 +443,60 @@
   }
 };
 
-/// SaveAndRestoreLocation - An RAII object saves the current location
-/// and automatically restores it to the original value.
-class SaveAndRestoreLocation {
-protected:
-  SourceLocation SavedLoc;
-  CGDebugInfo *DI;
-  CGBuilderTy &Builder;
+/// \brief A scoped helper to set the current debug location to the specified
+/// location or preferred location of the specified Expr.
+class ApplyDebugLocation {
+private:
+  void init(SourceLocation TemporaryLocation, bool DefaultToEmpty = false);
+  ApplyDebugLocation(CodeGenFunction &CGF, bool DefaultToEmpty,
+                     SourceLocation TemporaryLocation);
+
+  llvm::DebugLoc OriginalLocation;
+  CodeGenFunction &CGF;
 public:
-  SaveAndRestoreLocation(CodeGenFunction &CGF, CGBuilderTy &B);
-  /// Autorestore everything back to normal.
-  ~SaveAndRestoreLocation();
-};
 
-/// NoLocation - An RAII object that temporarily disables debug
-/// locations. This is useful for emitting instructions that should be
-/// counted towards the function prologue.
-class NoLocation : public SaveAndRestoreLocation {
-public:
-  NoLocation(CodeGenFunction &CGF, CGBuilderTy &B);
-  /// Autorestore everything back to normal.
-  ~NoLocation();
-};
+  /// \brief Set the location to the (valid) TemporaryLocation.
+  ApplyDebugLocation(CodeGenFunction &CGF, SourceLocation TemporaryLocation);
+  ApplyDebugLocation(CodeGenFunction &CGF, const Expr *E);
+  ApplyDebugLocation(CodeGenFunction &CGF, llvm::DebugLoc Loc);
 
-/// ArtificialLocation - An RAII object that temporarily switches to
-/// an artificial debug location that has a valid scope, but no line
-/// information. This is useful when emitting compiler-generated
-/// helper functions that have no source location associated with
-/// them. The DWARF specification allows the compiler to use the
-/// special line number 0 to indicate code that can not be attributed
-/// to any source location.
-///
-/// This is necessary because passing an empty SourceLocation to
-/// CGDebugInfo::setLocation() will result in the last valid location
-/// being reused.
-class ArtificialLocation : public SaveAndRestoreLocation {
-public:
-  ArtificialLocation(CodeGenFunction &CGF, CGBuilderTy &B);
+  ~ApplyDebugLocation();
 
-  /// Set the current location to line 0, but within the current scope
-  /// (= the top of the LexicalBlockStack).
-  void Emit();
+  /// \brief Switch to an artificial debug location that has a valid scope, but
+  /// no line information.
+  ///
+  /// Artificial locations are useful when emitting compiler-generated helper
+  /// functions that have no source location associated with them. The DWARF
+  /// specification allows the compiler to use the special line number 0 to
+  /// indicate code that cannot be attributed to any source location. Note that
+  /// passing an empty SourceLocation to CGDebugInfo::setLocation() will result
+  /// in the last valid location being reused.
+  static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF) {
+    return ApplyDebugLocation(CGF, false, SourceLocation());
+  }
+  /// \brief Apply TemporaryLocation if it is valid. Otherwise switch to an
+  /// artificial debug location that has a valid scope, but no line information.
+  static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF,
+                                             SourceLocation TemporaryLocation) {
+    return ApplyDebugLocation(CGF, false, TemporaryLocation);
+  }
 
-  /// Autorestore everything back to normal.
-  ~ArtificialLocation();
+  /// \brief Set the IRBuilder to not attach debug locations.  Note that passing
+  /// an empty SourceLocation to CGDebugInfo::setLocation() will result in the
+  /// last valid location being reused.  Note that all instructions that do not
+  /// have a location at the beginning of a function are counted towards the
+  /// function prologue.
+  static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF) {
+    return ApplyDebugLocation(CGF, true, SourceLocation());
+  }
+
+  /// \brief Apply TemporaryLocation if it is valid. Otherwise set the IRBuilder
+  /// to not attach debug locations.
+  static ApplyDebugLocation CreateDefaultEmpty(CodeGenFunction &CGF,
+                                             SourceLocation TemporaryLocation) {
+    return ApplyDebugLocation(CGF, true, TemporaryLocation);
+  }
+
 };
 
 
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp
index 959ac9a..fb72a9a 100644
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -206,6 +206,9 @@
   GV->setAlignment(getContext().getDeclAlign(&D).getQuantity());
   setGlobalVisibility(GV, &D);
 
+  if (supportsCOMDAT() && GV->isWeakForLinker())
+    GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
+
   if (D.getTLSKind())
     setTLSMode(GV, D);
 
@@ -596,10 +599,8 @@
   lvalue.setAddress(CGF.BuildBlockByrefAddress(lvalue.getAddress(), var));
 }
 
-void CodeGenFunction::EmitScalarInit(const Expr *init,
-                                     const ValueDecl *D,
-                                     LValue lvalue,
-                                     bool capturedByInit) {
+void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D,
+                                     LValue lvalue, bool capturedByInit) {
   Qualifiers::ObjCLifetime lifetime = lvalue.getObjCLifetime();
   if (!lifetime) {
     llvm::Value *value = EmitScalarExpr(init);
@@ -1089,6 +1090,7 @@
   if (emission.wasEmittedAsGlobal()) return;
 
   const VarDecl &D = *emission.Variable;
+  auto DL = ApplyDebugLocation::CreateDefaultArtificial(*this, D.getLocation());
   QualType type = D.getType();
 
   // If this local has an initializer, emit it now.
@@ -1192,10 +1194,8 @@
 /// \param alignment the alignment of the address
 /// \param capturedByInit true if the variable is a __block variable
 ///   whose address is potentially changed by the initializer
-void CodeGenFunction::EmitExprAsInit(const Expr *init,
-                                     const ValueDecl *D,
-                                     LValue lvalue,
-                                     bool capturedByInit) {
+void CodeGenFunction::EmitExprAsInit(const Expr *init, const ValueDecl *D,
+                                     LValue lvalue, bool capturedByInit) {
   QualType type = D->getType();
 
   if (type->isReferenceType()) {
diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp
index e002cdf..9a4303e 100644
--- a/lib/CodeGen/CGDeclCXX.cpp
+++ b/lib/CodeGen/CGDeclCXX.cpp
@@ -141,7 +141,7 @@
 
   if (!T->isReferenceType()) {
     if (getLangOpts().OpenMP && D.hasAttr<OMPThreadPrivateDeclAttr>())
-      (void)CGM.getOpenMPRuntime().EmitOMPThreadPrivateVarDefinition(
+      (void)CGM.getOpenMPRuntime().emitThreadPrivateVarDefinition(
           &D, DeclPtr, D.getAttr<OMPThreadPrivateDeclAttr>()->getLocation(),
           PerformInit, this);
     if (PerformInit)
@@ -267,15 +267,7 @@
   addUsedGlobal(PtrArray);
 
   // If the GV is already in a comdat group, then we have to join it.
-  llvm::Comdat *C = GV->getComdat();
-
-  // LinkOnce and Weak linkage are lowered down to a single-member comdat group.
-  // Make an explicit group so we can join it.
-  if (!C && (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage())) {
-    C = TheModule.getOrInsertComdat(GV->getName());
-    GV->setComdat(C);
-  }
-  if (C)
+  if (llvm::Comdat *C = GV->getComdat())
     PtrArray->setComdat(C);
 }
 
@@ -333,8 +325,9 @@
     AddGlobalCtor(Fn, 65535, COMDATKey);
     DelayedCXXInitPosition.erase(D);
   } else if (D->hasAttr<SelectAnyAttr>()) {
-    // SelectAny globals will be comdat-folded. Put the initializer into a COMDAT
-    // group associated with the global, so the initializers get folded too.
+    // SelectAny globals will be comdat-folded. Put the initializer into a
+    // COMDAT group associated with the global, so the initializers get folded
+    // too.
     AddGlobalCtor(Fn, 65535, COMDATKey);
     DelayedCXXInitPosition.erase(D);
   } else {
@@ -452,6 +445,8 @@
   if (D->hasAttr<NoDebugAttr>())
     DebugInfo = nullptr; // disable debug info indefinitely for this function
 
+  CurEHLocation = D->getLocStart();
+
   StartFunction(GlobalDecl(D), getContext().VoidTy, Fn,
                 getTypes().arrangeNullaryFunction(),
                 FunctionArgList(), D->getLocation(),
@@ -474,11 +469,11 @@
                                            ArrayRef<llvm::Function *> Decls,
                                            llvm::GlobalVariable *Guard) {
   {
-    ArtificialLocation AL(*this, Builder);
+    auto NL = ApplyDebugLocation::CreateEmpty(*this);
     StartFunction(GlobalDecl(), getContext().VoidTy, Fn,
                   getTypes().arrangeNullaryFunction(), FunctionArgList());
     // Emit an artificial location for this function.
-    AL.Emit();
+    auto AL = ApplyDebugLocation::CreateArtificial(*this);
 
     llvm::BasicBlock *ExitBlock = nullptr;
     if (Guard) {
@@ -525,11 +520,11 @@
                   const std::vector<std::pair<llvm::WeakVH, llvm::Constant*> >
                                                 &DtorsAndObjects) {
   {
-    ArtificialLocation AL(*this, Builder);
+    auto NL = ApplyDebugLocation::CreateEmpty(*this);
     StartFunction(GlobalDecl(), getContext().VoidTy, Fn,
                   getTypes().arrangeNullaryFunction(), FunctionArgList());
     // Emit an artificial location for this function.
-    AL.Emit();
+    auto AL = ApplyDebugLocation::CreateArtificial(*this);
 
     // Emit the dtors, in reverse order from construction.
     for (unsigned i = 0, e = DtorsAndObjects.size(); i != e; ++i) {
@@ -561,6 +556,8 @@
   llvm::Function *fn = CGM.CreateGlobalInitOrDestructFunction(
       FTy, "__cxx_global_array_dtor", VD->getLocation());
 
+  CurEHLocation = VD->getLocStart();
+
   StartFunction(VD, getContext().VoidTy, fn, FI, args);
 
   emitDestroy(addr, type, destroyer, useEHCleanupForArray);
diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp
index 8cd49d1..4e9eb32 100644
--- a/lib/CodeGen/CGException.cpp
+++ b/lib/CodeGen/CGException.cpp
@@ -12,9 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "CodeGenFunction.h"
+#include "CGCXXABI.h"
 #include "CGCleanup.h"
 #include "CGObjCRuntime.h"
 #include "TargetInfo.h"
+#include "clang/AST/Mangle.h"
 #include "clang/AST/StmtCXX.h"
 #include "clang/AST/StmtObjC.h"
 #include "llvm/IR/CallSite.h"
@@ -52,15 +54,6 @@
   return CGM.CreateRuntimeFunction(FTy, "__cxa_throw");
 }
 
-static llvm::Constant *getReThrowFn(CodeGenModule &CGM) {
-  // void __cxa_rethrow();
-
-  llvm::FunctionType *FTy =
-    llvm::FunctionType::get(CGM.VoidTy, /*IsVarArgs=*/false);
-
-  return CGM.CreateRuntimeFunction(FTy, "__cxa_rethrow");
-}
-
 static llvm::Constant *getGetExceptionPtrFn(CodeGenModule &CGM) {
   // void *__cxa_get_exception_ptr(void*);
 
@@ -71,6 +64,9 @@
 }
 
 static llvm::Constant *getBeginCatchFn(CodeGenModule &CGM) {
+  if (CGM.getTarget().getCXXABI().isMicrosoft())
+    return CGM.getIntrinsic(llvm::Intrinsic::eh_begincatch);
+
   // void *__cxa_begin_catch(void*);
 
   llvm::FunctionType *FTy =
@@ -80,6 +76,9 @@
 }
 
 static llvm::Constant *getEndCatchFn(CodeGenModule &CGM) {
+  if (CGM.getTarget().getCXXABI().isMicrosoft())
+    return CGM.getIntrinsic(llvm::Intrinsic::eh_endcatch);
+
   // void __cxa_end_catch();
 
   llvm::FunctionType *FTy =
@@ -106,10 +105,14 @@
   StringRef name;
 
   // In C++, use std::terminate().
-  if (CGM.getLangOpts().CPlusPlus)
-    name = "_ZSt9terminatev"; // FIXME: mangling!
-  else if (CGM.getLangOpts().ObjC1 &&
-           CGM.getLangOpts().ObjCRuntime.hasTerminate())
+  if (CGM.getLangOpts().CPlusPlus &&
+      CGM.getTarget().getCXXABI().isItaniumFamily()) {
+    name = "_ZSt9terminatev";
+  } else if (CGM.getLangOpts().CPlusPlus &&
+             CGM.getTarget().getCXXABI().isMicrosoft()) {
+    name = "\01?terminate@@YAXXZ";
+  } else if (CGM.getLangOpts().ObjC1 &&
+             CGM.getLangOpts().ObjCRuntime.hasTerminate())
     name = "objc_terminate";
   else
     name = "abort";
@@ -134,7 +137,12 @@
     // This function must have prototype void(void*).
     const char *CatchallRethrowFn;
 
-    static const EHPersonality &get(CodeGenModule &CGM);
+    static const EHPersonality &get(CodeGenModule &CGM,
+                                    const FunctionDecl *FD);
+    static const EHPersonality &get(CodeGenFunction &CGF) {
+      return get(CGF.CGM, dyn_cast_or_null<FunctionDecl>(CGF.CurCodeDecl));
+    }
+
     static const EHPersonality GNU_C;
     static const EHPersonality GNU_C_SJLJ;
     static const EHPersonality GNU_C_SEH;
@@ -145,6 +153,9 @@
     static const EHPersonality GNU_CPlusPlus;
     static const EHPersonality GNU_CPlusPlus_SJLJ;
     static const EHPersonality GNU_CPlusPlus_SEH;
+    static const EHPersonality MSVC_except_handler;
+    static const EHPersonality MSVC_C_specific_handler;
+    static const EHPersonality MSVC_CxxFrameHandler3;
   };
 }
 
@@ -167,6 +178,12 @@
 EHPersonality::GNU_ObjCXX = { "__gnustep_objcxx_personality_v0", nullptr };
 const EHPersonality
 EHPersonality::GNUstep_ObjC = { "__gnustep_objc_personality_v0", nullptr };
+const EHPersonality
+EHPersonality::MSVC_except_handler = { "_except_handler3", nullptr };
+const EHPersonality
+EHPersonality::MSVC_C_specific_handler = { "__C_specific_handler", nullptr };
+const EHPersonality
+EHPersonality::MSVC_CxxFrameHandler3 = { "__CxxFrameHandler3", nullptr };
 
 /// On Win64, use libgcc's SEH personality function. We fall back to dwarf on
 /// other platforms, unless the user asked for SjLj exceptions.
@@ -239,9 +256,29 @@
   llvm_unreachable("bad runtime kind");
 }
 
-const EHPersonality &EHPersonality::get(CodeGenModule &CGM) {
+static const EHPersonality &getSEHPersonalityMSVC(const llvm::Triple &T) {
+  if (T.getArch() == llvm::Triple::x86)
+    return EHPersonality::MSVC_except_handler;
+  return EHPersonality::MSVC_C_specific_handler;
+}
+
+const EHPersonality &EHPersonality::get(CodeGenModule &CGM,
+                                        const FunctionDecl *FD) {
   const llvm::Triple &T = CGM.getTarget().getTriple();
   const LangOptions &L = CGM.getLangOpts();
+
+  // Try to pick a personality function that is compatible with MSVC if we're
+  // not compiling Obj-C. Obj-C users better have an Obj-C runtime that supports
+  // the GCC-style personality function.
+  if (T.isWindowsMSVCEnvironment() && !L.ObjC1) {
+    if (L.SjLjExceptions)
+      return EHPersonality::GNU_CPlusPlus_SJLJ;
+    else if (FD && FD->usesSEHTry())
+      return getSEHPersonalityMSVC(T);
+    else
+      return EHPersonality::MSVC_CxxFrameHandler3;
+  }
+
   if (L.CPlusPlus && L.ObjC1)
     return getObjCXXPersonality(T, L);
   else if (L.CPlusPlus)
@@ -326,7 +363,7 @@
   if (!LangOpts.ObjCRuntime.isNeXTFamily())
     return;
 
-  const EHPersonality &ObjCXX = EHPersonality::get(*this);
+  const EHPersonality &ObjCXX = EHPersonality::get(*this, /*FD=*/nullptr);
   const EHPersonality &CXX =
       getCXXPersonality(getTarget().getTriple(), LangOpts);
   if (&ObjCXX == &CXX)
@@ -423,15 +460,30 @@
   return Builder.CreateLoad(getEHSelectorSlot(), "sel");
 }
 
+llvm::Value *CodeGenFunction::getAbnormalTerminationSlot() {
+  if (!AbnormalTerminationSlot)
+    AbnormalTerminationSlot =
+        CreateTempAlloca(Int8Ty, "abnormal.termination.slot");
+  return AbnormalTerminationSlot;
+}
+
 void CodeGenFunction::EmitCXXThrowExpr(const CXXThrowExpr *E,
                                        bool KeepInsertionPoint) {
-  if (CGM.getTarget().getTriple().isKnownWindowsMSVCEnvironment()) {
-    ErrorUnsupported(E, "throw expression");
+  if (!E->getSubExpr()) {
+    CGM.getCXXABI().emitRethrow(*this, /*isNoReturn*/true);
+
+    // throw is an expression, and the expression emitters expect us
+    // to leave ourselves at a valid insertion point.
+    if (KeepInsertionPoint)
+      EmitBlock(createBasicBlock("throw.cont"));
+
     return;
   }
 
-  if (!E->getSubExpr()) {
-    EmitNoreturnRuntimeCallOrInvoke(getReThrowFn(CGM), None);
+  if (CGM.getTarget().getTriple().isKnownWindowsMSVCEnvironment()) {
+    // Call std::terminate().
+    llvm::CallInst *TermCall = EmitNounwindRuntimeCall(getTerminateFn(CGM));
+    TermCall->setDoesNotReturn();
 
     // throw is an expression, and the expression emitters expect us
     // to leave ourselves at a valid insertion point.
@@ -551,8 +603,9 @@
 
     llvm::Value *zero = CGF.Builder.getInt32(0);
     llvm::Value *failsFilter =
-      CGF.Builder.CreateICmpSLT(selector, zero, "ehspec.fails");
-    CGF.Builder.CreateCondBr(failsFilter, unexpectedBB, CGF.getEHResumeBlock(false));
+        CGF.Builder.CreateICmpSLT(selector, zero, "ehspec.fails");
+    CGF.Builder.CreateCondBr(failsFilter, unexpectedBB,
+                             CGF.getEHResumeBlock(false));
 
     CGF.EmitBlock(unexpectedBB);
   }
@@ -597,11 +650,6 @@
 }
 
 void CodeGenFunction::EmitCXXTryStmt(const CXXTryStmt &S) {
-  if (CGM.getTarget().getTriple().isKnownWindowsMSVCEnvironment()) {
-    ErrorUnsupported(&S, "try statement");
-    return;
-  }
-
   EnterCXXTryStmt(S);
   EmitStmt(S.getTryBlock());
   ExitCXXTryStmt(S);
@@ -703,8 +751,15 @@
   assert(EHStack.requiresLandingPad());
   assert(!EHStack.empty());
 
-  if (!CGM.getLangOpts().Exceptions)
-    return nullptr;
+  // If exceptions are disabled, there are usually no landingpads. However, when
+  // SEH is enabled, functions using SEH still get landingpads.
+  const LangOptions &LO = CGM.getLangOpts();
+  if (!LO.Exceptions) {
+    if (!LO.Borland && !LO.MicrosoftExt)
+      return nullptr;
+    if (!currentFunctionUsesSEHTry())
+      return nullptr;
+  }
 
   // Check the innermost scope for a cached landing pad.  If this is
   // a non-EH cleanup, we'll check enclosing scopes in EmitLandingPad.
@@ -742,18 +797,16 @@
 
   // Save the current IR generation state.
   CGBuilderTy::InsertPoint savedIP = Builder.saveAndClearIP();
-  SaveAndRestoreLocation AutoRestoreLocation(*this, Builder);
-  if (CGDebugInfo *DI = getDebugInfo())
-    DI->EmitLocation(Builder, CurEHLocation);
+  auto DL = ApplyDebugLocation::CreateDefaultArtificial(*this, CurEHLocation);
 
-  const EHPersonality &personality = EHPersonality::get(CGM);
+  const EHPersonality &personality = EHPersonality::get(*this);
 
   // Create and configure the landing pad.
   llvm::BasicBlock *lpad = createBasicBlock("lpad");
   EmitBlock(lpad);
 
   llvm::LandingPadInst *LPadInst =
-    Builder.CreateLandingPad(llvm::StructType::get(Int8PtrTy, Int32Ty, NULL),
+    Builder.CreateLandingPad(llvm::StructType::get(Int8PtrTy, Int32Ty, nullptr),
                              getOpaquePersonalityFn(CGM, personality), 0);
 
   llvm::Value *LPadExn = Builder.CreateExtractValue(LPadInst, 0);
@@ -772,8 +825,8 @@
   bool hasFilter = false;
   SmallVector<llvm::Value*, 4> filterTypes;
   llvm::SmallPtrSet<llvm::Value*, 4> catchTypes;
-  for (EHScopeStack::iterator I = EHStack.begin(), E = EHStack.end();
-         I != E; ++I) {
+  for (EHScopeStack::iterator I = EHStack.begin(), E = EHStack.end(); I != E;
+       ++I) {
 
     switch (I->getKind()) {
     case EHScope::Cleanup:
@@ -1281,7 +1334,7 @@
     // constructor function-try-block's catch handler (p14), so this
     // really only applies to destructors.
     if (doImplicitRethrow && HaveInsertPoint()) {
-      EmitRuntimeCallOrInvoke(getReThrowFn(CGM));
+      CGM.getCXXABI().emitRethrow(*this, /*isNoReturn*/false);
       Builder.CreateUnreachable();
       Builder.ClearInsertionPoint();
     }
@@ -1525,6 +1578,8 @@
     // we don't want it to turn into an exported symbol.
     fn->setLinkage(llvm::Function::LinkOnceODRLinkage);
     fn->setVisibility(llvm::Function::HiddenVisibility);
+    if (CGM.supportsCOMDAT())
+      fn->setComdat(CGM.getModule().getOrInsertComdat(fn->getName()));
 
     // Set up the function.
     llvm::BasicBlock *entry =
@@ -1563,9 +1618,9 @@
   Builder.SetInsertPoint(TerminateLandingPad);
 
   // Tell the backend that this is a landing pad.
-  const EHPersonality &Personality = EHPersonality::get(CGM);
+  const EHPersonality &Personality = EHPersonality::get(*this);
   llvm::LandingPadInst *LPadInst =
-    Builder.CreateLandingPad(llvm::StructType::get(Int8PtrTy, Int32Ty, NULL),
+    Builder.CreateLandingPad(llvm::StructType::get(Int8PtrTy, Int32Ty, nullptr),
                              getOpaquePersonalityFn(CGM, Personality), 0);
   LPadInst->addClause(getCatchAllValue(*this));
 
@@ -1622,15 +1677,14 @@
   EHResumeBlock = createBasicBlock("eh.resume");
   Builder.SetInsertPoint(EHResumeBlock);
 
-  const EHPersonality &Personality = EHPersonality::get(CGM);
+  const EHPersonality &Personality = EHPersonality::get(*this);
 
   // This can always be a call because we necessarily didn't find
   // anything on the EH stack which needs our help.
   const char *RethrowName = Personality.CatchallRethrowFn;
   if (RethrowName != nullptr && !isCleanup) {
     EmitRuntimeCall(getCatchallRethrowFn(CGM, RethrowName),
-                    getExceptionFromSlot())
-      ->setDoesNotReturn();
+                    getExceptionFromSlot())->setDoesNotReturn();
     Builder.CreateUnreachable();
     Builder.restoreIP(SavedIP);
     return EHResumeBlock;
@@ -1641,7 +1695,7 @@
   llvm::Value *Sel = getSelectorFromSlot();
 
   llvm::Type *LPadType = llvm::StructType::get(Exn->getType(),
-                                               Sel->getType(), NULL);
+                                               Sel->getType(), nullptr);
   llvm::Value *LPadVal = llvm::UndefValue::get(LPadType);
   LPadVal = Builder.CreateInsertValue(LPadVal, Exn, 0, "lpad.val");
   LPadVal = Builder.CreateInsertValue(LPadVal, Sel, 1, "lpad.val");
@@ -1652,9 +1706,307 @@
 }
 
 void CodeGenFunction::EmitSEHTryStmt(const SEHTryStmt &S) {
-  CGM.ErrorUnsupported(&S, "SEH __try");
+  // FIXME: Implement SEH on other architectures; only x86_64 MSVC is handled.
+  const llvm::Triple &T = CGM.getTarget().getTriple();
+  if (T.getArch() != llvm::Triple::x86_64 ||
+      !T.isKnownWindowsMSVCEnvironment()) {
+    ErrorUnsupported(&S, "__try statement");
+    return;
+  }
+
+  SEHFinallyInfo FI;  // Handed to the Enter/Exit helpers below.
+  EnterSEHTryStmt(S, FI);
+  {
+    JumpDest TryExit = getJumpDestInCurrentScope("__try.__leave");
+
+    SEHTryEpilogueStack.push_back(&TryExit);  // Exit used by __leave.
+    EmitStmt(S.getTryBlock());
+    SEHTryEpilogueStack.pop_back();
+
+    if (!TryExit.getBlock()->use_empty())
+      EmitBlock(TryExit.getBlock(), /*IsFinished=*/true);
+    else
+      delete TryExit.getBlock();  // No branch targets it; discard the block.
+  }
+  ExitSEHTryStmt(S, FI);
+}
+
+namespace {  // Cleanup that branches into a __finally block.
+struct PerformSEHFinally : EHScopeStack::Cleanup  {
+  CodeGenFunction::SEHFinallyInfo *FI;
+  PerformSEHFinally(CodeGenFunction::SEHFinallyInfo *FI) : FI(FI) {}
+
+  void Emit(CodeGenFunction &CGF, Flags F) override {
+    // Cleanups are emitted at most twice: once for normal control flow and once
+    // for exception control flow. Branch into the finally block, and remember
+    // the continuation block so we can branch out later.
+    if (!FI->FinallyBB) {  // First emission: create the shared block.
+      FI->FinallyBB = CGF.createBasicBlock("__finally");
+      FI->FinallyBB->insertInto(CGF.CurFn);
+      FI->FinallyBB->moveAfter(CGF.Builder.GetInsertBlock());
+    }
+
+    // Store 1 if this is the EH emission and 0 otherwise, then branch in.
+    CGF.Builder.CreateStore(
+        llvm::ConstantInt::get(CGF.Int8Ty, F.isForEHCleanup()),
+        CGF.getAbnormalTerminationSlot());
+    CGF.Builder.CreateBr(FI->FinallyBB);
+
+    // Create a continuation block for normal or exceptional control.
+    if (F.isForEHCleanup()) {
+      assert(!FI->ResumeBB && "double emission for EH");
+      FI->ResumeBB = CGF.createBasicBlock("__finally.resume");
+      CGF.EmitBlock(FI->ResumeBB);
+    } else {
+      assert(F.isForNormalCleanup() && !FI->ContBB && "double normal emission");
+      FI->ContBB = CGF.createBasicBlock("__finally.cont");
+      CGF.EmitBlock(FI->ContBB);
+      // Try to keep source order.
+      FI->ContBB->moveAfter(FI->FinallyBB);
+    }
+  }
+};
+}  // end anonymous namespace
+
+/// Emit an outlined function that evaluates the filter expression of \p Except
+/// and returns the result as an int. Locals of \p ParentCGF cannot be
+/// captured yet; any use of one is reported as unsupported.
+llvm::Function *
+CodeGenFunction::GenerateSEHFilterFunction(CodeGenFunction &ParentCGF,
+                                           const SEHExceptStmt &Except) {
+  const Decl *ParentCodeDecl = ParentCGF.CurCodeDecl;
+  llvm::Function *ParentFn = ParentCGF.CurFn;
+
+  Expr *FilterExpr = Except.getFilterExpr();
+
+  // Get the mangled function name.
+  SmallString<128> Name;
+  {
+    llvm::raw_svector_ostream OS(Name);
+    const NamedDecl *Parent = dyn_cast_or_null<NamedDecl>(ParentCodeDecl);
+    assert(Parent && "FIXME: handle unnamed decls (lambdas, blocks) with SEH");
+    CGM.getCXXABI().getMangleContext().mangleSEHFilterExpression(Parent, OS);
+  }
+
+  // Arrange a function with the declaration:
+  // int filt(EXCEPTION_POINTERS *exception_pointers, void *frame_pointer)
+  QualType RetTy = getContext().IntTy;
+  FunctionArgList Args;
+  SEHPointersDecl = ImplicitParamDecl::Create(  // Saved for later loads.
+      getContext(), nullptr, FilterExpr->getLocStart(),
+      &getContext().Idents.get("exception_pointers"), getContext().VoidPtrTy);
+  Args.push_back(SEHPointersDecl);
+  Args.push_back(ImplicitParamDecl::Create(
+      getContext(), nullptr, FilterExpr->getLocStart(),
+      &getContext().Idents.get("frame_pointer"), getContext().VoidPtrTy));
+  const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeFreeFunctionDeclaration(
+      RetTy, Args, FunctionType::ExtInfo(), /*isVariadic=*/false);
+  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+  llvm::Function *Fn = llvm::Function::Create(FnTy, ParentFn->getLinkage(),
+                                              Name.str(), &CGM.getModule());
+  // The filter is either in the same comdat as the function, or it's internal.
+  if (llvm::Comdat *C = ParentFn->getComdat()) {
+    Fn->setComdat(C);
+  } else if (ParentFn->hasWeakLinkage() || ParentFn->hasLinkOnceLinkage()) {
+    // FIXME: Unreachable with Rafael's changes?
+    llvm::Comdat *C = CGM.getModule().getOrInsertComdat(ParentFn->getName());
+    ParentFn->setComdat(C);
+    Fn->setComdat(C);
+  } else {
+    Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
+  }
+
+  StartFunction(GlobalDecl(), RetTy, Fn, FnInfo, Args,
+                FilterExpr->getLocStart(), FilterExpr->getLocStart());
+
+  EmitSEHExceptionCodeSave();  // Copy the exception code into the slot.
+
+  // Insert dummy allocas for every local variable in scope. We'll initialize
+  // them and prune the unused ones after we find out which ones were
+  // referenced.
+  for (const auto &DeclPtrs : ParentCGF.LocalDeclMap) {
+    const Decl *VD = DeclPtrs.first;
+    llvm::Value *Ptr = DeclPtrs.second;
+    auto *ValTy = cast<llvm::PointerType>(Ptr->getType())->getElementType();
+    LocalDeclMap[VD] = CreateTempAlloca(ValTy, Ptr->getName() + ".filt");
+  }
+
+  // Emit the original filter expression, convert to i32, and return.
+  llvm::Value *R = EmitScalarExpr(FilterExpr);
+  R = Builder.CreateIntCast(R, CGM.IntTy,
+                            FilterExpr->getType()->isSignedIntegerType());
+  Builder.CreateStore(R, ReturnValue);
+
+  FinishFunction(FilterExpr->getLocEnd());
+
+  for (const auto &DeclPtrs : ParentCGF.LocalDeclMap) {
+    const Decl *VD = DeclPtrs.first;
+    auto *Alloca = cast<llvm::AllocaInst>(LocalDeclMap[VD]);
+    if (Alloca->hasNUses(0)) {  // Never referenced by the filter.
+      Alloca->eraseFromParent();
+      continue;
+    }
+    ErrorUnsupported(FilterExpr,
+                     "SEH filter expression local variable capture");
+  }
+
+  return Fn;
+}
+
+void CodeGenFunction::EmitSEHExceptionCodeSave() {
+  // Save the exception code in the exception slot to unify exception access in
+  // the filter function and the landing pad.
+  // struct EXCEPTION_POINTERS {
+  //   EXCEPTION_RECORD *ExceptionRecord;
+  //   CONTEXT *ContextRecord;
+  // };
+  // void *exn.slot =
+  //     (void *)(uintptr_t)exception_pointers->ExceptionRecord->ExceptionCode;
+  llvm::Value *Ptrs = Builder.CreateLoad(GetAddrOfLocalVar(SEHPointersDecl));
+  llvm::Type *RecordTy = CGM.Int32Ty->getPointerTo();  // Record as i32*.
+  llvm::Type *PtrsTy = llvm::StructType::get(RecordTy, CGM.VoidPtrTy, nullptr);
+  Ptrs = Builder.CreateBitCast(Ptrs, PtrsTy->getPointerTo());
+  llvm::Value *Rec = Builder.CreateStructGEP(Ptrs, 0);
+  Rec = Builder.CreateLoad(Rec);
+  llvm::Value *Code = Builder.CreateLoad(Rec);
+  Code = Builder.CreateZExt(Code, CGM.IntPtrTy);  // Widen before inttoptr.
+  // FIXME: Change landing pads to produce {i32, i32} and make the exception
+  // slot an i32.
+  Code = Builder.CreateIntToPtr(Code, CGM.VoidPtrTy);
+  Builder.CreateStore(Code, getExceptionSlot());
+}
+
+llvm::Value *CodeGenFunction::EmitSEHExceptionInfo() {
+  // Sema should diagnose calling this builtin outside of a filter context, but
+  // yield undef instead of crashing if no exception_pointers parameter exists.
+  if (!SEHPointersDecl)
+    return llvm::UndefValue::get(Int8PtrTy);
+  return Builder.CreateLoad(GetAddrOfLocalVar(SEHPointersDecl));
+}
+
+llvm::Value *CodeGenFunction::EmitSEHExceptionCode() {
+  // In a landing pad or filter function the exception slot holds the code as a
+  // pointer-sized value; convert it back to an i32.
+  assert(ExceptionSlot);
+  llvm::Value *Code =
+      Builder.CreatePtrToInt(getExceptionFromSlot(), CGM.IntPtrTy);
+  return Builder.CreateTrunc(Code, CGM.Int32Ty);
+}
+
+llvm::Value *CodeGenFunction::EmitSEHAbnormalTermination() {
+  // Load the abnormal termination flag and widen it to i32. The slot is
+  // uninitialized outside of __finally blocks, which we should diagnose.
+  llvm::Value *IsEH = Builder.CreateLoad(getAbnormalTerminationSlot());
+  return Builder.CreateZExt(IsEH, Int32Ty);
+}
+
+void CodeGenFunction::EnterSEHTryStmt(const SEHTryStmt &S, SEHFinallyInfo &FI) {
+  if (S.getFinallyHandler()) {
+    // Push a normal-and-EH cleanup that will branch into the __finally block.
+    EHStack.pushCleanup<PerformSEHFinally>(NormalAndEHCleanup, &FI);
+    return;
+  }
+
+  // Otherwise, we must have an __except block.
+  SEHExceptStmt *Except = S.getExceptHandler();
+  assert(Except);
+  EHCatchScope *CatchScope = EHStack.pushCatch(1);  // One handler slot.
+
+  // If the filter is known to evaluate to 1, then we can use the clause "catch
+  // i8* null".
+  llvm::Constant *C =
+      CGM.EmitConstantExpr(Except->getFilterExpr(), getContext().IntTy, this);
+  if (C && C->isOneValue()) {
+    CatchScope->setCatchAllHandler(0, createBasicBlock("__except"));
+    return;
+  }
+
+  // In general, we have to emit an outlined filter function. Use the function
+  // in place of the RTTI typeinfo global that C++ EH uses.
+  CodeGenFunction FilterCGF(CGM, /*suppressNewContext=*/true);
+  llvm::Function *FilterFunc =
+      FilterCGF.GenerateSEHFilterFunction(*this, *Except);
+  llvm::Constant *OpaqueFunc =
+      llvm::ConstantExpr::getBitCast(FilterFunc, Int8PtrTy);
+  CatchScope->setHandler(0, OpaqueFunc, createBasicBlock("__except"));
+}
+
+void CodeGenFunction::ExitSEHTryStmt(const SEHTryStmt &S, SEHFinallyInfo &FI) {
+  // Just pop the cleanup if it's a __finally block.
+  if (const SEHFinallyStmt *Finally = S.getFinallyHandler()) {
+    PopCleanupBlock();
+    assert(FI.ContBB && "did not emit normal cleanup");
+
+    // Emit the __finally body into FinallyBB, then branch to the right exit.
+    CGBuilderTy::InsertPoint SavedIP = Builder.saveIP();
+    Builder.SetInsertPoint(FI.FinallyBB);
+    EmitStmt(Finally->getBlock());
+
+    if (HaveInsertPoint()) {
+      if (FI.ResumeBB) {  // The cleanup was also emitted for EH.
+        llvm::Value *IsEH = Builder.CreateLoad(getAbnormalTerminationSlot(),
+                                               "abnormal.termination");
+        IsEH = Builder.CreateICmpEQ(IsEH, llvm::ConstantInt::get(Int8Ty, 0));
+        Builder.CreateCondBr(IsEH, FI.ContBB, FI.ResumeBB);  // 0 => ContBB
+      } else {
+        // There was nothing exceptional in the try body, so we only have normal
+        // control flow.
+        Builder.CreateBr(FI.ContBB);
+      }
+    }
+
+    Builder.restoreIP(SavedIP);
+
+    return;
+  }
+
+  // Otherwise, we must have an __except block.
+  const SEHExceptStmt *Except = S.getExceptHandler();
+  assert(Except && "__try must have __finally xor __except");
+  EHCatchScope &CatchScope = cast<EHCatchScope>(*EHStack.begin());
+
+  // Don't emit the __except block if the __try block lacked invokes.
+  // TODO: Model unwind edges from instructions, either with iload / istore or
+  // a try body function.
+  if (!CatchScope.hasEHBranches()) {
+    CatchScope.clearHandlerBlocks();
+    EHStack.popCatch();
+    return;
+  }
+
+  // The fall-through block.
+  llvm::BasicBlock *ContBB = createBasicBlock("__try.cont");
+
+  // We just emitted the body of the __try; jump to the continue block.
+  if (HaveInsertPoint())
+    Builder.CreateBr(ContBB);
+
+  // Check if our filter function returned true.
+  emitCatchDispatchBlock(*this, CatchScope);
+
+  // Grab the block before we pop the handler.
+  llvm::BasicBlock *ExceptBB = CatchScope.getHandler(0).Block;
+  EHStack.popCatch();
+
+  EmitBlockAfterUses(ExceptBB);
+
+  // Emit the __except body.
+  EmitStmt(Except->getBlock());
+
+  if (HaveInsertPoint())
+    Builder.CreateBr(ContBB);  // Rejoin the fall-through path.
+
+  EmitBlock(ContBB);
 }
 
 void CodeGenFunction::EmitSEHLeaveStmt(const SEHLeaveStmt &S) {
-  CGM.ErrorUnsupported(&S, "SEH __leave");
+  // If this code is reachable then emit a stop point (if generating
+  // debug info). We have to do this ourselves because we are on the
+  // "simple" statement path.
+  if (HaveInsertPoint())
+    EmitStopPoint(&S);
+
+  assert(!SEHTryEpilogueStack.empty() &&
+         "sema should have rejected this __leave");
+  EmitBranchThroughCleanup(*SEHTryEpilogueStack.back());  // Innermost __try.
 }
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index 1a3a61a..78e80a1 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -21,8 +21,8 @@
 #include "CodeGenModule.h"
 #include "TargetInfo.h"
 #include "clang/AST/ASTContext.h"
-#include "clang/AST/DeclObjC.h"
 #include "clang/AST/Attr.h"
+#include "clang/AST/DeclObjC.h"
 #include "clang/Frontend/CodeGenOptions.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/StringExtras.h"
@@ -807,6 +807,7 @@
 /// length type, this is not possible.
 ///
 LValue CodeGenFunction::EmitLValue(const Expr *E) {
+  ApplyDebugLocation DL(*this, E);
   switch (E->getStmtClass()) {
   default: return EmitUnsupportedLValue(E, "l-value expression");
 
@@ -819,10 +820,14 @@
     return EmitObjCIsaExpr(cast<ObjCIsaExpr>(E));
   case Expr::BinaryOperatorClass:
     return EmitBinaryOperatorLValue(cast<BinaryOperator>(E));
-  case Expr::CompoundAssignOperatorClass:
-    if (!E->getType()->isAnyComplexType())
+  case Expr::CompoundAssignOperatorClass: {
+    QualType Ty = E->getType();
+    if (const AtomicType *AT = Ty->getAs<AtomicType>())
+      Ty = AT->getValueType();
+    if (!Ty->isAnyComplexType())
       return EmitCompoundAssignmentLValue(cast<CompoundAssignOperator>(E));
     return EmitComplexCompoundAssignmentLValue(cast<CompoundAssignOperator>(E));
+  }
   case Expr::CallExprClass:
   case Expr::CXXMemberCallExprClass:
   case Expr::CXXOperatorCallExprClass:
@@ -1135,7 +1140,7 @@
   }
 
   // Atomic operations have to be done on integral types.
-  if (Ty->isAtomicType()) {
+  if (Ty->isAtomicType() || typeIsSuitableForInlineAtomic(Ty, Volatile)) {
     LValue lvalue = LValue::MakeAddr(Addr, Ty,
                                      CharUnits::fromQuantity(Alignment),
                                      getContext(), TBAAInfo);
@@ -1254,7 +1259,8 @@
 
   Value = EmitToMemory(Value, Ty);
 
-  if (Ty->isAtomicType()) {
+  if (Ty->isAtomicType() ||
+      (!isInit && typeIsSuitableForInlineAtomic(Ty, Volatile))) {
     EmitAtomicStore(RValue::get(Value),
                     LValue::MakeAddr(Addr, Ty,
                                      CharUnits::fromQuantity(Alignment),
@@ -1415,8 +1421,8 @@
 RValue CodeGenFunction::EmitLoadOfGlobalRegLValue(LValue LV) {
   assert((LV.getType()->isIntegerType() || LV.getType()->isPointerType()) &&
          "Bad type for register variable");
-  llvm::MDNode *RegName = dyn_cast<llvm::MDNode>(LV.getGlobalReg());
-  assert(RegName && "Register LValue is not metadata");
+  llvm::MDNode *RegName = cast<llvm::MDNode>(
+      cast<llvm::MetadataAsValue>(LV.getGlobalReg())->getMetadata());
 
   // We accept integer and pointer types only
   llvm::Type *OrigTy = CGM.getTypes().ConvertType(LV.getType());
@@ -1426,7 +1432,8 @@
   llvm::Type *Types[] = { Ty };
 
   llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
-  llvm::Value *Call = Builder.CreateCall(F, RegName);
+  llvm::Value *Call = Builder.CreateCall(
+      F, llvm::MetadataAsValue::get(Ty->getContext(), RegName));
   if (OrigTy->isPointerTy())
     Call = Builder.CreateIntToPtr(Call, OrigTy);
   return RValue::get(Call);
@@ -1676,7 +1683,8 @@
 void CodeGenFunction::EmitStoreThroughGlobalRegLValue(RValue Src, LValue Dst) {
   assert((Dst.getType()->isIntegerType() || Dst.getType()->isPointerType()) &&
          "Bad type for register variable");
-  llvm::MDNode *RegName = dyn_cast<llvm::MDNode>(Dst.getGlobalReg());
+  llvm::MDNode *RegName = cast<llvm::MDNode>(
+      cast<llvm::MetadataAsValue>(Dst.getGlobalReg())->getMetadata());
   assert(RegName && "Register LValue is not metadata");
 
   // We accept integer and pointer types only
@@ -1690,7 +1698,8 @@
   llvm::Value *Value = Src.getScalarVal();
   if (OrigTy->isPointerTy())
     Value = Builder.CreatePtrToInt(Value, Ty);
-  Builder.CreateCall2(F, RegName, Value);
+  Builder.CreateCall2(F, llvm::MetadataAsValue::get(Ty->getContext(), RegName),
+                      Value);
 }
 
 // setObjCGCLValueClass - sets class of the lvalue for the purpose of
@@ -1804,7 +1813,7 @@
 static LValue EmitThreadPrivateVarDeclLValue(
     CodeGenFunction &CGF, const VarDecl *VD, QualType T, llvm::Value *V,
     llvm::Type *RealVarTy, CharUnits Alignment, SourceLocation Loc) {
-  V = CGF.CGM.getOpenMPRuntime().getOMPAddrOfThreadPrivate(CGF, VD, V, Loc);
+  V = CGF.CGM.getOpenMPRuntime().getAddrOfThreadPrivate(CGF, VD, V, Loc);
   V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy);
   return CGF.MakeAddrLValue(V, T, Alignment);
 }
@@ -1885,10 +1894,12 @@
   if (M->getNumOperands() == 0) {
     llvm::MDString *Str = llvm::MDString::get(CGM.getLLVMContext(),
                                               Asm->getLabel());
-    llvm::Value *Ops[] = { Str };
+    llvm::Metadata *Ops[] = {Str};
     M->addOperand(llvm::MDNode::get(CGM.getLLVMContext(), Ops));
   }
-  return LValue::MakeGlobalReg(M->getOperand(0), VD->getType(), Alignment);
+  return LValue::MakeGlobalReg(
+      llvm::MetadataAsValue::get(CGM.getLLVMContext(), M->getOperand(0)),
+      VD->getType(), Alignment);
 }
 
 LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
@@ -1914,6 +1925,22 @@
       // FIXME: Eventually we will want to emit vector element references.
       return MakeAddrLValue(Val, T, Alignment);
     }
+
+    // Check for captured variables.
+    if (E->refersToEnclosingVariableOrCapture()) {
+      if (auto *FD = LambdaCaptureFields.lookup(VD))
+        return EmitCapturedFieldLValue(*this, FD, CXXABIThisValue);
+      else if (CapturedStmtInfo) {
+        if (auto *V = LocalDeclMap.lookup(VD))
+          return MakeAddrLValue(V, T, Alignment);
+        else
+          return EmitCapturedFieldLValue(*this, CapturedStmtInfo->lookup(VD),
+                                         CapturedStmtInfo->getContextValue());
+      }
+      assert(isa<BlockDecl>(CurCodeDecl));
+      return MakeAddrLValue(GetAddrOfBlockDecl(VD, VD->hasAttr<BlocksAttr>()),
+                            T, Alignment);
+    }
   }
 
   // FIXME: We should be able to assert this for FunctionDecls as well!
@@ -1947,21 +1974,6 @@
           *this, VD, T, V, getTypes().ConvertTypeForMem(VD->getType()),
           Alignment, E->getExprLoc());
 
-    // Use special handling for lambdas.
-    if (!V) {
-      if (FieldDecl *FD = LambdaCaptureFields.lookup(VD)) {
-        return EmitCapturedFieldLValue(*this, FD, CXXABIThisValue);
-      } else if (CapturedStmtInfo) {
-        if (const FieldDecl *FD = CapturedStmtInfo->lookup(VD))
-          return EmitCapturedFieldLValue(*this, FD,
-                                         CapturedStmtInfo->getContextValue());
-      }
-
-      assert(isa<BlockDecl>(CurCodeDecl) && E->refersToEnclosingLocal());
-      return MakeAddrLValue(GetAddrOfBlockDecl(VD, isBlockVariable),
-                            T, Alignment);
-    }
-
     assert(V && "DeclRefExpr not entered in LocalDeclMap?");
 
     if (isBlockVariable)
@@ -2201,9 +2213,10 @@
 namespace {
 /// \brief Specify under what conditions this check can be recovered
 enum class CheckRecoverableKind {
-  /// Always terminate program execution if this check fails
+  /// Always terminate program execution if this check fails.
   Unrecoverable,
-  /// Check supports recovering, allows user to specify which
+  /// Check supports recovering; the runtime has both fatal (noreturn) and
+  /// non-fatal handlers for this check.
   Recoverable,
   /// Runtime conditionally aborts, always need to support recovery.
   AlwaysRecoverable
@@ -2222,42 +2235,95 @@
   }
 }
 
+static void emitCheckHandlerCall(CodeGenFunction &CGF,
+                                 llvm::FunctionType *FnType,
+                                 ArrayRef<llvm::Value *> FnArgs,
+                                 StringRef CheckName,
+                                 CheckRecoverableKind RecoverKind, bool IsFatal,
+                                 llvm::BasicBlock *ContBB) {
+  assert(IsFatal || RecoverKind != CheckRecoverableKind::Unrecoverable);
+  bool NeedsAbortSuffix =  // Two-variant checks use "_abort" for the fatal one.
+      IsFatal && RecoverKind != CheckRecoverableKind::Unrecoverable;
+  std::string FnName = ("__ubsan_handle_" + CheckName +
+                        (NeedsAbortSuffix ? "_abort" : "")).str();
+  bool MayReturn =  // Only non-fatal or AlwaysRecoverable handlers return.
+      !IsFatal || RecoverKind == CheckRecoverableKind::AlwaysRecoverable;
+
+  llvm::AttrBuilder B;  // Attributes for the handler declaration.
+  if (!MayReturn) {
+    B.addAttribute(llvm::Attribute::NoReturn)
+        .addAttribute(llvm::Attribute::NoUnwind);
+  }
+  B.addAttribute(llvm::Attribute::UWTable);
+
+  llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(
+      FnType, FnName,
+      llvm::AttributeSet::get(CGF.getLLVMContext(),
+                              llvm::AttributeSet::FunctionIndex, B));
+  llvm::CallInst *HandlerCall = CGF.EmitNounwindRuntimeCall(Fn, FnArgs);
+  if (!MayReturn) {
+    HandlerCall->setDoesNotReturn();
+    CGF.Builder.CreateUnreachable();  // Nothing follows a noreturn handler.
+  } else {
+    CGF.Builder.CreateBr(ContBB);  // Handler may return; continue at ContBB.
+  }
+}
+
 void CodeGenFunction::EmitCheck(
     ArrayRef<std::pair<llvm::Value *, SanitizerKind>> Checked,
     StringRef CheckName, ArrayRef<llvm::Constant *> StaticArgs,
     ArrayRef<llvm::Value *> DynamicArgs) {
   assert(IsSanitizerScope);
   assert(Checked.size() > 0);
-  llvm::Value *Cond = Checked[0].first;
+
+  llvm::Value *FatalCond = nullptr;
+  llvm::Value *RecoverableCond = nullptr;
+  for (int i = 0, n = Checked.size(); i < n; ++i) {
+    llvm::Value *Check = Checked[i].first;
+    llvm::Value *&Cond =
+        CGM.getCodeGenOpts().SanitizeRecover.has(Checked[i].second)
+            ? RecoverableCond
+            : FatalCond;
+    Cond = Cond ? Builder.CreateAnd(Cond, Check) : Check;
+  }
+
+  llvm::Value *JointCond;
+  if (FatalCond && RecoverableCond)
+    JointCond = Builder.CreateAnd(FatalCond, RecoverableCond);
+  else
+    JointCond = FatalCond ? FatalCond : RecoverableCond;
+  assert(JointCond);
+
   CheckRecoverableKind RecoverKind = getRecoverableKind(Checked[0].second);
   assert(SanOpts.has(Checked[0].second));
+#ifndef NDEBUG
   for (int i = 1, n = Checked.size(); i < n; ++i) {
-    Cond = Builder.CreateAnd(Cond, Checked[i].first);
     assert(RecoverKind == getRecoverableKind(Checked[i].second) &&
            "All recoverable kinds in a single check must be same!");
     assert(SanOpts.has(Checked[i].second));
   }
+#endif
 
   if (CGM.getCodeGenOpts().SanitizeUndefinedTrapOnError) {
-    assert (RecoverKind != CheckRecoverableKind::AlwaysRecoverable &&
-            "Runtime call required for AlwaysRecoverable kind!");
-    return EmitTrapCheck(Cond);
+    assert(RecoverKind != CheckRecoverableKind::AlwaysRecoverable &&
+           "Runtime call required for AlwaysRecoverable kind!");
+    // Assume that -fsanitize-undefined-trap-on-error overrides
+    // -fsanitize-recover= options, as we can only print a meaningful error
+    // message and recover if we have runtime support.
+    return EmitTrapCheck(JointCond);
   }
 
   llvm::BasicBlock *Cont = createBasicBlock("cont");
-
-  llvm::BasicBlock *Handler = createBasicBlock("handler." + CheckName);
-
-  llvm::Instruction *Branch = Builder.CreateCondBr(Cond, Cont, Handler);
-
+  llvm::BasicBlock *Handlers = createBasicBlock("handler." + CheckName);
+  llvm::Instruction *Branch = Builder.CreateCondBr(JointCond, Cont, Handlers);
   // Give hint that we very much don't expect to execute the handler
   // Value chosen to match UR_NONTAKEN_WEIGHT, see BranchProbabilityInfo.cpp
   llvm::MDBuilder MDHelper(getLLVMContext());
   llvm::MDNode *Node = MDHelper.createBranchWeights((1U << 20) - 1, 1);
   Branch->setMetadata(llvm::LLVMContext::MD_prof, Node);
+  EmitBlock(Handlers);
 
-  EmitBlock(Handler);
-
+  // Emit handler arguments and create handler function type.
   llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs);
   auto *InfoPtr =
       new llvm::GlobalVariable(CGM.getModule(), Info->getType(), false,
@@ -2280,34 +2346,27 @@
     ArgTypes.push_back(IntPtrTy);
   }
 
-  bool Recover = RecoverKind == CheckRecoverableKind::AlwaysRecoverable ||
-                 (RecoverKind == CheckRecoverableKind::Recoverable &&
-                  CGM.getCodeGenOpts().SanitizeRecover);
-
   llvm::FunctionType *FnType =
     llvm::FunctionType::get(CGM.VoidTy, ArgTypes, false);
-  llvm::AttrBuilder B;
-  if (!Recover) {
-    B.addAttribute(llvm::Attribute::NoReturn)
-     .addAttribute(llvm::Attribute::NoUnwind);
-  }
-  B.addAttribute(llvm::Attribute::UWTable);
 
-  // Checks that have two variants use a suffix to differentiate them
-  bool NeedsAbortSuffix = RecoverKind != CheckRecoverableKind::Unrecoverable &&
-                          !CGM.getCodeGenOpts().SanitizeRecover;
-  std::string FunctionName = ("__ubsan_handle_" + CheckName +
-                              (NeedsAbortSuffix? "_abort" : "")).str();
-  llvm::Value *Fn = CGM.CreateRuntimeFunction(
-      FnType, FunctionName,
-      llvm::AttributeSet::get(getLLVMContext(),
-                              llvm::AttributeSet::FunctionIndex, B));
-  llvm::CallInst *HandlerCall = EmitNounwindRuntimeCall(Fn, Args);
-  if (Recover) {
-    Builder.CreateBr(Cont);
+  if (!FatalCond || !RecoverableCond) {
+    // Simple case: we need to generate a single handler call, either
+    // fatal, or non-fatal.
+    emitCheckHandlerCall(*this, FnType, Args, CheckName, RecoverKind,
+                         (FatalCond != nullptr), Cont);
   } else {
-    HandlerCall->setDoesNotReturn();
-    Builder.CreateUnreachable();
+    // Emit two handler calls: the first for the set of unrecoverable checks,
+    // the second for the recoverable ones.
+    llvm::BasicBlock *NonFatalHandlerBB =
+        createBasicBlock("non_fatal." + CheckName);
+    llvm::BasicBlock *FatalHandlerBB = createBasicBlock("fatal." + CheckName);
+    Builder.CreateCondBr(FatalCond, NonFatalHandlerBB, FatalHandlerBB);
+    EmitBlock(FatalHandlerBB);
+    emitCheckHandlerCall(*this, FnType, Args, CheckName, RecoverKind, true,
+                         NonFatalHandlerBB);
+    EmitBlock(NonFatalHandlerBB);
+    emitCheckHandlerCall(*this, FnType, Args, CheckName, RecoverKind, false,
+                         Cont);
   }
 
   EmitBlock(Cont);
@@ -3006,19 +3065,6 @@
 
 RValue CodeGenFunction::EmitCallExpr(const CallExpr *E,
                                      ReturnValueSlot ReturnValue) {
-  if (CGDebugInfo *DI = getDebugInfo()) {
-    SourceLocation Loc = E->getLocStart();
-    // Force column info to be generated so we can differentiate
-    // multiple call sites on the same line in the debug info.
-    // FIXME: This is insufficient. Two calls coming from the same macro
-    // expansion will still get the same line/column and break debug info. It's
-    // possible that LLVM can be fixed to not rely on this uniqueness, at which
-    // point this workaround can be removed.
-    const FunctionDecl* Callee = E->getDirectCallee();
-    bool ForceColumnInfo = Callee && Callee->isInlineSpecified();
-    DI->EmitLocation(Builder, Loc, ForceColumnInfo);
-  }
-
   // Builtins never have block type.
   if (E->getCallee()->getType()->isBlockPointerType())
     return EmitBlockCallExpr(E, ReturnValue);
@@ -3032,7 +3078,7 @@
   const Decl *TargetDecl = E->getCalleeDecl();
   if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) {
     if (unsigned builtinID = FD->getBuiltinID())
-      return EmitBuiltinExpr(FD, builtinID, E);
+      return EmitBuiltinExpr(FD, builtinID, E, ReturnValue);
   }
 
   if (const auto *CE = dyn_cast<CXXOperatorCallExpr>(E))
@@ -3152,7 +3198,7 @@
   if (!RV.isScalar())
     return MakeAddrLValue(RV.getAggregateAddr(), E->getType());
 
-  assert(E->getCallReturnType()->isReferenceType() &&
+  assert(E->getCallReturnType(getContext())->isReferenceType() &&
          "Can't have a scalar return unless the return type is a "
          "reference type!");
 
@@ -3267,7 +3313,7 @@
 
 RValue CodeGenFunction::EmitCall(QualType CalleeType, llvm::Value *Callee,
                                  const CallExpr *E, ReturnValueSlot ReturnValue,
-                                 const Decl *TargetDecl) {
+                                 const Decl *TargetDecl, llvm::Value *Chain) {
   // Get the actual function type. The callee type will always be a pointer to
   // function type or a block pointer type.
   assert(CalleeType->isFunctionPointerType() &&
@@ -3278,16 +3324,6 @@
   const auto *FnType =
       cast<FunctionType>(cast<PointerType>(CalleeType)->getPointeeType());
 
-  // Force column info to differentiate multiple inlined call sites on
-  // the same line, analoguous to EmitCallExpr.
-  // FIXME: This is insufficient. Two calls coming from the same macro expansion
-  // will still get the same line/column and break debug info. It's possible
-  // that LLVM can be fixed to not rely on this uniqueness, at which point this
-  // workaround can be removed.
-  bool ForceColumnInfo = false;
-  if (const FunctionDecl* FD = dyn_cast_or_null<const FunctionDecl>(TargetDecl))
-    ForceColumnInfo = FD->isInlineSpecified();
-
   if (getLangOpts().CPlusPlus && SanOpts.has(SanitizerKind::Function) &&
       (!TargetDecl || !isa<FunctionDecl>(TargetDecl))) {
     if (llvm::Constant *PrefixSig =
@@ -3332,12 +3368,14 @@
   }
 
   CallArgList Args;
+  if (Chain)
+    Args.add(RValue::get(Builder.CreateBitCast(Chain, CGM.VoidPtrTy)),
+             CGM.getContext().VoidPtrTy);
   EmitCallArgs(Args, dyn_cast<FunctionProtoType>(FnType), E->arg_begin(),
-               E->arg_end(), E->getDirectCallee(), /*ParamsToSkip*/ 0,
-               ForceColumnInfo);
+               E->arg_end(), E->getDirectCallee(), /*ParamsToSkip*/ 0);
 
-  const CGFunctionInfo &FnInfo =
-    CGM.getTypes().arrangeFreeFunctionCall(Args, FnType);
+  const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeFreeFunctionCall(
+      Args, FnType, /*isChainCall=*/Chain);
 
   // C99 6.5.2.2p6:
   //   If the expression that denotes the called function has a type
@@ -3356,7 +3394,10 @@
   // through an unprototyped function type works like a *non-variadic*
   // call.  The way we make this work is to cast to the exact type
   // of the promoted arguments.
-  if (isa<FunctionNoProtoType>(FnType)) {
+  //
+  // Chain calls use this same code path to add the invisible chain parameter
+  // to the function type.
+  if (isa<FunctionNoProtoType>(FnType) || Chain) {
     llvm::Type *CalleeTy = getTypes().GetFunctionType(FnInfo);
     CalleeTy = CalleeTy->getPointerTo();
     Callee = Builder.CreateBitCast(Callee, CalleeTy, "callee.knr.cast");
diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp
index 4cf94c0..5b0d9f0 100644
--- a/lib/CodeGen/CGExprAgg.cpp
+++ b/lib/CodeGen/CGExprAgg.cpp
@@ -98,6 +98,11 @@
   //                            Visitor Methods
   //===--------------------------------------------------------------------===//
 
+  void Visit(Expr *E) {  // Wrap the base Visit with a debug location for E.
+    ApplyDebugLocation DL(CGF, E);
+    StmtVisitor<AggExprEmitter>::Visit(E);
+  }
+
   void VisitStmt(Stmt *S) {
     CGF.ErrorUnsupported(S, "aggregate expression");
   }
@@ -207,7 +212,7 @@
   LValue LV = CGF.EmitLValue(E);
 
   // If the type of the l-value is atomic, then do an atomic load.
-  if (LV.getType()->isAtomicType()) {
+  if (LV.getType()->isAtomicType() || CGF.LValueIsSuitableForInlineAtomic(LV)) {
     CGF.EmitAtomicLoad(LV, E->getExprLoc(), Dest);
     return;
   }
@@ -736,7 +741,7 @@
 }
 
 void AggExprEmitter::VisitCallExpr(const CallExpr *E) {
-  if (E->getCallReturnType()->isReferenceType()) {
+  if (E->getCallReturnType(CGF.getContext())->isReferenceType()) {
     EmitAggLoadOfLValue(E);
     return;
   }
@@ -860,7 +865,8 @@
     LValue LHS = CGF.EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store);
 
     // That copy is an atomic copy if the LHS is atomic.
-    if (LHS.getType()->isAtomicType()) {
+    if (LHS.getType()->isAtomicType() ||
+        CGF.LValueIsSuitableForInlineAtomic(LHS)) {
       CGF.EmitAtomicStore(Dest.asRValue(), LHS, /*isInit*/ false);
       return;
     }
@@ -877,7 +883,8 @@
 
   // If we have an atomic type, evaluate into the destination and then
   // do an atomic copy.
-  if (LHS.getType()->isAtomicType()) {
+  if (LHS.getType()->isAtomicType() ||
+      CGF.LValueIsSuitableForInlineAtomic(LHS)) {
     EnsureDest(E->getRHS()->getType());
     Visit(E->getRHS());
     CGF.EmitAtomicStore(Dest.asRValue(), LHS, /*isInit*/ false);
diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp
index 39c77eb..425a968 100644
--- a/lib/CodeGen/CGExprCXX.cpp
+++ b/lib/CodeGen/CGExprCXX.cpp
@@ -120,9 +120,23 @@
                     ReturnValue);
   }
 
-  // Compute the object pointer.
+  bool HasQualifier = ME->hasQualifier();
+  NestedNameSpecifier *Qualifier = HasQualifier ? ME->getQualifier() : nullptr;
+  bool IsArrow = ME->isArrow();
   const Expr *Base = ME->getBase();
-  bool CanUseVirtualCall = MD->isVirtual() && !ME->hasQualifier();
+
+  return EmitCXXMemberOrOperatorMemberCallExpr(
+      CE, MD, ReturnValue, HasQualifier, Qualifier, IsArrow, Base);
+}
+
+RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr(
+    const CallExpr *CE, const CXXMethodDecl *MD, ReturnValueSlot ReturnValue,
+    bool HasQualifier, NestedNameSpecifier *Qualifier, bool IsArrow,
+    const Expr *Base) {
+  assert(isa<CXXMemberCallExpr>(CE) || isa<CXXOperatorCallExpr>(CE));
+
+  // Compute the object pointer.
+  bool CanUseVirtualCall = MD->isVirtual() && !HasQualifier;
 
   const CXXMethodDecl *DevirtualizedMethod = nullptr;
   if (CanUseVirtualCall && CanDevirtualizeMemberFunctionCall(Base, MD)) {
@@ -153,7 +167,7 @@
   }
 
   llvm::Value *This;
-  if (ME->isArrow())
+  if (IsArrow)
     This = EmitScalarExpr(Base);
   else
     This = EmitLValue(Base).getAddress();
@@ -165,32 +179,38 @@
         cast<CXXConstructorDecl>(MD)->isDefaultConstructor())
       return RValue::get(nullptr);
 
-    if (MD->isCopyAssignmentOperator() || MD->isMoveAssignmentOperator()) {
-      // We don't like to generate the trivial copy/move assignment operator
-      // when it isn't necessary; just produce the proper effect here.
-      llvm::Value *RHS = EmitLValue(*CE->arg_begin()).getAddress();
-      EmitAggregateAssign(This, RHS, CE->getType());
-      return RValue::get(This);
-    }
+    if (!MD->getParent()->mayInsertExtraPadding()) {
+      if (MD->isCopyAssignmentOperator() || MD->isMoveAssignmentOperator()) {
+        // We don't like to generate the trivial copy/move assignment operator
+        // when it isn't necessary; just produce the proper effect here.
+        // Special case: skip first argument of CXXOperatorCall (it is "this").
+        unsigned ArgsToSkip = isa<CXXOperatorCallExpr>(CE) ? 1 : 0;
+        llvm::Value *RHS =
+            EmitLValue(*(CE->arg_begin() + ArgsToSkip)).getAddress();
+        EmitAggregateAssign(This, RHS, CE->getType());
+        return RValue::get(This);
+      }
 
-    if (isa<CXXConstructorDecl>(MD) &&
-        cast<CXXConstructorDecl>(MD)->isCopyOrMoveConstructor()) {
-      // Trivial move and copy ctor are the same.
-      assert(CE->getNumArgs() == 1 && "unexpected argcount for trivial ctor");
-      llvm::Value *RHS = EmitLValue(*CE->arg_begin()).getAddress();
-      EmitAggregateCopy(This, RHS, CE->arg_begin()->getType());
-      return RValue::get(This);
+      if (isa<CXXConstructorDecl>(MD) &&
+          cast<CXXConstructorDecl>(MD)->isCopyOrMoveConstructor()) {
+        // Trivial move and copy ctor are the same.
+        assert(CE->getNumArgs() == 1 && "unexpected argcount for trivial ctor");
+        llvm::Value *RHS = EmitLValue(*CE->arg_begin()).getAddress();
+        EmitAggregateCopy(This, RHS, CE->arg_begin()->getType());
+        return RValue::get(This);
+      }
+      llvm_unreachable("unknown trivial member function");
     }
-    llvm_unreachable("unknown trivial member function");
   }
 
   // Compute the function type we're calling.
-  const CXXMethodDecl *CalleeDecl = DevirtualizedMethod ? DevirtualizedMethod : MD;
+  const CXXMethodDecl *CalleeDecl =
+      DevirtualizedMethod ? DevirtualizedMethod : MD;
   const CGFunctionInfo *FInfo = nullptr;
-  if (const CXXDestructorDecl *Dtor = dyn_cast<CXXDestructorDecl>(CalleeDecl))
+  if (const auto *Dtor = dyn_cast<CXXDestructorDecl>(CalleeDecl))
     FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration(
         Dtor, StructorType::Complete);
-  else if (const CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(CalleeDecl))
+  else if (const auto *Ctor = dyn_cast<CXXConstructorDecl>(CalleeDecl))
     FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration(
         Ctor, StructorType::Complete);
   else
@@ -212,13 +232,11 @@
            "Destructor shouldn't have explicit parameters");
     assert(ReturnValue.isNull() && "Destructor shouldn't have return value");
     if (UseVirtualCall) {
-      CGM.getCXXABI().EmitVirtualDestructorCall(*this, Dtor, Dtor_Complete,
-                                                This, CE);
+      CGM.getCXXABI().EmitVirtualDestructorCall(
+          *this, Dtor, Dtor_Complete, This, cast<CXXMemberCallExpr>(CE));
     } else {
-      if (getLangOpts().AppleKext &&
-          MD->isVirtual() &&
-          ME->hasQualifier())
-        Callee = BuildAppleKextVirtualCall(MD, ME->getQualifier(), Ty);
+      if (getLangOpts().AppleKext && MD->isVirtual() && HasQualifier)
+        Callee = BuildAppleKextVirtualCall(MD, Qualifier, Ty);
       else if (!DevirtualizedMethod)
         Callee =
             CGM.getAddrOfCXXStructor(Dtor, StructorType::Complete, FInfo, Ty);
@@ -238,10 +256,8 @@
   } else if (UseVirtualCall) {
     Callee = CGM.getCXXABI().getVirtualFunctionPointer(*this, MD, This, Ty);
   } else {
-    if (getLangOpts().AppleKext &&
-        MD->isVirtual() &&
-        ME->hasQualifier())
-      Callee = BuildAppleKextVirtualCall(MD, ME->getQualifier(), Ty);
+    if (getLangOpts().AppleKext && MD->isVirtual() && HasQualifier)
+      Callee = BuildAppleKextVirtualCall(MD, Qualifier, Ty);
     else if (!DevirtualizedMethod)
       Callee = CGM.GetAddrOfFunction(MD, Ty);
     else {
@@ -314,20 +330,9 @@
                                                ReturnValueSlot ReturnValue) {
   assert(MD->isInstance() &&
          "Trying to emit a member call expr on a static method!");
-  LValue LV = EmitLValue(E->getArg(0));
-  llvm::Value *This = LV.getAddress();
-
-  if ((MD->isCopyAssignmentOperator() || MD->isMoveAssignmentOperator()) &&
-      MD->isTrivial() && !MD->getParent()->mayInsertExtraPadding()) {
-    llvm::Value *Src = EmitLValue(E->getArg(1)).getAddress();
-    QualType Ty = E->getType();
-    EmitAggregateAssign(This, Src, Ty);
-    return RValue::get(This);
-  }
-
-  llvm::Value *Callee = EmitCXXOperatorMemberCallee(E, MD, This);
-  return EmitCXXMemberOrOperatorCall(MD, Callee, ReturnValue, This,
-                                     /*ImplicitParam=*/nullptr, QualType(), E);
+  return EmitCXXMemberOrOperatorMemberCallExpr(
+      E, MD, ReturnValue, /*HasQualifier=*/false, /*Qualifier=*/nullptr,
+      /*IsArrow=*/false, E->getArg(0));
 }
 
 RValue CodeGenFunction::EmitCUDAKernelCallExpr(const CUDAKernelCallExpr *E,
@@ -752,9 +757,8 @@
   CharUnits Alignment = CGF.getContext().getTypeAlignInChars(AllocType);
   switch (CGF.getEvaluationKind(AllocType)) {
   case TEK_Scalar:
-    CGF.EmitScalarInit(Init, nullptr, CGF.MakeAddrLValue(NewPtr, AllocType,
-                                                         Alignment),
-                       false);
+    CGF.EmitScalarInit(Init, nullptr,
+                       CGF.MakeAddrLValue(NewPtr, AllocType, Alignment), false);
     return;
   case TEK_Complex:
     CGF.EmitComplexExprIntoLValue(Init, CGF.MakeAddrLValue(NewPtr, AllocType,
@@ -1012,6 +1016,7 @@
                                llvm::Value *NewPtr,
                                llvm::Value *NumElements,
                                llvm::Value *AllocSizeWithoutCookie) {
+  ApplyDebugLocation DL(CGF, E);
   if (E->isArray())
     CGF.EmitNewArrayInitializer(E, ElementType, NewPtr, NumElements,
                                 AllocSizeWithoutCookie);
@@ -1028,9 +1033,9 @@
   llvm::Instruction *CallOrInvoke;
   llvm::Value *CalleeAddr = CGF.CGM.GetAddrOfFunction(Callee);
   RValue RV =
-      CGF.EmitCall(CGF.CGM.getTypes().arrangeFreeFunctionCall(Args, CalleeType),
-                   CalleeAddr, ReturnValueSlot(), Args,
-                   Callee, &CallOrInvoke);
+      CGF.EmitCall(CGF.CGM.getTypes().arrangeFreeFunctionCall(
+                       Args, CalleeType, /*chainCall=*/false),
+                   CalleeAddr, ReturnValueSlot(), Args, Callee, &CallOrInvoke);
 
   /// C++1y [expr.new]p10:
   ///   [In a new-expression,] an implementation is allowed to omit a call
@@ -1274,10 +1279,9 @@
 
   // Emit a null check on the allocation result if the allocation
   // function is allowed to return null (because it has a non-throwing
-  // exception spec; for this part, we inline
-  // CXXNewExpr::shouldNullCheckAllocation()) and we have an
+  // exception spec or is the reserved placement new) and we have an
   // interesting initializer.
-  bool nullCheck = allocatorType->isNothrow(getContext()) &&
+  bool nullCheck = E->shouldNullCheckAllocation(getContext()) &&
     (!allocType.isPODType(getContext()) || E->hasInitializer());
 
   llvm::BasicBlock *nullCheckBB = nullptr;
@@ -1418,6 +1422,71 @@
                                         OperatorDelete, ElementType);
 }
 
+static void EmitDelete(CodeGenFunction &CGF,
+                              const CXXDeleteExpr *DE,
+                              llvm::Value *Ptr,
+                              QualType ElementType);
+
+static void EmitSizedDelete(CodeGenFunction &CGF,
+                            const CXXDeleteExpr *DE,
+                            llvm::Value *Ptr,
+                            QualType ElementType,
+                            FunctionDecl* UnsizedDealloc) {
+  // Emit DE, guarded by a run-time fallback to UnsizedDealloc where required.
+  if (CGF.getLangOpts().DefineSizedDeallocation) {
+    // The operator delete to call is fixed, so emit the delete expr as-is.
+    EmitDelete(CGF, DE, Ptr, ElementType);
+    return;
+  }
+
+  assert(UnsizedDealloc && "We must be emiting a 'sized' delete expr");
+
+  // Branch on whether the address of DE's operator delete is non-null:
+  // use the sized form if so, and fall back to the unsized form otherwise.
+  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("if.then");
+  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("if.end");
+  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("if.else");
+
+  // Emit the condition: a null test on the operator delete's address.
+  const FunctionDecl *OpDelFD = DE->getOperatorDelete();
+  llvm::Value *OpDelAddr = CGF.CGM.GetAddrOfFunction(OpDelFD);
+  // NOTE(review): presumably null only for a weak, undefined symbol - confirm.
+  llvm::Value *SDE = CGF.Builder.CreateIsNotNull(OpDelAddr, "sized.del.exists");
+  CGF.Builder.CreateCondBr(SDE, ThenBlock, ElseBlock);
+
+  // Emit the 'then' code: the delete expr exactly as written.
+  CGF.EmitBlock(ThenBlock);
+  EmitDelete(CGF, DE, Ptr, ElementType);
+  CGF.EmitBranch(ContBlock);
+
+  // Build an equivalent delete expr whose operator delete is UnsizedDealloc.
+  CXXDeleteExpr * E = const_cast<CXXDeleteExpr*>(DE);
+  CXXDeleteExpr *UnsizedDE =
+  new (CGF.getContext()) CXXDeleteExpr(CGF.getContext().VoidTy,
+                                       E->isGlobalDelete(),
+                                       E->isArrayForm(),
+                                       E->isArrayFormAsWritten(),
+                                       E->doesUsualArrayDeleteWantSize(),
+                                       UnsizedDealloc,
+                                       E->getArgument(),
+                                       E->getLocStart());
+  // Emit the 'else' code: the unsized delete expr built above.
+  {
+    // There is no need to emit line number for an unconditional branch.
+    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
+    CGF.EmitBlock(ElseBlock);
+  }
+  EmitDelete(CGF, UnsizedDE, Ptr, ElementType);
+  {
+    // There is no need to emit line number for an unconditional branch.
+    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
+    CGF.EmitBranch(ContBlock);
+  }
+
+  // Emit the continuation block for code after the if.
+  CGF.EmitBlock(ContBlock, true);
+}
+
 /// Emit the code for deleting a single object.
 static void EmitObjectDelete(CodeGenFunction &CGF,
                              const CXXDeleteExpr *DE,
@@ -1577,6 +1646,17 @@
   CGF.PopCleanupBlock();
 }
 
+static void EmitDelete(CodeGenFunction &CGF,
+                       const CXXDeleteExpr *DE,
+                       llvm::Value *Ptr,
+                       QualType ElementType) { // dispatch on the form of DE
+  if (DE->isArrayForm()) { // array form of the delete-expression
+    EmitArrayDelete(CGF, DE, Ptr, ElementType);
+  } else { // otherwise: deletion of a single object
+    EmitObjectDelete(CGF, DE, Ptr, ElementType);
+  }
+}
+
 void CodeGenFunction::EmitCXXDeleteExpr(const CXXDeleteExpr *E) {
   const Expr *Arg = E->getArgument();
   llvm::Value *Ptr = EmitScalarExpr(Arg);
@@ -1616,11 +1696,12 @@
   assert(ConvertTypeForMem(DeleteTy) ==
          cast<llvm::PointerType>(Ptr->getType())->getElementType());
 
-  if (E->isArrayForm()) {
-    EmitArrayDelete(*this, E, Ptr, DeleteTy);
-  } else {
-    EmitObjectDelete(*this, E, Ptr, DeleteTy);
-  }
+  const FunctionDecl *Dealloc = E->getOperatorDelete();
+  if (FunctionDecl* UnsizedDealloc =
+      Dealloc->getCorrespondingUnsizedGlobalDeallocationFunction())
+    EmitSizedDelete(*this, E, Ptr, DeleteTy, UnsizedDealloc);
+  else
+    EmitDelete(*this, E, Ptr, DeleteTy);
 
   EmitBlock(DeleteEnd);
 }
diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp
index 4700c7e..b2228f0 100644
--- a/lib/CodeGen/CGExprComplex.cpp
+++ b/lib/CodeGen/CGExprComplex.cpp
@@ -95,6 +95,7 @@
   //===--------------------------------------------------------------------===//
 
   ComplexPairTy Visit(Expr *E) {
+    ApplyDebugLocation DL(CGF, E);
     return StmtVisitor<ComplexExprEmitter, ComplexPairTy>::Visit(E);
   }
 
@@ -333,10 +334,10 @@
 
 /// EmitStoreOfComplex - Store the specified real/imag parts into the
 /// specified value pointer.
-void ComplexExprEmitter::EmitStoreOfComplex(ComplexPairTy Val,
-                                            LValue lvalue,
+void ComplexExprEmitter::EmitStoreOfComplex(ComplexPairTy Val, LValue lvalue,
                                             bool isInit) {
-  if (lvalue.getType()->isAtomicType())
+  if (lvalue.getType()->isAtomicType() ||
+      (!isInit && CGF.LValueIsSuitableForInlineAtomic(lvalue)))
     return CGF.EmitAtomicStore(RValue::getComplex(Val), lvalue, isInit);
 
   llvm::Value *Ptr = lvalue.getAddress();
@@ -376,7 +377,7 @@
 
 
 ComplexPairTy ComplexExprEmitter::VisitCallExpr(const CallExpr *E) {
-  if (E->getCallReturnType()->isReferenceType())
+  if (E->getCallReturnType(CGF.getContext())->isReferenceType())
     return EmitLoadOfLValue(E);
 
   return CGF.EmitCallExpr(E).getComplexVal();
@@ -582,13 +583,22 @@
            Op.Ty->castAs<ComplexType>()->getElementType());
 
   // We *must* use the full CG function call building logic here because the
-  // complex type has special ABI handling.
-  const CGFunctionInfo &FuncInfo = CGF.CGM.getTypes().arrangeFreeFunctionCall(
-      Op.Ty, Args, FunctionType::ExtInfo(), RequiredArgs::All);
+  // complex type has special ABI handling. We must also account for the
+  // special calling convention that may be used for compiler builtins.
+  const CGFunctionInfo &FuncInfo =
+    CGF.CGM.getTypes().arrangeFreeFunctionCall(
+      Op.Ty, Args, FunctionType::ExtInfo(/* No CC here - will be added later */),
+      RequiredArgs::All);
   llvm::FunctionType *FTy = CGF.CGM.getTypes().GetFunctionType(FuncInfo);
-  llvm::Constant *Func = CGF.CGM.CreateRuntimeFunction(FTy, LibCallName);
+  llvm::Constant *Func = CGF.CGM.CreateBuiltinFunction(FTy, LibCallName);
+  llvm::Instruction *Call;
 
-  return CGF.EmitCall(FuncInfo, Func, ReturnValueSlot(), Args).getComplexVal();
+  RValue Res = CGF.EmitCall(FuncInfo, Func, ReturnValueSlot(), Args,
+                            nullptr, &Call);
+  cast<llvm::CallInst>(Call)->setCallingConv(CGF.CGM.getBuiltinCC());
+  cast<llvm::CallInst>(Call)->setDoesNotThrow();
+
+  return Res.getComplexVal();
 }
 
 /// \brief Lookup the libcall name for a given floating point type complex
@@ -810,6 +820,8 @@
   TestAndClearIgnoreReal();
   TestAndClearIgnoreImag();
   QualType LHSTy = E->getLHS()->getType();
+  if (const AtomicType *AT = LHSTy->getAs<AtomicType>())
+    LHSTy = AT->getValueType();
 
   BinOpInfo OpInfo;
 
@@ -1025,7 +1037,7 @@
          "Invalid complex expression to emit");
 
   return ComplexExprEmitter(*this, IgnoreReal, IgnoreImag)
-    .Visit(const_cast<Expr*>(E));
+      .Visit(const_cast<Expr *>(E));
 }
 
 void CodeGenFunction::EmitComplexExprIntoLValue(const Expr *E, LValue dest,
@@ -1077,8 +1089,8 @@
 }
 
 LValue CodeGenFunction::
-EmitScalarCompooundAssignWithComplex(const CompoundAssignOperator *E,
-                                     llvm::Value *&Result) {
+EmitScalarCompoundAssignWithComplex(const CompoundAssignOperator *E,
+                                    llvm::Value *&Result) {
   CompoundFunc Op = getComplexOp(E->getOpcode());
   RValue Val;
   LValue Ret = ComplexExprEmitter(*this).EmitCompoundAssignLValue(E, Op, Val);
diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp
index 088a5d0..54f7eee 100644
--- a/lib/CodeGen/CGExprConstant.cpp
+++ b/lib/CodeGen/CGExprConstant.cpp
@@ -750,6 +750,20 @@
     // initialise any elements that have not been initialised explicitly
     unsigned NumInitableElts = std::min(NumInitElements, NumElements);
 
+    // Initialize remaining array elements.
+    // FIXME: This doesn't handle member pointers correctly!
+    llvm::Constant *fillC;
+    if (Expr *filler = ILE->getArrayFiller())
+      fillC = CGM.EmitConstantExpr(filler, filler->getType(), CGF);
+    else
+      fillC = llvm::Constant::getNullValue(ElemTy);
+    if (!fillC)
+      return nullptr;
+
+    // Try to use a ConstantAggregateZero if we can.
+    if (fillC->isNullValue() && !NumInitableElts)
+      return llvm::ConstantAggregateZero::get(AType);
+
     // Copy initializer elements.
     std::vector<llvm::Constant*> Elts;
     Elts.reserve(NumInitableElts + NumElements);
@@ -764,15 +778,6 @@
       Elts.push_back(C);
     }
 
-    // Initialize remaining array elements.
-    // FIXME: This doesn't handle member pointers correctly!
-    llvm::Constant *fillC;
-    if (Expr *filler = ILE->getArrayFiller())
-      fillC = CGM.EmitConstantExpr(filler, filler->getType(), CGF);
-    else
-      fillC = llvm::Constant::getNullValue(ElemTy);
-    if (!fillC)
-      return nullptr;
     RewriteType |= (fillC->getType() != ElemTy);
     Elts.resize(NumElements, fillC);
 
@@ -1143,7 +1148,7 @@
     // FIXME: the target may want to specify that this is packed.
     llvm::StructType *STy = llvm::StructType::get(Complex[0]->getType(),
                                                   Complex[1]->getType(),
-                                                  NULL);
+                                                  nullptr);
     return llvm::ConstantStruct::get(STy, Complex);
   }
   case APValue::Float: {
@@ -1166,7 +1171,7 @@
     // FIXME: the target may want to specify that this is packed.
     llvm::StructType *STy = llvm::StructType::get(Complex[0]->getType(),
                                                   Complex[1]->getType(),
-                                                  NULL);
+                                                  nullptr);
     return llvm::ConstantStruct::get(STy, Complex);
   }
   case APValue::Vector: {
@@ -1207,9 +1212,6 @@
     unsigned NumElements = Value.getArraySize();
     unsigned NumInitElts = Value.getArrayInitializedElts();
 
-    std::vector<llvm::Constant*> Elts;
-    Elts.reserve(NumElements);
-
     // Emit array filler, if there is one.
     llvm::Constant *Filler = nullptr;
     if (Value.hasArrayFiller())
@@ -1217,7 +1219,18 @@
                                           CAT->getElementType(), CGF);
 
     // Emit initializer elements.
-    llvm::Type *CommonElementType = nullptr;
+    llvm::Type *CommonElementType =
+        getTypes().ConvertType(CAT->getElementType());
+
+    // Try to use a ConstantAggregateZero if we can.
+    if (Filler && Filler->isNullValue() && !NumInitElts) {
+      llvm::ArrayType *AType =
+          llvm::ArrayType::get(CommonElementType, NumElements);
+      return llvm::ConstantAggregateZero::get(AType);
+    }
+
+    std::vector<llvm::Constant*> Elts;
+    Elts.reserve(NumElements);
     for (unsigned I = 0; I < NumElements; ++I) {
       llvm::Constant *C = Filler;
       if (I < NumInitElts)
diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp
index 19d453d..dc12dd8 100644
--- a/lib/CodeGen/CGExprScalar.cpp
+++ b/lib/CodeGen/CGExprScalar.cpp
@@ -196,6 +196,7 @@
   //===--------------------------------------------------------------------===//
 
   Value *Visit(Expr *E) {
+    ApplyDebugLocation DL(CGF, E);
     return StmtVisitor<ScalarExprEmitter, Value*>::Visit(E);
   }
 
@@ -319,7 +320,7 @@
   Value *VisitCastExpr(CastExpr *E);
 
   Value *VisitCallExpr(const CallExpr *E) {
-    if (E->getCallReturnType()->isReferenceType())
+    if (E->getCallReturnType(CGF.getContext())->isReferenceType())
       return EmitLoadOfLValue(E);
 
     Value *V = CGF.EmitCallExpr(E).getScalarVal();
@@ -1351,8 +1352,8 @@
     llvm::Type *DstTy = ConvertType(DestTy);
     if (SrcTy->isPtrOrPtrVectorTy() && DstTy->isPtrOrPtrVectorTy() &&
         SrcTy->getPointerAddressSpace() != DstTy->getPointerAddressSpace()) {
-      llvm::Type *MidTy = CGF.CGM.getDataLayout().getIntPtrType(SrcTy);
-      return Builder.CreateIntToPtr(Builder.CreatePtrToInt(Src, MidTy), DstTy);
+      llvm_unreachable("wrong cast for pointers in different address spaces"
+                       "(must be an address space cast)!");
     }
     return Builder.CreateBitCast(Src, DstTy);
   }
@@ -1411,8 +1412,11 @@
     // anything here.
     if (!E->getType()->isVariableArrayType()) {
       assert(isa<llvm::PointerType>(V->getType()) && "Expected pointer");
-      assert(isa<llvm::ArrayType>(cast<llvm::PointerType>(V->getType())
-                                 ->getElementType()) &&
+      V = CGF.Builder.CreatePointerCast(
+          V, ConvertType(E->getType())->getPointerTo(
+            V->getType()->getPointerAddressSpace()));
+
+      assert(isa<llvm::ArrayType>(V->getType()->getPointerElementType()) &&
              "Expected pointer to array");
       V = Builder.CreateStructGEP(V, 0, "arraydecay");
     }
@@ -1789,11 +1793,11 @@
   if (atomicPHI) {
     llvm::BasicBlock *opBB = Builder.GetInsertBlock();
     llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
-    llvm::Value *pair = Builder.CreateAtomicCmpXchg(
-        LV.getAddress(), atomicPHI, CGF.EmitToMemory(value, type),
-        llvm::SequentiallyConsistent, llvm::SequentiallyConsistent);
-    llvm::Value *old = Builder.CreateExtractValue(pair, 0);
-    llvm::Value *success = Builder.CreateExtractValue(pair, 1);
+    auto Pair = CGF.EmitAtomicCompareExchange(
+        LV, RValue::get(atomicPHI), RValue::get(CGF.EmitToMemory(value, type)),
+        E->getExprLoc());
+    llvm::Value *old = Pair.first.getScalarVal();
+    llvm::Value *success = Pair.second.getScalarVal();
     atomicPHI->addIncoming(old, opBB);
     Builder.CreateCondBr(success, contBB, opBB);
     Builder.SetInsertPoint(contBB);
@@ -2052,7 +2056,7 @@
   BinOpInfo OpInfo;
 
   if (E->getComputationResultType()->isAnyComplexType())
-    return CGF.EmitScalarCompooundAssignWithComplex(E, Result);
+    return CGF.EmitScalarCompoundAssignWithComplex(E, Result);
 
   // Emit the RHS first.  __block variables need to have the rhs evaluated
   // first, plus this should improve codegen a little.
@@ -2133,11 +2137,11 @@
   if (atomicPHI) {
     llvm::BasicBlock *opBB = Builder.GetInsertBlock();
     llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
-    llvm::Value *pair = Builder.CreateAtomicCmpXchg(
-        LHSLV.getAddress(), atomicPHI, CGF.EmitToMemory(Result, LHSTy),
-        llvm::SequentiallyConsistent, llvm::SequentiallyConsistent);
-    llvm::Value *old = Builder.CreateExtractValue(pair, 0);
-    llvm::Value *success = Builder.CreateExtractValue(pair, 1);
+    auto Pair = CGF.EmitAtomicCompareExchange(
+        LHSLV, RValue::get(atomicPHI),
+        RValue::get(CGF.EmitToMemory(Result, LHSTy)), E->getExprLoc());
+    llvm::Value *old = Pair.first.getScalarVal();
+    llvm::Value *success = Pair.second.getScalarVal();
     atomicPHI->addIncoming(old, opBB);
     Builder.CreateCondBr(success, contBB, opBB);
     Builder.SetInsertPoint(contBB);
@@ -3039,7 +3043,7 @@
   // Emit an unconditional branch from this block to ContBlock.
   {
     // There is no need to emit line number for unconditional branch.
-    SuppressDebugLocation S(Builder);
+    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
     CGF.EmitBlock(ContBlock);
   }
   // Insert an entry into the phi node for the edge with the value of RHSCond.
@@ -3312,8 +3316,12 @@
   llvm::Value *Val = Builder.CreateLoad(ArgPtr);
 
   // If EmitVAArg promoted the type, we must truncate it.
-  if (ArgTy != Val->getType())
-    Val = Builder.CreateTrunc(Val, ArgTy);
+  if (ArgTy != Val->getType()) {
+    if (ArgTy->isPointerTy() && !Val->getType()->isPointerTy())
+      Val = Builder.CreateIntToPtr(Val, ArgTy);
+    else
+      Val = Builder.CreateTrunc(Val, ArgTy);
+  }
 
   return Val;
 }
@@ -3385,13 +3393,8 @@
   assert(E && hasScalarEvaluationKind(E->getType()) &&
          "Invalid scalar expression to emit");
 
-  if (isa<CXXDefaultArgExpr>(E))
-    disableDebugInfo();
-  Value *V = ScalarExprEmitter(*this, IgnoreResultAssign)
-    .Visit(const_cast<Expr*>(E));
-  if (isa<CXXDefaultArgExpr>(E))
-    enableDebugInfo();
-  return V;
+  return ScalarExprEmitter(*this, IgnoreResultAssign)
+      .Visit(const_cast<Expr *>(E));
 }
 
 /// EmitScalarConversion - Emit a conversion from the specified type to the
diff --git a/lib/CodeGen/CGLoopInfo.cpp b/lib/CodeGen/CGLoopInfo.cpp
index a273f1d..011ae7e 100644
--- a/lib/CodeGen/CGLoopInfo.cpp
+++ b/lib/CodeGen/CGLoopInfo.cpp
@@ -24,42 +24,40 @@
       Attrs.VectorizerEnable == LoopAttributes::VecUnspecified)
     return nullptr;
 
-  SmallVector<Value *, 4> Args;
+  SmallVector<Metadata *, 4> Args;
   // Reserve operand 0 for loop id self reference.
-  MDNode *TempNode = MDNode::getTemporary(Ctx, None);
-  Args.push_back(TempNode);
+  auto TempNode = MDNode::getTemporary(Ctx, None);
+  Args.push_back(TempNode.get());
 
   // Setting vectorizer.width
   if (Attrs.VectorizerWidth > 0) {
-    Value *Vals[] = { MDString::get(Ctx, "llvm.loop.vectorize.width"),
-                      ConstantInt::get(Type::getInt32Ty(Ctx),
-                                       Attrs.VectorizerWidth) };
+    Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.vectorize.width"),
+                        ConstantAsMetadata::get(ConstantInt::get(
+                            Type::getInt32Ty(Ctx), Attrs.VectorizerWidth))};
     Args.push_back(MDNode::get(Ctx, Vals));
   }
 
   // Setting vectorizer.unroll
   if (Attrs.VectorizerUnroll > 0) {
-    Value *Vals[] = { MDString::get(Ctx, "llvm.loop.interleave.count"),
-                      ConstantInt::get(Type::getInt32Ty(Ctx),
-                                       Attrs.VectorizerUnroll) };
+    Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.interleave.count"),
+                        ConstantAsMetadata::get(ConstantInt::get(
+                            Type::getInt32Ty(Ctx), Attrs.VectorizerUnroll))};
     Args.push_back(MDNode::get(Ctx, Vals));
   }
 
   // Setting vectorizer.enable
   if (Attrs.VectorizerEnable != LoopAttributes::VecUnspecified) {
-    Value *Vals[] = { MDString::get(Ctx, "llvm.loop.vectorize.enable"),
-                      ConstantInt::get(Type::getInt1Ty(Ctx),
-                                       (Attrs.VectorizerEnable ==
-                                        LoopAttributes::VecEnable)) };
+    Metadata *Vals[] = {
+        MDString::get(Ctx, "llvm.loop.vectorize.enable"),
+        ConstantAsMetadata::get(ConstantInt::get(
+            Type::getInt1Ty(Ctx),
+            (Attrs.VectorizerEnable == LoopAttributes::VecEnable)))};
     Args.push_back(MDNode::get(Ctx, Vals));
   }
 
-  MDNode *LoopID = MDNode::get(Ctx, Args);
-  assert(LoopID->use_empty() && "LoopID should not be used");
-
   // Set the first operand to itself.
+  MDNode *LoopID = MDNode::get(Ctx, Args);
   LoopID->replaceOperandWith(0, LoopID);
-  MDNode::deleteTemporary(TempNode);
   return LoopID;
 }
 
diff --git a/lib/CodeGen/CGLoopInfo.h b/lib/CodeGen/CGLoopInfo.h
index b169399..aee1621 100644
--- a/lib/CodeGen/CGLoopInfo.h
+++ b/lib/CodeGen/CGLoopInfo.h
@@ -78,8 +78,8 @@
 /// This stack can be used to prepare attributes which are applied when a loop
 /// is emitted.
 class LoopInfoStack {
-  LoopInfoStack(const LoopInfoStack &) LLVM_DELETED_FUNCTION;
-  void operator=(const LoopInfoStack &) LLVM_DELETED_FUNCTION;
+  LoopInfoStack(const LoopInfoStack &) = delete;
+  void operator=(const LoopInfoStack &) = delete;
 
 public:
   LoopInfoStack() {}
diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp
index ca67c4b..19f5ca2 100644
--- a/lib/CodeGen/CGObjC.cpp
+++ b/lib/CodeGen/CGObjC.cpp
@@ -60,7 +60,6 @@
 llvm::Value *
 CodeGenFunction::EmitObjCBoxedExpr(const ObjCBoxedExpr *E) {
   // Generate the correct selector for this literal's concrete type.
-  const Expr *SubExpr = E->getSubExpr();
   // Get the method.
   const ObjCMethodDecl *BoxingMethod = E->getBoxingMethod();
   assert(BoxingMethod && "BoxingMethod is null");
@@ -73,12 +72,9 @@
   CGObjCRuntime &Runtime = CGM.getObjCRuntime();
   const ObjCInterfaceDecl *ClassDecl = BoxingMethod->getClassInterface();
   llvm::Value *Receiver = Runtime.GetClass(*this, ClassDecl);
-  
-  const ParmVarDecl *argDecl = *BoxingMethod->param_begin();
-  QualType ArgQT = argDecl->getType().getUnqualifiedType();
-  RValue RV = EmitAnyExpr(SubExpr);
+
   CallArgList Args;
-  Args.add(RV, ArgQT);
+  EmitCallArgs(Args, BoxingMethod, E->arg_begin(), E->arg_end());
 
   RValue result = Runtime.GenerateMessageSend(
       *this, ReturnValueSlot(), BoxingMethod->getReturnType(), Sel, Receiver,
@@ -461,8 +457,8 @@
 /// the LLVM function and sets the other context used by
 /// CodeGenFunction.
 void CodeGenFunction::StartObjCMethod(const ObjCMethodDecl *OMD,
-                                      const ObjCContainerDecl *CD,
-                                      SourceLocation StartLoc) {
+                                      const ObjCContainerDecl *CD) {
+  SourceLocation StartLoc = OMD->getLocStart();
   FunctionArgList args;
   // Check if we should generate debug info for this method.
   if (OMD->hasAttr<NoDebugAttr>())
@@ -476,10 +472,10 @@
   args.push_back(OMD->getSelfDecl());
   args.push_back(OMD->getCmdDecl());
 
-  for (const auto *PI : OMD->params())
-    args.push_back(PI);
+  args.append(OMD->param_begin(), OMD->param_end());
 
   CurGD = OMD;
+  CurEHLocation = OMD->getLocEnd();
 
   StartFunction(OMD, OMD->getReturnType(), Fn, FI, args,
                 OMD->getLocation(), StartLoc);
@@ -501,15 +497,13 @@
 /// Generate an Objective-C method.  An Objective-C method is a C function with
 /// its pointer, name, and types registered in the class struture.
 void CodeGenFunction::GenerateObjCMethod(const ObjCMethodDecl *OMD) {
-  StartObjCMethod(OMD, OMD->getClassInterface(), OMD->getLocStart());
+  StartObjCMethod(OMD, OMD->getClassInterface());
   PGO.assignRegionCounters(OMD, CurFn);
   assert(isa<CompoundStmt>(OMD->getBody()));
   RegionCounter Cnt = getPGORegionCounter(OMD->getBody());
   Cnt.beginRegion(Builder);
   EmitCompoundStmtWithoutScope(*cast<CompoundStmt>(OMD->getBody()));
   FinishFunction(OMD->getBodyRBrace());
-  PGO.emitInstrumentationData();
-  PGO.destroyRegionCounters();
 }
 
 /// emitStructGetterCall - Call the runtime function to load a property
@@ -749,7 +743,7 @@
   const ObjCPropertyDecl *PD = PID->getPropertyDecl();
   ObjCMethodDecl *OMD = PD->getGetterMethodDecl();
   assert(OMD && "Invalid call to generate getter (empty method)");
-  StartObjCMethod(OMD, IMP->getClassInterface(), OMD->getLocStart());
+  StartObjCMethod(OMD, IMP->getClassInterface());
 
   generateObjCGetterBody(IMP, PID, OMD, AtomicHelperFn);
 
@@ -1278,7 +1272,7 @@
   const ObjCPropertyDecl *PD = PID->getPropertyDecl();
   ObjCMethodDecl *OMD = PD->getSetterMethodDecl();
   assert(OMD && "Invalid call to generate setter (empty method)");
-  StartObjCMethod(OMD, IMP->getClassInterface(), OMD->getLocStart());
+  StartObjCMethod(OMD, IMP->getClassInterface());
 
   generateObjCSetterBody(IMP, PID, AtomicHelperFn);
 
@@ -1356,7 +1350,7 @@
                                                  ObjCMethodDecl *MD,
                                                  bool ctor) {
   MD->createImplicitParams(CGM.getContext(), IMP->getClassInterface());
-  StartObjCMethod(MD, IMP->getClassInterface(), MD->getLocStart());
+  StartObjCMethod(MD, IMP->getClassInterface());
 
   // Emit .cxx_construct.
   if (ctor) {
@@ -1940,9 +1934,8 @@
       = cast<llvm::CallInst>(result->stripPointerCasts());
     assert(call->getCalledValue() == CGM.getARCEntrypoints().objc_retainBlock);
 
-    SmallVector<llvm::Value*,1> args;
     call->setMetadata("clang.arc.copy_on_escape",
-                      llvm::MDNode::get(Builder.getContext(), args));
+                      llvm::MDNode::get(Builder.getContext(), None));
   }
 
   return result;
@@ -1984,8 +1977,8 @@
                             "clang.arc.retainAutoreleasedReturnValueMarker");
       assert(metadata->getNumOperands() <= 1);
       if (metadata->getNumOperands() == 0) {
-        llvm::Value *string = llvm::MDString::get(getLLVMContext(), assembly);
-        metadata->addOperand(llvm::MDNode::get(getLLVMContext(), string));
+        metadata->addOperand(llvm::MDNode::get(
+            getLLVMContext(), llvm::MDString::get(getLLVMContext(), assembly)));
       }
     }
   }
@@ -2018,9 +2011,8 @@
   llvm::CallInst *call = EmitNounwindRuntimeCall(fn, value);
 
   if (precise == ARCImpreciseLifetime) {
-    SmallVector<llvm::Value*,1> args;
     call->setMetadata("clang.imprecise_release",
-                      llvm::MDNode::get(Builder.getContext(), args));
+                      llvm::MDNode::get(Builder.getContext(), None));
   }
 }
 
diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp
index 8d95369..da95260 100644
--- a/lib/CodeGen/CGObjCGNU.cpp
+++ b/lib/CodeGen/CGObjCGNU.cpp
@@ -1296,11 +1296,11 @@
   llvm::Value *imp = LookupIMPSuper(CGF, ObjCSuper, cmd, MSI);
   imp = EnforceType(Builder, imp, MSI.MessengerType);
 
-  llvm::Value *impMD[] = {
+  llvm::Metadata *impMD[] = {
       llvm::MDString::get(VMContext, Sel.getAsString()),
       llvm::MDString::get(VMContext, Class->getSuperClass()->getNameAsString()),
-      llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), IsClassMessage)
-   };
+      llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
+          llvm::Type::getInt1Ty(VMContext), IsClassMessage))};
   llvm::MDNode *node = llvm::MDNode::get(VMContext, impMD);
 
   llvm::Instruction *call;
@@ -1371,12 +1371,11 @@
   cmd = EnforceType(Builder, cmd, SelectorTy);
   Receiver = EnforceType(Builder, Receiver, IdTy);
 
-  llvm::Value *impMD[] = {
-        llvm::MDString::get(VMContext, Sel.getAsString()),
-        llvm::MDString::get(VMContext, Class ? Class->getNameAsString() :""),
-        llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext),
-                               Class!=nullptr)
-   };
+  llvm::Metadata *impMD[] = {
+      llvm::MDString::get(VMContext, Sel.getAsString()),
+      llvm::MDString::get(VMContext, Class ? Class->getNameAsString() : ""),
+      llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
+          llvm::Type::getInt1Ty(VMContext), Class != nullptr))};
   llvm::MDNode *node = llvm::MDNode::get(VMContext, impMD);
 
   CallArgList ActualArgs;
@@ -2367,7 +2366,7 @@
   std::vector<llvm::Constant*> Elements;
   llvm::Constant *Statics = NULLPtr;
   // Generate statics list:
-  if (ConstantStrings.size()) {
+  if (!ConstantStrings.empty()) {
     llvm::ArrayType *StaticsArrayTy = llvm::ArrayType::get(PtrToInt8Ty,
         ConstantStrings.size() + 1);
     ConstantStrings.push_back(NULLPtr);
diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp
index c05c226..b9fdf73 100644
--- a/lib/CodeGen/CGObjCMac.cpp
+++ b/lib/CodeGen/CGObjCMac.cpp
@@ -106,7 +106,7 @@
     llvm::Type *params[] = { ObjectPtrTy, SelectorPtrTy };
     llvm::Type *longDoubleType = llvm::Type::getX86_FP80Ty(VMContext);
     llvm::Type *resultType = 
-      llvm::StructType::get(longDoubleType, longDoubleType, NULL);
+      llvm::StructType::get(longDoubleType, longDoubleType, nullptr);
 
     return CGM.CreateRuntimeFunction(llvm::FunctionType::get(resultType,
                                                              params, true),
@@ -244,9 +244,9 @@
     Params.push_back(Ctx.getPointerDiffType()->getCanonicalTypeUnqualified());
     Params.push_back(Ctx.BoolTy);
     llvm::FunctionType *FTy =
-      Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(IdType, false, Params,
-                                                          FunctionType::ExtInfo(),
-                                                          RequiredArgs::All));
+        Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(
+            IdType, false, false, Params, FunctionType::ExtInfo(),
+            RequiredArgs::All));
     return CGM.CreateRuntimeFunction(FTy, "objc_getProperty");
   }
 
@@ -264,10 +264,9 @@
     Params.push_back(Ctx.BoolTy);
     Params.push_back(Ctx.BoolTy);
     llvm::FunctionType *FTy =
-      Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, false,
-                                                          Params,
-                                                          FunctionType::ExtInfo(),
-                                                          RequiredArgs::All));
+        Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(
+            Ctx.VoidTy, false, false, Params, FunctionType::ExtInfo(),
+            RequiredArgs::All));
     return CGM.CreateRuntimeFunction(FTy, "objc_setProperty");
   }
 
@@ -291,10 +290,9 @@
     Params.push_back(IdType);
     Params.push_back(Ctx.getPointerDiffType()->getCanonicalTypeUnqualified());
     llvm::FunctionType *FTy =
-    Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, false,
-                                                        Params,
-                                                        FunctionType::ExtInfo(),
-                                                        RequiredArgs::All));
+        Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(
+            Ctx.VoidTy, false, false, Params, FunctionType::ExtInfo(),
+            RequiredArgs::All));
     const char *name;
     if (atomic && copy)
       name = "objc_setProperty_atomic_copy";
@@ -319,10 +317,9 @@
     Params.push_back(Ctx.BoolTy);
     Params.push_back(Ctx.BoolTy);
     llvm::FunctionType *FTy =
-      Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, false,
-                                                          Params,
-                                                          FunctionType::ExtInfo(),
-                                                          RequiredArgs::All));
+        Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(
+            Ctx.VoidTy, false, false, Params, FunctionType::ExtInfo(),
+            RequiredArgs::All));
     return CGM.CreateRuntimeFunction(FTy, "objc_copyStruct");
   }
   
@@ -339,7 +336,7 @@
     Params.push_back(Ctx.VoidPtrTy);
     Params.push_back(Ctx.VoidPtrTy);
     llvm::FunctionType *FTy =
-      Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, false,
+      Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, false, false,
                                                           Params,
                                                           FunctionType::ExtInfo(),
                                                           RequiredArgs::All));
@@ -353,10 +350,9 @@
     SmallVector<CanQualType,1> Params;
     Params.push_back(Ctx.getCanonicalParamType(Ctx.getObjCIdType()));
     llvm::FunctionType *FTy =
-      Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, false,
-                                                          Params,
-                                                          FunctionType::ExtInfo(),
-                                                      RequiredArgs::All));
+        Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(
+            Ctx.VoidTy, false, false, Params, FunctionType::ExtInfo(),
+            RequiredArgs::All));
     return CGM.CreateRuntimeFunction(FTy, "objc_enumerationMutation");
   }
 
@@ -3814,15 +3810,16 @@
   // Enter a try block:
   //  - Call objc_exception_try_enter to push ExceptionData on top of
   //    the EH stack.
-  CGF.EmitNounwindRuntimeCall(ObjCTypes.getExceptionTryEnterFn(), ExceptionData);
+  CGF.EmitNounwindRuntimeCall(ObjCTypes.getExceptionTryEnterFn(),
+                              ExceptionData);
 
   //  - Call setjmp on the exception data buffer.
   llvm::Constant *Zero = llvm::ConstantInt::get(CGF.Builder.getInt32Ty(), 0);
   llvm::Value *GEPIndexes[] = { Zero, Zero, Zero };
   llvm::Value *SetJmpBuffer =
     CGF.Builder.CreateGEP(ExceptionData, GEPIndexes, "setjmp_buffer");
-  llvm::CallInst *SetJmpResult =
-    CGF.EmitNounwindRuntimeCall(ObjCTypes.getSetJmpFn(), SetJmpBuffer, "setjmp_result");
+  llvm::CallInst *SetJmpResult = CGF.EmitNounwindRuntimeCall(
+      ObjCTypes.getSetJmpFn(), SetJmpBuffer, "setjmp_result");
   SetJmpResult->setCanReturnTwice();
 
   // If setjmp returned 0, enter the protected block; otherwise,
@@ -4278,11 +4275,10 @@
                         eImageInfo_GCOnly);
 
       // Require that GC be specified and set to eImageInfo_GarbageCollected.
-      llvm::Value *Ops[2] = {
-        llvm::MDString::get(VMContext, "Objective-C Garbage Collection"),
-        llvm::ConstantInt::get(llvm::Type::getInt32Ty(VMContext),
-                               eImageInfo_GarbageCollected)
-      };
+      llvm::Metadata *Ops[2] = {
+          llvm::MDString::get(VMContext, "Objective-C Garbage Collection"),
+          llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
+              llvm::Type::getInt32Ty(VMContext), eImageInfo_GarbageCollected))};
       Mod.addModuleFlag(llvm::Module::Require, "Objective-C GC Only",
                         llvm::MDNode::get(VMContext, Ops));
     }
@@ -5033,7 +5029,7 @@
   //   char *attributes;
   // }
   PropertyTy = llvm::StructType::create("struct._prop_t",
-                                        Int8PtrTy, Int8PtrTy, NULL);
+                                        Int8PtrTy, Int8PtrTy, nullptr);
 
   // struct _prop_list_t {
   //   uint32_t entsize;      // sizeof(struct _prop_t)
@@ -5042,7 +5038,7 @@
   // }
   PropertyListTy =
     llvm::StructType::create("struct._prop_list_t", IntTy, IntTy,
-                             llvm::ArrayType::get(PropertyTy, 0), NULL);
+                             llvm::ArrayType::get(PropertyTy, 0), nullptr);
   // struct _prop_list_t *
   PropertyListPtrTy = llvm::PointerType::getUnqual(PropertyListTy);
 
@@ -5053,7 +5049,7 @@
   // }
   MethodTy = llvm::StructType::create("struct._objc_method",
                                       SelectorPtrTy, Int8PtrTy, Int8PtrTy,
-                                      NULL);
+                                      nullptr);
 
   // struct _objc_cache *
   CacheTy = llvm::StructType::create(VMContext, "struct._objc_cache");
@@ -5069,16 +5065,15 @@
   // }
   MethodDescriptionTy =
     llvm::StructType::create("struct._objc_method_description",
-                             SelectorPtrTy, Int8PtrTy, NULL);
+                             SelectorPtrTy, Int8PtrTy, nullptr);
 
   // struct _objc_method_description_list {
   //   int count;
   //   struct _objc_method_description[1];
   // }
-  MethodDescriptionListTy =
-    llvm::StructType::create("struct._objc_method_description_list",
-                             IntTy,
-                             llvm::ArrayType::get(MethodDescriptionTy, 0),NULL);
+  MethodDescriptionListTy = llvm::StructType::create(
+      "struct._objc_method_description_list", IntTy,
+      llvm::ArrayType::get(MethodDescriptionTy, 0), nullptr);
 
   // struct _objc_method_description_list *
   MethodDescriptionListPtrTy =
@@ -5097,7 +5092,7 @@
     llvm::StructType::create("struct._objc_protocol_extension",
                              IntTy, MethodDescriptionListPtrTy,
                              MethodDescriptionListPtrTy, PropertyListPtrTy,
-                             Int8PtrPtrTy, NULL);
+                             Int8PtrPtrTy, nullptr);
 
   // struct _objc_protocol_extension *
   ProtocolExtensionPtrTy = llvm::PointerType::getUnqual(ProtocolExtensionTy);
@@ -5112,7 +5107,7 @@
   ProtocolListTy->setBody(llvm::PointerType::getUnqual(ProtocolListTy),
                           LongTy,
                           llvm::ArrayType::get(ProtocolTy, 0),
-                          NULL);
+                          nullptr);
 
   // struct _objc_protocol {
   //   struct _objc_protocol_extension *isa;
@@ -5125,7 +5120,7 @@
                       llvm::PointerType::getUnqual(ProtocolListTy),
                       MethodDescriptionListPtrTy,
                       MethodDescriptionListPtrTy,
-                      NULL);
+                      nullptr);
 
   // struct _objc_protocol_list *
   ProtocolListPtrTy = llvm::PointerType::getUnqual(ProtocolListTy);
@@ -5140,7 +5135,7 @@
   //   int  ivar_offset;
   // }
   IvarTy = llvm::StructType::create("struct._objc_ivar",
-                                    Int8PtrTy, Int8PtrTy, IntTy, NULL);
+                                    Int8PtrTy, Int8PtrTy, IntTy, nullptr);
 
   // struct _objc_ivar_list *
   IvarListTy =
@@ -5155,7 +5150,7 @@
   // struct _objc_class_extension *
   ClassExtensionTy =
     llvm::StructType::create("struct._objc_class_extension",
-                             IntTy, Int8PtrTy, PropertyListPtrTy, NULL);
+                             IntTy, Int8PtrTy, PropertyListPtrTy, nullptr);
   ClassExtensionPtrTy = llvm::PointerType::getUnqual(ClassExtensionTy);
 
   ClassTy = llvm::StructType::create(VMContext, "struct._objc_class");
@@ -5186,7 +5181,7 @@
                    ProtocolListPtrTy,
                    Int8PtrTy,
                    ClassExtensionPtrTy,
-                   NULL);
+                   nullptr);
 
   ClassPtrTy = llvm::PointerType::getUnqual(ClassTy);
 
@@ -5202,7 +5197,7 @@
     llvm::StructType::create("struct._objc_category",
                              Int8PtrTy, Int8PtrTy, MethodListPtrTy,
                              MethodListPtrTy, ProtocolListPtrTy,
-                             IntTy, PropertyListPtrTy, NULL);
+                             IntTy, PropertyListPtrTy, nullptr);
 
   // Global metadata structures
 
@@ -5216,7 +5211,7 @@
   SymtabTy =
     llvm::StructType::create("struct._objc_symtab",
                              LongTy, SelectorPtrTy, ShortTy, ShortTy,
-                             llvm::ArrayType::get(Int8PtrTy, 0), NULL);
+                             llvm::ArrayType::get(Int8PtrTy, 0), nullptr);
   SymtabPtrTy = llvm::PointerType::getUnqual(SymtabTy);
 
   // struct _objc_module {
@@ -5227,7 +5222,7 @@
   //  }
   ModuleTy =
     llvm::StructType::create("struct._objc_module",
-                             LongTy, LongTy, Int8PtrTy, SymtabPtrTy, NULL);
+                             LongTy, LongTy, Int8PtrTy, SymtabPtrTy, nullptr);
 
 
   // FIXME: This is the size of the setjmp buffer and should be target
@@ -5240,7 +5235,7 @@
   ExceptionDataTy =
     llvm::StructType::create("struct._objc_exception_data",
                              llvm::ArrayType::get(CGM.Int32Ty,SetJmpBufferSize),
-                             StackPtrTy, NULL);
+                             StackPtrTy, nullptr);
 
 }
 
@@ -5253,7 +5248,7 @@
   // }
   MethodListnfABITy =
     llvm::StructType::create("struct.__method_list_t", IntTy, IntTy,
-                             llvm::ArrayType::get(MethodTy, 0), NULL);
+                             llvm::ArrayType::get(MethodTy, 0), nullptr);
   // struct method_list_t *
   MethodListnfABIPtrTy = llvm::PointerType::getUnqual(MethodListnfABITy);
 
@@ -5281,7 +5276,7 @@
                              MethodListnfABIPtrTy, MethodListnfABIPtrTy,
                              MethodListnfABIPtrTy, MethodListnfABIPtrTy,
                              PropertyListPtrTy, IntTy, IntTy, Int8PtrPtrTy,
-                             NULL);
+                             nullptr);
 
   // struct _protocol_t*
   ProtocolnfABIPtrTy = llvm::PointerType::getUnqual(ProtocolnfABITy);
@@ -5292,7 +5287,7 @@
   // }
   ProtocolListnfABITy->setBody(LongTy,
                                llvm::ArrayType::get(ProtocolnfABIPtrTy, 0),
-                               NULL);
+                               nullptr);
 
   // struct _objc_protocol_list*
   ProtocolListnfABIPtrTy = llvm::PointerType::getUnqual(ProtocolListnfABITy);
@@ -5306,7 +5301,7 @@
   // }
   IvarnfABITy = llvm::StructType::create(
       "struct._ivar_t", llvm::PointerType::getUnqual(IvarOffsetVarTy),
-      Int8PtrTy, Int8PtrTy, IntTy, IntTy, NULL);
+      Int8PtrTy, Int8PtrTy, IntTy, IntTy, nullptr);
 
   // struct _ivar_list_t {
   //   uint32 entsize;  // sizeof(struct _ivar_t)
@@ -5315,7 +5310,7 @@
   // }
   IvarListnfABITy =
     llvm::StructType::create("struct._ivar_list_t", IntTy, IntTy,
-                             llvm::ArrayType::get(IvarnfABITy, 0), NULL);
+                             llvm::ArrayType::get(IvarnfABITy, 0), nullptr);
 
   IvarListnfABIPtrTy = llvm::PointerType::getUnqual(IvarListnfABITy);
 
@@ -5339,7 +5334,8 @@
                                             Int8PtrTy, MethodListnfABIPtrTy,
                                             ProtocolListnfABIPtrTy,
                                             IvarListnfABIPtrTy,
-                                            Int8PtrTy, PropertyListPtrTy, NULL);
+                                            Int8PtrTy, PropertyListPtrTy,
+                                            nullptr);
 
   // ImpnfABITy - LLVM for id (*)(id, SEL, ...)
   llvm::Type *params[] = { ObjectPtrTy, SelectorPtrTy };
@@ -5360,7 +5356,7 @@
                         CachePtrTy,
                         llvm::PointerType::getUnqual(ImpnfABITy),
                         llvm::PointerType::getUnqual(ClassRonfABITy),
-                        NULL);
+                        nullptr);
 
   // LLVM for struct _class_t *
   ClassnfABIPtrTy = llvm::PointerType::getUnqual(ClassnfABITy);
@@ -5379,7 +5375,7 @@
                                              MethodListnfABIPtrTy,
                                              ProtocolListnfABIPtrTy,
                                              PropertyListPtrTy,
-                                             NULL);
+                                             nullptr);
 
   // New types for nonfragile abi messaging.
   CodeGen::CodeGenTypes &Types = CGM.getTypes();
@@ -5418,7 +5414,7 @@
   // };
   SuperMessageRefTy =
     llvm::StructType::create("struct._super_message_ref_t",
-                             ImpnfABITy, SelectorPtrTy, NULL);
+                             ImpnfABITy, SelectorPtrTy, nullptr);
 
   // SuperMessageRefPtrTy - LLVM for struct _super_message_ref_t*
   SuperMessageRefPtrTy = llvm::PointerType::getUnqual(SuperMessageRefTy);
@@ -5432,7 +5428,7 @@
   EHTypeTy =
     llvm::StructType::create("struct._objc_typeinfo",
                              llvm::PointerType::getUnqual(Int8PtrTy),
-                             Int8PtrTy, ClassnfABIPtrTy, NULL);
+                             Int8PtrTy, ClassnfABIPtrTy, nullptr);
   EHTypePtrTy = llvm::PointerType::getUnqual(EHTypeTy);
 }
 
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index ecc844f..51865a6 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -13,8 +13,8 @@
 
 #include "CGOpenMPRuntime.h"
 #include "CodeGenFunction.h"
-#include "clang/AST/StmtOpenMP.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/StmtOpenMP.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -27,30 +27,46 @@
 using namespace CodeGen;
 
 namespace {
-/// \brief API for captured statement code generation in OpenMP constructs.
+/// \brief Base class for handling code generation inside OpenMP regions.
 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
 public:
-  CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &CS,
-                     const VarDecl *ThreadIDVar)
-      : CGCapturedStmtInfo(CS, CR_OpenMP), ThreadIDVar(ThreadIDVar),
-        Directive(D) {
-    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
-  }
+  CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &CS)
+      : CGCapturedStmtInfo(CS, CR_OpenMP), Directive(D) {}
 
-  /// \brief Gets a variable or parameter for storing global thread id
+  CGOpenMPRegionInfo(const OMPExecutableDirective &D)
+      : CGCapturedStmtInfo(CR_OpenMP), Directive(D) {}
+
+  /// \brief Get a variable or parameter for storing global thread id
   /// inside OpenMP construct.
-  const VarDecl *getThreadIDVariable() const { return ThreadIDVar; }
+  virtual const VarDecl *getThreadIDVariable() const = 0;
 
-  /// \brief Gets an LValue for the current ThreadID variable.
+  /// \brief Get an LValue for the current ThreadID variable.
   LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
 
+  /// \brief Emit the captured statement body.
+  virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
+
   static bool classof(const CGCapturedStmtInfo *Info) {
     return Info->getKind() == CR_OpenMP;
   }
+protected:
+  /// \brief OpenMP executable directive associated with the region.
+  const OMPExecutableDirective &Directive;
+};
 
-  /// \brief Emit the captured statement body.
-  void EmitBody(CodeGenFunction &CGF, Stmt *S) override;
-
+/// \brief API for captured statement code generation in OpenMP constructs.
+class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
+public:
+  CGOpenMPOutlinedRegionInfo(const OMPExecutableDirective &D,
+                             const CapturedStmt &CS, const VarDecl *ThreadIDVar)
+      : CGOpenMPRegionInfo(D, CS), ThreadIDVar(ThreadIDVar) {
+    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
+  }
+  /// \brief Get a variable or parameter for storing global thread id
+  /// inside OpenMP construct.
+  virtual const VarDecl *getThreadIDVariable() const override {
+    return ThreadIDVar;
+  }
   /// \brief Get the name of the capture helper.
   StringRef getHelperName() const override { return ".omp_outlined."; }
 
@@ -58,29 +74,69 @@
   /// \brief A variable or parameter storing global thread id for OpenMP
   /// constructs.
   const VarDecl *ThreadIDVar;
-  /// \brief OpenMP executable directive associated with the region.
-  const OMPExecutableDirective &Directive;
+};
+
+/// \brief API for inlined captured statement code generation in OpenMP
+/// constructs.
+class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
+public:
+  CGOpenMPInlinedRegionInfo(const OMPExecutableDirective &D,
+                            CodeGenFunction::CGCapturedStmtInfo *OldCSI)
+      : CGOpenMPRegionInfo(D), OldCSI(OldCSI),
+        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
+  /// \brief Retrieve the value of the context parameter.
+  virtual llvm::Value *getContextValue() const override {
+    if (OuterRegionInfo)
+      return OuterRegionInfo->getContextValue();
+    llvm_unreachable("No context value for inlined OpenMP region");
+  }
+  /// \brief Lookup the captured field decl for a variable.
+  virtual const FieldDecl *lookup(const VarDecl *VD) const override {
+    if (OuterRegionInfo)
+      return OuterRegionInfo->lookup(VD);
+    llvm_unreachable("Trying to reference VarDecl that is neither local nor "
+                     "captured in outer OpenMP region");
+  }
+  virtual FieldDecl *getThisFieldDecl() const override {
+    if (OuterRegionInfo)
+      return OuterRegionInfo->getThisFieldDecl();
+    return nullptr;
+  }
+  /// \brief Get a variable or parameter for storing global thread id
+  /// inside OpenMP construct.
+  virtual const VarDecl *getThreadIDVariable() const override {
+    if (OuterRegionInfo)
+      return OuterRegionInfo->getThreadIDVariable();
+    return nullptr;
+  }
+  /// \brief Get the name of the capture helper.
+  virtual StringRef getHelperName() const override {
+    llvm_unreachable("No helper name for inlined OpenMP construct");
+  }
+
+  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
+
+private:
+  /// \brief CodeGen info about outer OpenMP region.
+  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
+  CGOpenMPRegionInfo *OuterRegionInfo;
 };
 } // namespace
 
 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
   return CGF.MakeNaturalAlignAddrLValue(
-      CGF.GetAddrOfLocalVar(ThreadIDVar),
-      CGF.getContext().getPointerType(ThreadIDVar->getType()));
+      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
+      CGF.getContext().getPointerType(getThreadIDVariable()->getType()));
 }
 
-void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, Stmt *S) {
+void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
   CGF.EmitOMPPrivateClause(Directive, PrivateScope);
   CGF.EmitOMPFirstprivateClause(Directive, PrivateScope);
-  if (PrivateScope.Privatize()) {
+  if (PrivateScope.Privatize())
     // Emit implicit barrier to synchronize threads and avoid data races.
-    auto Flags = static_cast<CGOpenMPRuntime::OpenMPLocationFlags>(
-        CGOpenMPRuntime::OMP_IDENT_KMPC |
-        CGOpenMPRuntime::OMP_IDENT_BARRIER_IMPL);
-    CGF.CGM.getOpenMPRuntime().EmitOMPBarrierCall(CGF, Directive.getLocStart(),
-                                                  Flags);
-  }
+    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, Directive.getLocStart(),
+                                               /*IsExplicit=*/false);
   CGCapturedStmtInfo::EmitBody(CGF, S);
 }
 
@@ -98,17 +154,17 @@
 }
 
 llvm::Value *
-CGOpenMPRuntime::EmitOpenMPOutlinedFunction(const OMPExecutableDirective &D,
-                                            const VarDecl *ThreadIDVar) {
+CGOpenMPRuntime::emitOutlinedFunction(const OMPExecutableDirective &D,
+                                      const VarDecl *ThreadIDVar) {
   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
   CodeGenFunction CGF(CGM, true);
-  CGOpenMPRegionInfo CGInfo(D, *CS, ThreadIDVar);
+  CGOpenMPOutlinedRegionInfo CGInfo(D, *CS, ThreadIDVar);
   CGF.CapturedStmtInfo = &CGInfo;
   return CGF.GenerateCapturedStmtFunction(*CS);
 }
 
 llvm::Value *
-CGOpenMPRuntime::GetOrCreateDefaultOpenMPLocation(OpenMPLocationFlags Flags) {
+CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
   if (!Entry) {
     if (!DefaultOpenMPPSource) {
@@ -138,20 +194,23 @@
   return Entry;
 }
 
-llvm::Value *CGOpenMPRuntime::EmitOpenMPUpdateLocation(
-    CodeGenFunction &CGF, SourceLocation Loc, OpenMPLocationFlags Flags) {
+llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
+                                                 SourceLocation Loc,
+                                                 OpenMPLocationFlags Flags) {
   // If no debug info is generated - return global default location.
   if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
       Loc.isInvalid())
-    return GetOrCreateDefaultOpenMPLocation(Flags);
+    return getOrCreateDefaultLocation(Flags);
 
   assert(CGF.CurFn && "No function in current CodeGenFunction.");
 
   llvm::Value *LocValue = nullptr;
-  OpenMPLocThreadIDMapTy::iterator I = OpenMPLocThreadIDMap.find(CGF.CurFn);
-  if (I != OpenMPLocThreadIDMap.end()) {
+  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
+  if (I != OpenMPLocThreadIDMap.end())
     LocValue = I->second.DebugLoc;
-  } else {
+  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
+  // getThreadID was called before this routine.
+  if (LocValue == nullptr) {
     // Generate "ident_t .kmpc_loc.addr;"
     llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr");
     AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
@@ -161,13 +220,13 @@
 
     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
-    CGF.Builder.CreateMemCpy(LocValue, GetOrCreateDefaultOpenMPLocation(Flags),
+    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                              llvm::ConstantExpr::getSizeOf(IdentTy),
                              CGM.PointerAlignInBytes);
   }
 
   // char **psource = &.kmpc_loc_<flags>.addr.psource;
-  llvm::Value *PSource =
+  auto *PSource =
       CGF.Builder.CreateConstInBoundsGEP2_32(LocValue, 0, IdentField_PSource);
 
   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
@@ -191,51 +250,53 @@
   return LocValue;
 }
 
-llvm::Value *CGOpenMPRuntime::GetOpenMPThreadID(CodeGenFunction &CGF,
-                                                SourceLocation Loc) {
+llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
+                                          SourceLocation Loc) {
   assert(CGF.CurFn && "No function in current CodeGenFunction.");
 
   llvm::Value *ThreadID = nullptr;
   // Check whether we've already cached a load of the thread id in this
   // function.
-  OpenMPLocThreadIDMapTy::iterator I = OpenMPLocThreadIDMap.find(CGF.CurFn);
+  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
   if (I != OpenMPLocThreadIDMap.end()) {
     ThreadID = I->second.ThreadID;
     if (ThreadID != nullptr)
       return ThreadID;
   }
   if (auto OMPRegionInfo =
-                 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
-    // Check if this an outlined function with thread id passed as argument.
-    auto ThreadIDVar = OMPRegionInfo->getThreadIDVariable();
-    auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
-    auto RVal = CGF.EmitLoadOfLValue(LVal, Loc);
-    LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(),
-                                          ThreadIDVar->getType());
-    ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
-    // If value loaded in entry block, cache it and use it everywhere in
-    // function.
-    if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
-      auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
-      Elem.second.ThreadID = ThreadID;
+          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
+    if (auto ThreadIDVar = OMPRegionInfo->getThreadIDVariable()) {
+      // Check whether this is an outlined function with thread id as argument.
+      auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
+      auto RVal = CGF.EmitLoadOfLValue(LVal, Loc);
+      LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(),
+                                            ThreadIDVar->getType());
+      ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
+      // If value loaded in entry block, cache it and use it everywhere in
+      // function.
+      if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
+        auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+        Elem.second.ThreadID = ThreadID;
+      }
+      return ThreadID;
     }
-  } else {
-    // This is not an outlined function region - need to call __kmpc_int32
-    // kmpc_global_thread_num(ident_t *loc).
-    // Generate thread id value and cache this value for use across the
-    // function.
-    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
-    CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
-    llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc)};
-    ThreadID = CGF.EmitRuntimeCall(
-        CreateRuntimeFunction(OMPRTL__kmpc_global_thread_num), Args);
-    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
-    Elem.second.ThreadID = ThreadID;
   }
+
+  // This is not an outlined function region - need to call kmp_int32
+  // __kmpc_global_thread_num(ident_t *loc).
+  // Generate thread id value and cache this value for use across the
+  // function.
+  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
+  CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
+  ThreadID =
+      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
+                          emitUpdateLocation(CGF, Loc));
+  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+  Elem.second.ThreadID = ThreadID;
   return ThreadID;
 }
 
-void CGOpenMPRuntime::FunctionFinished(CodeGenFunction &CGF) {
+void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
   assert(CGF.CurFn && "No function in current CodeGenFunction.");
   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
     OpenMPLocThreadIDMap.erase(CGF.CurFn);
@@ -250,7 +311,7 @@
 }
 
 llvm::Constant *
-CGOpenMPRuntime::CreateRuntimeFunction(OpenMPRTLFunction Function) {
+CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
   llvm::Constant *RTLFn = nullptr;
   switch (Function) {
   case OMPRTL__kmpc_fork_call: {
@@ -327,12 +388,102 @@
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
     break;
   }
-  case OMPRTL__kmpc_barrier: {
-    // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
+  case OMPRTL__kmpc_cancel_barrier: {
+    // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
+    // global_tid);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
+    break;
+  }
+  // Build __kmpc_for_static_init*(
+  //               ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
+  //               kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
+  //               kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
+  //               kmp_int[32|64] incr, kmp_int[32|64] chunk);
+  case OMPRTL__kmpc_for_static_init_4: {
+    auto ITy = CGM.Int32Ty;
+    auto PtrTy = llvm::PointerType::getUnqual(ITy);
+    llvm::Type *TypeParams[] = {
+        getIdentTyPointerTy(),                     // loc
+        CGM.Int32Ty,                               // tid
+        CGM.Int32Ty,                               // schedtype
+        llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
+        PtrTy,                                     // p_lower
+        PtrTy,                                     // p_upper
+        PtrTy,                                     // p_stride
+        ITy,                                       // incr
+        ITy                                        // chunk
+    };
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4");
+    break;
+  }
+  case OMPRTL__kmpc_for_static_init_4u: {
+    auto ITy = CGM.Int32Ty;
+    auto PtrTy = llvm::PointerType::getUnqual(ITy);
+    llvm::Type *TypeParams[] = {
+        getIdentTyPointerTy(),                     // loc
+        CGM.Int32Ty,                               // tid
+        CGM.Int32Ty,                               // schedtype
+        llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
+        PtrTy,                                     // p_lower
+        PtrTy,                                     // p_upper
+        PtrTy,                                     // p_stride
+        ITy,                                       // incr
+        ITy                                        // chunk
+    };
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4u");
+    break;
+  }
+  case OMPRTL__kmpc_for_static_init_8: {
+    auto ITy = CGM.Int64Ty;
+    auto PtrTy = llvm::PointerType::getUnqual(ITy);
+    llvm::Type *TypeParams[] = {
+        getIdentTyPointerTy(),                     // loc
+        CGM.Int32Ty,                               // tid
+        CGM.Int32Ty,                               // schedtype
+        llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
+        PtrTy,                                     // p_lower
+        PtrTy,                                     // p_upper
+        PtrTy,                                     // p_stride
+        ITy,                                       // incr
+        ITy                                        // chunk
+    };
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8");
+    break;
+  }
+  case OMPRTL__kmpc_for_static_init_8u: {
+    auto ITy = CGM.Int64Ty;
+    auto PtrTy = llvm::PointerType::getUnqual(ITy);
+    llvm::Type *TypeParams[] = {
+        getIdentTyPointerTy(),                     // loc
+        CGM.Int32Ty,                               // tid
+        CGM.Int32Ty,                               // schedtype
+        llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
+        PtrTy,                                     // p_lower
+        PtrTy,                                     // p_upper
+        PtrTy,                                     // p_stride
+        ITy,                                       // incr
+        ITy                                        // chunk
+    };
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8u");
+    break;
+  }
+  case OMPRTL__kmpc_for_static_fini: {
+    // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
     llvm::FunctionType *FnTy =
         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
     break;
   }
   case OMPRTL__kmpc_push_num_threads: {
@@ -364,13 +515,54 @@
     break;
   }
   case OMPRTL__kmpc_flush: {
-    // Build void __kmpc_flush(ident_t *loc, ...);
+    // Build void __kmpc_flush(ident_t *loc);
     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
     llvm::FunctionType *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
     break;
   }
+  case OMPRTL__kmpc_master: {
+    // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
+    break;
+  }
+  case OMPRTL__kmpc_end_master: {
+    // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
+    break;
+  }
+  case OMPRTL__kmpc_omp_taskyield: {
+    // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
+    // int end_part);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
+    break;
+  }
+  case OMPRTL__kmpc_single: {
+    // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
+    break;
+  }
+  case OMPRTL__kmpc_end_single: {
+    // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
+    break;
+  }
   }
   return RTLFn;
 }
@@ -378,43 +570,41 @@
 llvm::Constant *
 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
   // Lookup the entry, lazily creating it if necessary.
-  return GetOrCreateInternalVariable(CGM.Int8PtrPtrTy,
+  return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
                                      Twine(CGM.getMangledName(VD)) + ".cache.");
 }
 
-llvm::Value *CGOpenMPRuntime::getOMPAddrOfThreadPrivate(CodeGenFunction &CGF,
-                                                        const VarDecl *VD,
-                                                        llvm::Value *VDAddr,
-                                                        SourceLocation Loc) {
+llvm::Value *CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
+                                                     const VarDecl *VD,
+                                                     llvm::Value *VDAddr,
+                                                     SourceLocation Loc) {
   auto VarTy = VDAddr->getType()->getPointerElementType();
-  llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc),
-                         GetOpenMPThreadID(CGF, Loc),
+  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                          CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy),
                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                          getOrCreateThreadPrivateCache(VD)};
   return CGF.EmitRuntimeCall(
-      CreateRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args);
+      createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args);
 }
 
-void CGOpenMPRuntime::EmitOMPThreadPrivateVarInit(
+void CGOpenMPRuntime::emitThreadPrivateVarInit(
     CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor,
     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
   // library.
-  auto OMPLoc = EmitOpenMPUpdateLocation(CGF, Loc);
-  CGF.EmitRuntimeCall(CreateRuntimeFunction(OMPRTL__kmpc_global_thread_num),
+  auto OMPLoc = emitUpdateLocation(CGF, Loc);
+  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                       OMPLoc);
   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
   // to register constructor/destructor for variable.
   llvm::Value *Args[] = {OMPLoc,
                          CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy),
                          Ctor, CopyCtor, Dtor};
-  CGF.EmitRuntimeCall(CreateRuntimeFunction(
-                          CGOpenMPRuntime::OMPRTL__kmpc_threadprivate_register),
-                      Args);
+  CGF.EmitRuntimeCall(
+      createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
 }
 
-llvm::Function *CGOpenMPRuntime::EmitOMPThreadPrivateVarDefinition(
+llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
     const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc,
     bool PerformInit, CodeGenFunction *CGF) {
   VD = VD->getDefinition(CGM.getContext());
@@ -517,44 +707,41 @@
       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
                             Loc);
-      EmitOMPThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
+      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
       InitCGF.FinishFunction();
       return InitFunction;
     }
-    EmitOMPThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
+    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
   }
   return nullptr;
 }
 
-void CGOpenMPRuntime::EmitOMPParallelCall(CodeGenFunction &CGF,
-                                          SourceLocation Loc,
-                                          llvm::Value *OutlinedFn,
-                                          llvm::Value *CapturedStruct) {
+void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
+                                       llvm::Value *OutlinedFn,
+                                       llvm::Value *CapturedStruct) {
   // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/)
   llvm::Value *Args[] = {
-      EmitOpenMPUpdateLocation(CGF, Loc),
+      emitUpdateLocation(CGF, Loc),
       CGF.Builder.getInt32(1), // Number of arguments after 'microtask' argument
       // (there is only one additional argument - 'context')
       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()),
       CGF.EmitCastToVoidPtr(CapturedStruct)};
-  auto RTLFn = CreateRuntimeFunction(CGOpenMPRuntime::OMPRTL__kmpc_fork_call);
+  auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
   CGF.EmitRuntimeCall(RTLFn, Args);
 }
 
-void CGOpenMPRuntime::EmitOMPSerialCall(CodeGenFunction &CGF,
-                                        SourceLocation Loc,
-                                        llvm::Value *OutlinedFn,
-                                        llvm::Value *CapturedStruct) {
-  auto ThreadID = GetOpenMPThreadID(CGF, Loc);
+void CGOpenMPRuntime::emitSerialCall(CodeGenFunction &CGF, SourceLocation Loc,
+                                     llvm::Value *OutlinedFn,
+                                     llvm::Value *CapturedStruct) {
+  auto ThreadID = getThreadID(CGF, Loc);
   // Build calls:
   // __kmpc_serialized_parallel(&Loc, GTid);
-  llvm::Value *SerArgs[] = {EmitOpenMPUpdateLocation(CGF, Loc), ThreadID};
-  auto RTLFn =
-      CreateRuntimeFunction(CGOpenMPRuntime::OMPRTL__kmpc_serialized_parallel);
-  CGF.EmitRuntimeCall(RTLFn, SerArgs);
+  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), ThreadID};
+  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
+                      Args);
 
   // OutlinedFn(&GTid, &zero, CapturedStruct);
-  auto ThreadIDAddr = EmitThreadIDAddress(CGF, Loc);
+  auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
   auto Int32Ty =
       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
   auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr");
@@ -563,10 +750,9 @@
   CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
 
   // __kmpc_end_serialized_parallel(&Loc, GTid);
-  llvm::Value *EndSerArgs[] = {EmitOpenMPUpdateLocation(CGF, Loc), ThreadID};
-  RTLFn = CreateRuntimeFunction(
-      CGOpenMPRuntime::OMPRTL__kmpc_end_serialized_parallel);
-  CGF.EmitRuntimeCall(RTLFn, EndSerArgs);
+  llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
+  CGF.EmitRuntimeCall(
+      createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
 }
 
 // If we're inside an (outlined) parallel region, use the region info's
@@ -575,13 +761,15 @@
 // regular serial code region, get thread ID by calling kmp_int32
 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
 // return the address of that temp.
-llvm::Value *CGOpenMPRuntime::EmitThreadIDAddress(CodeGenFunction &CGF,
+llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                                   SourceLocation Loc) {
   if (auto OMPRegionInfo =
           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
-    return CGF.EmitLoadOfLValue(OMPRegionInfo->getThreadIDVariableLValue(CGF),
-                                SourceLocation()).getScalarVal();
-  auto ThreadID = GetOpenMPThreadID(CGF, Loc);
+    if (OMPRegionInfo->getThreadIDVariable())
+      return CGF.EmitLoadOfLValue(OMPRegionInfo->getThreadIDVariableLValue(CGF),
+                                  Loc).getScalarVal();
+
+  auto ThreadID = getThreadID(CGF, Loc);
   auto Int32Ty =
       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
@@ -592,7 +780,7 @@
 }
 
 llvm::Constant *
-CGOpenMPRuntime::GetOrCreateInternalVariable(llvm::Type *Ty,
+CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
                                              const llvm::Twine &Name) {
   SmallString<256> Buffer;
   llvm::raw_svector_ostream Out(Buffer);
@@ -611,62 +799,260 @@
              Elem.first());
 }
 
-llvm::Value *CGOpenMPRuntime::GetCriticalRegionLock(StringRef CriticalName) {
+llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
   llvm::Twine Name(".gomp_critical_user_", CriticalName);
-  return GetOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
+  return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
 }
 
-void CGOpenMPRuntime::EmitOMPCriticalRegionStart(CodeGenFunction &CGF,
-                                                 llvm::Value *RegionLock,
-                                                 SourceLocation Loc) {
-  // Prepare other arguments and build a call to __kmpc_critical
-  llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc),
-                         GetOpenMPThreadID(CGF, Loc), RegionLock};
-  auto RTLFn = CreateRuntimeFunction(CGOpenMPRuntime::OMPRTL__kmpc_critical);
-  CGF.EmitRuntimeCall(RTLFn, Args);
+void CGOpenMPRuntime::emitCriticalRegion(
+    CodeGenFunction &CGF, StringRef CriticalName,
+    const std::function<void()> &CriticalOpGen, SourceLocation Loc) {
+  auto RegionLock = getCriticalRegionLock(CriticalName);
+  // __kmpc_critical(ident_t *, gtid, Lock);
+  // CriticalOpGen();
+  // __kmpc_end_critical(ident_t *, gtid, Lock);
+  // Prepare arguments and build a call to __kmpc_critical
+  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
+                         RegionLock};
+  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
+  CriticalOpGen();
+  // Build a call to __kmpc_end_critical
+  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
 }
 
-void CGOpenMPRuntime::EmitOMPCriticalRegionEnd(CodeGenFunction &CGF,
-                                               llvm::Value *RegionLock,
-                                               SourceLocation Loc) {
-  // Prepare other arguments and build a call to __kmpc_end_critical
-  llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc),
-                         GetOpenMPThreadID(CGF, Loc), RegionLock};
-  auto RTLFn =
-      CreateRuntimeFunction(CGOpenMPRuntime::OMPRTL__kmpc_end_critical);
-  CGF.EmitRuntimeCall(RTLFn, Args);
+static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
+                       const std::function<void()> &BodyOpGen) {
+  llvm::Value *CallBool = CGF.EmitScalarConversion(
+      IfCond,
+      CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
+      CGF.getContext().BoolTy);
+
+  auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
+  auto *ContBlock = CGF.createBasicBlock("omp_if.end");
+  // Generate the branch (If-stmt)
+  CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
+  CGF.EmitBlock(ThenBlock);
+  BodyOpGen();
+  // Emit the rest of bblocks/branches
+  CGF.EmitBranch(ContBlock);
+  CGF.EmitBlock(ContBlock, true);
 }
 
-void CGOpenMPRuntime::EmitOMPBarrierCall(CodeGenFunction &CGF,
-                                         SourceLocation Loc,
-                                         OpenMPLocationFlags Flags) {
-  // Build call __kmpc_barrier(loc, thread_id)
-  llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc, Flags),
-                         GetOpenMPThreadID(CGF, Loc)};
-  auto RTLFn = CreateRuntimeFunction(CGOpenMPRuntime::OMPRTL__kmpc_barrier);
-  CGF.EmitRuntimeCall(RTLFn, Args);
+void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
+                                       const std::function<void()> &MasterOpGen,
+                                       SourceLocation Loc) {
+  // if(__kmpc_master(ident_t *, gtid)) {
+  //   MasterOpGen();
+  //   __kmpc_end_master(ident_t *, gtid);
+  // }
+  // Prepare arguments and build a call to __kmpc_master
+  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
+  auto *IsMaster =
+      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
+  emitIfStmt(CGF, IsMaster, [&]() -> void {
+    MasterOpGen();
+    // Build a call to __kmpc_end_master.
+    // OpenMP [1.2.2 OpenMP Language Terminology]
+    // For C/C++, an executable statement, possibly compound, with a single
+    // entry at the top and a single exit at the bottom, or an OpenMP construct.
+    // * Access to the structured block must not be the result of a branch.
+    // * The point of exit cannot be a branch out of the structured block.
+    // * The point of entry must not be a call to setjmp().
+    // * longjmp() and throw() must not violate the entry/exit criteria.
+    // * An expression statement, iteration statement, selection statement, or
+    // try block is considered to be a structured block if the corresponding
+    // compound statement obtained by enclosing it in { and } would be a
+    // structured block.
+    // It is analyzed in Sema, so we can just call __kmpc_end_master() on
+    // fallthrough rather than pushing a normal cleanup for it.
+    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_master), Args);
+  });
 }
 
-void CGOpenMPRuntime::EmitOMPNumThreadsClause(CodeGenFunction &CGF,
-                                              llvm::Value *NumThreads,
-                                              SourceLocation Loc) {
+void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
+                                        SourceLocation Loc) {
+  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
+  llvm::Value *Args[] = {
+      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
+      llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
+  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
+}
+
+void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
+                                       const std::function<void()> &SingleOpGen,
+                                       SourceLocation Loc) {
+  // if(__kmpc_single(ident_t *, gtid)) {
+  //   SingleOpGen();
+  //   __kmpc_end_single(ident_t *, gtid);
+  // }
+  // Prepare arguments and build a call to __kmpc_single
+  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
+  auto *IsSingle =
+      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
+  emitIfStmt(CGF, IsSingle, [&]() -> void {
+    SingleOpGen();
+    // Build a call to __kmpc_end_single.
+    // OpenMP [1.2.2 OpenMP Language Terminology]
+    // For C/C++, an executable statement, possibly compound, with a single
+    // entry at the top and a single exit at the bottom, or an OpenMP construct.
+    // * Access to the structured block must not be the result of a branch.
+    // * The point of exit cannot be a branch out of the structured block.
+    // * The point of entry must not be a call to setjmp().
+    // * longjmp() and throw() must not violate the entry/exit criteria.
+    // * An expression statement, iteration statement, selection statement, or
+    // try block is considered to be a structured block if the corresponding
+    // compound statement obtained by enclosing it in { and } would be a
+    // structured block.
+    // It is analyzed in Sema, so we can just call __kmpc_end_single() on
+    // fallthrough rather than pushing a normal cleanup for it.
+    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_single), Args);
+  });
+}
+
+void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
+                                      bool IsExplicit) {
+  // Select the ident flags marking the barrier as explicit or implicit.
+  auto Flags = static_cast<OpenMPLocationFlags>(
+      OMP_IDENT_KMPC |
+      (IsExplicit ? OMP_IDENT_BARRIER_EXPL : OMP_IDENT_BARRIER_IMPL));
+  // Build call __kmpc_cancel_barrier(loc, thread_id);
+  // Replace __kmpc_barrier() function by __kmpc_cancel_barrier() because this
+  // one provides the same functionality and adds initial support for
+  // cancellation constructs introduced in OpenMP 4.0. __kmpc_cancel_barrier()
+  // is provided by default by the runtime library, so it is safe to make this
+  // replacement.
+  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
+                         getThreadID(CGF, Loc)};
+  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
+}
+
+/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
+/// the enum sched_type in kmp.h).
+enum OpenMPSchedType {
+  /// \brief Lower bound for default (unordered) versions.
+  OMP_sch_lower = 32,
+  OMP_sch_static_chunked = 33,
+  OMP_sch_static = 34,
+  OMP_sch_dynamic_chunked = 35,
+  OMP_sch_guided_chunked = 36,
+  OMP_sch_runtime = 37,
+  OMP_sch_auto = 38,
+  /// \brief Lower bound for 'ordered' versions.
+  OMP_ord_lower = 64,
+  /// \brief Lower bound for 'nomerge' versions.
+  OMP_nm_lower = 160,
+};
+
+/// \brief Map the OpenMP loop schedule to the runtime enumeration.
+static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
+                                          bool Chunked) {
+  switch (ScheduleKind) {
+  case OMPC_SCHEDULE_static:
+    return Chunked ? OMP_sch_static_chunked : OMP_sch_static;
+  case OMPC_SCHEDULE_dynamic:
+    return OMP_sch_dynamic_chunked;
+  case OMPC_SCHEDULE_guided:
+    return OMP_sch_guided_chunked;
+  case OMPC_SCHEDULE_auto:
+    return OMP_sch_auto;
+  case OMPC_SCHEDULE_runtime:
+    return OMP_sch_runtime;
+  case OMPC_SCHEDULE_unknown:
+    assert(!Chunked && "chunk was specified but schedule kind not known");
+    return OMP_sch_static;
+  }
+  llvm_unreachable("Unexpected runtime schedule");
+}
+
+bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
+                                         bool Chunked) const {
+  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
+  return Schedule == OMP_sch_static;
+}
+
+bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
+  auto Schedule = getRuntimeSchedule(ScheduleKind, /* Chunked */ false);
+  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
+  return Schedule != OMP_sch_static;
+}
+
+void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
+                                  OpenMPScheduleClauseKind ScheduleKind,
+                                  unsigned IVSize, bool IVSigned,
+                                  llvm::Value *IL, llvm::Value *LB,
+                                  llvm::Value *UB, llvm::Value *ST,
+                                  llvm::Value *Chunk) {
+  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr);
+  // Call __kmpc_for_static_init(
+  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
+  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
+  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
+  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
+  // TODO: Implement dynamic schedule.
+
+  // If the Chunk was not specified in the clause - use default value 1.
+  if (Chunk == nullptr)
+    Chunk = CGF.Builder.getIntN(IVSize, /*C*/ 1);
+
+  llvm::Value *Args[] = {
+      emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
+      CGF.Builder.getInt32(Schedule), // Schedule type
+      IL,                             // &isLastIter
+      LB,                             // &LB
+      UB,                             // &UB
+      ST,                             // &Stride
+      CGF.Builder.getIntN(IVSize, 1), // Incr
+      Chunk                           // Chunk
+  };
+  assert((IVSize == 32 || IVSize == 64) &&
+         "Index size is not compatible with the omp runtime");
+  auto F = IVSize == 32 ? (IVSigned ? OMPRTL__kmpc_for_static_init_4
+                                    : OMPRTL__kmpc_for_static_init_4u)
+                        : (IVSigned ? OMPRTL__kmpc_for_static_init_8
+                                    : OMPRTL__kmpc_for_static_init_8u);
+  CGF.EmitRuntimeCall(createRuntimeFunction(F), Args);
+}
+
+void CGOpenMPRuntime::emitForFinish(CodeGenFunction &CGF, SourceLocation Loc,
+                                    OpenMPScheduleClauseKind ScheduleKind) {
+  assert((ScheduleKind == OMPC_SCHEDULE_static ||
+          ScheduleKind == OMPC_SCHEDULE_unknown) &&
+         "Non-static schedule kinds are not yet implemented");
+  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
+  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
+                         getThreadID(CGF, Loc)};
+  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
+                      Args);
+}
+
+void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
+                                           llvm::Value *NumThreads,
+                                           SourceLocation Loc) {
   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
   llvm::Value *Args[] = {
-      EmitOpenMPUpdateLocation(CGF, Loc), GetOpenMPThreadID(CGF, Loc),
+      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
-  llvm::Constant *RTLFn = CGF.CGM.getOpenMPRuntime().CreateRuntimeFunction(
-      CGOpenMPRuntime::OMPRTL__kmpc_push_num_threads);
-  CGF.EmitRuntimeCall(RTLFn, Args);
+  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
+                      Args);
 }
 
-void CGOpenMPRuntime::EmitOMPFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
-                                   SourceLocation Loc) {
-  // Build call void __kmpc_flush(ident_t *loc, ...)
-  // FIXME: List of variables is ignored by libiomp5 runtime, no need to
-  // generate it, just request full memory fence.
-  llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc),
-                         llvm::ConstantInt::get(CGM.Int32Ty, 0)};
-  auto *RTLFn = CGF.CGM.getOpenMPRuntime().CreateRuntimeFunction(
-      CGOpenMPRuntime::OMPRTL__kmpc_flush);
-  CGF.EmitRuntimeCall(RTLFn, Args);
+void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
+                                SourceLocation Loc) {
+  // Build call void __kmpc_flush(ident_t *loc)
+  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
+                      emitUpdateLocation(CGF, Loc));
 }
+
+InlinedOpenMPRegionRAII::InlinedOpenMPRegionRAII(
+    CodeGenFunction &CGF, const OMPExecutableDirective &D)
+    : CGF(CGF) {
+  CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(D, CGF.CapturedStmtInfo);
+}
+
+InlinedOpenMPRegionRAII::~InlinedOpenMPRegionRAII() {
+  auto *OldCSI =
+      cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
+  delete CGF.CapturedStmtInfo;
+  CGF.CapturedStmtInfo = OldCSI;
+}
+
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h
index b9c1a35..97aa5b8 100644
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
 #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
 
+#include "clang/Basic/OpenMPKinds.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
@@ -42,7 +43,57 @@
 class CodeGenModule;
 
 class CGOpenMPRuntime {
-public:
+  enum OpenMPRTLFunction {
+    /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
+    /// kmpc_micro microtask, ...);
+    OMPRTL__kmpc_fork_call,
+    /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
+    /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
+    OMPRTL__kmpc_threadprivate_cached,
+    /// \brief Call to void __kmpc_threadprivate_register( ident_t *,
+    /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
+    OMPRTL__kmpc_threadprivate_register,
+    // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
+    OMPRTL__kmpc_global_thread_num,
+    // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
+    // kmp_critical_name *crit);
+    OMPRTL__kmpc_critical,
+    // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
+    // kmp_critical_name *crit);
+    OMPRTL__kmpc_end_critical,
+    // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
+    // global_tid);
+    OMPRTL__kmpc_cancel_barrier,
+    // Calls for static scheduling 'omp for' loops.
+    OMPRTL__kmpc_for_static_init_4,
+    OMPRTL__kmpc_for_static_init_4u,
+    OMPRTL__kmpc_for_static_init_8,
+    OMPRTL__kmpc_for_static_init_8u,
+    OMPRTL__kmpc_for_static_fini,
+    // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
+    // global_tid);
+    OMPRTL__kmpc_serialized_parallel,
+    // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
+    // global_tid);
+    OMPRTL__kmpc_end_serialized_parallel,
+    // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
+    // kmp_int32 num_threads);
+    OMPRTL__kmpc_push_num_threads,
+    // Call to void __kmpc_flush(ident_t *loc);
+    OMPRTL__kmpc_flush,
+    // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
+    OMPRTL__kmpc_master,
+    // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
+    OMPRTL__kmpc_end_master,
+    // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
+    // int end_part);
+    OMPRTL__kmpc_omp_taskyield,
+    // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
+    OMPRTL__kmpc_single,
+    // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
+    OMPRTL__kmpc_end_single,
+  };
+
   /// \brief Values for bit flags used in the ident_t to describe the fields.
   /// All enumeric elements are named and described in accordance with the code
   /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
@@ -64,40 +115,6 @@
     /// \brief Implicit barrier in 'single' directive.
     OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140
   };
-  enum OpenMPRTLFunction {
-    /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
-    /// kmpc_micro microtask, ...);
-    OMPRTL__kmpc_fork_call,
-    /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
-    /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
-    OMPRTL__kmpc_threadprivate_cached,
-    /// \brief Call to void __kmpc_threadprivate_register( ident_t *,
-    /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
-    OMPRTL__kmpc_threadprivate_register,
-    // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
-    OMPRTL__kmpc_global_thread_num,
-    // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
-    // kmp_critical_name *crit);
-    OMPRTL__kmpc_critical,
-    // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
-    // kmp_critical_name *crit);
-    OMPRTL__kmpc_end_critical,
-    // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
-    OMPRTL__kmpc_barrier,
-    // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
-    // global_tid);
-    OMPRTL__kmpc_serialized_parallel,
-    // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
-    // global_tid);
-    OMPRTL__kmpc_end_serialized_parallel,
-    // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
-    // kmp_int32 num_threads);
-    OMPRTL__kmpc_push_num_threads,
-    // Call to void __kmpc_flush(ident_t *loc, ...);
-    OMPRTL__kmpc_flush
-  };
-
-private:
   CodeGenModule &CGM;
   /// \brief Default const ident_t object used for initialization of all other
   /// ident_t objects.
@@ -105,7 +122,7 @@
   /// \brief Map of flags and corresponding default locations.
   typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDefaultLocMapTy;
   OpenMPDefaultLocMapTy OpenMPDefaultLocMap;
-  llvm::Value *GetOrCreateDefaultOpenMPLocation(OpenMPLocationFlags Flags);
+  llvm::Value *getOrCreateDefaultLocation(OpenMPLocationFlags Flags);
   /// \brief Describes ident structure that describes a source location.
   /// All descriptions are taken from
   /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
@@ -177,9 +194,8 @@
   /// \brief Emits object of ident_t type with info for source location.
   /// \param Flags Flags for OpenMP location.
   ///
-  llvm::Value *
-  EmitOpenMPUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc,
-                           OpenMPLocationFlags Flags = OMP_IDENT_KMPC);
+  llvm::Value *emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc,
+                                  OpenMPLocationFlags Flags = OMP_IDENT_KMPC);
 
   /// \brief Returns pointer to ident_t type.
   llvm::Type *getIdentTyPointerTy();
@@ -190,7 +206,7 @@
   /// \brief Returns specified OpenMP runtime function.
   /// \param Function OpenMP runtime function.
   /// \return Specified function.
-  llvm::Constant *CreateRuntimeFunction(OpenMPRTLFunction Function);
+  llvm::Constant *createRuntimeFunction(OpenMPRTLFunction Function);
 
   /// \brief If the specified mangled name is not in the module, create and
   /// return threadprivate cache object. This object is a pointer's worth of
@@ -201,12 +217,12 @@
 
   /// \brief Emits address of the word in a memory where current thread id is
   /// stored.
-  virtual llvm::Value *EmitThreadIDAddress(CodeGenFunction &CGF,
+  virtual llvm::Value *emitThreadIDAddress(CodeGenFunction &CGF,
                                            SourceLocation Loc);
 
   /// \brief Gets thread id value for the current thread.
   ///
-  llvm::Value *GetOpenMPThreadID(CodeGenFunction &CGF, SourceLocation Loc);
+  llvm::Value *getThreadID(CodeGenFunction &CGF, SourceLocation Loc);
 
   /// \brief Gets (if variable with the given name already exist) or creates
   /// internal global variable with the specified Name. The created variable has
@@ -214,7 +230,7 @@
   /// \param Ty Type of the global variable. If it is exist already the type
   /// must be the same.
   /// \param Name Name of the variable.
-  llvm::Constant *GetOrCreateInternalVariable(llvm::Type *Ty,
+  llvm::Constant *getOrCreateInternalVariable(llvm::Type *Ty,
                                               const llvm::Twine &Name);
 
   /// \brief Set of threadprivate variables with the generated initializer.
@@ -226,9 +242,16 @@
   /// \param CopyCtor Pointer to a global copy function for \a VD.
   /// \param Dtor Pointer to a global destructor function for \a VD.
   /// \param Loc Location of threadprivate declaration.
-  void EmitOMPThreadPrivateVarInit(CodeGenFunction &CGF, llvm::Value *VDAddr,
-                                   llvm::Value *Ctor, llvm::Value *CopyCtor,
-                                   llvm::Value *Dtor, SourceLocation Loc);
+  void emitThreadPrivateVarInit(CodeGenFunction &CGF, llvm::Value *VDAddr,
+                                llvm::Value *Ctor, llvm::Value *CopyCtor,
+                                llvm::Value *Dtor, SourceLocation Loc);
+
+  /// \brief Returns corresponding lock object for the specified critical region
+  /// name. If the lock object does not exist it is created, otherwise the
+  /// reference to the existing copy is returned.
+  /// \param CriticalName Name of the critical region.
+  ///
+  llvm::Value *getCriticalRegionLock(StringRef CriticalName);
 
 public:
   explicit CGOpenMPRuntime(CodeGenModule &CGM);
@@ -241,13 +264,12 @@
   /// \param D OpenMP directive.
   /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
   ///
-  virtual llvm::Value *
-  EmitOpenMPOutlinedFunction(const OMPExecutableDirective &D,
-                             const VarDecl *ThreadIDVar);
+  virtual llvm::Value *emitOutlinedFunction(const OMPExecutableDirective &D,
+                                            const VarDecl *ThreadIDVar);
 
   /// \brief Cleans up references to the objects in finished function.
   ///
-  void FunctionFinished(CodeGenFunction &CGF);
+  void functionFinished(CodeGenFunction &CGF);
 
   /// \brief Emits code for parallel call of the \a OutlinedFn with variables
   /// captured in a record which address is stored in \a CapturedStruct.
@@ -256,9 +278,9 @@
   /// \param CapturedStruct A pointer to the record with the references to
   /// variables used in \a OutlinedFn function.
   ///
-  virtual void EmitOMPParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
-                                   llvm::Value *OutlinedFn,
-                                   llvm::Value *CapturedStruct);
+  virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
+                                llvm::Value *OutlinedFn,
+                                llvm::Value *CapturedStruct);
 
   /// \brief Emits code for serial call of the \a OutlinedFn with variables
   /// captured in a record which address is stored in \a CapturedStruct.
@@ -266,46 +288,101 @@
   /// \param CapturedStruct A pointer to the record with the references to
   /// variables used in \a OutlinedFn function.
   ///
-  virtual void EmitOMPSerialCall(CodeGenFunction &CGF, SourceLocation Loc,
-                                 llvm::Value *OutlinedFn,
-                                 llvm::Value *CapturedStruct);
+  virtual void emitSerialCall(CodeGenFunction &CGF, SourceLocation Loc,
+                              llvm::Value *OutlinedFn,
+                              llvm::Value *CapturedStruct);
 
-  /// \brief Returns corresponding lock object for the specified critical region
-  /// name. If the lock object does not exist it is created, otherwise the
-  /// reference to the existing copy is returned.
+  /// \brief Emits a critical region.
   /// \param CriticalName Name of the critical region.
+  /// \param CriticalOpGen Generator for the statement associated with the given
+  /// critical region.
+  virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName,
+                                  const std::function<void()> &CriticalOpGen,
+                                  SourceLocation Loc);
+
+  /// \brief Emits a master region.
+  /// \param MasterOpGen Generator for the statement associated with the given
+  /// master region.
+  virtual void emitMasterRegion(CodeGenFunction &CGF,
+                                const std::function<void()> &MasterOpGen,
+                                SourceLocation Loc);
+
+  /// \brief Emits code for a taskyield directive.
+  virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc);
+
+  /// \brief Emits a single region.
+  /// \param SingleOpGen Generator for the statement associated with the given
+  /// single region.
+  virtual void emitSingleRegion(CodeGenFunction &CGF,
+                                const std::function<void()> &SingleOpGen,
+                                SourceLocation Loc);
+
+  /// \brief Emits explicit barrier for OpenMP threads.
+  /// \param IsExplicit true, if it is explicitly specified barrier.
   ///
-  llvm::Value *GetCriticalRegionLock(StringRef CriticalName);
+  virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
+                               bool IsExplicit = true);
 
-  /// \brief Emits start of the critical region by calling void
-  /// __kmpc_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name
-  /// * \a RegionLock)
-  /// \param RegionLock The lock object for critical region.
-  virtual void EmitOMPCriticalRegionStart(CodeGenFunction &CGF,
-                                          llvm::Value *RegionLock,
-                                          SourceLocation Loc);
-
-  /// \brief Emits end of the critical region by calling void
-  /// __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name
-  /// * \a RegionLock)
-  /// \param RegionLock The lock object for critical region.
-  virtual void EmitOMPCriticalRegionEnd(CodeGenFunction &CGF,
-                                        llvm::Value *RegionLock,
-                                        SourceLocation Loc);
-
-  /// \brief Emits a barrier for OpenMP threads.
-  /// \param Flags Flags for the barrier.
+  /// \brief Check if the specified \a ScheduleKind is static non-chunked.
+  /// This kind of worksharing directive is emitted without outer loop.
+  /// \param ScheduleKind Schedule kind specified in the 'schedule' clause.
+  /// \param Chunked True if chunk is specified in the clause.
   ///
-  virtual void EmitOMPBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
-                                  OpenMPLocationFlags Flags);
+  virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
+                                  bool Chunked) const;
+
+  /// \brief Check if the specified \a ScheduleKind is dynamic.
+  /// This kind of worksharing directive is emitted without outer loop.
+  /// \param ScheduleKind Schedule Kind specified in the 'schedule' clause.
+  ///
+  virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const;
+
+  /// \brief Call the appropriate runtime routine to initialize it before start
+  /// of loop.
+  ///
+  /// Depending on the loop schedule, it is necessary to call some runtime
+  /// routine before start of the OpenMP loop to get the loop upper / lower
+  /// bounds \a LB and \a UB and stride \a ST.
+  ///
+  /// \param CGF Reference to current CodeGenFunction.
+  /// \param Loc Clang source location.
+  /// \param SchedKind Schedule kind, specified by the 'schedule' clause.
+  /// \param IVSize Size of the iteration variable in bits.
+  /// \param IVSigned Sign of the iteration variable.
+  /// \param IL Address of the output variable in which the flag of the
+  /// last iteration is returned.
+  /// \param LB Address of the output variable in which the lower iteration
+  /// number is returned.
+  /// \param UB Address of the output variable in which the upper iteration
+  /// number is returned.
+  /// \param ST Address of the output variable in which the stride value is
+  /// returned, necessary to generate the static_chunked scheduled loop.
+  /// \param Chunk Value of the chunk for the static_chunked scheduled loop.
+  /// For the default (nullptr) value, the chunk 1 will be used.
+  ///
+  virtual void emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
+                           OpenMPScheduleClauseKind SchedKind, unsigned IVSize,
+                           bool IVSigned, llvm::Value *IL, llvm::Value *LB,
+                           llvm::Value *UB, llvm::Value *ST,
+                           llvm::Value *Chunk = nullptr);
+
+  /// \brief Call the appropriate runtime routine to notify that we finished
+  /// all the work with current loop.
+  ///
+  /// \param CGF Reference to current CodeGenFunction.
+  /// \param Loc Clang source location.
+  /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause.
+  ///
+  virtual void emitForFinish(CodeGenFunction &CGF, SourceLocation Loc,
+                             OpenMPScheduleClauseKind ScheduleKind);
 
   /// \brief Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
   /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
   /// clause.
   /// \param NumThreads An integer value of threads.
-  virtual void EmitOMPNumThreadsClause(CodeGenFunction &CGF,
-                                       llvm::Value *NumThreads,
-                                       SourceLocation Loc);
+  virtual void emitNumThreadsClause(CodeGenFunction &CGF,
+                                    llvm::Value *NumThreads,
+                                    SourceLocation Loc);
 
   /// \brief Returns address of the threadprivate variable for the current
   /// thread.
@@ -313,10 +390,10 @@
   /// \param VDAddr Address of the global variable \a VD.
   /// \param Loc Location of the reference to threadprivate var.
   /// \return Address of the threadprivate variable for the current thread.
-  virtual llvm::Value *getOMPAddrOfThreadPrivate(CodeGenFunction &CGF,
-                                                 const VarDecl *VD,
-                                                 llvm::Value *VDAddr,
-                                                 SourceLocation Loc);
+  virtual llvm::Value *getAddrOfThreadPrivate(CodeGenFunction &CGF,
+                                              const VarDecl *VD,
+                                              llvm::Value *VDAddr,
+                                              SourceLocation Loc);
 
   /// \brief Emit a code for initialization of threadprivate variable. It emits
   /// a call to runtime library which adds initial value to the newly created
@@ -327,14 +404,24 @@
   /// \param Loc Location of threadprivate declaration.
   /// \param PerformInit true if initialization expression is not constant.
   virtual llvm::Function *
-  EmitOMPThreadPrivateVarDefinition(const VarDecl *VD, llvm::Value *VDAddr,
-                                    SourceLocation Loc, bool PerformInit,
-                                    CodeGenFunction *CGF = nullptr);
+  emitThreadPrivateVarDefinition(const VarDecl *VD, llvm::Value *VDAddr,
+                                 SourceLocation Loc, bool PerformInit,
+                                 CodeGenFunction *CGF = nullptr);
 
   /// \brief Emit flush of the variables specified in 'omp flush' directive.
   /// \param Vars List of variables to flush.
-  virtual void EmitOMPFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars,
-                            SourceLocation Loc);
+  virtual void emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars,
+                         SourceLocation Loc);
+};
+
+/// \brief RAII for emitting code of CapturedStmt without function outlining.
+class InlinedOpenMPRegionRAII {
+  CodeGenFunction &CGF;
+
+public:
+  InlinedOpenMPRegionRAII(CodeGenFunction &CGF,
+                          const OMPExecutableDirective &D);
+  ~InlinedOpenMPRegionRAII();
 };
 } // namespace CodeGen
 } // namespace clang
diff --git a/lib/CodeGen/CGRecordLayout.h b/lib/CodeGen/CGRecordLayout.h
index 2de0b2f..c15f9fd 100644
--- a/lib/CodeGen/CGRecordLayout.h
+++ b/lib/CodeGen/CGRecordLayout.h
@@ -109,8 +109,8 @@
 class CGRecordLayout {
   friend class CodeGenTypes;
 
-  CGRecordLayout(const CGRecordLayout &) LLVM_DELETED_FUNCTION;
-  void operator=(const CGRecordLayout &) LLVM_DELETED_FUNCTION;
+  CGRecordLayout(const CGRecordLayout &) = delete;
+  void operator=(const CGRecordLayout &) = delete;
 
 private:
   /// The LLVM type corresponding to this record layout; used when
diff --git a/lib/CodeGen/CGRecordLayoutBuilder.cpp b/lib/CodeGen/CGRecordLayoutBuilder.cpp
index 7ad394b..202ea97 100644
--- a/lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ b/lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -198,8 +198,8 @@
   bool IsZeroInitializableAsBase : 1;
   bool Packed : 1;
 private:
-  CGRecordLowering(const CGRecordLowering &) LLVM_DELETED_FUNCTION;
-  void operator =(const CGRecordLowering &) LLVM_DELETED_FUNCTION;
+  CGRecordLowering(const CGRecordLowering &) = delete;
+  void operator =(const CGRecordLowering &) = delete;
 };
 } // namespace {
 
diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp
index f207a4f..0d160d3 100644
--- a/lib/CodeGen/CGStmt.cpp
+++ b/lib/CodeGen/CGStmt.cpp
@@ -88,6 +88,7 @@
   case Stmt::ContinueStmtClass:
   case Stmt::DefaultStmtClass:
   case Stmt::CaseStmtClass:
+  case Stmt::SEHLeaveStmtClass:
     llvm_unreachable("should have emitted these statements as simple");
 
 #define STMT(Type, Base)
@@ -173,9 +174,6 @@
   case Stmt::SEHTryStmtClass:
     EmitSEHTryStmt(cast<SEHTryStmt>(*S));
     break;
-  case Stmt::SEHLeaveStmtClass:
-    EmitSEHLeaveStmt(cast<SEHLeaveStmt>(*S));
-    break;
   case Stmt::OMPParallelDirectiveClass:
     EmitOMPParallelDirective(cast<OMPParallelDirective>(*S));
     break;
@@ -256,6 +254,7 @@
   case Stmt::ContinueStmtClass: EmitContinueStmt(cast<ContinueStmt>(*S)); break;
   case Stmt::DefaultStmtClass:  EmitDefaultStmt(cast<DefaultStmt>(*S));   break;
   case Stmt::CaseStmtClass:     EmitCaseStmt(cast<CaseStmt>(*S));         break;
+  case Stmt::SEHLeaveStmtClass: EmitSEHLeaveStmt(cast<SEHLeaveStmt>(*S)); break;
   }
 
   return true;
@@ -564,8 +563,8 @@
   // Emit the 'else' code if present.
   if (const Stmt *Else = S.getElse()) {
     {
-      // There is no need to emit line number for unconditional branch.
-      SuppressDebugLocation S(Builder);
+      // There is no need to emit line number for an unconditional branch.
+      auto NL = ApplyDebugLocation::CreateEmpty(*this);
       EmitBlock(ElseBlock);
     }
     {
@@ -573,8 +572,8 @@
       EmitStmt(Else);
     }
     {
-      // There is no need to emit line number for unconditional branch.
-      SuppressDebugLocation S(Builder);
+      // There is no need to emit line number for an unconditional branch.
+      auto NL = ApplyDebugLocation::CreateEmpty(*this);
       EmitBranch(ContBlock);
     }
   }
@@ -591,7 +590,9 @@
     return;
 
   // Add vectorize and unroll hints to the metadata on the conditional branch.
-  SmallVector<llvm::Value *, 2> Metadata(1);
+  //
+  // FIXME: Should this really start with a size of 1?
+  SmallVector<llvm::Metadata *, 2> Metadata(1);
   for (const auto *Attr : Attrs) {
     const LoopHintAttr *LH = dyn_cast<LoopHintAttr>(Attr);
 
@@ -629,7 +630,7 @@
       ValueInt = static_cast<int>(ValueAPS.getSExtValue());
     }
 
-    llvm::Value *Value;
+    llvm::Constant *Value;
     llvm::MDString *Name;
     switch (Option) {
     case LoopHintAttr::Vectorize:
@@ -656,15 +657,16 @@
       break;
     }
 
-    SmallVector<llvm::Value *, 2> OpValues;
+    SmallVector<llvm::Metadata *, 2> OpValues;
     OpValues.push_back(Name);
     if (Value)
-      OpValues.push_back(Value);
+      OpValues.push_back(llvm::ConstantAsMetadata::get(Value));
 
     // Set or overwrite metadata indicated by Name.
     Metadata.push_back(llvm::MDNode::get(Context, OpValues));
   }
 
+  // FIXME: This condition is never false.  Should it be an assert?
   if (!Metadata.empty()) {
     // Add llvm.loop MDNode to CondBr.
     llvm::MDNode *LoopID = llvm::MDNode::get(Context, Metadata);
@@ -1656,6 +1658,12 @@
       while (Constraint[1] && Constraint[1] != ',')
         Constraint++;
       break;
+    case '&':
+    case '%':
+      Result += *Constraint;
+      while (Constraint[1] && Constraint[1] == *Constraint)
+        Constraint++;
+      break;
     case ',':
       Result += "|";
       break;
@@ -1687,7 +1695,7 @@
 static std::string
 AddVariableConstraints(const std::string &Constraint, const Expr &AsmExpr,
                        const TargetInfo &Target, CodeGenModule &CGM,
-                       const AsmStmt &Stmt) {
+                       const AsmStmt &Stmt, const bool EarlyClobber) {
   const DeclRefExpr *AsmDeclRef = dyn_cast<DeclRefExpr>(&AsmExpr);
   if (!AsmDeclRef)
     return Constraint;
@@ -1712,7 +1720,7 @@
   }
   // Canonicalize the register here before returning it.
   Register = Target.getNormalizedGCCRegisterName(Register);
-  return "{" + Register.str() + "}";
+  return (EarlyClobber ? "&{" : "{") + Register.str() + "}";
 }
 
 llvm::Value*
@@ -1766,10 +1774,10 @@
 /// asm.
 static llvm::MDNode *getAsmSrcLocInfo(const StringLiteral *Str,
                                       CodeGenFunction &CGF) {
-  SmallVector<llvm::Value *, 8> Locs;
+  SmallVector<llvm::Metadata *, 8> Locs;
   // Add the location of the first line to the MDNode.
-  Locs.push_back(llvm::ConstantInt::get(CGF.Int32Ty,
-                                        Str->getLocStart().getRawEncoding()));
+  Locs.push_back(llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
+      CGF.Int32Ty, Str->getLocStart().getRawEncoding())));
   StringRef StrVal = Str->getString();
   if (!StrVal.empty()) {
     const SourceManager &SM = CGF.CGM.getContext().getSourceManager();
@@ -1781,8 +1789,8 @@
       if (StrVal[i] != '\n') continue;
       SourceLocation LineLoc = Str->getLocationOfByte(i+1, SM, LangOpts,
                                                       CGF.getTarget());
-      Locs.push_back(llvm::ConstantInt::get(CGF.Int32Ty,
-                                            LineLoc.getRawEncoding()));
+      Locs.push_back(llvm::ConstantAsMetadata::get(
+          llvm::ConstantInt::get(CGF.Int32Ty, LineLoc.getRawEncoding())));
     }
   }
 
@@ -1845,7 +1853,8 @@
     OutExpr = OutExpr->IgnoreParenNoopCasts(getContext());
 
     OutputConstraint = AddVariableConstraints(OutputConstraint, *OutExpr,
-                                              getTarget(), CGM, S);
+                                              getTarget(), CGM, S,
+                                              Info.earlyClobber());
 
     LValue Dest = EmitLValue(OutExpr);
     if (!Constraints.empty())
@@ -1947,10 +1956,9 @@
     InputConstraint = SimplifyConstraint(InputConstraint.c_str(), getTarget(),
                                          &OutputConstraintInfos);
 
-    InputConstraint =
-      AddVariableConstraints(InputConstraint,
-                            *InputExpr->IgnoreParenNoopCasts(getContext()),
-                            getTarget(), CGM, S);
+    InputConstraint = AddVariableConstraints(
+        InputConstraint, *InputExpr->IgnoreParenNoopCasts(getContext()),
+        getTarget(), CGM, S, false /* No EarlyClobber */);
 
     llvm::Value *Arg = EmitAsmInput(Info, InputExpr, Constraints);
 
@@ -2052,7 +2060,9 @@
   } else {
     // At least put the line number on MS inline asm blobs.
     auto Loc = llvm::ConstantInt::get(Int32Ty, S.getAsmLoc().getRawEncoding());
-    Result->setMetadata("srcloc", llvm::MDNode::get(getLLVMContext(), Loc));
+    Result->setMetadata("srcloc",
+                        llvm::MDNode::get(getLLVMContext(),
+                                          llvm::ConstantAsMetadata::get(Loc)));
   }
 
   // Extract all of the register value results from the asm.
@@ -2208,8 +2218,6 @@
   PGO.assignRegionCounters(CD, F);
   CapturedStmtInfo->EmitBody(*this, CD->getBody());
   FinishFunction(CD->getBodyRBrace());
-  PGO.emitInstrumentationData();
-  PGO.destroyRegionCounters();
 
   return F;
 }
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
index b160f17..daf5fcc 100644
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -14,9 +14,9 @@
 #include "CGOpenMPRuntime.h"
 #include "CodeGenFunction.h"
 #include "CodeGenModule.h"
+#include "TargetInfo.h"
 #include "clang/AST/Stmt.h"
 #include "clang/AST/StmtOpenMP.h"
-#include "TargetInfo.h"
 using namespace clang;
 using namespace CodeGen;
 
@@ -57,13 +57,13 @@
   // Emit the 'else' code if present.
   {
     // There is no need to emit line number for unconditional branch.
-    SuppressDebugLocation SDL(CGF.Builder);
+    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
     CGF.EmitBlock(ElseBlock);
   }
   CodeGen(/*ThenBlock*/ false);
   {
     // There is no need to emit line number for unconditional branch.
-    SuppressDebugLocation SDL(CGF.Builder);
+    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
     CGF.EmitBranch(ContBlock);
   }
   // Emit the continuation block for code after the if.
@@ -220,17 +220,17 @@
     auto NumThreadsClause = cast<OMPNumThreadsClause>(C);
     auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
                                          /*IgnoreResultAssign*/ true);
-    CGF.CGM.getOpenMPRuntime().EmitOMPNumThreadsClause(
+    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
         CGF, NumThreads, NumThreadsClause->getLocStart());
   }
-  CGF.CGM.getOpenMPRuntime().EmitOMPParallelCall(CGF, S.getLocStart(),
-                                                 OutlinedFn, CapturedStruct);
+  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
+                                              CapturedStruct);
 }
 
 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
-  auto OutlinedFn = CGM.getOpenMPRuntime().EmitOpenMPOutlinedFunction(
+  auto OutlinedFn = CGM.getOpenMPRuntime().emitOutlinedFunction(
       S, *CS->getCapturedDecl()->param_begin());
   if (auto C = S.getSingleClause(/*K*/ OMPC_if)) {
     auto Cond = cast<OMPIfClause>(C)->getCondition();
@@ -238,8 +238,8 @@
       if (ThenBlock)
         EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
       else
-        CGM.getOpenMPRuntime().EmitOMPSerialCall(*this, S.getLocStart(),
-                                                 OutlinedFn, CapturedStruct);
+        CGM.getOpenMPRuntime().emitSerialCall(*this, S.getLocStart(),
+                                              OutlinedFn, CapturedStruct);
     });
   } else
     EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
@@ -417,6 +417,7 @@
     }
   }
 
+  InlinedOpenMPRegionRAII Region(*this, S);
   RunCleanupsScope DirectiveScope(*this);
 
   CGDebugInfo *DI = getDebugInfo();
@@ -470,8 +471,199 @@
     DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
 }
 
-void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &) {
-  llvm_unreachable("CodeGen for 'omp for' is not supported yet.");
+void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
+                                          const OMPLoopDirective &S,
+                                          OMPPrivateScope &LoopScope,
+                                          llvm::Value *LB, llvm::Value *UB,
+                                          llvm::Value *ST, llvm::Value *IL,
+                                          llvm::Value *Chunk) {
+  auto &RT = CGM.getOpenMPRuntime();
+  assert(!RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
+         "static non-chunked schedule does not need outer loop");
+  if (RT.isDynamic(ScheduleKind)) {
+    ErrorUnsupported(&S, "OpenMP loop with dynamic schedule");
+    return;
+  }
+
+  // Emit outer loop.
+  //
+  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
+  // When schedule(static, chunk_size) is specified, iterations are divided into
+  // chunks of size chunk_size, and the chunks are assigned to the threads in
+  // the team in a round-robin fashion in the order of the thread number.
+  //
+  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
+  //   while (idx <= UB) { BODY; ++idx; } // inner loop
+  //   LB = LB + ST;
+  //   UB = UB + ST;
+  // }
+  //
+  const Expr *IVExpr = S.getIterationVariable();
+  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
+  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
+
+  RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
+                 UB, ST, Chunk);
+  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
+
+  // Start the loop with a block that tests the condition.
+  auto CondBlock = createBasicBlock("omp.dispatch.cond");
+  EmitBlock(CondBlock);
+  LoopStack.push(CondBlock);
+
+  llvm::Value *BoolCondVal = nullptr;
+  // UB = min(UB, GlobalUB)
+  EmitIgnoredExpr(S.getEnsureUpperBound());
+  // IV = LB
+  EmitIgnoredExpr(S.getInit());
+  // IV < UB
+  BoolCondVal = EvaluateExprAsBool(S.getCond(false));
+
+  // If there are any cleanups between here and the loop-exit scope,
+  // create a block to stage a loop exit along.
+  auto ExitBlock = LoopExit.getBlock();
+  if (LoopScope.requiresCleanups())
+    ExitBlock = createBasicBlock("omp.dispatch.cleanup");
+
+  auto LoopBody = createBasicBlock("omp.dispatch.body");
+  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
+  if (ExitBlock != LoopExit.getBlock()) {
+    EmitBlock(ExitBlock);
+    EmitBranchThroughCleanup(LoopExit);
+  }
+  EmitBlock(LoopBody);
+
+  // Create a block for the increment.
+  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
+  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
+
+  EmitOMPInnerLoop(S, LoopScope);
+
+  EmitBlock(Continue.getBlock());
+  BreakContinueStack.pop_back();
+  // Emit "LB = LB + Stride", "UB = UB + Stride".
+  EmitIgnoredExpr(S.getNextLowerBound());
+  EmitIgnoredExpr(S.getNextUpperBound());
+
+  EmitBranch(CondBlock);
+  LoopStack.pop();
+  // Emit the fall-through block.
+  EmitBlock(LoopExit.getBlock());
+
+  // Tell the runtime we are done.
+  RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
+}
+
+/// \brief Emit a helper variable and return corresponding lvalue.
+static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
+                               const DeclRefExpr *Helper) {
+  auto VDecl = cast<VarDecl>(Helper->getDecl());
+  CGF.EmitVarDecl(*VDecl);
+  return CGF.EmitLValue(Helper);
+}
+
+void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
+  // Emit the loop iteration variable.
+  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
+  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
+  EmitVarDecl(*IVDecl);
+
+  // Emit the iterations count variable.
+  // If it is not a variable, Sema decided to calculate iterations count on each
+  // iteration (e.g., it is foldable into a constant).
+  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
+    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
+    // Emit calculation of the iterations count.
+    EmitIgnoredExpr(S.getCalcLastIteration());
+  }
+
+  auto &RT = CGM.getOpenMPRuntime();
+
+  // Check pre-condition.
+  {
+    // Skip the entire loop if we don't meet the precondition.
+    RegionCounter Cnt = getPGORegionCounter(&S);
+    auto ThenBlock = createBasicBlock("omp.precond.then");
+    auto ContBlock = createBasicBlock("omp.precond.end");
+    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
+    EmitBlock(ThenBlock);
+    Cnt.beginRegion(Builder);
+    // Emit 'then' code.
+    {
+      // Emit helper vars inits.
+      LValue LB =
+          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
+      LValue UB =
+          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
+      LValue ST =
+          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
+      LValue IL =
+          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
+
+      OMPPrivateScope LoopScope(*this);
+      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
+
+      // Detect the loop schedule kind and chunk.
+      auto ScheduleKind = OMPC_SCHEDULE_unknown;
+      llvm::Value *Chunk = nullptr;
+      if (auto C = cast_or_null<OMPScheduleClause>(
+              S.getSingleClause(OMPC_schedule))) {
+        ScheduleKind = C->getScheduleKind();
+        if (auto Ch = C->getChunkSize()) {
+          Chunk = EmitScalarExpr(Ch);
+          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
+                                       S.getIterationVariable()->getType());
+        }
+      }
+      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
+      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
+      if (RT.isStaticNonchunked(ScheduleKind,
+                                /* Chunked */ Chunk != nullptr)) {
+        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
+        // When no chunk_size is specified, the iteration space is divided into
+        // chunks that are approximately equal in size, and at most one chunk is
+        // distributed to each thread. Note that the size of the chunks is
+        // unspecified in this case.
+        RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
+                       IL.getAddress(), LB.getAddress(), UB.getAddress(),
+                       ST.getAddress());
+        // UB = min(UB, GlobalUB);
+        EmitIgnoredExpr(S.getEnsureUpperBound());
+        // IV = LB;
+        EmitIgnoredExpr(S.getInit());
+        // while (idx <= UB) { BODY; ++idx; }
+        EmitOMPInnerLoop(S, LoopScope);
+        // Tell the runtime we are done.
+        RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
+      } else {
+        // Emit the outer loop, which requests its work chunk [LB..UB] from
+        // runtime and runs the inner loop to process it.
+        EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, LB.getAddress(),
+                            UB.getAddress(), ST.getAddress(), IL.getAddress(),
+                            Chunk);
+      }
+    }
+    // We're now done with the loop, so jump to the continuation block.
+    EmitBranch(ContBlock);
+    EmitBlock(ContBlock, true);
+  }
+}
+
+void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
+  InlinedOpenMPRegionRAII Region(*this, S);
+  RunCleanupsScope DirectiveScope(*this);
+
+  CGDebugInfo *DI = getDebugInfo();
+  if (DI)
+    DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin());
+
+  EmitOMPWorksharingLoop(S);
+
+  // Emit an implicit barrier at the end.
+  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
+                                         /*IsExplicit*/ false);
+  if (DI)
+    DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
 }
 
 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
@@ -486,30 +678,32 @@
   llvm_unreachable("CodeGen for 'omp section' is not supported yet.");
 }
 
-void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &) {
-  llvm_unreachable("CodeGen for 'omp single' is not supported yet.");
-}
-
-void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &) {
-  llvm_unreachable("CodeGen for 'omp master' is not supported yet.");
-}
-
-void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
-  // __kmpc_critical();
-  // <captured_body>
-  // __kmpc_end_critical();
-  //
-
-  auto Lock = CGM.getOpenMPRuntime().GetCriticalRegionLock(
-      S.getDirectiveName().getAsString());
-  CGM.getOpenMPRuntime().EmitOMPCriticalRegionStart(*this, Lock,
-                                                    S.getLocStart());
-  {
+void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
+  CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void {
+    InlinedOpenMPRegionRAII Region(*this, S);
     RunCleanupsScope Scope(*this);
     EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
     EnsureInsertPoint();
-  }
-  CGM.getOpenMPRuntime().EmitOMPCriticalRegionEnd(*this, Lock, S.getLocEnd());
+  }, S.getLocStart());
+}
+
+void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
+  CGM.getOpenMPRuntime().emitMasterRegion(*this, [&]() -> void {
+    InlinedOpenMPRegionRAII Region(*this, S);
+    RunCleanupsScope Scope(*this);
+    EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+    EnsureInsertPoint();
+  }, S.getLocStart());
+}
+
+void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
+  CGM.getOpenMPRuntime().emitCriticalRegion(
+      *this, S.getDirectiveName().getAsString(), [&]() -> void {
+        InlinedOpenMPRegionRAII Region(*this, S);
+        RunCleanupsScope Scope(*this);
+        EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+        EnsureInsertPoint();
+      }, S.getLocStart());
 }
 
 void
@@ -531,12 +725,13 @@
   llvm_unreachable("CodeGen for 'omp task' is not supported yet.");
 }
 
-void CodeGenFunction::EmitOMPTaskyieldDirective(const OMPTaskyieldDirective &) {
-  llvm_unreachable("CodeGen for 'omp taskyield' is not supported yet.");
+void CodeGenFunction::EmitOMPTaskyieldDirective(
+    const OMPTaskyieldDirective &S) {
+  CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
 }
 
-void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &) {
-  llvm_unreachable("CodeGen for 'omp barrier' is not supported yet.");
+void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
+  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart());
 }
 
 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &) {
@@ -544,24 +739,142 @@
 }
 
 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
-  CGM.getOpenMPRuntime().EmitOMPFlush(
-      *this, [&]() -> ArrayRef<const Expr *> {
-               if (auto C = S.getSingleClause(/*K*/ OMPC_flush)) {
-                 auto FlushClause = cast<OMPFlushClause>(C);
-                 return llvm::makeArrayRef(FlushClause->varlist_begin(),
-                                           FlushClause->varlist_end());
-               }
-               return llvm::None;
-             }(),
-      S.getLocStart());
+  CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
+    if (auto C = S.getSingleClause(/*K*/ OMPC_flush)) {
+      auto FlushClause = cast<OMPFlushClause>(C);
+      return llvm::makeArrayRef(FlushClause->varlist_begin(),
+                                FlushClause->varlist_end());
+    }
+    return llvm::None;
+  }(), S.getLocStart());
 }
 
 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &) {
   llvm_unreachable("CodeGen for 'omp ordered' is not supported yet.");
 }
 
-void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &) {
-  llvm_unreachable("CodeGen for 'omp atomic' is not supported yet.");
+static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
+                                         QualType SrcType, QualType DestType) {
+  assert(CGF.hasScalarEvaluationKind(DestType) &&
+         "DestType must have scalar evaluation kind.");
+  assert(!Val.isAggregate() && "Must be a scalar or complex.");
+  return Val.isScalar()
+             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType)
+             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
+                                                 DestType);
+}
+
+static CodeGenFunction::ComplexPairTy
+convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
+                      QualType DestType) {
+  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
+         "DestType must have complex evaluation kind.");
+  CodeGenFunction::ComplexPairTy ComplexVal;
+  if (Val.isScalar()) {
+    // Convert the input element to the element type of the complex.
+    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
+    auto ScalarVal =
+        CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType);
+    ComplexVal = CodeGenFunction::ComplexPairTy(
+        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
+  } else {
+    assert(Val.isComplex() && "Must be a scalar or complex.");
+    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
+    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
+    ComplexVal.first = CGF.EmitScalarConversion(
+        Val.getComplexVal().first, SrcElementType, DestElementType);
+    ComplexVal.second = CGF.EmitScalarConversion(
+        Val.getComplexVal().second, SrcElementType, DestElementType);
+  }
+  return ComplexVal;
+}
+
+static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
+                                  const Expr *X, const Expr *V,
+                                  SourceLocation Loc) {
+  // v = x;
+  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
+  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
+  LValue XLValue = CGF.EmitLValue(X);
+  LValue VLValue = CGF.EmitLValue(V);
+  RValue Res = XLValue.isGlobalReg()
+                   ? CGF.EmitLoadOfLValue(XLValue, Loc)
+                   : CGF.EmitAtomicLoad(XLValue, Loc,
+                                        IsSeqCst ? llvm::SequentiallyConsistent
+                                                 : llvm::Monotonic);
+  // OpenMP, 2.12.6, atomic Construct
+  // Any atomic construct with a seq_cst clause forces the atomically
+  // performed operation to include an implicit flush operation without a
+  // list.
+  if (IsSeqCst)
+    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
+  switch (CGF.getEvaluationKind(V->getType())) {
+  case TEK_Scalar:
+    CGF.EmitStoreOfScalar(
+        convertToScalarValue(CGF, Res, X->getType(), V->getType()), VLValue);
+    break;
+  case TEK_Complex:
+    CGF.EmitStoreOfComplex(
+        convertToComplexValue(CGF, Res, X->getType(), V->getType()), VLValue,
+        /*isInit=*/false);
+    break;
+  case TEK_Aggregate:
+    llvm_unreachable("Must be a scalar or complex.");
+  }
+}
+
+static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
+                              bool IsSeqCst, const Expr *X, const Expr *V,
+                              const Expr *, SourceLocation Loc) {
+  switch (Kind) {
+  case OMPC_read:
+    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
+    break;
+  case OMPC_write:
+  case OMPC_update:
+  case OMPC_capture:
+    llvm_unreachable("CodeGen for 'omp atomic clause' is not supported yet.");
+  case OMPC_if:
+  case OMPC_final:
+  case OMPC_num_threads:
+  case OMPC_private:
+  case OMPC_firstprivate:
+  case OMPC_lastprivate:
+  case OMPC_reduction:
+  case OMPC_safelen:
+  case OMPC_collapse:
+  case OMPC_default:
+  case OMPC_seq_cst:
+  case OMPC_shared:
+  case OMPC_linear:
+  case OMPC_aligned:
+  case OMPC_copyin:
+  case OMPC_copyprivate:
+  case OMPC_flush:
+  case OMPC_proc_bind:
+  case OMPC_schedule:
+  case OMPC_ordered:
+  case OMPC_nowait:
+  case OMPC_untied:
+  case OMPC_threadprivate:
+  case OMPC_mergeable:
+  case OMPC_unknown:
+    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
+  }
+}
+
+void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
+  bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst);
+  OpenMPClauseKind Kind = OMPC_unknown;
+  for (auto *C : S.clauses()) {
+    // Find first clause (skip seq_cst clause, if it is first).
+    if (C->getClauseKind() != OMPC_seq_cst) {
+      Kind = C->getClauseKind();
+      break;
+    }
+  }
+  EmitOMPAtomicExpr(*this, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
+                    S.getLocStart());
 }
 
 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
diff --git a/lib/CodeGen/CGVTT.cpp b/lib/CodeGen/CGVTT.cpp
index bd280ea..81bd651 100644
--- a/lib/CodeGen/CGVTT.cpp
+++ b/lib/CodeGen/CGVTT.cpp
@@ -94,6 +94,9 @@
   // Set the correct linkage.
   VTT->setLinkage(Linkage);
 
+  if (CGM.supportsCOMDAT() && VTT->isWeakForLinker())
+    VTT->setComdat(CGM.getModule().getOrInsertComdat(VTT->getName()));
+
   // Set the right visibility.
   CGM.setGlobalVisibility(VTT, RD);
 }
diff --git a/lib/CodeGen/CGVTables.cpp b/lib/CodeGen/CGVTables.cpp
index 48a93ba..2e8471e 100644
--- a/lib/CodeGen/CGVTables.cpp
+++ b/lib/CodeGen/CGVTables.cpp
@@ -159,14 +159,10 @@
   // with "this".
   llvm::Value *ThisPtr = &*AI;
   llvm::BasicBlock *EntryBB = Fn->begin();
-  llvm::Instruction *ThisStore = nullptr;
-  for (llvm::BasicBlock::iterator I = EntryBB->begin(), E = EntryBB->end();
-       I != E; I++) {
-    if (isa<llvm::StoreInst>(I) && I->getOperand(0) == ThisPtr) {
-      ThisStore = cast<llvm::StoreInst>(I);
-      break;
-    }
-  }
+  llvm::Instruction *ThisStore =
+      std::find_if(EntryBB->begin(), EntryBB->end(), [&](llvm::Instruction &I) {
+    return isa<llvm::StoreInst>(I) && I.getOperand(0) == ThisPtr;
+  });
   assert(ThisStore && "Store of this should be in entry block?");
   // Adjust "this", if necessary.
   Builder.SetInsertPoint(ThisStore);
@@ -218,7 +214,7 @@
 
   // Start defining the function.
   StartFunction(GlobalDecl(), ResultType, Fn, FnInfo, FunctionArgs,
-                MD->getLocation(), SourceLocation());
+                MD->getLocation(), MD->getLocation());
 
   // Since we didn't pass a GlobalDecl to StartFunction, do this ourselves.
   CGM.getCXXABI().EmitInstanceFunctionProlog(*this);
@@ -381,7 +377,10 @@
 
   // Set the right linkage.
   CGM.setFunctionLinkage(GD, Fn);
-  
+
+  if (CGM.supportsCOMDAT() && Fn->isWeakForLinker())
+    Fn->setComdat(CGM.getModule().getOrInsertComdat(Fn->getName()));
+
   // Set the right visibility.
   const CXXMethodDecl *MD = cast<CXXMethodDecl>(GD.getDecl());
   setThunkVisibility(CGM, MD, Thunk, Fn);
@@ -670,6 +669,8 @@
       VTLayout->getNumVTableThunks(), RTTI);
   VTable->setInitializer(Init);
   
+  CGM.EmitVTableBitSetEntries(VTable, *VTLayout.get());
+
   return VTable;
 }
 
@@ -751,19 +752,13 @@
   llvm_unreachable("Invalid TemplateSpecializationKind!");
 }
 
-/// This is a callback from Sema to tell us that it believes that a
-/// particular v-table is required to be emitted in this translation
-/// unit.
+/// This is a callback from Sema to tell us that a particular v-table is
+/// required to be emitted in this translation unit.
 ///
-/// The reason we don't simply trust this callback is because Sema
-/// will happily report that something is used even when it's used
-/// only in code that we don't actually have to emit.
-///
-/// \param isRequired - if true, the v-table is mandatory, e.g.
-///   because the translation unit defines the key function
-void CodeGenModule::EmitVTable(CXXRecordDecl *theClass, bool isRequired) {
-  if (!isRequired) return;
-
+/// This is only called for vtables that _must_ be emitted (mainly due to key
+/// functions).  For weak vtables, CodeGen tracks when they are needed and
+/// emits them as-needed.
+void CodeGenModule::EmitVTable(CXXRecordDecl *theClass) {
   VTables.GenerateClassData(theClass);
 }
 
@@ -844,3 +839,65 @@
          "deferred extra v-tables during v-table emission?");
   DeferredVTables.clear();
 }
+
+void CodeGenModule::EmitVTableBitSetEntries(llvm::GlobalVariable *VTable,
+                                            const VTableLayout &VTLayout) {
+  if (!LangOpts.Sanitize.has(SanitizerKind::CFIVptr))
+    return;
+
+  llvm::Metadata *VTableMD = llvm::ConstantAsMetadata::get(VTable);
+
+  std::vector<llvm::MDTuple *> BitsetEntries;
+  // Create a bit set entry for each address point.
+  for (auto &&AP : VTLayout.getAddressPoints()) {
+    // FIXME: Add blacklisting scheme.
+    if (AP.first.getBase()->isInStdNamespace())
+      continue;
+
+    std::string OutName;
+    llvm::raw_string_ostream Out(OutName);
+    getCXXABI().getMangleContext().mangleCXXVTableBitSet(AP.first.getBase(),
+                                                         Out);
+
+    CharUnits PointerWidth =
+        Context.toCharUnitsFromBits(Context.getTargetInfo().getPointerWidth(0));
+    uint64_t AddrPointOffset = AP.second * PointerWidth.getQuantity();
+
+    llvm::Metadata *BitsetOps[] = {
+        llvm::MDString::get(getLLVMContext(), Out.str()),
+        VTableMD,
+        llvm::ConstantAsMetadata::get(
+            llvm::ConstantInt::get(Int64Ty, AddrPointOffset))};
+    llvm::MDTuple *BitsetEntry =
+        llvm::MDTuple::get(getLLVMContext(), BitsetOps);
+    BitsetEntries.push_back(BitsetEntry);
+  }
+
+  // Sort the bit set entries for determinism.
+  std::sort(BitsetEntries.begin(), BitsetEntries.end(), [](llvm::MDTuple *T1,
+                                                           llvm::MDTuple *T2) {
+    if (T1 == T2)
+      return false;
+
+    StringRef S1 = cast<llvm::MDString>(T1->getOperand(0))->getString();
+    StringRef S2 = cast<llvm::MDString>(T2->getOperand(0))->getString();
+    if (S1 < S2)
+      return true;
+    if (S1 != S2)
+      return false;
+
+    uint64_t Offset1 = cast<llvm::ConstantInt>(
+                           cast<llvm::ConstantAsMetadata>(T1->getOperand(2))
+                               ->getValue())->getZExtValue();
+    uint64_t Offset2 = cast<llvm::ConstantInt>(
+                           cast<llvm::ConstantAsMetadata>(T2->getOperand(2))
+                               ->getValue())->getZExtValue();
+    assert(Offset1 != Offset2);
+    return Offset1 < Offset2;
+  });
+
+  llvm::NamedMDNode *BitsetsMD =
+      getModule().getOrInsertNamedMetadata("llvm.bitsets");
+  for (auto BitsetEntry : BitsetEntries)
+    BitsetsMD->addOperand(BitsetEntry);
+}
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 10c2409..18f505d 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -1,4 +1,5 @@
 set(LLVM_LINK_COMPONENTS
+  Analysis
   BitReader
   BitWriter
   Core
@@ -12,7 +13,6 @@
   ProfileData
   ScalarOpts
   Support
-  Target
   TransformUtils
   )
 
diff --git a/lib/CodeGen/CodeGenABITypes.cpp b/lib/CodeGen/CodeGenABITypes.cpp
index f455e70..12189ae 100644
--- a/lib/CodeGen/CodeGenABITypes.cpp
+++ b/lib/CodeGen/CodeGenABITypes.cpp
@@ -67,5 +67,6 @@
                                          FunctionType::ExtInfo info,
                                          RequiredArgs args) {
   return CGM->getTypes().arrangeLLVMFunctionInfo(
-      returnType, /*IsInstanceMethod=*/false, argTypes, info, args);
+      returnType, /*IsInstanceMethod=*/false, /*IsChainCall=*/false, argTypes,
+      info, args);
 }
diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp
index 1eb59bd..b5ed12a 100644
--- a/lib/CodeGen/CodeGenAction.cpp
+++ b/lib/CodeGen/CodeGenAction.cpp
@@ -8,19 +8,19 @@
 //===----------------------------------------------------------------------===//
 
 #include "CoverageMappingGen.h"
-#include "clang/CodeGen/CodeGenAction.h"
 #include "clang/AST/ASTConsumer.h"
 #include "clang/AST/ASTContext.h"
-#include "clang/AST/DeclGroup.h"
 #include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclGroup.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TargetInfo.h"
-#include "clang/Lex/Preprocessor.h"
 #include "clang/CodeGen/BackendUtil.h"
+#include "clang/CodeGen/CodeGenAction.h"
 #include "clang/CodeGen/ModuleBuilder.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Frontend/FrontendDiagnostic.h"
+#include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/IR/DebugInfo.h"
@@ -65,9 +65,8 @@
                     CoverageSourceInfo *CoverageInfo = nullptr)
         : Diags(_Diags), Action(action), CodeGenOpts(compopts),
           TargetOpts(targetopts), LangOpts(langopts), AsmOutStream(OS),
-          Context(), LLVMIRGeneration("LLVM IR Generation Time"),
-          Gen(CreateLLVMCodeGen(Diags, infile, compopts,
-                                targetopts, C, CoverageInfo)),
+          Context(nullptr), LLVMIRGeneration("LLVM IR Generation Time"),
+          Gen(CreateLLVMCodeGen(Diags, infile, compopts, C, CoverageInfo)),
           LinkModule(LinkModule) {
       llvm::TimePassesIsEnabled = TimePasses;
     }
@@ -196,8 +195,8 @@
       Gen->CompleteTentativeDefinition(D);
     }
 
-    void HandleVTable(CXXRecordDecl *RD, bool DefinitionRequired) override {
-      Gen->HandleVTable(RD, DefinitionRequired);
+    void HandleVTable(CXXRecordDecl *RD) override {
+      Gen->HandleVTable(RD);
     }
 
     void HandleLinkerOptionPragma(llvm::StringRef Opts) override {
@@ -668,6 +667,12 @@
   return std::move(Result);
 }
 
+static void BitcodeInlineAsmDiagHandler(const llvm::SMDiagnostic &SM,
+                                         void *Context,
+                                         unsigned LocCookie) {
+  SM.print(nullptr, llvm::errs());
+}
+
 void CodeGenAction::ExecuteAction() {
   // If this is an IR file, we have to treat it specially.
   if (getCurrentFileKind() == IK_LLVM_IR) {
@@ -710,14 +715,14 @@
     }
     const TargetOptions &TargetOpts = CI.getTargetOpts();
     if (TheModule->getTargetTriple() != TargetOpts.Triple) {
-      unsigned DiagID = CI.getDiagnostics().getCustomDiagID(
-          DiagnosticsEngine::Warning,
-          "overriding the module target triple with %0");
-
-      CI.getDiagnostics().Report(SourceLocation(), DiagID) << TargetOpts.Triple;
+      CI.getDiagnostics().Report(SourceLocation(),
+                                 diag::warn_fe_override_module)
+          << TargetOpts.Triple;
       TheModule->setTargetTriple(TargetOpts.Triple);
     }
 
+    LLVMContext &Ctx = TheModule->getContext();
+    Ctx.setInlineAsmDiagnosticHandler(BitcodeInlineAsmDiagHandler);
     EmitBackendOutput(CI.getDiagnostics(), CI.getCodeGenOpts(), TargetOpts,
                       CI.getLangOpts(), CI.getTarget().getTargetDescription(),
                       TheModule.get(), BA, OS);
diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp
index 05c98fc..7d510d6 100644
--- a/lib/CodeGen/CodeGenFunction.cpp
+++ b/lib/CodeGen/CodeGenFunction.cpp
@@ -44,6 +44,7 @@
       LambdaThisCaptureField(nullptr), NormalCleanupDest(nullptr),
       NextCleanupDestIndex(1), FirstBlockInfo(nullptr), EHResumeBlock(nullptr),
       ExceptionSlot(nullptr), EHSelectorSlot(nullptr),
+      AbnormalTerminationSlot(nullptr), SEHPointersDecl(nullptr),
       DebugInfo(CGM.getModuleDebugInfo()), DisableDebugInfo(false),
       DidCallStackSave(false), IndirectBranch(nullptr), PGO(cgm),
       SwitchInsn(nullptr), SwitchWeights(nullptr), CaseRangeBlock(nullptr),
@@ -63,6 +64,12 @@
     FMF.setNoNaNs();
     FMF.setNoInfs();
   }
+  if (CGM.getCodeGenOpts().NoNaNsFPMath) {
+    FMF.setNoNaNs();
+  }
+  if (CGM.getCodeGenOpts().NoSignedZeros) {
+    FMF.setNoSignedZeros();
+  }
   Builder.SetFastMathFlags(FMF);
 }
 
@@ -76,7 +83,7 @@
     destroyBlockInfos(FirstBlockInfo);
 
   if (getLangOpts().OpenMP) {
-    CGM.getOpenMPRuntime().FunctionFinished(*this);
+    CGM.getOpenMPRuntime().functionFinished(*this);
   }
 }
 
@@ -152,7 +159,7 @@
   }
 }
 
-void CodeGenFunction::EmitReturnBlock() {
+llvm::DebugLoc CodeGenFunction::EmitReturnBlock() {
   // For cleanliness, we try to avoid emitting the return block for
   // simple cases.
   llvm::BasicBlock *CurBB = Builder.GetInsertBlock();
@@ -167,7 +174,7 @@
       delete ReturnBlock.getBlock();
     } else
       EmitBlock(ReturnBlock.getBlock());
-    return;
+    return llvm::DebugLoc();
   }
 
   // Otherwise, if the return block is the target of a single direct
@@ -178,15 +185,13 @@
       dyn_cast<llvm::BranchInst>(*ReturnBlock.getBlock()->user_begin());
     if (BI && BI->isUnconditional() &&
         BI->getSuccessor(0) == ReturnBlock.getBlock()) {
-      // Reset insertion point, including debug location, and delete the
-      // branch.  This is really subtle and only works because the next change
-      // in location will hit the caching in CGDebugInfo::EmitLocation and not
-      // override this.
-      Builder.SetCurrentDebugLocation(BI->getDebugLoc());
+      // Record/return the DebugLoc of the simple 'return' expression to be used
+      // later by the actual 'ret' instruction.
+      llvm::DebugLoc Loc = BI->getDebugLoc();
       Builder.SetInsertPoint(BI->getParent());
       BI->eraseFromParent();
       delete ReturnBlock.getBlock();
-      return;
+      return Loc;
     }
   }
 
@@ -195,6 +200,7 @@
   // region.end for now.
 
   EmitBlock(ReturnBlock.getBlock());
+  return llvm::DebugLoc();
 }
 
 static void EmitIfUsed(CodeGenFunction &CGF, llvm::BasicBlock *BB) {
@@ -236,8 +242,6 @@
   // edges will be *really* confused.
   bool EmitRetDbgLoc = true;
   if (EHStack.stable_begin() != PrologueCleanupDepth) {
-    PopCleanupBlocks(PrologueCleanupDepth);
-
     // Make sure the line table doesn't jump back into the body for
     // the ret after it's been at EndLoc.
     EmitRetDbgLoc = false;
@@ -245,19 +249,23 @@
     if (CGDebugInfo *DI = getDebugInfo())
       if (OnlySimpleReturnStmts)
         DI->EmitLocation(Builder, EndLoc);
+
+    PopCleanupBlocks(PrologueCleanupDepth);
   }
 
   // Emit function epilog (to return).
-  EmitReturnBlock();
+  llvm::DebugLoc Loc = EmitReturnBlock();
 
   if (ShouldInstrumentFunction())
     EmitFunctionInstrumentation("__cyg_profile_func_exit");
 
   // Emit debug descriptor for function end.
-  if (CGDebugInfo *DI = getDebugInfo()) {
+  if (CGDebugInfo *DI = getDebugInfo())
     DI->EmitFunctionEnd(Builder);
-  }
 
+  // Reset the debug location to that of the simple 'return' expression, if any,
+  // rather than that of the end of the function's scope '}'.
+  ApplyDebugLocation AL(*this, Loc);
   EmitFunctionEpilog(*CurFnInfo, EmitRetDbgLoc, EndLoc);
   EmitEndEHSpec(CurCodeDecl);
 
@@ -349,9 +357,9 @@
 // information in the program executable. The argument information stored
 // includes the argument name, its type, the address and access qualifiers used.
 static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn,
-                                 CodeGenModule &CGM,llvm::LLVMContext &Context,
-                                 SmallVector <llvm::Value*, 5> &kernelMDArgs,
-                                 CGBuilderTy& Builder, ASTContext &ASTCtx) {
+                                 CodeGenModule &CGM, llvm::LLVMContext &Context,
+                                 SmallVector<llvm::Metadata *, 5> &kernelMDArgs,
+                                 CGBuilderTy &Builder, ASTContext &ASTCtx) {
   // Create MDNodes that represent the kernel arg metadata.
   // Each MDNode is a list in the form of "key", N number of values which is
   // the same number of values as their are kernel arguments.
@@ -359,28 +367,28 @@
   const PrintingPolicy &Policy = ASTCtx.getPrintingPolicy();
 
   // MDNode for the kernel argument address space qualifiers.
-  SmallVector<llvm::Value*, 8> addressQuals;
+  SmallVector<llvm::Metadata *, 8> addressQuals;
   addressQuals.push_back(llvm::MDString::get(Context, "kernel_arg_addr_space"));
 
   // MDNode for the kernel argument access qualifiers (images only).
-  SmallVector<llvm::Value*, 8> accessQuals;
+  SmallVector<llvm::Metadata *, 8> accessQuals;
   accessQuals.push_back(llvm::MDString::get(Context, "kernel_arg_access_qual"));
 
   // MDNode for the kernel argument type names.
-  SmallVector<llvm::Value*, 8> argTypeNames;
+  SmallVector<llvm::Metadata *, 8> argTypeNames;
   argTypeNames.push_back(llvm::MDString::get(Context, "kernel_arg_type"));
 
   // MDNode for the kernel argument base type names.
-  SmallVector<llvm::Value*, 8> argBaseTypeNames;
+  SmallVector<llvm::Metadata *, 8> argBaseTypeNames;
   argBaseTypeNames.push_back(
       llvm::MDString::get(Context, "kernel_arg_base_type"));
 
   // MDNode for the kernel argument type qualifiers.
-  SmallVector<llvm::Value*, 8> argTypeQuals;
+  SmallVector<llvm::Metadata *, 8> argTypeQuals;
   argTypeQuals.push_back(llvm::MDString::get(Context, "kernel_arg_type_qual"));
 
   // MDNode for the kernel argument names.
-  SmallVector<llvm::Value*, 8> argNames;
+  SmallVector<llvm::Metadata *, 8> argNames;
   argNames.push_back(llvm::MDString::get(Context, "kernel_arg_name"));
 
   for (unsigned i = 0, e = FD->getNumParams(); i != e; ++i) {
@@ -392,8 +400,8 @@
       QualType pointeeTy = ty->getPointeeType();
 
       // Get address qualifier.
-      addressQuals.push_back(Builder.getInt32(ASTCtx.getTargetAddressSpace(
-        pointeeTy.getAddressSpace())));
+      addressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(
+          ASTCtx.getTargetAddressSpace(pointeeTy.getAddressSpace()))));
 
       // Get argument type name.
       std::string typeName =
@@ -432,7 +440,8 @@
         AddrSpc =
           CGM.getContext().getTargetAddressSpace(LangAS::opencl_global);
 
-      addressQuals.push_back(Builder.getInt32(AddrSpc));
+      addressQuals.push_back(
+          llvm::ConstantAsMetadata::get(Builder.getInt32(AddrSpc)));
 
       // Get argument type name.
       std::string typeName = ty.getUnqualifiedType().getAsString(Policy);
@@ -483,7 +492,8 @@
   kernelMDArgs.push_back(llvm::MDNode::get(Context, argTypeNames));
   kernelMDArgs.push_back(llvm::MDNode::get(Context, argBaseTypeNames));
   kernelMDArgs.push_back(llvm::MDNode::get(Context, argTypeQuals));
-  kernelMDArgs.push_back(llvm::MDNode::get(Context, argNames));
+  if (CGM.getCodeGenOpts().EmitOpenCLArgMetadata)
+    kernelMDArgs.push_back(llvm::MDNode::get(Context, argNames));
 }
 
 void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
@@ -494,12 +504,11 @@
 
   llvm::LLVMContext &Context = getLLVMContext();
 
-  SmallVector <llvm::Value*, 5> kernelMDArgs;
-  kernelMDArgs.push_back(Fn);
+  SmallVector<llvm::Metadata *, 5> kernelMDArgs;
+  kernelMDArgs.push_back(llvm::ConstantAsMetadata::get(Fn));
 
-  if (CGM.getCodeGenOpts().EmitOpenCLArgMetadata)
-    GenOpenCLArgMetadata(FD, Fn, CGM, Context, kernelMDArgs,
-                         Builder, getContext());
+  GenOpenCLArgMetadata(FD, Fn, CGM, Context, kernelMDArgs, Builder,
+                       getContext());
 
   if (const VecTypeHintAttr *A = FD->getAttr<VecTypeHintAttr>()) {
     QualType hintQTy = A->getTypeHint();
@@ -507,33 +516,31 @@
     bool isSignedInteger =
         hintQTy->isSignedIntegerType() ||
         (hintEltQTy && hintEltQTy->getElementType()->isSignedIntegerType());
-    llvm::Value *attrMDArgs[] = {
-      llvm::MDString::get(Context, "vec_type_hint"),
-      llvm::UndefValue::get(CGM.getTypes().ConvertType(A->getTypeHint())),
-      llvm::ConstantInt::get(
-          llvm::IntegerType::get(Context, 32),
-          llvm::APInt(32, (uint64_t)(isSignedInteger ? 1 : 0)))
-    };
+    llvm::Metadata *attrMDArgs[] = {
+        llvm::MDString::get(Context, "vec_type_hint"),
+        llvm::ConstantAsMetadata::get(llvm::UndefValue::get(
+            CGM.getTypes().ConvertType(A->getTypeHint()))),
+        llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
+            llvm::IntegerType::get(Context, 32),
+            llvm::APInt(32, (uint64_t)(isSignedInteger ? 1 : 0))))};
     kernelMDArgs.push_back(llvm::MDNode::get(Context, attrMDArgs));
   }
 
   if (const WorkGroupSizeHintAttr *A = FD->getAttr<WorkGroupSizeHintAttr>()) {
-    llvm::Value *attrMDArgs[] = {
-      llvm::MDString::get(Context, "work_group_size_hint"),
-      Builder.getInt32(A->getXDim()),
-      Builder.getInt32(A->getYDim()),
-      Builder.getInt32(A->getZDim())
-    };
+    llvm::Metadata *attrMDArgs[] = {
+        llvm::MDString::get(Context, "work_group_size_hint"),
+        llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())),
+        llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())),
+        llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))};
     kernelMDArgs.push_back(llvm::MDNode::get(Context, attrMDArgs));
   }
 
   if (const ReqdWorkGroupSizeAttr *A = FD->getAttr<ReqdWorkGroupSizeAttr>()) {
-    llvm::Value *attrMDArgs[] = {
-      llvm::MDString::get(Context, "reqd_work_group_size"),
-      Builder.getInt32(A->getXDim()),
-      Builder.getInt32(A->getYDim()),
-      Builder.getInt32(A->getZDim())
-    };
+    llvm::Metadata *attrMDArgs[] = {
+        llvm::MDString::get(Context, "reqd_work_group_size"),
+        llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())),
+        llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())),
+        llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))};
     kernelMDArgs.push_back(llvm::MDNode::get(Context, attrMDArgs));
   }
 
@@ -603,17 +610,17 @@
   }
 
   // If we are checking function types, emit a function type signature as
-  // prefix data.
+  // prologue data.
   if (getLangOpts().CPlusPlus && SanOpts.has(SanitizerKind::Function)) {
     if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
-      if (llvm::Constant *PrefixSig =
+      if (llvm::Constant *PrologueSig =
               CGM.getTargetCodeGenInfo().getUBSanFunctionSignature(CGM)) {
         llvm::Constant *FTRTTIConst =
             CGM.GetAddrOfRTTIDescriptor(FD->getType(), /*ForEH=*/true);
-        llvm::Constant *PrefixStructElems[] = { PrefixSig, FTRTTIConst };
-        llvm::Constant *PrefixStructConst =
-            llvm::ConstantStruct::getAnon(PrefixStructElems, /*Packed=*/true);
-        Fn->setPrefixData(PrefixStructConst);
+        llvm::Constant *PrologueStructElems[] = { PrologueSig, FTRTTIConst };
+        llvm::Constant *PrologueStructConst =
+            llvm::ConstantStruct::getAnon(PrologueStructElems, /*Packed=*/true);
+        Fn->setPrologueData(PrologueStructConst);
       }
     }
   }
@@ -799,6 +806,9 @@
                                           const FunctionDecl *UnsizedDealloc) {
   // This is a weak discardable definition of the sized deallocation function.
   CGF.CurFn->setLinkage(llvm::Function::LinkOnceAnyLinkage);
+  if (CGF.CGM.supportsCOMDAT())
+    CGF.CurFn->setComdat(
+        CGF.CGM.getModule().getOrInsertComdat(CGF.CurFn->getName()));
 
   // Call the unsized deallocation function and forward the first argument
   // unchanged.
@@ -884,8 +894,11 @@
   } else if (FunctionDecl *UnsizedDealloc =
                  FD->getCorrespondingUnsizedGlobalDeallocationFunction()) {
     // Global sized deallocation functions get an implicit weak definition if
-    // they don't have an explicit definition.
+    // they don't have an explicit definition, if allowed.
+    assert(getLangOpts().DefineSizedDeallocation &&
+           "Can't emit unallowed definition.");
     EmitSizedDeallocationFunction(*this, UnsizedDealloc);
+
   } else
     llvm_unreachable("no definition for emitted function");
 
@@ -916,9 +929,6 @@
   // a quick pass now to see if we can.
   if (!CurFn->doesNotThrow())
     TryMarkNoThrow(CurFn);
-
-  PGO.emitInstrumentationData();
-  PGO.destroyRegionCounters();
 }
 
 /// ContainsLabel - Return true if the statement contains a label in it.  If
@@ -1055,8 +1065,11 @@
       uint64_t RHSCount = Cnt.getCount();
 
       ConditionalEvaluation eval(*this);
-      EmitBranchOnBoolExpr(CondBOp->getLHS(), LHSTrue, FalseBlock, RHSCount);
-      EmitBlock(LHSTrue);
+      {
+        ApplyDebugLocation DL(*this, Cond);
+        EmitBranchOnBoolExpr(CondBOp->getLHS(), LHSTrue, FalseBlock, RHSCount);
+        EmitBlock(LHSTrue);
+      }
 
       // Any temporaries created here are conditional.
       Cnt.beginRegion(Builder);
@@ -1100,8 +1113,11 @@
       uint64_t RHSCount = TrueCount - LHSCount;
 
       ConditionalEvaluation eval(*this);
-      EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, LHSFalse, LHSCount);
-      EmitBlock(LHSFalse);
+      {
+        ApplyDebugLocation DL(*this, Cond);
+        EmitBranchOnBoolExpr(CondBOp->getLHS(), TrueBlock, LHSFalse, LHSCount);
+        EmitBlock(LHSFalse);
+      }
 
       // Any temporaries created here are conditional.
       Cnt.beginRegion(Builder);
@@ -1148,8 +1164,11 @@
     cond.begin(*this);
     EmitBlock(LHSBlock);
     Cnt.beginRegion(Builder);
-    EmitBranchOnBoolExpr(CondOp->getLHS(), TrueBlock, FalseBlock,
-                         LHSScaledTrueCount);
+    {
+      ApplyDebugLocation DL(*this, Cond);
+      EmitBranchOnBoolExpr(CondOp->getLHS(), TrueBlock, FalseBlock,
+                           LHSScaledTrueCount);
+    }
     cond.end(*this);
 
     cond.begin(*this);
@@ -1178,7 +1197,11 @@
                                                   CurrentCount - TrueCount);
 
   // Emit the code with the fully general case.
-  llvm::Value *CondV = EvaluateExprAsBool(Cond);
+  llvm::Value *CondV;
+  {
+    ApplyDebugLocation DL(*this, Cond);
+    CondV = EvaluateExprAsBool(Cond);
+  }
   Builder.CreateCondBr(CondV, TrueBlock, FalseBlock, Weights);
 }
 
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 4575e31..c0368aa 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -27,6 +27,7 @@
 #include "clang/AST/Type.h"
 #include "clang/Basic/ABI.h"
 #include "clang/Basic/CapturedStmt.h"
+#include "clang/Basic/OpenMPKinds.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Frontend/CodeGenOptions.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -93,24 +94,11 @@
   TEK_Aggregate
 };
 
-class SuppressDebugLocation {
-  llvm::DebugLoc CurLoc;
-  llvm::IRBuilderBase &Builder;
-public:
-  SuppressDebugLocation(llvm::IRBuilderBase &Builder)
-      : CurLoc(Builder.getCurrentDebugLocation()), Builder(Builder) {
-    Builder.SetCurrentDebugLocation(llvm::DebugLoc());
-  }
-  ~SuppressDebugLocation() {
-    Builder.SetCurrentDebugLocation(CurLoc);
-  }
-};
-
 /// CodeGenFunction - This class organizes the per-function state that is used
 /// while generating LLVM code.
 class CodeGenFunction : public CodeGenTypeCache {
-  CodeGenFunction(const CodeGenFunction &) LLVM_DELETED_FUNCTION;
-  void operator=(const CodeGenFunction &) LLVM_DELETED_FUNCTION;
+  CodeGenFunction(const CodeGenFunction &) = delete;
+  void operator=(const CodeGenFunction &) = delete;
 
   friend class CGCXXABI;
 public:
@@ -182,6 +170,8 @@
   /// \brief API for captured statement code generation.
   class CGCapturedStmtInfo {
   public:
+    explicit CGCapturedStmtInfo(CapturedRegionKind K = CR_Default)
+        : Kind(K), ThisValue(nullptr), CXXThisFieldDecl(nullptr) {}
     explicit CGCapturedStmtInfo(const CapturedStmt &S,
                                 CapturedRegionKind K = CR_Default)
       : Kind(K), ThisValue(nullptr), CXXThisFieldDecl(nullptr) {
@@ -204,22 +194,22 @@
 
     void setContextValue(llvm::Value *V) { ThisValue = V; }
     // \brief Retrieve the value of the context parameter.
-    llvm::Value *getContextValue() const { return ThisValue; }
+    virtual llvm::Value *getContextValue() const { return ThisValue; }
 
     /// \brief Lookup the captured field decl for a variable.
-    const FieldDecl *lookup(const VarDecl *VD) const {
+    virtual const FieldDecl *lookup(const VarDecl *VD) const {
       return CaptureFields.lookup(VD);
     }
 
-    bool isCXXThisExprCaptured() const { return CXXThisFieldDecl != nullptr; }
-    FieldDecl *getThisFieldDecl() const { return CXXThisFieldDecl; }
+    bool isCXXThisExprCaptured() const { return getThisFieldDecl() != nullptr; }
+    virtual FieldDecl *getThisFieldDecl() const { return CXXThisFieldDecl; }
 
     static bool classof(const CGCapturedStmtInfo *) {
       return true;
     }
 
     /// \brief Emit the captured statement body.
-    virtual void EmitBody(CodeGenFunction &CGF, Stmt *S) {
+    virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) {
       RegionCounter Cnt = CGF.getPGORegionCounter(S);
       Cnt.beginRegion(CGF.Builder);
       CGF.EmitStmt(S);
@@ -285,6 +275,7 @@
 
   EHScopeStack EHStack;
   llvm::SmallVector<char, 256> LifetimeExtendedCleanupStack;
+  llvm::SmallVector<const JumpDest *, 2> SEHTryEpilogueStack;
 
   /// Header for data within LifetimeExtendedCleanupStack.
   struct LifetimeExtendedCleanupHeader {
@@ -316,6 +307,12 @@
   /// write the current selector value into this alloca.
   llvm::AllocaInst *EHSelectorSlot;
 
+  llvm::AllocaInst *AbnormalTerminationSlot;
+
+  /// The implicit parameter to SEH filter functions of type
+  /// 'EXCEPTION_POINTERS*'.
+  ImplicitParamDecl *SEHPointersDecl;
+
   /// Emits a landing pad for the current EH stack.
   llvm::BasicBlock *EmitLandingPad();
 
@@ -354,6 +351,20 @@
     void exit(CodeGenFunction &CGF);
   };
 
+  /// Cleanups can be emitted for two reasons: normal control leaving a region
+  /// or exceptional control flow leaving a region.
+  struct SEHFinallyInfo {
+    SEHFinallyInfo()
+        : FinallyBB(nullptr), ContBB(nullptr), ResumeBB(nullptr) {}
+
+    llvm::BasicBlock *FinallyBB;
+    llvm::BasicBlock *ContBB;
+    llvm::BasicBlock *ResumeBB;
+  };
+
+  /// Returns true inside SEH __try blocks.
+  bool isSEHTryScope() const { return !SEHTryEpilogueStack.empty(); }
+
   /// pushFullExprCleanup - Push a cleanup to be run at the end of the
   /// current full-expression.  Safe against the possibility that
   /// we're currently inside a conditionally-evaluated expression.
@@ -499,8 +510,8 @@
     bool PerformCleanup;
   private:
 
-    RunCleanupsScope(const RunCleanupsScope &) LLVM_DELETED_FUNCTION;
-    void operator=(const RunCleanupsScope &) LLVM_DELETED_FUNCTION;
+    RunCleanupsScope(const RunCleanupsScope &) = delete;
+    void operator=(const RunCleanupsScope &) = delete;
 
   protected:
     CodeGenFunction& CGF;
@@ -548,8 +559,8 @@
     SmallVector<const LabelDecl*, 4> Labels;
     LexicalScope *ParentScope;
 
-    LexicalScope(const LexicalScope &) LLVM_DELETED_FUNCTION;
-    void operator=(const LexicalScope &) LLVM_DELETED_FUNCTION;
+    LexicalScope(const LexicalScope &) = delete;
+    void operator=(const LexicalScope &) = delete;
 
   public:
     /// \brief Enter a new cleanup scope.
@@ -573,7 +584,10 @@
 
       // If we should perform a cleanup, force them now.  Note that
       // this ends the cleanup scope before rescoping any labels.
-      if (PerformCleanup) ForceCleanup();
+      if (PerformCleanup) {
+        ApplyDebugLocation DL(CGF, Range.getEnd());
+        ForceCleanup();
+      }
     }
 
     /// \brief Force the emission of cleanups now, instead of waiting
@@ -598,8 +612,8 @@
     VarDeclMapTy SavedPrivates;
 
   private:
-    OMPPrivateScope(const OMPPrivateScope &) LLVM_DELETED_FUNCTION;
-    void operator=(const OMPPrivateScope &) LLVM_DELETED_FUNCTION;
+    OMPPrivateScope(const OMPPrivateScope &) = delete;
+    void operator=(const OMPPrivateScope &) = delete;
 
   public:
     /// \brief Enter a new OpenMP private scope.
@@ -614,7 +628,6 @@
     addPrivate(const VarDecl *LocalVD,
                const std::function<llvm::Value *()> &PrivateGen) {
       assert(PerformCleanup && "adding private to dead scope");
-      assert(LocalVD->isLocalVarDecl() && "privatizing non-local variable");
       if (SavedLocals.count(LocalVD) > 0) return false;
       SavedLocals[LocalVD] = CGF.LocalDeclMap.lookup(LocalVD);
       CGF.LocalDeclMap.erase(LocalVD);
@@ -1089,6 +1102,10 @@
   llvm::Value *getExceptionSlot();
   llvm::Value *getEHSelectorSlot();
 
+  /// Stack slot that contains whether a __finally block is being executed as an
+  /// EH cleanup or as a normal cleanup.
+  llvm::Value *getAbnormalTerminationSlot();
+
   /// Returns the contents of the function's exception object and selector
   /// slots.
   llvm::Value *getExceptionFromSlot();
@@ -1109,6 +1126,11 @@
     return getInvokeDestImpl();
   }
 
+  bool currentFunctionUsesSEHTry() const {
+    const auto *FD = dyn_cast_or_null<FunctionDecl>(CurCodeDecl);
+    return FD && FD->usesSEHTry();
+  }
+
   const TargetInfo &getTarget() const { return Target; }
   llvm::LLVMContext &getLLVMContext() { return CGM.getLLVMContext(); }
 
@@ -1178,9 +1200,7 @@
 
   void GenerateObjCMethod(const ObjCMethodDecl *OMD);
 
-  void StartObjCMethod(const ObjCMethodDecl *MD,
-                       const ObjCContainerDecl *CD,
-                       SourceLocation StartLoc);
+  void StartObjCMethod(const ObjCMethodDecl *MD, const ObjCContainerDecl *CD);
 
   /// GenerateObjCGetter - Synthesize an Objective-C property getter function.
   void GenerateObjCGetter(ObjCImplementationDecl *IMP,
@@ -1272,15 +1292,18 @@
   void EmitLambdaStaticInvokeFunction(const CXXMethodDecl *MD);
   void EmitAsanPrologueOrEpilogue(bool Prologue);
 
-  /// EmitReturnBlock - Emit the unified return block, trying to avoid its
-  /// emission when possible.
-  void EmitReturnBlock();
+  /// \brief Emit the unified return block, trying to avoid its emission when
+  /// possible.
+  /// \return The debug location of the user written return statement if the
+  /// return block is avoided.
+  llvm::DebugLoc EmitReturnBlock();
 
   /// FinishFunction - Complete IR generation of the current function. It is
   /// legal to call this function even if there is no current insertion point.
   void FinishFunction(SourceLocation EndLoc=SourceLocation());
 
-  void StartThunk(llvm::Function *Fn, GlobalDecl GD, const CGFunctionInfo &FnInfo);
+  void StartThunk(llvm::Function *Fn, GlobalDecl GD,
+                  const CGFunctionInfo &FnInfo);
 
   void EmitCallAndReturnForThunk(llvm::Value *Callee, const ThunkInfo *Thunk);
 
@@ -1323,6 +1346,9 @@
   /// to by This.
   llvm::Value *GetVTablePtr(llvm::Value *This, llvm::Type *Ty);
 
+  /// EmitVTablePtrCheckForCall - Virtual method MD is being called via VTable.
+  /// If vptr CFI is enabled, emit a check that VTable is valid.
+  void EmitVTablePtrCheckForCall(const CXXMethodDecl *MD, llvm::Value *VTable);
 
   /// CanDevirtualizeMemberFunctionCalls - Checks whether virtual calls on given
   /// expr can be devirtualized.
@@ -1543,8 +1569,8 @@
 
   /// EmitExprAsInit - Emits the code necessary to initialize a
   /// location in memory with the given initializer.
-  void EmitExprAsInit(const Expr *init, const ValueDecl *D,
-                      LValue lvalue, bool capturedByInit);
+  void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue,
+                      bool capturedByInit);
 
   /// hasVolatileMember - returns true if aggregate type has a volatile
   /// member.
@@ -1566,6 +1592,15 @@
                       true);
   }
 
+  void EmitAggregateCopyCtor(llvm::Value *DestPtr, llvm::Value *SrcPtr,
+                           QualType DestTy, QualType SrcTy) {
+    CharUnits DestTypeAlign = getContext().getTypeAlignInChars(DestTy);
+    CharUnits SrcTypeAlign = getContext().getTypeAlignInChars(SrcTy);
+    EmitAggregateCopy(DestPtr, SrcPtr, SrcTy, /*IsVolatile=*/false,
+                      std::min(DestTypeAlign, SrcTypeAlign),
+                      /*IsAssignment=*/false);
+  }
+
   /// EmitAggregateCopy - Emit an aggregate copy.
   ///
   /// \param isVolatile - True iff either the source or the destination is
@@ -1830,8 +1865,8 @@
   /// This function can be called with a null (unreachable) insert point.
   void EmitVarDecl(const VarDecl &D);
 
-  void EmitScalarInit(const Expr *init, const ValueDecl *D,
-                      LValue lvalue, bool capturedByInit);
+  void EmitScalarInit(const Expr *init, const ValueDecl *D, LValue lvalue,
+                      bool capturedByInit);
   void EmitScalarInit(llvm::Value *init, LValue lvalue);
 
   typedef void SpecialInitFn(CodeGenFunction &Init, const VarDecl &D,
@@ -2002,6 +2037,17 @@
   void EmitCXXTryStmt(const CXXTryStmt &S);
   void EmitSEHTryStmt(const SEHTryStmt &S);
   void EmitSEHLeaveStmt(const SEHLeaveStmt &S);
+  void EnterSEHTryStmt(const SEHTryStmt &S, SEHFinallyInfo &FI);
+  void ExitSEHTryStmt(const SEHTryStmt &S, SEHFinallyInfo &FI);
+
+  llvm::Function *GenerateSEHFilterFunction(CodeGenFunction &ParentCGF,
+                                            const SEHExceptStmt &Except);
+
+  void EmitSEHExceptionCodeSave();
+  llvm::Value *EmitSEHExceptionCode();
+  llvm::Value *EmitSEHExceptionInfo();
+  llvm::Value *EmitSEHAbnormalTermination();
+
   void EmitCXXForRangeStmt(const CXXForRangeStmt &S,
                            ArrayRef<const Attr *> Attrs = None);
 
@@ -2041,12 +2087,22 @@
   void EmitOMPTargetDirective(const OMPTargetDirective &S);
   void EmitOMPTeamsDirective(const OMPTeamsDirective &S);
 
-  /// Helpers for 'omp simd' directive.
+private:
+
+  /// Helpers for the OpenMP loop directives.
   void EmitOMPLoopBody(const OMPLoopDirective &Directive,
                        bool SeparateIter = false);
   void EmitOMPInnerLoop(const OMPLoopDirective &S, OMPPrivateScope &LoopScope,
                         bool SeparateIter = false);
   void EmitOMPSimdFinal(const OMPLoopDirective &S);
+  void EmitOMPWorksharingLoop(const OMPLoopDirective &S);
+  void EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
+                           const OMPLoopDirective &S,
+                           OMPPrivateScope &LoopScope, llvm::Value *LB,
+                           llvm::Value *UB, llvm::Value *ST, llvm::Value *IL,
+                           llvm::Value *Chunk);
+
+public:
 
   //===--------------------------------------------------------------------===//
   //                         LValue Expression Emission
@@ -2094,11 +2150,27 @@
 
   void EmitAtomicInit(Expr *E, LValue lvalue);
 
+  bool LValueIsSuitableForInlineAtomic(LValue Src);
+  bool typeIsSuitableForInlineAtomic(QualType Ty, bool IsVolatile) const;
+
+  RValue EmitAtomicLoad(LValue LV, SourceLocation SL,
+                        AggValueSlot Slot = AggValueSlot::ignored());
+
   RValue EmitAtomicLoad(LValue lvalue, SourceLocation loc,
+                        llvm::AtomicOrdering AO, bool IsVolatile = false,
                         AggValueSlot slot = AggValueSlot::ignored());
 
   void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit);
 
+  void EmitAtomicStore(RValue rvalue, LValue lvalue, llvm::AtomicOrdering AO,
+                       bool IsVolatile, bool isInit);
+
+  std::pair<RValue, RValue> EmitAtomicCompareExchange(
+      LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc,
+      llvm::AtomicOrdering Success = llvm::SequentiallyConsistent,
+      llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent,
+      bool IsWeak = false, AggValueSlot Slot = AggValueSlot::ignored());
+
   /// EmitToMemory - Change a scalar value from its value
   /// representation to its in-memory representation.
   llvm::Value *EmitToMemory(llvm::Value *Value, QualType Ty);
@@ -2150,7 +2222,7 @@
   /// EmitStoreThroughLValue - Store the specified rvalue into the specified
   /// lvalue, where both are guaranteed to the have the same type, and that type
   /// is 'Ty'.
-  void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false);
+  void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit = false);
   void EmitStoreThroughExtVectorComponentLValue(RValue Src, LValue Dst);
   void EmitStoreThroughGlobalRegLValue(RValue Src, LValue Dst);
 
@@ -2166,8 +2238,8 @@
   /// Emit an l-value for an assignment (simple or compound) of complex type.
   LValue EmitComplexAssignmentLValue(const BinaryOperator *E);
   LValue EmitComplexCompoundAssignmentLValue(const CompoundAssignOperator *E);
-  LValue EmitScalarCompooundAssignWithComplex(const CompoundAssignOperator *E,
-                                              llvm::Value *&Result);
+  LValue EmitScalarCompoundAssignWithComplex(const CompoundAssignOperator *E,
+                                             llvm::Value *&Result);
 
   // Note: only available for agg return types
   LValue EmitBinaryOperatorLValue(const BinaryOperator *E);
@@ -2211,7 +2283,7 @@
       return ConstantEmission(C, false);
     }
 
-    LLVM_EXPLICIT operator bool() const {
+    explicit operator bool() const {
       return ValueAndIsReference.getOpaqueValue() != nullptr;
     }
 
@@ -2281,7 +2353,8 @@
 
   RValue EmitCall(QualType FnType, llvm::Value *Callee, const CallExpr *E,
                   ReturnValueSlot ReturnValue,
-                  const Decl *TargetDecl = nullptr);
+                  const Decl *TargetDecl = nullptr,
+                  llvm::Value *Chain = nullptr);
   RValue EmitCallExpr(const CallExpr *E,
                       ReturnValueSlot ReturnValue = ReturnValueSlot());
 
@@ -2329,12 +2402,16 @@
                              StructorType Type);
   RValue EmitCXXMemberCallExpr(const CXXMemberCallExpr *E,
                                ReturnValueSlot ReturnValue);
+  RValue EmitCXXMemberOrOperatorMemberCallExpr(const CallExpr *CE,
+                                               const CXXMethodDecl *MD,
+                                               ReturnValueSlot ReturnValue,
+                                               bool HasQualifier,
+                                               NestedNameSpecifier *Qualifier,
+                                               bool IsArrow, const Expr *Base);
+  // Compute the object pointer.
   RValue EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E,
                                       ReturnValueSlot ReturnValue);
 
-  llvm::Value *EmitCXXOperatorMemberCallee(const CXXOperatorCallExpr *E,
-                                           const CXXMethodDecl *MD,
-                                           llvm::Value *This);
   RValue EmitCXXOperatorMemberCallExpr(const CXXOperatorCallExpr *E,
                                        const CXXMethodDecl *MD,
                                        ReturnValueSlot ReturnValue);
@@ -2344,7 +2421,8 @@
 
 
   RValue EmitBuiltinExpr(const FunctionDecl *FD,
-                         unsigned BuiltinID, const CallExpr *E);
+                         unsigned BuiltinID, const CallExpr *E,
+                         ReturnValueSlot ReturnValue);
 
   RValue EmitBlockCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue);
 
@@ -2711,7 +2789,7 @@
                     CallExpr::const_arg_iterator ArgBeg,
                     CallExpr::const_arg_iterator ArgEnd,
                     const FunctionDecl *CalleeDecl = nullptr,
-                    unsigned ParamsToSkip = 0, bool ForceColumnInfo = false) {
+                    unsigned ParamsToSkip = 0) {
     SmallVector<QualType, 16> ArgTypes;
     CallExpr::const_arg_iterator Arg = ArgBeg;
 
@@ -2723,28 +2801,13 @@
                 E = CallArgTypeInfo->param_type_end();
            I != E; ++I, ++Arg) {
         assert(Arg != ArgEnd && "Running over edge of argument list!");
-#ifndef NDEBUG
-        QualType ArgType = *I;
-        QualType ActualArgType = Arg->getType();
-        if (ArgType->isPointerType() && ActualArgType->isPointerType()) {
-          QualType ActualBaseType =
-              ActualArgType->getAs<PointerType>()->getPointeeType();
-          QualType ArgBaseType =
-              ArgType->getAs<PointerType>()->getPointeeType();
-          if (ArgBaseType->isVariableArrayType()) {
-            if (const VariableArrayType *VAT =
-                    getContext().getAsVariableArrayType(ActualBaseType)) {
-              if (!VAT->getSizeExpr())
-                ActualArgType = ArgType;
-            }
-          }
-        }
-        assert(getContext()
-                       .getCanonicalType(ArgType.getNonReferenceType())
-                       .getTypePtr() ==
-                   getContext().getCanonicalType(ActualArgType).getTypePtr() &&
-               "type mismatch in call argument!");
-#endif
+        assert(
+            ((*I)->isVariablyModifiedType() ||
+             getContext()
+                     .getCanonicalType((*I).getNonReferenceType())
+                     .getTypePtr() ==
+                 getContext().getCanonicalType(Arg->getType()).getTypePtr()) &&
+            "type mismatch in call argument!");
         ArgTypes.push_back(*I);
       }
     }
@@ -2759,15 +2822,14 @@
     for (; Arg != ArgEnd; ++Arg)
       ArgTypes.push_back(getVarArgType(*Arg));
 
-    EmitCallArgs(Args, ArgTypes, ArgBeg, ArgEnd, CalleeDecl, ParamsToSkip,
-                 ForceColumnInfo);
+    EmitCallArgs(Args, ArgTypes, ArgBeg, ArgEnd, CalleeDecl, ParamsToSkip);
   }
 
   void EmitCallArgs(CallArgList &Args, ArrayRef<QualType> ArgTypes,
                     CallExpr::const_arg_iterator ArgBeg,
                     CallExpr::const_arg_iterator ArgEnd,
                     const FunctionDecl *CalleeDecl = nullptr,
-                    unsigned ParamsToSkip = 0, bool ForceColumnInfo = false);
+                    unsigned ParamsToSkip = 0);
 
 private:
   QualType getVarArgType(const Expr *Arg);
@@ -2787,6 +2849,8 @@
   /// GetPointeeAlignment - Given an expression with a pointer type, emit the
   /// value and compute our best estimate of the alignment of the pointee.
   std::pair<llvm::Value*, unsigned> EmitPointerWithAlignment(const Expr *Addr);
+
+  llvm::Value *GetValueForARMHint(unsigned BuiltinID);
 };
 
 /// Helper class with most of the code for saving a value for a
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index b35e81c..1b6439b 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -21,8 +21,8 @@
 #include "CGOpenMPRuntime.h"
 #include "CodeGenFunction.h"
 #include "CodeGenPGO.h"
-#include "CoverageMappingGen.h"
 #include "CodeGenTBAA.h"
+#include "CoverageMappingGen.h"
 #include "TargetInfo.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/CharUnits.h"
@@ -64,6 +64,7 @@
   case TargetCXXABI::GenericARM:
   case TargetCXXABI::iOS:
   case TargetCXXABI::iOS64:
+  case TargetCXXABI::GenericMIPS:
   case TargetCXXABI::GenericItanium:
     return CreateItaniumCXXABI(CGM);
   case TargetCXXABI::Microsoft:
@@ -109,6 +110,7 @@
   Int8PtrPtrTy = Int8PtrTy->getPointerTo(0);
 
   RuntimeCC = getTargetCodeGenInfo().getABIInfo().getRuntimeCC();
+  BuiltinCC = getTargetCodeGenInfo().getABIInfo().getBuiltinCC();
 
   if (LangOpts.ObjC1)
     createObjCRuntime();
@@ -139,12 +141,14 @@
   RRData = new RREntrypoints();
 
   if (!CodeGenOpts.InstrProfileInput.empty()) {
-    if (std::error_code EC = llvm::IndexedInstrProfReader::create(
-            CodeGenOpts.InstrProfileInput, PGOReader)) {
+    auto ReaderOrErr =
+        llvm::IndexedInstrProfReader::create(CodeGenOpts.InstrProfileInput);
+    if (std::error_code EC = ReaderOrErr.getError()) {
       unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
                                               "Could not read profile: %0");
       getDiags().Report(DiagID) << EC.message();
     }
+    if (ReaderOrErr) PGOReader = std::move(ReaderOrErr.get());
   }
 
   // If coverage mapping generation is enabled, create the
@@ -344,9 +348,6 @@
   if (ObjCRuntime)
     if (llvm::Function *ObjCInitFunction = ObjCRuntime->ModuleInitFunction())
       AddGlobalCtor(ObjCInitFunction);
-  if (getCodeGenOpts().ProfileInstrGenerate)
-    if (llvm::Function *PGOInit = CodeGenPGO::emitInitialization(*this))
-      AddGlobalCtor(PGOInit, 0);
   if (PGOReader && PGOStats.hasDiagnostics())
     PGOStats.reportDiagnostics(getDiags(), getCodeGenOpts().MainFileName);
   EmitCtorList(GlobalCtors, "llvm.global_ctors");
@@ -624,7 +625,7 @@
 
   // Get the type of a ctor entry, { i32, void ()*, i8* }.
   llvm::StructType *CtorStructTy = llvm::StructType::get(
-      Int32Ty, llvm::PointerType::getUnqual(CtorFTy), VoidPtrTy, NULL);
+      Int32Ty, llvm::PointerType::getUnqual(CtorFTy), VoidPtrTy, nullptr);
 
   // Construct the constructor and destructor arrays.
   SmallVector<llvm::Constant*, 8> Ctors;
@@ -715,10 +716,6 @@
     // Naked implies noinline: we should not be inlining such functions.
     B.addAttribute(llvm::Attribute::Naked);
     B.addAttribute(llvm::Attribute::NoInline);
-  } else if (D->hasAttr<OptimizeNoneAttr>()) {
-    // OptimizeNone implies noinline; we should not be inlining such functions.
-    B.addAttribute(llvm::Attribute::OptimizeNone);
-    B.addAttribute(llvm::Attribute::NoInline);
   } else if (D->hasAttr<NoDuplicateAttr>()) {
     B.addAttribute(llvm::Attribute::NoDuplicate);
   } else if (D->hasAttr<NoInlineAttr>()) {
@@ -731,19 +728,14 @@
   }
 
   if (D->hasAttr<ColdAttr>()) {
-    B.addAttribute(llvm::Attribute::OptimizeForSize);
+    if (!D->hasAttr<OptimizeNoneAttr>())
+      B.addAttribute(llvm::Attribute::OptimizeForSize);
     B.addAttribute(llvm::Attribute::Cold);
   }
 
   if (D->hasAttr<MinSizeAttr>())
     B.addAttribute(llvm::Attribute::MinSize);
 
-  if (D->hasAttr<OptimizeNoneAttr>()) {
-    // OptimizeNone wins over OptimizeForSize and MinSize.
-    B.removeAttribute(llvm::Attribute::OptimizeForSize);
-    B.removeAttribute(llvm::Attribute::MinSize);
-  }
-
   if (LangOpts.getStackProtector() == LangOptions::SSPOn)
     B.addAttribute(llvm::Attribute::StackProtect);
   else if (LangOpts.getStackProtector() == LangOptions::SSPStrong)
@@ -772,6 +764,24 @@
                    llvm::AttributeSet::get(
                        F->getContext(), llvm::AttributeSet::FunctionIndex, B));
 
+  if (D->hasAttr<OptimizeNoneAttr>()) {
+    // OptimizeNone implies noinline; we should not be inlining such functions.
+    F->addFnAttr(llvm::Attribute::OptimizeNone);
+    F->addFnAttr(llvm::Attribute::NoInline);
+
+    // OptimizeNone wins over OptimizeForSize, MinSize, AlwaysInline.
+    assert(!F->hasFnAttribute(llvm::Attribute::OptimizeForSize) &&
+           "OptimizeNone and OptimizeForSize on same function!");
+    assert(!F->hasFnAttribute(llvm::Attribute::MinSize) &&
+           "OptimizeNone and MinSize on same function!");
+    assert(!F->hasFnAttribute(llvm::Attribute::AlwaysInline) &&
+           "OptimizeNone and AlwaysInline on same function!");
+
+    // Attribute 'inlinehint' has no effect on 'optnone' functions.
+    // Explicitly remove it from the set of function attributes.
+    F->removeFnAttr(llvm::Attribute::InlineHint);
+  }
+
   if (isa<CXXConstructorDecl>(D) || isa<CXXDestructorDecl>(D))
     F->setUnnamedAddr(true);
   else if (const auto *MD = dyn_cast<CXXMethodDecl>(D))
@@ -928,8 +938,8 @@
   UsedArray.resize(List.size());
   for (unsigned i = 0, e = List.size(); i != e; ++i) {
     UsedArray[i] =
-     llvm::ConstantExpr::getBitCast(cast<llvm::Constant>(&*List[i]),
-                                    CGM.Int8PtrTy);
+        llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+            cast<llvm::Constant>(&*List[i]), CGM.Int8PtrTy);
   }
 
   if (UsedArray.empty())
@@ -949,29 +959,28 @@
 }
 
 void CodeGenModule::AppendLinkerOptions(StringRef Opts) {
-  llvm::Value *MDOpts = llvm::MDString::get(getLLVMContext(), Opts);
+  auto *MDOpts = llvm::MDString::get(getLLVMContext(), Opts);
   LinkerOptionsMetadata.push_back(llvm::MDNode::get(getLLVMContext(), MDOpts));
 }
 
 void CodeGenModule::AddDetectMismatch(StringRef Name, StringRef Value) {
   llvm::SmallString<32> Opt;
   getTargetCodeGenInfo().getDetectMismatchOption(Name, Value, Opt);
-  llvm::Value *MDOpts = llvm::MDString::get(getLLVMContext(), Opt);
+  auto *MDOpts = llvm::MDString::get(getLLVMContext(), Opt);
   LinkerOptionsMetadata.push_back(llvm::MDNode::get(getLLVMContext(), MDOpts));
 }
 
 void CodeGenModule::AddDependentLib(StringRef Lib) {
   llvm::SmallString<24> Opt;
   getTargetCodeGenInfo().getDependentLibraryOption(Lib, Opt);
-  llvm::Value *MDOpts = llvm::MDString::get(getLLVMContext(), Opt);
+  auto *MDOpts = llvm::MDString::get(getLLVMContext(), Opt);
   LinkerOptionsMetadata.push_back(llvm::MDNode::get(getLLVMContext(), MDOpts));
 }
 
 /// \brief Add link options implied by the given module, including modules
 /// it depends on, using a postorder walk.
-static void addLinkOptionsPostorder(CodeGenModule &CGM,
-                                    Module *Mod,
-                                    SmallVectorImpl<llvm::Value *> &Metadata,
+static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod,
+                                    SmallVectorImpl<llvm::Metadata *> &Metadata,
                                     llvm::SmallPtrSet<Module *, 16> &Visited) {
   // Import this module's parent.
   if (Mod->Parent && Visited.insert(Mod->Parent).second) {
@@ -991,10 +1000,9 @@
     // Link against a framework.  Frameworks are currently Darwin only, so we
     // don't to ask TargetCodeGenInfo for the spelling of the linker option.
     if (Mod->LinkLibraries[I-1].IsFramework) {
-      llvm::Value *Args[2] = {
-        llvm::MDString::get(Context, "-framework"),
-        llvm::MDString::get(Context, Mod->LinkLibraries[I-1].Library)
-      };
+      llvm::Metadata *Args[2] = {
+          llvm::MDString::get(Context, "-framework"),
+          llvm::MDString::get(Context, Mod->LinkLibraries[I - 1].Library)};
 
       Metadata.push_back(llvm::MDNode::get(Context, Args));
       continue;
@@ -1004,7 +1012,7 @@
     llvm::SmallString<24> Opt;
     CGM.getTargetCodeGenInfo().getDependentLibraryOption(
       Mod->LinkLibraries[I-1].Library, Opt);
-    llvm::Value *OptString = llvm::MDString::get(Context, Opt);
+    auto *OptString = llvm::MDString::get(Context, Opt);
     Metadata.push_back(llvm::MDNode::get(Context, OptString));
   }
 }
@@ -1057,7 +1065,7 @@
   // Add link options for all of the imported modules in reverse topological
   // order.  We don't do anything to try to order import link flags with respect
   // to linker options inserted by things like #pragma comment().
-  SmallVector<llvm::Value *, 16> MetadataArgs;
+  SmallVector<llvm::Metadata *, 16> MetadataArgs;
   Visited.clear();
   for (llvm::SetVector<clang::Module *>::iterator M = LinkModules.begin(),
                                                MEnd = LinkModules.end();
@@ -1079,36 +1087,52 @@
   // previously unused static decl may become used during the generation of code
   // for a static function, iterate until no changes are made.
 
-  while (true) {
-    if (!DeferredVTables.empty()) {
-      EmitDeferredVTables();
+  if (!DeferredVTables.empty()) {
+    EmitDeferredVTables();
 
-      // Emitting a v-table doesn't directly cause more v-tables to
-      // become deferred, although it can cause functions to be
-      // emitted that then need those v-tables.
-      assert(DeferredVTables.empty());
-    }
+    // Emitting a v-table doesn't directly cause more v-tables to
+    // become deferred, although it can cause functions to be
+    // emitted that then need those v-tables.
+    assert(DeferredVTables.empty());
+  }
 
-    // Stop if we're out of both deferred v-tables and deferred declarations.
-    if (DeferredDeclsToEmit.empty()) break;
+  // Stop if we're out of both deferred v-tables and deferred declarations.
+  if (DeferredDeclsToEmit.empty())
+    return;
 
-    DeferredGlobal &G = DeferredDeclsToEmit.back();
+  // Grab the list of decls to emit. If EmitGlobalDefinition schedules more
+  // work, it will not interfere with this.
+  std::vector<DeferredGlobal> CurDeclsToEmit;
+  CurDeclsToEmit.swap(DeferredDeclsToEmit);
+
+  for (DeferredGlobal &G : CurDeclsToEmit) {
     GlobalDecl D = G.GD;
     llvm::GlobalValue *GV = G.GV;
-    DeferredDeclsToEmit.pop_back();
+    G.GV = nullptr;
 
-    assert(GV == GetGlobalValue(getMangledName(D)));
+    assert(!GV || GV == GetGlobalValue(getMangledName(D)));
+    if (!GV)
+      GV = GetGlobalValue(getMangledName(D));
+
     // Check to see if we've already emitted this.  This is necessary
     // for a couple of reasons: first, decls can end up in the
     // deferred-decls queue multiple times, and second, decls can end
     // up with definitions in unusual ways (e.g. by an extern inline
     // function acquiring a strong function redefinition).  Just
     // ignore these cases.
-    if(!GV->isDeclaration())
+    if (GV && !GV->isDeclaration())
       continue;
 
     // Otherwise, emit the definition and move on to the next one.
     EmitGlobalDefinition(D, GV);
+
+    // If we found out that we need to emit more decls, do that recursively.
+    // This has the advantage that the decls are emitted in a DFS and related
+    // ones are close together, which is convenient for testing.
+    if (!DeferredVTables.empty() || !DeferredDeclsToEmit.empty()) {
+      EmitDeferred();
+      assert(DeferredVTables.empty() && DeferredDeclsToEmit.empty());
+    }
   }
 }
 
@@ -1229,12 +1253,22 @@
   return false;
 }
 
-bool CodeGenModule::MayDeferGeneration(const ValueDecl *Global) {
+bool CodeGenModule::MustBeEmitted(const ValueDecl *Global) {
   // Never defer when EmitAllDecls is specified.
   if (LangOpts.EmitAllDecls)
-    return false;
+    return true;
 
-  return !getContext().DeclMustBeEmitted(Global);
+  return getContext().DeclMustBeEmitted(Global);
+}
+
+bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) {
+  // An implicit template instantiation may still change linkage if it is
+  // explicitly instantiated later on, so such functions must not be emitted
+  // eagerly; every other declaration may be.
+  const auto *Fn = dyn_cast<FunctionDecl>(Global);
+  const bool IsImplicitInstantiation =
+      Fn && Fn->getTemplateSpecializationKind() == TSK_ImplicitInstantiation;
+  return !IsImplicitInstantiation;
 }
 
 llvm::Constant *CodeGenModule::GetAddrOfUuidDescriptor(
@@ -1255,6 +1289,8 @@
   auto *GV = new llvm::GlobalVariable(
       getModule(), Init->getType(),
       /*isConstant=*/true, llvm::GlobalValue::LinkOnceODRLinkage, Init, Name);
+  if (supportsCOMDAT())
+    GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
   return GV;
 }
 
@@ -1343,9 +1379,10 @@
       return;
   }
 
-  // Defer code generation when possible if this is a static definition, inline
-  // function etc.  These we only want to emit if they are used.
-  if (!MayDeferGeneration(Global)) {
+  // Defer code generation to first use when possible, e.g. if this is an inline
+  // function. If the global must always be emitted, do it eagerly if possible
+  // to benefit from cache locality.
+  if (MustBeEmitted(Global) && MayBeEmittedEagerly(Global)) {
     // Emit the definition if it can't be deferred.
     EmitGlobalDefinition(GD);
     return;
@@ -1358,13 +1395,16 @@
     DelayedCXXInitPosition[Global] = CXXGlobalInits.size();
     CXXGlobalInits.push_back(nullptr);
   }
-  
-  // If the value has already been used, add it directly to the
-  // DeferredDeclsToEmit list.
+
   StringRef MangledName = getMangledName(GD);
-  if (llvm::GlobalValue *GV = GetGlobalValue(MangledName))
+  if (llvm::GlobalValue *GV = GetGlobalValue(MangledName)) {
+    // The value has already been used and should therefore be emitted.
     addDeferredDeclToEmit(GV, GD);
-  else {
+  } else if (MustBeEmitted(Global)) {
+    // The value must be emitted, but cannot be emitted eagerly.
+    assert(!MayBeEmittedEagerly(Global));
+    addDeferredDeclToEmit(/*GV=*/nullptr, GD);
+  } else {
     // Otherwise, remember that we saw a deferred decl with this name.  The
     // first use of the mangled name will cause it to move into
     // DeferredDeclsToEmit.
@@ -1578,12 +1618,15 @@
       // don't need it anymore).
       addDeferredDeclToEmit(F, DDI->second);
       DeferredDecls.erase(DDI);
-
+      
       // Otherwise, if this is a sized deallocation function, emit a weak
-      // definition
-      // for it at the end of the translation unit.
-    } else if (D && cast<FunctionDecl>(D)
-                        ->getCorrespondingUnsizedGlobalDeallocationFunction()) {
+      // definition for it at the end of the translation unit (if allowed),
+      // unless the sized deallocation function is aliased.
+    } else if (D &&
+               cast<FunctionDecl>(D)
+                  ->getCorrespondingUnsizedGlobalDeallocationFunction() &&
+               getLangOpts().DefineSizedDeallocation &&
+               !D->hasAttr<AliasAttr>()) {
       addDeferredDeclToEmit(F, GD);
 
       // Otherwise, there are cases we have to worry about where we're
@@ -1651,6 +1694,21 @@
   return C;
 }
 
+/// Obtain the compiler builtin function \p Name of type \p FTy. If the result
+/// is a function without a body, it is given the builtin calling convention.
+llvm::Constant *
+CodeGenModule::CreateBuiltinFunction(llvm::FunctionType *FTy,
+                                     StringRef Name,
+                                     llvm::AttributeSet ExtraAttrs) {
+  llvm::Constant *Callee = GetOrCreateLLVMFunction(
+      Name, FTy, GlobalDecl(), /*ForVTable=*/false, /*DontDefer=*/false,
+      /*IsThunk=*/false, ExtraAttrs);
+  auto *Fn = dyn_cast<llvm::Function>(Callee);
+  if (Fn && Fn->empty())
+    Fn->setCallingConv(getBuiltinCC());
+  return Callee;
+}
+
 /// isTypeConstant - Determine whether an object of this type can be emitted
 /// as a constant.
 ///
@@ -1790,7 +1848,10 @@
     
     OldGV->eraseFromParent();
   }
-  
+
+  if (supportsCOMDAT() && GV->isWeakForLinker())
+    GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
+
   return GV;
 }
 
@@ -1823,7 +1884,7 @@
 void CodeGenModule::EmitTentativeDefinition(const VarDecl *D) {
   assert(!D->getInit() && "Cannot emit definite definitions here!");
 
-  if (MayDeferGeneration(D)) {
+  if (!MustBeEmitted(D)) {
     // If we have not seen a reference to this variable yet, place it
     // into the deferred declarations table to be emitted if needed
     // later.
@@ -1890,6 +1951,38 @@
     R.first->second = nullptr;
 }
 
+// Whether D's definition belongs in a COMDAT. D must be a variable or function.
+static bool shouldBeInCOMDAT(CodeGenModule &CGM, const Decl &D) {
+  if (!CGM.supportsCOMDAT())
+    return false;
+  if (D.hasAttr<SelectAnyAttr>())
+    return true;
+
+  auto &Ctx = CGM.getContext();
+  const auto *Var = dyn_cast<VarDecl>(&D);
+  const GVALinkage Kind =
+      Var ? Ctx.GetGVALinkageForVariable(Var)
+          : Ctx.GetGVALinkageForFunction(cast<FunctionDecl>(&D));
+
+  switch (Kind) {
+  case GVA_DiscardableODR:
+  case GVA_StrongODR:
+    return true;
+  case GVA_Internal:
+  case GVA_AvailableExternally:
+  case GVA_StrongExternal:
+    return false;
+  }
+  llvm_unreachable("No such linkage");
+}
+
+void CodeGenModule::maybeSetTrivialComdat(const Decl &D,
+                                          llvm::GlobalObject &GO) {
+  // Use a COMDAT named after the global itself, when D qualifies for one.
+  if (shouldBeInCOMDAT(*this, D))
+    GO.setComdat(TheModule.getOrInsertComdat(GO.getName()));
+}
+
 void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) {
   llvm::Constant *Init = nullptr;
   QualType ASTTy = D->getType();
@@ -2033,6 +2126,8 @@
     setTLSMode(GV, *D);
   }
 
+  maybeSetTrivialComdat(*D, *GV);
+
   // Emit the initializer function if necessary.
   if (NeedsGlobalCtor || NeedsGlobalDtor)
     EmitCXXGlobalVarDeclInitFunc(D, GV, NeedsGlobalCtor);
@@ -2073,9 +2168,25 @@
 
   // Declarations with a required alignment do not have common linakge in MSVC
   // mode.
-  if (Context.getLangOpts().MSVCCompat &&
-      (Context.isAlignmentRequired(D->getType()) || D->hasAttr<AlignedAttr>()))
-    return true;
+  if (Context.getLangOpts().MSVCCompat) {
+    if (D->hasAttr<AlignedAttr>())
+      return true;
+    QualType VarType = D->getType();
+    if (Context.isAlignmentRequired(VarType))
+      return true;
+
+    if (const auto *RT = VarType->getAs<RecordType>()) {
+      const RecordDecl *RD = RT->getDecl();
+      for (const FieldDecl *FD : RD->fields()) {
+        if (FD->isBitField())
+          continue;
+        if (FD->hasAttr<AlignedAttr>())
+          return true;
+        if (Context.isAlignmentRequired(FD->getType()))
+          return true;
+      }
+    }
+  }
 
   return false;
 }
@@ -2367,6 +2478,8 @@
 
   MaybeHandleStaticInExternC(D, Fn);
 
+  maybeSetTrivialComdat(*D, *Fn);
+
   CodeGenFunction(*this).GenerateCode(D, Fn, FI);
 
   setFunctionDefinitionAttributes(D, Fn);
@@ -2799,12 +2912,18 @@
   if (CGM.getLangOpts().OpenCL)
     AddrSpace = CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant);
 
+  llvm::Module &M = CGM.getModule();
   // Create a global variable for this string
   auto *GV = new llvm::GlobalVariable(
-      CGM.getModule(), C->getType(), !CGM.getLangOpts().WritableStrings, LT, C,
-      GlobalName, nullptr, llvm::GlobalVariable::NotThreadLocal, AddrSpace);
+      M, C->getType(), !CGM.getLangOpts().WritableStrings, LT, C, GlobalName,
+      nullptr, llvm::GlobalVariable::NotThreadLocal, AddrSpace);
   GV->setAlignment(Alignment);
   GV->setUnnamedAddr(true);
+  if (GV->isWeakForLinker()) {
+    assert(CGM.supportsCOMDAT() && "Only COFF uses weak string literals");
+    GV->setComdat(M.getOrInsertComdat(GV->getName()));
+  }
+
   return GV;
 }
 
@@ -2963,10 +3082,19 @@
   // Create a global variable for this lifetime-extended temporary.
   llvm::GlobalValue::LinkageTypes Linkage =
       getLLVMLinkageVarDefinition(VD, Constant);
-  // There is no need for this temporary to have global linkage if the global
-  // variable has external linkage.
-  if (Linkage == llvm::GlobalVariable::ExternalLinkage)
-    Linkage = llvm::GlobalVariable::PrivateLinkage;
+  if (Linkage == llvm::GlobalVariable::ExternalLinkage) {
+    const VarDecl *InitVD;
+    if (VD->isStaticDataMember() && VD->getAnyInitializer(InitVD) &&
+        isa<CXXRecordDecl>(InitVD->getLexicalDeclContext())) {
+      // Temporaries defined inside a class get linkonce_odr linkage because the
+      // class can be defined in multiple translation units.
+      Linkage = llvm::GlobalVariable::LinkOnceODRLinkage;
+    } else {
+      // There is no need for this temporary to have external linkage if the
+      // VarDecl has external linkage.
+      Linkage = llvm::GlobalVariable::InternalLinkage;
+    }
+  }
   unsigned AddrSpace = GetGlobalVarAddressSpace(
       VD, getContext().getTargetAddressSpace(MaterializedType));
   auto *GV = new llvm::GlobalVariable(
@@ -2976,6 +3104,8 @@
   setGlobalVisibility(GV, VD);
   GV->setAlignment(
       getContext().getTypeAlignInChars(MaterializedType).getQuantity());
+  if (supportsCOMDAT() && GV->isWeakForLinker())
+    GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
   if (VD->getTLSKind())
     setTLSMode(GV, *VD);
   Slot = GV;
@@ -3263,8 +3393,10 @@
   case Decl::ClassTemplateSpecialization: {
     const auto *Spec = cast<ClassTemplateSpecializationDecl>(D);
     if (DebugInfo &&
-        Spec->getSpecializationKind() == TSK_ExplicitInstantiationDefinition)
+        Spec->getSpecializationKind() == TSK_ExplicitInstantiationDefinition &&
+        Spec->hasDefinition())
       DebugInfo->completeTemplateDefinition(*Spec);
+    break;
   }
 
   default:
@@ -3272,6 +3404,7 @@
     // non-top-level decl.  FIXME: Would be nice to have an isTopLevelDeclKind
     // function. Need to recode Decl::Kind to do that easily.
     assert(isa<TypeDecl>(D) && "Unsupported decl kind");
+    break;
   }
 }
 
@@ -3376,10 +3509,9 @@
       CGM.getModule().getOrInsertNamedMetadata("clang.global.decl.ptrs");
 
   // TODO: should we report variant information for ctors/dtors?
-  llvm::Value *Ops[] = {
-    Addr,
-    GetPointerConstant(CGM.getLLVMContext(), D.getDecl())
-  };
+  llvm::Metadata *Ops[] = {llvm::ConstantAsMetadata::get(Addr),
+                           llvm::ConstantAsMetadata::get(GetPointerConstant(
+                               CGM.getLLVMContext(), D.getDecl()))};
   GlobalMetadata->addOperand(llvm::MDNode::get(CGM.getLLVMContext(), Ops));
 }
 
@@ -3442,7 +3574,9 @@
     llvm::Value *Addr = I.second;
     if (auto *Alloca = dyn_cast<llvm::AllocaInst>(Addr)) {
       llvm::Value *DAddr = GetPointerConstant(getLLVMContext(), D);
-      Alloca->setMetadata(DeclPtrKind, llvm::MDNode::get(Context, DAddr));
+      Alloca->setMetadata(
+          DeclPtrKind, llvm::MDNode::get(
+                           Context, llvm::ValueAsMetadata::getConstant(DAddr)));
     } else if (auto *GV = dyn_cast<llvm::GlobalValue>(Addr)) {
       GlobalDecl GD = GlobalDecl(cast<VarDecl>(D));
       EmitGlobalDeclMetadata(CGM, GlobalMetadata, GD, GV);
@@ -3456,9 +3590,7 @@
   std::string Version = getClangFullVersion();
   llvm::LLVMContext &Ctx = TheModule.getContext();
 
-  llvm::Value *IdentNode[] = {
-    llvm::MDString::get(Ctx, Version)
-  };
+  llvm::Metadata *IdentNode[] = {llvm::MDString::get(Ctx, Version)};
   IdentMetadata->addOperand(llvm::MDNode::get(Ctx, IdentNode));
 }
 
@@ -3486,9 +3618,8 @@
           llvm::MDString::get(Ctx, getCodeGenOpts().CoverageFile);
       for (int i = 0, e = CUNode->getNumOperands(); i != e; ++i) {
         llvm::MDNode *CU = CUNode->getOperand(i);
-        llvm::Value *node[] = { CoverageFile, CU };
-        llvm::MDNode *N = llvm::MDNode::get(Ctx, node);
-        GCov->addOperand(N);
+        llvm::Metadata *Elts[] = {CoverageFile, CU};
+        GCov->addOperand(llvm::MDNode::get(Ctx, Elts));
       }
     }
   }
@@ -3543,10 +3674,8 @@
         VD->getAnyInitializer() &&
         !VD->getAnyInitializer()->isConstantInitializer(getContext(),
                                                         /*ForRef=*/false);
-    if (auto InitFunction =
-            getOpenMPRuntime().EmitOMPThreadPrivateVarDefinition(
-                VD, GetAddrOfGlobalVar(VD), RefExpr->getLocStart(),
-                PerformInit))
+    if (auto InitFunction = getOpenMPRuntime().emitThreadPrivateVarDefinition(
+            VD, GetAddrOfGlobalVar(VD), RefExpr->getLocStart(), PerformInit))
       CXXGlobalInits.push_back(InitFunction);
   }
 }
diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h
index a20d66c..6902d19 100644
--- a/lib/CodeGen/CodeGenModule.h
+++ b/lib/CodeGen/CodeGenModule.h
@@ -150,6 +150,8 @@
 
   llvm::CallingConv::ID RuntimeCC;
   llvm::CallingConv::ID getRuntimeCC() const { return RuntimeCC; }
+  llvm::CallingConv::ID BuiltinCC;
+  llvm::CallingConv::ID getBuiltinCC() const { return BuiltinCC; }
 };
 
 struct RREntrypoints {
@@ -256,9 +258,10 @@
 /// This class organizes the cross-function state that is used while generating
 /// LLVM code.
 class CodeGenModule : public CodeGenTypeCache {
-  CodeGenModule(const CodeGenModule &) LLVM_DELETED_FUNCTION;
-  void operator=(const CodeGenModule &) LLVM_DELETED_FUNCTION;
+  CodeGenModule(const CodeGenModule &) = delete;
+  void operator=(const CodeGenModule &) = delete;
 
+public:
   struct Structor {
     Structor() : Priority(0), Initializer(nullptr), AssociatedData(nullptr) {}
     Structor(int Priority, llvm::Constant *Initializer,
@@ -272,6 +275,7 @@
 
   typedef std::vector<Structor> CtorList;
 
+private:
   ASTContext &Context;
   const LangOptions &LangOpts;
   const CodeGenOptions &CodeGenOpts;
@@ -320,7 +324,7 @@
   /// referenced. These get code generated when the module is done.
   struct DeferredGlobal {
     DeferredGlobal(llvm::GlobalValue *GV, GlobalDecl GD) : GV(GV), GD(GD) {}
-    llvm::AssertingVH<llvm::GlobalValue> GV;
+    llvm::TrackingVH<llvm::GlobalValue> GV;
     GlobalDecl GD;
   };
   std::vector<DeferredGlobal> DeferredDeclsToEmit;
@@ -419,7 +423,7 @@
   llvm::SetVector<clang::Module *> ImportedModules;
 
   /// \brief A vector of metadata strings.
-  SmallVector<llvm::Value *, 16> LinkerOptionsMetadata;
+  SmallVector<llvm::Metadata *, 16> LinkerOptionsMetadata;
 
   /// @name Cache for Objective-C runtime types
   /// @{
@@ -589,9 +593,7 @@
 
   llvm::MDNode *getNoObjCARCExceptionsMetadata() {
     if (!NoObjCARCExceptionsMetadata)
-      NoObjCARCExceptionsMetadata =
-        llvm::MDNode::get(getLLVMContext(),
-                          SmallVector<llvm::Value*,1>());
+      NoObjCARCExceptionsMetadata = llvm::MDNode::get(getLLVMContext(), None);
     return NoObjCARCExceptionsMetadata;
   }
 
@@ -604,6 +606,7 @@
   const TargetInfo &getTarget() const { return Target; }
   const llvm::Triple &getTriple() const;
   bool supportsCOMDAT() const;
+  void maybeSetTrivialComdat(const Decl &D, llvm::GlobalObject &GO);
 
   CGCXXABI &getCXXABI() const { return *ABI; }
   llvm::LLVMContext &getLLVMContext() { return VMContext; }
@@ -624,6 +627,9 @@
     return VTables.getMicrosoftVTableContext();
   }
 
+  CtorList &getGlobalCtors() { return GlobalCtors; }
+  CtorList &getGlobalDtors() { return GlobalDtors; }
+
   llvm::MDNode *getTBAAInfo(QualType QTy);
   llvm::MDNode *getTBAAInfoForVTablePtr();
   llvm::MDNode *getTBAAStructInfo(QualType QTy);
@@ -874,6 +880,11 @@
                                         StringRef Name,
                                         llvm::AttributeSet ExtraAttrs =
                                           llvm::AttributeSet());
+  /// Create a new compiler builtin function with the specified type and name.
+  llvm::Constant *CreateBuiltinFunction(llvm::FunctionType *Ty,
+                                        StringRef Name,
+                                        llvm::AttributeSet ExtraAttrs =
+                                          llvm::AttributeSet());
   /// Create a new runtime global variable with the specified type and name.
   llvm::Constant *CreateRuntimeVariable(llvm::Type *Ty,
                                         StringRef Name);
@@ -983,7 +994,7 @@
 
   void EmitTentativeDefinition(const VarDecl *D);
 
-  void EmitVTable(CXXRecordDecl *Class, bool DefinitionRequired);
+  void EmitVTable(CXXRecordDecl *Class);
 
   /// Emit the RTTI descriptors for the builtin types.
   void EmitFundamentalRTTIDescriptors();
@@ -1092,6 +1103,11 @@
   /// \param D Threadprivate declaration.
   void EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D);
 
+  /// Emit bit set entries for the given vtable using the given layout if
+  /// vptr CFI is enabled.
+  void EmitVTableBitSetEntries(llvm::GlobalVariable *VTable,
+                               const VTableLayout &VTLayout);
+
 private:
   llvm::Constant *
   GetOrCreateLLVMFunction(StringRef MangledName, llvm::Type *Ty, GlobalDecl D,
@@ -1190,9 +1206,15 @@
   /// Emits the initializer for a uuidof string.
   llvm::Constant *EmitUuidofInitializer(StringRef uuidstr);
 
-  /// Determine if the given decl can be emitted lazily; this is only relevant
-  /// for definitions. The given decl must be either a function or var decl.
-  bool MayDeferGeneration(const ValueDecl *D);
+  /// Determine whether the definition must be emitted; if this returns \c
+  /// false, the definition can be emitted lazily if it's used.
+  bool MustBeEmitted(const ValueDecl *D);
+
+  /// Determine whether the definition can be emitted eagerly, or should be
+  /// delayed until the end of the translation unit. This is relevant for
+  /// definitions whose linkage can change, e.g. implicit function
+  /// instantiations which may later be explicitly instantiated.
+  bool MayBeEmittedEagerly(const ValueDecl *D);
 
   /// Check whether we can use a "simpler", more core exceptions personality
   /// function.
diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp
index 2f1f211..a88335d 100644
--- a/lib/CodeGen/CodeGenPGO.cpp
+++ b/lib/CodeGen/CodeGenPGO.cpp
@@ -16,6 +16,7 @@
 #include "CoverageMappingGen.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/StmtVisitor.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/ProfileData/InstrProfReader.h"
 #include "llvm/Support/Endian.h"
@@ -27,7 +28,7 @@
 
 void CodeGenPGO::setFuncName(StringRef Name,
                              llvm::GlobalValue::LinkageTypes Linkage) {
-  RawFuncName = Name;
+  StringRef RawFuncName = Name;
 
   // Function names may be prefixed with a binary '1' to indicate
   // that the backend should not modify the symbols due to any platform
@@ -35,204 +36,44 @@
   if (RawFuncName[0] == '\1')
     RawFuncName = RawFuncName.substr(1);
 
-  if (!llvm::GlobalValue::isLocalLinkage(Linkage)) {
-    PrefixedFuncName.reset(new std::string(RawFuncName));
-    return;
+  FuncName = RawFuncName;
+  if (llvm::GlobalValue::isLocalLinkage(Linkage)) {
+    // For local symbols, prepend the main file name to distinguish them.
+    // Do not include the full path in the file name since there's no guarantee
+    // that it will stay the same, e.g., if the files are checked out from
+    // version control in different locations.
+    if (CGM.getCodeGenOpts().MainFileName.empty())
+      FuncName = FuncName.insert(0, "<unknown>:");
+    else
+      FuncName = FuncName.insert(0, CGM.getCodeGenOpts().MainFileName + ":");
   }
 
-  // For local symbols, prepend the main file name to distinguish them.
-  // Do not include the full path in the file name since there's no guarantee
-  // that it will stay the same, e.g., if the files are checked out from
-  // version control in different locations.
-  PrefixedFuncName.reset(new std::string(CGM.getCodeGenOpts().MainFileName));
-  if (PrefixedFuncName->empty())
-    PrefixedFuncName->assign("<unknown>");
-  PrefixedFuncName->append(":");
-  PrefixedFuncName->append(RawFuncName);
+  // If we're generating a profile, create a variable for the name.
+  if (CGM.getCodeGenOpts().ProfileInstrGenerate)
+    createFuncNameVar(Linkage);
 }
 
 void CodeGenPGO::setFuncName(llvm::Function *Fn) {
   setFuncName(Fn->getName(), Fn->getLinkage());
 }
 
-void CodeGenPGO::setVarLinkage(llvm::GlobalValue::LinkageTypes Linkage) {
-  // Set the linkage for variables based on the function linkage.  Usually, we
-  // want to match it, but available_externally and extern_weak both have the
-  // wrong semantics.
-  VarLinkage = Linkage;
-  switch (VarLinkage) {
-  case llvm::GlobalValue::ExternalWeakLinkage:
-    VarLinkage = llvm::GlobalValue::LinkOnceAnyLinkage;
-    break;
-  case llvm::GlobalValue::AvailableExternallyLinkage:
-    VarLinkage = llvm::GlobalValue::LinkOnceODRLinkage;
-    break;
-  default:
-    break;
-  }
-}
+void CodeGenPGO::createFuncNameVar(llvm::GlobalValue::LinkageTypes Linkage) {
+  // Usually, we want to match the function's linkage, but
+  // available_externally and extern_weak both have the wrong semantics.
+  if (Linkage == llvm::GlobalValue::ExternalWeakLinkage)
+    Linkage = llvm::GlobalValue::LinkOnceAnyLinkage;
+  else if (Linkage == llvm::GlobalValue::AvailableExternallyLinkage)
+    Linkage = llvm::GlobalValue::LinkOnceODRLinkage;
 
-static llvm::Function *getRegisterFunc(CodeGenModule &CGM) {
-  return CGM.getModule().getFunction("__llvm_profile_register_functions");
-}
+  auto *Value =
+      llvm::ConstantDataArray::getString(CGM.getLLVMContext(), FuncName, false);
+  FuncNameVar =
+      new llvm::GlobalVariable(CGM.getModule(), Value->getType(), true, Linkage,
+                               Value, "__llvm_profile_name_" + FuncName);
 
-static llvm::BasicBlock *getOrInsertRegisterBB(CodeGenModule &CGM) {
-  // Don't do this for Darwin.  compiler-rt uses linker magic.
-  if (CGM.getTarget().getTriple().isOSDarwin())
-    return nullptr;
-
-  // Only need to insert this once per module.
-  if (llvm::Function *RegisterF = getRegisterFunc(CGM))
-    return &RegisterF->getEntryBlock();
-
-  // Construct the function.
-  auto *VoidTy = llvm::Type::getVoidTy(CGM.getLLVMContext());
-  auto *RegisterFTy = llvm::FunctionType::get(VoidTy, false);
-  auto *RegisterF = llvm::Function::Create(RegisterFTy,
-                                           llvm::GlobalValue::InternalLinkage,
-                                           "__llvm_profile_register_functions",
-                                           &CGM.getModule());
-  RegisterF->setUnnamedAddr(true);
-  if (CGM.getCodeGenOpts().DisableRedZone)
-    RegisterF->addFnAttr(llvm::Attribute::NoRedZone);
-
-  // Construct and return the entry block.
-  auto *BB = llvm::BasicBlock::Create(CGM.getLLVMContext(), "", RegisterF);
-  CGBuilderTy Builder(BB);
-  Builder.CreateRetVoid();
-  return BB;
-}
-
-static llvm::Constant *getOrInsertRuntimeRegister(CodeGenModule &CGM) {
-  auto *VoidTy = llvm::Type::getVoidTy(CGM.getLLVMContext());
-  auto *VoidPtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
-  auto *RuntimeRegisterTy = llvm::FunctionType::get(VoidTy, VoidPtrTy, false);
-  return CGM.getModule().getOrInsertFunction("__llvm_profile_register_function",
-                                             RuntimeRegisterTy);
-}
-
-static bool isMachO(const CodeGenModule &CGM) {
-  return CGM.getTarget().getTriple().isOSBinFormatMachO();
-}
-
-static StringRef getCountersSection(const CodeGenModule &CGM) {
-  return isMachO(CGM) ? "__DATA,__llvm_prf_cnts" : "__llvm_prf_cnts";
-}
-
-static StringRef getNameSection(const CodeGenModule &CGM) {
-  return isMachO(CGM) ? "__DATA,__llvm_prf_names" : "__llvm_prf_names";
-}
-
-static StringRef getDataSection(const CodeGenModule &CGM) {
-  return isMachO(CGM) ? "__DATA,__llvm_prf_data" : "__llvm_prf_data";
-}
-
-llvm::GlobalVariable *CodeGenPGO::buildDataVar() {
-  // Create name variable.
-  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
-  auto *VarName = llvm::ConstantDataArray::getString(Ctx, getFuncName(),
-                                                     false);
-  auto *Name = new llvm::GlobalVariable(CGM.getModule(), VarName->getType(),
-                                        true, VarLinkage, VarName,
-                                        getFuncVarName("name"));
-  Name->setSection(getNameSection(CGM));
-  Name->setAlignment(1);
-
-  // Create data variable.
-  auto *Int32Ty = llvm::Type::getInt32Ty(Ctx);
-  auto *Int64Ty = llvm::Type::getInt64Ty(Ctx);
-  auto *Int8PtrTy = llvm::Type::getInt8PtrTy(Ctx);
-  auto *Int64PtrTy = llvm::Type::getInt64PtrTy(Ctx);
-  llvm::GlobalVariable *Data = nullptr;
-  if (RegionCounters) {
-    llvm::Type *DataTypes[] = {
-      Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int64PtrTy
-    };
-    auto *DataTy = llvm::StructType::get(Ctx, makeArrayRef(DataTypes));
-    llvm::Constant *DataVals[] = {
-      llvm::ConstantInt::get(Int32Ty, getFuncName().size()),
-      llvm::ConstantInt::get(Int32Ty, NumRegionCounters),
-      llvm::ConstantInt::get(Int64Ty, FunctionHash),
-      llvm::ConstantExpr::getBitCast(Name, Int8PtrTy),
-      llvm::ConstantExpr::getBitCast(RegionCounters, Int64PtrTy)
-    };
-    Data =
-      new llvm::GlobalVariable(CGM.getModule(), DataTy, true, VarLinkage,
-                               llvm::ConstantStruct::get(DataTy, DataVals),
-                               getFuncVarName("data"));
-
-    // All the data should be packed into an array in its own section.
-    Data->setSection(getDataSection(CGM));
-    Data->setAlignment(8);
-  }
-
-  // Create coverage mapping data variable.
-  if (!CoverageMapping.empty())
-    CGM.getCoverageMapping()->addFunctionMappingRecord(Name, getFuncName(),
-                                                       FunctionHash,
-                                                       CoverageMapping);
-
-  // Hide all these symbols so that we correctly get a copy for each
-  // executable.  The profile format expects names and counters to be
-  // contiguous, so references into shared objects would be invalid.
-  if (!llvm::GlobalValue::isLocalLinkage(VarLinkage)) {
-    Name->setVisibility(llvm::GlobalValue::HiddenVisibility);
-    if (Data) {
-      Data->setVisibility(llvm::GlobalValue::HiddenVisibility);
-      RegionCounters->setVisibility(llvm::GlobalValue::HiddenVisibility);
-    }
-  }
-
-  // Make sure the data doesn't get deleted.
-  if (Data) CGM.addUsedGlobal(Data);
-  return Data;
-}
-
-void CodeGenPGO::emitInstrumentationData() {
-  if (!RegionCounters)
-    return;
-
-  // Build the data.
-  auto *Data = buildDataVar();
-
-  // Register the data.
-  auto *RegisterBB = getOrInsertRegisterBB(CGM);
-  if (!RegisterBB)
-    return;
-  CGBuilderTy Builder(RegisterBB->getTerminator());
-  auto *VoidPtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
-  Builder.CreateCall(getOrInsertRuntimeRegister(CGM),
-                     Builder.CreateBitCast(Data, VoidPtrTy));
-}
-
-llvm::Function *CodeGenPGO::emitInitialization(CodeGenModule &CGM) {
-  if (!CGM.getCodeGenOpts().ProfileInstrGenerate)
-    return nullptr;
-
-  assert(CGM.getModule().getFunction("__llvm_profile_init") == nullptr &&
-         "profile initialization already emitted");
-
-  // Get the function to call at initialization.
-  llvm::Constant *RegisterF = getRegisterFunc(CGM);
-  if (!RegisterF)
-    return nullptr;
-
-  // Create the initialization function.
-  auto *VoidTy = llvm::Type::getVoidTy(CGM.getLLVMContext());
-  auto *F = llvm::Function::Create(llvm::FunctionType::get(VoidTy, false),
-                                   llvm::GlobalValue::InternalLinkage,
-                                   "__llvm_profile_init", &CGM.getModule());
-  F->setUnnamedAddr(true);
-  F->addFnAttr(llvm::Attribute::NoInline);
-  if (CGM.getCodeGenOpts().DisableRedZone)
-    F->addFnAttr(llvm::Attribute::NoRedZone);
-
-  // Add the basic block and the necessary calls.
-  CGBuilderTy Builder(llvm::BasicBlock::Create(CGM.getLLVMContext(), "", F));
-  Builder.CreateCall(RegisterF);
-  Builder.CreateRetVoid();
-
-  return F;
+  // Hide the symbol so that we correctly get a copy for each executable.
+  if (!llvm::GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
+    FuncNameVar->setVisibility(llvm::GlobalValue::HiddenVisibility);
 }
 
 namespace {
@@ -297,482 +138,479 @@
 const unsigned PGOHash::NumTypesPerWord;
 const unsigned PGOHash::TooBig;
 
-  /// A RecursiveASTVisitor that fills a map of statements to PGO counters.
-  struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
-    /// The next counter value to assign.
-    unsigned NextCounter;
-    /// The function hash.
-    PGOHash Hash;
-    /// The map of statements to counters.
-    llvm::DenseMap<const Stmt *, unsigned> &CounterMap;
+/// A RecursiveASTVisitor that fills a map of statements to PGO counters.
+struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
+  /// The next counter value to assign.
+  unsigned NextCounter;
+  /// The function hash.
+  PGOHash Hash;
+  /// The map of statements to counters.
+  llvm::DenseMap<const Stmt *, unsigned> &CounterMap;
 
-    MapRegionCounters(llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
-        : NextCounter(0), CounterMap(CounterMap) {}
+  MapRegionCounters(llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
+      : NextCounter(0), CounterMap(CounterMap) {}
 
-    // Blocks and lambdas are handled as separate functions, so we need not
-    // traverse them in the parent context.
-    bool TraverseBlockExpr(BlockExpr *BE) { return true; }
-    bool TraverseLambdaBody(LambdaExpr *LE) { return true; }
-    bool TraverseCapturedStmt(CapturedStmt *CS) { return true; }
+  // Blocks, lambdas, and captured statements are handled as separate
+  // functions, so we need not traverse them in the parent context.
+  bool TraverseBlockExpr(BlockExpr *BE) { return true; }
+  bool TraverseLambdaBody(LambdaExpr *LE) { return true; }
+  bool TraverseCapturedStmt(CapturedStmt *CS) { return true; }
 
-    bool VisitDecl(const Decl *D) {
-      switch (D->getKind()) {
-      default:
-        break;
-      case Decl::Function:
-      case Decl::CXXMethod:
-      case Decl::CXXConstructor:
-      case Decl::CXXDestructor:
-      case Decl::CXXConversion:
-      case Decl::ObjCMethod:
-      case Decl::Block:
-      case Decl::Captured:
-        CounterMap[D->getBody()] = NextCounter++;
-        break;
-      }
+  bool VisitDecl(const Decl *D) {
+    switch (D->getKind()) {
+    default:
+      break;
+    case Decl::Function:
+    case Decl::CXXMethod:
+    case Decl::CXXConstructor:
+    case Decl::CXXDestructor:
+    case Decl::CXXConversion:
+    case Decl::ObjCMethod:
+    case Decl::Block:
+    case Decl::Captured:
+      CounterMap[D->getBody()] = NextCounter++;
+      break;
+    }
+    return true;
+  }
+
+  bool VisitStmt(const Stmt *S) {
+    auto Type = getHashType(S);
+    if (Type == PGOHash::None)
       return true;
-    }
 
-    bool VisitStmt(const Stmt *S) {
-      auto Type = getHashType(S);
-      if (Type == PGOHash::None)
-        return true;
+    CounterMap[S] = NextCounter++;
+    Hash.combine(Type);
+    return true;
+  }
+  PGOHash::HashType getHashType(const Stmt *S) {
+    switch (S->getStmtClass()) {
+    default:
+      break;
+    case Stmt::LabelStmtClass:
+      return PGOHash::LabelStmt;
+    case Stmt::WhileStmtClass:
+      return PGOHash::WhileStmt;
+    case Stmt::DoStmtClass:
+      return PGOHash::DoStmt;
+    case Stmt::ForStmtClass:
+      return PGOHash::ForStmt;
+    case Stmt::CXXForRangeStmtClass:
+      return PGOHash::CXXForRangeStmt;
+    case Stmt::ObjCForCollectionStmtClass:
+      return PGOHash::ObjCForCollectionStmt;
+    case Stmt::SwitchStmtClass:
+      return PGOHash::SwitchStmt;
+    case Stmt::CaseStmtClass:
+      return PGOHash::CaseStmt;
+    case Stmt::DefaultStmtClass:
+      return PGOHash::DefaultStmt;
+    case Stmt::IfStmtClass:
+      return PGOHash::IfStmt;
+    case Stmt::CXXTryStmtClass:
+      return PGOHash::CXXTryStmt;
+    case Stmt::CXXCatchStmtClass:
+      return PGOHash::CXXCatchStmt;
+    case Stmt::ConditionalOperatorClass:
+      return PGOHash::ConditionalOperator;
+    case Stmt::BinaryConditionalOperatorClass:
+      return PGOHash::BinaryConditionalOperator;
+    case Stmt::BinaryOperatorClass: {
+      const BinaryOperator *BO = cast<BinaryOperator>(S);
+      if (BO->getOpcode() == BO_LAnd)
+        return PGOHash::BinaryOperatorLAnd;
+      if (BO->getOpcode() == BO_LOr)
+        return PGOHash::BinaryOperatorLOr;
+      break;
+    }
+    }
+    return PGOHash::None;
+  }
+};
 
-      CounterMap[S] = NextCounter++;
-      Hash.combine(Type);
-      return true;
-    }
-    PGOHash::HashType getHashType(const Stmt *S) {
-      switch (S->getStmtClass()) {
-      default:
-        break;
-      case Stmt::LabelStmtClass:
-        return PGOHash::LabelStmt;
-      case Stmt::WhileStmtClass:
-        return PGOHash::WhileStmt;
-      case Stmt::DoStmtClass:
-        return PGOHash::DoStmt;
-      case Stmt::ForStmtClass:
-        return PGOHash::ForStmt;
-      case Stmt::CXXForRangeStmtClass:
-        return PGOHash::CXXForRangeStmt;
-      case Stmt::ObjCForCollectionStmtClass:
-        return PGOHash::ObjCForCollectionStmt;
-      case Stmt::SwitchStmtClass:
-        return PGOHash::SwitchStmt;
-      case Stmt::CaseStmtClass:
-        return PGOHash::CaseStmt;
-      case Stmt::DefaultStmtClass:
-        return PGOHash::DefaultStmt;
-      case Stmt::IfStmtClass:
-        return PGOHash::IfStmt;
-      case Stmt::CXXTryStmtClass:
-        return PGOHash::CXXTryStmt;
-      case Stmt::CXXCatchStmtClass:
-        return PGOHash::CXXCatchStmt;
-      case Stmt::ConditionalOperatorClass:
-        return PGOHash::ConditionalOperator;
-      case Stmt::BinaryConditionalOperatorClass:
-        return PGOHash::BinaryConditionalOperator;
-      case Stmt::BinaryOperatorClass: {
-        const BinaryOperator *BO = cast<BinaryOperator>(S);
-        if (BO->getOpcode() == BO_LAnd)
-          return PGOHash::BinaryOperatorLAnd;
-        if (BO->getOpcode() == BO_LOr)
-          return PGOHash::BinaryOperatorLOr;
-        break;
-      }
-      }
-      return PGOHash::None;
-    }
+/// A StmtVisitor that propagates the raw counts through the AST and
+/// records the count at statements where the value may change.
+struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
+  /// PGO state.
+  CodeGenPGO &PGO;
+
+  /// A flag that is set when the current count should be recorded on the
+  /// next statement, such as at the exit of a loop.
+  bool RecordNextStmtCount;
+
+  /// The map of statements to count values.
+  llvm::DenseMap<const Stmt *, uint64_t> &CountMap;
+
+  /// BreakContinueStack - Keep counts of breaks and continues inside loops.
+  struct BreakContinue {
+    uint64_t BreakCount;
+    uint64_t ContinueCount;
+    BreakContinue() : BreakCount(0), ContinueCount(0) {}
   };
+  SmallVector<BreakContinue, 8> BreakContinueStack;
 
-  /// A StmtVisitor that propagates the raw counts through the AST and
-  /// records the count at statements where the value may change.
-  struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
-    /// PGO state.
-    CodeGenPGO &PGO;
+  ComputeRegionCounts(llvm::DenseMap<const Stmt *, uint64_t> &CountMap,
+                      CodeGenPGO &PGO)
+      : PGO(PGO), RecordNextStmtCount(false), CountMap(CountMap) {}
 
-    /// A flag that is set when the current count should be recorded on the
-    /// next statement, such as at the exit of a loop.
-    bool RecordNextStmtCount;
-
-    /// The map of statements to count values.
-    llvm::DenseMap<const Stmt *, uint64_t> &CountMap;
-
-    /// BreakContinueStack - Keep counts of breaks and continues inside loops.
-    struct BreakContinue {
-      uint64_t BreakCount;
-      uint64_t ContinueCount;
-      BreakContinue() : BreakCount(0), ContinueCount(0) {}
-    };
-    SmallVector<BreakContinue, 8> BreakContinueStack;
-
-    ComputeRegionCounts(llvm::DenseMap<const Stmt *, uint64_t> &CountMap,
-                        CodeGenPGO &PGO)
-        : PGO(PGO), RecordNextStmtCount(false), CountMap(CountMap) {}
-
-    void RecordStmtCount(const Stmt *S) {
-      if (RecordNextStmtCount) {
-        CountMap[S] = PGO.getCurrentRegionCount();
-        RecordNextStmtCount = false;
-      }
-    }
-
-    void VisitStmt(const Stmt *S) {
-      RecordStmtCount(S);
-      for (Stmt::const_child_range I = S->children(); I; ++I) {
-        if (*I)
-         this->Visit(*I);
-      }
-    }
-
-    void VisitFunctionDecl(const FunctionDecl *D) {
-      // Counter tracks entry to the function body.
-      RegionCounter Cnt(PGO, D->getBody());
-      Cnt.beginRegion();
-      CountMap[D->getBody()] = PGO.getCurrentRegionCount();
-      Visit(D->getBody());
-    }
-
-    // Skip lambda expressions. We visit these as FunctionDecls when we're
-    // generating them and aren't interested in the body when generating a
-    // parent context.
-    void VisitLambdaExpr(const LambdaExpr *LE) {}
-
-    void VisitCapturedDecl(const CapturedDecl *D) {
-      // Counter tracks entry to the capture body.
-      RegionCounter Cnt(PGO, D->getBody());
-      Cnt.beginRegion();
-      CountMap[D->getBody()] = PGO.getCurrentRegionCount();
-      Visit(D->getBody());
-    }
-
-    void VisitObjCMethodDecl(const ObjCMethodDecl *D) {
-      // Counter tracks entry to the method body.
-      RegionCounter Cnt(PGO, D->getBody());
-      Cnt.beginRegion();
-      CountMap[D->getBody()] = PGO.getCurrentRegionCount();
-      Visit(D->getBody());
-    }
-
-    void VisitBlockDecl(const BlockDecl *D) {
-      // Counter tracks entry to the block body.
-      RegionCounter Cnt(PGO, D->getBody());
-      Cnt.beginRegion();
-      CountMap[D->getBody()] = PGO.getCurrentRegionCount();
-      Visit(D->getBody());
-    }
-
-    void VisitReturnStmt(const ReturnStmt *S) {
-      RecordStmtCount(S);
-      if (S->getRetValue())
-        Visit(S->getRetValue());
-      PGO.setCurrentRegionUnreachable();
-      RecordNextStmtCount = true;
-    }
-
-    void VisitGotoStmt(const GotoStmt *S) {
-      RecordStmtCount(S);
-      PGO.setCurrentRegionUnreachable();
-      RecordNextStmtCount = true;
-    }
-
-    void VisitLabelStmt(const LabelStmt *S) {
-      RecordNextStmtCount = false;
-      // Counter tracks the block following the label.
-      RegionCounter Cnt(PGO, S);
-      Cnt.beginRegion();
+  void RecordStmtCount(const Stmt *S) {
+    if (RecordNextStmtCount) {
       CountMap[S] = PGO.getCurrentRegionCount();
-      Visit(S->getSubStmt());
+      RecordNextStmtCount = false;
     }
+  }
 
-    void VisitBreakStmt(const BreakStmt *S) {
-      RecordStmtCount(S);
-      assert(!BreakContinueStack.empty() && "break not in a loop or switch!");
-      BreakContinueStack.back().BreakCount += PGO.getCurrentRegionCount();
-      PGO.setCurrentRegionUnreachable();
-      RecordNextStmtCount = true;
+  void VisitStmt(const Stmt *S) {
+    RecordStmtCount(S);
+    for (Stmt::const_child_range I = S->children(); I; ++I) {
+      if (*I)
+        this->Visit(*I);
     }
+  }
 
-    void VisitContinueStmt(const ContinueStmt *S) {
-      RecordStmtCount(S);
-      assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
-      BreakContinueStack.back().ContinueCount += PGO.getCurrentRegionCount();
-      PGO.setCurrentRegionUnreachable();
-      RecordNextStmtCount = true;
-    }
+  void VisitFunctionDecl(const FunctionDecl *D) {
+    // Counter tracks entry to the function body.
+    RegionCounter Cnt(PGO, D->getBody());
+    Cnt.beginRegion();
+    CountMap[D->getBody()] = PGO.getCurrentRegionCount();
+    Visit(D->getBody());
+  }
 
-    void VisitWhileStmt(const WhileStmt *S) {
-      RecordStmtCount(S);
-      // Counter tracks the body of the loop.
-      RegionCounter Cnt(PGO, S);
-      BreakContinueStack.push_back(BreakContinue());
-      // Visit the body region first so the break/continue adjustments can be
-      // included when visiting the condition.
-      Cnt.beginRegion();
-      CountMap[S->getBody()] = PGO.getCurrentRegionCount();
-      Visit(S->getBody());
-      Cnt.adjustForControlFlow();
+  // Skip lambda expressions. We visit these as FunctionDecls when we're
+  // generating them and aren't interested in the body when generating a
+  // parent context.
+  void VisitLambdaExpr(const LambdaExpr *LE) {}
 
-      // ...then go back and propagate counts through the condition. The count
-      // at the start of the condition is the sum of the incoming edges,
-      // the backedge from the end of the loop body, and the edges from
-      // continue statements.
-      BreakContinue BC = BreakContinueStack.pop_back_val();
-      Cnt.setCurrentRegionCount(Cnt.getParentCount() +
-                                Cnt.getAdjustedCount() + BC.ContinueCount);
-      CountMap[S->getCond()] = PGO.getCurrentRegionCount();
-      Visit(S->getCond());
-      Cnt.adjustForControlFlow();
-      Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
-      RecordNextStmtCount = true;
-    }
+  void VisitCapturedDecl(const CapturedDecl *D) {
+    // Counter tracks entry to the capture body.
+    RegionCounter Cnt(PGO, D->getBody());
+    Cnt.beginRegion();
+    CountMap[D->getBody()] = PGO.getCurrentRegionCount();
+    Visit(D->getBody());
+  }
 
-    void VisitDoStmt(const DoStmt *S) {
-      RecordStmtCount(S);
-      // Counter tracks the body of the loop.
-      RegionCounter Cnt(PGO, S);
-      BreakContinueStack.push_back(BreakContinue());
-      Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
-      CountMap[S->getBody()] = PGO.getCurrentRegionCount();
-      Visit(S->getBody());
-      Cnt.adjustForControlFlow();
+  void VisitObjCMethodDecl(const ObjCMethodDecl *D) {
+    // Counter tracks entry to the method body.
+    RegionCounter Cnt(PGO, D->getBody());
+    Cnt.beginRegion();
+    CountMap[D->getBody()] = PGO.getCurrentRegionCount();
+    Visit(D->getBody());
+  }
 
-      BreakContinue BC = BreakContinueStack.pop_back_val();
-      // The count at the start of the condition is equal to the count at the
-      // end of the body. The adjusted count does not include either the
-      // fall-through count coming into the loop or the continue count, so add
-      // both of those separately. This is coincidentally the same equation as
-      // with while loops but for different reasons.
-      Cnt.setCurrentRegionCount(Cnt.getParentCount() +
-                                Cnt.getAdjustedCount() + BC.ContinueCount);
-      CountMap[S->getCond()] = PGO.getCurrentRegionCount();
-      Visit(S->getCond());
-      Cnt.adjustForControlFlow();
-      Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
-      RecordNextStmtCount = true;
-    }
+  void VisitBlockDecl(const BlockDecl *D) {
+    // Counter tracks entry to the block body.
+    RegionCounter Cnt(PGO, D->getBody());
+    Cnt.beginRegion();
+    CountMap[D->getBody()] = PGO.getCurrentRegionCount();
+    Visit(D->getBody());
+  }
 
-    void VisitForStmt(const ForStmt *S) {
-      RecordStmtCount(S);
-      if (S->getInit())
-        Visit(S->getInit());
-      // Counter tracks the body of the loop.
-      RegionCounter Cnt(PGO, S);
-      BreakContinueStack.push_back(BreakContinue());
-      // Visit the body region first. (This is basically the same as a while
-      // loop; see further comments in VisitWhileStmt.)
-      Cnt.beginRegion();
-      CountMap[S->getBody()] = PGO.getCurrentRegionCount();
-      Visit(S->getBody());
-      Cnt.adjustForControlFlow();
+  void VisitReturnStmt(const ReturnStmt *S) {
+    RecordStmtCount(S);
+    if (S->getRetValue())
+      Visit(S->getRetValue());
+    PGO.setCurrentRegionUnreachable();
+    RecordNextStmtCount = true;
+  }
 
-      // The increment is essentially part of the body but it needs to include
-      // the count for all the continue statements.
-      if (S->getInc()) {
-        Cnt.setCurrentRegionCount(PGO.getCurrentRegionCount() +
-                                  BreakContinueStack.back().ContinueCount);
-        CountMap[S->getInc()] = PGO.getCurrentRegionCount();
-        Visit(S->getInc());
-        Cnt.adjustForControlFlow();
-      }
+  void VisitGotoStmt(const GotoStmt *S) {
+    RecordStmtCount(S);
+    PGO.setCurrentRegionUnreachable();
+    RecordNextStmtCount = true;
+  }
 
-      BreakContinue BC = BreakContinueStack.pop_back_val();
+  void VisitLabelStmt(const LabelStmt *S) {
+    RecordNextStmtCount = false;
+    // Counter tracks the block following the label.
+    RegionCounter Cnt(PGO, S);
+    Cnt.beginRegion();
+    CountMap[S] = PGO.getCurrentRegionCount();
+    Visit(S->getSubStmt());
+  }
 
-      // ...then go back and propagate counts through the condition.
-      if (S->getCond()) {
-        Cnt.setCurrentRegionCount(Cnt.getParentCount() +
-                                  Cnt.getAdjustedCount() +
-                                  BC.ContinueCount);
-        CountMap[S->getCond()] = PGO.getCurrentRegionCount();
-        Visit(S->getCond());
-        Cnt.adjustForControlFlow();
-      }
-      Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
-      RecordNextStmtCount = true;
-    }
+  void VisitBreakStmt(const BreakStmt *S) {
+    RecordStmtCount(S);
+    assert(!BreakContinueStack.empty() && "break not in a loop or switch!");
+    BreakContinueStack.back().BreakCount += PGO.getCurrentRegionCount();
+    PGO.setCurrentRegionUnreachable();
+    RecordNextStmtCount = true;
+  }
 
-    void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
-      RecordStmtCount(S);
-      Visit(S->getRangeStmt());
-      Visit(S->getBeginEndStmt());
-      // Counter tracks the body of the loop.
-      RegionCounter Cnt(PGO, S);
-      BreakContinueStack.push_back(BreakContinue());
-      // Visit the body region first. (This is basically the same as a while
-      // loop; see further comments in VisitWhileStmt.)
-      Cnt.beginRegion();
-      CountMap[S->getLoopVarStmt()] = PGO.getCurrentRegionCount();
-      Visit(S->getLoopVarStmt());
-      Visit(S->getBody());
-      Cnt.adjustForControlFlow();
+  void VisitContinueStmt(const ContinueStmt *S) {
+    RecordStmtCount(S);
+    assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
+    BreakContinueStack.back().ContinueCount += PGO.getCurrentRegionCount();
+    PGO.setCurrentRegionUnreachable();
+    RecordNextStmtCount = true;
+  }
 
-      // The increment is essentially part of the body but it needs to include
-      // the count for all the continue statements.
+  void VisitWhileStmt(const WhileStmt *S) {
+    RecordStmtCount(S);
+    // Counter tracks the body of the loop.
+    RegionCounter Cnt(PGO, S);
+    BreakContinueStack.push_back(BreakContinue());
+    // Visit the body region first so the break/continue adjustments can be
+    // included when visiting the condition.
+    Cnt.beginRegion();
+    CountMap[S->getBody()] = PGO.getCurrentRegionCount();
+    Visit(S->getBody());
+    Cnt.adjustForControlFlow();
+
+    // ...then go back and propagate counts through the condition. The count
+    // at the start of the condition is the sum of the incoming edges,
+    // the backedge from the end of the loop body, and the edges from
+    // continue statements.
+    BreakContinue BC = BreakContinueStack.pop_back_val();
+    Cnt.setCurrentRegionCount(Cnt.getParentCount() + Cnt.getAdjustedCount() +
+                              BC.ContinueCount);
+    CountMap[S->getCond()] = PGO.getCurrentRegionCount();
+    Visit(S->getCond());
+    Cnt.adjustForControlFlow();
+    Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
+    RecordNextStmtCount = true;
+  }
+
+  void VisitDoStmt(const DoStmt *S) {
+    RecordStmtCount(S);
+    // Counter tracks the body of the loop.
+    RegionCounter Cnt(PGO, S);
+    BreakContinueStack.push_back(BreakContinue());
+    Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
+    CountMap[S->getBody()] = PGO.getCurrentRegionCount();
+    Visit(S->getBody());
+    Cnt.adjustForControlFlow();
+
+    BreakContinue BC = BreakContinueStack.pop_back_val();
+    // The count at the start of the condition is equal to the count at the
+    // end of the body. The adjusted count does not include either the
+    // fall-through count coming into the loop or the continue count, so add
+    // both of those separately. This is coincidentally the same equation as
+    // with while loops but for different reasons.
+    Cnt.setCurrentRegionCount(Cnt.getParentCount() + Cnt.getAdjustedCount() +
+                              BC.ContinueCount);
+    CountMap[S->getCond()] = PGO.getCurrentRegionCount();
+    Visit(S->getCond());
+    Cnt.adjustForControlFlow();
+    Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
+    RecordNextStmtCount = true;
+  }
+
+  void VisitForStmt(const ForStmt *S) { // Counts for init, body, inc, cond.
+    RecordStmtCount(S);
+    if (S->getInit()) // the init statement is optional
+      Visit(S->getInit());
+    // Counter tracks the body of the loop.
+    RegionCounter Cnt(PGO, S);
+    BreakContinueStack.push_back(BreakContinue()); // record for this loop
+    // Visit the body region first. (This is basically the same as a while
+    // loop; see further comments in VisitWhileStmt.)
+    Cnt.beginRegion();
+    CountMap[S->getBody()] = PGO.getCurrentRegionCount();
+    Visit(S->getBody());
+    Cnt.adjustForControlFlow();
+
+    // The increment is essentially part of the body but it needs to include
+    // the count for all the continue statements.
+    if (S->getInc()) { // the increment is optional
       Cnt.setCurrentRegionCount(PGO.getCurrentRegionCount() +
                                 BreakContinueStack.back().ContinueCount);
       CountMap[S->getInc()] = PGO.getCurrentRegionCount();
       Visit(S->getInc());
       Cnt.adjustForControlFlow();
+    }
 
-      BreakContinue BC = BreakContinueStack.pop_back_val();
+    BreakContinue BC = BreakContinueStack.pop_back_val(); // this loop's record
 
-      // ...then go back and propagate counts through the condition.
-      Cnt.setCurrentRegionCount(Cnt.getParentCount() +
-                                Cnt.getAdjustedCount() +
+    // ...then go back and propagate counts through the condition.
+    if (S->getCond()) { // the condition is optional
+      Cnt.setCurrentRegionCount(Cnt.getParentCount() + Cnt.getAdjustedCount() +
                                 BC.ContinueCount);
       CountMap[S->getCond()] = PGO.getCurrentRegionCount();
       Visit(S->getCond());
       Cnt.adjustForControlFlow();
-      Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
-      RecordNextStmtCount = true;
     }
+    Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
+    RecordNextStmtCount = true;
+  }
 
-    void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
-      RecordStmtCount(S);
-      Visit(S->getElement());
-      // Counter tracks the body of the loop.
-      RegionCounter Cnt(PGO, S);
-      BreakContinueStack.push_back(BreakContinue());
-      Cnt.beginRegion();
-      CountMap[S->getBody()] = PGO.getCurrentRegionCount();
-      Visit(S->getBody());
-      BreakContinue BC = BreakContinueStack.pop_back_val();
-      Cnt.adjustForControlFlow();
-      Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
-      RecordNextStmtCount = true;
-    }
+  void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
+    RecordStmtCount(S);
+    Visit(S->getRangeStmt()); // range and begin/end setup precede the counter
+    Visit(S->getBeginEndStmt());
+    // Counter tracks the body of the loop.
+    RegionCounter Cnt(PGO, S);
+    BreakContinueStack.push_back(BreakContinue()); // record for this loop
+    // Visit the body region first. (This is basically the same as a while
+    // loop; see further comments in VisitWhileStmt.)
+    Cnt.beginRegion();
+    CountMap[S->getLoopVarStmt()] = PGO.getCurrentRegionCount();
+    Visit(S->getLoopVarStmt()); // the loop variable is counted with the body
+    Visit(S->getBody());
+    Cnt.adjustForControlFlow();
 
-    void VisitSwitchStmt(const SwitchStmt *S) {
-      RecordStmtCount(S);
-      Visit(S->getCond());
-      PGO.setCurrentRegionUnreachable();
-      BreakContinueStack.push_back(BreakContinue());
-      Visit(S->getBody());
-      // If the switch is inside a loop, add the continue counts.
-      BreakContinue BC = BreakContinueStack.pop_back_val();
-      if (!BreakContinueStack.empty())
-        BreakContinueStack.back().ContinueCount += BC.ContinueCount;
-      // Counter tracks the exit block of the switch.
-      RegionCounter ExitCnt(PGO, S);
-      ExitCnt.beginRegion();
-      RecordNextStmtCount = true;
-    }
+    // The increment is essentially part of the body but it needs to include
+    // the count for all the continue statements.
+    Cnt.setCurrentRegionCount(PGO.getCurrentRegionCount() +
+                              BreakContinueStack.back().ContinueCount);
+    CountMap[S->getInc()] = PGO.getCurrentRegionCount();
+    Visit(S->getInc()); // no null check, unlike VisitForStmt
+    Cnt.adjustForControlFlow();
 
-    void VisitCaseStmt(const CaseStmt *S) {
-      RecordNextStmtCount = false;
-      // Counter for this particular case. This counts only jumps from the
-      // switch header and does not include fallthrough from the case before
-      // this one.
-      RegionCounter Cnt(PGO, S);
-      Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
-      CountMap[S] = Cnt.getCount();
-      RecordNextStmtCount = true;
-      Visit(S->getSubStmt());
-    }
+    BreakContinue BC = BreakContinueStack.pop_back_val(); // this loop's record
 
-    void VisitDefaultStmt(const DefaultStmt *S) {
-      RecordNextStmtCount = false;
-      // Counter for this default case. This does not include fallthrough from
-      // the previous case.
-      RegionCounter Cnt(PGO, S);
-      Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
-      CountMap[S] = Cnt.getCount();
-      RecordNextStmtCount = true;
-      Visit(S->getSubStmt());
-    }
+    // ...then go back and propagate counts through the condition.
+    Cnt.setCurrentRegionCount(Cnt.getParentCount() + Cnt.getAdjustedCount() +
+                              BC.ContinueCount);
+    CountMap[S->getCond()] = PGO.getCurrentRegionCount();
+    Visit(S->getCond()); // no null check, unlike VisitForStmt
+    Cnt.adjustForControlFlow();
+    Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
+    RecordNextStmtCount = true;
+  }
 
-    void VisitIfStmt(const IfStmt *S) {
-      RecordStmtCount(S);
-      // Counter tracks the "then" part of an if statement. The count for
-      // the "else" part, if it exists, will be calculated from this counter.
-      RegionCounter Cnt(PGO, S);
-      Visit(S->getCond());
+  void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
+    RecordStmtCount(S);
+    Visit(S->getElement()); // visited before the body counter is created
+    // Counter tracks the body of the loop.
+    RegionCounter Cnt(PGO, S);
+    BreakContinueStack.push_back(BreakContinue()); // record for this loop
+    Cnt.beginRegion();
+    CountMap[S->getBody()] = PGO.getCurrentRegionCount();
+    Visit(S->getBody());
+    BreakContinue BC = BreakContinueStack.pop_back_val(); // this loop's record
+    Cnt.adjustForControlFlow();
+    Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
+    RecordNextStmtCount = true;
+  }
 
-      Cnt.beginRegion();
-      CountMap[S->getThen()] = PGO.getCurrentRegionCount();
-      Visit(S->getThen());
-      Cnt.adjustForControlFlow();
+  void VisitSwitchStmt(const SwitchStmt *S) {
+    RecordStmtCount(S);
+    Visit(S->getCond());
+    PGO.setCurrentRegionUnreachable(); // presumably entered only via labels
+    BreakContinueStack.push_back(BreakContinue()); // record for this switch
+    Visit(S->getBody());
+    // If the switch is inside a loop, add the continue counts.
+    BreakContinue BC = BreakContinueStack.pop_back_val();
+    if (!BreakContinueStack.empty()) // an enclosing record exists
+      BreakContinueStack.back().ContinueCount += BC.ContinueCount;
+    // Counter tracks the exit block of the switch.
+    RegionCounter ExitCnt(PGO, S);
+    ExitCnt.beginRegion();
+    RecordNextStmtCount = true;
+  }
 
-      if (S->getElse()) {
-        Cnt.beginElseRegion();
-        CountMap[S->getElse()] = PGO.getCurrentRegionCount();
-        Visit(S->getElse());
-        Cnt.adjustForControlFlow();
-      }
-      Cnt.applyAdjustmentsToRegion(0);
-      RecordNextStmtCount = true;
-    }
+  void VisitCaseStmt(const CaseStmt *S) {
+    RecordNextStmtCount = false; // re-enabled after CountMap[S] is set
+    // Counter for this particular case. This counts only jumps from the
+    // switch header and does not include fallthrough from the case before
+    // this one.
+    RegionCounter Cnt(PGO, S);
+    Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
+    CountMap[S] = Cnt.getCount(); // Cnt's own count, not the region count
+    RecordNextStmtCount = true;
+    Visit(S->getSubStmt());
+  }
 
-    void VisitCXXTryStmt(const CXXTryStmt *S) {
-      RecordStmtCount(S);
-      Visit(S->getTryBlock());
-      for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
-        Visit(S->getHandler(I));
-      // Counter tracks the continuation block of the try statement.
-      RegionCounter Cnt(PGO, S);
-      Cnt.beginRegion();
-      RecordNextStmtCount = true;
-    }
+  void VisitDefaultStmt(const DefaultStmt *S) {
+    RecordNextStmtCount = false; // re-enabled after CountMap[S] is set
+    // Counter for this default case. This does not include fallthrough from
+    // the previous case.
+    RegionCounter Cnt(PGO, S);
+    Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
+    CountMap[S] = Cnt.getCount(); // Cnt's own count, not the region count
+    RecordNextStmtCount = true;
+    Visit(S->getSubStmt());
+  }
 
-    void VisitCXXCatchStmt(const CXXCatchStmt *S) {
-      RecordNextStmtCount = false;
-      // Counter tracks the catch statement's handler block.
-      RegionCounter Cnt(PGO, S);
-      Cnt.beginRegion();
-      CountMap[S] = PGO.getCurrentRegionCount();
-      Visit(S->getHandlerBlock());
-    }
+  void VisitIfStmt(const IfStmt *S) {
+    RecordStmtCount(S);
+    // Counter tracks the "then" part of an if statement. The count for
+    // the "else" part, if it exists, will be calculated from this counter.
+    RegionCounter Cnt(PGO, S);
+    Visit(S->getCond()); // visited before the "then" region begins
 
-    void VisitAbstractConditionalOperator(
-        const AbstractConditionalOperator *E) {
-      RecordStmtCount(E);
-      // Counter tracks the "true" part of a conditional operator. The
-      // count in the "false" part will be calculated from this counter.
-      RegionCounter Cnt(PGO, E);
-      Visit(E->getCond());
+    Cnt.beginRegion();
+    CountMap[S->getThen()] = PGO.getCurrentRegionCount();
+    Visit(S->getThen());
+    Cnt.adjustForControlFlow();
 
-      Cnt.beginRegion();
-      CountMap[E->getTrueExpr()] = PGO.getCurrentRegionCount();
-      Visit(E->getTrueExpr());
-      Cnt.adjustForControlFlow();
-
+    if (S->getElse()) { // the else branch is optional
       Cnt.beginElseRegion();
-      CountMap[E->getFalseExpr()] = PGO.getCurrentRegionCount();
-      Visit(E->getFalseExpr());
+      CountMap[S->getElse()] = PGO.getCurrentRegionCount();
+      Visit(S->getElse());
       Cnt.adjustForControlFlow();
-
-      Cnt.applyAdjustmentsToRegion(0);
-      RecordNextStmtCount = true;
     }
+    Cnt.applyAdjustmentsToRegion(0); // 0: no break/continue counts of its own
+    RecordNextStmtCount = true;
+  }
 
-    void VisitBinLAnd(const BinaryOperator *E) {
-      RecordStmtCount(E);
-      // Counter tracks the right hand side of a logical and operator.
-      RegionCounter Cnt(PGO, E);
-      Visit(E->getLHS());
-      Cnt.beginRegion();
-      CountMap[E->getRHS()] = PGO.getCurrentRegionCount();
-      Visit(E->getRHS());
-      Cnt.adjustForControlFlow();
-      Cnt.applyAdjustmentsToRegion(0);
-      RecordNextStmtCount = true;
-    }
+  void VisitCXXTryStmt(const CXXTryStmt *S) {
+    RecordStmtCount(S);
+    Visit(S->getTryBlock()); // try block first, then every handler in order
+    for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
+      Visit(S->getHandler(I));
+    // Counter tracks the continuation block of the try statement.
+    RegionCounter Cnt(PGO, S);
+    Cnt.beginRegion();
+    RecordNextStmtCount = true;
+  }
 
-    void VisitBinLOr(const BinaryOperator *E) {
-      RecordStmtCount(E);
-      // Counter tracks the right hand side of a logical or operator.
-      RegionCounter Cnt(PGO, E);
-      Visit(E->getLHS());
-      Cnt.beginRegion();
-      CountMap[E->getRHS()] = PGO.getCurrentRegionCount();
-      Visit(E->getRHS());
-      Cnt.adjustForControlFlow();
-      Cnt.applyAdjustmentsToRegion(0);
-      RecordNextStmtCount = true;
-    }
-  };
+  void VisitCXXCatchStmt(const CXXCatchStmt *S) {
+    RecordNextStmtCount = false; // not set back to true in this function
+    // Counter tracks the catch statement's handler block.
+    RegionCounter Cnt(PGO, S);
+    Cnt.beginRegion();
+    CountMap[S] = PGO.getCurrentRegionCount(); // keyed on the catch itself
+    Visit(S->getHandlerBlock());
+  }
+
+  void VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
+    RecordStmtCount(E);
+    // Counter tracks the "true" part of a conditional operator. The
+    // count in the "false" part will be calculated from this counter.
+    RegionCounter Cnt(PGO, E);
+    Visit(E->getCond()); // visited before the "true" region begins
+
+    Cnt.beginRegion();
+    CountMap[E->getTrueExpr()] = PGO.getCurrentRegionCount();
+    Visit(E->getTrueExpr());
+    Cnt.adjustForControlFlow();
+
+    Cnt.beginElseRegion(); // the false arm is visited unconditionally
+    CountMap[E->getFalseExpr()] = PGO.getCurrentRegionCount();
+    Visit(E->getFalseExpr());
+    Cnt.adjustForControlFlow();
+
+    Cnt.applyAdjustmentsToRegion(0); // 0: no break/continue counts to add
+    RecordNextStmtCount = true;
+  }
+
+  void VisitBinLAnd(const BinaryOperator *E) {
+    RecordStmtCount(E);
+    // Counter tracks the right hand side of a logical and operator.
+    RegionCounter Cnt(PGO, E);
+    Visit(E->getLHS()); // LHS is visited before the RHS region begins
+    Cnt.beginRegion();
+    CountMap[E->getRHS()] = PGO.getCurrentRegionCount();
+    Visit(E->getRHS());
+    Cnt.adjustForControlFlow();
+    Cnt.applyAdjustmentsToRegion(0); // 0: no break/continue counts to add
+    RecordNextStmtCount = true;
+  }
+
+  void VisitBinLOr(const BinaryOperator *E) {
+    RecordStmtCount(E);
+    // Counter tracks the right hand side of a logical or operator.
+    RegionCounter Cnt(PGO, E);
+    Visit(E->getLHS()); // LHS is visited before the RHS region begins
+    Cnt.beginRegion();
+    CountMap[E->getRHS()] = PGO.getCurrentRegionCount();
+    Visit(E->getRHS());
+    Cnt.adjustForControlFlow();
+    Cnt.applyAdjustmentsToRegion(0); // 0: no break/continue counts to add
+    RecordNextStmtCount = true;
+  }
+};
 }
 
 void PGOHash::combine(HashType Type) {
@@ -812,35 +650,6 @@
   return endian::read<uint64_t, little, unaligned>(Result);
 }
 
-static void emitRuntimeHook(CodeGenModule &CGM) {
-  const char *const RuntimeVarName = "__llvm_profile_runtime";
-  const char *const RuntimeUserName = "__llvm_profile_runtime_user";
-  if (CGM.getModule().getGlobalVariable(RuntimeVarName))
-    return;
-
-  // Declare the runtime hook.
-  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
-  auto *Int32Ty = llvm::Type::getInt32Ty(Ctx);
-  auto *Var = new llvm::GlobalVariable(CGM.getModule(), Int32Ty, false,
-                                       llvm::GlobalValue::ExternalLinkage,
-                                       nullptr, RuntimeVarName);
-
-  // Make a function that uses it.
-  auto *User = llvm::Function::Create(llvm::FunctionType::get(Int32Ty, false),
-                                      llvm::GlobalValue::LinkOnceODRLinkage,
-                                      RuntimeUserName, &CGM.getModule());
-  User->addFnAttr(llvm::Attribute::NoInline);
-  if (CGM.getCodeGenOpts().DisableRedZone)
-    User->addFnAttr(llvm::Attribute::NoRedZone);
-  CGBuilderTy Builder(llvm::BasicBlock::Create(CGM.getLLVMContext(), "", User));
-  auto *Load = Builder.CreateLoad(Var);
-  Builder.CreateRet(Load);
-
-  // Create a use of the function.  Now the definition of the runtime variable
-  // should get pulled in, along with any static initializears.
-  CGM.addUsedGlobal(User);
-}
-
 void CodeGenPGO::checkGlobalDecl(GlobalDecl GD) {
   // Make sure we only emit coverage mapping for one constructor/destructor.
   // Clang emits several functions for the constructor and the destructor of
@@ -864,15 +673,10 @@
     return;
   CGM.ClearUnusedCoverageMapping(D);
   setFuncName(Fn);
-  setVarLinkage(Fn->getLinkage());
 
   mapRegionCounters(D);
-  if (InstrumentRegions) {
-    emitRuntimeHook(CGM);
-    emitCounterVariables();
-    if (CGM.getCodeGenOpts().CoverageMapping)
-      emitCounterRegionMapping(D);
-  }
+  if (CGM.getCodeGenOpts().CoverageMapping)
+    emitCounterRegionMapping(D);
   if (PGOReader) {
     SourceManager &SM = CGM.getContext().getSourceManager();
     loadRegionCounts(PGOReader, SM.isInMainFile(D->getLocation()));
@@ -905,12 +709,19 @@
   if (CGM.getContext().getSourceManager().isInSystemHeader(Loc))
     return;
 
+  std::string CoverageMapping;
   llvm::raw_string_ostream OS(CoverageMapping);
   CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
                                 CGM.getContext().getSourceManager(),
                                 CGM.getLangOpts(), RegionCounterMap.get());
   MappingGen.emitCounterMapping(D, OS);
   OS.flush();
+
+  if (CoverageMapping.empty())
+    return;
+
+  CGM.getCoverageMapping()->addFunctionMappingRecord(
+      FuncNameVar, FuncName, FunctionHash, CoverageMapping);
 }
 
 void
@@ -918,21 +729,25 @@
                                     llvm::GlobalValue::LinkageTypes Linkage) {
   if (SkipCoverageMapping)
     return;
-  setFuncName(FuncName, Linkage);
-  setVarLinkage(Linkage);
-
   // Don't map the functions inside the system headers
   auto Loc = D->getBody()->getLocStart();
   if (CGM.getContext().getSourceManager().isInSystemHeader(Loc))
     return;
 
+  std::string CoverageMapping;
   llvm::raw_string_ostream OS(CoverageMapping);
   CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
                                 CGM.getContext().getSourceManager(),
                                 CGM.getLangOpts());
   MappingGen.emitEmptyMapping(D, OS);
   OS.flush();
-  buildDataVar();
+
+  if (CoverageMapping.empty())
+    return;
+
+  setFuncName(FuncName, Linkage);
+  CGM.getCoverageMapping()->addFunctionMappingRecord(
+      FuncNameVar, FuncName, FunctionHash, CoverageMapping);
 }
 
 void CodeGenPGO::computeRegionCounts(const Decl *D) {
@@ -966,34 +781,25 @@
     Fn->addFnAttr(llvm::Attribute::Cold);
 }
 
-void CodeGenPGO::emitCounterVariables() {
-  llvm::LLVMContext &Ctx = CGM.getLLVMContext();
-  llvm::ArrayType *CounterTy = llvm::ArrayType::get(llvm::Type::getInt64Ty(Ctx),
-                                                    NumRegionCounters);
-  RegionCounters =
-    new llvm::GlobalVariable(CGM.getModule(), CounterTy, false, VarLinkage,
-                             llvm::Constant::getNullValue(CounterTy),
-                             getFuncVarName("counters"));
-  RegionCounters->setAlignment(8);
-  RegionCounters->setSection(getCountersSection(CGM));
-}
-
 void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter) {
-  if (!RegionCounters)
+  if (!CGM.getCodeGenOpts().ProfileInstrGenerate || !RegionCounterMap)
     return;
-  llvm::Value *Addr =
-    Builder.CreateConstInBoundsGEP2_64(RegionCounters, 0, Counter);
-  llvm::Value *Count = Builder.CreateLoad(Addr, "pgocount");
-  Count = Builder.CreateAdd(Count, Builder.getInt64(1));
-  Builder.CreateStore(Count, Addr);
+  if (!Builder.GetInsertPoint()) // no insertion point: emit nothing
+    return;
+  auto *I8PtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
+  Builder.CreateCall4(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment),
+                      llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
+                      Builder.getInt64(FunctionHash),
+                      Builder.getInt32(NumRegionCounters), // total counters
+                      Builder.getInt32(Counter)); // index to increment
 }
 
 void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
                                   bool IsInMainFile) {
   CGM.getPGOStats().addVisited(IsInMainFile);
-  RegionCounts.reset(new std::vector<uint64_t>);
-  if (std::error_code EC = PGOReader->getFunctionCounts(
-          getFuncName(), FunctionHash, *RegionCounts)) {
+  RegionCounts.clear();
+  if (std::error_code EC =
+          PGOReader->getFunctionCounts(FuncName, FunctionHash, RegionCounts)) {
     if (EC == llvm::instrprof_error::unknown_function)
       CGM.getPGOStats().addMissing(IsInMainFile);
     else if (EC == llvm::instrprof_error::hash_mismatch)
@@ -1001,17 +807,10 @@
     else if (EC == llvm::instrprof_error::malformed)
       // TODO: Consider a more specific warning for this case.
       CGM.getPGOStats().addMismatched(IsInMainFile);
-    RegionCounts.reset();
+    RegionCounts.clear();
   }
 }
 
-void CodeGenPGO::destroyRegionCounters() {
-  RegionCounterMap.reset();
-  StmtCountMap.reset();
-  RegionCounts.reset();
-  RegionCounters = nullptr;
-}
-
 /// \brief Calculate what to divide by to scale weights.
 ///
 /// Given the maximum weight, calculate a divisor that will scale all the
diff --git a/lib/CodeGen/CodeGenPGO.h b/lib/CodeGen/CodeGenPGO.h
index fd1418f..431c850 100644
--- a/lib/CodeGen/CodeGenPGO.h
+++ b/lib/CodeGen/CodeGenPGO.h
@@ -31,39 +31,28 @@
 class CodeGenPGO {
 private:
   CodeGenModule &CGM;
-  std::unique_ptr<std::string> PrefixedFuncName;
-  StringRef RawFuncName;
-  llvm::GlobalValue::LinkageTypes VarLinkage;
+  std::string FuncName;
+  llvm::GlobalVariable *FuncNameVar;
 
   unsigned NumRegionCounters;
   uint64_t FunctionHash;
-  llvm::GlobalVariable *RegionCounters;
   std::unique_ptr<llvm::DenseMap<const Stmt *, unsigned>> RegionCounterMap;
   std::unique_ptr<llvm::DenseMap<const Stmt *, uint64_t>> StmtCountMap;
-  std::unique_ptr<std::vector<uint64_t>> RegionCounts;
+  std::vector<uint64_t> RegionCounts;
   uint64_t CurrentRegionCount;
-  std::string CoverageMapping;
   /// \brief A flag that is set to true when this function doesn't need
   /// to have coverage mapping data.
   bool SkipCoverageMapping;
 
 public:
   CodeGenPGO(CodeGenModule &CGM)
-      : CGM(CGM), NumRegionCounters(0), FunctionHash(0),
-        RegionCounters(nullptr), CurrentRegionCount(0),
+      : CGM(CGM), FuncNameVar(nullptr), NumRegionCounters(0), FunctionHash(0), CurrentRegionCount(0),
         SkipCoverageMapping(false) {}
 
   /// Whether or not we have PGO region data for the current function. This is
   /// false both when we have no data at all and when our data has been
   /// discarded.
-  bool haveRegionCounts() const { return RegionCounts != nullptr; }
-
-  /// Get the string used to identify this function in the profile data.
-  /// For functions with local linkage, this includes the main file name.
-  StringRef getFuncName() const { return StringRef(*PrefixedFuncName); }
-  std::string getFuncVarName(StringRef VarName) const {
-    return ("__llvm_profile_" + VarName + "_" + RawFuncName).str();
-  }
+  bool haveRegionCounts() const { return !RegionCounts.empty(); }
 
   /// Return the counter value of the current region.
   uint64_t getCurrentRegionCount() const { return CurrentRegionCount; }
@@ -111,13 +100,6 @@
   /// generates global variables or associates PGO data with each of the
   /// counters depending on whether we are generating or using instrumentation.
   void assignRegionCounters(const Decl *D, llvm::Function *Fn);
-  /// Emit static data structures for instrumentation data.
-  void emitInstrumentationData();
-  /// Clean up region counter state. Must be called if assignRegionCounters is
-  /// used.
-  void destroyRegionCounters();
-  /// Emit static initialization code, if any.
-  static llvm::Function *emitInitialization(CodeGenModule &CGM);
   /// Emit a coverage mapping range with a counter zero
   /// for an unused declaration.
   void emitEmptyCounterMapping(const Decl *D, StringRef FuncName,
@@ -125,7 +107,7 @@
 private:
   void setFuncName(llvm::Function *Fn);
   void setFuncName(StringRef Name, llvm::GlobalValue::LinkageTypes Linkage);
-  void setVarLinkage(llvm::GlobalValue::LinkageTypes Linkage);
+  void createFuncNameVar(llvm::GlobalValue::LinkageTypes Linkage);
   void mapRegionCounters(const Decl *D);
   void computeRegionCounts(const Decl *D);
   void applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
@@ -133,7 +115,6 @@
   void loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
                         bool IsInMainFile);
   void emitCounterVariables();
-  llvm::GlobalVariable *buildDataVar();
   void emitCounterRegionMapping(const Decl *D);
 
   /// Emit code to increment the counter at the given index
@@ -151,7 +132,7 @@
   uint64_t getRegionCount(unsigned Counter) {
     if (!haveRegionCounts())
       return 0;
-    return (*RegionCounts)[Counter];
+    return RegionCounts[Counter]; // NOTE(review): Counter is not range-checked
   }
 
   friend class RegionCounter;
diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp
index 44494ae..67a9fbe 100644
--- a/lib/CodeGen/CodeGenTypes.cpp
+++ b/lib/CodeGen/CodeGenTypes.cpp
@@ -406,7 +406,7 @@
     llvm_unreachable("Unexpected undeduced auto type!");
   case Type::Complex: {
     llvm::Type *EltTy = ConvertType(cast<ComplexType>(Ty)->getElementType());
-    ResultType = llvm::StructType::get(EltTy, EltTy, NULL);
+    ResultType = llvm::StructType::get(EltTy, EltTy, nullptr);
     break;
   }
   case Type::LValueReference:
diff --git a/lib/CodeGen/CodeGenTypes.h b/lib/CodeGen/CodeGenTypes.h
index 51e0309..64c5799 100644
--- a/lib/CodeGen/CodeGenTypes.h
+++ b/lib/CodeGen/CodeGenTypes.h
@@ -248,7 +248,8 @@
                                                   CXXCtorType CtorKind,
                                                   unsigned ExtraArgs);
   const CGFunctionInfo &arrangeFreeFunctionCall(const CallArgList &Args,
-                                                const FunctionType *Ty);
+                                                const FunctionType *Ty,
+                                                bool ChainCall);
   const CGFunctionInfo &arrangeFreeFunctionCall(QualType ResTy,
                                                 const CallArgList &args,
                                                 FunctionType::ExtInfo info,
@@ -273,7 +274,8 @@
   ///
   /// \param argTypes - must all actually be canonical as params
   const CGFunctionInfo &arrangeLLVMFunctionInfo(CanQualType returnType,
-                                                bool IsInstanceMethod,
+                                                bool instanceMethod,
+                                                bool chainCall,
                                                 ArrayRef<CanQualType> argTypes,
                                                 FunctionType::ExtInfo info,
                                                 RequiredArgs args);
diff --git a/lib/CodeGen/CoverageMappingGen.cpp b/lib/CodeGen/CoverageMappingGen.cpp
index ac0c22c..55e7334 100644
--- a/lib/CodeGen/CoverageMappingGen.cpp
+++ b/lib/CodeGen/CoverageMappingGen.cpp
@@ -15,10 +15,11 @@
 #include "CodeGenFunction.h"
 #include "clang/AST/StmtVisitor.h"
 #include "clang/Lex/Lexer.h"
-#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ProfileData/CoverageMapping.h"
-#include "llvm/ProfileData/CoverageMappingWriter.h"
 #include "llvm/ProfileData/CoverageMappingReader.h"
+#include "llvm/ProfileData/CoverageMappingWriter.h"
+#include "llvm/ProfileData/InstrProfReader.h"
 #include "llvm/Support/FileSystem.h"
 
 using namespace clang;
@@ -33,101 +34,51 @@
 
 /// \brief A region of source code that can be mapped to a counter.
 class SourceMappingRegion {
-public:
-  enum RegionFlags {
-    /// \brief This region won't be emitted if it wasn't extended.
-    /// This is useful so that we won't emit source ranges for single tokens
-    /// that we don't really care that much about, like:
-    ///   the '(' token in #define MACRO (
-    IgnoreIfNotExtended = 0x0001,
-  };
-
-private:
-  FileID File, MacroArgumentFile;
-
   Counter Count;
 
-  /// \brief A statement that initiated the count of Zero.
-  ///
-  /// This initiator statement is useful to prevent merging of unreachable
-  /// regions with different statements that caused the counter to become
-  /// unreachable.
-  const Stmt *UnreachableInitiator;
-
-  /// \brief A statement that separates certain mapping regions into groups.
-  ///
-  /// The group statement is sometimes useful when we are emitting the source
-  /// regions not in their correct lexical order, e.g. the regions for the
-  /// incrementation expression in the 'for' construct. By marking the regions
-  /// in the incrementation expression with the group statement, we avoid the
-  /// merging of the regions from the incrementation expression and the loop's
-  /// body.
-  const Stmt *Group;
-
   /// \brief The region's starting location.
-  SourceLocation LocStart;
+  Optional<SourceLocation> LocStart; // may be unset; see hasStartLoc()
 
   /// \brief The region's ending location.
-  SourceLocation LocEnd, AlternativeLocEnd;
-  unsigned Flags;
+  Optional<SourceLocation> LocEnd; // may be unset; see hasEndLoc()
 
 public:
-  SourceMappingRegion(FileID File, FileID MacroArgumentFile, Counter Count,
-                      const Stmt *UnreachableInitiator, const Stmt *Group,
-                      SourceLocation LocStart, SourceLocation LocEnd,
-                      unsigned Flags = 0)
-      : File(File), MacroArgumentFile(MacroArgumentFile), Count(Count),
-        UnreachableInitiator(UnreachableInitiator), Group(Group),
-        LocStart(LocStart), LocEnd(LocEnd), AlternativeLocEnd(LocStart),
-        Flags(Flags) {}
+  SourceMappingRegion(Counter Count, Optional<SourceLocation> LocStart,
+                      Optional<SourceLocation> LocEnd)
+      : Count(Count), LocStart(LocStart), LocEnd(LocEnd) {}
 
-  const FileID &getFile() const { return File; }
+  SourceMappingRegion(SourceMappingRegion &&Region) // memberwise move
+      : Count(std::move(Region.Count)), LocStart(std::move(Region.LocStart)),
+        LocEnd(std::move(Region.LocEnd)) {}
+
+  SourceMappingRegion &operator=(SourceMappingRegion &&RHS) {
+    Count = std::move(RHS.Count);
+    LocStart = std::move(RHS.LocStart);
+    LocEnd = std::move(RHS.LocEnd);
+    return *this;
+  }
 
   const Counter &getCounter() const { return Count; }
 
-  const SourceLocation &getStartLoc() const { return LocStart; }
+  void setCounter(Counter C) { Count = C; }
 
-  const SourceLocation &getEndLoc(const SourceManager &SM) const {
-    if (SM.getFileID(LocEnd) != File)
-      return AlternativeLocEnd;
-    return LocEnd;
+  bool hasStartLoc() const { return LocStart.hasValue(); }
+
+  void setStartLoc(SourceLocation Loc) { LocStart = Loc; }
+
+  const SourceLocation &getStartLoc() const {
+    assert(LocStart && "Region has no start location"); // must be set first
+    return *LocStart;
   }
 
-  bool hasFlag(RegionFlags Flag) const { return (Flags & Flag) != 0; }
+  bool hasEndLoc() const { return LocEnd.hasValue(); }
 
-  void setFlag(RegionFlags Flag) { Flags |= Flag; }
+  void setEndLoc(SourceLocation Loc) { LocEnd = Loc; }
 
-  void clearFlag(RegionFlags Flag) { Flags &= ~Flag; }
-
-  /// \brief Return true if two regions can be merged together.
-  bool isMergeable(SourceMappingRegion &R) {
-    // FIXME: We allow merging regions with a gap in between them. Should we?
-    return File == R.File && MacroArgumentFile == R.MacroArgumentFile &&
-           Count == R.Count && UnreachableInitiator == R.UnreachableInitiator &&
-           Group == R.Group;
+  const SourceLocation &getEndLoc() const {
+    assert(LocEnd && "Region has no end location"); // must be set first
+    return *LocEnd;
   }
-
-  /// \brief A comparison that sorts such that mergeable regions are adjacent.
-  friend bool operator<(const SourceMappingRegion &LHS,
-                        const SourceMappingRegion &RHS) {
-    return std::tie(LHS.File, LHS.MacroArgumentFile, LHS.Count,
-                    LHS.UnreachableInitiator, LHS.Group) <
-           std::tie(RHS.File, RHS.MacroArgumentFile, RHS.Count,
-                    RHS.UnreachableInitiator, RHS.Group);
-  }
-};
-
-/// \brief The state of the coverage mapping builder.
-struct SourceMappingState {
-  Counter CurrentRegionCount;
-  const Stmt *CurrentSourceGroup;
-  const Stmt *CurrentUnreachableRegionInitiator;
-
-  SourceMappingState(Counter CurrentRegionCount, const Stmt *CurrentSourceGroup,
-                     const Stmt *CurrentUnreachableRegionInitiator)
-      : CurrentRegionCount(CurrentRegionCount),
-        CurrentSourceGroup(CurrentSourceGroup),
-        CurrentUnreachableRegionInitiator(CurrentUnreachableRegionInitiator) {}
 };
 
 /// \brief Provides the common functionality for the different
@@ -139,26 +90,11 @@
   const LangOptions &LangOpts;
 
 private:
-  struct FileInfo {
-    /// \brief The file id that will be used by the coverage mapping system.
-    unsigned CovMappingFileID;
-    const FileEntry *Entry;
-
-    FileInfo(unsigned CovMappingFileID, const FileEntry *Entry)
-        : CovMappingFileID(CovMappingFileID), Entry(Entry) {}
-  };
-
-  /// \brief This mapping maps clang's FileIDs to file ids used
-  /// by the coverage mapping system and clang's file entries.
-  llvm::SmallDenseMap<FileID, FileInfo, 8> FileIDMapping;
+  /// \brief Map of clang's FileIDs to IDs used for coverage mapping.
+  llvm::SmallDenseMap<FileID, std::pair<unsigned, SourceLocation>, 8>
+      FileIDMapping;
 
 public:
-  /// \brief The statement that corresponds to the current source group.
-  const Stmt *CurrentSourceGroup;
-
-  /// \brief The statement the initiated the current unreachable region.
-  const Stmt *CurrentUnreachableRegionInitiator;
-
   /// \brief The coverage mapping regions for this function
   llvm::SmallVector<CounterMappingRegion, 32> MappingRegions;
   /// \brief The source mapping regions for this function.
@@ -166,60 +102,99 @@
 
   CoverageMappingBuilder(CoverageMappingModuleGen &CVM, SourceManager &SM,
                          const LangOptions &LangOpts)
-      : CVM(CVM), SM(SM), LangOpts(LangOpts),
-        CurrentSourceGroup(nullptr),
-        CurrentUnreachableRegionInitiator(nullptr) {}
+      : CVM(CVM), SM(SM), LangOpts(LangOpts) {}
 
   /// \brief Return the precise end location for the given token.
   SourceLocation getPreciseTokenLocEnd(SourceLocation Loc) {
-    return Lexer::getLocForEndOfToken(SM.getSpellingLoc(Loc), 0, SM, LangOpts);
+    // We avoid getLocForEndOfToken here, because it doesn't do what we want for
+    // macro locations, which we just treat as expanded files.
+    unsigned TokLen =
+        Lexer::MeasureTokenLength(SM.getSpellingLoc(Loc), SM, LangOpts);
+    return Loc.getLocWithOffset(TokLen);
   }
 
-  /// \brief Create the mapping that maps from the function's file ids to
-  /// the indices for the translation unit's filenames.
-  void createFileIDMapping(SmallVectorImpl<unsigned> &Mapping) {
-    Mapping.resize(FileIDMapping.size(), 0);
-    for (const auto &I : FileIDMapping)
-      Mapping[I.second.CovMappingFileID] = CVM.getFileID(I.second.Entry);
+  /// \brief Return the start location of an included file or expanded macro.
+  SourceLocation getStartOfFileOrMacro(SourceLocation Loc) {
+    if (Loc.isMacroID())
+      return Loc.getLocWithOffset(-SM.getFileOffset(Loc));
+    return SM.getLocForStartOfFile(SM.getFileID(Loc));
   }
 
-  /// \brief Get the coverage mapping file id that corresponds to the given
-  /// clang file id. If such file id doesn't exist, it gets added to the
-  /// mapping that maps from clang's file ids to coverage mapping file ids.
-  /// Return true if there was an error getting the coverage mapping file id.
-  /// An example of an when this function fails is when the region tries
-  /// to get a coverage file id for a location in a built-in macro.
-  bool getCoverageFileID(SourceLocation LocStart, FileID File,
-                         FileID SpellingFile, unsigned &Result) {
-    auto Mapping = FileIDMapping.find(File);
-    if (Mapping != FileIDMapping.end()) {
-      Result = Mapping->second.CovMappingFileID;
-      return false;
+  /// \brief Return the end location of an included file or expanded macro.
+  SourceLocation getEndOfFileOrMacro(SourceLocation Loc) {
+    if (Loc.isMacroID())
+      return Loc.getLocWithOffset(SM.getFileIDSize(SM.getFileID(Loc)) -
+                                  SM.getFileOffset(Loc) - 1);
+    return SM.getLocForEndOfFile(SM.getFileID(Loc));
+  }
+
+  /// \brief Find out where the current file is included or macro is expanded.
+  SourceLocation getIncludeOrExpansionLoc(SourceLocation Loc) {
+    return Loc.isMacroID() ? SM.getImmediateExpansionRange(Loc).first
+                           : SM.getIncludeLoc(SM.getFileID(Loc));
+  }
+
+  /// \brief Get the start of \c S ignoring macro argument locations.
+  SourceLocation getStart(const Stmt *S) {
+    SourceLocation Loc = S->getLocStart();
+    while (SM.isMacroArgExpansion(Loc))
+      Loc = SM.getImmediateExpansionRange(Loc).first;
+    return Loc;
+  }
+
+  /// \brief Get the end of \c S ignoring macro argument locations.
+  SourceLocation getEnd(const Stmt *S) {
+    SourceLocation Loc = S->getLocEnd();
+    while (SM.isMacroArgExpansion(Loc))
+      Loc = SM.getImmediateExpansionRange(Loc).first;
+    return Loc;
+  }
+
+  /// \brief Find the set of files we have regions for and assign IDs.
+  ///
+  /// Fills \c Mapping with the virtual file mapping needed to write out
+  /// coverage and collects the necessary file information to emit source and
+  /// expansion regions.
+  void gatherFileIDs(SmallVectorImpl<unsigned> &Mapping) {
+    FileIDMapping.clear();
+
+    SmallVector<FileID, 8> Visited;
+    SmallVector<std::pair<SourceLocation, unsigned>, 8> FileLocs;
+    for (const auto &Region : SourceRegions) {
+      SourceLocation Loc = Region.getStartLoc();
+      FileID File = SM.getFileID(Loc);
+      if (std::find(Visited.begin(), Visited.end(), File) != Visited.end())
+        continue;
+      Visited.push_back(File);
+
+      unsigned Depth = 0;
+      for (SourceLocation Parent = getIncludeOrExpansionLoc(Loc);
+           !Parent.isInvalid(); Parent = getIncludeOrExpansionLoc(Parent))
+        ++Depth;
+      FileLocs.push_back(std::make_pair(Loc, Depth));
     }
+    std::stable_sort(FileLocs.begin(), FileLocs.end(), llvm::less_second());
 
-    auto Entry = SM.getFileEntryForID(SpellingFile);
-    if (!Entry)
-      return true;
+    for (const auto &FL : FileLocs) {
+      SourceLocation Loc = FL.first;
+      FileID SpellingFile = SM.getDecomposedSpellingLoc(Loc).first;
+      auto Entry = SM.getFileEntryForID(SpellingFile);
+      if (!Entry)
+        continue;
 
-    Result = FileIDMapping.size();
-    FileIDMapping.insert(std::make_pair(File, FileInfo(Result, Entry)));
-    createFileExpansionRegion(LocStart, File);
-    return false;
+      FileIDMapping[SM.getFileID(Loc)] = std::make_pair(Mapping.size(), Loc);
+      Mapping.push_back(CVM.getFileID(Entry));
+    }
   }
 
-  /// \brief Get the coverage mapping file id that corresponds to the given
-  /// clang file id.
-  /// Return true if there was an error getting the coverage mapping file id.
-  bool getExistingCoverageFileID(FileID File, unsigned &Result) {
-    // Make sure that the file is valid.
-    if (File.isInvalid())
-      return true;
-    auto Mapping = FileIDMapping.find(File);
-    if (Mapping != FileIDMapping.end()) {
-      Result = Mapping->second.CovMappingFileID;
-      return false;
-    }
-    return true;
+  /// \brief Get the coverage mapping file ID for \c Loc.
+  ///
+  /// If no coverage file ID has been assigned for \c Loc's file, return None.
+  Optional<unsigned> getCoverageFileID(SourceLocation Loc) {
+    auto Mapping = FileIDMapping.find(SM.getFileID(Loc));
+    if (Mapping != FileIDMapping.end())
+      return Mapping->second.first;
+    return None;
   }
 
   /// \brief Return true if the given clang's file id has a corresponding
@@ -248,162 +223,83 @@
     for (const auto &I : SkippedRanges) {
       auto LocStart = I.getBegin();
       auto LocEnd = I.getEnd();
-      auto FileStart = SM.getFileID(LocStart);
-      if (!hasExistingCoverageFileID(FileStart))
-        continue;
-      auto ActualFileStart = SM.getDecomposedSpellingLoc(LocStart).first;
-      if (ActualFileStart != SM.getDecomposedSpellingLoc(LocEnd).first)
-        // Ignore regions that span across multiple files.
-        continue;
+      assert(SM.isWrittenInSameFile(LocStart, LocEnd) &&
+             "region spans multiple files");
 
-      unsigned CovFileID;
-      if (getCoverageFileID(LocStart, FileStart, ActualFileStart, CovFileID))
+      auto CovFileID = getCoverageFileID(LocStart);
+      if (!CovFileID)
         continue;
       unsigned LineStart = SM.getSpellingLineNumber(LocStart);
       unsigned ColumnStart = SM.getSpellingColumnNumber(LocStart);
       unsigned LineEnd = SM.getSpellingLineNumber(LocEnd);
       unsigned ColumnEnd = SM.getSpellingColumnNumber(LocEnd);
-      CounterMappingRegion Region(Counter(), CovFileID, LineStart, ColumnStart,
-                                  LineEnd, ColumnEnd, false,
-                                  CounterMappingRegion::SkippedRegion);
+      auto Region = CounterMappingRegion::makeSkipped(
+          *CovFileID, LineStart, ColumnStart, LineEnd, ColumnEnd);
       // Make sure that we only collect the regions that are inside
       // the souce code of this function.
-      if (Region.LineStart >= FileLineRanges[CovFileID].first &&
-          Region.LineEnd <= FileLineRanges[CovFileID].second)
+      if (Region.LineStart >= FileLineRanges[*CovFileID].first &&
+          Region.LineEnd <= FileLineRanges[*CovFileID].second)
         MappingRegions.push_back(Region);
     }
   }
 
-  /// \brief Create a mapping region that correponds to an expansion of
-  /// a macro or an embedded include.
-  void createFileExpansionRegion(SourceLocation Loc, FileID ExpandedFile) {
-    SourceLocation LocStart;
-    if (Loc.isMacroID())
-      LocStart = SM.getImmediateExpansionRange(Loc).first;
-    else {
-      LocStart = SM.getIncludeLoc(ExpandedFile);
-      if (LocStart.isInvalid())
-        return; // This file has no expansion region.
-    }
-
-    auto File = SM.getFileID(LocStart);
-    auto SpellingFile = SM.getDecomposedSpellingLoc(LocStart).first;
-    unsigned CovFileID, ExpandedFileID;
-    if (getExistingCoverageFileID(ExpandedFile, ExpandedFileID))
-      return;
-    if (getCoverageFileID(LocStart, File, SpellingFile, CovFileID))
-      return;
-    unsigned LineStart = SM.getSpellingLineNumber(LocStart);
-    unsigned ColumnStart = SM.getSpellingColumnNumber(LocStart);
-    unsigned LineEnd = LineStart;
-    // Compute the end column manually as Lexer::getLocForEndOfToken doesn't
-    // give the correct result in all cases.
-    unsigned ColumnEnd =
-        ColumnStart +
-        Lexer::MeasureTokenLength(SM.getSpellingLoc(LocStart), SM, LangOpts);
-
-    MappingRegions.push_back(CounterMappingRegion(
-        Counter(), CovFileID, LineStart, ColumnStart, LineEnd, ColumnEnd,
-        false, CounterMappingRegion::ExpansionRegion));
-    MappingRegions.back().ExpandedFileID = ExpandedFileID;
-  }
-
-  /// \brief Enter a source region group that is identified by the given
-  /// statement.
-  /// It's not possible to enter a group when there is already
-  /// another group present.
-  void beginSourceRegionGroup(const Stmt *Group) {
-    assert(!CurrentSourceGroup);
-    CurrentSourceGroup = Group;
-  }
-
-  /// \brief Exit the current source region group.
-  void endSourceRegionGroup() { CurrentSourceGroup = nullptr; }
-
-  /// \brief Associate a counter with a given source code range.
-  void mapSourceCodeRange(SourceLocation LocStart, SourceLocation LocEnd,
-                          Counter Count, const Stmt *UnreachableInitiator,
-                          const Stmt *SourceGroup, unsigned Flags = 0,
-                          FileID MacroArgumentFile = FileID()) {
-    if (SM.isMacroArgExpansion(LocStart)) {
-      // Map the code range with the macro argument's value.
-      mapSourceCodeRange(SM.getImmediateSpellingLoc(LocStart),
-                         SM.getImmediateSpellingLoc(LocEnd), Count,
-                         UnreachableInitiator, SourceGroup, Flags,
-                         SM.getFileID(LocStart));
-      // Map the code range where the macro argument is referenced.
-      SourceLocation RefLocStart(SM.getImmediateExpansionRange(LocStart).first);
-      SourceLocation RefLocEnd(RefLocStart);
-      if (SM.isMacroArgExpansion(RefLocStart))
-        mapSourceCodeRange(RefLocStart, RefLocEnd, Count, UnreachableInitiator,
-                           SourceGroup, 0, SM.getFileID(RefLocStart));
-      else
-        mapSourceCodeRange(RefLocStart, RefLocEnd, Count, UnreachableInitiator,
-                           SourceGroup);
-      return;
-    }
-    auto File = SM.getFileID(LocStart);
-    // Make sure that the file id is valid.
-    if (File.isInvalid())
-      return;
-    SourceRegions.emplace_back(File, MacroArgumentFile, Count,
-                               UnreachableInitiator, SourceGroup, LocStart,
-                               LocEnd, Flags);
-  }
-
-  void mapSourceCodeRange(SourceLocation LocStart, SourceLocation LocEnd,
-                          Counter Count, unsigned Flags = 0) {
-    mapSourceCodeRange(LocStart, LocEnd, Count,
-                       CurrentUnreachableRegionInitiator, CurrentSourceGroup,
-                       Flags);
-  }
-
-  void mapSourceCodeRange(const SourceMappingState &State,
-                          SourceLocation LocStart, SourceLocation LocEnd,
-                          unsigned Flags = 0) {
-    mapSourceCodeRange(LocStart, LocEnd, State.CurrentRegionCount,
-                       State.CurrentUnreachableRegionInitiator,
-                       State.CurrentSourceGroup, Flags);
-  }
-
   /// \brief Generate the coverage counter mapping regions from collected
   /// source regions.
   void emitSourceRegions() {
-    std::sort(SourceRegions.begin(), SourceRegions.end());
+    for (const auto &Region : SourceRegions) {
+      assert(Region.hasEndLoc() && "incomplete region");
 
-    for (auto I = SourceRegions.begin(), E = SourceRegions.end(); I != E; ++I) {
-      // Keep the original start location of this region.
-      SourceLocation LocStart = I->getStartLoc();
-      SourceLocation LocEnd = I->getEndLoc(SM);
+      SourceLocation LocStart = Region.getStartLoc();
+      assert(!SM.getFileID(LocStart).isInvalid() && "region in invalid file");
 
-      bool Ignore = I->hasFlag(SourceMappingRegion::IgnoreIfNotExtended);
-      // We need to handle mergeable regions together.
-      for (auto Next = I + 1; Next != E && Next->isMergeable(*I); ++Next) {
-        ++I;
-        LocStart = std::min(LocStart, I->getStartLoc());
-        LocEnd = std::max(LocEnd, I->getEndLoc(SM));
-        // FIXME: Should we && together the Ignore flag of multiple regions?
-        Ignore = false;
-      }
-      if (Ignore)
+      auto CovFileID = getCoverageFileID(LocStart);
+      // Ignore regions that don't have a file, such as builtin macros.
+      if (!CovFileID)
         continue;
 
+      SourceLocation LocEnd = getPreciseTokenLocEnd(Region.getEndLoc());
+      assert(SM.isWrittenInSameFile(LocStart, LocEnd) &&
+             "region spans multiple files");
+
       // Find the spilling locations for the mapping region.
-      LocEnd = getPreciseTokenLocEnd(LocEnd);
       unsigned LineStart = SM.getSpellingLineNumber(LocStart);
       unsigned ColumnStart = SM.getSpellingColumnNumber(LocStart);
       unsigned LineEnd = SM.getSpellingLineNumber(LocEnd);
       unsigned ColumnEnd = SM.getSpellingColumnNumber(LocEnd);
 
-      auto SpellingFile = SM.getDecomposedSpellingLoc(LocStart).first;
-      unsigned CovFileID;
-      if (getCoverageFileID(LocStart, I->getFile(), SpellingFile, CovFileID))
+      assert(LineStart <= LineEnd && "region start and end out of order");
+      MappingRegions.push_back(CounterMappingRegion::makeRegion(
+          Region.getCounter(), *CovFileID, LineStart, ColumnStart, LineEnd,
+          ColumnEnd));
+    }
+  }
+
+  /// \brief Generate expansion regions for each virtual file we've seen.
+  void emitExpansionRegions() {
+    for (const auto &FM : FileIDMapping) {
+      SourceLocation ExpandedLoc = FM.second.second;
+      SourceLocation ParentLoc = getIncludeOrExpansionLoc(ExpandedLoc);
+      if (ParentLoc.isInvalid())
         continue;
 
-      assert(LineStart <= LineEnd);
-      MappingRegions.push_back(CounterMappingRegion(
-          I->getCounter(), CovFileID, LineStart, ColumnStart, LineEnd,
-          ColumnEnd, false, CounterMappingRegion::CodeRegion));
+      auto ParentFileID = getCoverageFileID(ParentLoc);
+      if (!ParentFileID)
+        continue;
+      auto ExpandedFileID = getCoverageFileID(ExpandedLoc);
+      assert(ExpandedFileID && "expansion in uncovered file");
+
+      SourceLocation LocEnd = getPreciseTokenLocEnd(ParentLoc);
+      assert(SM.isWrittenInSameFile(ParentLoc, LocEnd) &&
+             "region spans multiple files");
+
+      unsigned LineStart = SM.getSpellingLineNumber(ParentLoc);
+      unsigned ColumnStart = SM.getSpellingColumnNumber(ParentLoc);
+      unsigned LineEnd = SM.getSpellingLineNumber(LocEnd);
+      unsigned ColumnEnd = SM.getSpellingColumnNumber(LocEnd);
+
+      MappingRegions.push_back(CounterMappingRegion::makeExpansion(
+          *ParentFileID, *ExpandedFileID, LineStart, ColumnStart, LineEnd,
+          ColumnEnd));
     }
   }
 };
@@ -419,14 +315,14 @@
     if (!D->hasBody())
       return;
     auto Body = D->getBody();
-    mapSourceCodeRange(Body->getLocStart(), Body->getLocEnd(), Counter());
+    SourceRegions.emplace_back(Counter(), getStart(Body), getEnd(Body));
   }
 
   /// \brief Write the mapping data to the output stream
   void write(llvm::raw_ostream &OS) {
-    emitSourceRegions();
     SmallVector<unsigned, 16> FileIDMapping;
-    createFileIDMapping(FileIDMapping);
+    gatherFileIDs(FileIDMapping);
+    emitSourceRegions();
 
     CoverageMappingWriter Writer(FileIDMapping, None, MappingRegions);
     Writer.write(OS);
@@ -441,136 +337,212 @@
   /// \brief The map of statements to count values.
   llvm::DenseMap<const Stmt *, unsigned> &CounterMap;
 
-  Counter CurrentRegionCount;
+  /// \brief A stack of currently live regions.
+  std::vector<SourceMappingRegion> RegionStack;
 
   CounterExpressionBuilder Builder;
 
-  /// \brief Return a counter that represents the
-  /// expression that subracts rhs from lhs.
+  /// \brief A location in the most recently visited file or macro.
+  ///
+  /// This is used to adjust the active source regions appropriately when
+  /// expressions cross file or macro boundaries.
+  SourceLocation MostRecentLocation;
+
+  /// \brief Return a counter for the subtraction of \c RHS from \c LHS.
   Counter subtractCounters(Counter LHS, Counter RHS) {
     return Builder.subtract(LHS, RHS);
   }
 
-  /// \brief Return a counter that represents the
-  /// the exression that adds lhs and rhs.
+  /// \brief Return a counter for the sum of \c LHS and \c RHS.
   Counter addCounters(Counter LHS, Counter RHS) {
     return Builder.add(LHS, RHS);
   }
 
+  Counter addCounters(Counter C1, Counter C2, Counter C3) {
+    return addCounters(addCounters(C1, C2), C3);
+  }
+
+  Counter addCounters(Counter C1, Counter C2, Counter C3, Counter C4) {
+    return addCounters(addCounters(C1, C2, C3), C4);
+  }
+
   /// \brief Return the region counter for the given statement.
+  ///
   /// This should only be called on statements that have a dedicated counter.
-  unsigned getRegionCounter(const Stmt *S) { return CounterMap[S]; }
-
-  /// \brief Return the region count for the counter at the given index.
-  Counter getRegionCount(unsigned CounterId) {
-    return Counter::getCounter(CounterId);
+  Counter getRegionCounter(const Stmt *S) {
+    return Counter::getCounter(CounterMap[S]);
   }
 
-  /// \brief Return the counter value of the current region.
-  Counter getCurrentRegionCount() { return CurrentRegionCount; }
+  /// \brief Push a region onto the stack.
+  ///
+  /// Returns the index on the stack where the region was pushed. This can be
+  /// used with popRegions to exit a "scope", ending the region that was pushed.
+  size_t pushRegion(Counter Count, Optional<SourceLocation> StartLoc = None,
+                    Optional<SourceLocation> EndLoc = None) {
+    if (StartLoc)
+      MostRecentLocation = *StartLoc;
+    RegionStack.emplace_back(Count, StartLoc, EndLoc);
 
-  /// \brief Set the counter value for the current region.
-  /// This is used to keep track of changes to the most recent counter
-  /// from control flow and non-local exits.
-  void setCurrentRegionCount(Counter Count) {
-    CurrentRegionCount = Count;
-    CurrentUnreachableRegionInitiator = nullptr;
+    return RegionStack.size() - 1;
   }
 
-  /// \brief Indicate that the current region is never reached,
-  /// and thus should have a counter value of zero.
-  /// This is important so that subsequent regions can correctly track
-  /// their parent counts.
-  void setCurrentRegionUnreachable(const Stmt *Initiator) {
-    CurrentRegionCount = Counter::getZero();
-    CurrentUnreachableRegionInitiator = Initiator;
+  /// \brief Pop regions from the stack into the function's list of regions.
+  ///
+  /// Adds all regions from \c ParentIndex to the top of the stack to the
+  /// function's \c SourceRegions.
+  void popRegions(size_t ParentIndex) {
+    assert(RegionStack.size() >= ParentIndex && "parent not in stack");
+    while (RegionStack.size() > ParentIndex) {
+      SourceMappingRegion &Region = RegionStack.back();
+      if (Region.hasStartLoc()) {
+        SourceLocation StartLoc = Region.getStartLoc();
+        SourceLocation EndLoc = Region.hasEndLoc()
+                                    ? Region.getEndLoc()
+                                    : RegionStack[ParentIndex].getEndLoc();
+        while (!SM.isWrittenInSameFile(StartLoc, EndLoc)) {
+          // The region ends in a nested file or macro expansion. Create a
+          // separate region for each expansion.
+          SourceLocation NestedLoc = getStartOfFileOrMacro(EndLoc);
+          assert(SM.isWrittenInSameFile(NestedLoc, EndLoc));
+
+          SourceRegions.emplace_back(Region.getCounter(), NestedLoc, EndLoc);
+
+          EndLoc = getIncludeOrExpansionLoc(EndLoc);
+          assert(!EndLoc.isInvalid() &&
+                 "File exit was not handled before popRegions");
+        }
+        Region.setEndLoc(EndLoc);
+
+        MostRecentLocation = EndLoc;
+        // If this region happens to span an entire expansion, we need to make
+        // sure we don't overlap the parent region with it.
+        if (StartLoc == getStartOfFileOrMacro(StartLoc) &&
+            EndLoc == getEndOfFileOrMacro(EndLoc))
+          MostRecentLocation = getIncludeOrExpansionLoc(EndLoc);
+
+        assert(SM.isWrittenInSameFile(Region.getStartLoc(), EndLoc));
+        SourceRegions.push_back(std::move(Region));
+      }
+      RegionStack.pop_back();
+    }
   }
 
-  /// \brief A counter for a particular region.
-  /// This is the primary interface through
-  /// which the coverage mapping builder manages counters and their values.
-  class RegionMapper {
-    CounterCoverageMappingBuilder &Mapping;
-    Counter Count;
-    Counter ParentCount;
-    Counter RegionCount;
-    Counter Adjust;
+  /// \brief Return the currently active region.
+  SourceMappingRegion &getRegion() {
+    assert(!RegionStack.empty() && "statement has no region");
+    return RegionStack.back();
+  }
 
-  public:
-    RegionMapper(CounterCoverageMappingBuilder *Mapper, const Stmt *S)
-        : Mapping(*Mapper),
-          Count(Mapper->getRegionCount(Mapper->getRegionCounter(S))),
-          ParentCount(Mapper->getCurrentRegionCount()) {}
+  /// \brief Propagate counts through the children of \c S.
+  Counter propagateCounts(Counter TopCount, const Stmt *S) {
+    size_t Index = pushRegion(TopCount, getStart(S), getEnd(S));
+    Visit(S);
+    Counter ExitCount = getRegion().getCounter();
+    popRegions(Index);
+    return ExitCount;
+  }
 
-    /// Get the value of the counter. In most cases this is the number of times
-    /// the region of the counter was entered, but for switch labels it's the
-    /// number of direct jumps to that label.
-    Counter getCount() const { return Count; }
+  /// \brief Adjust the most recently visited location to \c EndLoc.
+  ///
+  /// This should be used after visiting any statements in non-source order.
+  void adjustForOutOfOrderTraversal(SourceLocation EndLoc) {
+    MostRecentLocation = EndLoc;
+    if (MostRecentLocation == getEndOfFileOrMacro(MostRecentLocation))
+      MostRecentLocation = getIncludeOrExpansionLoc(MostRecentLocation);
+  }
 
-    /// Get the value of the counter with adjustments applied. Adjustments occur
-    /// when control enters or leaves the region abnormally; i.e., if there is a
-    /// jump to a label within the region, or if the function can return from
-    /// within the region. The adjusted count, then, is the value of the counter
-    /// at the end of the region.
-    Counter getAdjustedCount() const {
-      return Mapping.addCounters(Count, Adjust);
+  /// \brief Check whether \c Loc is included or expanded from \c Parent.
+  bool isNestedIn(SourceLocation Loc, FileID Parent) {
+    do {
+      Loc = getIncludeOrExpansionLoc(Loc);
+      if (Loc.isInvalid())
+        return false;
+    } while (!SM.isInFileID(Loc, Parent));
+    return true;
+  }
+
+  /// \brief Adjust regions and state when \c NewLoc exits a file.
+  ///
+  /// If moving from our most recently tracked location to \c NewLoc exits any
+  /// files, this adjusts our current region stack and creates the file regions
+  /// for the exited file.
+  void handleFileExit(SourceLocation NewLoc) {
+    if (SM.isWrittenInSameFile(MostRecentLocation, NewLoc))
+      return;
+
+    // If NewLoc is not in a file that contains MostRecentLocation, walk up to
+    // find the common ancestor.
+    SourceLocation LCA = NewLoc;
+    FileID ParentFile = SM.getFileID(LCA);
+    while (!isNestedIn(MostRecentLocation, ParentFile)) {
+      LCA = getIncludeOrExpansionLoc(LCA);
+      if (LCA.isInvalid() || SM.isWrittenInSameFile(LCA, MostRecentLocation)) {
+        // Since there isn't a common ancestor, no file was exited. We just need
+        // to adjust our location to the new file.
+        MostRecentLocation = NewLoc;
+        return;
+      }
+      ParentFile = SM.getFileID(LCA);
     }
 
-    /// Get the value of the counter in this region's parent, i.e., the region
-    /// that was active when this region began. This is useful for deriving
-    /// counts in implicitly counted regions, like the false case of a condition
-    /// or the normal exits of a loop.
-    Counter getParentCount() const { return ParentCount; }
+    llvm::SmallSet<SourceLocation, 8> StartLocs;
+    Optional<Counter> ParentCounter;
+    for (auto I = RegionStack.rbegin(), E = RegionStack.rend(); I != E; ++I) {
+      if (!I->hasStartLoc())
+        continue;
+      SourceLocation Loc = I->getStartLoc();
+      if (!isNestedIn(Loc, ParentFile)) {
+        ParentCounter = I->getCounter();
+        break;
+      }
 
-    /// Activate the counter by emitting an increment and starting to track
-    /// adjustments. If AddIncomingFallThrough is true, the current region count
-    /// will be added to the counter for the purposes of tracking the region.
-    void beginRegion(bool AddIncomingFallThrough = false) {
-      RegionCount = Count;
-      if (AddIncomingFallThrough)
-        RegionCount =
-            Mapping.addCounters(RegionCount, Mapping.getCurrentRegionCount());
-      Mapping.setCurrentRegionCount(RegionCount);
+      while (!SM.isInFileID(Loc, ParentFile)) {
+        // The most nested region for each start location is the one with the
+        // correct count. We avoid creating redundant regions by stopping once
+        // we've seen this region.
+        if (StartLocs.insert(Loc).second)
+          SourceRegions.emplace_back(I->getCounter(), Loc,
+                                     getEndOfFileOrMacro(Loc));
+        Loc = getIncludeOrExpansionLoc(Loc);
+      }
+      I->setStartLoc(getPreciseTokenLocEnd(Loc));
     }
 
-    /// For counters on boolean branches, begins tracking adjustments for the
-    /// uncounted path.
-    void beginElseRegion() {
-      RegionCount = Mapping.subtractCounters(ParentCount, Count);
-      Mapping.setCurrentRegionCount(RegionCount);
+    if (ParentCounter) {
+      // If the file is contained completely by another region and doesn't
+      // immediately start its own region, the whole file gets a region
+      // corresponding to the parent.
+      SourceLocation Loc = MostRecentLocation;
+      while (isNestedIn(Loc, ParentFile)) {
+        SourceLocation FileStart = getStartOfFileOrMacro(Loc);
+        if (StartLocs.insert(FileStart).second)
+          SourceRegions.emplace_back(*ParentCounter, FileStart,
+                                     getEndOfFileOrMacro(Loc));
+        Loc = getIncludeOrExpansionLoc(Loc);
+      }
     }
 
-    /// Reset the current region count.
-    void setCurrentRegionCount(Counter CurrentCount) {
-      RegionCount = CurrentCount;
-      Mapping.setCurrentRegionCount(RegionCount);
-    }
+    MostRecentLocation = NewLoc;
+  }
 
-    /// Adjust for non-local control flow after emitting a subexpression or
-    /// substatement. This must be called to account for constructs such as
-    /// gotos,
-    /// labels, and returns, so that we can ensure that our region's count is
-    /// correct in the code that follows.
-    void adjustForControlFlow() {
-      Adjust = Mapping.addCounters(
-          Adjust, Mapping.subtractCounters(Mapping.getCurrentRegionCount(),
-                                           RegionCount));
-      // Reset the region count in case this is called again later.
-      RegionCount = Mapping.getCurrentRegionCount();
-    }
+  /// \brief Ensure that \c S is included in the current region.
+  void extendRegion(const Stmt *S) {
+    SourceMappingRegion &Region = getRegion();
+    SourceLocation StartLoc = getStart(S);
 
-    /// Commit all adjustments to the current region. If the region is a loop,
-    /// the LoopAdjust value should be the count of all the breaks and continues
-    /// from the loop, to compensate for those counts being deducted from the
-    /// adjustments for the body of the loop.
-    void applyAdjustmentsToRegion() {
-      Mapping.setCurrentRegionCount(Mapping.addCounters(ParentCount, Adjust));
-    }
-    void applyAdjustmentsToRegion(Counter LoopAdjust) {
-      Mapping.setCurrentRegionCount(Mapping.addCounters(
-          Mapping.addCounters(ParentCount, Adjust), LoopAdjust));
-    }
-  };
+    handleFileExit(StartLoc);
+    if (!Region.hasStartLoc())
+      Region.setStartLoc(StartLoc);
+  }
+
+  /// \brief Mark \c S as a terminator, starting a zero region.
+  void terminateRegion(const Stmt *S) {
+    extendRegion(S);
+    SourceMappingRegion &Region = getRegion();
+    if (!Region.hasEndLoc())
+      Region.setEndLoc(getEnd(S));
+    pushRegion(Counter::getZero());
+  }
 
   /// \brief Keep counts of breaks and continues inside loops.
   struct BreakContinue {
@@ -587,452 +559,301 @@
 
   /// \brief Write the mapping data to the output stream
   void write(llvm::raw_ostream &OS) {
-    emitSourceRegions();
     llvm::SmallVector<unsigned, 8> VirtualFileMapping;
-    createFileIDMapping(VirtualFileMapping);
+    gatherFileIDs(VirtualFileMapping);
+    emitSourceRegions();
+    emitExpansionRegions();
     gatherSkippedRegions();
 
-    CoverageMappingWriter Writer(
-        VirtualFileMapping, Builder.getExpressions(), MappingRegions);
+    CoverageMappingWriter Writer(VirtualFileMapping, Builder.getExpressions(),
+                                 MappingRegions);
     Writer.write(OS);
   }
 
-  /// \brief Return the current source mapping state.
-  SourceMappingState getCurrentState() const {
-    return SourceMappingState(CurrentRegionCount, CurrentSourceGroup,
-                              CurrentUnreachableRegionInitiator);
-  }
-
-  /// \brief Associate the source code range with the current region count.
-  void mapSourceCodeRange(SourceLocation LocStart, SourceLocation LocEnd,
-                          unsigned Flags = 0) {
-    CoverageMappingBuilder::mapSourceCodeRange(LocStart, LocEnd,
-                                               CurrentRegionCount, Flags);
-  }
-
-  void mapSourceCodeRange(SourceLocation LocStart) {
-    CoverageMappingBuilder::mapSourceCodeRange(LocStart, LocStart,
-                                               CurrentRegionCount);
-  }
-
-  /// \brief Associate the source range of a token with the current region
-  /// count.
-  /// Ignore the source range for this token if it produces a distinct
-  /// mapping region with no other source ranges.
-  void mapToken(SourceLocation LocStart) {
-    CoverageMappingBuilder::mapSourceCodeRange(
-        LocStart, LocStart, CurrentRegionCount,
-        SourceMappingRegion::IgnoreIfNotExtended);
-  }
-
-  void mapToken(const SourceMappingState &State, SourceLocation LocStart) {
-    CoverageMappingBuilder::mapSourceCodeRange(
-        State, LocStart, LocStart, SourceMappingRegion::IgnoreIfNotExtended);
-  }
-
   void VisitStmt(const Stmt *S) {
-    mapSourceCodeRange(S->getLocStart());
+    if (!S->getLocStart().isInvalid())
+      extendRegion(S);
     for (Stmt::const_child_range I = S->children(); I; ++I) {
       if (*I)
         this->Visit(*I);
     }
+    handleFileExit(getEnd(S));
   }
 
   void VisitDecl(const Decl *D) {
-    if (!D->hasBody())
-      return;
-    // Counter tracks entry to the function body.
-    auto Body = D->getBody();
-    RegionMapper Cnt(this, Body);
-    Cnt.beginRegion();
-    Visit(Body);
-  }
-
-  void VisitDeclStmt(const DeclStmt *S) {
-    mapSourceCodeRange(S->getLocStart());
-    for (Stmt::const_child_range I = static_cast<const Stmt *>(S)->children();
-         I; ++I) {
-      if (*I)
-        this->Visit(*I);
-    }
-  }
-
-  void VisitCompoundStmt(const CompoundStmt *S) {
-    SourceMappingState State = getCurrentState();
-    mapSourceCodeRange(S->getLBracLoc());
-    for (Stmt::const_child_range I = S->children(); I; ++I) {
-      if (*I)
-        this->Visit(*I);
-    }
-    CoverageMappingBuilder::mapSourceCodeRange(State, S->getRBracLoc(),
-                                               S->getRBracLoc());
+    Stmt *Body = D->getBody();
+    propagateCounts(getRegionCounter(Body), Body);
   }
 
   void VisitReturnStmt(const ReturnStmt *S) {
-    mapSourceCodeRange(S->getLocStart());
+    extendRegion(S);
     if (S->getRetValue())
       Visit(S->getRetValue());
-    setCurrentRegionUnreachable(S);
+    terminateRegion(S);
   }
 
-  void VisitGotoStmt(const GotoStmt *S) {
-    mapSourceCodeRange(S->getLocStart());
-    mapToken(S->getLabelLoc());
-    setCurrentRegionUnreachable(S);
-  }
+  void VisitGotoStmt(const GotoStmt *S) { terminateRegion(S); }
 
   void VisitLabelStmt(const LabelStmt *S) {
-    // Counter tracks the block following the label.
-    RegionMapper Cnt(this, S);
-    Cnt.beginRegion();
-    mapSourceCodeRange(S->getLocStart());
-    // Can't map the ':' token as its location isn't known.
+    SourceLocation Start = getStart(S);
+    // We can't extendRegion here or we risk overlapping with our new region.
+    handleFileExit(Start);
+    pushRegion(getRegionCounter(S), Start);
     Visit(S->getSubStmt());
   }
 
   void VisitBreakStmt(const BreakStmt *S) {
-    mapSourceCodeRange(S->getLocStart());
     assert(!BreakContinueStack.empty() && "break not in a loop or switch!");
     BreakContinueStack.back().BreakCount = addCounters(
-        BreakContinueStack.back().BreakCount, getCurrentRegionCount());
-    setCurrentRegionUnreachable(S);
+        BreakContinueStack.back().BreakCount, getRegion().getCounter());
+    terminateRegion(S);
   }
 
   void VisitContinueStmt(const ContinueStmt *S) {
-    mapSourceCodeRange(S->getLocStart());
     assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
     BreakContinueStack.back().ContinueCount = addCounters(
-        BreakContinueStack.back().ContinueCount, getCurrentRegionCount());
-    setCurrentRegionUnreachable(S);
+        BreakContinueStack.back().ContinueCount, getRegion().getCounter());
+    terminateRegion(S);
   }
 
   void VisitWhileStmt(const WhileStmt *S) {
-    mapSourceCodeRange(S->getLocStart());
-    // Counter tracks the body of the loop.
-    RegionMapper Cnt(this, S);
-    BreakContinueStack.push_back(BreakContinue());
-    // Visit the body region first so the break/continue adjustments can be
-    // included when visiting the condition.
-    Cnt.beginRegion();
-    Visit(S->getBody());
-    Cnt.adjustForControlFlow();
+    extendRegion(S);
 
-    // ...then go back and propagate counts through the condition. The count
-    // at the start of the condition is the sum of the incoming edges,
-    // the backedge from the end of the loop body, and the edges from
-    // continue statements.
+    Counter ParentCount = getRegion().getCounter();
+    Counter BodyCount = getRegionCounter(S);
+
+    // Handle the body first so that we can get the backedge count.
+    BreakContinueStack.push_back(BreakContinue());
+    extendRegion(S->getBody());
+    Counter BackedgeCount = propagateCounts(BodyCount, S->getBody());
     BreakContinue BC = BreakContinueStack.pop_back_val();
-    Cnt.setCurrentRegionCount(
-        addCounters(Cnt.getParentCount(),
-                    addCounters(Cnt.getAdjustedCount(), BC.ContinueCount)));
-    beginSourceRegionGroup(S->getCond());
-    Visit(S->getCond());
-    endSourceRegionGroup();
-    Cnt.adjustForControlFlow();
-    Cnt.applyAdjustmentsToRegion(addCounters(BC.BreakCount, BC.ContinueCount));
+
+    // Go back to handle the condition.
+    Counter CondCount =
+        addCounters(ParentCount, BackedgeCount, BC.ContinueCount);
+    propagateCounts(CondCount, S->getCond());
+    adjustForOutOfOrderTraversal(getEnd(S));
+
+    Counter OutCount =
+        addCounters(BC.BreakCount, subtractCounters(CondCount, BodyCount));
+    if (OutCount != ParentCount)
+      pushRegion(OutCount);
   }
 
   void VisitDoStmt(const DoStmt *S) {
-    mapSourceCodeRange(S->getLocStart());
-    // Counter tracks the body of the loop.
-    RegionMapper Cnt(this, S);
-    BreakContinueStack.push_back(BreakContinue());
-    Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
-    Visit(S->getBody());
-    Cnt.adjustForControlFlow();
+    extendRegion(S);
 
+    Counter ParentCount = getRegion().getCounter();
+    Counter BodyCount = getRegionCounter(S);
+
+    BreakContinueStack.push_back(BreakContinue());
+    extendRegion(S->getBody());
+    Counter BackedgeCount =
+        propagateCounts(addCounters(ParentCount, BodyCount), S->getBody());
     BreakContinue BC = BreakContinueStack.pop_back_val();
-    // The count at the start of the condition is equal to the count at the
-    // end of the body. The adjusted count does not include either the
-    // fall-through count coming into the loop or the continue count, so add
-    // both of those separately. This is coincidentally the same equation as
-    // with while loops but for different reasons.
-    Cnt.setCurrentRegionCount(
-        addCounters(Cnt.getParentCount(),
-                    addCounters(Cnt.getAdjustedCount(), BC.ContinueCount)));
-    Visit(S->getCond());
-    Cnt.adjustForControlFlow();
-    Cnt.applyAdjustmentsToRegion(addCounters(BC.BreakCount, BC.ContinueCount));
+
+    Counter CondCount = addCounters(BackedgeCount, BC.ContinueCount);
+    propagateCounts(CondCount, S->getCond());
+
+    Counter OutCount =
+        addCounters(BC.BreakCount, subtractCounters(CondCount, BodyCount));
+    if (OutCount != ParentCount)
+      pushRegion(OutCount);
   }
 
   void VisitForStmt(const ForStmt *S) {
-    mapSourceCodeRange(S->getLocStart());
+    extendRegion(S);
     if (S->getInit())
       Visit(S->getInit());
 
-    // Counter tracks the body of the loop.
-    RegionMapper Cnt(this, S);
+    Counter ParentCount = getRegion().getCounter();
+    Counter BodyCount = getRegionCounter(S);
+
+    // Handle the body first so that we can get the backedge count.
     BreakContinueStack.push_back(BreakContinue());
-    // Visit the body region first. (This is basically the same as a while
-    // loop; see further comments in VisitWhileStmt.)
-    Cnt.beginRegion();
-    Visit(S->getBody());
-    Cnt.adjustForControlFlow();
+    extendRegion(S->getBody());
+    Counter BackedgeCount = propagateCounts(BodyCount, S->getBody());
+    BreakContinue BC = BreakContinueStack.pop_back_val();
 
     // The increment is essentially part of the body but it needs to include
     // the count for all the continue statements.
-    if (S->getInc()) {
-      Cnt.setCurrentRegionCount(addCounters(
-          getCurrentRegionCount(), BreakContinueStack.back().ContinueCount));
-      beginSourceRegionGroup(S->getInc());
-      Visit(S->getInc());
-      endSourceRegionGroup();
-      Cnt.adjustForControlFlow();
+    if (const Stmt *Inc = S->getInc())
+      propagateCounts(addCounters(BackedgeCount, BC.ContinueCount), Inc);
+
+    // Go back to handle the condition.
+    Counter CondCount =
+        addCounters(ParentCount, BackedgeCount, BC.ContinueCount);
+    if (const Expr *Cond = S->getCond()) {
+      propagateCounts(CondCount, Cond);
+      adjustForOutOfOrderTraversal(getEnd(S));
     }
 
-    BreakContinue BC = BreakContinueStack.pop_back_val();
-
-    // ...then go back and propagate counts through the condition.
-    if (S->getCond()) {
-      Cnt.setCurrentRegionCount(
-          addCounters(addCounters(Cnt.getParentCount(), Cnt.getAdjustedCount()),
-                      BC.ContinueCount));
-      beginSourceRegionGroup(S->getCond());
-      Visit(S->getCond());
-      endSourceRegionGroup();
-      Cnt.adjustForControlFlow();
-    }
-    Cnt.applyAdjustmentsToRegion(addCounters(BC.BreakCount, BC.ContinueCount));
+    Counter OutCount =
+        addCounters(BC.BreakCount, subtractCounters(CondCount, BodyCount));
+    if (OutCount != ParentCount)
+      pushRegion(OutCount);
   }
 
   void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
-    mapSourceCodeRange(S->getLocStart());
+    extendRegion(S);
+    Visit(S->getLoopVarStmt());
     Visit(S->getRangeStmt());
-    Visit(S->getBeginEndStmt());
-    // Counter tracks the body of the loop.
-    RegionMapper Cnt(this, S);
+
+    Counter ParentCount = getRegion().getCounter();
+    Counter BodyCount = getRegionCounter(S);
+
     BreakContinueStack.push_back(BreakContinue());
-    // Visit the body region first. (This is basically the same as a while
-    // loop; see further comments in VisitWhileStmt.)
-    Cnt.beginRegion();
-    Visit(S->getBody());
-    Cnt.adjustForControlFlow();
+    extendRegion(S->getBody());
+    Counter BackedgeCount = propagateCounts(BodyCount, S->getBody());
     BreakContinue BC = BreakContinueStack.pop_back_val();
-    Cnt.applyAdjustmentsToRegion(addCounters(BC.BreakCount, BC.ContinueCount));
+
+    Counter OutCount = addCounters(ParentCount, BC.BreakCount, BC.ContinueCount,
+                                   subtractCounters(BodyCount, BackedgeCount));
+    if (OutCount != ParentCount)
+      pushRegion(OutCount);
   }
 
   void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
-    mapSourceCodeRange(S->getLocStart());
+    extendRegion(S);
     Visit(S->getElement());
-    // Counter tracks the body of the loop.
-    RegionMapper Cnt(this, S);
+
+    Counter ParentCount = getRegion().getCounter();
+    Counter BodyCount = getRegionCounter(S);
+
     BreakContinueStack.push_back(BreakContinue());
-    Cnt.beginRegion();
-    Visit(S->getBody());
+    extendRegion(S->getBody());
+    Counter BackedgeCount = propagateCounts(BodyCount, S->getBody());
     BreakContinue BC = BreakContinueStack.pop_back_val();
-    Cnt.adjustForControlFlow();
-    Cnt.applyAdjustmentsToRegion(addCounters(BC.BreakCount, BC.ContinueCount));
+
+    Counter OutCount = addCounters(ParentCount, BC.BreakCount, BC.ContinueCount,
+                                   subtractCounters(BodyCount, BackedgeCount));
+    if (OutCount != ParentCount)
+      pushRegion(OutCount);
   }
 
   void VisitSwitchStmt(const SwitchStmt *S) {
-    mapSourceCodeRange(S->getLocStart());
+    extendRegion(S);
     Visit(S->getCond());
+
     BreakContinueStack.push_back(BreakContinue());
-    // Map the '}' for the body to have the same count as the regions after
-    // the switch.
-    SourceLocation RBracLoc;
-    if (const auto *CS = dyn_cast<CompoundStmt>(S->getBody())) {
-      mapSourceCodeRange(CS->getLBracLoc());
-      setCurrentRegionUnreachable(S);
-      for (Stmt::const_child_range I = CS->children(); I; ++I) {
-        if (*I)
-          this->Visit(*I);
+
+    const Stmt *Body = S->getBody();
+    extendRegion(Body);
+    if (const auto *CS = dyn_cast<CompoundStmt>(Body)) {
+      if (!CS->body_empty()) {
+        // The body of the switch needs a zero region so that fallthrough counts
+        // behave correctly, but it would be misleading to include the braces of
+        // the compound statement in the zeroed area, so we need to handle this
+        // specially.
+        size_t Index =
+            pushRegion(Counter::getZero(), getStart(CS->body_front()),
+                       getEnd(CS->body_back()));
+        for (const auto &Child : CS->children())
+          Visit(Child);
+        popRegions(Index);
       }
-      RBracLoc = CS->getRBracLoc();
-    } else {
-      setCurrentRegionUnreachable(S);
-      Visit(S->getBody());
-    }
-    // If the switch is inside a loop, add the continue counts.
+    } else
+      propagateCounts(Counter::getZero(), Body);
     BreakContinue BC = BreakContinueStack.pop_back_val();
+
     if (!BreakContinueStack.empty())
       BreakContinueStack.back().ContinueCount = addCounters(
           BreakContinueStack.back().ContinueCount, BC.ContinueCount);
-    // Counter tracks the exit block of the switch.
-    RegionMapper ExitCnt(this, S);
-    ExitCnt.beginRegion();
-    if (RBracLoc.isValid())
-      mapSourceCodeRange(RBracLoc);
+
+    Counter ExitCount = getRegionCounter(S);
+    pushRegion(ExitCount);
   }
 
-  void VisitCaseStmt(const CaseStmt *S) {
-    // Counter for this particular case. This counts only jumps from the
-    // switch header and does not include fallthrough from the case before
-    // this one.
-    RegionMapper Cnt(this, S);
-    Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
-    mapSourceCodeRange(S->getLocStart());
-    mapToken(S->getColonLoc());
-    Visit(S->getSubStmt());
-  }
+  void VisitSwitchCase(const SwitchCase *S) {
+    extendRegion(S);
 
-  void VisitDefaultStmt(const DefaultStmt *S) {
-    // Counter for this default case. This does not include fallthrough from
-    // the previous case.
-    RegionMapper Cnt(this, S);
-    Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
-    mapSourceCodeRange(S->getLocStart());
-    mapToken(S->getColonLoc());
+    SourceMappingRegion &Parent = getRegion();
+
+    Counter Count = addCounters(Parent.getCounter(), getRegionCounter(S));
+    // Reuse the existing region if it starts at our label. This is typical of
+    // the first case in a switch.
+    if (Parent.hasStartLoc() && Parent.getStartLoc() == getStart(S))
+      Parent.setCounter(Count);
+    else
+      pushRegion(Count, getStart(S));
+
+    if (const CaseStmt *CS = dyn_cast<CaseStmt>(S)) {
+      Visit(CS->getLHS());
+      if (const Expr *RHS = CS->getRHS())
+        Visit(RHS);
+    }
     Visit(S->getSubStmt());
   }
 
   void VisitIfStmt(const IfStmt *S) {
-    mapSourceCodeRange(S->getLocStart());
-    Visit(S->getCond());
-    mapToken(S->getElseLoc());
+    extendRegion(S);
 
-    // Counter tracks the "then" part of an if statement. The count for
-    // the "else" part, if it exists, will be calculated from this counter.
-    RegionMapper Cnt(this, S);
-    Cnt.beginRegion();
-    Visit(S->getThen());
-    Cnt.adjustForControlFlow();
+    Counter ParentCount = getRegion().getCounter();
+    Counter ThenCount = getRegionCounter(S);
 
-    if (S->getElse()) {
-      Cnt.beginElseRegion();
-      Visit(S->getElse());
-      Cnt.adjustForControlFlow();
-    }
-    Cnt.applyAdjustmentsToRegion();
+    // Emitting a counter for the condition makes it easier to interpret the
+    // counter for the body when looking at the coverage.
+    propagateCounts(ParentCount, S->getCond());
+
+    extendRegion(S->getThen());
+    Counter OutCount = propagateCounts(ThenCount, S->getThen());
+
+    Counter ElseCount = subtractCounters(ParentCount, ThenCount);
+    if (const Stmt *Else = S->getElse()) {
+      extendRegion(S->getElse());
+      OutCount = addCounters(OutCount, propagateCounts(ElseCount, Else));
+    } else
+      OutCount = addCounters(OutCount, ElseCount);
+
+    if (OutCount != ParentCount)
+      pushRegion(OutCount);
   }
 
   void VisitCXXTryStmt(const CXXTryStmt *S) {
-    mapSourceCodeRange(S->getLocStart());
+    extendRegion(S);
     Visit(S->getTryBlock());
     for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
       Visit(S->getHandler(I));
-    // Counter tracks the continuation block of the try statement.
-    RegionMapper Cnt(this, S);
-    Cnt.beginRegion();
+
+    Counter ExitCount = getRegionCounter(S);
+    pushRegion(ExitCount);
   }
 
   void VisitCXXCatchStmt(const CXXCatchStmt *S) {
-    mapSourceCodeRange(S->getLocStart());
-    // Counter tracks the catch statement's handler block.
-    RegionMapper Cnt(this, S);
-    Cnt.beginRegion();
-    Visit(S->getHandlerBlock());
+    extendRegion(S);
+    propagateCounts(getRegionCounter(S), S->getHandlerBlock());
   }
 
   void VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
-    Visit(E->getCond());
-    mapToken(E->getQuestionLoc());
-    auto State = getCurrentState();
+    extendRegion(E);
 
-    // Counter tracks the "true" part of a conditional operator. The
-    // count in the "false" part will be calculated from this counter.
-    RegionMapper Cnt(this, E);
-    Cnt.beginRegion();
-    Visit(E->getTrueExpr());
-    Cnt.adjustForControlFlow();
+    Counter ParentCount = getRegion().getCounter();
+    Counter TrueCount = getRegionCounter(E);
 
-    mapToken(State, E->getColonLoc());
-
-    Cnt.beginElseRegion();
-    Visit(E->getFalseExpr());
-    Cnt.adjustForControlFlow();
-
-    Cnt.applyAdjustmentsToRegion();
+    propagateCounts(TrueCount, E->getTrueExpr());
+    propagateCounts(subtractCounters(ParentCount, TrueCount),
+                    E->getFalseExpr());
   }
 
   void VisitBinLAnd(const BinaryOperator *E) {
+    extendRegion(E);
     Visit(E->getLHS());
-    mapToken(E->getOperatorLoc());
-    // Counter tracks the right hand side of a logical and operator.
-    RegionMapper Cnt(this, E);
-    Cnt.beginRegion();
-    Visit(E->getRHS());
-    Cnt.adjustForControlFlow();
-    Cnt.applyAdjustmentsToRegion();
+
+    extendRegion(E->getRHS());
+    propagateCounts(getRegionCounter(E), E->getRHS());
   }
 
   void VisitBinLOr(const BinaryOperator *E) {
+    extendRegion(E);
     Visit(E->getLHS());
-    mapToken(E->getOperatorLoc());
-    // Counter tracks the right hand side of a logical or operator.
-    RegionMapper Cnt(this, E);
-    Cnt.beginRegion();
-    Visit(E->getRHS());
-    Cnt.adjustForControlFlow();
-    Cnt.applyAdjustmentsToRegion();
+
+    extendRegion(E->getRHS());
+    propagateCounts(getRegionCounter(E), E->getRHS());
   }
 
-  void VisitParenExpr(const ParenExpr *E) {
-    mapToken(E->getLParen());
-    Visit(E->getSubExpr());
-    mapToken(E->getRParen());
-  }
-
-  void VisitBinaryOperator(const BinaryOperator *E) {
-    Visit(E->getLHS());
-    mapToken(E->getOperatorLoc());
-    Visit(E->getRHS());
-  }
-
-  void VisitUnaryOperator(const UnaryOperator *E) {
-    bool Postfix = E->isPostfix();
-    if (!Postfix)
-      mapToken(E->getOperatorLoc());
-    Visit(E->getSubExpr());
-    if (Postfix)
-      mapToken(E->getOperatorLoc());
-  }
-
-  void VisitMemberExpr(const MemberExpr *E) {
-    Visit(E->getBase());
-    mapToken(E->getMemberLoc());
-  }
-
-  void VisitCallExpr(const CallExpr *E) {
-    Visit(E->getCallee());
-    for (const auto &Arg : E->arguments())
-      Visit(Arg);
-    mapToken(E->getRParenLoc());
-  }
-
-  void VisitArraySubscriptExpr(const ArraySubscriptExpr *E) {
-    Visit(E->getLHS());
-    Visit(E->getRHS());
-    mapToken(E->getRBracketLoc());
-  }
-
-  void VisitCStyleCastExpr(const CStyleCastExpr *E) {
-    mapToken(E->getLParenLoc());
-    mapToken(E->getRParenLoc());
-    Visit(E->getSubExpr());
-  }
-
-  // Map literals as tokens so that the macros like #define PI 3.14
-  // won't generate coverage mapping regions.
-
-  void VisitIntegerLiteral(const IntegerLiteral *E) {
-    mapToken(E->getLocStart());
-  }
-
-  void VisitFloatingLiteral(const FloatingLiteral *E) {
-    mapToken(E->getLocStart());
-  }
-
-  void VisitCharacterLiteral(const CharacterLiteral *E) {
-    mapToken(E->getLocStart());
-  }
-
-  void VisitStringLiteral(const StringLiteral *E) {
-    mapToken(E->getLocStart());
-  }
-
-  void VisitImaginaryLiteral(const ImaginaryLiteral *E) {
-    mapToken(E->getLocStart());
-  }
-
-  void VisitObjCMessageExpr(const ObjCMessageExpr *E) {
-    mapToken(E->getLeftLoc());
-    for (Stmt::const_child_range I = static_cast<const Stmt*>(E)->children(); I;
-         ++I) {
-      if (*I)
-        this->Visit(*I);
-    }
-    mapToken(E->getRightLoc());
+  void VisitLambdaExpr(const LambdaExpr *LE) {
+    // Lambdas are treated as their own functions for now, so we shouldn't
+    // propagate counts into them.
   }
 };
 }
@@ -1045,10 +866,12 @@
   return isMachO(CGM) ? "__DATA,__llvm_covmap" : "__llvm_covmap";
 }
 
-static void dump(llvm::raw_ostream &OS, const CoverageMappingRecord &Function) {
-  OS << Function.FunctionName << ":\n";
-  CounterMappingContext Ctx(Function.Expressions);
-  for (const auto &R : Function.MappingRegions) {
+static void dump(llvm::raw_ostream &OS, StringRef FunctionName,
+                 ArrayRef<CounterExpression> Expressions,
+                 ArrayRef<CounterMappingRegion> Regions) {
+  OS << FunctionName << ":\n";
+  CounterMappingContext Ctx(Expressions);
+  for (const auto &R : Regions) {
     OS.indent(2);
     switch (R.Kind) {
     case CounterMappingRegion::CodeRegion:
@@ -1061,15 +884,12 @@
       break;
     }
 
-    OS << "File " << R.FileID << ", " << R.LineStart << ":"
-           << R.ColumnStart << " -> " << R.LineEnd << ":" << R.ColumnEnd
-           << " = ";
-    Ctx.dump(R.Count);
-    OS << " (HasCodeBefore = " << R.HasCodeBefore;
+    OS << "File " << R.FileID << ", " << R.LineStart << ":" << R.ColumnStart
+       << " -> " << R.LineEnd << ":" << R.ColumnEnd << " = ";
+    Ctx.dump(R.Count, OS);
     if (R.Kind == CounterMappingRegion::ExpansionRegion)
-      OS << ", Expanded file = " << R.ExpandedFileID;
-
-    OS << ")\n";
+      OS << " (Expanded file = " << R.ExpandedFileID << ")";
+    OS << "\n";
   }
 }
 
@@ -1108,13 +928,11 @@
     FilenameRefs.resize(FileEntries.size());
     for (const auto &Entry : FileEntries)
       FilenameRefs[Entry.second] = Entry.first->getName();
-    RawCoverageMappingReader Reader(FunctionNameValue, CoverageMapping,
-                                    FilenameRefs,
-                                    Filenames, Expressions, Regions);
-    CoverageMappingRecord FunctionRecord;
-    if (Reader.read(FunctionRecord))
+    RawCoverageMappingReader Reader(CoverageMapping, FilenameRefs, Filenames,
+                                    Expressions, Regions);
+    if (Reader.read())
       return;
-    dump(llvm::outs(), FunctionRecord);
+    dump(llvm::outs(), FunctionNameValue, Expressions, Regions);
   }
 }
 
diff --git a/lib/CodeGen/CoverageMappingGen.h b/lib/CodeGen/CoverageMappingGen.h
index e4c6412..0d1bf6d 100644
--- a/lib/CodeGen/CoverageMappingGen.h
+++ b/lib/CodeGen/CoverageMappingGen.h
@@ -16,10 +16,10 @@
 
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/SourceLocation.h"
-#include "clang/Lex/PPCallbacks.h"
 #include "clang/Frontend/CodeGenOptions.h"
-#include "llvm/ADT/StringMap.h"
+#include "clang/Lex/PPCallbacks.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/Support/raw_ostream.h"
 
diff --git a/lib/CodeGen/EHScopeStack.h b/lib/CodeGen/EHScopeStack.h
index e695848..6535b76 100644
--- a/lib/CodeGen/EHScopeStack.h
+++ b/lib/CodeGen/EHScopeStack.h
@@ -75,8 +75,14 @@
 template <class T> struct DominatingValue<T*> : DominatingPointer<T> {};
 
 enum CleanupKind : unsigned {
+  /// Denotes a cleanup that should run when a scope is exited using exceptional
+  /// control flow (a throw statement leading to stack unwinding, ...).
   EHCleanup = 0x1,
+
+  /// Denotes a cleanup that should run when a scope is exited using normal
+  /// control flow (falling off the end of the scope, return, goto, ...).
   NormalCleanup = 0x2,
+
   NormalAndEHCleanup = EHCleanup | NormalCleanup,
 
   InactiveCleanup = 0x4,
@@ -306,53 +312,10 @@
                    InnermostEHScope(stable_end()) {}
   ~EHScopeStack() { delete[] StartOfBuffer; }
 
-  // Variadic templates would make this not terrible.
-
   /// Push a lazily-created cleanup on the stack.
-  template <class T>
-  void pushCleanup(CleanupKind Kind) {
+  template <class T, class... As> void pushCleanup(CleanupKind Kind, As... A) {
     void *Buffer = pushCleanup(Kind, sizeof(T));
-    Cleanup *Obj = new(Buffer) T();
-    (void) Obj;
-  }
-
-  /// Push a lazily-created cleanup on the stack.
-  template <class T, class A0>
-  void pushCleanup(CleanupKind Kind, A0 a0) {
-    void *Buffer = pushCleanup(Kind, sizeof(T));
-    Cleanup *Obj = new(Buffer) T(a0);
-    (void) Obj;
-  }
-
-  /// Push a lazily-created cleanup on the stack.
-  template <class T, class A0, class A1>
-  void pushCleanup(CleanupKind Kind, A0 a0, A1 a1) {
-    void *Buffer = pushCleanup(Kind, sizeof(T));
-    Cleanup *Obj = new(Buffer) T(a0, a1);
-    (void) Obj;
-  }
-
-  /// Push a lazily-created cleanup on the stack.
-  template <class T, class A0, class A1, class A2>
-  void pushCleanup(CleanupKind Kind, A0 a0, A1 a1, A2 a2) {
-    void *Buffer = pushCleanup(Kind, sizeof(T));
-    Cleanup *Obj = new(Buffer) T(a0, a1, a2);
-    (void) Obj;
-  }
-
-  /// Push a lazily-created cleanup on the stack.
-  template <class T, class A0, class A1, class A2, class A3>
-  void pushCleanup(CleanupKind Kind, A0 a0, A1 a1, A2 a2, A3 a3) {
-    void *Buffer = pushCleanup(Kind, sizeof(T));
-    Cleanup *Obj = new(Buffer) T(a0, a1, a2, a3);
-    (void) Obj;
-  }
-
-  /// Push a lazily-created cleanup on the stack.
-  template <class T, class A0, class A1, class A2, class A3, class A4>
-  void pushCleanup(CleanupKind Kind, A0 a0, A1 a1, A2 a2, A3 a3, A4 a4) {
-    void *Buffer = pushCleanup(Kind, sizeof(T));
-    Cleanup *Obj = new(Buffer) T(a0, a1, a2, a3, a4);
+    Cleanup *Obj = new (Buffer) T(A...);
     (void) Obj;
   }
 
@@ -369,10 +332,10 @@
   ///
   /// The pointer returned from this method is valid until the cleanup
   /// stack is modified.
-  template <class T, class A0, class A1, class A2>
-  T *pushCleanupWithExtra(CleanupKind Kind, size_t N, A0 a0, A1 a1, A2 a2) {
+  template <class T, class... As>
+  T *pushCleanupWithExtra(CleanupKind Kind, size_t N, As... A) {
     void *Buffer = pushCleanup(Kind, sizeof(T) + T::getExtraSize(N));
-    return new (Buffer) T(N, a0, a1, a2);
+    return new (Buffer) T(N, A...);
   }
 
   void pushCopyOfCleanup(CleanupKind Kind, const void *Cleanup, size_t Size) {
diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp
index a65c5ef..e580969 100644
--- a/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/lib/CodeGen/ItaniumCXXABI.cpp
@@ -110,6 +110,8 @@
                                llvm::Value *Ptr, QualType ElementType,
                                const CXXDestructorDecl *Dtor) override;
 
+  void emitRethrow(CodeGenFunction &CGF, bool isNoReturn) override;
+
   void EmitFundamentalRTTIDescriptor(QualType Type);
   void EmitFundamentalRTTIDescriptors();
   llvm::Constant *getAddrOfRTTIDescriptor(QualType Ty) override;
@@ -337,6 +339,9 @@
     return new ItaniumCXXABI(CGM, /* UseARMMethodPtrABI = */ true,
                              /* UseARMGuardVarABI = */ true);
 
+  case TargetCXXABI::GenericMIPS:
+    return new ItaniumCXXABI(CGM, /* UseARMMethodPtrABI = */ true);
+
   case TargetCXXABI::GenericItanium:
     if (CGM.getContext().getTargetInfo().getTriple().getArch()
         == llvm::Triple::le32) {
@@ -358,7 +363,7 @@
 ItaniumCXXABI::ConvertMemberPointerType(const MemberPointerType *MPT) {
   if (MPT->isMemberDataPointer())
     return CGM.PtrDiffTy;
-  return llvm::StructType::get(CGM.PtrDiffTy, CGM.PtrDiffTy, NULL);
+  return llvm::StructType::get(CGM.PtrDiffTy, CGM.PtrDiffTy, nullptr);
 }
 
 /// In the Itanium and ARM ABIs, method pointers have the form:
@@ -887,6 +892,20 @@
     CGF.PopCleanupBlock();
 }
 
+void ItaniumCXXABI::emitRethrow(CodeGenFunction &CGF, bool isNoReturn) {
+  // void __cxa_rethrow();
+
+  llvm::FunctionType *FTy =
+    llvm::FunctionType::get(CGM.VoidTy, /*IsVarArgs=*/false);
+
+  llvm::Constant *Fn = CGM.CreateRuntimeFunction(FTy, "__cxa_rethrow");
+
+  if (isNoReturn)
+    CGF.EmitNoreturnRuntimeCallOrInvoke(Fn, None);
+  else
+    CGF.EmitRuntimeCallOrInvoke(Fn);
+}
+
 static llvm::Constant *getItaniumDynamicCastFn(CodeGenFunction &CGF) {
   // void *__dynamic_cast(const void *sub,
   //                      const abi::__class_type_info *src,
@@ -1106,7 +1125,7 @@
   CGM.EmitGlobal(GlobalDecl(D, Ctor_Base));
 
   // The constructor used for constructing this as a complete class;
-  // constucts the virtual bases, then calls the base constructor.
+  // constructs the virtual bases, then calls the base constructor.
   if (!D->getParent()->isAbstract()) {
     // We don't need to emit the complete ctor if the class is abstract.
     CGM.EmitGlobal(GlobalDecl(D, Ctor_Complete));
@@ -1240,6 +1259,9 @@
   // Set the correct linkage.
   VTable->setLinkage(Linkage);
 
+  if (CGM.supportsCOMDAT() && VTable->isWeakForLinker())
+    VTable->setComdat(CGM.getModule().getOrInsertComdat(VTable->getName()));
+
   // Set the right visibility.
   CGM.setGlobalVisibility(VTable, RD);
 
@@ -1259,6 +1281,8 @@
       cast<NamespaceDecl>(DC)->getIdentifier()->isStr("__cxxabiv1") &&
       DC->getParent()->isTranslationUnit())
     EmitFundamentalRTTIDescriptors();
+
+  CGM.EmitVTableBitSetEntries(VTable, VTLayout);
 }
 
 llvm::Value *ItaniumCXXABI::getVTableAddressPointInStructor(
@@ -1350,6 +1374,8 @@
   Ty = Ty->getPointerTo()->getPointerTo();
   llvm::Value *VTable = CGF.GetVTablePtr(This, Ty);
 
+  CGF.EmitVTablePtrCheckForCall(cast<CXXMethodDecl>(GD.getDecl()), VTable);
+
   uint64_t VTableIndex = CGM.getItaniumVTableContext().getMethodVTableIndex(GD);
   llvm::Value *VFuncPtr =
       CGF.Builder.CreateConstInBoundsGEP1_64(VTable, VTableIndex, "vfn");
@@ -1695,11 +1721,12 @@
 
     // The ABI says: It is suggested that it be emitted in the same COMDAT group
     // as the associated data object
-    if (!D.isLocalVarDecl() && var->isWeakForLinker() && CGM.supportsCOMDAT()) {
-      llvm::Comdat *C = CGM.getModule().getOrInsertComdat(var->getName());
+    llvm::Comdat *C = var->getComdat();
+    if (!D.isLocalVarDecl() && C) {
       guard->setComdat(C);
-      var->setComdat(C);
       CGF.CurFn->setComdat(C);
+    } else if (CGM.supportsCOMDAT() && guard->isWeakForLinker()) {
+      guard->setComdat(CGM.getModule().getOrInsertComdat(guard->getName()));
     }
 
     CGM.setStaticLocalDeclGuardAddress(&D, guard);
@@ -2699,9 +2726,13 @@
 
   llvm::Constant *Init = llvm::ConstantStruct::getAnon(Fields);
 
+  llvm::Module &M = CGM.getModule();
   llvm::GlobalVariable *GV =
-    new llvm::GlobalVariable(CGM.getModule(), Init->getType(),
-                             /*Constant=*/true, Linkage, Init, Name);
+      new llvm::GlobalVariable(M, Init->getType(),
+                               /*Constant=*/true, Linkage, Init, Name);
+
+  if (CGM.supportsCOMDAT() && GV->isWeakForLinker())
+    GV->setComdat(M.getOrInsertComdat(GV->getName()));
 
   // If there's already an old global variable, replace it with the new one.
   if (OldGV) {
@@ -3185,5 +3216,7 @@
       getMangleContext().mangleCXXCtorComdat(CD, Out);
     llvm::Comdat *C = CGM.getModule().getOrInsertComdat(Out.str());
     Fn->setComdat(C);
+  } else {
+    CGM.maybeSetTrivialComdat(*MD, *Fn);
   }
 }
diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp
index 659ed0a..48adf3a 100644
--- a/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -70,6 +70,8 @@
                                llvm::Value *Ptr, QualType ElementType,
                                const CXXDestructorDecl *Dtor) override;
 
+  void emitRethrow(CodeGenFunction &CGF, bool isNoReturn) override;
+
   llvm::GlobalVariable *getMSCompleteObjectLocator(const CXXRecordDecl *RD,
                                                    const VPtrInfo *Info);
 
@@ -665,6 +667,30 @@
     CGF.EmitDeleteCall(DE->getOperatorDelete(), MDThis, ElementType);
 }
 
+static llvm::Function *getRethrowFn(CodeGenModule &CGM) {
+  // _CxxThrowException takes two pointer-width arguments: a value and a
+  // context object which points to a TypeInfo object.
+  llvm::Type *ParamTys[] = {CGM.Int8PtrTy, CGM.Int8PtrTy};
+  auto *ThrowFn = cast<llvm::Function>(CGM.CreateRuntimeFunction(
+      llvm::FunctionType::get(CGM.VoidTy, ParamTys, /*isVarArg=*/false),
+      "_CxxThrowException"));
+  // On 32-bit x86 platforms _CxxThrowException uses the stdcall convention.
+  llvm::Triple::ArchType Arch = CGM.getTarget().getTriple().getArch();
+  if (Arch == llvm::Triple::x86)
+    ThrowFn->setCallingConv(llvm::CallingConv::X86_StdCall);
+  return ThrowFn;
+}
+
+void MicrosoftCXXABI::emitRethrow(CodeGenFunction &CGF, bool isNoReturn) {
+  // Both _CxxThrowException arguments are passed as null i8* constants.
+  llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(CGM.Int8PtrTy);
+  llvm::Value *ThrowArgs[] = {NullPtr, NullPtr};
+  if (!isNoReturn)
+    CGF.EmitRuntimeCallOrInvoke(getRethrowFn(CGM), ThrowArgs);
+  else
+    CGF.EmitNoreturnRuntimeCallOrInvoke(getRethrowFn(CGM), ThrowArgs);
+}
+
 /// \brief Gets the offset to the virtual base that contains the vfptr for
 /// MS-ABI polymorphic types.
 static llvm::Value *getPolymorphicOffset(CodeGenFunction &CGF,
@@ -1394,6 +1420,10 @@
         } else {
           llvm_unreachable("unexpected linkage for vftable!");
         }
+      } else {
+        if (llvm::GlobalValue::isWeakForLinker(VFTableLinkage))
+          VTable->setComdat(
+              CGM.getModule().getOrInsertComdat(VTable->getName()));
       }
       VFTable->setLinkage(VFTableLinkage);
       CGM.setGlobalVisibility(VFTable, RD);
@@ -1439,9 +1469,9 @@
   llvm::Value *Callee = getVirtualFunctionPointer(CGF, GD, This, Ty);
 
   ASTContext &Context = CGF.getContext();
-  llvm::Value *ImplicitParam =
-      llvm::ConstantInt::get(llvm::IntegerType::getInt32Ty(CGF.getLLVMContext()),
-                             DtorType == Dtor_Deleting);
+  llvm::Value *ImplicitParam = llvm::ConstantInt::get(
+      llvm::IntegerType::getInt32Ty(CGF.getLLVMContext()),
+      DtorType == Dtor_Deleting);
 
   This = adjustThisArgumentForVirtualFunctionCall(CGF, GD, This, true);
   RValue RV = CGF.EmitCXXStructorCall(Dtor, Callee, ReturnValueSlot(), This,
@@ -1504,10 +1534,18 @@
   ThunkFn->setLinkage(MD->isExternallyVisible()
                           ? llvm::GlobalValue::LinkOnceODRLinkage
                           : llvm::GlobalValue::InternalLinkage);
+  if (MD->isExternallyVisible())
+    ThunkFn->setComdat(CGM.getModule().getOrInsertComdat(ThunkFn->getName()));
 
   CGM.SetLLVMFunctionAttributes(MD, FnInfo, ThunkFn);
   CGM.SetLLVMFunctionAttributesForDefinition(MD, ThunkFn);
 
+  // Add the "thunk" attribute so that LLVM knows that the return type is
+  // meaningless. These thunks can be used to call functions with differing
+  // return types, and the caller is required to cast the prototype
+  // appropriately to extract the correct value.
+  ThunkFn->addFnAttr("thunk");
+
   // These thunks can be compared, so they are not unnamed.
   ThunkFn->setUnnamedAddr(false);
 
@@ -1544,7 +1582,8 @@
   for (unsigned I = 0, E = VBGlobals.VBTables->size(); I != E; ++I) {
     const VPtrInfo *VBT = (*VBGlobals.VBTables)[I];
     llvm::GlobalVariable *GV = VBGlobals.Globals[I];
-    emitVBTableDefinition(*VBT, RD, GV);
+    if (GV->isDeclaration())
+      emitVBTableDefinition(*VBT, RD, GV);
   }
 }
 
@@ -1571,6 +1610,9 @@
   else if (RD->hasAttr<DLLExportAttr>())
     GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
 
+  if (!GV->hasExternalLinkage())
+    emitVBTableDefinition(VBT, RD, GV);
+
   return GV;
 }
 
@@ -1803,18 +1845,10 @@
     llvm::Function *F = CXXThreadLocalInits[I];
 
     // If the GV is already in a comdat group, then we have to join it.
-    llvm::Comdat *C = GV->getComdat();
-
-    // LinkOnce and Weak linkage are lowered down to a single-member comdat
-    // group.
-    // Make an explicit group so we can join it.
-    if (!C && (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage())) {
-      C = CGM.getModule().getOrInsertComdat(GV->getName());
-      GV->setComdat(C);
+    if (llvm::Comdat *C = GV->getComdat())
       AddToXDU(F)->setComdat(C);
-    } else {
+    else
       NonComdatInits.push_back(F);
-    }
   }
 
   if (!NonComdatInits.empty()) {
@@ -1843,7 +1877,9 @@
   if (!D.isStaticLocal()) {
     assert(GV->hasWeakLinkage() || GV->hasLinkOnceLinkage());
     // GlobalOpt is allowed to discard the initializer, so use linkonce_odr.
-    CGF.CurFn->setLinkage(llvm::GlobalValue::LinkOnceODRLinkage);
+    llvm::Function *F = CGF.CurFn;
+    F->setLinkage(llvm::GlobalValue::LinkOnceODRLinkage);
+    F->setComdat(CGM.getModule().getOrInsertComdat(F->getName()));
     CGF.EmitCXXGlobalVarDeclInit(D, GV, PerformInit);
     return;
   }
@@ -1899,6 +1935,9 @@
                                  GV->getLinkage(), Zero, GuardName.str());
     GI->Guard->setVisibility(GV->getVisibility());
     GI->Guard->setDLLStorageClass(GV->getDLLStorageClass());
+    if (GI->Guard->isWeakForLinker())
+      GI->Guard->setComdat(
+          CGM.getModule().getOrInsertComdat(GI->Guard->getName()));
   } else {
     assert(GI->Guard->getLinkage() == GV->getLinkage() &&
            "static local from the same function had different linkage");
@@ -2836,6 +2875,8 @@
   auto CHD = new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage,
                                       /*Initializer=*/nullptr,
                                       MangledName.c_str());
+  if (CHD->isWeakForLinker())
+    CHD->setComdat(CGM.getModule().getOrInsertComdat(CHD->getName()));
 
   // Initialize the base class ClassHierarchyDescriptor.
   llvm::Constant *Fields[] = {
@@ -2869,6 +2910,8 @@
   auto *BCA = new llvm::GlobalVariable(
       Module, ArrType,
       /*Constant=*/true, Linkage, /*Initializer=*/nullptr, MangledName.c_str());
+  if (BCA->isWeakForLinker())
+    BCA->setComdat(CGM.getModule().getOrInsertComdat(BCA->getName()));
 
   // Initialize the BaseClassArray.
   SmallVector<llvm::Constant *, 8> BaseClassArrayData;
@@ -2909,6 +2952,8 @@
   auto BCD = new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage,
                                       /*Initializer=*/nullptr,
                                       MangledName.c_str());
+  if (BCD->isWeakForLinker())
+    BCD->setComdat(CGM.getModule().getOrInsertComdat(BCD->getName()));
 
   // Initialize the BaseClassDescriptor.
   llvm::Constant *Fields[] = {
@@ -2968,6 +3013,8 @@
   if (!ABI.isImageRelative())
     FieldsRef = FieldsRef.drop_back();
   COL->setInitializer(llvm::ConstantStruct::get(Type, FieldsRef));
+  if (COL->isWeakForLinker())
+    COL->setComdat(CGM.getModule().getOrInsertComdat(COL->getName()));
   return COL;
 }
 
@@ -2999,13 +3046,14 @@
     llvm::ConstantDataArray::getString(CGM.getLLVMContext(), TypeInfoString)};
   llvm::StructType *TypeDescriptorType =
       getTypeDescriptorType(TypeInfoString);
-  return llvm::ConstantExpr::getBitCast(
-      new llvm::GlobalVariable(
-          CGM.getModule(), TypeDescriptorType, /*Constant=*/false,
-          getLinkageForRTTI(Type),
-          llvm::ConstantStruct::get(TypeDescriptorType, Fields),
-          MangledName.c_str()),
-      CGM.Int8PtrTy);
+  auto *Var = new llvm::GlobalVariable(
+      CGM.getModule(), TypeDescriptorType, /*Constant=*/false,
+      getLinkageForRTTI(Type),
+      llvm::ConstantStruct::get(TypeDescriptorType, Fields),
+      MangledName.c_str());
+  if (Var->isWeakForLinker())
+    Var->setComdat(CGM.getModule().getOrInsertComdat(Var->getName()));
+  return llvm::ConstantExpr::getBitCast(Var, CGM.Int8PtrTy);
 }
 
 /// \brief Gets or a creates a Microsoft CompleteObjectLocator.
@@ -3019,7 +3067,8 @@
                                const CXXConstructorDecl *ctor,
                                StructorType ctorType) {
   // There are no constructor variants, always emit the complete destructor.
-  CGM.codegenCXXStructor(ctor, StructorType::Complete);
+  llvm::Function *Fn = CGM.codegenCXXStructor(ctor, StructorType::Complete);
+  CGM.maybeSetTrivialComdat(*ctor, *Fn);
 }
 
 static void emitCXXDestructor(CodeGenModule &CGM, const CXXDestructorDecl *dtor,
@@ -3045,7 +3094,9 @@
   if (dtorType == StructorType::Base && !CGM.TryEmitBaseDestructorAsAlias(dtor))
     return;
 
-  CGM.codegenCXXStructor(dtor, dtorType);
+  llvm::Function *Fn = CGM.codegenCXXStructor(dtor, dtorType);
+  if (Fn->isWeakForLinker())
+    Fn->setComdat(CGM.getModule().getOrInsertComdat(Fn->getName()));
 }
 
 void MicrosoftCXXABI::emitCXXStructor(const CXXMethodDecl *MD,
diff --git a/lib/CodeGen/ModuleBuilder.cpp b/lib/CodeGen/ModuleBuilder.cpp
index ee6f6f9..03771e1 100644
--- a/lib/CodeGen/ModuleBuilder.cpp
+++ b/lib/CodeGen/ModuleBuilder.cpp
@@ -52,15 +52,22 @@
     std::unique_ptr<llvm::Module> M;
     std::unique_ptr<CodeGen::CodeGenModule> Builder;
 
+  private:
+    SmallVector<CXXMethodDecl *, 8> DeferredInlineMethodDefinitions;
+
   public:
     CodeGeneratorImpl(DiagnosticsEngine &diags, const std::string& ModuleName,
                       const CodeGenOptions &CGO, llvm::LLVMContext& C,
                       CoverageSourceInfo *CoverageInfo = nullptr)
-      : Diags(diags), CodeGenOpts(CGO), HandlingTopLevelDecls(0),
+      : Diags(diags), Ctx(nullptr), CodeGenOpts(CGO), HandlingTopLevelDecls(0),
         CoverageInfo(CoverageInfo),
         M(new llvm::Module(ModuleName, C)) {}
 
-    virtual ~CodeGeneratorImpl() {}
+    virtual ~CodeGeneratorImpl() {
+      // There should normally not be any leftover inline method definitions.
+      assert(DeferredInlineMethodDefinitions.empty() ||
+             Diags.hasErrorOccurred());
+    }
 
     llvm::Module* GetModule() override {
       return M.get();
@@ -88,7 +95,8 @@
 
       M->setTargetTriple(Ctx->getTargetInfo().getTriple().getTriple());
       M->setDataLayout(Ctx->getTargetInfo().getTargetDescription());
-      TD.reset(new llvm::DataLayout(Ctx->getTargetInfo().getTargetDescription()));
+      TD.reset(
+          new llvm::DataLayout(Ctx->getTargetInfo().getTargetDescription()));
       Builder.reset(new CodeGen::CodeGenModule(Context, CodeGenOpts, *M, *TD,
                                                Diags, CoverageInfo));
 
@@ -204,11 +212,11 @@
       Builder->EmitTentativeDefinition(D);
     }
 
-    void HandleVTable(CXXRecordDecl *RD, bool DefinitionRequired) override {
+    void HandleVTable(CXXRecordDecl *RD) override {
       if (Diags.hasErrorOccurred())
         return;
 
-      Builder->EmitVTable(RD, DefinitionRequired);
+      Builder->EmitVTable(RD);
     }
 
     void HandleLinkerOptionPragma(llvm::StringRef Opts) override {
@@ -223,9 +231,6 @@
     void HandleDependentLibrary(llvm::StringRef Lib) override {
       Builder->AddDependentLib(Lib);
     }
-
-  private:
-    std::vector<CXXMethodDecl *> DeferredInlineMethodDefinitions;
   };
 }
 
@@ -234,7 +239,6 @@
 CodeGenerator *clang::CreateLLVMCodeGen(DiagnosticsEngine &Diags,
                                         const std::string& ModuleName,
                                         const CodeGenOptions &CGO,
-                                        const TargetOptions &/*TO*/,
                                         llvm::LLVMContext& C,
                                         CoverageSourceInfo *CoverageInfo) {
   return new CodeGeneratorImpl(Diags, ModuleName, CGO, C, CoverageInfo);
diff --git a/lib/CodeGen/SanitizerMetadata.cpp b/lib/CodeGen/SanitizerMetadata.cpp
index 70bc058..7c38b28 100644
--- a/lib/CodeGen/SanitizerMetadata.cpp
+++ b/lib/CodeGen/SanitizerMetadata.cpp
@@ -30,8 +30,8 @@
   IsDynInit &= !CGM.isInSanitizerBlacklist(GV, Loc, Ty, "init");
   IsBlacklisted |= CGM.isInSanitizerBlacklist(GV, Loc, Ty);
 
-  llvm::Value *LocDescr = nullptr;
-  llvm::Value *GlobalName = nullptr;
+  llvm::Metadata *LocDescr = nullptr;
+  llvm::Metadata *GlobalName = nullptr;
   llvm::LLVMContext &VMContext = CGM.getLLVMContext();
   if (!IsBlacklisted) {
     // Don't generate source location and global name if it is blacklisted -
@@ -41,10 +41,12 @@
       GlobalName = llvm::MDString::get(VMContext, Name);
   }
 
-  llvm::Value *GlobalMetadata[] = {
-      GV, LocDescr, GlobalName,
-      llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), IsDynInit),
-      llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), IsBlacklisted)};
+  llvm::Metadata *GlobalMetadata[] = {
+      llvm::ConstantAsMetadata::get(GV), LocDescr, GlobalName,
+      llvm::ConstantAsMetadata::get(
+          llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), IsDynInit)),
+      llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
+          llvm::Type::getInt1Ty(VMContext), IsBlacklisted))};
 
   llvm::MDNode *ThisGlobal = llvm::MDNode::get(VMContext, GlobalMetadata);
   llvm::NamedMDNode *AsanGlobals =
@@ -70,9 +72,8 @@
 }
 
 void SanitizerMetadata::disableSanitizerForInstruction(llvm::Instruction *I) {
-  I->setMetadata(
-      CGM.getModule().getMDKindID("nosanitize"),
-      llvm::MDNode::get(CGM.getLLVMContext(), ArrayRef<llvm::Value *>()));
+  I->setMetadata(CGM.getModule().getMDKindID("nosanitize"),
+                 llvm::MDNode::get(CGM.getLLVMContext(), None));
 }
 
 llvm::MDNode *SanitizerMetadata::getLocationMetadata(SourceLocation Loc) {
@@ -80,11 +81,12 @@
   if (!PLoc.isValid())
     return nullptr;
   llvm::LLVMContext &VMContext = CGM.getLLVMContext();
-  llvm::Value *LocMetadata[] = {
+  llvm::Metadata *LocMetadata[] = {
       llvm::MDString::get(VMContext, PLoc.getFilename()),
-      llvm::ConstantInt::get(llvm::Type::getInt32Ty(VMContext), PLoc.getLine()),
-      llvm::ConstantInt::get(llvm::Type::getInt32Ty(VMContext),
-                             PLoc.getColumn()),
+      llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
+          llvm::Type::getInt32Ty(VMContext), PLoc.getLine())),
+      llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
+          llvm::Type::getInt32Ty(VMContext), PLoc.getColumn())),
   };
   return llvm::MDNode::get(VMContext, LocMetadata);
 }
diff --git a/lib/CodeGen/SanitizerMetadata.h b/lib/CodeGen/SanitizerMetadata.h
index f3c700a..166f0e6 100644
--- a/lib/CodeGen/SanitizerMetadata.h
+++ b/lib/CodeGen/SanitizerMetadata.h
@@ -13,9 +13,9 @@
 #ifndef LLVM_CLANG_LIB_CODEGEN_SANITIZERMETADATA_H
 #define LLVM_CLANG_LIB_CODEGEN_SANITIZERMETADATA_H
 
+#include "clang/AST/Type.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/SourceLocation.h"
-#include "clang/AST/Type.h"
 
 namespace llvm {
 class GlobalVariable;
@@ -31,8 +31,8 @@
 class CodeGenModule;
 
 class SanitizerMetadata {
-  SanitizerMetadata(const SanitizerMetadata &) LLVM_DELETED_FUNCTION;
-  void operator=(const SanitizerMetadata &) LLVM_DELETED_FUNCTION;
+  SanitizerMetadata(const SanitizerMetadata &) = delete;
+  void operator=(const SanitizerMetadata &) = delete;
 
   CodeGenModule &CGM;
 public:
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 2604720..36f9914 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -20,11 +20,11 @@
 #include "clang/AST/RecordLayout.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "clang/Frontend/CodeGenOptions.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Support/raw_ostream.h"
-
 #include <algorithm>    // std::sort
 
 using namespace clang;
@@ -665,6 +665,9 @@
     return llvm::ConstantInt::get(CGM.Int32Ty, Sig);
   }
 
+  bool hasSjLjLowering(CodeGen::CodeGenFunction &CGF) const override {
+    return true;
+  }
 };
 
 }
@@ -1145,6 +1148,10 @@
     }
   }
 
+  // The chain argument effectively gives us another free register.
+  if (FI.isChainCall())
+    ++State.FreeRegs;
+
   bool UsedInAlloca = false;
   for (auto &I : FI.arguments()) {
     I.info = classifyArgumentType(I.type, State);
@@ -1301,17 +1308,8 @@
   case llvm::Triple::FreeBSD:
   case llvm::Triple::OpenBSD:
   case llvm::Triple::Bitrig:
-    return true;
   case llvm::Triple::Win32:
-    switch (Triple.getEnvironment()) {
-    case llvm::Triple::UnknownEnvironment:
-    case llvm::Triple::Cygnus:
-    case llvm::Triple::GNU:
-    case llvm::Triple::MSVC:
-      return true;
-    default:
-      return false;
-    }
+    return true;
   default:
     return false;
   }
@@ -1509,6 +1507,10 @@
 
   llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
                          CodeGenFunction &CGF) const override;
+
+  bool has64BitPointers() const {
+    return Has64BitPointers;
+  }
 };
 
 /// WinX86_64ABIInfo - The Windows X86_64 ABI information.
@@ -1594,24 +1596,51 @@
 
   llvm::Constant *
   getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override {
-    unsigned Sig = (0xeb << 0) |  // jmp rel8
-                   (0x0a << 8) |  //           .+0x0c
-                   ('F' << 16) |
-                   ('T' << 24);
+    // Composition of the 32-bit signature value:
+    //   bits  0-7 : 0xeb -- jmp rel8
+    //   bits  8-15: rel8 -- 0x0a (.+0x0c) with 64-bit pointers,
+    //                       0x06 (.+0x08) otherwise
+    //   bits 16-23: 'F'
+    //   bits 24-31: 'T'
+    const unsigned JmpOpcode = 0xeb;
+    const unsigned JmpDisp = getABIInfo().has64BitPointers() ? 0x0a : 0x06;
+    const unsigned Tag = ('F' << 16) | ('T' << 24);
+
+    unsigned Sig = (JmpOpcode << 0) | (JmpDisp << 8) | Tag;
     return llvm::ConstantInt::get(CGM.Int32Ty, Sig);
   }
 
   unsigned getOpenMPSimdDefaultAlignment(QualType) const override {
     return HasAVX ? 32 : 16;
   }
+
+  bool hasSjLjLowering(CodeGen::CodeGenFunction &CGF) const override {
+    return true;
+  }
+};
+
+class PS4TargetCodeGenInfo : public X86_64TargetCodeGenInfo {
+public:
+  PS4TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool HasAVX)
+    : X86_64TargetCodeGenInfo(CGT, HasAVX) {}
+  /// Sets \p Opt to \p Lib prefixed with a single '\01' byte.
+  void getDependentLibraryOption(llvm::StringRef Lib,
+                                 llvm::SmallString<24> &Opt) const override {
+    Opt = "\01";
+    Opt += Lib;
+  }
 };
 
 static std::string qualifyWindowsLibrary(llvm::StringRef Lib) {
-  // If the argument does not end in .lib, automatically add the suffix. This
-  // matches the behavior of MSVC.
-  std::string ArgStr = Lib;
+  // Matches the behavior of MSVC: an argument containing a space is enclosed
+  // in double quotes, and ".lib" is appended (inside the quotes) unless the
+  // argument already ends in that suffix.
+  const bool NeedsQuotes = Lib.find(" ") != StringRef::npos;
+  std::string ArgStr = NeedsQuotes ? "\"" : "";
+  ArgStr += Lib;
   if (!Lib.endswith_lower(".lib"))
     ArgStr += ".lib";
+  ArgStr += NeedsQuotes ? "\"" : "";
   return ArgStr;
 }
 
@@ -1621,6 +1650,9 @@
         bool d, bool p, bool w, unsigned RegParms)
     : X86_32TargetCodeGenInfo(CGT, d, p, w, RegParms) {}
 
+  void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+                           CodeGen::CodeGenModule &CGM) const override;
+
   void getDependentLibraryOption(llvm::StringRef Lib,
                                  llvm::SmallString<24> &Opt) const override {
     Opt = "/DEFAULTLIB:";
@@ -1634,12 +1666,35 @@
   }
 };
 
+static void addStackProbeSizeTargetAttribute(const Decl *D,
+                                             llvm::GlobalValue *GV,
+                                             CodeGen::CodeGenModule &CGM) {
+  // Nothing to do unless D is a function whose configured stack probe size
+  // differs from 4096.
+  if (!isa<FunctionDecl>(D) || CGM.getCodeGenOpts().StackProbeSize == 4096)
+    return;
+  llvm::Function *Fn = cast<llvm::Function>(GV);
+  Fn->addFnAttr("stack-probe-size",
+                llvm::utostr(CGM.getCodeGenOpts().StackProbeSize));
+}
+
+void WinX86_32TargetCodeGenInfo::SetTargetAttributes(
+    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
+  // Apply the X86_32TargetCodeGenInfo attributes first, then add the stack
+  // probe size attribute via the shared helper.
+  X86_32TargetCodeGenInfo::SetTargetAttributes(D, GV, CGM);
+  addStackProbeSizeTargetAttribute(D, GV, CGM);
+}
+
 class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo {
   bool HasAVX;
 public:
   WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool HasAVX)
     : TargetCodeGenInfo(new WinX86_64ABIInfo(CGT)), HasAVX(HasAVX) {}
 
+  void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+                           CodeGen::CodeGenModule &CGM) const override;
+
   int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
     return 7;
   }
@@ -1671,6 +1726,13 @@
   }
 };
 
+void WinX86_64TargetCodeGenInfo::SetTargetAttributes(
+    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
+  // Run the base TargetCodeGenInfo hook, then add the stack probe size
+  // attribute via the shared helper.
+  TargetCodeGenInfo::SetTargetAttributes(D, GV, CGM);
+  addStackProbeSizeTargetAttribute(D, GV, CGM);
+}
 }
 
 void X86_64ABIInfo::postMerge(unsigned AggregateSize, Class &Lo,
@@ -2139,19 +2201,15 @@
   return ABIArgInfo::getIndirect(Align);
 }
 
-/// GetByteVectorType - The ABI specifies that a value should be passed in an
-/// full vector XMM/YMM register.  Pick an LLVM IR type that will be passed as a
-/// vector register.
+/// The ABI specifies that a value should be passed in a full vector XMM/YMM
+/// register. Pick an LLVM IR type that will be passed as a vector register.
 llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const {
-  llvm::Type *IRType = CGT.ConvertType(Ty);
+  // Wrapper structs/arrays that only contain vectors are passed just like
+  // vectors; strip them off if present.
+  if (const Type *InnerTy = isSingleElementStruct(Ty, getContext()))
+    Ty = QualType(InnerTy, 0);
 
-  // Wrapper structs that just contain vectors are passed just like vectors,
-  // strip them off if present.
-  llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType);
-  while (STy && STy->getNumElements() == 1) {
-    IRType = STy->getElementType(0);
-    STy = dyn_cast<llvm::StructType>(IRType);
-  }
+  llvm::Type *IRType = CGT.ConvertType(Ty);
 
   // If the preferred type is a 16-byte vector, prefer to pass it.
   if (llvm::VectorType *VT = dyn_cast<llvm::VectorType>(IRType)){
@@ -2415,7 +2473,7 @@
     }
   }
 
-  llvm::StructType *Result = llvm::StructType::get(Lo, Hi, NULL);
+  llvm::StructType *Result = llvm::StructType::get(Lo, Hi, nullptr);
 
 
   // Verify that the second element is at an 8-byte offset.
@@ -2492,7 +2550,7 @@
     assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification.");
     ResType = llvm::StructType::get(llvm::Type::getX86_FP80Ty(getVMContext()),
                                     llvm::Type::getX86_FP80Ty(getVMContext()),
-                                    NULL);
+                                    nullptr);
     break;
   }
 
@@ -2695,6 +2753,10 @@
   if (FI.getReturnInfo().isIndirect())
     --freeIntRegs;
 
+  // The chain argument effectively gives us another free register.
+  if (FI.isChainCall())
+    ++freeIntRegs;
+
   unsigned NumRequiredArgs = FI.getNumRequiredArgs();
   // AMD64-ABI 3.2.3p3: Once arguments are classified, the registers
   // get assigned (in left-to-right order) for passing as follows...
@@ -2895,7 +2957,7 @@
     llvm::Type *DoubleTy = CGF.DoubleTy;
     llvm::Type *DblPtrTy =
       llvm::PointerType::getUnqual(DoubleTy);
-    llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy, NULL);
+    llvm::StructType *ST = llvm::StructType::get(DoubleTy, DoubleTy, nullptr);
     llvm::Value *V, *Tmp = CGF.CreateMemTemp(Ty);
     Tmp = CGF.Builder.CreateBitCast(Tmp, ST->getPointerTo());
     V = CGF.Builder.CreateLoad(CGF.Builder.CreateBitCast(RegAddrLo,
@@ -2990,7 +3052,7 @@
       return ABIArgInfo::getDirect();
   }
 
-  if (RT || Ty->isMemberPointerType()) {
+  if (RT || Ty->isAnyComplexType() || Ty->isMemberPointerType()) {
     // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
     // not 1, 2, 4, or 8 bytes, must be passed by reference."
     if (Width > 64 || !llvm::isPowerOf2_64(Width))
@@ -3046,48 +3108,6 @@
   return AddrTyped;
 }
 
-namespace {
-
-class NaClX86_64ABIInfo : public ABIInfo {
- public:
-  NaClX86_64ABIInfo(CodeGen::CodeGenTypes &CGT, bool HasAVX)
-      : ABIInfo(CGT), PInfo(CGT), NInfo(CGT, HasAVX) {}
-  void computeInfo(CGFunctionInfo &FI) const override;
-  llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
-                         CodeGenFunction &CGF) const override;
- private:
-  PNaClABIInfo PInfo;  // Used for generating calls with pnaclcall callingconv.
-  X86_64ABIInfo NInfo; // Used for everything else.
-};
-
-class NaClX86_64TargetCodeGenInfo : public TargetCodeGenInfo  {
-  bool HasAVX;
- public:
-   NaClX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool HasAVX)
-       : TargetCodeGenInfo(new NaClX86_64ABIInfo(CGT, HasAVX)), HasAVX(HasAVX) {
-   }
-   unsigned getOpenMPSimdDefaultAlignment(QualType) const override {
-     return HasAVX ? 32 : 16;
-   }
-};
-
-}
-
-void NaClX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
-  if (FI.getASTCallingConvention() == CC_PnaclCall)
-    PInfo.computeInfo(FI);
-  else
-    NInfo.computeInfo(FI);
-}
-
-llvm::Value *NaClX86_64ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
-                                          CodeGenFunction &CGF) const {
-  // Always use the native convention; calling pnacl-style varargs functions
-  // is unuspported.
-  return NInfo.EmitVAArg(VAListAddr, Ty, CGF);
-}
-
-
 // PowerPC-32
 namespace {
 /// PPC32_SVR4_ABIInfo - The 32-bit PowerPC ELF (SVR4) ABI information.
@@ -3114,6 +3134,10 @@
   unsigned getOpenMPSimdDefaultAlignment(QualType) const override {
     return 16; // Natural alignment for Altivec vectors.
   }
+
+  bool hasSjLjLowering(CodeGen::CodeGenFunction &CGF) const override {
+    return true;
+  }
 };
 
 }
@@ -3326,6 +3350,10 @@
   unsigned getOpenMPSimdDefaultAlignment(QualType) const override {
     return 16; // Natural alignment for Altivec and VSX vectors.
   }
+
+  bool hasSjLjLowering(CodeGen::CodeGenFunction &CGF) const override {
+    return true;
+  }
 };
 
 class PPC64TargetCodeGenInfo : public DefaultTargetCodeGenInfo {
@@ -3343,6 +3371,10 @@
   unsigned getOpenMPSimdDefaultAlignment(QualType) const override {
     return 16; // Natural alignment for Altivec vectors.
   }
+
+  bool hasSjLjLowering(CodeGen::CodeGenFunction &CGF) const override {
+    return true;
+  }
 };
 
 }
@@ -3645,7 +3677,7 @@
       llvm::Type *CoerceTy;
       if (Bits > GPRBits) {
         CoerceTy = llvm::IntegerType::get(getVMContext(), GPRBits);
-        CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy, NULL);
+        CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy, nullptr);
       } else
         CoerceTy = llvm::IntegerType::get(getVMContext(),
                                           llvm::RoundUpToAlignment(Bits, 8));
@@ -3822,9 +3854,7 @@
   bool isDarwinPCS() const { return Kind == DarwinPCS; }
 
   ABIArgInfo classifyReturnType(QualType RetTy) const;
-  ABIArgInfo classifyArgumentType(QualType RetTy, unsigned &AllocatedVFP,
-                                  bool &IsHA, unsigned &AllocatedGPR,
-                                  bool &IsSmallAggr, bool IsNamedArg) const;
+  ABIArgInfo classifyArgumentType(QualType RetTy) const;
   bool isHomogeneousAggregateBaseType(QualType Ty) const override;
   bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                          uint64_t Members) const override;
@@ -3832,68 +3862,11 @@
   bool isIllegalVectorType(QualType Ty) const;
 
   void computeInfo(CGFunctionInfo &FI) const override {
-    // To correctly handle Homogeneous Aggregate, we need to keep track of the
-    // number of SIMD and Floating-point registers allocated so far.
-    // If the argument is an HFA or an HVA and there are sufficient unallocated
-    // SIMD and Floating-point registers, then the argument is allocated to SIMD
-    // and Floating-point Registers (with one register per member of the HFA or
-    // HVA). Otherwise, the NSRN is set to 8.
-    unsigned AllocatedVFP = 0;
-
-    // To correctly handle small aggregates, we need to keep track of the number
-    // of GPRs allocated so far. If the small aggregate can't all fit into
-    // registers, it will be on stack. We don't allow the aggregate to be
-    // partially in registers.
-    unsigned AllocatedGPR = 0;
-
-    // Find the number of named arguments. Variadic arguments get special
-    // treatment with the Darwin ABI.
-    unsigned NumRequiredArgs = FI.getNumRequiredArgs();
-
     if (!getCXXABI().classifyReturnType(FI))
       FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
-    unsigned ArgNo = 0;
-    for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
-         it != ie; ++it, ++ArgNo) {
-      unsigned PreAllocation = AllocatedVFP, PreGPR = AllocatedGPR;
-      bool IsHA = false, IsSmallAggr = false;
-      const unsigned NumVFPs = 8;
-      const unsigned NumGPRs = 8;
-      bool IsNamedArg = ArgNo < NumRequiredArgs;
-      it->info = classifyArgumentType(it->type, AllocatedVFP, IsHA,
-                                      AllocatedGPR, IsSmallAggr, IsNamedArg);
 
-      // Under AAPCS the 64-bit stack slot alignment means we can't pass HAs
-      // as sequences of floats since they'll get "holes" inserted as
-      // padding by the back end.
-      if (IsHA && AllocatedVFP > NumVFPs && !isDarwinPCS() &&
-          getContext().getTypeAlign(it->type) < 64) {
-        uint32_t NumStackSlots = getContext().getTypeSize(it->type);
-        NumStackSlots = llvm::RoundUpToAlignment(NumStackSlots, 64) / 64;
-
-        llvm::Type *CoerceTy = llvm::ArrayType::get(
-            llvm::Type::getDoubleTy(getVMContext()), NumStackSlots);
-        it->info = ABIArgInfo::getDirect(CoerceTy);
-      }
-
-      // If we do not have enough VFP registers for the HA, any VFP registers
-      // that are unallocated are marked as unavailable. To achieve this, we add
-      // padding of (NumVFPs - PreAllocation) floats.
-      if (IsHA && AllocatedVFP > NumVFPs && PreAllocation < NumVFPs) {
-        llvm::Type *PaddingTy = llvm::ArrayType::get(
-            llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocation);
-        it->info.setPaddingType(PaddingTy);
-      }
-
-      // If we do not have enough GPRs for the small aggregate, any GPR regs
-      // that are unallocated are marked as unavailable.
-      if (IsSmallAggr && AllocatedGPR > NumGPRs && PreGPR < NumGPRs) {
-        llvm::Type *PaddingTy = llvm::ArrayType::get(
-            llvm::Type::getInt32Ty(getVMContext()), NumGPRs - PreGPR);
-        it->info =
-            ABIArgInfo::getDirect(it->info.getCoerceToType(), 0, PaddingTy);
-      }
-    }
+    for (auto &it : FI.arguments())
+      it.info = classifyArgumentType(it.type);
   }
 
   llvm::Value *EmitDarwinVAArg(llvm::Value *VAListAddr, QualType Ty,
@@ -3924,12 +3897,7 @@
 };
 }
 
-ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
-                                                unsigned &AllocatedVFP,
-                                                bool &IsHA,
-                                                unsigned &AllocatedGPR,
-                                                bool &IsSmallAggr,
-                                                bool IsNamedArg) const {
+ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
   Ty = useFirstFieldIfTransparentUnion(Ty);
 
   // Handle illegal vector types here.
@@ -3938,53 +3906,30 @@
     // Android promotes <2 x i8> to i16, not i32
     if (Size <= 16) {
       llvm::Type *ResType = llvm::Type::getInt16Ty(getVMContext());
-      AllocatedGPR++;
       return ABIArgInfo::getDirect(ResType);
     }
     if (Size == 32) {
       llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext());
-      AllocatedGPR++;
       return ABIArgInfo::getDirect(ResType);
     }
     if (Size == 64) {
       llvm::Type *ResType =
           llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2);
-      AllocatedVFP++;
       return ABIArgInfo::getDirect(ResType);
     }
     if (Size == 128) {
       llvm::Type *ResType =
           llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4);
-      AllocatedVFP++;
       return ABIArgInfo::getDirect(ResType);
     }
-    AllocatedGPR++;
     return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
   }
-  if (Ty->isVectorType())
-    // Size of a legal vector should be either 64 or 128.
-    AllocatedVFP++;
-  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
-    if (BT->getKind() == BuiltinType::Half ||
-        BT->getKind() == BuiltinType::Float ||
-        BT->getKind() == BuiltinType::Double ||
-        BT->getKind() == BuiltinType::LongDouble)
-      AllocatedVFP++;
-  }
 
   if (!isAggregateTypeForABI(Ty)) {
     // Treat an enum type as its underlying type.
     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
       Ty = EnumTy->getDecl()->getIntegerType();
 
-    if (!Ty->isFloatingType() && !Ty->isVectorType()) {
-      unsigned Alignment = getContext().getTypeAlign(Ty);
-      if (!isDarwinPCS() && Alignment > 64)
-        AllocatedGPR = llvm::RoundUpToAlignment(AllocatedGPR, Alignment / 64);
-
-      int RegsNeeded = getContext().getTypeSize(Ty) > 64 ? 2 : 1;
-      AllocatedGPR += RegsNeeded;
-    }
     return (Ty->isPromotableIntegerType() && isDarwinPCS()
                 ? ABIArgInfo::getExtend()
                 : ABIArgInfo::getDirect());
@@ -3993,9 +3938,8 @@
   // Structures with either a non-trivial destructor or a non-trivial
   // copy constructor are always indirect.
   if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
-    AllocatedGPR++;
     return ABIArgInfo::getIndirect(0, /*ByVal=*/RAA ==
-                                          CGCXXABI::RAA_DirectInMemory);
+                                   CGCXXABI::RAA_DirectInMemory);
   }
 
   // Empty records are always ignored on Darwin, but actually passed in C++ mode
@@ -4004,7 +3948,6 @@
     if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS())
       return ABIArgInfo::getIgnore();
 
-    ++AllocatedGPR;
     return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
   }
 
@@ -4012,28 +3955,16 @@
   const Type *Base = nullptr;
   uint64_t Members = 0;
   if (isHomogeneousAggregate(Ty, Base, Members)) {
-    IsHA = true;
-    if (!IsNamedArg && isDarwinPCS()) {
-      // With the Darwin ABI, variadic arguments are always passed on the stack
-      // and should not be expanded. Treat variadic HFAs as arrays of doubles.
-      uint64_t Size = getContext().getTypeSize(Ty);
-      llvm::Type *BaseTy = llvm::Type::getDoubleTy(getVMContext());
-      return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64));
-    }
-    AllocatedVFP += Members;
-    return ABIArgInfo::getExpand();
+    return ABIArgInfo::getDirect(
+        llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));
   }
 
   // Aggregates <= 16 bytes are passed directly in registers or on the stack.
   uint64_t Size = getContext().getTypeSize(Ty);
   if (Size <= 128) {
     unsigned Alignment = getContext().getTypeAlign(Ty);
-    if (!isDarwinPCS() && Alignment > 64)
-      AllocatedGPR = llvm::RoundUpToAlignment(AllocatedGPR, Alignment / 64);
-
     Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes
-    AllocatedGPR += Size / 64;
-    IsSmallAggr = true;
+
     // We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
     // For aggregates with 16-byte alignment, we use i128.
     if (Alignment < 128 && Size == 128) {
@@ -4043,7 +3974,6 @@
     return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
   }
 
-  AllocatedGPR++;
   return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
 }
 
@@ -4119,14 +4049,25 @@
   return Members <= 4;
 }
 
-llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty,
-                                          CodeGenFunction &CGF) const {
-  unsigned AllocatedGPR = 0, AllocatedVFP = 0;
-  bool IsHA = false, IsSmallAggr = false;
-  ABIArgInfo AI = classifyArgumentType(Ty, AllocatedVFP, IsHA, AllocatedGPR,
-                                       IsSmallAggr, false /*IsNamedArg*/);
+llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr,
+                                            QualType Ty,
+                                            CodeGenFunction &CGF) const {
+  ABIArgInfo AI = classifyArgumentType(Ty);
   bool IsIndirect = AI.isIndirect();
 
+  llvm::Type *BaseTy = CGF.ConvertType(Ty);
+  if (IsIndirect)
+    BaseTy = llvm::PointerType::getUnqual(BaseTy);
+  else if (AI.getCoerceToType())
+    BaseTy = AI.getCoerceToType();
+
+  unsigned NumRegs = 1;
+  if (llvm::ArrayType *ArrTy = dyn_cast<llvm::ArrayType>(BaseTy)) {
+    BaseTy = ArrTy->getElementType();
+    NumRegs = ArrTy->getNumElements();
+  }
+  bool IsFPR = BaseTy->isFloatingPointTy() || BaseTy->isVectorTy();
+
   // The AArch64 va_list type and handling is specified in the Procedure Call
   // Standard, section B.4:
   //
@@ -4146,21 +4087,19 @@
 
   llvm::Value *reg_offs_p = nullptr, *reg_offs = nullptr;
   int reg_top_index;
-  int RegSize;
-  if (AllocatedGPR) {
-    assert(!AllocatedVFP && "Arguments never split between int & VFP regs");
+  int RegSize = IsIndirect ? 8 : getContext().getTypeSize(Ty) / 8;
+  if (!IsFPR) {
     // 3 is the field number of __gr_offs
     reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p");
     reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs");
     reg_top_index = 1; // field number for __gr_top
-    RegSize = 8 * AllocatedGPR;
+    RegSize = llvm::RoundUpToAlignment(RegSize, 8);
   } else {
-    assert(!AllocatedGPR && "Argument must go in VFP or int regs");
     // 4 is the field number of __vr_offs.
     reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p");
     reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs");
     reg_top_index = 2; // field number for __vr_top
-    RegSize = 16 * AllocatedVFP;
+    RegSize = 16 * NumRegs;
   }
 
   //=======================================
@@ -4184,7 +4123,7 @@
   // Integer arguments may need to correct register alignment (for example a
   // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we
   // align __gr_offs to calculate the potential address.
-  if (AllocatedGPR && !IsIndirect && Ctx.getTypeAlign(Ty) > 64) {
+  if (!IsFPR && !IsIndirect && Ctx.getTypeAlign(Ty) > 64) {
     int Align = Ctx.getTypeAlign(Ty) / 8;
 
     reg_offs = CGF.Builder.CreateAdd(
@@ -4428,17 +4367,10 @@
 
 private:
   ABIKind Kind;
-  mutable int VFPRegs[16];
-  const unsigned NumVFPs;
-  const unsigned NumGPRs;
-  mutable unsigned AllocatedGPRs;
-  mutable unsigned AllocatedVFPs;
 
 public:
-  ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : ABIInfo(CGT), Kind(_Kind),
-    NumVFPs(16), NumGPRs(4) {
-    setRuntimeCC();
-    resetAllocatedRegs();
+  ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : ABIInfo(CGT), Kind(_Kind) {
+    setCCs();
   }
 
   bool isEABI() const {
@@ -4468,8 +4400,7 @@
 
 private:
   ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic) const;
-  ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic,
-                                  bool &IsCPRC) const;
+  ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic) const;
   bool isIllegalVectorType(QualType Ty) const;
 
   bool isHomogeneousAggregateBaseType(QualType Ty) const override;
@@ -4483,11 +4414,7 @@
 
   llvm::CallingConv::ID getLLVMDefaultCC() const;
   llvm::CallingConv::ID getABIDefaultCC() const;
-  void setRuntimeCC();
-
-  void markAllocatedGPRs(unsigned Alignment, unsigned NumRequired) const;
-  void markAllocatedVFPs(unsigned Alignment, unsigned NumRequired) const;
-  void resetAllocatedRegs(void) const;
+  void setCCs();
 };
 
 class ARMTargetCodeGenInfo : public TargetCodeGenInfo {
@@ -4559,57 +4486,50 @@
                                               B));
   }
 
+  bool hasSjLjLowering(CodeGen::CodeGenFunction &CGF) const override {
+    return false; // Always off for now; the intended check is kept below.
+    // FIXME: backend implementation too restricted, even on Darwin.
+    // return CGF.getTarget().getTriple().isOSDarwin();
+  }
 };
 
+class WindowsARMTargetCodeGenInfo : public ARMTargetCodeGenInfo {
+  void addStackProbeSizeTargetAttribute(const Decl *D, llvm::GlobalValue *GV,
+                                        CodeGen::CodeGenModule &CGM) const;
+
+public:
+  WindowsARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIInfo::ABIKind K)
+      : ARMTargetCodeGenInfo(CGT, K) {} // Forwards to the ARM base unchanged.
+
+  void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+                           CodeGen::CodeGenModule &CGM) const override;
+};
+
+void WindowsARMTargetCodeGenInfo::addStackProbeSizeTargetAttribute(
+    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
+  if (!isa<FunctionDecl>(D))
+    return; // Not a function declaration: nothing to annotate.
+  if (CGM.getCodeGenOpts().StackProbeSize == 4096)
+    return; // Skipped for 4096, presumably the default probe size.
+
+  llvm::Function *F = cast<llvm::Function>(GV); // D was checked above.
+  F->addFnAttr("stack-probe-size", // String attribute carrying the size.
+               llvm::utostr(CGM.getCodeGenOpts().StackProbeSize));
+}
+
+void WindowsARMTargetCodeGenInfo::SetTargetAttributes(
+    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
+  ARMTargetCodeGenInfo::SetTargetAttributes(D, GV, CGM); // Base attrs first.
+  addStackProbeSizeTargetAttribute(D, GV, CGM); // Then the probe size.
+}
 }
 
 void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const {
-  // To correctly handle Homogeneous Aggregate, we need to keep track of the
-  // VFP registers allocated so far.
-  // C.1.vfp If the argument is a VFP CPRC and there are sufficient consecutive
-  // VFP registers of the appropriate type unallocated then the argument is
-  // allocated to the lowest-numbered sequence of such registers.
-  // C.2.vfp If the argument is a VFP CPRC then any VFP registers that are
-  // unallocated are marked as unavailable. 
-  resetAllocatedRegs();
-
-  if (getCXXABI().classifyReturnType(FI)) {
-    if (FI.getReturnInfo().isIndirect())
-      markAllocatedGPRs(1, 1);
-  } else {
+  if (!getCXXABI().classifyReturnType(FI))
     FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic());
-  }
-  for (auto &I : FI.arguments()) {
-    unsigned PreAllocationVFPs = AllocatedVFPs;
-    unsigned PreAllocationGPRs = AllocatedGPRs;
-    bool IsCPRC = false;
-    // 6.1.2.3 There is one VFP co-processor register class using registers
-    // s0-s15 (d0-d7) for passing arguments.
-    I.info = classifyArgumentType(I.type, FI.isVariadic(), IsCPRC);
 
-    // If we have allocated some arguments onto the stack (due to running
-    // out of VFP registers), we cannot split an argument between GPRs and
-    // the stack. If this situation occurs, we add padding to prevent the
-    // GPRs from being used. In this situation, the current argument could
-    // only be allocated by rule C.8, so rule C.6 would mark these GPRs as
-    // unusable anyway.
-    // We do not have to do this if the argument is being passed ByVal, as the
-    // backend can handle that situation correctly.
-    const bool StackUsed = PreAllocationGPRs > NumGPRs || PreAllocationVFPs > NumVFPs;
-    const bool IsByVal = I.info.isIndirect() && I.info.getIndirectByVal();
-    if (!IsCPRC && PreAllocationGPRs < NumGPRs && AllocatedGPRs > NumGPRs &&
-        StackUsed && !IsByVal) {
-      llvm::Type *PaddingTy = llvm::ArrayType::get(
-          llvm::Type::getInt32Ty(getVMContext()), NumGPRs - PreAllocationGPRs);
-      if (I.info.canHaveCoerceToType()) {
-        I.info = ABIArgInfo::getDirect(I.info.getCoerceToType() /* type */,
-                                       0 /* offset */, PaddingTy, true);
-      } else {
-        I.info = ABIArgInfo::getDirect(nullptr /* type */, 0 /* offset */,
-                                       PaddingTy, true);
-      }
-    }
-  }
+  for (auto &I : FI.arguments())
+    I.info = classifyArgumentType(I.type, FI.isVariadic());
 
   // Always honor user-specified calling convention.
   if (FI.getCallingConvention() != llvm::CallingConv::C)
@@ -4617,7 +4537,7 @@
 
   llvm::CallingConv::ID cc = getRuntimeCC();
   if (cc != llvm::CallingConv::C)
-    FI.setEffectiveCallingConvention(cc);    
+    FI.setEffectiveCallingConvention(cc);
 }
 
 /// Return the default calling convention that LLVM will use.
@@ -4642,7 +4562,7 @@
   llvm_unreachable("bad ABI kind");
 }
 
-void ARMABIInfo::setRuntimeCC() {
+void ARMABIInfo::setCCs() {
   assert(getRuntimeCC() == llvm::CallingConv::C);
 
   // Don't muddy up the IR with a ton of explicit annotations if
@@ -4650,66 +4570,13 @@
   llvm::CallingConv::ID abiCC = getABIDefaultCC();
   if (abiCC != getLLVMDefaultCC())
     RuntimeCC = abiCC;
+
+  BuiltinCC = (getABIKind() == APCS ?
+               llvm::CallingConv::ARM_APCS : llvm::CallingConv::ARM_AAPCS);
 }
 
-/// markAllocatedVFPs - update VFPRegs according to the alignment and
-/// number of VFP registers (unit is S register) requested.
-void ARMABIInfo::markAllocatedVFPs(unsigned Alignment,
-                                   unsigned NumRequired) const {
-  // Early Exit.
-  if (AllocatedVFPs >= 16) {
-    // We use AllocatedVFP > 16 to signal that some CPRCs were allocated on
-    // the stack.
-    AllocatedVFPs = 17;
-    return;
-  }
-  // C.1.vfp If the argument is a VFP CPRC and there are sufficient consecutive
-  // VFP registers of the appropriate type unallocated then the argument is
-  // allocated to the lowest-numbered sequence of such registers.
-  for (unsigned I = 0; I < 16; I += Alignment) {
-    bool FoundSlot = true;
-    for (unsigned J = I, JEnd = I + NumRequired; J < JEnd; J++)
-      if (J >= 16 || VFPRegs[J]) {
-         FoundSlot = false;
-         break;
-      }
-    if (FoundSlot) {
-      for (unsigned J = I, JEnd = I + NumRequired; J < JEnd; J++)
-        VFPRegs[J] = 1;
-      AllocatedVFPs += NumRequired;
-      return;
-    }
-  }
-  // C.2.vfp If the argument is a VFP CPRC then any VFP registers that are
-  // unallocated are marked as unavailable.
-  for (unsigned I = 0; I < 16; I++)
-    VFPRegs[I] = 1;
-  AllocatedVFPs = 17; // We do not have enough VFP registers.
-}
-
-/// Update AllocatedGPRs to record the number of general purpose registers
-/// which have been allocated. It is valid for AllocatedGPRs to go above 4,
-/// this represents arguments being stored on the stack.
-void ARMABIInfo::markAllocatedGPRs(unsigned Alignment,
-                                   unsigned NumRequired) const {
-  assert((Alignment == 1 || Alignment == 2) && "Alignment must be 4 or 8 bytes");
-
-  if (Alignment == 2 && AllocatedGPRs & 0x1)
-    AllocatedGPRs += 1;
-
-  AllocatedGPRs += NumRequired;
-}
-
-void ARMABIInfo::resetAllocatedRegs(void) const {
-  AllocatedGPRs = 0;
-  AllocatedVFPs = 0;
-  for (unsigned i = 0; i < NumVFPs; ++i)
-    VFPRegs[i] = 0;
-}
-
-ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
-                                            bool &IsCPRC) const {
-  // We update number of allocated VFPs according to
+ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty,
+                                            bool isVariadic) const {
   // 6.1.2.1 The following argument types are VFP CPRCs:
   //   A single-precision floating-point type (including promoted
   //   half-precision types); A double-precision floating-point type;
@@ -4727,58 +4594,20 @@
     if (Size <= 32) {
       llvm::Type *ResType =
           llvm::Type::getInt32Ty(getVMContext());
-      markAllocatedGPRs(1, 1);
       return ABIArgInfo::getDirect(ResType);
     }
     if (Size == 64) {
       llvm::Type *ResType = llvm::VectorType::get(
           llvm::Type::getInt32Ty(getVMContext()), 2);
-      if (getABIKind() == ARMABIInfo::AAPCS || isVariadic){
-        markAllocatedGPRs(2, 2);
-      } else {
-        markAllocatedVFPs(2, 2);
-        IsCPRC = true;
-      }
       return ABIArgInfo::getDirect(ResType);
     }
     if (Size == 128) {
       llvm::Type *ResType = llvm::VectorType::get(
           llvm::Type::getInt32Ty(getVMContext()), 4);
-      if (getABIKind() == ARMABIInfo::AAPCS || isVariadic) {
-        markAllocatedGPRs(2, 4);
-      } else {
-        markAllocatedVFPs(4, 4);
-        IsCPRC = true;
-      }
       return ABIArgInfo::getDirect(ResType);
     }
-    markAllocatedGPRs(1, 1);
     return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
   }
-  // Update VFPRegs for legal vector types.
-  if (getABIKind() == ARMABIInfo::AAPCS_VFP && !isVariadic) {
-    if (const VectorType *VT = Ty->getAs<VectorType>()) {
-      uint64_t Size = getContext().getTypeSize(VT);
-      // Size of a legal vector should be power of 2 and above 64.
-      markAllocatedVFPs(Size >= 128 ? 4 : 2, Size / 32);
-      IsCPRC = true;
-    }
-  }
-  // Update VFPRegs for floating point types.
-  if (getABIKind() == ARMABIInfo::AAPCS_VFP && !isVariadic) {
-    if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
-      if (BT->getKind() == BuiltinType::Half ||
-          BT->getKind() == BuiltinType::Float) {
-        markAllocatedVFPs(1, 1);
-        IsCPRC = true;
-      }
-      if (BT->getKind() == BuiltinType::Double ||
-          BT->getKind() == BuiltinType::LongDouble) {
-        markAllocatedVFPs(2, 2);
-        IsCPRC = true;
-      }
-    }
-  }
 
   if (!isAggregateTypeForABI(Ty)) {
     // Treat an enum type as its underlying type.
@@ -4786,15 +4615,11 @@
       Ty = EnumTy->getDecl()->getIntegerType();
     }
 
-    unsigned Size = getContext().getTypeSize(Ty);
-    if (!IsCPRC)
-      markAllocatedGPRs(Size > 32 ? 2 : 1, (Size + 31) / 32);
     return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend()
                                           : ABIArgInfo::getDirect());
   }
 
   if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
-    markAllocatedGPRs(1, 1);
     return ABIArgInfo::getIndirect(0, RAA == CGCXXABI::RAA_DirectInMemory);
   }
 
@@ -4810,19 +4635,6 @@
     if (isHomogeneousAggregate(Ty, Base, Members)) {
       assert(Base && "Base class should be set for homogeneous aggregate");
       // Base can be a floating-point or a vector.
-      if (Base->isVectorType()) {
-        // ElementSize is in number of floats.
-        unsigned ElementSize = getContext().getTypeSize(Base) == 64 ? 2 : 4;
-        markAllocatedVFPs(ElementSize,
-                          Members * ElementSize);
-      } else if (Base->isSpecificBuiltinType(BuiltinType::Float))
-        markAllocatedVFPs(1, Members);
-      else {
-        assert(Base->isSpecificBuiltinType(BuiltinType::Double) ||
-               Base->isSpecificBuiltinType(BuiltinType::LongDouble));
-        markAllocatedVFPs(2, Members * 2);
-      }
-      IsCPRC = true;
       return ABIArgInfo::getDirect(nullptr, 0, nullptr, false);
     }
   }
@@ -4841,7 +4653,6 @@
     // argument is greater than 64 bytes, this will always use up any available
     // registers (of which there are 4). We also don't care about getting the
     // alignment right, because general-purpose registers cannot be back-filled.
-    markAllocatedGPRs(1, 4);
     return ABIArgInfo::getIndirect(TyAlign, /*ByVal=*/true,
            /*Realign=*/TyAlign > ABIAlign);
   }
@@ -4854,11 +4665,9 @@
   if (getContext().getTypeAlign(Ty) <= 32) {
     ElemTy = llvm::Type::getInt32Ty(getVMContext());
     SizeRegs = (getContext().getTypeSize(Ty) + 31) / 32;
-    markAllocatedGPRs(1, SizeRegs);
   } else {
     ElemTy = llvm::Type::getInt64Ty(getVMContext());
     SizeRegs = (getContext().getTypeSize(Ty) + 63) / 64;
-    markAllocatedGPRs(2, SizeRegs * 2);
   }
 
   return ABIArgInfo::getDirect(llvm::ArrayType::get(ElemTy, SizeRegs));
@@ -4958,7 +4767,6 @@
 
   // Large vector types should be returned via memory.
   if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128) {
-    markAllocatedGPRs(1, 1);
     return ABIArgInfo::getIndirect(0);
   }
 
@@ -4996,7 +4804,6 @@
     }
 
     // Otherwise return in memory.
-    markAllocatedGPRs(1, 1);
     return ABIArgInfo::getIndirect(0);
   }
 
@@ -5032,7 +4839,6 @@
     return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
   }
 
-  markAllocatedGPRs(1, 1);
   return ABIArgInfo::getIndirect(0);
 }
 
@@ -5145,42 +4951,6 @@
   return AddrTyped;
 }
 
-namespace {
-
-class NaClARMABIInfo : public ABIInfo {
- public:
-  NaClARMABIInfo(CodeGen::CodeGenTypes &CGT, ARMABIInfo::ABIKind Kind)
-      : ABIInfo(CGT), PInfo(CGT), NInfo(CGT, Kind) {}
-  void computeInfo(CGFunctionInfo &FI) const override;
-  llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
-                         CodeGenFunction &CGF) const override;
- private:
-  PNaClABIInfo PInfo; // Used for generating calls with pnaclcall callingconv.
-  ARMABIInfo NInfo; // Used for everything else.
-};
-
-class NaClARMTargetCodeGenInfo : public TargetCodeGenInfo  {
- public:
-  NaClARMTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, ARMABIInfo::ABIKind Kind)
-      : TargetCodeGenInfo(new NaClARMABIInfo(CGT, Kind)) {}
-};
-
-}
-
-void NaClARMABIInfo::computeInfo(CGFunctionInfo &FI) const {
-  if (FI.getASTCallingConvention() == CC_PnaclCall)
-    PInfo.computeInfo(FI);
-  else
-    static_cast<const ABIInfo&>(NInfo).computeInfo(FI);
-}
-
-llvm::Value *NaClARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
-                                       CodeGenFunction &CGF) const {
-  // Always use the native convention; calling pnacl-style varargs functions
-  // is unsupported.
-  return static_cast<const ABIInfo&>(NInfo).EmitVAArg(VAListAddr, Ty, CGF);
-}
-
 //===----------------------------------------------------------------------===//
 // NVPTX ABI Implementation
 //===----------------------------------------------------------------------===//
@@ -5314,9 +5084,10 @@
   // Get "nvvm.annotations" metadata node
   llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
 
-  llvm::Value *MDVals[] = {
-      F, llvm::MDString::get(Ctx, Name),
-      llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand)};
+  llvm::Metadata *MDVals[] = {
+      llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, Name),
+      llvm::ConstantAsMetadata::get(
+          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
   // Append metadata to nvvm.annotations
   MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
 }
@@ -5785,6 +5556,8 @@
 
 ABIArgInfo
 MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const {
+  Ty = useFirstFieldIfTransparentUnion(Ty);
+
   uint64_t OrigOffset = Offset;
   uint64_t TySize = getContext().getTypeSize(Ty);
   uint64_t Align = getContext().getTypeAlign(Ty) / 8;
@@ -5923,10 +5696,13 @@
   llvm::Type *BP = CGF.Int8PtrTy;
   llvm::Type *BPP = CGF.Int8PtrPtrTy;
 
-  // Integer arguments are promoted 32-bit on O32 and 64-bit on N32/N64.
+  // Integer arguments are promoted to 32-bit on O32 and 64-bit on N32/N64.
+  // Pointers are also promoted in the same way but this only matters for N32.
   unsigned SlotSizeInBits = IsO32 ? 32 : 64;
-  if (Ty->isIntegerType() &&
-      CGF.getContext().getIntWidth(Ty) < SlotSizeInBits) {
+  unsigned PtrWidth = getTarget().getPointerWidth(0);
+  if ((Ty->isIntegerType() &&
+          CGF.getContext().getIntWidth(Ty) < SlotSizeInBits) ||
+      (Ty->isPointerType() && PtrWidth < SlotSizeInBits)) {
     Ty = CGF.getContext().getIntTypeForBitwidth(SlotSizeInBits,
                                                 Ty->isSignedIntegerType());
   }
@@ -5938,7 +5714,6 @@
       std::min(getContext().getTypeAlign(Ty) / 8, StackAlignInBytes);
   llvm::Type *PTy = llvm::PointerType::getUnqual(CGF.ConvertType(Ty));
   llvm::Value *AddrTyped;
-  unsigned PtrWidth = getTarget().getPointerWidth(0);
   llvm::IntegerType *IntTy = (PtrWidth == 32) ? CGF.Int32Ty : CGF.Int64Ty;
 
   if (TypeAlign > MinABIStackAlignInBytes) {
@@ -6028,20 +5803,24 @@
         llvm::NamedMDNode *OpenCLMetadata = 
             M.getModule().getOrInsertNamedMetadata("opencl.kernel_wg_size_info");
 
-        SmallVector<llvm::Value*, 5> Operands;
-        Operands.push_back(F);
+        SmallVector<llvm::Metadata *, 5> Operands;
+        Operands.push_back(llvm::ConstantAsMetadata::get(F));
 
-        Operands.push_back(llvm::Constant::getIntegerValue(M.Int32Ty, 
-                             llvm::APInt(32, Attr->getXDim())));
-        Operands.push_back(llvm::Constant::getIntegerValue(M.Int32Ty,
-                             llvm::APInt(32, Attr->getYDim())));
-        Operands.push_back(llvm::Constant::getIntegerValue(M.Int32Ty, 
-                             llvm::APInt(32, Attr->getZDim())));
+        Operands.push_back(
+            llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue(
+                M.Int32Ty, llvm::APInt(32, Attr->getXDim()))));
+        Operands.push_back(
+            llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue(
+                M.Int32Ty, llvm::APInt(32, Attr->getYDim()))));
+        Operands.push_back(
+            llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue(
+                M.Int32Ty, llvm::APInt(32, Attr->getZDim()))));
 
         // Add a boolean constant operand for "required" (true) or "hint" (false)
         // for implementing the work_group_size_hint attr later. Currently 
         // always true as the hint is not yet implemented.
-        Operands.push_back(llvm::ConstantInt::getTrue(Context));
+        Operands.push_back(
+            llvm::ConstantAsMetadata::get(llvm::ConstantInt::getTrue(Context)));
         OpenCLMetadata->addOperand(llvm::MDNode::get(Context, Operands));
       }
     }
@@ -6183,6 +5962,45 @@
   return AddrTyped;
 }
 
+//===----------------------------------------------------------------------===//
+// AMDGPU ABI Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+  AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
+    : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} // No custom ABIInfo.
+  void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+                           CodeGen::CodeGenModule &M) const override;
+};
+
+}
+
+void AMDGPUTargetCodeGenInfo::SetTargetAttributes(
+  const Decl *D,
+  llvm::GlobalValue *GV,
+  CodeGen::CodeGenModule &M) const {
+  const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
+  if (!FD)
+    return; // Only function declarations are annotated.
+
+  if (const auto Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) {
+    llvm::Function *F = cast<llvm::Function>(GV);
+    uint32_t NumVGPR = Attr->getNumVGPR();
+    if (NumVGPR != 0) // A count of zero emits no attribute.
+      F->addFnAttr("amdgpu_num_vgpr", llvm::utostr(NumVGPR));
+  }
+
+  if (const auto Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
+    llvm::Function *F = cast<llvm::Function>(GV);
+    unsigned NumSGPR = Attr->getNumSGPR();
+    if (NumSGPR != 0) // A count of zero emits no attribute.
+      F->addFnAttr("amdgpu_num_sgpr", llvm::utostr(NumSGPR));
+  }
+}
+
 
 //===----------------------------------------------------------------------===//
 // SPARC v9 ABI Implementation.
@@ -6766,8 +6584,8 @@
   SmallStringEnc Enc;
   if (getTypeString(Enc, D, CGM, TSC)) {
     llvm::LLVMContext &Ctx = CGM.getModule().getContext();
-    llvm::SmallVector<llvm::Value *, 2> MDVals;
-    MDVals.push_back(GV);
+    llvm::SmallVector<llvm::Metadata *, 2> MDVals;
+    MDVals.push_back(llvm::ConstantAsMetadata::get(GV));
     MDVals.push_back(llvm::MDString::get(Ctx, Enc.str()));
     llvm::NamedMDNode *MD =
       CGM.getModule().getOrInsertNamedMetadata("xcore.typestrings");
@@ -7153,6 +6971,12 @@
   case llvm::Triple::thumb:
   case llvm::Triple::thumbeb:
     {
+      if (Triple.getOS() == llvm::Triple::Win32) {
+        TheTargetCodeGenInfo =
+            new WindowsARMTargetCodeGenInfo(Types, ARMABIInfo::AAPCS_VFP);
+        return *TheTargetCodeGenInfo;
+      }
+
       ARMABIInfo::ABIKind Kind = ARMABIInfo::AAPCS;
       if (getTarget().getABI() == "apcs-gnu")
         Kind = ARMABIInfo::APCS;
@@ -7161,14 +6985,7 @@
                 Triple.getEnvironment() == llvm::Triple::GNUEABIHF))
         Kind = ARMABIInfo::AAPCS_VFP;
 
-      switch (Triple.getOS()) {
-        case llvm::Triple::NaCl:
-          return *(TheTargetCodeGenInfo =
-                   new NaClARMTargetCodeGenInfo(Types, Kind));
-        default:
-          return *(TheTargetCodeGenInfo =
-                   new ARMTargetCodeGenInfo(Types, Kind));
-      }
+      return *(TheTargetCodeGenInfo = new ARMTargetCodeGenInfo(Types, Kind));
     }
 
   case llvm::Triple::ppc:
@@ -7210,7 +7027,7 @@
     bool IsDarwinVectorABI = Triple.isOSDarwin();
     bool IsSmallStructInRegABI =
         X86_32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts);
-    bool IsWin32FloatStructABI = Triple.isWindowsMSVCEnvironment();
+    bool IsWin32FloatStructABI = Triple.isOSWindows() && !Triple.isOSCygMing();
 
     if (Triple.getOS() == llvm::Triple::Win32) {
       return *(TheTargetCodeGenInfo =
@@ -7234,9 +7051,8 @@
     case llvm::Triple::Win32:
       return *(TheTargetCodeGenInfo =
                    new WinX86_64TargetCodeGenInfo(Types, HasAVX));
-    case llvm::Triple::NaCl:
-      return *(TheTargetCodeGenInfo =
-                   new NaClX86_64TargetCodeGenInfo(Types, HasAVX));
+    case llvm::Triple::PS4:
+      return *(TheTargetCodeGenInfo = new PS4TargetCodeGenInfo(Types, HasAVX));
     default:
       return *(TheTargetCodeGenInfo =
                    new X86_64TargetCodeGenInfo(Types, HasAVX));
@@ -7244,6 +7060,10 @@
   }
   case llvm::Triple::hexagon:
     return *(TheTargetCodeGenInfo = new HexagonTargetCodeGenInfo(Types));
+  case llvm::Triple::r600:
+    return *(TheTargetCodeGenInfo = new AMDGPUTargetCodeGenInfo(Types));
+  case llvm::Triple::amdgcn:
+    return *(TheTargetCodeGenInfo = new AMDGPUTargetCodeGenInfo(Types));
   case llvm::Triple::sparcv9:
     return *(TheTargetCodeGenInfo = new SparcV9TargetCodeGenInfo(Types));
   case llvm::Triple::xcore:
diff --git a/lib/CodeGen/TargetInfo.h b/lib/CodeGen/TargetInfo.h
index cc469d6..87f1376 100644
--- a/lib/CodeGen/TargetInfo.h
+++ b/lib/CodeGen/TargetInfo.h
@@ -225,6 +225,13 @@
   virtual unsigned getOpenMPSimdDefaultAlignment(QualType Type) const {
     return 0;
   }
+
+  /// Control whether __builtin_longjmp / __builtin_setjmp are lowered to
+  /// llvm.eh.sjlj.longjmp / llvm.eh.sjlj.setjmp or the normal library
+  /// function.
+  virtual bool hasSjLjLowering(CodeGen::CodeGenFunction &CGF) const {
+    return false; // Base default; virtual so a target can override it.
+  }
 };
 }