Update aosp/master clang for rebase to r233350
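
Notable changes picked up by this rebase: the DataLayoutPass is gone from the
pass manager setup (LLVM now carries the data layout on the Module itself),
-fveclib=Accelerate support, atomic load/store/compare-exchange for bit-field
and vector-element l-values, a direct lowering for __builtin_isinf_sign,
PowerPC P8 crypto and HTM builtins, target-cpu/target-features function
attributes, CFI vtable-pointer checks for casts, and new C++ ABI hooks for
exception handling.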

Change-Id: I12d4823f10bc9e445b8b86e7721b71f98d1df442
diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp
index da3e5ce..bdab637 100644
--- a/lib/CodeGen/BackendUtil.cpp
+++ b/lib/CodeGen/BackendUtil.cpp
@@ -71,7 +71,6 @@
   legacy::PassManager *getCodeGenPasses() const {
     if (!CodeGenPasses) {
       CodeGenPasses = new legacy::PassManager();
-      CodeGenPasses->add(new DataLayoutPass());
       CodeGenPasses->add(
           createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
     }
@@ -81,7 +80,6 @@
   legacy::PassManager *getPerModulePasses() const {
     if (!PerModulePasses) {
       PerModulePasses = new legacy::PassManager();
-      PerModulePasses->add(new DataLayoutPass());
       PerModulePasses->add(
           createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
     }
@@ -91,7 +89,6 @@
   legacy::FunctionPassManager *getPerFunctionPasses() const {
     if (!PerFunctionPasses) {
       PerFunctionPasses = new legacy::FunctionPassManager(TheModule);
-      PerFunctionPasses->add(new DataLayoutPass());
       PerFunctionPasses->add(
           createTargetTransformInfoWrapperPass(getTargetIRAnalysis()));
     }
@@ -240,6 +237,14 @@
   TargetLibraryInfoImpl *TLII = new TargetLibraryInfoImpl(TargetTriple);
   if (!CodeGenOpts.SimplifyLibCalls)
     TLII->disableAllFunctions();
+
+  switch (CodeGenOpts.getVecLib()) {
+  case CodeGenOptions::Accelerate:
+    TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::Accelerate);
+    break;
+  default:
+    break;
+  }
   return TLII;
 }
 
@@ -368,8 +373,6 @@
   legacy::PassManager *MPM = getPerModulePasses();
   if (!CodeGenOpts.RewriteMapFiles.empty())
     addSymbolRewriterPass(CodeGenOpts, MPM);
-  if (CodeGenOpts.VerifyModule)
-    MPM->add(createDebugInfoVerifierPass());
 
   if (!CodeGenOpts.DisableGCov &&
       (CodeGenOpts.EmitGcovArcs || CodeGenOpts.EmitGcovNotes)) {
@@ -383,6 +386,7 @@
     Options.NoRedZone = CodeGenOpts.DisableRedZone;
     Options.FunctionNamesInData =
         !CodeGenOpts.CoverageNoFunctionNamesInData;
+    Options.ExitBlockBeforeBody = CodeGenOpts.CoverageExitBlockBeforeBody;
     MPM->add(createGCOVProfilerPass(Options));
     if (CodeGenOpts.getDebugInfo() == CodeGenOptions::NoDebugInfo)
       MPM->add(createStripSymbolsPass(true));
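
For illustration (the target setup is an assumption, not part of the patch):
with -fveclib=Accelerate on a Darwin target, the TargetLibraryInfo entries
registered above let the loop vectorizer replace scalar libm calls with
Accelerate's vector math routines, e.g.

    #include <math.h>
    void scale(float *a, int n) {
      for (int i = 0; i < n; ++i)
        a[i] = expf(a[i]);   /* candidate for an Accelerate vector call */
    }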
diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp
index 2af2264..b7222fc 100644
--- a/lib/CodeGen/CGAtomic.cpp
+++ b/lib/CodeGen/CGAtomic.cpp
@@ -73,6 +73,8 @@
 
         LVal = lvalue;
       } else if (lvalue.isBitField()) {
+        ValueTy = lvalue.getType();
+        ValueSizeInBits = C.getTypeSize(ValueTy);
         auto &OrigBFI = lvalue.getBitFieldInfo();
         auto Offset = OrigBFI.Offset % C.toBits(lvalue.getAlignment());
         AtomicSizeInBits = C.toBits(
@@ -93,12 +95,34 @@
         BFI.StorageSize = AtomicSizeInBits;
         LVal = LValue::MakeBitfield(Addr, BFI, lvalue.getType(),
                                     lvalue.getAlignment());
+        LVal.setTBAAInfo(lvalue.getTBAAInfo());
+        AtomicTy = C.getIntTypeForBitwidth(AtomicSizeInBits, OrigBFI.IsSigned);
+        if (AtomicTy.isNull()) {
+          llvm::APInt Size(
+              /*numBits=*/32,
+              C.toCharUnitsFromBits(AtomicSizeInBits).getQuantity());
+          AtomicTy = C.getConstantArrayType(C.CharTy, Size, ArrayType::Normal,
+                                            /*IndexTypeQuals=*/0);
+        }
+        AtomicAlign = ValueAlign = lvalue.getAlignment();
       } else if (lvalue.isVectorElt()) {
-        AtomicSizeInBits = C.getTypeSize(lvalue.getType());
+        ValueTy = lvalue.getType()->getAs<VectorType>()->getElementType();
+        ValueSizeInBits = C.getTypeSize(ValueTy);
+        AtomicTy = lvalue.getType();
+        AtomicSizeInBits = C.getTypeSize(AtomicTy);
+        AtomicAlign = ValueAlign = lvalue.getAlignment();
         LVal = lvalue;
       } else {
         assert(lvalue.isExtVectorElt());
-        AtomicSizeInBits = C.getTypeSize(lvalue.getType());
+        ValueTy = lvalue.getType();
+        ValueSizeInBits = C.getTypeSize(ValueTy);
+        AtomicTy = ValueTy = CGF.getContext().getExtVectorType(
+            lvalue.getType(), lvalue.getExtVectorAddr()
+                                  ->getType()
+                                  ->getPointerElementType()
+                                  ->getVectorNumElements());
+        AtomicSizeInBits = C.getTypeSize(AtomicTy);
+        AtomicAlign = ValueAlign = lvalue.getAlignment();
         LVal = lvalue;
       }
       UseLibcall = !C.getTargetInfo().hasBuiltinAtomic(
@@ -114,6 +138,16 @@
     TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; }
     bool shouldUseLibcall() const { return UseLibcall; }
     const LValue &getAtomicLValue() const { return LVal; }
+    llvm::Value *getAtomicAddress() const {
+      if (LVal.isSimple())
+        return LVal.getAddress();
+      else if (LVal.isBitField())
+        return LVal.getBitFieldAddr();
+      else if (LVal.isVectorElt())
+        return LVal.getVectorAddr();
+      assert(LVal.isExtVectorElt());
+      return LVal.getExtVectorAddr();
+    }
 
     /// Is the atomic size larger than the underlying value type?
     ///
@@ -137,15 +171,15 @@
     llvm::Value *emitCastToAtomicIntPointer(llvm::Value *addr) const;
 
     /// Turn an atomic-layout object into an r-value.
-    RValue convertTempToRValue(llvm::Value *addr,
-                               AggValueSlot resultSlot,
-                               SourceLocation loc) const;
+    RValue convertTempToRValue(llvm::Value *addr, AggValueSlot resultSlot,
+                               SourceLocation loc, bool AsValue) const;
 
     /// \brief Converts a rvalue to integer value.
     llvm::Value *convertRValueToInt(RValue RVal) const;
 
-    RValue convertIntToValue(llvm::Value *IntVal, AggValueSlot ResultSlot,
-                             SourceLocation Loc) const;
+    RValue ConvertIntToValueOrAtomic(llvm::Value *IntVal,
+                                     AggValueSlot ResultSlot,
+                                     SourceLocation Loc, bool AsValue) const;
 
     /// Copy an atomic r-value into atomic-layout memory.
     void emitCopyIntoMemory(RValue rvalue) const;
@@ -153,7 +187,7 @@
     /// Project an l-value down to the value field.
     LValue projectValue() const {
       assert(LVal.isSimple());
-      llvm::Value *addr = LVal.getAddress();
+      llvm::Value *addr = getAtomicAddress();
       if (hasPadding())
         addr = CGF.Builder.CreateStructGEP(addr, 0);
 
@@ -161,14 +195,91 @@
                               CGF.getContext(), LVal.getTBAAInfo());
     }
 
+    /// \brief Emits atomic load.
+    /// \returns Loaded value.
+    RValue EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc,
+                          bool AsValue, llvm::AtomicOrdering AO,
+                          bool IsVolatile);
+
+    /// \brief Emits atomic compare-and-exchange sequence.
+    /// \param Expected Expected value.
+    /// \param Desired Desired value.
+    /// \param Success Atomic ordering for success operation.
+    /// \param Failure Atomic ordering for failed operation.
+    /// \param IsWeak true if atomic operation is weak, false otherwise.
+    /// \returns Pair of values: previous value from storage (value type) and
+    /// boolean flag (i1 type) with true if success and false otherwise.
+    std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchange(
+        RValue Expected, RValue Desired,
+        llvm::AtomicOrdering Success = llvm::SequentiallyConsistent,
+        llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent,
+        bool IsWeak = false);
+
     /// Materialize an atomic r-value in atomic-layout memory.
     llvm::Value *materializeRValue(RValue rvalue) const;
 
+    /// \brief Translates LLVM atomic ordering to GNU atomic ordering for
+    /// libcalls.
+    static AtomicExpr::AtomicOrderingKind
+    translateAtomicOrdering(const llvm::AtomicOrdering AO);
+
   private:
     bool requiresMemSetZero(llvm::Type *type) const;
+
+    /// \brief Creates temp alloca for intermediate operations on atomic value.
+    llvm::Value *CreateTempAlloca() const;
+
+    /// \brief Emits atomic load as a libcall.
+    void EmitAtomicLoadLibcall(llvm::Value *AddrForLoaded,
+                               llvm::AtomicOrdering AO, bool IsVolatile);
+    /// \brief Emits atomic load as LLVM instruction.
+    llvm::Value *EmitAtomicLoadOp(llvm::AtomicOrdering AO, bool IsVolatile);
+    /// \brief Emits atomic compare-and-exchange op as a libcall.
+    std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchangeLibcall(
+        llvm::Value *ExpectedAddr, llvm::Value *DesiredAddr,
+        llvm::AtomicOrdering Success = llvm::SequentiallyConsistent,
+        llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent);
+    /// \brief Emits atomic compare-and-exchange op as LLVM instruction.
+    std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchangeOp(
+        llvm::Value *Expected, llvm::Value *Desired,
+        llvm::AtomicOrdering Success = llvm::SequentiallyConsistent,
+        llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent,
+        bool IsWeak = false);
   };
 }
 
+AtomicExpr::AtomicOrderingKind
+AtomicInfo::translateAtomicOrdering(const llvm::AtomicOrdering AO) {
+  switch (AO) {
+  case llvm::Unordered:
+  case llvm::NotAtomic:
+  case llvm::Monotonic:
+    return AtomicExpr::AO_ABI_memory_order_relaxed;
+  case llvm::Acquire:
+    return AtomicExpr::AO_ABI_memory_order_acquire;
+  case llvm::Release:
+    return AtomicExpr::AO_ABI_memory_order_release;
+  case llvm::AcquireRelease:
+    return AtomicExpr::AO_ABI_memory_order_acq_rel;
+  case llvm::SequentiallyConsistent:
+    return AtomicExpr::AO_ABI_memory_order_seq_cst;
+  }
+  llvm_unreachable("Unhandled AtomicOrdering");
+}
+
+llvm::Value *AtomicInfo::CreateTempAlloca() const {
+  auto *TempAlloca = CGF.CreateMemTemp(
+      (LVal.isBitField() && ValueSizeInBits > AtomicSizeInBits) ? ValueTy
+                                                                : AtomicTy,
+      "atomic-temp");
+  TempAlloca->setAlignment(getAtomicAlignment().getQuantity());
+  // Cast to pointer to value type for bitfields.
+  if (LVal.isBitField())
+    return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+        TempAlloca, getAtomicAddress()->getType());
+  return TempAlloca;
+}
+
 static RValue emitAtomicLibcall(CodeGenFunction &CGF,
                                 StringRef fnName,
                                 QualType resultType,
@@ -217,9 +328,10 @@
   if (!requiresMemSetZero(addr->getType()->getPointerElementType()))
     return false;
 
-  CGF.Builder.CreateMemSet(addr, llvm::ConstantInt::get(CGF.Int8Ty, 0),
-                           AtomicSizeInBits / 8,
-                           LVal.getAlignment().getQuantity());
+  CGF.Builder.CreateMemSet(
+      addr, llvm::ConstantInt::get(CGF.Int8Ty, 0),
+      CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits).getQuantity(),
+      LVal.getAlignment().getQuantity());
   return true;
 }
 
@@ -941,7 +1053,7 @@
 
 RValue AtomicInfo::convertTempToRValue(llvm::Value *addr,
                                        AggValueSlot resultSlot,
-                                       SourceLocation loc) const {
+                                       SourceLocation loc, bool AsValue) const {
   if (LVal.isSimple()) {
     if (EvaluationKind == TEK_Aggregate)
       return resultSlot.asRValue();
@@ -953,7 +1065,11 @@
     // Otherwise, just convert the temporary to an r-value using the
     // normal conversion routine.
     return CGF.convertTempToRValue(addr, getValueType(), loc);
-  } else if (LVal.isBitField())
+  } else if (!AsValue)
+    // Get RValue from temp memory as atomic for non-simple lvalues
+    return RValue::get(
+        CGF.Builder.CreateAlignedLoad(addr, AtomicAlign.getQuantity()));
+  else if (LVal.isBitField())
     return CGF.EmitLoadOfBitfieldLValue(LValue::MakeBitfield(
         addr, LVal.getBitFieldInfo(), LVal.getType(), LVal.getAlignment()));
   else if (LVal.isVectorElt())
@@ -966,14 +1082,20 @@
       addr, LVal.getExtVectorElts(), LVal.getType(), LVal.getAlignment()));
 }
 
-RValue AtomicInfo::convertIntToValue(llvm::Value *IntVal,
-                                     AggValueSlot ResultSlot,
-                                     SourceLocation Loc) const {
-  assert(LVal.isSimple());
+RValue AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal,
+                                             AggValueSlot ResultSlot,
+                                             SourceLocation Loc,
+                                             bool AsValue) const {
   // Try not to in some easy cases.
   assert(IntVal->getType()->isIntegerTy() && "Expected integer value");
-  if (getEvaluationKind() == TEK_Scalar && !hasPadding()) {
-    auto *ValTy = CGF.ConvertTypeForMem(ValueTy);
+  if (getEvaluationKind() == TEK_Scalar &&
+      (((!LVal.isBitField() ||
+         LVal.getBitFieldInfo().Size == ValueSizeInBits) &&
+        !hasPadding()) ||
+       !AsValue)) {
+    auto *ValTy = AsValue
+                      ? CGF.ConvertTypeForMem(ValueTy)
+                      : getAtomicAddress()->getType()->getPointerElementType();
     if (ValTy->isIntegerTy()) {
       assert(IntVal->getType() == ValTy && "Different integer types.");
       return RValue::get(CGF.EmitFromMemory(IntVal, ValueTy));
@@ -988,13 +1110,13 @@
   llvm::Value *Temp;
   bool TempIsVolatile = false;
   CharUnits TempAlignment;
-  if (getEvaluationKind() == TEK_Aggregate) {
+  if (AsValue && getEvaluationKind() == TEK_Aggregate) {
     assert(!ResultSlot.isIgnored());
     Temp = ResultSlot.getAddr();
     TempAlignment = getValueAlignment();
     TempIsVolatile = ResultSlot.isVolatile();
   } else {
-    Temp = CGF.CreateMemTemp(getAtomicType(), "atomic-temp");
+    Temp = CreateTempAlloca();
     TempAlignment = getAtomicAlignment();
   }
 
@@ -1003,7 +1125,38 @@
   CGF.Builder.CreateAlignedStore(IntVal, CastTemp, TempAlignment.getQuantity())
       ->setVolatile(TempIsVolatile);
 
-  return convertTempToRValue(Temp, ResultSlot, Loc);
+  return convertTempToRValue(Temp, ResultSlot, Loc, AsValue);
+}
+
+void AtomicInfo::EmitAtomicLoadLibcall(llvm::Value *AddrForLoaded,
+                                       llvm::AtomicOrdering AO, bool) {
+  // void __atomic_load(size_t size, void *mem, void *return, int order);
+  CallArgList Args;
+  Args.add(RValue::get(getAtomicSizeValue()), CGF.getContext().getSizeType());
+  Args.add(RValue::get(CGF.EmitCastToVoidPtr(getAtomicAddress())),
+           CGF.getContext().VoidPtrTy);
+  Args.add(RValue::get(CGF.EmitCastToVoidPtr(AddrForLoaded)),
+           CGF.getContext().VoidPtrTy);
+  Args.add(RValue::get(
+               llvm::ConstantInt::get(CGF.IntTy, translateAtomicOrdering(AO))),
+           CGF.getContext().IntTy);
+  emitAtomicLibcall(CGF, "__atomic_load", CGF.getContext().VoidTy, Args);
+}
+
+llvm::Value *AtomicInfo::EmitAtomicLoadOp(llvm::AtomicOrdering AO,
+                                          bool IsVolatile) {
+  // Okay, we're doing this natively.
+  llvm::Value *Addr = emitCastToAtomicIntPointer(getAtomicAddress());
+  llvm::LoadInst *Load = CGF.Builder.CreateLoad(Addr, "atomic-load");
+  Load->setAtomic(AO);
+
+  // Other decoration.
+  Load->setAlignment(getAtomicAlignment().getQuantity());
+  if (IsVolatile)
+    Load->setVolatile(true);
+  if (LVal.getTBAAInfo())
+    CGF.CGM.DecorateInstruction(Load, LVal.getTBAAInfo());
+  return Load;
 }
 
 /// An LValue is a candidate for having its loads and stores be made atomic if
@@ -1041,85 +1194,47 @@
   return EmitAtomicLoad(LV, SL, AO, IsVolatile, Slot);
 }
 
+RValue AtomicInfo::EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc,
+                                  bool AsValue, llvm::AtomicOrdering AO,
+                                  bool IsVolatile) {
+  // Check whether we should use a library call.
+  if (shouldUseLibcall()) {
+    llvm::Value *TempAddr;
+    if (LVal.isSimple() && !ResultSlot.isIgnored()) {
+      assert(getEvaluationKind() == TEK_Aggregate);
+      TempAddr = ResultSlot.getAddr();
+    } else
+      TempAddr = CreateTempAlloca();
+
+    EmitAtomicLoadLibcall(TempAddr, AO, IsVolatile);
+
+    // Okay, turn that back into the original value or whole atomic (for
+    // non-simple lvalues) type.
+    return convertTempToRValue(TempAddr, ResultSlot, Loc, AsValue);
+  }
+
+  // Okay, we're doing this natively.
+  auto *Load = EmitAtomicLoadOp(AO, IsVolatile);
+
+  // If we're ignoring an aggregate return, don't do anything.
+  if (getEvaluationKind() == TEK_Aggregate && ResultSlot.isIgnored())
+    return RValue::getAggregate(nullptr, false);
+
+  // Okay, turn that back into the original value or atomic (for non-simple
+  // lvalues) type.
+  return ConvertIntToValueOrAtomic(Load, ResultSlot, Loc, AsValue);
+}
+
 /// Emit a load from an l-value of atomic type.  Note that the r-value
 /// we produce is an r-value of the atomic *value* type.
 RValue CodeGenFunction::EmitAtomicLoad(LValue src, SourceLocation loc,
                                        llvm::AtomicOrdering AO, bool IsVolatile,
                                        AggValueSlot resultSlot) {
-  AtomicInfo atomics(*this, src);
-  LValue LVal = atomics.getAtomicLValue();
-  llvm::Value *SrcAddr = nullptr;
-  llvm::AllocaInst *NonSimpleTempAlloca = nullptr;
-  if (LVal.isSimple())
-    SrcAddr = LVal.getAddress();
-  else {
-    if (LVal.isBitField())
-      SrcAddr = LVal.getBitFieldAddr();
-    else if (LVal.isVectorElt())
-      SrcAddr = LVal.getVectorAddr();
-    else {
-      assert(LVal.isExtVectorElt());
-      SrcAddr = LVal.getExtVectorAddr();
-    }
-    NonSimpleTempAlloca = CreateTempAlloca(
-        SrcAddr->getType()->getPointerElementType(), "atomic-load-temp");
-    NonSimpleTempAlloca->setAlignment(getContext().toBits(src.getAlignment()));
-  }
-
-  // Check whether we should use a library call.
-  if (atomics.shouldUseLibcall()) {
-    llvm::Value *tempAddr;
-    if (LVal.isSimple()) {
-      if (!resultSlot.isIgnored()) {
-        assert(atomics.getEvaluationKind() == TEK_Aggregate);
-        tempAddr = resultSlot.getAddr();
-      } else
-        tempAddr = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp");
-    } else
-      tempAddr = NonSimpleTempAlloca;
-
-    // void __atomic_load(size_t size, void *mem, void *return, int order);
-    CallArgList args;
-    args.add(RValue::get(atomics.getAtomicSizeValue()),
-             getContext().getSizeType());
-    args.add(RValue::get(EmitCastToVoidPtr(SrcAddr)), getContext().VoidPtrTy);
-    args.add(RValue::get(EmitCastToVoidPtr(tempAddr)), getContext().VoidPtrTy);
-    args.add(RValue::get(llvm::ConstantInt::get(
-                 IntTy, AtomicExpr::AO_ABI_memory_order_seq_cst)),
-             getContext().IntTy);
-    emitAtomicLibcall(*this, "__atomic_load", getContext().VoidTy, args);
-
-    // Produce the r-value.
-    return atomics.convertTempToRValue(tempAddr, resultSlot, loc);
-  }
-
-  // Okay, we're doing this natively.
-  llvm::Value *addr = atomics.emitCastToAtomicIntPointer(SrcAddr);
-  llvm::LoadInst *load = Builder.CreateLoad(addr, "atomic-load");
-  load->setAtomic(AO);
-
-  // Other decoration.
-  load->setAlignment(src.getAlignment().getQuantity());
-  if (IsVolatile)
-    load->setVolatile(true);
-  if (src.getTBAAInfo())
-    CGM.DecorateInstruction(load, src.getTBAAInfo());
-
-  // If we're ignoring an aggregate return, don't do anything.
-  if (atomics.getEvaluationKind() == TEK_Aggregate && resultSlot.isIgnored())
-    return RValue::getAggregate(nullptr, false);
-
-  // Okay, turn that back into the original value type.
-  if (src.isSimple())
-    return atomics.convertIntToValue(load, resultSlot, loc);
-
-  auto *IntAddr = atomics.emitCastToAtomicIntPointer(NonSimpleTempAlloca);
-  Builder.CreateAlignedStore(load, IntAddr, src.getAlignment().getQuantity());
-  return atomics.convertTempToRValue(NonSimpleTempAlloca, resultSlot, loc);
+  AtomicInfo Atomics(*this, src);
+  return Atomics.EmitAtomicLoad(resultSlot, loc, /*AsValue=*/true, AO,
+                                IsVolatile);
 }
 
-
-
 /// Copy an r-value into memory as part of storing to an atomic type.
 /// This needs to create a bit-pattern suitable for atomic operations.
 void AtomicInfo::emitCopyIntoMemory(RValue rvalue) const {
@@ -1128,7 +1243,7 @@
   // which means that the caller is responsible for having zeroed
   // any padding.  Just do an aggregate copy of that type.
   if (rvalue.isAggregate()) {
-    CGF.EmitAggregateCopy(LVal.getAddress(),
+    CGF.EmitAggregateCopy(getAtomicAddress(),
                           rvalue.getAggregateAddr(),
                           getAtomicType(),
                           (rvalue.isVolatileQualified()
@@ -1163,24 +1278,24 @@
     return rvalue.getAggregateAddr();
 
   // Otherwise, make a temporary and materialize into it.
-  llvm::Value *temp = CGF.CreateMemTemp(getAtomicType(), "atomic-store-temp");
-  LValue tempLV =
-      CGF.MakeAddrLValue(temp, getAtomicType(), getAtomicAlignment());
-  AtomicInfo Atomics(CGF, tempLV);
+  LValue TempLV = CGF.MakeAddrLValue(CreateTempAlloca(), getAtomicType(),
+                                     getAtomicAlignment());
+  AtomicInfo Atomics(CGF, TempLV);
   Atomics.emitCopyIntoMemory(rvalue);
-  return temp;
+  return TempLV.getAddress();
 }
 
 llvm::Value *AtomicInfo::convertRValueToInt(RValue RVal) const {
   // If we've got a scalar value of the right size, try to avoid going
   // through memory.
-  if (RVal.isScalar() && !hasPadding()) {
+  if (RVal.isScalar() && (!hasPadding() || !LVal.isSimple())) {
     llvm::Value *Value = RVal.getScalarVal();
     if (isa<llvm::IntegerType>(Value->getType()))
       return Value;
     else {
-      llvm::IntegerType *InputIntTy =
-          llvm::IntegerType::get(CGF.getLLVMContext(), getValueSizeInBits());
+      llvm::IntegerType *InputIntTy = llvm::IntegerType::get(
+          CGF.getLLVMContext(),
+          LVal.isSimple() ? getValueSizeInBits() : getAtomicSizeInBits());
       if (isa<llvm::PointerType>(Value->getType()))
         return CGF.Builder.CreatePtrToInt(Value, InputIntTy);
       else if (llvm::BitCastInst::isBitCastable(Value->getType(), InputIntTy))
@@ -1197,6 +1312,76 @@
                                        getAtomicAlignment().getQuantity());
 }
 
+std::pair<llvm::Value *, llvm::Value *> AtomicInfo::EmitAtomicCompareExchangeOp(
+    llvm::Value *Expected, llvm::Value *Desired, llvm::AtomicOrdering Success,
+    llvm::AtomicOrdering Failure, bool IsWeak) {
+  // Do the atomic store.
+  auto *Addr = emitCastToAtomicIntPointer(getAtomicAddress());
+  auto *Inst = CGF.Builder.CreateAtomicCmpXchg(Addr, Expected, Desired, Success,
+                                               Failure);
+  // Other decoration.
+  Inst->setVolatile(LVal.isVolatileQualified());
+  Inst->setWeak(IsWeak);
+
+  // Okay, turn that back into the original value type.
+  auto *PreviousVal = CGF.Builder.CreateExtractValue(Inst, /*Idxs=*/0);
+  auto *SuccessFailureVal = CGF.Builder.CreateExtractValue(Inst, /*Idxs=*/1);
+  return std::make_pair(PreviousVal, SuccessFailureVal);
+}
+
+std::pair<llvm::Value *, llvm::Value *>
+AtomicInfo::EmitAtomicCompareExchangeLibcall(llvm::Value *ExpectedAddr,
+                                             llvm::Value *DesiredAddr,
+                                             llvm::AtomicOrdering Success,
+                                             llvm::AtomicOrdering Failure) {
+  // bool __atomic_compare_exchange(size_t size, void *obj, void *expected,
+  // void *desired, int success, int failure);
+  CallArgList Args;
+  Args.add(RValue::get(getAtomicSizeValue()), CGF.getContext().getSizeType());
+  Args.add(RValue::get(CGF.EmitCastToVoidPtr(getAtomicAddress())),
+           CGF.getContext().VoidPtrTy);
+  Args.add(RValue::get(CGF.EmitCastToVoidPtr(ExpectedAddr)),
+           CGF.getContext().VoidPtrTy);
+  Args.add(RValue::get(CGF.EmitCastToVoidPtr(DesiredAddr)),
+           CGF.getContext().VoidPtrTy);
+  Args.add(RValue::get(llvm::ConstantInt::get(
+               CGF.IntTy, translateAtomicOrdering(Success))),
+           CGF.getContext().IntTy);
+  Args.add(RValue::get(llvm::ConstantInt::get(
+               CGF.IntTy, translateAtomicOrdering(Failure))),
+           CGF.getContext().IntTy);
+  auto SuccessFailureRVal = emitAtomicLibcall(CGF, "__atomic_compare_exchange",
+                                              CGF.getContext().BoolTy, Args);
+  auto *PreviousVal = CGF.Builder.CreateAlignedLoad(
+      ExpectedAddr, getValueAlignment().getQuantity());
+  return std::make_pair(PreviousVal, SuccessFailureRVal.getScalarVal());
+}
+
+std::pair<llvm::Value *, llvm::Value *> AtomicInfo::EmitAtomicCompareExchange(
+    RValue Expected, RValue Desired, llvm::AtomicOrdering Success,
+    llvm::AtomicOrdering Failure, bool IsWeak) {
+  if (Failure >= Success)
+    // Don't assert on undefined behavior.
+    Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(Success);
+
+  // Check whether we should use a library call.
+  if (shouldUseLibcall()) {
+    auto *ExpectedAddr = materializeRValue(Expected);
+    // Produce a source address.
+    auto *DesiredAddr = materializeRValue(Desired);
+    return EmitAtomicCompareExchangeLibcall(ExpectedAddr, DesiredAddr, Success,
+                                            Failure);
+  }
+
+  // If we've got a scalar value of the right size, try to avoid going
+  // through memory.
+  auto *ExpectedIntVal = convertRValueToInt(Expected);
+  auto *DesiredIntVal = convertRValueToInt(Desired);
+
+  return EmitAtomicCompareExchangeOp(ExpectedIntVal, DesiredIntVal, Success,
+                                     Failure, IsWeak);
+}
+
 void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue lvalue,
                                       bool isInit) {
   bool IsVolatile = lvalue.isVolatileQualified();
@@ -1225,49 +1410,103 @@
            == dest.getAddress()->getType()->getPointerElementType());
 
   AtomicInfo atomics(*this, dest);
+  LValue LVal = atomics.getAtomicLValue();
 
   // If this is an initialization, just put the value there normally.
-  if (isInit) {
-    atomics.emitCopyIntoMemory(rvalue);
+  if (LVal.isSimple()) {
+    if (isInit) {
+      atomics.emitCopyIntoMemory(rvalue);
+      return;
+    }
+
+    // Check whether we should use a library call.
+    if (atomics.shouldUseLibcall()) {
+      // Produce a source address.
+      llvm::Value *srcAddr = atomics.materializeRValue(rvalue);
+
+      // void __atomic_store(size_t size, void *mem, void *val, int order)
+      CallArgList args;
+      args.add(RValue::get(atomics.getAtomicSizeValue()),
+               getContext().getSizeType());
+      args.add(RValue::get(EmitCastToVoidPtr(atomics.getAtomicAddress())),
+               getContext().VoidPtrTy);
+      args.add(RValue::get(EmitCastToVoidPtr(srcAddr)), getContext().VoidPtrTy);
+      args.add(RValue::get(llvm::ConstantInt::get(
+                   IntTy, AtomicInfo::translateAtomicOrdering(AO))),
+               getContext().IntTy);
+      emitAtomicLibcall(*this, "__atomic_store", getContext().VoidTy, args);
+      return;
+    }
+
+    // Okay, we're doing this natively.
+    llvm::Value *intValue = atomics.convertRValueToInt(rvalue);
+
+    // Do the atomic store.
+    llvm::Value *addr =
+        atomics.emitCastToAtomicIntPointer(atomics.getAtomicAddress());
+    intValue = Builder.CreateIntCast(
+        intValue, addr->getType()->getPointerElementType(), /*isSigned=*/false);
+    llvm::StoreInst *store = Builder.CreateStore(intValue, addr);
+
+    // Initializations don't need to be atomic.
+    if (!isInit)
+      store->setAtomic(AO);
+
+    // Other decoration.
+    store->setAlignment(dest.getAlignment().getQuantity());
+    if (IsVolatile)
+      store->setVolatile(true);
+    if (dest.getTBAAInfo())
+      CGM.DecorateInstruction(store, dest.getTBAAInfo());
     return;
   }
 
-  // Check whether we should use a library call.
-  if (atomics.shouldUseLibcall()) {
-    // Produce a source address.
-    llvm::Value *srcAddr = atomics.materializeRValue(rvalue);
-
-    // void __atomic_store(size_t size, void *mem, void *val, int order)
-    CallArgList args;
-    args.add(RValue::get(atomics.getAtomicSizeValue()),
-             getContext().getSizeType());
-    args.add(RValue::get(EmitCastToVoidPtr(dest.getAddress())),
-             getContext().VoidPtrTy);
-    args.add(RValue::get(EmitCastToVoidPtr(srcAddr)),
-             getContext().VoidPtrTy);
-    args.add(RValue::get(llvm::ConstantInt::get(
-                 IntTy, AtomicExpr::AO_ABI_memory_order_seq_cst)),
-             getContext().IntTy);
-    emitAtomicLibcall(*this, "__atomic_store", getContext().VoidTy, args);
-    return;
+  // Atomic load of prev value.
+  RValue OldRVal =
+      atomics.EmitAtomicLoad(AggValueSlot::ignored(), SourceLocation(),
+                             /*AsValue=*/false, AO, IsVolatile);
+  // For non-simple lvalues perform compare-and-swap procedure.
+  auto *ContBB = createBasicBlock("atomic_cont");
+  auto *ExitBB = createBasicBlock("atomic_exit");
+  auto *CurBB = Builder.GetInsertBlock();
+  EmitBlock(ContBB);
+  llvm::PHINode *PHI = Builder.CreatePHI(OldRVal.getScalarVal()->getType(),
+                                         /*NumReservedValues=*/2);
+  PHI->addIncoming(OldRVal.getScalarVal(), CurBB);
+  RValue OriginalRValue = RValue::get(PHI);
+  // Materialize the old value into a temporary buffer.
+  auto *Ptr = atomics.materializeRValue(OriginalRValue);
+  // Build new lvalue for temp address
+  LValue UpdateLVal;
+  if (LVal.isBitField())
+    UpdateLVal = LValue::MakeBitfield(Ptr, LVal.getBitFieldInfo(),
+                                      LVal.getType(), LVal.getAlignment());
+  else if (LVal.isVectorElt())
+    UpdateLVal = LValue::MakeVectorElt(Ptr, LVal.getVectorIdx(), LVal.getType(),
+                                       LVal.getAlignment());
+  else {
+    assert(LVal.isExtVectorElt());
+    UpdateLVal = LValue::MakeExtVectorElt(Ptr, LVal.getExtVectorElts(),
+                                          LVal.getType(), LVal.getAlignment());
   }
-
-  // Okay, we're doing this natively.
-  llvm::Value *intValue = atomics.convertRValueToInt(rvalue);
-
-  // Do the atomic store.
-  llvm::Value *addr = atomics.emitCastToAtomicIntPointer(dest.getAddress());
-  llvm::StoreInst *store = Builder.CreateStore(intValue, addr);
-
-  // Initializations don't need to be atomic.
-  if (!isInit) store->setAtomic(AO);
-
-  // Other decoration.
-  store->setAlignment(dest.getAlignment().getQuantity());
-  if (IsVolatile)
-    store->setVolatile(true);
-  if (dest.getTBAAInfo())
-    CGM.DecorateInstruction(store, dest.getTBAAInfo());
+  UpdateLVal.setTBAAInfo(LVal.getTBAAInfo());
+  // Store new value in the corresponding memory area
+  EmitStoreThroughLValue(rvalue, UpdateLVal);
+  // Load new value
+  RValue NewRValue = RValue::get(EmitLoadOfScalar(
+      Ptr, LVal.isVolatile(), atomics.getAtomicAlignment().getQuantity(),
+      atomics.getAtomicType(), SourceLocation()));
+  // Try to write new value using cmpxchg operation
+  auto Pair = atomics.EmitAtomicCompareExchange(OriginalRValue, NewRValue, AO);
+  llvm::Value *OldValue = Pair.first;
+  if (!atomics.shouldUseLibcall())
+    // Convert integer value to original atomic type
+    OldValue = atomics.ConvertIntToValueOrAtomic(
+                           OldValue, AggValueSlot::ignored(), SourceLocation(),
+                           /*AsValue=*/false).getScalarVal();
+  PHI->addIncoming(OldValue, ContBB);
+  Builder.CreateCondBr(Pair.second, ExitBB, ContBB);
+  EmitBlock(ExitBB, /*IsFinished=*/true);
 }
 
 /// Emit a compare-and-exchange op for atomic type.
@@ -1286,56 +1525,13 @@
              Obj.getAddress()->getType()->getPointerElementType());
   AtomicInfo Atomics(*this, Obj);
 
-  if (Failure >= Success)
-    // Don't assert on undefined behavior.
-    Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(Success);
-
-  auto Alignment = Atomics.getValueAlignment();
-  // Check whether we should use a library call.
-  if (Atomics.shouldUseLibcall()) {
-    auto *ExpectedAddr = Atomics.materializeRValue(Expected);
-    // Produce a source address.
-    auto *DesiredAddr = Atomics.materializeRValue(Desired);
-    // bool __atomic_compare_exchange(size_t size, void *obj, void *expected,
-    // void *desired, int success, int failure);
-    CallArgList Args;
-    Args.add(RValue::get(Atomics.getAtomicSizeValue()),
-             getContext().getSizeType());
-    Args.add(RValue::get(EmitCastToVoidPtr(Obj.getAddress())),
-             getContext().VoidPtrTy);
-    Args.add(RValue::get(EmitCastToVoidPtr(ExpectedAddr)),
-             getContext().VoidPtrTy);
-    Args.add(RValue::get(EmitCastToVoidPtr(DesiredAddr)),
-             getContext().VoidPtrTy);
-    Args.add(RValue::get(llvm::ConstantInt::get(IntTy, Success)),
-             getContext().IntTy);
-    Args.add(RValue::get(llvm::ConstantInt::get(IntTy, Failure)),
-             getContext().IntTy);
-    auto SuccessFailureRVal = emitAtomicLibcall(
-        *this, "__atomic_compare_exchange", getContext().BoolTy, Args);
-    auto *PreviousVal =
-        Builder.CreateAlignedLoad(ExpectedAddr, Alignment.getQuantity());
-    return std::make_pair(RValue::get(PreviousVal), SuccessFailureRVal);
-  }
-
-  // If we've got a scalar value of the right size, try to avoid going
-  // through memory.
-  auto *ExpectedIntVal = Atomics.convertRValueToInt(Expected);
-  auto *DesiredIntVal = Atomics.convertRValueToInt(Desired);
-
-  // Do the atomic store.
-  auto *Addr = Atomics.emitCastToAtomicIntPointer(Obj.getAddress());
-  auto *Inst = Builder.CreateAtomicCmpXchg(Addr, ExpectedIntVal, DesiredIntVal,
-                                          Success, Failure);
-  // Other decoration.
-  Inst->setVolatile(Obj.isVolatileQualified());
-  Inst->setWeak(IsWeak);
-
-  // Okay, turn that back into the original value type.
-  auto *PreviousVal = Builder.CreateExtractValue(Inst, /*Idxs=*/0);
-  auto *SuccessFailureVal = Builder.CreateExtractValue(Inst, /*Idxs=*/1);
-  return std::make_pair(Atomics.convertIntToValue(PreviousVal, Slot, Loc),
-                        RValue::get(SuccessFailureVal));
+  auto Pair = Atomics.EmitAtomicCompareExchange(Expected, Desired, Success,
+                                                Failure, IsWeak);
+  return std::make_pair(Atomics.shouldUseLibcall()
+                            ? RValue::get(Pair.first)
+                            : Atomics.ConvertIntToValueOrAtomic(
+                                  Pair.first, Slot, Loc, /*AsValue=*/true),
+                        RValue::get(Pair.second));
 }
 
 void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) {
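
A sketch of source that exercises the new non-simple-lvalue handling in
EmitAtomicLoad/EmitAtomicStore (the OpenMP spelling is one assumed way to
reach this path; any atomic store to a bit-field takes it):

    struct S { int bf : 12; } s;
    void set(int v) {
    #pragma omp atomic write
      s.bf = v;   /* lowered to a compare-exchange retry loop over the
                      containing memory word */
    }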
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index bf7d86f..35597fe 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "CodeGenFunction.h"
+#include "CGCXXABI.h"
 #include "CGObjCRuntime.h"
 #include "CodeGenModule.h"
 #include "TargetInfo.h"
@@ -25,6 +26,7 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Intrinsics.h"
+#include <sstream>
 
 using namespace clang;
 using namespace CodeGen;
@@ -157,6 +159,27 @@
   return Call;
 }
 
+/// Emit the computation of the sign bit for a floating point value. Returns
+/// the i1 sign bit value.
+static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
+  LLVMContext &C = CGF.CGM.getLLVMContext();
+
+  llvm::Type *Ty = V->getType();
+  int Width = Ty->getPrimitiveSizeInBits();
+  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
+  V = CGF.Builder.CreateBitCast(V, IntTy);
+  if (Ty->isPPC_FP128Ty()) {
+    // The higher-order double comes first, and so we need to truncate the
+    // pair to extract the overall sign. The order of the pair is the same
+    // in both little- and big-Endian modes.
+    Width >>= 1;
+    IntTy = llvm::IntegerType::get(C, Width);
+    V = CGF.Builder.CreateTrunc(V, IntTy);
+  }
+  Value *Zero = llvm::Constant::getNullValue(IntTy);
+  return CGF.Builder.CreateICmpSLT(V, Zero);
+}
+
 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
                               const CallExpr *E, llvm::Value *calleeValue) {
   return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E,
@@ -557,8 +580,22 @@
     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
   }
 
-  // TODO: BI__builtin_isinf_sign
-  //   isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0
+  case Builtin::BI__builtin_isinf_sign: {
+    // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
+    Value *Arg = EmitScalarExpr(E->getArg(0));
+    Value *AbsArg = EmitFAbs(*this, Arg);
+    Value *IsInf = Builder.CreateFCmpOEQ(
+        AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
+    Value *IsNeg = EmitSignBit(*this, Arg);
+
+    llvm::Type *IntTy = ConvertType(E->getType());
+    Value *Zero = Constant::getNullValue(IntTy);
+    Value *One = ConstantInt::get(IntTy, 1);
+    Value *NegativeOne = ConstantInt::get(IntTy, -1);
+    Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
+    Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
+    return RValue::get(Result);
+  }
 
   case Builtin::BI__builtin_isnormal: {
     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
@@ -859,8 +896,6 @@
       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
   }
   case Builtin::BI__builtin_setjmp: {
-    if (!getTargetHooks().hasSjLjLowering(*this))
-      break;
     // Buffer is a void**.
     Value *Buf = EmitScalarExpr(E->getArg(0));
 
@@ -883,8 +918,6 @@
     return RValue::get(Builder.CreateCall(F, Buf));
   }
   case Builtin::BI__builtin_longjmp: {
-    if (!getTargetHooks().hasSjLjLowering(*this))
-      break;
     Value *Buf = EmitScalarExpr(E->getArg(0));
     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
 
@@ -1401,24 +1434,9 @@
   case Builtin::BI__builtin_signbit:
   case Builtin::BI__builtin_signbitf:
   case Builtin::BI__builtin_signbitl: {
-    LLVMContext &C = CGM.getLLVMContext();
-
-    Value *Arg = EmitScalarExpr(E->getArg(0));
-    llvm::Type *ArgTy = Arg->getType();
-    int ArgWidth = ArgTy->getPrimitiveSizeInBits();
-    llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth);
-    Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy);
-    if (ArgTy->isPPC_FP128Ty()) {
-      // The higher-order double comes first, and so we need to truncate the
-      // pair to extract the overall sign. The order of the pair is the same
-      // in both little- and big-Endian modes.
-      ArgWidth >>= 1;
-      ArgIntTy = llvm::IntegerType::get(C, ArgWidth);
-      BCArg = Builder.CreateTrunc(BCArg, ArgIntTy);
-    }
-    Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy);
-    Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp);
-    return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType())));
+    return RValue::get(
+        Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
+                           ConvertType(E->getType())));
   }
   case Builtin::BI__builtin_annotation: {
     llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
@@ -1682,8 +1700,8 @@
       llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
           llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
           "_setjmpex", ReturnsTwiceAttr);
-      llvm::Value *Buf =
-          Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
+      llvm::Value *Buf = Builder.CreateBitOrPointerCast(
+          EmitScalarExpr(E->getArg(0)), Int8PtrTy);
       llvm::Value *FrameAddr =
           Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
                              ConstantInt::get(Int32Ty, 0));
@@ -1692,14 +1710,15 @@
       CS.setAttributes(ReturnsTwiceAttr);
       return RValue::get(CS.getInstruction());
     }
+    break;
   }
   case Builtin::BI_setjmp: {
     if (getTarget().getTriple().isOSMSVCRT()) {
       llvm::AttributeSet ReturnsTwiceAttr =
           AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
                             llvm::Attribute::ReturnsTwice);
-      llvm::Value *Buf =
-          Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
+      llvm::Value *Buf = Builder.CreateBitOrPointerCast(
+          EmitScalarExpr(E->getArg(0)), Int8PtrTy);
       llvm::CallSite CS;
       if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
         llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
@@ -1723,6 +1742,14 @@
       CS.setAttributes(ReturnsTwiceAttr);
       return RValue::get(CS.getInstruction());
     }
+    break;
+  }
+
+  case Builtin::BI__GetExceptionInfo: {
+    if (llvm::GlobalVariable *GV =
+            CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
+      return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
+    break;
   }
   }
 
@@ -6090,13 +6117,6 @@
     Builder.CreateStore(Builder.CreateExtractValue(Call, 0), Ops[0]);
     return Builder.CreateExtractValue(Call, 1);
   }
-  // AVX2 broadcast
-  case X86::BI__builtin_ia32_vbroadcastsi256: {
-    Value *VecTmp = CreateMemTemp(E->getArg(0)->getType());
-    Builder.CreateStore(Ops[0], VecTmp);
-    Value *F = CGM.getIntrinsic(Intrinsic::x86_avx2_vbroadcasti128);
-    return Builder.CreateCall(F, Builder.CreateBitCast(VecTmp, Int8PtrTy));
-  }
   // SSE comparison intrinsics
   case X86::BI__builtin_ia32_cmpeqps:
   case X86::BI__builtin_ia32_cmpltps:
@@ -6351,6 +6371,119 @@
     llvm::Function *F = CGM.getIntrinsic(ID);
     return Builder.CreateCall(F, Ops, "");
   }
+
+  // P8 Crypto builtins
+  case PPC::BI__builtin_altivec_crypto_vshasigmaw:
+  case PPC::BI__builtin_altivec_crypto_vshasigmad:
+  {
+    ConstantInt *CI1 = dyn_cast<ConstantInt>(Ops[1]);
+    ConstantInt *CI2 = dyn_cast<ConstantInt>(Ops[2]);
+    assert(CI1 && CI2);
+    if (CI1->getZExtValue() > 1) {
+      CGM.Error(E->getArg(1)->getExprLoc(),
+                "argument out of range (should be 0-1).");
+      return llvm::UndefValue::get(Ops[0]->getType());
+    }
+    if (CI2->getZExtValue() > 15) {
+      CGM.Error(E->getArg(2)->getExprLoc(),
+                "argument out of range (should be 0-15).");
+      return llvm::UndefValue::get(Ops[0]->getType());
+    }
+    switch (BuiltinID) {
+    default: llvm_unreachable("Unsupported sigma intrinsic!");
+    case PPC::BI__builtin_altivec_crypto_vshasigmaw:
+      ID = Intrinsic::ppc_altivec_crypto_vshasigmaw;
+      break;
+    case PPC::BI__builtin_altivec_crypto_vshasigmad:
+      ID = Intrinsic::ppc_altivec_crypto_vshasigmad;
+      break;
+    }
+    llvm::Function *F = CGM.getIntrinsic(ID);
+    return Builder.CreateCall(F, Ops, "");
+  }
+
+  // HTM builtins
+  case PPC::BI__builtin_tbegin:
+  case PPC::BI__builtin_tend:
+  case PPC::BI__builtin_tsr: {
+    unsigned int MaxValue;
+    // The HTM instructions accept a single argument with a limited range.
+    ConstantInt *CI = dyn_cast<ConstantInt>(Ops[0]);
+    assert(CI);
+    switch (BuiltinID) {
+    case PPC::BI__builtin_tbegin:
+      ID = Intrinsic::ppc_tbegin;
+      MaxValue = 1;
+      break;
+    case PPC::BI__builtin_tend:
+      ID = Intrinsic::ppc_tend;
+      MaxValue = 1;
+      break;
+    case PPC::BI__builtin_tsr:
+      ID = Intrinsic::ppc_tsr;
+      MaxValue = 7;
+      break;
+    }
+    if (CI->getZExtValue() > MaxValue) {
+      std::stringstream ss;
+      ss << "argument out of range (should be 0-" << MaxValue << ")";
+      CGM.Error(E->getArg(0)->getExprLoc(), ss.str());
+      return llvm::UndefValue::get(Ops[0]->getType());
+    }
+
+    llvm::Function *F = CGM.getIntrinsic(ID);
+    return Builder.CreateCall(F, Ops, "");
+  }
+  case PPC::BI__builtin_tabortdc:
+  case PPC::BI__builtin_tabortwc: {
+    // For the wc and dc variants of tabort, the first argument must be a
+    // 5-bit constant integer.
+    ConstantInt *CI = dyn_cast<ConstantInt>(Ops[0]);
+    assert(CI);
+    if (CI->getZExtValue() > 31) {
+      CGM.ErrorUnsupported(E->getArg(0), "argument out of range (should be 0-31)");
+      return llvm::UndefValue::get(Ops[0]->getType());
+    }
+    switch (BuiltinID) {
+    case PPC::BI__builtin_tabortdc:
+      ID = Intrinsic::ppc_tabortdc;
+      break;
+    case PPC::BI__builtin_tabortwc:
+      ID = Intrinsic::ppc_tabortwc;
+      break;
+    }
+    llvm::Function *F = CGM.getIntrinsic(ID);
+    return Builder.CreateCall(F, Ops, "");
+  }
+  case PPC::BI__builtin_tabortdci:
+  case PPC::BI__builtin_tabortwci: {
+    // For the wci and dci variants of tabort, the first and third arguments
+    // must be 5-bit constant integers.
+    ConstantInt *CI = dyn_cast<ConstantInt>(Ops[0]);
+    assert(CI);
+    if (CI->getZExtValue() > 31) {
+      CGM.ErrorUnsupported(E->getArg(0), "argument out of range (should be 0-31)");
+      return llvm::UndefValue::get(Ops[0]->getType());
+    }
+    CI = dyn_cast<ConstantInt>(Ops[2]);
+    assert(CI);
+    if (CI->getZExtValue() > 31) {
+      CGM.ErrorUnsupported(E->getArg(2), "argument out of range (should be 0-31)");
+      return llvm::UndefValue::get(Ops[2]->getType());
+    }
+    switch (BuiltinID) {
+    default: llvm_unreachable("Unsupported htm intrinsic!");
+    case PPC::BI__builtin_tabortdci:
+      ID = Intrinsic::ppc_tabortdci;
+      break;
+    case PPC::BI__builtin_tabortwci:
+      ID = Intrinsic::ppc_tabortwci;
+      break;
+    }
+    llvm::Function *F = CGM.getIntrinsic(ID);
+    return Builder.CreateCall(F, Ops, "");
+  }
+
   }
 }
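
The new __builtin_isinf_sign lowering computes
fabs(x) == inf ? (signbit(x) ? -1 : 1) : 0 entirely in IR; for reference:

    int classify(double x) {
      return __builtin_isinf_sign(x);   /* -1 for -inf, +1 for +inf, else 0 */
    }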
 
diff --git a/lib/CodeGen/CGCXXABI.cpp b/lib/CodeGen/CGCXXABI.cpp
index d31331d..cb7e6df 100644
--- a/lib/CodeGen/CGCXXABI.cpp
+++ b/lib/CodeGen/CGCXXABI.cpp
@@ -302,3 +302,10 @@
 bool CGCXXABI::NeedsVTTParameter(GlobalDecl GD) {
   return false;
 }
+
+llvm::CallInst *
+CGCXXABI::emitTerminateForUnexpectedException(CodeGenFunction &CGF,
+                                              llvm::Value *Exn) {
+  // Just call std::terminate and ignore the violating exception.
+  return CGF.EmitNounwindRuntimeCall(CGF.CGM.getTerminateFn());
+}
diff --git a/lib/CodeGen/CGCXXABI.h b/lib/CodeGen/CGCXXABI.h
index cc5c1b2..b6a94f9 100644
--- a/lib/CodeGen/CGCXXABI.h
+++ b/lib/CodeGen/CGCXXABI.h
@@ -22,6 +22,7 @@
 class Constant;
 class Type;
 class Value;
+class CallInst;
 }
 
 namespace clang {
@@ -214,8 +215,18 @@
                                        llvm::Value *Ptr, QualType ElementType,
                                        const CXXDestructorDecl *Dtor) = 0;
   virtual void emitRethrow(CodeGenFunction &CGF, bool isNoReturn) = 0;
+  virtual void emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) = 0;
+  virtual llvm::GlobalVariable *getThrowInfo(QualType T) { return nullptr; }
+
+  virtual void emitBeginCatch(CodeGenFunction &CGF, const CXXCatchStmt *C) = 0;
+
+  virtual llvm::CallInst *
+  emitTerminateForUnexpectedException(CodeGenFunction &CGF,
+                                      llvm::Value *Exn);
 
   virtual llvm::Constant *getAddrOfRTTIDescriptor(QualType Ty) = 0;
+  virtual llvm::Constant *
+  getAddrOfCXXHandlerMapEntry(QualType Ty, QualType CatchHandlerType) = 0;
 
   virtual bool shouldTypeidBeNullChecked(bool IsDeref,
                                          QualType SrcRecordTy) = 0;
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index 7e6fef9..d397761 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -31,6 +31,7 @@
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include <sstream>
 using namespace clang;
 using namespace CodeGen;
 
@@ -346,6 +347,26 @@
                                  FTP->getExtInfo(), RequiredArgs(1));
 }
 
+const CGFunctionInfo &
+CodeGenTypes::arrangeMSCtorClosure(const CXXConstructorDecl *CD,
+                                   CXXCtorType CT) {
+  assert(CT == Ctor_CopyingClosure || CT == Ctor_DefaultClosure);
+
+  CanQual<FunctionProtoType> FTP = GetFormalType(CD);
+  SmallVector<CanQualType, 2> ArgTys;
+  const CXXRecordDecl *RD = CD->getParent();
+  ArgTys.push_back(GetThisType(Context, RD));
+  if (CT == Ctor_CopyingClosure)
+    ArgTys.push_back(*FTP->param_type_begin());
+  if (RD->getNumVBases() > 0)
+    ArgTys.push_back(Context.IntTy);
+  CallingConv CC = Context.getDefaultCallingConvention(
+      /*IsVariadic=*/false, /*IsCXXMethod=*/true);
+  return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/true,
+                                 /*chainCall=*/false, ArgTys,
+                                 FunctionType::ExtInfo(CC), RequiredArgs::All);
+}
+
 /// Arrange a call as unto a free function, except possibly with an
 /// additional number of formal parameters considered required.
 static const CGFunctionInfo &
@@ -1455,6 +1476,26 @@
 
     if (!CodeGenOpts.StackRealignment)
       FuncAttrs.addAttribute("no-realign-stack");
+
+    // Add target-cpu and target-features if they differ from the defaults.
+    std::string &CPU = getTarget().getTargetOpts().CPU;
+    if (CPU != "" && CPU != getTarget().getTriple().getArchName())
+      FuncAttrs.addAttribute("target-cpu", getTarget().getTargetOpts().CPU);
+
+    // TODO: FeaturesAsWritten gets us the features on the command line,
+    // for canonicalization purposes we might want to avoid putting features
+    // in the target-features set if we know it'll be one of the default
+    // features in the backend, e.g. corei7-avx and +avx.
+    std::vector<std::string> &Features =
+        getTarget().getTargetOpts().FeaturesAsWritten;
+    if (!Features.empty()) {
+      std::stringstream S;
+      std::copy(Features.begin(), Features.end(),
+                std::ostream_iterator<std::string>(S, ","));
+      // The drop_back gets rid of the trailing comma.
+      FuncAttrs.addAttribute("target-features",
+                             StringRef(S.str()).drop_back(1));
+    }
   }
 
   ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI);
@@ -3102,8 +3143,8 @@
                  : 0);
         if ((!ArgInfo.getIndirectByVal() && I->NeedsCopy) ||
             (ArgInfo.getIndirectByVal() && TypeAlign.getQuantity() < Align &&
-             llvm::getOrEnforceKnownAlignment(Addr, Align, TD) < Align) ||
-             (ArgInfo.getIndirectByVal() && (RVAddrSpace != ArgAddrSpace))) {
+             llvm::getOrEnforceKnownAlignment(Addr, Align, *TD) < Align) ||
+            (ArgInfo.getIndirectByVal() && (RVAddrSpace != ArgAddrSpace))) {
           // Create an aligned temporary, and copy to it.
           llvm::AllocaInst *AI = CreateMemTemp(I->Ty);
           if (Align > AI->getAlignment())
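
With the attribute emission above, functions in the generated IR record the
CPU and features written on the command line. Illustrative output (the exact
strings depend on the cc1 invocation, e.g. -target-cpu corei7-avx
-target-feature +avx):

    attributes #0 = { ... "target-cpu"="corei7-avx" "target-features"="+avx" }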
diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp
index 5649708..84d6437 100644
--- a/lib/CodeGen/CGClass.cpp
+++ b/lib/CodeGen/CGClass.cpp
@@ -2093,7 +2093,96 @@
   if (!SanOpts.has(SanitizerKind::CFIVptr))
     return;
 
-  const CXXRecordDecl *RD = MD->getParent();
+  EmitVTablePtrCheck(MD->getParent(), VTable);
+}
+
+// If a class has a single non-virtual base and does not introduce or override
+// virtual member functions or fields, it will have the same layout as its base.
+// This function returns the least derived such class.
+//
+// Casting an instance of a base class to such a derived class is technically
+// undefined behavior, but it is a relatively common hack for introducing member
+// functions on class instances with specific properties (e.g. llvm::Operator)
+// that works under most compilers and should not have security implications, so
+// we allow it by default. It can be disabled with -fsanitize=cfi-cast-strict.
+static const CXXRecordDecl *
+LeastDerivedClassWithSameLayout(const CXXRecordDecl *RD) {
+  if (!RD->field_empty())
+    return RD;
+
+  if (RD->getNumVBases() != 0)
+    return RD;
+
+  if (RD->getNumBases() != 1)
+    return RD;
+
+  for (const CXXMethodDecl *MD : RD->methods()) {
+    if (MD->isVirtual()) {
+      // Virtual member functions are only ok if they are implicit destructors
+      // because the implicit destructor will have the same semantics as the
+      // base class's destructor if no fields are added.
+      if (isa<CXXDestructorDecl>(MD) && MD->isImplicit())
+        continue;
+      return RD;
+    }
+  }
+
+  return LeastDerivedClassWithSameLayout(
+      RD->bases_begin()->getType()->getAsCXXRecordDecl());
+}
+
+void CodeGenFunction::EmitVTablePtrCheckForCast(QualType T,
+                                                llvm::Value *Derived,
+                                                bool MayBeNull) {
+  if (!getLangOpts().CPlusPlus)
+    return;
+
+  auto *ClassTy = T->getAs<RecordType>();
+  if (!ClassTy)
+    return;
+
+  const CXXRecordDecl *ClassDecl = cast<CXXRecordDecl>(ClassTy->getDecl());
+
+  if (!ClassDecl->isCompleteDefinition() || !ClassDecl->isDynamicClass())
+    return;
+
+  SmallString<64> MangledName;
+  llvm::raw_svector_ostream Out(MangledName);
+  CGM.getCXXABI().getMangleContext().mangleCXXRTTI(T.getUnqualifiedType(),
+                                                   Out);
+
+  // Blacklist based on the mangled type.
+  if (CGM.getContext().getSanitizerBlacklist().isBlacklistedType(Out.str()))
+    return;
+
+  if (!SanOpts.has(SanitizerKind::CFICastStrict))
+    ClassDecl = LeastDerivedClassWithSameLayout(ClassDecl);
+
+  llvm::BasicBlock *ContBlock = nullptr;
+
+  if (MayBeNull) {
+    llvm::Value *DerivedNotNull =
+        Builder.CreateIsNotNull(Derived, "cast.nonnull");
+
+    llvm::BasicBlock *CheckBlock = createBasicBlock("cast.check");
+    ContBlock = createBasicBlock("cast.cont");
+
+    Builder.CreateCondBr(DerivedNotNull, CheckBlock, ContBlock);
+
+    EmitBlock(CheckBlock);
+  }
+
+  llvm::Value *VTable = GetVTablePtr(Derived, Int8PtrTy);
+  EmitVTablePtrCheck(ClassDecl, VTable);
+
+  if (MayBeNull) {
+    Builder.CreateBr(ContBlock);
+    EmitBlock(ContBlock);
+  }
+}
+
+void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD,
+                                         llvm::Value *VTable) {
   // FIXME: Add blacklisting scheme.
   if (RD->isInStdNamespace())
     return;
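
A sketch of the cast pattern that LeastDerivedClassWithSameLayout tolerates by
default (rejected under -fsanitize=cfi-cast-strict; the types here are
hypothetical):

    struct Base { virtual ~Base(); int X; };
    // No fields and no new virtual functions, so same layout as Base.
    struct Wrapper : Base { int get() const { return X; } };
    int read(Base *B) { return static_cast<Wrapper *>(B)->get(); }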
diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp
index 6d78ee8..186c522 100644
--- a/lib/CodeGen/CGDebugInfo.cpp
+++ b/lib/CodeGen/CGDebugInfo.cpp
@@ -647,11 +647,19 @@
   unsigned Line = getLineNumber(RD->getLocation());
   StringRef RDName = getClassName(RD);
 
+  uint64_t Size = 0;
+  uint64_t Align = 0;
+
+  const RecordDecl *D = RD->getDefinition();
+  if (D && D->isCompleteDefinition()) {
+    Size = CGM.getContext().getTypeSize(Ty);
+    Align = CGM.getContext().getTypeAlign(Ty);
+  }
 
   // Create the type.
   SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU);
   llvm::DICompositeType RetTy = DBuilder.createReplaceableCompositeType(
-      getTagForRecord(RD), RDName, Ctx, DefUnit, Line, 0, 0, 0,
+      getTagForRecord(RD), RDName, Ctx, DefUnit, Line, 0, Size, Align,
       llvm::DIDescriptor::FlagFwdDecl, FullName);
   ReplaceMap.emplace_back(
       std::piecewise_construct, std::make_tuple(Ty),
@@ -2224,7 +2232,7 @@
   // Propagate members from the declaration to the definition
   // CreateType(const RecordType*) will overwrite this with the members in the
   // correct order if the full type is needed.
-  DBuilder.replaceArrays(Res, T.getElements());
+  DBuilder.replaceArrays(Res, T ? T.getElements() : llvm::DIArray());
 
   // And update the type cache.
   TypeCache[QTy.getAsOpaquePtr()].reset(Res);
@@ -2376,9 +2384,17 @@
   // FIXME: Generalize this for even non-member global variables where the
   // declaration and definition may have different lexical decl contexts, once
   // we have support for emitting declarations of (non-member) global variables.
-  VDContext = getContextDescriptor(
-      dyn_cast<Decl>(VD->isStaticDataMember() ? VD->getLexicalDeclContext()
-                                              : VD->getDeclContext()));
+  const DeclContext *DC = VD->isStaticDataMember() ? VD->getLexicalDeclContext()
+                                                   : VD->getDeclContext();
+  // When a record type contains an in-line initialization of a static data
+  // member, and the record type is marked as __declspec(dllexport), an implicit
+  // definition of the member will be created in the record context.  DWARF
+  // doesn't seem to have a nice way to describe this in a form that consumers
+  // are likely to understand, so fake the "normal" situation of a definition
+  // outside the class by putting it in the global scope.
+  if (DC->isRecord())
+    DC = CGM.getContext().getTranslationUnitDecl();
+  VDContext = getContextDescriptor(dyn_cast<Decl>(DC));
 }
 
 llvm::DISubprogram
@@ -3171,6 +3187,7 @@
 CGDebugInfo::getOrCreateStaticDataMemberDeclarationOrNull(const VarDecl *D) {
   if (!D->isStaticDataMember())
     return llvm::DIDerivedType();
+
   auto MI = StaticDataMemberCache.find(D->getCanonicalDecl());
   if (MI != StaticDataMemberCache.end()) {
     assert(MI->second && "Static data member declaration should still exist");
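
The dllexport case described in the comment in the hunk above, as a
hypothetical MS-ABI-only sketch (clang-cl or an MSVC triple is assumed):
the in-class initializer forces an implicit definition of the static member
inside the record context, which this change re-parents to the translation
unit when building the variable's debug info.

    struct __declspec(dllexport) Exported {
      // The initializer plus dllexport yields an implicit definition in the
      // record context; its debug-info context is now the global scope.
      static const int Value = 42;
    };

    int use() { return Exported::Value; }
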
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp
index fb72a9a..f79d137 100644
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -34,6 +34,7 @@
 void CodeGenFunction::EmitDecl(const Decl &D) {
   switch (D.getKind()) {
   case Decl::TranslationUnit:
+  case Decl::ExternCContext:
   case Decl::Namespace:
   case Decl::UnresolvedUsingTypename:
   case Decl::ClassTemplateSpecialization:
diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp
index 9a4303e..236337b 100644
--- a/lib/CodeGen/CGDeclCXX.cpp
+++ b/lib/CodeGen/CGDeclCXX.cpp
@@ -139,6 +139,29 @@
   const Expr *Init = D.getInit();
   QualType T = D.getType();
 
+  // The address space of a static local variable (DeclPtr) may be different
+  // from the address space of the "this" argument of the constructor. In that
+  // case, we need an addrspacecast before calling the constructor.
+  //
+  // struct StructWithCtor {
+  //   __device__ StructWithCtor() {...}
+  // };
+  // __device__ void foo() {
+  //   __shared__ StructWithCtor s;
+  //   ...
+  // }
+  //
+  // For example, in the above CUDA code, the static local variable s has a
+  // "shared" address space qualifier, but the constructor of StructWithCtor
+  // expects "this" in the "generic" address space.
+  unsigned ExpectedAddrSpace = getContext().getTargetAddressSpace(T);
+  unsigned ActualAddrSpace = DeclPtr->getType()->getPointerAddressSpace();
+  if (ActualAddrSpace != ExpectedAddrSpace) {
+    llvm::Type *LTy = CGM.getTypes().ConvertTypeForMem(T);
+    llvm::PointerType *PTy = llvm::PointerType::get(LTy, ExpectedAddrSpace);
+    DeclPtr = llvm::ConstantExpr::getAddrSpaceCast(DeclPtr, PTy);
+  }
+
   if (!T->isReferenceType()) {
     if (getLangOpts().OpenMP && D.hasAttr<OMPThreadPrivateDeclAttr>())
       (void)CGM.getOpenMPRuntime().emitThreadPrivateVarDefinition(
diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp
index 4e9eb32..223b8f7 100644
--- a/lib/CodeGen/CGException.cpp
+++ b/lib/CodeGen/CGException.cpp
@@ -25,15 +25,6 @@
 using namespace clang;
 using namespace CodeGen;
 
-static llvm::Constant *getAllocateExceptionFn(CodeGenModule &CGM) {
-  // void *__cxa_allocate_exception(size_t thrown_size);
-
-  llvm::FunctionType *FTy =
-    llvm::FunctionType::get(CGM.Int8PtrTy, CGM.SizeTy, /*IsVarArgs=*/false);
-
-  return CGM.CreateRuntimeFunction(FTy, "__cxa_allocate_exception");
-}
-
 static llvm::Constant *getFreeExceptionFn(CodeGenModule &CGM) {
   // void __cxa_free_exception(void *thrown_exception);
 
@@ -43,50 +34,6 @@
   return CGM.CreateRuntimeFunction(FTy, "__cxa_free_exception");
 }
 
-static llvm::Constant *getThrowFn(CodeGenModule &CGM) {
-  // void __cxa_throw(void *thrown_exception, std::type_info *tinfo,
-  //                  void (*dest) (void *));
-
-  llvm::Type *Args[3] = { CGM.Int8PtrTy, CGM.Int8PtrTy, CGM.Int8PtrTy };
-  llvm::FunctionType *FTy =
-    llvm::FunctionType::get(CGM.VoidTy, Args, /*IsVarArgs=*/false);
-
-  return CGM.CreateRuntimeFunction(FTy, "__cxa_throw");
-}
-
-static llvm::Constant *getGetExceptionPtrFn(CodeGenModule &CGM) {
-  // void *__cxa_get_exception_ptr(void*);
-
-  llvm::FunctionType *FTy =
-    llvm::FunctionType::get(CGM.Int8PtrTy, CGM.Int8PtrTy, /*IsVarArgs=*/false);
-
-  return CGM.CreateRuntimeFunction(FTy, "__cxa_get_exception_ptr");
-}
-
-static llvm::Constant *getBeginCatchFn(CodeGenModule &CGM) {
-  if (CGM.getTarget().getCXXABI().isMicrosoft())
-    return CGM.getIntrinsic(llvm::Intrinsic::eh_begincatch);
-
-  // void *__cxa_begin_catch(void*);
-
-  llvm::FunctionType *FTy =
-    llvm::FunctionType::get(CGM.Int8PtrTy, CGM.Int8PtrTy, /*IsVarArgs=*/false);
-
-  return CGM.CreateRuntimeFunction(FTy, "__cxa_begin_catch");
-}
-
-static llvm::Constant *getEndCatchFn(CodeGenModule &CGM) {
-  if (CGM.getTarget().getCXXABI().isMicrosoft())
-    return CGM.getIntrinsic(llvm::Intrinsic::eh_endcatch);
-
-  // void __cxa_end_catch();
-
-  llvm::FunctionType *FTy =
-    llvm::FunctionType::get(CGM.VoidTy, /*IsVarArgs=*/false);
-
-  return CGM.CreateRuntimeFunction(FTy, "__cxa_end_catch");
-}
-
 static llvm::Constant *getUnexpectedFn(CodeGenModule &CGM) {
   // void __cxa_call_unexpected(void *thrown_exception);
 
@@ -96,27 +43,27 @@
   return CGM.CreateRuntimeFunction(FTy, "__cxa_call_unexpected");
 }
 
-static llvm::Constant *getTerminateFn(CodeGenModule &CGM) {
+llvm::Constant *CodeGenModule::getTerminateFn() {
   // void __terminate();
 
   llvm::FunctionType *FTy =
-    llvm::FunctionType::get(CGM.VoidTy, /*IsVarArgs=*/false);
+    llvm::FunctionType::get(VoidTy, /*IsVarArgs=*/false);
 
   StringRef name;
 
   // In C++, use std::terminate().
-  if (CGM.getLangOpts().CPlusPlus &&
-      CGM.getTarget().getCXXABI().isItaniumFamily()) {
+  if (getLangOpts().CPlusPlus &&
+      getTarget().getCXXABI().isItaniumFamily()) {
     name = "_ZSt9terminatev";
-  } else if (CGM.getLangOpts().CPlusPlus &&
-             CGM.getTarget().getCXXABI().isMicrosoft()) {
+  } else if (getLangOpts().CPlusPlus &&
+             getTarget().getCXXABI().isMicrosoft()) {
     name = "\01?terminate@@YAXXZ";
-  } else if (CGM.getLangOpts().ObjC1 &&
-             CGM.getLangOpts().ObjCRuntime.hasTerminate())
+  } else if (getLangOpts().ObjC1 &&
+             getLangOpts().ObjCRuntime.hasTerminate())
     name = "objc_terminate";
   else
     name = "abort";
-  return CGM.CreateRuntimeFunction(FTy, name);
+  return CreateRuntimeFunction(FTy, name);
 }
 
 static llvm::Constant *getCatchallRethrowFn(CodeGenModule &CGM,
@@ -414,17 +361,16 @@
 // differs from EmitAnyExprToMem only in that, if a final copy-ctor
 // call is required, an exception within that copy ctor causes
 // std::terminate to be invoked.
-static void EmitAnyExprToExn(CodeGenFunction &CGF, const Expr *e,
-                             llvm::Value *addr) {
+void CodeGenFunction::EmitAnyExprToExn(const Expr *e, llvm::Value *addr) {
   // Make sure the exception object is cleaned up if there's an
   // exception during initialization.
-  CGF.pushFullExprCleanup<FreeException>(EHCleanup, addr);
-  EHScopeStack::stable_iterator cleanup = CGF.EHStack.stable_begin();
+  pushFullExprCleanup<FreeException>(EHCleanup, addr);
+  EHScopeStack::stable_iterator cleanup = EHStack.stable_begin();
 
   // __cxa_allocate_exception returns a void*;  we need to cast this
   // to the appropriate type for the object.
-  llvm::Type *ty = CGF.ConvertTypeForMem(e->getType())->getPointerTo();
-  llvm::Value *typedAddr = CGF.Builder.CreateBitCast(addr, ty);
+  llvm::Type *ty = ConvertTypeForMem(e->getType())->getPointerTo();
+  llvm::Value *typedAddr = Builder.CreateBitCast(addr, ty);
 
   // FIXME: this isn't quite right!  If there's a final unelided call
   // to a copy constructor, then according to [except.terminate]p1 we
@@ -433,11 +379,11 @@
   // evaluated but before the exception is caught.  But the best way
   // to handle that is to teach EmitAggExpr to do the final copy
   // differently if it can't be elided.
-  CGF.EmitAnyExprToMem(e, typedAddr, e->getType().getQualifiers(), 
-                       /*IsInit*/ true);
+  EmitAnyExprToMem(e, typedAddr, e->getType().getQualifiers(),
+                   /*IsInit*/ true);
 
   // Deactivate the cleanup block.
-  CGF.DeactivateCleanupBlock(cleanup, cast<llvm::Instruction>(typedAddr));
+  DeactivateCleanupBlock(cleanup, cast<llvm::Instruction>(typedAddr));
 }
 
 llvm::Value *CodeGenFunction::getExceptionSlot() {
@@ -469,75 +415,18 @@
 
 void CodeGenFunction::EmitCXXThrowExpr(const CXXThrowExpr *E,
                                        bool KeepInsertionPoint) {
-  if (!E->getSubExpr()) {
-    CGM.getCXXABI().emitRethrow(*this, /*isNoReturn*/true);
-
-    // throw is an expression, and the expression emitters expect us
-    // to leave ourselves at a valid insertion point.
-    if (KeepInsertionPoint)
-      EmitBlock(createBasicBlock("throw.cont"));
-
-    return;
-  }
-
-  if (CGM.getTarget().getTriple().isKnownWindowsMSVCEnvironment()) {
-    // Call std::terminate().
-    llvm::CallInst *TermCall = EmitNounwindRuntimeCall(getTerminateFn(CGM));
-    TermCall->setDoesNotReturn();
-
-    // throw is an expression, and the expression emitters expect us
-    // to leave ourselves at a valid insertion point.
-    if (KeepInsertionPoint)
-      EmitBlock(createBasicBlock("throw.cont"));
-
-    return;
-  }
-
-  QualType ThrowType = E->getSubExpr()->getType();
-
-  if (ThrowType->isObjCObjectPointerType()) {
-    const Stmt *ThrowStmt = E->getSubExpr();
-    const ObjCAtThrowStmt S(E->getExprLoc(),
-                            const_cast<Stmt *>(ThrowStmt));
-    CGM.getObjCRuntime().EmitThrowStmt(*this, S, false);
-    // This will clear insertion point which was not cleared in
-    // call to EmitThrowStmt.
-    if (KeepInsertionPoint)
-      EmitBlock(createBasicBlock("throw.cont"));
-    return;
-  }
-  
-  // Now allocate the exception object.
-  llvm::Type *SizeTy = ConvertType(getContext().getSizeType());
-  uint64_t TypeSize = getContext().getTypeSizeInChars(ThrowType).getQuantity();
-
-  llvm::Constant *AllocExceptionFn = getAllocateExceptionFn(CGM);
-  llvm::CallInst *ExceptionPtr =
-    EmitNounwindRuntimeCall(AllocExceptionFn,
-                            llvm::ConstantInt::get(SizeTy, TypeSize),
-                            "exception");
-  
-  EmitAnyExprToExn(*this, E->getSubExpr(), ExceptionPtr);
-
-  // Now throw the exception.
-  llvm::Constant *TypeInfo = CGM.GetAddrOfRTTIDescriptor(ThrowType, 
-                                                         /*ForEH=*/true);
-
-  // The address of the destructor.  If the exception type has a
-  // trivial destructor (or isn't a record), we just pass null.
-  llvm::Constant *Dtor = nullptr;
-  if (const RecordType *RecordTy = ThrowType->getAs<RecordType>()) {
-    CXXRecordDecl *Record = cast<CXXRecordDecl>(RecordTy->getDecl());
-    if (!Record->hasTrivialDestructor()) {
-      CXXDestructorDecl *DtorD = Record->getDestructor();
-      Dtor = CGM.getAddrOfCXXStructor(DtorD, StructorType::Complete);
-      Dtor = llvm::ConstantExpr::getBitCast(Dtor, Int8PtrTy);
+  if (const Expr *SubExpr = E->getSubExpr()) {
+    QualType ThrowType = SubExpr->getType();
+    if (ThrowType->isObjCObjectPointerType()) {
+      const Stmt *ThrowStmt = E->getSubExpr();
+      const ObjCAtThrowStmt S(E->getExprLoc(), const_cast<Stmt *>(ThrowStmt));
+      CGM.getObjCRuntime().EmitThrowStmt(*this, S, false);
+    } else {
+      CGM.getCXXABI().emitThrow(*this, E);
     }
+  } else {
+    CGM.getCXXABI().emitRethrow(*this, /*isNoReturn=*/true);
   }
-  if (!Dtor) Dtor = llvm::Constant::getNullValue(Int8PtrTy);
-
-  llvm::Value *args[] = { ExceptionPtr, TypeInfo, Dtor };
-  EmitNoreturnRuntimeCallOrInvoke(getThrowFn(CGM), args);
 
   // throw is an expression, and the expression emitters expect us
   // to leave ourselves at a valid insertion point.
@@ -678,7 +567,8 @@
       if (CaughtType->isObjCObjectPointerType())
         TypeInfo = CGM.getObjCRuntime().GetEHType(CaughtType);
       else
-        TypeInfo = CGM.GetAddrOfRTTIDescriptor(CaughtType, /*ForEH=*/true);
+        TypeInfo =
+            CGM.getAddrOfCXXHandlerMapEntry(CaughtType, C->getCaughtType());
       CatchScope->setHandler(I, TypeInfo, Handler);
     } else {
       // No exception decl indicates '...', a catch-all.
@@ -920,263 +810,6 @@
   return lpad;
 }
 
-namespace {
-  /// A cleanup to call __cxa_end_catch.  In many cases, the caught
-  /// exception type lets us state definitively that the thrown exception
-  /// type does not have a destructor.  In particular:
-  ///   - Catch-alls tell us nothing, so we have to conservatively
-  ///     assume that the thrown exception might have a destructor.
-  ///   - Catches by reference behave according to their base types.
-  ///   - Catches of non-record types will only trigger for exceptions
-  ///     of non-record types, which never have destructors.
-  ///   - Catches of record types can trigger for arbitrary subclasses
-  ///     of the caught type, so we have to assume the actual thrown
-  ///     exception type might have a throwing destructor, even if the
-  ///     caught type's destructor is trivial or nothrow.
-  struct CallEndCatch : EHScopeStack::Cleanup {
-    CallEndCatch(bool MightThrow) : MightThrow(MightThrow) {}
-    bool MightThrow;
-
-    void Emit(CodeGenFunction &CGF, Flags flags) override {
-      if (!MightThrow) {
-        CGF.EmitNounwindRuntimeCall(getEndCatchFn(CGF.CGM));
-        return;
-      }
-
-      CGF.EmitRuntimeCallOrInvoke(getEndCatchFn(CGF.CGM));
-    }
-  };
-}
-
-/// Emits a call to __cxa_begin_catch and enters a cleanup to call
-/// __cxa_end_catch.
-///
-/// \param EndMightThrow - true if __cxa_end_catch might throw
-static llvm::Value *CallBeginCatch(CodeGenFunction &CGF,
-                                   llvm::Value *Exn,
-                                   bool EndMightThrow) {
-  llvm::CallInst *call =
-    CGF.EmitNounwindRuntimeCall(getBeginCatchFn(CGF.CGM), Exn);
-
-  CGF.EHStack.pushCleanup<CallEndCatch>(NormalAndEHCleanup, EndMightThrow);
-
-  return call;
-}
-
-/// A "special initializer" callback for initializing a catch
-/// parameter during catch initialization.
-static void InitCatchParam(CodeGenFunction &CGF,
-                           const VarDecl &CatchParam,
-                           llvm::Value *ParamAddr,
-                           SourceLocation Loc) {
-  // Load the exception from where the landing pad saved it.
-  llvm::Value *Exn = CGF.getExceptionFromSlot();
-
-  CanQualType CatchType =
-    CGF.CGM.getContext().getCanonicalType(CatchParam.getType());
-  llvm::Type *LLVMCatchTy = CGF.ConvertTypeForMem(CatchType);
-
-  // If we're catching by reference, we can just cast the object
-  // pointer to the appropriate pointer.
-  if (isa<ReferenceType>(CatchType)) {
-    QualType CaughtType = cast<ReferenceType>(CatchType)->getPointeeType();
-    bool EndCatchMightThrow = CaughtType->isRecordType();
-
-    // __cxa_begin_catch returns the adjusted object pointer.
-    llvm::Value *AdjustedExn = CallBeginCatch(CGF, Exn, EndCatchMightThrow);
-
-    // We have no way to tell the personality function that we're
-    // catching by reference, so if we're catching a pointer,
-    // __cxa_begin_catch will actually return that pointer by value.
-    if (const PointerType *PT = dyn_cast<PointerType>(CaughtType)) {
-      QualType PointeeType = PT->getPointeeType();
-
-      // When catching by reference, generally we should just ignore
-      // this by-value pointer and use the exception object instead.
-      if (!PointeeType->isRecordType()) {
-
-        // Exn points to the struct _Unwind_Exception header, which
-        // we have to skip past in order to reach the exception data.
-        unsigned HeaderSize =
-          CGF.CGM.getTargetCodeGenInfo().getSizeOfUnwindException();
-        AdjustedExn = CGF.Builder.CreateConstGEP1_32(Exn, HeaderSize);
-
-      // However, if we're catching a pointer-to-record type that won't
-      // work, because the personality function might have adjusted
-      // the pointer.  There's actually no way for us to fully satisfy
-      // the language/ABI contract here:  we can't use Exn because it
-      // might have the wrong adjustment, but we can't use the by-value
-      // pointer because it's off by a level of abstraction.
-      //
-      // The current solution is to dump the adjusted pointer into an
-      // alloca, which breaks language semantics (because changing the
-      // pointer doesn't change the exception) but at least works.
-      // The better solution would be to filter out non-exact matches
-      // and rethrow them, but this is tricky because the rethrow
-      // really needs to be catchable by other sites at this landing
-      // pad.  The best solution is to fix the personality function.
-      } else {
-        // Pull the pointer for the reference type off.
-        llvm::Type *PtrTy =
-          cast<llvm::PointerType>(LLVMCatchTy)->getElementType();
-
-        // Create the temporary and write the adjusted pointer into it.
-        llvm::Value *ExnPtrTmp = CGF.CreateTempAlloca(PtrTy, "exn.byref.tmp");
-        llvm::Value *Casted = CGF.Builder.CreateBitCast(AdjustedExn, PtrTy);
-        CGF.Builder.CreateStore(Casted, ExnPtrTmp);
-
-        // Bind the reference to the temporary.
-        AdjustedExn = ExnPtrTmp;
-      }
-    }
-
-    llvm::Value *ExnCast =
-      CGF.Builder.CreateBitCast(AdjustedExn, LLVMCatchTy, "exn.byref");
-    CGF.Builder.CreateStore(ExnCast, ParamAddr);
-    return;
-  }
-
-  // Scalars and complexes.
-  TypeEvaluationKind TEK = CGF.getEvaluationKind(CatchType);
-  if (TEK != TEK_Aggregate) {
-    llvm::Value *AdjustedExn = CallBeginCatch(CGF, Exn, false);
-    
-    // If the catch type is a pointer type, __cxa_begin_catch returns
-    // the pointer by value.
-    if (CatchType->hasPointerRepresentation()) {
-      llvm::Value *CastExn =
-        CGF.Builder.CreateBitCast(AdjustedExn, LLVMCatchTy, "exn.casted");
-
-      switch (CatchType.getQualifiers().getObjCLifetime()) {
-      case Qualifiers::OCL_Strong:
-        CastExn = CGF.EmitARCRetainNonBlock(CastExn);
-        // fallthrough
-
-      case Qualifiers::OCL_None:
-      case Qualifiers::OCL_ExplicitNone:
-      case Qualifiers::OCL_Autoreleasing:
-        CGF.Builder.CreateStore(CastExn, ParamAddr);
-        return;
-
-      case Qualifiers::OCL_Weak:
-        CGF.EmitARCInitWeak(ParamAddr, CastExn);
-        return;
-      }
-      llvm_unreachable("bad ownership qualifier!");
-    }
-
-    // Otherwise, it returns a pointer into the exception object.
-
-    llvm::Type *PtrTy = LLVMCatchTy->getPointerTo(0); // addrspace 0 ok
-    llvm::Value *Cast = CGF.Builder.CreateBitCast(AdjustedExn, PtrTy);
-
-    LValue srcLV = CGF.MakeNaturalAlignAddrLValue(Cast, CatchType);
-    LValue destLV = CGF.MakeAddrLValue(ParamAddr, CatchType,
-                                  CGF.getContext().getDeclAlign(&CatchParam));
-    switch (TEK) {
-    case TEK_Complex:
-      CGF.EmitStoreOfComplex(CGF.EmitLoadOfComplex(srcLV, Loc), destLV,
-                             /*init*/ true);
-      return;
-    case TEK_Scalar: {
-      llvm::Value *ExnLoad = CGF.EmitLoadOfScalar(srcLV, Loc);
-      CGF.EmitStoreOfScalar(ExnLoad, destLV, /*init*/ true);
-      return;
-    }
-    case TEK_Aggregate:
-      llvm_unreachable("evaluation kind filtered out!");
-    }
-    llvm_unreachable("bad evaluation kind");
-  }
-
-  assert(isa<RecordType>(CatchType) && "unexpected catch type!");
-
-  llvm::Type *PtrTy = LLVMCatchTy->getPointerTo(0); // addrspace 0 ok
-
-  // Check for a copy expression.  If we don't have a copy expression,
-  // that means a trivial copy is okay.
-  const Expr *copyExpr = CatchParam.getInit();
-  if (!copyExpr) {
-    llvm::Value *rawAdjustedExn = CallBeginCatch(CGF, Exn, true);
-    llvm::Value *adjustedExn = CGF.Builder.CreateBitCast(rawAdjustedExn, PtrTy);
-    CGF.EmitAggregateCopy(ParamAddr, adjustedExn, CatchType);
-    return;
-  }
-
-  // We have to call __cxa_get_exception_ptr to get the adjusted
-  // pointer before copying.
-  llvm::CallInst *rawAdjustedExn =
-    CGF.EmitNounwindRuntimeCall(getGetExceptionPtrFn(CGF.CGM), Exn);
-
-  // Cast that to the appropriate type.
-  llvm::Value *adjustedExn = CGF.Builder.CreateBitCast(rawAdjustedExn, PtrTy);
-
-  // The copy expression is defined in terms of an OpaqueValueExpr.
-  // Find it and map it to the adjusted expression.
-  CodeGenFunction::OpaqueValueMapping
-    opaque(CGF, OpaqueValueExpr::findInCopyConstruct(copyExpr),
-           CGF.MakeAddrLValue(adjustedExn, CatchParam.getType()));
-
-  // Call the copy ctor in a terminate scope.
-  CGF.EHStack.pushTerminate();
-
-  // Perform the copy construction.
-  CharUnits Alignment = CGF.getContext().getDeclAlign(&CatchParam);
-  CGF.EmitAggExpr(copyExpr,
-                  AggValueSlot::forAddr(ParamAddr, Alignment, Qualifiers(),
-                                        AggValueSlot::IsNotDestructed,
-                                        AggValueSlot::DoesNotNeedGCBarriers,
-                                        AggValueSlot::IsNotAliased));
-
-  // Leave the terminate scope.
-  CGF.EHStack.popTerminate();
-
-  // Undo the opaque value mapping.
-  opaque.pop();
-
-  // Finally we can call __cxa_begin_catch.
-  CallBeginCatch(CGF, Exn, true);
-}
-
-/// Begins a catch statement by initializing the catch variable and
-/// calling __cxa_begin_catch.
-static void BeginCatch(CodeGenFunction &CGF, const CXXCatchStmt *S) {
-  // We have to be very careful with the ordering of cleanups here:
-  //   C++ [except.throw]p4:
-  //     The destruction [of the exception temporary] occurs
-  //     immediately after the destruction of the object declared in
-  //     the exception-declaration in the handler.
-  //
-  // So the precise ordering is:
-  //   1.  Construct catch variable.
-  //   2.  __cxa_begin_catch
-  //   3.  Enter __cxa_end_catch cleanup
-  //   4.  Enter dtor cleanup
-  //
-  // We do this by using a slightly abnormal initialization process.
-  // Delegation sequence:
-  //   - ExitCXXTryStmt opens a RunCleanupsScope
-  //     - EmitAutoVarAlloca creates the variable and debug info
-  //       - InitCatchParam initializes the variable from the exception
-  //       - CallBeginCatch calls __cxa_begin_catch
-  //       - CallBeginCatch enters the __cxa_end_catch cleanup
-  //     - EmitAutoVarCleanups enters the variable destructor cleanup
-  //   - EmitCXXTryStmt emits the code for the catch body
-  //   - EmitCXXTryStmt close the RunCleanupsScope
-
-  VarDecl *CatchParam = S->getExceptionDecl();
-  if (!CatchParam) {
-    llvm::Value *Exn = CGF.getExceptionFromSlot();
-    CallBeginCatch(CGF, Exn, true);
-    return;
-  }
-
-  // Emit the local.
-  CodeGenFunction::AutoVarEmission var = CGF.EmitAutoVarAlloca(*CatchParam);
-  InitCatchParam(CGF, *CatchParam, var.getObjectAddress(CGF), S->getLocStart());
-  CGF.EmitAutoVarCleanups(var);
-}
-
 /// Emit the structure of the dispatch block for the given catch scope.
 /// It is an invariant that the dispatch block already exists.
 static void emitCatchDispatchBlock(CodeGenFunction &CGF,
@@ -1315,7 +948,7 @@
     RunCleanupsScope CatchScope(*this);
 
     // Initialize the catch variable and set up the cleanups.
-    BeginCatch(*this, C);
+    CGM.getCXXABI().emitBeginCatch(*this, C);
 
     // Emit the PGO counter increment.
     RegionCounter CatchCnt = getPGORegionCounter(C);
@@ -1543,70 +1176,6 @@
   CGF.PopCleanupBlock();
 }
 
-/// In a terminate landing pad, should we use __clang__call_terminate
-/// or just a naked call to std::terminate?
-///
-/// __clang_call_terminate calls __cxa_begin_catch, which then allows
-/// std::terminate to usefully report something about the
-/// violating exception.
-static bool useClangCallTerminate(CodeGenModule &CGM) {
-  // Only do this for Itanium-family ABIs in C++ mode.
-  return (CGM.getLangOpts().CPlusPlus &&
-          CGM.getTarget().getCXXABI().isItaniumFamily());
-}
-
-/// Get or define the following function:
-///   void @__clang_call_terminate(i8* %exn) nounwind noreturn
-/// This code is used only in C++.
-static llvm::Constant *getClangCallTerminateFn(CodeGenModule &CGM) {
-  llvm::FunctionType *fnTy =
-    llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false);
-  llvm::Constant *fnRef =
-    CGM.CreateRuntimeFunction(fnTy, "__clang_call_terminate");
-
-  llvm::Function *fn = dyn_cast<llvm::Function>(fnRef);
-  if (fn && fn->empty()) {
-    fn->setDoesNotThrow();
-    fn->setDoesNotReturn();
-
-    // What we really want is to massively penalize inlining without
-    // forbidding it completely.  The difference between that and
-    // 'noinline' is negligible.
-    fn->addFnAttr(llvm::Attribute::NoInline);
-
-    // Allow this function to be shared across translation units, but
-    // we don't want it to turn into an exported symbol.
-    fn->setLinkage(llvm::Function::LinkOnceODRLinkage);
-    fn->setVisibility(llvm::Function::HiddenVisibility);
-    if (CGM.supportsCOMDAT())
-      fn->setComdat(CGM.getModule().getOrInsertComdat(fn->getName()));
-
-    // Set up the function.
-    llvm::BasicBlock *entry =
-      llvm::BasicBlock::Create(CGM.getLLVMContext(), "", fn);
-    CGBuilderTy builder(entry);
-
-    // Pull the exception pointer out of the parameter list.
-    llvm::Value *exn = &*fn->arg_begin();
-
-    // Call __cxa_begin_catch(exn).
-    llvm::CallInst *catchCall = builder.CreateCall(getBeginCatchFn(CGM), exn);
-    catchCall->setDoesNotThrow();
-    catchCall->setCallingConv(CGM.getRuntimeCC());
-
-    // Call std::terminate().
-    llvm::CallInst *termCall = builder.CreateCall(getTerminateFn(CGM));
-    termCall->setDoesNotThrow();
-    termCall->setDoesNotReturn();
-    termCall->setCallingConv(CGM.getRuntimeCC());
-
-    // std::terminate cannot return.
-    builder.CreateUnreachable();
-  }
-
-  return fnRef;
-}
-
 llvm::BasicBlock *CodeGenFunction::getTerminateLandingPad() {
   if (TerminateLandingPad)
     return TerminateLandingPad;
@@ -1624,14 +1193,11 @@
                              getOpaquePersonalityFn(CGM, Personality), 0);
   LPadInst->addClause(getCatchAllValue(*this));
 
-  llvm::CallInst *terminateCall;
-  if (useClangCallTerminate(CGM)) {
-    // Extract out the exception pointer.
-    llvm::Value *exn = Builder.CreateExtractValue(LPadInst, 0);
-    terminateCall = EmitNounwindRuntimeCall(getClangCallTerminateFn(CGM), exn);
-  } else {
-    terminateCall = EmitNounwindRuntimeCall(getTerminateFn(CGM));
-  }
+  llvm::Value *Exn = nullptr;
+  if (getLangOpts().CPlusPlus)
+    Exn = Builder.CreateExtractValue(LPadInst, 0);
+  llvm::CallInst *terminateCall =
+      CGM.getCXXABI().emitTerminateForUnexpectedException(*this, Exn);
   terminateCall->setDoesNotReturn();
   Builder.CreateUnreachable();
 
@@ -1651,14 +1217,11 @@
   // end of the function by FinishFunction.
   TerminateHandler = createBasicBlock("terminate.handler");
   Builder.SetInsertPoint(TerminateHandler);
-  llvm::CallInst *terminateCall;
-  if (useClangCallTerminate(CGM)) {
-    // Load the exception pointer.
-    llvm::Value *exn = getExceptionFromSlot();
-    terminateCall = EmitNounwindRuntimeCall(getClangCallTerminateFn(CGM), exn);
-  } else {
-    terminateCall = EmitNounwindRuntimeCall(getTerminateFn(CGM));
-  }
+  llvm::Value *Exn = nullptr;
+  if (getLangOpts().CPlusPlus)
+    Exn = getExceptionFromSlot();
+  llvm::CallInst *terminateCall =
+      CGM.getCXXABI().emitTerminateForUnexpectedException(*this, Exn);
   terminateCall->setDoesNotReturn();
   Builder.CreateUnreachable();
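
For reference, a sketch of what EmitCXXThrowExpr now delegates to
CGCXXABI::emitThrow on Itanium-family targets (the same
__cxa_allocate_exception / __cxa_throw sequence this file used to emit
inline; the ABI-side emission itself lives outside these hunks):

    struct E {
      ~E() {}  // non-trivial destructor: its address becomes the third
               // argument to __cxa_throw
    };

    void raise() {
      // Lowers to __cxa_allocate_exception, copy-initialization of the
      // exception object, then __cxa_throw(exn, typeinfo for E, E::~E).
      throw E();
    }

    int main() {
      try { raise(); } catch (const E &) { return 0; }
      return 1;
    }
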
 
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index 78e80a1..5ba51cc 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -301,6 +301,23 @@
   switch (M->getStorageDuration()) {
   case SD_FullExpression:
   case SD_Automatic:
+    // If we have a constant temporary array or record, try to promote it
+    // into a constant global under the same rules under which a normal
+    // constant would have been promoted. This is easier on the optimizer and
+    // generally emits fewer instructions.
+    if (CGF.CGM.getCodeGenOpts().MergeAllConstants &&
+        (M->getType()->isArrayType() || M->getType()->isRecordType()) &&
+        CGF.CGM.isTypeConstant(M->getType(), true))
+      if (llvm::Constant *Init =
+              CGF.CGM.EmitConstantExpr(Inner, M->getType(), &CGF)) {
+        auto *GV = new llvm::GlobalVariable(
+            CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
+            llvm::GlobalValue::PrivateLinkage, Init, ".ref.tmp");
+        GV->setAlignment(
+            CGF.getContext().getTypeAlignInChars(M->getType()).getQuantity());
+        // FIXME: Should we put the new global into a COMDAT?
+        return GV;
+      }
     return CGF.CreateMemTemp(Inner->getType(), "ref.tmp");
 
   case SD_Thread:
@@ -324,14 +341,15 @@
       M->getType().getObjCLifetime() != Qualifiers::OCL_None &&
       M->getType().getObjCLifetime() != Qualifiers::OCL_ExplicitNone) {
     llvm::Value *Object = createReferenceTemporary(*this, M, E);
-    LValue RefTempDst = MakeAddrLValue(Object, M->getType());
-
     if (auto *Var = dyn_cast<llvm::GlobalVariable>(Object)) {
+      Object = llvm::ConstantExpr::getBitCast(
+          Var, ConvertTypeForMem(E->getType())->getPointerTo());
       // We should not have emitted the initializer for this temporary as a
       // constant.
       assert(!Var->hasInitializer());
       Var->setInitializer(CGM.EmitNullConstant(E->getType()));
     }
+    LValue RefTempDst = MakeAddrLValue(Object, M->getType());
 
     switch (getEvaluationKind(E->getType())) {
     default: llvm_unreachable("expected scalar or aggregate expression");
@@ -370,8 +388,11 @@
   // Create and initialize the reference temporary.
   llvm::Value *Object = createReferenceTemporary(*this, M, E);
   if (auto *Var = dyn_cast<llvm::GlobalVariable>(Object)) {
-    // If the temporary is a global and has a constant initializer, we may
-    // have already initialized it.
+    Object = llvm::ConstantExpr::getBitCast(
+        Var, ConvertTypeForMem(E->getType())->getPointerTo());
+    // If the temporary is a global and has a constant initializer or is a
+    // constant temporary that we promoted to a global, we may have already
+    // initialized it.
     if (!Var->hasInitializer()) {
       Var->setInitializer(CGM.EmitNullConstant(E->getType()));
       EmitAnyExprToMem(E, Object, Qualifiers(), /*IsInit*/true);
@@ -3013,6 +3034,9 @@
       EmitTypeCheck(TCK_DowncastReference, E->getExprLoc(),
                     Derived, E->getType());
 
+    if (SanOpts.has(SanitizerKind::CFIDerivedCast))
+      EmitVTablePtrCheckForCast(E->getType(), Derived, /*MayBeNull=*/false);
+
     return MakeAddrLValue(Derived, E->getType());
   }
   case CK_LValueBitCast: {
@@ -3022,6 +3046,10 @@
     LValue LV = EmitLValue(E->getSubExpr());
     llvm::Value *V = Builder.CreateBitCast(LV.getAddress(),
                                            ConvertType(CE->getTypeAsWritten()));
+
+    if (SanOpts.has(SanitizerKind::CFIUnrelatedCast))
+      EmitVTablePtrCheckForCast(E->getType(), V, /*MayBeNull=*/false);
+
     return MakeAddrLValue(V, E->getType());
   }
   case CK_ObjCObjectLValueCast: {
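
A hedged illustration of the promotion added above (assuming
-fmerge-all-constants is what sets MergeAllConstants here): the
lifetime-extended temporary array is constant, so it can now be emitted as
a private ".ref.tmp" global rather than a fresh alloca on every call.

    int sum() {
      // Constant temporary bound to a reference: eligible for promotion to
      // a constant global under this change.
      const int (&r)[3] = {1, 2, 3};
      return r[0] + r[1] + r[2];
    }
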
diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp
index 425a968..6852d3a 100644
--- a/lib/CodeGen/CGExprCXX.cpp
+++ b/lib/CodeGen/CGExprCXX.cpp
@@ -1422,71 +1422,6 @@
                                         OperatorDelete, ElementType);
 }
 
-static void EmitDelete(CodeGenFunction &CGF,
-                              const CXXDeleteExpr *DE,
-                              llvm::Value *Ptr,
-                              QualType ElementType);
-
-static void EmitSizedDelete(CodeGenFunction &CGF,
-                            const CXXDeleteExpr *DE,
-                            llvm::Value *Ptr,
-                            QualType ElementType,
-                            FunctionDecl* UnsizedDealloc) {
-
-  if (CGF.getLangOpts().DefineSizedDeallocation) {
-    // The delete operator in use is fixed. So simply emit the delete expr.
-    EmitDelete(CGF, DE, Ptr, ElementType);
-    return;
-  }
-
-  assert(UnsizedDealloc && "We must be emiting a 'sized' delete expr");
-
-  // Branch off over the value of operator delete:
-  // Use the sized form if available, and default on the unsized form otherwise.
-  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("if.then");
-  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("if.end");
-  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("if.else");
-
-  // Emit the condition.
-  const FunctionDecl *OpDelFD = DE->getOperatorDelete();
-  llvm::Value *OpDelAddr = CGF.CGM.GetAddrOfFunction(OpDelFD);
-  //llvm::Function *OpDel = dyn_cast<llvm::Function>(OpDelAddr);
-  llvm::Value *SDE = CGF.Builder.CreateIsNotNull(OpDelAddr, "sized.del.exists");
-  CGF.Builder.CreateCondBr(SDE, ThenBlock, ElseBlock);
-
-  // Emit the 'then' code.
-  CGF.EmitBlock(ThenBlock);
-  EmitDelete(CGF, DE, Ptr, ElementType);
-  CGF.EmitBranch(ContBlock);
-
-  // Compute the 'unsized' delete expr.
-  CXXDeleteExpr * E = const_cast<CXXDeleteExpr*>(DE);
-  CXXDeleteExpr *UnsizedDE =
-  new (CGF.getContext()) CXXDeleteExpr(CGF.getContext().VoidTy,
-                                       E->isGlobalDelete(),
-                                       E->isArrayForm(),
-                                       E->isArrayFormAsWritten(),
-                                       E->doesUsualArrayDeleteWantSize(),
-                                       UnsizedDealloc,
-                                       E->getArgument(),
-                                       E->getLocStart());
-  // Emit the 'else' code.
-  {
-    // There is no need to emit line number for an unconditional branch.
-    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
-    CGF.EmitBlock(ElseBlock);
-  }
-  EmitDelete(CGF, UnsizedDE, Ptr, ElementType);
-  {
-    // There is no need to emit line number for an unconditional branch.
-    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
-    CGF.EmitBranch(ContBlock);
-  }
-
-  // Emit the continuation block for code after the if.
-  CGF.EmitBlock(ContBlock, true);
-}
-
 /// Emit the code for deleting a single object.
 static void EmitObjectDelete(CodeGenFunction &CGF,
                              const CXXDeleteExpr *DE,
@@ -1646,17 +1581,6 @@
   CGF.PopCleanupBlock();
 }
 
-static void EmitDelete(CodeGenFunction &CGF,
-                       const CXXDeleteExpr *DE,
-                       llvm::Value *Ptr,
-                       QualType ElementType) {
-  if (DE->isArrayForm()) {
-    EmitArrayDelete(CGF, DE, Ptr, ElementType);
-  } else {
-    EmitObjectDelete(CGF, DE, Ptr, ElementType);
-  }
-}
-
 void CodeGenFunction::EmitCXXDeleteExpr(const CXXDeleteExpr *E) {
   const Expr *Arg = E->getArgument();
   llvm::Value *Ptr = EmitScalarExpr(Arg);
@@ -1696,12 +1620,11 @@
   assert(ConvertTypeForMem(DeleteTy) ==
          cast<llvm::PointerType>(Ptr->getType())->getElementType());
 
-  const FunctionDecl *Dealloc = E->getOperatorDelete();
-  if (FunctionDecl* UnsizedDealloc =
-      Dealloc->getCorrespondingUnsizedGlobalDeallocationFunction())
-    EmitSizedDelete(*this, E, Ptr, DeleteTy, UnsizedDealloc);
-  else
-    EmitDelete(*this, E, Ptr, DeleteTy);
+  if (E->isArrayForm()) {
+    EmitArrayDelete(*this, E, Ptr, DeleteTy);
+  } else {
+    EmitObjectDelete(*this, E, Ptr, DeleteTy);
+  }
 
   EmitBlock(DeleteEnd);
 }
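
For context, a sketch of the C++14 sized deallocation that the removed
EmitSizedDelete machinery used to dispatch on: the choice between the sized
and unsized operator delete is now made statically rather than through the
emitted "sized.del.exists" branch. Replacing the global allocation
functions as below is standard C++; the pairing here is only a toy.

    #include <cstdio>
    #include <cstdlib>
    #include <new>

    void *operator new(std::size_t n) {
      if (void *p = std::malloc(n ? n : 1))
        return p;
      throw std::bad_alloc();
    }
    void operator delete(void *p) noexcept { std::free(p); }
    // C++14 sized form; whether a delete-expression calls it is now decided
    // at compile time.
    void operator delete(void *p, std::size_t sz) noexcept {
      std::printf("sized delete of %zu bytes\n", sz);
      std::free(p);
    }

    int main() {
      delete new int(0);
      return 0;
    }
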
diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp
index 54f7eee..7406354 100644
--- a/lib/CodeGen/CGExprConstant.cpp
+++ b/lib/CodeGen/CGExprConstant.cpp
@@ -383,14 +383,19 @@
 
     if (!EltInit)
       return false;
-    
+
     if (!Field->isBitField()) {
       // Handle non-bitfield members.
       AppendField(*Field, Layout.getFieldOffset(FieldNo), EltInit);
     } else {
       // Otherwise we have a bitfield.
-      AppendBitField(*Field, Layout.getFieldOffset(FieldNo),
-                     cast<llvm::ConstantInt>(EltInit));
+      if (auto *CI = dyn_cast<llvm::ConstantInt>(EltInit)) {
+        AppendBitField(*Field, Layout.getFieldOffset(FieldNo), CI);
+      } else {
+        // We are trying to initialize a bitfield with a non-trivial constant;
+        // such an initialization requires run-time code.
+        return false;
+      }
     }
   }
 
diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp
index dc12dd8..16ce69d 100644
--- a/lib/CodeGen/CGExprScalar.cpp
+++ b/lib/CodeGen/CGExprScalar.cpp
@@ -745,23 +745,37 @@
   QualType OrigSrcType = SrcType;
   llvm::Type *SrcTy = Src->getType();
 
-  // If casting to/from storage-only half FP, use special intrinsics.
-  if (SrcType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType &&
-      !CGF.getContext().getLangOpts().HalfArgsAndReturns) {
-    Src = Builder.CreateCall(
-        CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16,
-                             CGF.CGM.FloatTy),
-        Src);
-    SrcType = CGF.getContext().FloatTy;
-    SrcTy = CGF.FloatTy;
-  }
-
   // Handle conversions to bool first, they are special: comparisons against 0.
   if (DstType->isBooleanType())
     return EmitConversionToBool(Src, SrcType);
 
   llvm::Type *DstTy = ConvertType(DstType);
 
+  // Cast from half through float if half isn't a native type.
+  if (SrcType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) {
+    // Cast to FP using the intrinsic if the half type itself isn't supported.
+    if (DstTy->isFloatingPointTy()) {
+      if (!CGF.getContext().getLangOpts().HalfArgsAndReturns)
+        return Builder.CreateCall(
+            CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16, DstTy),
+            Src);
+    } else {
+      // Cast to other types through float, using either the intrinsic or FPExt,
+      // depending on whether the half type itself is supported
+      // (as opposed to operations on half, available with NativeHalfType).
+      if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) {
+        Src = Builder.CreateCall(
+            CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16,
+                                 CGF.CGM.FloatTy),
+            Src);
+      } else {
+        Src = Builder.CreateFPExt(Src, CGF.CGM.FloatTy, "conv");
+      }
+      SrcType = CGF.getContext().FloatTy;
+      SrcTy = CGF.FloatTy;
+    }
+  }
+
   // Ignore conversions like int -> uint.
   if (SrcTy == DstTy)
     return Src;
@@ -818,10 +832,20 @@
     EmitFloatConversionCheck(OrigSrc, OrigSrcType, Src, SrcType, DstType,
                              DstTy);
 
-  // Cast to half via float
-  if (DstType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType &&
-      !CGF.getContext().getLangOpts().HalfArgsAndReturns)
+  // Cast to half through float if half isn't a native type.
+  if (DstType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) {
+    // Make sure we cast in a single step if from another FP type.
+    if (SrcTy->isFloatingPointTy()) {
+      // Use the intrinsic if the half type itself isn't supported
+      // (as opposed to operations on half, available with NativeHalfType).
+      if (!CGF.getContext().getLangOpts().HalfArgsAndReturns)
+        return Builder.CreateCall(
+            CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, SrcTy), Src);
+      // If the half type is supported, just use an fptrunc.
+      return Builder.CreateFPTrunc(Src, DstTy);
+    }
     DstTy = CGF.FloatTy;
+  }
 
   if (isa<llvm::IntegerType>(SrcTy)) {
     bool InputSigned = SrcType->isSignedIntegerOrEnumerationType();
@@ -847,10 +871,14 @@
   }
 
   if (DstTy != ResTy) {
-    assert(ResTy->isIntegerTy(16) && "Only half FP requires extra conversion");
-    Res = Builder.CreateCall(
+    if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) {
+      assert(ResTy->isIntegerTy(16) && "Only half FP requires extra conversion");
+      Res = Builder.CreateCall(
         CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, CGF.CGM.FloatTy),
         Res);
+    } else {
+      Res = Builder.CreateFPTrunc(Res, ResTy, "conv");
+    }
   }
 
   return Res;
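
A small sketch of the source-level behavior (__fp16 is Clang's storage-only
half extension; availability is target-dependent, so this is illustrative
only): with HalfArgsAndReturns these conversions now lower to plain
fpext/fptrunc, and otherwise they still use the convert_from_fp16 /
convert_to_fp16 intrinsics, with non-FP sources routed through float.

    // half -> float: a single fpext (or the convert_from_fp16 intrinsic).
    float widen(__fp16 h) { return h; }

    // int -> half: converted to float first, then truncated to half.
    __fp16 fromInt(int i) { return (__fp16)i; }
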
@@ -1355,6 +1383,13 @@
       llvm_unreachable("wrong cast for pointers in different address spaces"
                        "(must be an address space cast)!");
     }
+
+    if (CGF.SanOpts.has(SanitizerKind::CFIUnrelatedCast)) {
+      if (auto PT = DestTy->getAs<PointerType>())
+        CGF.EmitVTablePtrCheckForCast(PT->getPointeeType(), Src,
+                                      /*MayBeNull=*/true);
+    }
+
     return Builder.CreateBitCast(Src, DstTy);
   }
   case CK_AddressSpaceConversion: {
@@ -1384,6 +1419,10 @@
       CGF.EmitTypeCheck(CodeGenFunction::TCK_DowncastPointer, CE->getExprLoc(),
                         Derived, DestTy->getPointeeType());
 
+    if (CGF.SanOpts.has(SanitizerKind::CFIDerivedCast))
+      CGF.EmitVTablePtrCheckForCast(DestTy->getPointeeType(), Derived,
+                                    /*MayBeNull=*/true);
+
     return Derived;
   }
   case CK_UncheckedDerivedToBase:
@@ -1742,13 +1781,16 @@
     // Add the inc/dec to the real part.
     llvm::Value *amt;
 
-    if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType &&
-        !CGF.getContext().getLangOpts().HalfArgsAndReturns) {
+    if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) {
       // Another special case: half FP increment should be done via float
-      value = Builder.CreateCall(
-          CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16,
-                               CGF.CGM.FloatTy),
-          input);
+      if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) {
+        value = Builder.CreateCall(
+            CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16,
+                                 CGF.CGM.FloatTy),
+            input, "incdec.conv");
+      } else {
+        value = Builder.CreateFPExt(input, CGF.CGM.FloatTy, "incdec.conv");
+      }
     }
 
     if (value->getType()->isFloatTy())
@@ -1758,20 +1800,29 @@
       amt = llvm::ConstantFP::get(VMContext,
                                   llvm::APFloat(static_cast<double>(amount)));
     else {
+      // Remaining types are either Half or LongDouble.  Convert from float.
       llvm::APFloat F(static_cast<float>(amount));
       bool ignored;
-      F.convert(CGF.getTarget().getLongDoubleFormat(),
+      // Don't use getFloatTypeSemantics because Half isn't
+      // necessarily represented using the "half" LLVM type.
+      F.convert(value->getType()->isHalfTy()
+                    ? CGF.getTarget().getHalfFormat()
+                    : CGF.getTarget().getLongDoubleFormat(),
                 llvm::APFloat::rmTowardZero, &ignored);
       amt = llvm::ConstantFP::get(VMContext, F);
     }
     value = Builder.CreateFAdd(value, amt, isInc ? "inc" : "dec");
 
-    if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType &&
-        !CGF.getContext().getLangOpts().HalfArgsAndReturns)
-      value = Builder.CreateCall(
-          CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16,
-                               CGF.CGM.FloatTy),
-          value);
+    if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) {
+      if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) {
+        value = Builder.CreateCall(
+            CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16,
+                                 CGF.CGM.FloatTy),
+            value, "incdec.conv");
+      } else {
+        value = Builder.CreateFPTrunc(value, input->getType(), "incdec.conv");
+      }
+    }
 
   // Objective-C pointer types.
   } else {
@@ -2664,21 +2715,34 @@
   if (Ops.LHS->getType() != RHS->getType())
     RHS = Builder.CreateIntCast(RHS, Ops.LHS->getType(), false, "sh_prom");
 
-  if (CGF.SanOpts.has(SanitizerKind::Shift) && !CGF.getLangOpts().OpenCL &&
-      isa<llvm::IntegerType>(Ops.LHS->getType())) {
+  bool SanitizeBase = CGF.SanOpts.has(SanitizerKind::ShiftBase) &&
+                      Ops.Ty->hasSignedIntegerRepresentation();
+  bool SanitizeExponent = CGF.SanOpts.has(SanitizerKind::ShiftExponent);
+  // OpenCL 6.3j: shift values are effectively % word size of LHS.
+  if (CGF.getLangOpts().OpenCL)
+    RHS =
+        Builder.CreateAnd(RHS, GetWidthMinusOneValue(Ops.LHS, RHS), "shl.mask");
+  else if ((SanitizeBase || SanitizeExponent) &&
+           isa<llvm::IntegerType>(Ops.LHS->getType())) {
     CodeGenFunction::SanitizerScope SanScope(&CGF);
+    SmallVector<std::pair<Value *, SanitizerKind>, 2> Checks;
     llvm::Value *WidthMinusOne = GetWidthMinusOneValue(Ops.LHS, RHS);
-    llvm::Value *Valid = Builder.CreateICmpULE(RHS, WidthMinusOne);
+    llvm::Value *ValidExponent = Builder.CreateICmpULE(RHS, WidthMinusOne);
 
-    if (Ops.Ty->hasSignedIntegerRepresentation()) {
+    if (SanitizeExponent) {
+      Checks.push_back(
+          std::make_pair(ValidExponent, SanitizerKind::ShiftExponent));
+    }
+
+    if (SanitizeBase) {
+      // Check whether we are shifting any non-zero bits off the top of the
+      // integer. We only emit this check if the exponent is valid; otherwise
+      // the instructions below would themselves have undefined behavior.
       llvm::BasicBlock *Orig = Builder.GetInsertBlock();
       llvm::BasicBlock *Cont = CGF.createBasicBlock("cont");
-      llvm::BasicBlock *CheckBitsShifted = CGF.createBasicBlock("check");
-      Builder.CreateCondBr(Valid, CheckBitsShifted, Cont);
-
-      // Check whether we are shifting any non-zero bits off the top of the
-      // integer.
-      CGF.EmitBlock(CheckBitsShifted);
+      llvm::BasicBlock *CheckShiftBase = CGF.createBasicBlock("check");
+      Builder.CreateCondBr(ValidExponent, CheckShiftBase, Cont);
+      CGF.EmitBlock(CheckShiftBase);
       llvm::Value *BitsShiftedOff =
         Builder.CreateLShr(Ops.LHS,
                            Builder.CreateSub(WidthMinusOne, RHS, "shl.zeros",
@@ -2693,19 +2757,17 @@
         BitsShiftedOff = Builder.CreateLShr(BitsShiftedOff, One);
       }
       llvm::Value *Zero = llvm::ConstantInt::get(BitsShiftedOff->getType(), 0);
-      llvm::Value *SecondCheck = Builder.CreateICmpEQ(BitsShiftedOff, Zero);
+      llvm::Value *ValidBase = Builder.CreateICmpEQ(BitsShiftedOff, Zero);
       CGF.EmitBlock(Cont);
-      llvm::PHINode *P = Builder.CreatePHI(Valid->getType(), 2);
-      P->addIncoming(Valid, Orig);
-      P->addIncoming(SecondCheck, CheckBitsShifted);
-      Valid = P;
+      llvm::PHINode *BaseCheck = Builder.CreatePHI(ValidBase->getType(), 2);
+      BaseCheck->addIncoming(Builder.getTrue(), Orig);
+      BaseCheck->addIncoming(ValidBase, CheckShiftBase);
+      Checks.push_back(std::make_pair(BaseCheck, SanitizerKind::ShiftBase));
     }
 
-    EmitBinOpCheck(std::make_pair(Valid, SanitizerKind::Shift), Ops);
+    assert(!Checks.empty());
+    EmitBinOpCheck(Checks, Ops);
   }
-  // OpenCL 6.3j: shift values are effectively % word size of LHS.
-  if (CGF.getLangOpts().OpenCL)
-    RHS = Builder.CreateAnd(RHS, GetWidthMinusOneValue(Ops.LHS, RHS), "shl.mask");
 
   return Builder.CreateShl(Ops.LHS, RHS, "shl");
 }
@@ -2717,18 +2779,18 @@
   if (Ops.LHS->getType() != RHS->getType())
     RHS = Builder.CreateIntCast(RHS, Ops.LHS->getType(), false, "sh_prom");
 
-  if (CGF.SanOpts.has(SanitizerKind::Shift) && !CGF.getLangOpts().OpenCL &&
-      isa<llvm::IntegerType>(Ops.LHS->getType())) {
+  // OpenCL 6.3j: shift values are effectively % word size of LHS.
+  if (CGF.getLangOpts().OpenCL)
+    RHS =
+        Builder.CreateAnd(RHS, GetWidthMinusOneValue(Ops.LHS, RHS), "shr.mask");
+  else if (CGF.SanOpts.has(SanitizerKind::ShiftExponent) &&
+           isa<llvm::IntegerType>(Ops.LHS->getType())) {
     CodeGenFunction::SanitizerScope SanScope(&CGF);
     llvm::Value *Valid =
         Builder.CreateICmpULE(RHS, GetWidthMinusOneValue(Ops.LHS, RHS));
-    EmitBinOpCheck(std::make_pair(Valid, SanitizerKind::Shift), Ops);
+    EmitBinOpCheck(std::make_pair(Valid, SanitizerKind::ShiftExponent), Ops);
   }
 
-  // OpenCL 6.3j: shift values are effectively % word size of LHS.
-  if (CGF.getLangOpts().OpenCL)
-    RHS = Builder.CreateAnd(RHS, GetWidthMinusOneValue(Ops.LHS, RHS), "shr.mask");
-
   if (Ops.Ty->hasUnsignedIntegerRepresentation())
     return Builder.CreateLShr(Ops.LHS, RHS, "shr");
   return Builder.CreateAShr(Ops.LHS, RHS, "shr");
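
Illustration of the split performed above (a sketch, assuming the usual
-fsanitize= spellings shift-base and shift-exponent): the two checks are
now separate SanitizerKinds, so they can be enabled and reported
independently, and the base check only runs once the exponent is known to
be in range.

    // shift-exponent: flags s >= bit width of x (applies to << and >>).
    // shift-base: flags a signed left shift that drops non-zero bits off
    // the top; it is only evaluated when the exponent check passed.
    int shl(int x, int s) { return x << s; }

    int main() { return shl(1, 3); }
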
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index 51865a6..ef2d214 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -13,6 +13,7 @@
 
 #include "CGOpenMPRuntime.h"
 #include "CodeGenFunction.h"
+#include "CGCleanup.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/StmtOpenMP.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -41,7 +42,8 @@
   virtual const VarDecl *getThreadIDVariable() const = 0;
 
   /// \brief Get an LValue for the current ThreadID variable.
-  LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
+  /// \return LValue for thread id variable. This LValue always has type int32*.
+  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
 
     /// \brief Emit the captured statement body.
   virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
@@ -76,6 +78,41 @@
   const VarDecl *ThreadIDVar;
 };
 
+/// \brief API for captured statement code generation in OpenMP task constructs.
+class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
+public:
+  CGOpenMPTaskOutlinedRegionInfo(const OMPExecutableDirective &D,
+                                 const CapturedStmt &CS,
+                                 const VarDecl *ThreadIDVar,
+                                 const VarDecl *PartIDVar)
+      : CGOpenMPRegionInfo(D, CS), ThreadIDVar(ThreadIDVar),
+        PartIDVar(PartIDVar) {
+    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
+  }
+  /// \brief Get a variable or parameter for storing the global thread id
+  /// inside an OpenMP construct.
+  virtual const VarDecl *getThreadIDVariable() const override {
+    return ThreadIDVar;
+  }
+
+  /// \brief Get an LValue for the current ThreadID variable.
+  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
+
+  /// \brief Emit the captured statement body.
+  virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
+
+  /// \brief Get the name of the capture helper.
+  StringRef getHelperName() const override { return ".omp_outlined."; }
+
+private:
+  /// \brief A variable or parameter storing global thread id for OpenMP
+  /// constructs.
+  const VarDecl *ThreadIDVar;
+  /// \brief A variable or parameter storing part id for OpenMP tasking
+  /// constructs.
+  const VarDecl *PartIDVar;
+};
+
 /// \brief API for inlined captured statement code generation in OpenMP
 /// constructs.
 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
@@ -109,6 +146,7 @@
       return OuterRegionInfo->getThreadIDVariable();
     return nullptr;
   }
+
   /// \brief Get the name of the capture helper.
   virtual StringRef getHelperName() const override {
     llvm_unreachable("No helper name for inlined OpenMP construct");
@@ -125,8 +163,13 @@
 
 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
   return CGF.MakeNaturalAlignAddrLValue(
-      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
-      CGF.getContext().getPointerType(getThreadIDVariable()->getType()));
+      CGF.Builder.CreateAlignedLoad(
+          CGF.GetAddrOfLocalVar(getThreadIDVariable()),
+          CGF.PointerAlignInBytes),
+      getThreadIDVariable()
+          ->getType()
+          ->castAs<PointerType>()
+          ->getPointeeType());
 }
 
 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
@@ -140,8 +183,23 @@
   CGCapturedStmtInfo::EmitBody(CGF, S);
 }
 
+LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
+    CodeGenFunction &CGF) {
+  return CGF.MakeNaturalAlignAddrLValue(
+      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
+      getThreadIDVariable()->getType());
+}
+
+void CGOpenMPTaskOutlinedRegionInfo::EmitBody(CodeGenFunction &CGF,
+                                              const Stmt *S) {
+  if (PartIDVar) {
+    // TODO: emit code for untied tasks.
+  }
+  CGCapturedStmtInfo::EmitBody(CGF, S);
+}
+
 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
-    : CGM(CGM), DefaultOpenMPPSource(nullptr) {
+    : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) {
   IdentTy = llvm::StructType::create(
       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
@@ -153,9 +211,15 @@
   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
 }
 
+void CGOpenMPRuntime::clear() {
+  InternalVars.clear();
+}
+
 llvm::Value *
 CGOpenMPRuntime::emitOutlinedFunction(const OMPExecutableDirective &D,
                                       const VarDecl *ThreadIDVar) {
+  assert(ThreadIDVar->getType()->isPointerType() &&
+         "thread id variable must be of type kmp_int32 *");
   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
   CodeGenFunction CGF(CGM, true);
   CGOpenMPOutlinedRegionInfo CGInfo(D, *CS, ThreadIDVar);
@@ -164,6 +228,19 @@
 }
 
 llvm::Value *
+CGOpenMPRuntime::emitTaskOutlinedFunction(const OMPExecutableDirective &D,
+                                          const VarDecl *ThreadIDVar,
+                                          const VarDecl *PartIDVar) {
+  assert(!ThreadIDVar->getType()->isPointerType() &&
+         "thread id variable must be of type kmp_int32 for tasks");
+  auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
+  CodeGenFunction CGF(CGM, true);
+  CGOpenMPTaskOutlinedRegionInfo CGInfo(D, *CS, ThreadIDVar, PartIDVar);
+  CGF.CapturedStmtInfo = &CGInfo;
+  return CGF.GenerateCapturedStmtFunction(*CS);
+}
+
+llvm::Value *
 CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
   if (!Entry) {
@@ -265,12 +342,9 @@
   }
   if (auto OMPRegionInfo =
           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
-    if (auto ThreadIDVar = OMPRegionInfo->getThreadIDVariable()) {
+    if (OMPRegionInfo->getThreadIDVariable()) {
       // Check if this an outlined function with thread id passed as argument.
       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
-      auto RVal = CGF.EmitLoadOfLValue(LVal, Loc);
-      LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(),
-                                            ThreadIDVar->getType());
       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
       // If value loaded in entry block, cache it and use it everywhere in
       // function.
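
A minimal sketch of source that exercises the new task codegen (assuming
-fopenmp and a runtime providing the kmpc entry points): the task body is
outlined through CGOpenMPTaskOutlinedRegionInfo and launched via the
__kmpc_omp_task_alloc / __kmpc_omp_task functions declared below.

    #include <cstdio>

    int main() {
      int x = 0;
    #pragma omp task shared(x)
      { x = 42; }  // body becomes an .omp_outlined. task entry
    #pragma omp taskwait
      std::printf("%d\n", x);
      return 0;
    }
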
@@ -397,87 +471,6 @@
     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
     break;
   }
-  // Build __kmpc_for_static_init*(
-  //               ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
-  //               kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
-  //               kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
-  //               kmp_int[32|64] incr, kmp_int[32|64] chunk);
-  case OMPRTL__kmpc_for_static_init_4: {
-    auto ITy = CGM.Int32Ty;
-    auto PtrTy = llvm::PointerType::getUnqual(ITy);
-    llvm::Type *TypeParams[] = {
-        getIdentTyPointerTy(),                     // loc
-        CGM.Int32Ty,                               // tid
-        CGM.Int32Ty,                               // schedtype
-        llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
-        PtrTy,                                     // p_lower
-        PtrTy,                                     // p_upper
-        PtrTy,                                     // p_stride
-        ITy,                                       // incr
-        ITy                                        // chunk
-    };
-    llvm::FunctionType *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4");
-    break;
-  }
-  case OMPRTL__kmpc_for_static_init_4u: {
-    auto ITy = CGM.Int32Ty;
-    auto PtrTy = llvm::PointerType::getUnqual(ITy);
-    llvm::Type *TypeParams[] = {
-        getIdentTyPointerTy(),                     // loc
-        CGM.Int32Ty,                               // tid
-        CGM.Int32Ty,                               // schedtype
-        llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
-        PtrTy,                                     // p_lower
-        PtrTy,                                     // p_upper
-        PtrTy,                                     // p_stride
-        ITy,                                       // incr
-        ITy                                        // chunk
-    };
-    llvm::FunctionType *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4u");
-    break;
-  }
-  case OMPRTL__kmpc_for_static_init_8: {
-    auto ITy = CGM.Int64Ty;
-    auto PtrTy = llvm::PointerType::getUnqual(ITy);
-    llvm::Type *TypeParams[] = {
-        getIdentTyPointerTy(),                     // loc
-        CGM.Int32Ty,                               // tid
-        CGM.Int32Ty,                               // schedtype
-        llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
-        PtrTy,                                     // p_lower
-        PtrTy,                                     // p_upper
-        PtrTy,                                     // p_stride
-        ITy,                                       // incr
-        ITy                                        // chunk
-    };
-    llvm::FunctionType *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8");
-    break;
-  }
-  case OMPRTL__kmpc_for_static_init_8u: {
-    auto ITy = CGM.Int64Ty;
-    auto PtrTy = llvm::PointerType::getUnqual(ITy);
-    llvm::Type *TypeParams[] = {
-        getIdentTyPointerTy(),                     // loc
-        CGM.Int32Ty,                               // tid
-        CGM.Int32Ty,                               // schedtype
-        llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
-        PtrTy,                                     // p_lower
-        PtrTy,                                     // p_upper
-        PtrTy,                                     // p_stride
-        ITy,                                       // incr
-        ITy                                        // chunk
-    };
-    llvm::FunctionType *FnTy =
-        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
-    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8u");
-    break;
-  }
   case OMPRTL__kmpc_for_static_fini: {
     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
@@ -563,10 +556,120 @@
     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
     break;
   }
+  case OMPRTL__kmpc_omp_task_alloc: {
+    // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
+    // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+    // kmp_routine_entry_t *task_entry);
+    assert(KmpRoutineEntryPtrTy != nullptr &&
+           "Type kmp_routine_entry_t must be created.");
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
+                                CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
+    // Return void * and then cast to particular kmp_task_t type.
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
+    break;
+  }
+  case OMPRTL__kmpc_omp_task: {
+    // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
+    // *new_task);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
+                                CGM.VoidPtrTy};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
+    break;
+  }
+  case OMPRTL__kmpc_copyprivate: {
+    // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
+    // kmp_int32 cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
+    // kmp_int32 didit);
+    llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
+    auto *CpyFnTy =
+        llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
+                                CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
+                                CGM.Int32Ty};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
+    break;
+  }
   }
   return RTLFn;
 }
 
+llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
+                                                             bool IVSigned) {
+  assert((IVSize == 32 || IVSize == 64) &&
+         "IV size is not compatible with the omp runtime");
+  auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
+                                       : "__kmpc_for_static_init_4u")
+                           : (IVSigned ? "__kmpc_for_static_init_8"
+                                       : "__kmpc_for_static_init_8u");
+  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
+  auto PtrTy = llvm::PointerType::getUnqual(ITy);
+  llvm::Type *TypeParams[] = {
+    getIdentTyPointerTy(),                     // loc
+    CGM.Int32Ty,                               // tid
+    CGM.Int32Ty,                               // schedtype
+    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
+    PtrTy,                                     // p_lower
+    PtrTy,                                     // p_upper
+    PtrTy,                                     // p_stride
+    ITy,                                       // incr
+    ITy                                        // chunk
+  };
+  llvm::FunctionType *FnTy =
+      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+  return CGM.CreateRuntimeFunction(FnTy, Name);
+}
+
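For reference, the four entry points this helper selects among share the signature documented in the enum comment removed above; a hedged sketch of the 32-bit signed variant, assuming the OpenMP runtime's ident_t and kmp_int32 typedefs (the other variants differ only in index width and signedness):

    void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 tid,
                                  kmp_int32 schedtype, kmp_int32 *p_lastiter,
                                  kmp_int32 *p_lower, kmp_int32 *p_upper,
                                  kmp_int32 *p_stride, kmp_int32 incr,
                                  kmp_int32 chunk);
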
+llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
+                                                            bool IVSigned) {
+  assert((IVSize == 32 || IVSize == 64) &&
+         "IV size is not compatible with the omp runtime");
+  auto Name =
+      IVSize == 32
+          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
+          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
+  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
+  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
+                               CGM.Int32Ty,           // tid
+                               CGM.Int32Ty,           // schedtype
+                               ITy,                   // lower
+                               ITy,                   // upper
+                               ITy,                   // stride
+                               ITy                    // chunk
+  };
+  llvm::FunctionType *FnTy =
+      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+  return CGM.CreateRuntimeFunction(FnTy, Name);
+}
+
+llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
+                                                            bool IVSigned) {
+  assert((IVSize == 32 || IVSize == 64) &&
+         "IV size is not compatible with the omp runtime");
+  auto Name =
+      IVSize == 32
+          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
+          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
+  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
+  auto PtrTy = llvm::PointerType::getUnqual(ITy);
+  llvm::Type *TypeParams[] = {
+    getIdentTyPointerTy(),                     // loc
+    CGM.Int32Ty,                               // tid
+    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
+    PtrTy,                                     // p_lower
+    PtrTy,                                     // p_upper
+    PtrTy                                      // p_stride
+  };
+  llvm::FunctionType *FnTy =
+      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+  return CGM.CreateRuntimeFunction(FnTy, Name);
+}
+
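Likewise, a hedged sketch of the 32-bit signed dispatch declarations these two helpers build, matching the LLVM parameter and return types used above (the 64-bit and unsigned variants differ only in ITy):

    void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 tid,
                                kmp_int32 schedule, kmp_int32 lower,
                                kmp_int32 upper, kmp_int32 stride,
                                kmp_int32 chunk);
    kmp_int32 __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 tid,
                                     kmp_int32 *p_lastiter, kmp_int32 *p_lower,
                                     kmp_int32 *p_upper, kmp_int32 *p_stride);
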
 llvm::Constant *
 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
   // Lookup the entry, lazily creating it if necessary.
@@ -766,8 +869,7 @@
   if (auto OMPRegionInfo =
           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
     if (OMPRegionInfo->getThreadIDVariable())
-      return CGF.EmitLoadOfLValue(OMPRegionInfo->getThreadIDVariableLValue(CGF),
-                                  Loc).getScalarVal();
+      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
 
   auto ThreadID = getThreadID(CGF, Loc);
   auto Int32Ty =
@@ -878,19 +980,107 @@
   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
 }
 
+static llvm::Value *emitCopyprivateCopyFunction(
+    CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> SrcExprs,
+    ArrayRef<const Expr *> DstExprs, ArrayRef<const Expr *> AssignmentOps) {
+  auto &C = CGM.getContext();
+  // void copy_func(void *LHSArg, void *RHSArg);
+  FunctionArgList Args;
+  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
+                           C.VoidPtrTy);
+  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
+                           C.VoidPtrTy);
+  Args.push_back(&LHSArg);
+  Args.push_back(&RHSArg);
+  FunctionType::ExtInfo EI;
+  auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
+      C.VoidTy, Args, EI, /*isVariadic=*/false);
+  auto *Fn = llvm::Function::Create(
+      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+      ".omp.copyprivate.copy_func", &CGM.getModule());
+  CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
+  CodeGenFunction CGF(CGM);
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+  // Dst = (void*[n])(LHSArg);
+  // Src = (void*[n])(RHSArg);
+  auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+      CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
+                                    CGF.PointerAlignInBytes),
+      ArgsType);
+  auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+      CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
+                                    CGF.PointerAlignInBytes),
+      ArgsType);
+  // *(Type0*)Dst[0] = *(Type0*)Src[0];
+  // *(Type1*)Dst[1] = *(Type1*)Src[1];
+  // ...
+  // *(Typen*)Dst[n] = *(Typen*)Src[n];
+  CodeGenFunction::OMPPrivateScope Scope(CGF);
+  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
+    Scope.addPrivate(
+        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()),
+        [&]() -> llvm::Value *{
+          return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+              CGF.Builder.CreateAlignedLoad(CGF.Builder.CreateStructGEP(RHS, I),
+                                            CGM.PointerAlignInBytes),
+              CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
+        });
+    Scope.addPrivate(
+        cast<VarDecl>(cast<DeclRefExpr>(DstExprs[I])->getDecl()),
+        [&]() -> llvm::Value *{
+          return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+              CGF.Builder.CreateAlignedLoad(CGF.Builder.CreateStructGEP(LHS, I),
+                                            CGM.PointerAlignInBytes),
+              CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
+        });
+  }
+  Scope.Privatize();
+  for (auto *E : AssignmentOps) {
+    CGF.EmitIgnoredExpr(E);
+  }
+  Scope.ForceCleanup();
+  CGF.FinishFunction();
+  return Fn;
+}
+
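Putting the inline comments together, a hedged sketch of the helper this function emits for two copyprivate variables of (illustrative) types T0 and T1:

    static void omp_copyprivate_copy_func(void *LHSArg, void *RHSArg) {
      void **Dst = (void **)LHSArg; // destination address list
      void **Src = (void **)RHSArg; // source address list
      *(T0 *)Dst[0] = *(T0 *)Src[0]; // emitted via the privatized AssignmentOps
      *(T1 *)Dst[1] = *(T1 *)Src[1];
    }
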
 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                        const std::function<void()> &SingleOpGen,
-                                       SourceLocation Loc) {
+                                       SourceLocation Loc,
+                                       ArrayRef<const Expr *> CopyprivateVars,
+                                       ArrayRef<const Expr *> SrcExprs,
+                                       ArrayRef<const Expr *> DstExprs,
+                                       ArrayRef<const Expr *> AssignmentOps) {
+  assert(CopyprivateVars.size() == SrcExprs.size() &&
+         CopyprivateVars.size() == DstExprs.size() &&
+         CopyprivateVars.size() == AssignmentOps.size());
+  auto &C = CGM.getContext();
+  // int32 did_it = 0;
   // if(__kmpc_single(ident_t *, gtid)) {
   //   SingleOpGen();
   //   __kmpc_end_single(ident_t *, gtid);
+  //   did_it = 1;
   // }
+  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
+  // <copy_func>, did_it);
+
+  llvm::AllocaInst *DidIt = nullptr;
+  if (!CopyprivateVars.empty()) {
+    // int32 did_it = 0;
+    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
+    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
+    CGF.InitTempAlloca(DidIt, CGF.Builder.getInt32(0));
+  }
   // Prepare arguments and build a call to __kmpc_single
   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
   auto *IsSingle =
       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
   emitIfStmt(CGF, IsSingle, [&]() -> void {
     SingleOpGen();
+    if (DidIt) {
+      // did_it = 1;
+      CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt,
+                                     DidIt->getAlignment());
+    }
     // Build a call to __kmpc_end_single.
     // OpenMP [1.2.2 OpenMP Language Terminology]
     // For C/C++, an executable statement, possibly compound, with a single
@@ -907,6 +1097,44 @@
     // fallthrough rather than pushing a normal cleanup for it.
     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_single), Args);
   });
+  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
+  // <copy_func>, did_it);
+  if (DidIt) {
+    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
+    auto CopyprivateArrayTy =
+        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
+                               /*IndexTypeQuals=*/0);
+    // Create a list of all private variables for copyprivate.
+    auto *CopyprivateList =
+        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
+    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
+      auto *Elem = CGF.Builder.CreateStructGEP(CopyprivateList, I);
+      CGF.Builder.CreateAlignedStore(
+          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+              CGF.EmitLValue(CopyprivateVars[I]).getAddress(), CGF.VoidPtrTy),
+          Elem, CGM.PointerAlignInBytes);
+    }
+    // Build a function that copies private values from the single region to
+    // all other threads in the corresponding parallel region.
+    auto *CpyFn = emitCopyprivateCopyFunction(
+        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
+        SrcExprs, DstExprs, AssignmentOps);
+    auto *BufSize = CGF.Builder.getInt32(
+        C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity());
+    auto *CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
+                                                               CGF.VoidPtrTy);
+    auto *DidItVal =
+        CGF.Builder.CreateAlignedLoad(DidIt, CGF.PointerAlignInBytes);
+    llvm::Value *Args[] = {
+        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
+        getThreadID(CGF, Loc),        // i32 <gtid>
+        BufSize,                      // i32 <buf_size>
+        CL,                           // void *<copyprivate list>
+        CpyFn,                        // void (*) (void *, void *) <copy_func>
+        DidItVal                      // i32 did_it
+    };
+    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
+  }
 }
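Taken together, a hedged sketch of what emitSingleRegion produces for a single region with a non-empty copyprivate list (variable names are illustrative):

    kmp_int32 did_it = 0;
    if (__kmpc_single(&loc, gtid)) {
      SingleOpGen();
      did_it = 1;
      __kmpc_end_single(&loc, gtid);
    }
    void *cpr_list[] = {&a, &b}; // one slot per copyprivate variable
    __kmpc_copyprivate(&loc, gtid, sizeof(cpr_list), cpr_list,
                       omp_copyprivate_copy_func, did_it);
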
 
 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
@@ -983,34 +1211,50 @@
                                   llvm::Value *UB, llvm::Value *ST,
                                   llvm::Value *Chunk) {
   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr);
-  // Call __kmpc_for_static_init(
-  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
-  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
-  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
-  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
-  // TODO: Implement dynamic schedule.
+  if (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked) {
+    // Call __kmpc_dispatch_init(
+    //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
+    //          kmp_int[32|64] lower, kmp_int[32|64] upper,
+    //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
 
-  // If the Chunk was not specified in the clause - use default value 1.
-  if (Chunk == nullptr)
-    Chunk = CGF.Builder.getIntN(IVSize, /*C*/ 1);
-
-  llvm::Value *Args[] = {
-      emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
-      CGF.Builder.getInt32(Schedule), // Schedule type
-      IL,                             // &isLastIter
-      LB,                             // &LB
-      UB,                             // &UB
-      ST,                             // &Stride
-      CGF.Builder.getIntN(IVSize, 1), // Incr
-      Chunk                           // Chunk
-  };
-  assert((IVSize == 32 || IVSize == 64) &&
-         "Index size is not compatible with the omp runtime");
-  auto F = IVSize == 32 ? (IVSigned ? OMPRTL__kmpc_for_static_init_4
-                                    : OMPRTL__kmpc_for_static_init_4u)
-                        : (IVSigned ? OMPRTL__kmpc_for_static_init_8
-                                    : OMPRTL__kmpc_for_static_init_8u);
-  CGF.EmitRuntimeCall(createRuntimeFunction(F), Args);
+    // If the Chunk was not specified in the clause - use default value 1.
+    if (Chunk == nullptr)
+      Chunk = CGF.Builder.getIntN(IVSize, 1);
+    llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
+                            getThreadID(CGF, Loc),
+                            CGF.Builder.getInt32(Schedule), // Schedule type
+                            CGF.Builder.getIntN(IVSize, 0), // Lower
+                            UB,                             // Upper
+                            CGF.Builder.getIntN(IVSize, 1), // Stride
+                            Chunk                           // Chunk
+    };
+    CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
+  } else {
+    // Call __kmpc_for_static_init(
+    //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
+    //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
+    //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
+    //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
+    if (Chunk == nullptr) {
+      assert(Schedule == OMP_sch_static &&
+             "expected static non-chunked schedule");
+      // If the Chunk was not specified in the clause - use default value 1.
+      Chunk = CGF.Builder.getIntN(IVSize, 1);
+    } else
+      assert(Schedule == OMP_sch_static_chunked &&
+             "expected static chunked schedule");
+    llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
+                            getThreadID(CGF, Loc),
+                            CGF.Builder.getInt32(Schedule), // Schedule type
+                            IL,                             // &isLastIter
+                            LB,                             // &LB
+                            UB,                             // &UB
+                            ST,                             // &Stride
+                            CGF.Builder.getIntN(IVSize, 1), // Incr
+                            Chunk                           // Chunk
+    };
+    CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
+  }
 }
 
 void CGOpenMPRuntime::emitForFinish(CodeGenFunction &CGF, SourceLocation Loc,
@@ -1025,6 +1269,29 @@
                       Args);
 }
 
+llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
+                                          SourceLocation Loc, unsigned IVSize,
+                                          bool IVSigned, llvm::Value *IL,
+                                          llvm::Value *LB, llvm::Value *UB,
+                                          llvm::Value *ST) {
+  // Call __kmpc_dispatch_next(
+  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
+  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
+  //          kmp_int[32|64] *p_stride);
+  llvm::Value *Args[] = {
+      emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
+      IL, // &isLastIter
+      LB, // &Lower
+      UB, // &Upper
+      ST  // &Stride
+  };
+  llvm::Value *Call =
+      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
+  return CGF.EmitScalarConversion(
+      Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
+      CGF.getContext().BoolTy);
+}
+
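A hedged sketch of the loop shape emitForNext enables (see the dynamic path added to EmitOMPForOuterLoop below):

    kmp_int32 last, lb, ub, st;
    while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
      for (kmp_int32 iv = lb; iv <= ub; ++iv) {
        /* loop body */
      }
    }
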
 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                            llvm::Value *NumThreads,
                                            SourceLocation Loc) {
@@ -1043,13 +1310,214 @@
                       emitUpdateLocation(CGF, Loc));
 }
 
+namespace {
+/// \brief Indexes of fields for type kmp_task_t.
+enum KmpTaskTFields {
+  /// \brief List of shared variables.
+  KmpTaskTShareds,
+  /// \brief Task routine.
+  KmpTaskTRoutine,
+  /// \brief Partition id for the untied tasks.
+  KmpTaskTPartId,
+  /// \brief Function with call of destructors for private variables.
+  KmpTaskTDestructors,
+};
+} // namespace
+
+void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
+  if (!KmpRoutineEntryPtrTy) {
+    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
+    auto &C = CGM.getContext();
+    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
+    FunctionProtoType::ExtProtoInfo EPI;
+    KmpRoutineEntryPtrQTy = C.getPointerType(
+        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
+    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
+  }
+}
+
+static void addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
+                                 QualType FieldTy) {
+  auto *Field = FieldDecl::Create(
+      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
+      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
+      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
+  Field->setAccess(AS_public);
+  DC->addDecl(Field);
+}
+
+static QualType createKmpTaskTRecordDecl(CodeGenModule &CGM,
+                                         QualType KmpInt32Ty,
+                                         QualType KmpRoutineEntryPointerQTy) {
+  auto &C = CGM.getContext();
+  // Build struct kmp_task_t {
+  //         void *              shareds;
+  //         kmp_routine_entry_t routine;
+  //         kmp_int32           part_id;
+  //         kmp_routine_entry_t destructors;
+  //         /*  private vars  */
+  //       };
+  auto *RD = C.buildImplicitRecord("kmp_task_t");
+  RD->startDefinition();
+  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
+  addFieldToRecordDecl(C, RD, KmpInt32Ty);
+  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
+  // TODO: add private fields.
+  RD->completeDefinition();
+  return C.getRecordType(RD);
+}
+
+/// \brief Emit a proxy function which accepts kmp_task_t as the second
+/// argument.
+/// \code
+/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
+///   TaskFunction(gtid, tt->part_id, tt->shareds);
+///   return 0;
+/// }
+/// \endcode
+static llvm::Value *
+emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
+                      QualType KmpInt32Ty, QualType KmpTaskTPtrQTy,
+                      QualType SharedsPtrTy, llvm::Value *TaskFunction) {
+  auto &C = CGM.getContext();
+  FunctionArgList Args;
+  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
+  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
+                                /*Id=*/nullptr, KmpTaskTPtrQTy);
+  Args.push_back(&GtidArg);
+  Args.push_back(&TaskTypeArg);
+  FunctionType::ExtInfo Info;
+  auto &TaskEntryFnInfo =
+      CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
+                                                    /*isVariadic=*/false);
+  auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
+  auto *TaskEntry =
+      llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
+                             ".omp_task_entry.", &CGM.getModule());
+  CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry);
+  CodeGenFunction CGF(CGM);
+  CGF.disableDebugInfo();
+  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
+
+  // TaskFunction(gtid, tt->part_id, tt->shareds);
+  auto *GtidParam = CGF.EmitLoadOfScalar(
+      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false,
+      C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
+  auto TaskTypeArgAddr = CGF.EmitLoadOfScalar(
+      CGF.GetAddrOfLocalVar(&TaskTypeArg), /*Volatile=*/false,
+      CGM.PointerAlignInBytes, KmpTaskTPtrQTy, Loc);
+  auto *PartidPtr = CGF.Builder.CreateStructGEP(TaskTypeArgAddr,
+                                                /*Idx=*/KmpTaskTPartId);
+  auto *PartidParam = CGF.EmitLoadOfScalar(
+      PartidPtr, /*Volatile=*/false,
+      C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
+  auto *SharedsPtr = CGF.Builder.CreateStructGEP(TaskTypeArgAddr,
+                                                 /*Idx=*/KmpTaskTShareds);
+  auto *SharedsParam =
+      CGF.EmitLoadOfScalar(SharedsPtr, /*Volatile=*/false,
+                           CGM.PointerAlignInBytes, C.VoidPtrTy, Loc);
+  llvm::Value *CallArgs[] = {
+      GtidParam, PartidParam,
+      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+          SharedsParam, CGF.ConvertTypeForMem(SharedsPtrTy))};
+  CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
+  CGF.EmitStoreThroughLValue(
+      RValue::get(CGF.Builder.getInt32(/*C=*/0)),
+      CGF.MakeNaturalAlignAddrLValue(CGF.ReturnValue, KmpInt32Ty));
+  CGF.FinishFunction();
+  return TaskEntry;
+}
+
+void CGOpenMPRuntime::emitTaskCall(
+    CodeGenFunction &CGF, SourceLocation Loc, bool Tied,
+    llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
+    llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds) {
+  auto &C = CGM.getContext();
+  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
+  // Build type kmp_routine_entry_t (if not built yet).
+  emitKmpRoutineEntryT(KmpInt32Ty);
+  // Build particular struct kmp_task_t for the given task.
+  auto KmpTaskQTy =
+      createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy);
+  QualType KmpTaskTPtrQTy = C.getPointerType(KmpTaskQTy);
+  auto KmpTaskTPtrTy = CGF.ConvertType(KmpTaskQTy)->getPointerTo();
+  auto KmpTaskTySize = CGM.getSize(C.getTypeSizeInChars(KmpTaskQTy));
+  QualType SharedsPtrTy = C.getPointerType(SharedsTy);
+
+  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
+  // kmp_task_t *tt);
+  auto *TaskEntry = emitProxyTaskFunction(CGM, Loc, KmpInt32Ty, KmpTaskTPtrQTy,
+                                          SharedsPtrTy, TaskFunction);
+
+  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
+  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+  // kmp_routine_entry_t *task_entry);
+  // Task flags. Format is taken from
+  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
+  // description of kmp_tasking_flags struct.
+  const unsigned TiedFlag = 0x1;
+  const unsigned FinalFlag = 0x2;
+  unsigned Flags = Tied ? TiedFlag : 0;
+  auto *TaskFlags =
+      Final.getPointer()
+          ? CGF.Builder.CreateSelect(Final.getPointer(),
+                                     CGF.Builder.getInt32(FinalFlag),
+                                     CGF.Builder.getInt32(/*C=*/0))
+          : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
+  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
+  auto SharedsSize = C.getTypeSizeInChars(SharedsTy);
+  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
+                              getThreadID(CGF, Loc), TaskFlags, KmpTaskTySize,
+                              CGM.getSize(SharedsSize),
+                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+                                  TaskEntry, KmpRoutineEntryPtrTy)};
+  auto *NewTask = CGF.EmitRuntimeCall(
+      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
+  auto *NewTaskNewTaskTTy =
+      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(NewTask, KmpTaskTPtrTy);
+  // Fill the data in the resulting kmp_task_t record.
+  // Copy shareds if there are any.
+  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty())
+    CGF.EmitAggregateCopy(
+        CGF.EmitLoadOfScalar(
+            CGF.Builder.CreateStructGEP(NewTaskNewTaskTTy,
+                                        /*Idx=*/KmpTaskTShareds),
+            /*Volatile=*/false, CGM.PointerAlignInBytes, SharedsPtrTy, Loc),
+        Shareds, SharedsTy);
+  // TODO: generate function with destructors for privates.
+  // Provide pointer to function with destructors for privates.
+  CGF.Builder.CreateAlignedStore(
+      llvm::ConstantPointerNull::get(
+          cast<llvm::PointerType>(KmpRoutineEntryPtrTy)),
+      CGF.Builder.CreateStructGEP(NewTaskNewTaskTTy,
+                                  /*Idx=*/KmpTaskTDestructors),
+      CGM.PointerAlignInBytes);
+
+  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
+  // libcall.
+  // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
+  // *new_task);
+  llvm::Value *TaskArgs[] = {emitUpdateLocation(CGF, Loc),
+                             getThreadID(CGF, Loc), NewTask};
+  // TODO: add check for untied tasks.
+  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
+}
+
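In sum, a hedged sketch of the sequence emitTaskCall emits for a tied, non-final task; omp_task_entry stands in for the generated '.omp_task_entry.' proxy:

    kmp_task_t *new_task = __kmpc_omp_task_alloc(
        &loc, gtid, /*flags=*/1 /* tied */, sizeof(kmp_task_t),
        sizeof(shareds), omp_task_entry);
    /* copy the captured variables into new_task->shareds, if any */
    new_task->destructors = 0; /* no destructors for privates yet */
    __kmpc_omp_task(&loc, gtid, new_task);
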
 InlinedOpenMPRegionRAII::InlinedOpenMPRegionRAII(
     CodeGenFunction &CGF, const OMPExecutableDirective &D)
     : CGF(CGF) {
   CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(D, CGF.CapturedStmtInfo);
+  // 1.2.2 OpenMP Language Terminology
+  // Structured block - An executable statement with a single entry at the
+  // top and a single exit at the bottom.
+  // The point of exit cannot be a branch out of the structured block.
+  // longjmp() and throw() must not violate the entry/exit criteria.
+  CGF.EHStack.pushTerminate();
 }
 
 InlinedOpenMPRegionRAII::~InlinedOpenMPRegionRAII() {
+  CGF.EHStack.popTerminate();
   auto *OldCSI =
       cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
   delete CGF.CapturedStmtInfo;
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h
index 97aa5b8..f8849e6 100644
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
 #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
 
+#include "clang/AST/Type.h"
 #include "clang/Basic/OpenMPKinds.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/ADT/DenseMap.h"
@@ -64,11 +65,7 @@
     // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
     // global_tid);
     OMPRTL__kmpc_cancel_barrier,
-    // Calls for static scheduling 'omp for' loops.
-    OMPRTL__kmpc_for_static_init_4,
-    OMPRTL__kmpc_for_static_init_4u,
-    OMPRTL__kmpc_for_static_init_8,
-    OMPRTL__kmpc_for_static_init_8u,
+    // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
     OMPRTL__kmpc_for_static_fini,
     // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
     // global_tid);
@@ -92,6 +89,17 @@
     OMPRTL__kmpc_single,
     // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
     OMPRTL__kmpc_end_single,
+    // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
+    // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+    // kmp_routine_entry_t *task_entry);
+    OMPRTL__kmpc_omp_task_alloc,
+    // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
+    // new_task);
+    OMPRTL__kmpc_omp_task,
+    // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
+    // kmp_int32 cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
+    // kmp_int32 didit);
+    OMPRTL__kmpc_copyprivate,
   };
 
   /// \brief Values for bit flags used in the ident_t to describe the fields.
@@ -190,6 +198,12 @@
   /// variables.
   llvm::StringMap<llvm::AssertingVH<llvm::Constant>, llvm::BumpPtrAllocator>
       InternalVars;
+  /// \brief Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);
+  llvm::Type *KmpRoutineEntryPtrTy;
+  QualType KmpRoutineEntryPtrQTy;
+
+  /// \brief Build type kmp_routine_entry_t (if not built yet).
+  void emitKmpRoutineEntryT(QualType KmpInt32Ty);
 
   /// \brief Emits object of ident_t type with info for source location.
   /// \param Flags Flags for OpenMP location.
@@ -208,6 +222,18 @@
   /// \return Specified function.
   llvm::Constant *createRuntimeFunction(OpenMPRTLFunction Function);
 
+  /// \brief Returns __kmpc_for_static_init_* runtime function for the specified
+  /// size \a IVSize and sign \a IVSigned.
+  llvm::Constant *createForStaticInitFunction(unsigned IVSize, bool IVSigned);
+
+  /// \brief Returns __kmpc_dispatch_init_* runtime function for the specified
+  /// size \a IVSize and sign \a IVSigned.
+  llvm::Constant *createDispatchInitFunction(unsigned IVSize, bool IVSigned);
+
+  /// \brief Returns __kmpc_dispatch_next_* runtime function for the specified
+  /// size \a IVSize and sign \a IVSigned.
+  llvm::Constant *createDispatchNextFunction(unsigned IVSize, bool IVSigned);
+
   /// \brief If the specified mangled name is not in the module, create and
   /// return threadprivate cache object. This object is a pointer's worth of
   /// storage that's reserved for use by the OpenMP runtime.
@@ -256,17 +282,28 @@
 public:
   explicit CGOpenMPRuntime(CodeGenModule &CGM);
   virtual ~CGOpenMPRuntime() {}
+  virtual void clear();
 
-  /// \brief Emits outlined function for the specified OpenMP directive \a D
-  /// (required for parallel and task directives). This outlined function has
-  /// type void(*)(kmp_int32 /*ThreadID*/, kmp_int32 /*BoundID*/, struct
-  /// context_vars*).
+  /// \brief Emits outlined function for the specified OpenMP directive \a D.
+  /// This outlined function has type void(*)(kmp_int32 *ThreadID, kmp_int32
+  /// BoundID, struct context_vars*).
   /// \param D OpenMP directive.
   /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
   ///
   virtual llvm::Value *emitOutlinedFunction(const OMPExecutableDirective &D,
                                             const VarDecl *ThreadIDVar);
 
+  /// \brief Emits outlined function for the OpenMP task directive \a D. This
+  /// outlined function has type void(*)(kmp_int32 ThreadID, kmp_int32
+  /// PartID, struct context_vars*).
+  /// \param D OpenMP directive.
+  /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+  /// \param PartIDVar If not nullptr, the variable used for part id in tasks.
+  ///
+  virtual llvm::Value *emitTaskOutlinedFunction(const OMPExecutableDirective &D,
+                                                const VarDecl *ThreadIDVar,
+                                                const VarDecl *PartIDVar);
+
   /// \brief Cleans up references to the objects in the finished function.
   ///
   void functionFinished(CodeGenFunction &CGF);
@@ -274,7 +311,7 @@
   /// \brief Emits code for a parallel call of the \a OutlinedFn with variables
   /// captured in a record whose address is stored in \a CapturedStruct.
   /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
-  /// this function is void(*)(kmp_int32, kmp_int32, struct context_vars*).
+  /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
   /// \param CapturedStruct A pointer to the record with the references to
   /// variables used in \a OutlinedFn function.
   ///
@@ -315,7 +352,11 @@
   /// single region.
   virtual void emitSingleRegion(CodeGenFunction &CGF,
                                 const std::function<void()> &SingleOpGen,
-                                SourceLocation Loc);
+                                SourceLocation Loc,
+                                ArrayRef<const Expr *> CopyprivateVars,
+                                ArrayRef<const Expr *> SrcExprs,
+                                ArrayRef<const Expr *> DstExprs,
+                                ArrayRef<const Expr *> AssignmentOps);
 
   /// \brief Emits explicit barrier for OpenMP threads.
   /// \param IsExplicit true if it is an explicitly specified barrier.
@@ -376,6 +417,25 @@
   virtual void emitForFinish(CodeGenFunction &CGF, SourceLocation Loc,
                              OpenMPScheduleClauseKind ScheduleKind);
 
+  /// Call __kmpc_dispatch_next(
+  ///          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
+  ///          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
+  ///          kmp_int[32|64] *p_stride);
+  /// \param IVSize Size of the iteration variable in bits.
+  /// \param IVSigned Sign of the iteration variable.
+  /// \param IL Address of the output variable in which the flag of the
+  /// last iteration is returned.
+  /// \param LB Address of the output variable in which the lower iteration
+  /// number is returned.
+  /// \param UB Address of the output variable in which the upper iteration
+  /// number is returned.
+  /// \param ST Address of the output variable in which the stride value is
+  /// returned.
+  virtual llvm::Value *emitForNext(CodeGenFunction &CGF, SourceLocation Loc,
+                                   unsigned IVSize, bool IVSigned,
+                                   llvm::Value *IL, llvm::Value *LB,
+                                   llvm::Value *UB, llvm::Value *ST);
+
   /// \brief Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
   /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
   /// clause.
@@ -412,6 +472,39 @@
   /// \param Vars List of variables to flush.
   virtual void emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars,
                          SourceLocation Loc);
+
+  /// \brief Emit task region for the task directive. The task region is
+  /// emitted in several steps:
+  /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32
+  /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+  /// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the
+  /// function:
+  /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
+  ///   TaskFunction(gtid, tt->part_id, tt->shareds);
+  ///   return 0;
+  /// }
+  /// 2. Copy a list of shared variables to the shareds field of the kmp_task_t
+  /// structure returned by the previous call (if any).
+  /// 3. Copy a pointer to the destructors function to the destructors field of
+  /// the resulting kmp_task_t structure.
+  /// 4. Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid,
+  /// kmp_task_t *new_task), where new_task is a resulting structure from
+  /// previous items.
+  /// \param Tied true if the task is tied (the task is tied to the thread that
+  /// can suspend its task region), false - untied (the task is not tied to any
+  /// thread).
+  /// \param Final Contains either a constant bool value, or an llvm::Value *
+  /// of i1 type for the final clause. If the value is true, the task forces
+  /// all of its child tasks to become final and included tasks.
+  /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32
+  /// /*part_id*/, captured_struct */*__context*/);
+  /// \param SharedsTy A type which contains references to the shared variables.
+  /// \param Shareds Context with the list of shared variables from the \a
+  /// TaskFunction.
+  virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, bool Tied,
+                            llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
+                            llvm::Value *TaskFunction, QualType SharedsTy,
+                            llvm::Value *Shareds);
 };
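A hedged caller-side sketch for the new emitTaskCall entry point with a constant final(true) clause (all caller names are illustrative):

    llvm::PointerIntPair<llvm::Value *, 1, bool> Final(nullptr, /*IntVal=*/true);
    CGM.getOpenMPRuntime().emitTaskCall(CGF, Loc, /*Tied=*/true, Final,
                                        TaskFn, SharedsTy, SharedsAddr);
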
 
 /// \brief RAII for emitting code of CapturedStmt without function outlining.
diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp
index 0d160d3..e433984 100644
--- a/lib/CodeGen/CGStmt.cpp
+++ b/lib/CodeGen/CGStmt.cpp
@@ -2186,6 +2186,8 @@
     llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                            CapturedStmtInfo->getHelperName(), &CGM.getModule());
   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
+  if (CD->isNothrow())
+    F->addFnAttr(llvm::Attribute::NoUnwind);
 
   // Generate the function.
   StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args,
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
index daf5fcc..9af7474 100644
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -23,6 +23,20 @@
 //===----------------------------------------------------------------------===//
 //                              OpenMP Directive Emission
 //===----------------------------------------------------------------------===//
+namespace {
+/// \brief RAII for inlined OpenMP regions (like 'omp for', 'omp simd', 'omp
+/// critical' etc.). Helps to generate proper debug info and provides correct
+/// code generation for such constructs.
+class InlinedOpenMPRegionScopeRAII {
+  InlinedOpenMPRegionRAII Region;
+  CodeGenFunction::LexicalScope DirectiveScope;
+
+public:
+  InlinedOpenMPRegionScopeRAII(CodeGenFunction &CGF,
+                               const OMPExecutableDirective &D)
+      : Region(CGF, D), DirectiveScope(CGF, D.getSourceRange()) {}
+};
+} // namespace
 
 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
 /// function. Here is the logic:
@@ -175,7 +189,7 @@
           EmitDecl(*VD);
           return GetAddrOfLocalVar(VD);
         });
-      assert(IsRegistered && "counter already registered as private");
+      assert(IsRegistered && "firstprivate var already registered as private");
       // Silence the warning about unused variable.
       (void)IsRegistered;
       ++IRef, ++InitsRef;
@@ -202,7 +216,7 @@
             EmitDecl(*VD);
             return GetAddrOfLocalVar(VD);
           });
-      assert(IsRegistered && "counter already registered as private");
+      assert(IsRegistered && "private var already registered as private");
       // Silence the warning about unused variable.
       (void)IsRegistered;
       ++IRef;
@@ -252,6 +266,13 @@
   for (auto I : S.updates()) {
     EmitIgnoredExpr(I);
   }
+  // Update the linear variables.
+  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
+    for (auto U : C->updates()) {
+      EmitIgnoredExpr(U);
+    }
+  }
+
   // On a continue in the body, jump to the end.
   auto Continue = getJumpDestInCurrentScope("omp.body.continue");
   BreakContinueStack.push_back(BreakContinue(JumpDest(), Continue));
@@ -268,9 +289,10 @@
   }
 }
 
-void CodeGenFunction::EmitOMPInnerLoop(const OMPLoopDirective &S,
-                                       OMPPrivateScope &LoopScope,
-                                       bool SeparateIter) {
+void CodeGenFunction::EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup,
+                                       const Expr *LoopCond,
+                                       const Expr *IncExpr,
+                                       const std::function<void()> &BodyGen) {
   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
   auto Cnt = getPGORegionCounter(&S);
 
@@ -282,17 +304,13 @@
   // If there are any cleanups between here and the loop-exit scope,
   // create a block to stage a loop exit along.
   auto ExitBlock = LoopExit.getBlock();
-  if (LoopScope.requiresCleanups())
+  if (RequiresCleanup)
     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
 
   auto LoopBody = createBasicBlock("omp.inner.for.body");
 
-  // Emit condition: "IV < LastIteration + 1 [ - 1]"
-  // ("- 1" when lastprivate clause is present - separate one iteration).
-  llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond(SeparateIter));
-  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock,
-                       PGO.createLoopWeights(S.getCond(SeparateIter), Cnt));
-
+  // Emit condition.
+  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, Cnt.getCount());
   if (ExitBlock != LoopExit.getBlock()) {
     EmitBlock(ExitBlock);
     EmitBranchThroughCleanup(LoopExit);
@@ -305,12 +323,11 @@
   auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
 
-  EmitOMPLoopBody(S);
-  EmitStopPoint(&S);
+  BodyGen();
 
   // Emit "IV = IV + 1" and a back-edge to the condition block.
   EmitBlock(Continue.getBlock());
-  EmitIgnoredExpr(S.getInc());
+  EmitIgnoredExpr(IncExpr);
   BreakContinueStack.pop_back();
   EmitBranch(CondBlock);
   LoopStack.pop();
@@ -326,6 +343,12 @@
     }
     ++IC;
   }
+  // Emit the final values of the linear variables.
+  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
+    for (auto F : C->finals()) {
+      EmitIgnoredExpr(F);
+    }
+  }
 }
 
 static void EmitOMPAlignedClause(CodeGenFunction &CGF, CodeGenModule &CGM,
@@ -369,7 +392,25 @@
     // Silence the warning about unused variable.
     (void)IsRegistered;
   }
-  (void)LoopScope.Privatize();
+}
+
+static void
+EmitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
+                      CodeGenFunction::OMPPrivateScope &PrivateScope) {
+  for (auto Clause : OMPExecutableDirective::linear_filter(D.clauses())) {
+    for (auto *E : Clause->varlists()) {
+      auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+      bool IsRegistered = PrivateScope.addPrivate(VD, [&]()->llvm::Value * {
+        // Emit var without initialization.
+        auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
+        CGF.EmitAutoVarCleanups(VarEmission);
+        return VarEmission.getAllocatedAddress();
+      });
+      assert(IsRegistered && "linear var already registered as private");
+      // Silence the warning about unused variable.
+      (void)IsRegistered;
+    }
+  }
 }
 
 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
@@ -417,12 +458,15 @@
     }
   }
 
-  InlinedOpenMPRegionRAII Region(*this, S);
-  RunCleanupsScope DirectiveScope(*this);
+  InlinedOpenMPRegionScopeRAII Region(*this, S);
 
-  CGDebugInfo *DI = getDebugInfo();
-  if (DI)
-    DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin());
+  // Emit inits for the linear variables.
+  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
+    for (auto Init : C->inits()) {
+      auto *D = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
+      EmitVarDecl(*D);
+    }
+  }
 
   // Emit the loop iteration variable.
   const Expr *IVExpr = S.getIterationVariable();
@@ -439,6 +483,17 @@
     EmitIgnoredExpr(S.getCalcLastIteration());
   }
 
+  // Emit the linear steps for the linear clauses.
+  // If a step is not constant, it is pre-calculated before the loop.
+  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
+    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
+      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
+        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
+        // Emit calculation of the linear step.
+        EmitIgnoredExpr(CS);
+      }
+  }
+
   if (SeparateIter) {
     // Emit: if (LastIteration > 0) - begin.
     RegionCounter Cnt = getPGORegionCounter(&S);
@@ -451,7 +506,15 @@
     {
       OMPPrivateScope LoopScope(*this);
       EmitPrivateLoopCounters(*this, LoopScope, S.counters());
-      EmitOMPInnerLoop(S, LoopScope, /* SeparateIter */ true);
+      EmitPrivateLinearVars(*this, S, LoopScope);
+      EmitOMPPrivateClause(S, LoopScope);
+      (void)LoopScope.Privatize();
+      EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+                       S.getCond(/*SeparateIter=*/true), S.getInc(),
+                       [&S, this]() {
+                         EmitOMPLoopBody(S);
+                         EmitStopPoint(&S);
+                       });
       EmitOMPLoopBody(S, /* SeparateIter */ true);
     }
     EmitOMPSimdFinal(S);
@@ -462,13 +525,18 @@
     {
       OMPPrivateScope LoopScope(*this);
       EmitPrivateLoopCounters(*this, LoopScope, S.counters());
-      EmitOMPInnerLoop(S, LoopScope);
+      EmitPrivateLinearVars(*this, S, LoopScope);
+      EmitOMPPrivateClause(S, LoopScope);
+      (void)LoopScope.Privatize();
+      EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+                       S.getCond(/*SeparateIter=*/false), S.getInc(),
+                       [&S, this]() {
+                         EmitOMPLoopBody(S);
+                         EmitStopPoint(&S);
+                       });
     }
     EmitOMPSimdFinal(S);
   }
-
-  if (DI)
-    DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
 }
 
 void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
@@ -478,16 +546,50 @@
                                           llvm::Value *ST, llvm::Value *IL,
                                           llvm::Value *Chunk) {
   auto &RT = CGM.getOpenMPRuntime();
+
+  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
+  const bool Dynamic = RT.isDynamic(ScheduleKind);
+
   assert(!RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
          "static non-chunked schedule does not need outer loop");
-  if (RT.isDynamic(ScheduleKind)) {
-    ErrorUnsupported(&S, "OpenMP loop with dynamic schedule");
-    return;
-  }
 
   // Emit outer loop.
   //
   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
+  // When schedule(dynamic,chunk_size) is specified, the iterations are
+  // distributed to threads in the team in chunks as the threads request them.
+  // Each thread executes a chunk of iterations, then requests another chunk,
+  // until no chunks remain to be distributed. Each chunk contains chunk_size
+  // iterations, except for the last chunk to be distributed, which may have
+  // fewer iterations. When no chunk_size is specified, it defaults to 1.
+  //
+  // When schedule(guided,chunk_size) is specified, the iterations are assigned
+  // to threads in the team in chunks as the executing threads request them.
+  // Each thread executes a chunk of iterations, then requests another chunk,
+  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
+  // each chunk is proportional to the number of unassigned iterations divided
+  // by the number of threads in the team, decreasing to 1. For a chunk_size
+  // with value k (greater than 1), the size of each chunk is determined in the
+  // same way, with the restriction that the chunks do not contain fewer than k
+  // iterations (except for the last chunk to be assigned, which may have fewer
+  // than k iterations).
+  //
+  // When schedule(auto) is specified, the decision regarding scheduling is
+  // delegated to the compiler and/or runtime system. The programmer gives the
+  // implementation the freedom to choose any possible mapping of iterations to
+  // threads in the team.
+  //
+  // When schedule(runtime) is specified, the decision regarding scheduling is
+  // deferred until run time, and the schedule and chunk size are taken from the
+  // run-sched-var ICV. If the ICV is set to auto, the schedule is
+  // implementation defined.
+  //
+  // while(__kmpc_dispatch_next(&LB, &UB)) {
+  //   idx = LB;
+  //   while (idx <= UB) { BODY; ++idx; } // inner loop
+  // }
+  //
+  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
   // When schedule(static, chunk_size) is specified, iterations are divided into
   // chunks of size chunk_size, and the chunks are assigned to the threads in
   // the team in a round-robin fashion in the order of the thread number.
@@ -498,12 +600,16 @@
   //   UB = UB + ST;
   // }
   //
+
   const Expr *IVExpr = S.getIterationVariable();
   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
 
-  RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
-                 UB, ST, Chunk);
+  RT.emitForInit(
+      *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
+      (Dynamic ? EmitAnyExpr(S.getLastIteration()).getScalarVal() : UB), ST,
+      Chunk);
+
   auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
 
   // Start the loop with a block that tests the condition.
@@ -512,12 +618,17 @@
   LoopStack.push(CondBlock);
 
   llvm::Value *BoolCondVal = nullptr;
-  // UB = min(UB, GlobalUB)
-  EmitIgnoredExpr(S.getEnsureUpperBound());
-  // IV = LB
-  EmitIgnoredExpr(S.getInit());
-  // IV < UB
-  BoolCondVal = EvaluateExprAsBool(S.getCond(false));
+  if (!Dynamic) {
+    // UB = min(UB, GlobalUB)
+    EmitIgnoredExpr(S.getEnsureUpperBound());
+    // IV = LB
+    EmitIgnoredExpr(S.getInit());
+    // IV < UB
+    BoolCondVal = EvaluateExprAsBool(S.getCond(false));
+  } else {
+    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
+                                 IL, LB, UB, ST);
+  }
 
   // If there are any cleanups between here and the loop-exit scope,
   // create a block to stage a loop exit along.
@@ -533,17 +644,28 @@
   }
   EmitBlock(LoopBody);
 
+  // Emit "IV = LB" (in case of static schedule, we have already calculated new
+  // LB for loop condition and emitted it above).
+  if (Dynamic)
+    EmitIgnoredExpr(S.getInit());
+
   // Create a block for the increment.
   auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
 
-  EmitOMPInnerLoop(S, LoopScope);
+  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+                   S.getCond(/*SeparateIter=*/false), S.getInc(), [&S, this]() {
+                     EmitOMPLoopBody(S);
+                     EmitStopPoint(&S);
+                   });
 
   EmitBlock(Continue.getBlock());
   BreakContinueStack.pop_back();
-  // Emit "LB = LB + Stride", "UB = UB + Stride".
-  EmitIgnoredExpr(S.getNextLowerBound());
-  EmitIgnoredExpr(S.getNextUpperBound());
+  if (!Dynamic) {
+    // Emit "LB = LB + Stride", "UB = UB + Stride".
+    EmitIgnoredExpr(S.getNextLowerBound());
+    EmitIgnoredExpr(S.getNextUpperBound());
+  }
 
   EmitBranch(CondBlock);
   LoopStack.pop();
@@ -551,7 +673,9 @@
   EmitBlock(LoopExit.getBlock());
 
   // Tell the runtime we are done.
-  RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
+  // FIXME: Also call fini for ordered loops with dynamic scheduling.
+  if (!Dynamic)
+    RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
 }
 
 /// \brief Emit a helper variable and return corresponding lvalue.
@@ -602,6 +726,7 @@
 
       OMPPrivateScope LoopScope(*this);
       EmitPrivateLoopCounters(*this, LoopScope, S.counters());
+      (void)LoopScope.Privatize();
 
       // Detect the loop schedule kind and chunk.
       auto ScheduleKind = OMPC_SCHEDULE_unknown;
@@ -632,7 +757,12 @@
         // IV = LB;
         EmitIgnoredExpr(S.getInit());
         // while (idx <= UB) { BODY; ++idx; }
-        EmitOMPInnerLoop(S, LoopScope);
+        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+                         S.getCond(/*SeparateIter=*/false), S.getInc(),
+                         [&S, this]() {
+                           EmitOMPLoopBody(S);
+                           EmitStopPoint(&S);
+                         });
         // Tell the runtime we are done.
         RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
       } else {
@@ -650,47 +780,161 @@
 }
 
 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
-  InlinedOpenMPRegionRAII Region(*this, S);
-  RunCleanupsScope DirectiveScope(*this);
-
-  CGDebugInfo *DI = getDebugInfo();
-  if (DI)
-    DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin());
+  InlinedOpenMPRegionScopeRAII Region(*this, S);
 
   EmitOMPWorksharingLoop(S);
 
   // Emit an implicit barrier at the end.
   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                          /*IsExplicit*/ false);
-  if (DI)
-    DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
 }
 
 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
   llvm_unreachable("CodeGen for 'omp for simd' is not supported yet.");
 }
 
-void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &) {
-  llvm_unreachable("CodeGen for 'omp sections' is not supported yet.");
+static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
+                                const Twine &Name,
+                                llvm::Value *Init = nullptr) {
+  auto LVal = CGF.MakeNaturalAlignAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
+  if (Init)
+    CGF.EmitScalarInit(Init, LVal);
+  return LVal;
 }
 
-void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &) {
-  llvm_unreachable("CodeGen for 'omp section' is not supported yet.");
+void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
+  InlinedOpenMPRegionScopeRAII Region(*this, S);
+
+  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
+  auto *CS = dyn_cast<CompoundStmt>(Stmt);
+  if (CS && CS->size() > 1) {
+    auto &C = CGM.getContext();
+    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
+    // Emit helper vars inits.
+    LValue LB = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.lb.",
+                                  Builder.getInt32(0));
+    auto *GlobalUBVal = Builder.getInt32(CS->size() - 1);
+    LValue UB =
+        createSectionLVal(*this, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
+    LValue ST = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.st.",
+                                  Builder.getInt32(1));
+    LValue IL = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.il.",
+                                  Builder.getInt32(0));
+    // Loop counter.
+    LValue IV = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.iv.");
+    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
+    OpaqueValueMapping OpaqueIV(*this, &IVRefExpr, IV);
+    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
+    OpaqueValueMapping OpaqueUB(*this, &UBRefExpr, UB);
+    // Generate condition for loop.
+    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
+                        OK_Ordinary, S.getLocStart(), /*fpContractable=*/false);
+    // Increment for loop counter.
+    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
+                      S.getLocStart());
+    auto BodyGen = [this, CS, &S, &IV]() {
+      // Iterate through all sections and emit a switch construct:
+      // switch (IV) {
+      //   case 0:
+      //     <SectionStmt[0]>;
+      //     break;
+      // ...
+      //   case <NumSection> - 1:
+      //     <SectionStmt[<NumSection> - 1]>;
+      //     break;
+      // }
+      // .omp.sections.exit:
+      auto *ExitBB = createBasicBlock(".omp.sections.exit");
+      auto *SwitchStmt = Builder.CreateSwitch(
+          EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
+          CS->size());
+      unsigned CaseNumber = 0;
+      for (auto C = CS->children(); C; ++C, ++CaseNumber) {
+        auto CaseBB = createBasicBlock(".omp.sections.case");
+        EmitBlock(CaseBB);
+        SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB);
+        EmitStmt(*C);
+        EmitBranch(ExitBB);
+      }
+      EmitBlock(ExitBB, /*IsFinished=*/true);
+    };
+    // Emit static non-chunked loop.
+    CGM.getOpenMPRuntime().emitForInit(
+        *this, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
+        /*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(),
+        ST.getAddress());
+    // UB = min(UB, GlobalUB);
+    auto *UBVal = EmitLoadOfScalar(UB, S.getLocStart());
+    auto *MinUBGlobalUB = Builder.CreateSelect(
+        Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
+    EmitStoreOfScalar(MinUBGlobalUB, UB);
+    // IV = LB;
+    EmitStoreOfScalar(EmitLoadOfScalar(LB, S.getLocStart()), IV);
+    // while (idx <= UB) { BODY; ++idx; }
+    EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen);
+    // Tell the runtime we are done.
+    CGM.getOpenMPRuntime().emitForFinish(*this, S.getLocStart(),
+                                         OMPC_SCHEDULE_static);
+  } else {
+    // If only one section is found, there is no need to generate a loop;
+    // emit it as a single region.
+    CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void {
+      InlinedOpenMPRegionScopeRAII Region(*this, S);
+      EmitStmt(Stmt);
+      EnsureInsertPoint();
+    }, S.getLocStart(), llvm::None, llvm::None, llvm::None, llvm::None);
+  }
+
+  // Emit an implicit barrier at the end.
+  if (!S.getSingleClause(OMPC_nowait))
+    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
+                                           /*IsExplicit=*/false);
+}
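+
+// Example lowering (a sketch) for '#pragma omp sections { {A;} {B;} }':
+//   __kmpc_for_static_init_4(..., &IL, &LB, &UB, &ST, 1, 1);
+//   UB = min(UB, /*GlobalUB=*/1);
+//   for (IV = LB; IV <= UB; ++IV)
+//     switch (IV) { case 0: A; break; case 1: B; break; }
+//   __kmpc_for_static_fini(...);
+//   <implicit barrier>  // unless 'nowait' is present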
+
+void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
+  InlinedOpenMPRegionScopeRAII Region(*this, S);
+  EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+  EnsureInsertPoint();
 }
 
 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
+  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
+  llvm::SmallVector<const Expr *, 8> SrcExprs;
+  llvm::SmallVector<const Expr *, 8> DstExprs;
+  llvm::SmallVector<const Expr *, 8> AssignmentOps;
+  // Check if there are any 'copyprivate' clauses associated with this 'single'
+  // construct.
+  auto CopyprivateFilter = [](const OMPClause *C) -> bool {
+    return C->getClauseKind() == OMPC_copyprivate;
+  };
+  // Build a list of copyprivate variables along with helper expressions
+  // (<source>, <destination>, <destination>=<source> expressions).
+  typedef OMPExecutableDirective::filtered_clause_iterator<decltype(
+      CopyprivateFilter)> CopyprivateIter;
+  for (CopyprivateIter I(S.clauses(), CopyprivateFilter); I; ++I) {
+    auto *C = cast<OMPCopyprivateClause>(*I);
+    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
+    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
+    DstExprs.append(C->destination_exprs().begin(),
+                    C->destination_exprs().end());
+    AssignmentOps.append(C->assignment_ops().begin(),
+                         C->assignment_ops().end());
+  }
+  // Emit code for the 'single' region along with the 'copyprivate' clauses.
   CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void {
-    InlinedOpenMPRegionRAII Region(*this, S);
-    RunCleanupsScope Scope(*this);
+    InlinedOpenMPRegionScopeRAII Region(*this, S);
     EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
     EnsureInsertPoint();
-  }, S.getLocStart());
+  }, S.getLocStart(), CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
+  // Emit an implicit barrier at the end.
+  if (!S.getSingleClause(OMPC_nowait))
+    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
+                                           /*IsExplicit=*/false);
 }
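+
+// Sketch of the emitted form (assuming the usual libomp entry points):
+//   if (__kmpc_single(...)) { <body>; DidIt = 1; __kmpc_end_single(...); }
+//   __kmpc_copyprivate(..., <size>, <data>, <copy-func>, DidIt);  // if needed
+//   <implicit barrier>  // unless 'nowait' is present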
 
 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
   CGM.getOpenMPRuntime().emitMasterRegion(*this, [&]() -> void {
-    InlinedOpenMPRegionRAII Region(*this, S);
-    RunCleanupsScope Scope(*this);
+    InlinedOpenMPRegionScopeRAII Region(*this, S);
     EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
     EnsureInsertPoint();
   }, S.getLocStart());
@@ -699,8 +943,7 @@
 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
   CGM.getOpenMPRuntime().emitCriticalRegion(
       *this, S.getDirectiveName().getAsString(), [&]() -> void {
-        InlinedOpenMPRegionRAII Region(*this, S);
-        RunCleanupsScope Scope(*this);
+        InlinedOpenMPRegionScopeRAII Region(*this, S);
         EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
         EnsureInsertPoint();
       }, S.getLocStart());
@@ -721,8 +964,35 @@
   llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet.");
 }
 
-void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &) {
-  llvm_unreachable("CodeGen for 'omp task' is not supported yet.");
+void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
+  // Emit outlined function for task construct.
+  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
+  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
+  auto *I = CS->getCapturedDecl()->param_begin();
+  // The first function argument for tasks is a thread id, the second one is a
+  // part id (0 for tied tasks, >=0 for untied tasks).
+  auto OutlinedFn =
+      CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, *std::next(I));
+  // Check if we should emit tied or untied task.
+  bool Tied = !S.getSingleClause(OMPC_untied);
+  // Check if the task is final.
+  llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
+  if (auto *Clause = S.getSingleClause(OMPC_final)) {
+    // If the condition constant-folds, use the known value directly and avoid
+    // emitting the condition expression.
+    auto *Cond = cast<OMPFinalClause>(Clause)->getCondition();
+    bool CondConstant;
+    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
+      Final.setInt(CondConstant);
+    else
+      Final.setPointer(EvaluateExprAsBool(Cond));
+  } else {
+    // By default the task is not final.
+    Final.setInt(/*IntVal=*/false);
+  }
+  auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
+  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), Tied, Final,
+                                      OutlinedFn, SharedsTy, CapturedStruct);
 }
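+
+// Roughly (a sketch), the task construct becomes:
+//   kmp_task_t *T = __kmpc_omp_task_alloc(loc, tid, <tied/final flags>,
+//                                         <task size>, <shareds size>,
+//                                         OutlinedFn);
+//   <copy CapturedStruct into T->shareds>;
+//   __kmpc_omp_task(loc, tid, T);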
 
 void CodeGenFunction::EmitOMPTaskyieldDirective(
@@ -801,7 +1071,8 @@
                    ? CGF.EmitLoadOfLValue(XLValue, Loc)
                    : CGF.EmitAtomicLoad(XLValue, Loc,
                                         IsSeqCst ? llvm::SequentiallyConsistent
-                                                 : llvm::Monotonic);
+                                                 : llvm::Monotonic,
+                                        XLValue.isVolatile());
   // OpenMP, 2.12.6, atomic Construct
   // Any atomic construct with a seq_cst clause forces the atomically
   // performed operation to include an implicit flush operation without a
@@ -823,14 +1094,38 @@
   }
 }
 
+static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
+                                   const Expr *X, const Expr *E,
+                                   SourceLocation Loc) {
+  // x = expr;
+  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
+  LValue XLValue = CGF.EmitLValue(X);
+  RValue ExprRValue = CGF.EmitAnyExpr(E);
+  if (XLValue.isGlobalReg())
+    CGF.EmitStoreThroughGlobalRegLValue(ExprRValue, XLValue);
+  else
+    CGF.EmitAtomicStore(ExprRValue, XLValue,
+                        IsSeqCst ? llvm::SequentiallyConsistent
+                                 : llvm::Monotonic,
+                        XLValue.isVolatile(), /*IsInit=*/false);
+  // OpenMP, 2.12.6, atomic Construct
+  // Any atomic construct with a seq_cst clause forces the atomically
+  // performed operation to include an implicit flush operation without a
+  // list.
+  if (IsSeqCst)
+    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
+}
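+
+// E.g. (a sketch), '#pragma omp atomic write seq_cst' on 'x = expr' yields an
+// atomic store followed by the implicit flush:
+//   store atomic i32 %expr, i32* %x seq_cst, align 4
+//   call void @__kmpc_flush(...)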
+
 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                               bool IsSeqCst, const Expr *X, const Expr *V,
-                              const Expr *, SourceLocation Loc) {
+                              const Expr *E, SourceLocation Loc) {
   switch (Kind) {
   case OMPC_read:
     EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
     break;
   case OMPC_write:
+    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
+    break;
   case OMPC_update:
   case OMPC_capture:
     llvm_unreachable("CodeGen for 'omp atomic clause' is not supported yet.");
@@ -873,6 +1168,13 @@
       break;
     }
   }
+
+  const auto *CS =
+      S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
+  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS))
+    enterFullExpression(EWC);
+  InlinedOpenMPRegionScopeRAII Region(*this, S);
+
   EmitOMPAtomicExpr(*this, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
                     S.getLocStart());
 }
diff --git a/lib/CodeGen/CGVTables.cpp b/lib/CodeGen/CGVTables.cpp
index 2e8471e..372db7a 100644
--- a/lib/CodeGen/CGVTables.cpp
+++ b/lib/CodeGen/CGVTables.cpp
@@ -743,7 +743,7 @@
     return DiscardableODRLinkage;
 
   case TSK_ExplicitInstantiationDeclaration:
-    llvm_unreachable("Should not have been asked to emit this");
+    return llvm::GlobalVariable::ExternalLinkage;
 
   case TSK_ExplicitInstantiationDefinition:
     return NonDiscardableODRLinkage;
diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp
index 7d510d6..9e80f0a 100644
--- a/lib/CodeGen/CodeGenFunction.cpp
+++ b/lib/CodeGen/CodeGenFunction.cpp
@@ -802,20 +802,6 @@
   F->setDoesNotThrow();
 }
 
-static void EmitSizedDeallocationFunction(CodeGenFunction &CGF,
-                                          const FunctionDecl *UnsizedDealloc) {
-  // This is a weak discardable definition of the sized deallocation function.
-  CGF.CurFn->setLinkage(llvm::Function::LinkOnceAnyLinkage);
-  if (CGF.CGM.supportsCOMDAT())
-    CGF.CurFn->setComdat(
-        CGF.CGM.getModule().getOrInsertComdat(CGF.CurFn->getName()));
-
-  // Call the unsized deallocation function and forward the first argument
-  // unchanged.
-  llvm::Constant *Unsized = CGF.CGM.GetAddrOfFunction(UnsizedDealloc);
-  CGF.Builder.CreateCall(Unsized, &*CGF.CurFn->arg_begin());
-}
-
 void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
                                    const CGFunctionInfo &FnInfo) {
   const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
@@ -870,7 +856,7 @@
   else if (isa<CXXConstructorDecl>(FD))
     EmitConstructorBody(Args);
   else if (getLangOpts().CUDA &&
-           !CGM.getCodeGenOpts().CUDAIsDevice &&
+           !getLangOpts().CUDAIsDevice &&
            FD->hasAttr<CUDAGlobalAttr>())
     CGM.getCUDARuntime().EmitDeviceStubBody(*this, Args);
   else if (isa<CXXConversionDecl>(FD) &&
@@ -891,14 +877,6 @@
     emitImplicitAssignmentOperatorBody(Args);
   } else if (Stmt *Body = FD->getBody()) {
     EmitFunctionBody(Args, Body);
-  } else if (FunctionDecl *UnsizedDealloc =
-                 FD->getCorrespondingUnsizedGlobalDeallocationFunction()) {
-    // Global sized deallocation functions get an implicit weak definition if
-    // they don't have an explicit definition, if allowed.
-    assert(getLangOpts().DefineSizedDeallocation &&
-           "Can't emit unallowed definition.");
-    EmitSizedDeallocationFunction(*this, UnsizedDealloc);
-
   } else
     llvm_unreachable("no definition for emitted function");
 
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index c0368aa..151eb7e 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -368,84 +368,26 @@
   /// pushFullExprCleanup - Push a cleanup to be run at the end of the
   /// current full-expression.  Safe against the possibility that
   /// we're currently inside a conditionally-evaluated expression.
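+  ///
+  /// A typical call looks like (illustrative; the cleanup class and its
+  /// constructor arguments vary per call site):
+  ///   pushFullExprCleanup<DestroyObject>(NormalAndEHCleanup, addr, type,
+  ///                                      destroyer, useEHCleanupForArray);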
-  template <class T, class A0>
-  void pushFullExprCleanup(CleanupKind kind, A0 a0) {
+  template <class T, class... As>
+  void pushFullExprCleanup(CleanupKind kind, As... A) {
     // If we're not in a conditional branch, or if none of the
     // arguments requires saving, then use the unconditional cleanup.
     if (!isInConditionalBranch())
-      return EHStack.pushCleanup<T>(kind, a0);
+      return EHStack.pushCleanup<T>(kind, A...);
 
-    typename DominatingValue<A0>::saved_type a0_saved = saveValueInCond(a0);
+    // Stash values in a tuple so we can guarantee the order of saves.
+    typedef std::tuple<typename DominatingValue<As>::saved_type...> SavedTuple;
+    SavedTuple Saved{saveValueInCond(A)...};
 
-    typedef EHScopeStack::ConditionalCleanup1<T, A0> CleanupType;
-    EHStack.pushCleanup<CleanupType>(kind, a0_saved);
-    initFullExprCleanup();
-  }
-
-  /// pushFullExprCleanup - Push a cleanup to be run at the end of the
-  /// current full-expression.  Safe against the possibility that
-  /// we're currently inside a conditionally-evaluated expression.
-  template <class T, class A0, class A1>
-  void pushFullExprCleanup(CleanupKind kind, A0 a0, A1 a1) {
-    // If we're not in a conditional branch, or if none of the
-    // arguments requires saving, then use the unconditional cleanup.
-    if (!isInConditionalBranch())
-      return EHStack.pushCleanup<T>(kind, a0, a1);
-
-    typename DominatingValue<A0>::saved_type a0_saved = saveValueInCond(a0);
-    typename DominatingValue<A1>::saved_type a1_saved = saveValueInCond(a1);
-
-    typedef EHScopeStack::ConditionalCleanup2<T, A0, A1> CleanupType;
-    EHStack.pushCleanup<CleanupType>(kind, a0_saved, a1_saved);
-    initFullExprCleanup();
-  }
-
-  /// pushFullExprCleanup - Push a cleanup to be run at the end of the
-  /// current full-expression.  Safe against the possibility that
-  /// we're currently inside a conditionally-evaluated expression.
-  template <class T, class A0, class A1, class A2>
-  void pushFullExprCleanup(CleanupKind kind, A0 a0, A1 a1, A2 a2) {
-    // If we're not in a conditional branch, or if none of the
-    // arguments requires saving, then use the unconditional cleanup.
-    if (!isInConditionalBranch()) {
-      return EHStack.pushCleanup<T>(kind, a0, a1, a2);
-    }
-    
-    typename DominatingValue<A0>::saved_type a0_saved = saveValueInCond(a0);
-    typename DominatingValue<A1>::saved_type a1_saved = saveValueInCond(a1);
-    typename DominatingValue<A2>::saved_type a2_saved = saveValueInCond(a2);
-    
-    typedef EHScopeStack::ConditionalCleanup3<T, A0, A1, A2> CleanupType;
-    EHStack.pushCleanup<CleanupType>(kind, a0_saved, a1_saved, a2_saved);
-    initFullExprCleanup();
-  }
-
-  /// pushFullExprCleanup - Push a cleanup to be run at the end of the
-  /// current full-expression.  Safe against the possibility that
-  /// we're currently inside a conditionally-evaluated expression.
-  template <class T, class A0, class A1, class A2, class A3>
-  void pushFullExprCleanup(CleanupKind kind, A0 a0, A1 a1, A2 a2, A3 a3) {
-    // If we're not in a conditional branch, or if none of the
-    // arguments requires saving, then use the unconditional cleanup.
-    if (!isInConditionalBranch()) {
-      return EHStack.pushCleanup<T>(kind, a0, a1, a2, a3);
-    }
-    
-    typename DominatingValue<A0>::saved_type a0_saved = saveValueInCond(a0);
-    typename DominatingValue<A1>::saved_type a1_saved = saveValueInCond(a1);
-    typename DominatingValue<A2>::saved_type a2_saved = saveValueInCond(a2);
-    typename DominatingValue<A3>::saved_type a3_saved = saveValueInCond(a3);
-    
-    typedef EHScopeStack::ConditionalCleanup4<T, A0, A1, A2, A3> CleanupType;
-    EHStack.pushCleanup<CleanupType>(kind, a0_saved, a1_saved,
-                                     a2_saved, a3_saved);
+    typedef EHScopeStack::ConditionalCleanup<T, As...> CleanupType;
+    EHStack.pushCleanupTuple<CleanupType>(kind, Saved);
     initFullExprCleanup();
   }
 
   /// \brief Queue a cleanup to be pushed after finishing the current
   /// full-expression.
-  template <class T, class A0, class A1, class A2, class A3>
-  void pushCleanupAfterFullExpr(CleanupKind Kind, A0 a0, A1 a1, A2 a2, A3 a3) {
+  template <class T, class... As>
+  void pushCleanupAfterFullExpr(CleanupKind Kind, As... A) {
     assert(!isInConditionalBranch() && "can't defer conditional cleanup");
 
     LifetimeExtendedCleanupHeader Header = { sizeof(T), Kind };
@@ -456,7 +398,7 @@
 
     char *Buffer = &LifetimeExtendedCleanupStack[OldSize];
     new (Buffer) LifetimeExtendedCleanupHeader(Header);
-    new (Buffer + sizeof(Header)) T(a0, a1, a2, a3);
+    new (Buffer + sizeof(Header)) T(A...);
   }
 
   /// Set up the last cleanup that was pushed as a conditional
@@ -662,7 +604,10 @@
     }
 
     /// \brief Exit scope - all the mapped variables are restored.
-    ~OMPPrivateScope() { ForceCleanup(); }
+    ~OMPPrivateScope() {
+      if (PerformCleanup)
+        ForceCleanup();
+    }
   };
 
   /// \brief Takes the old cleanup stack size and emits the cleanup blocks
@@ -1346,10 +1291,20 @@
   /// to by This.
   llvm::Value *GetVTablePtr(llvm::Value *This, llvm::Type *Ty);
 
+  /// \brief Derived is the presumed address of an object of type T after a
+  /// cast. If T is a polymorphic class type, emit a check that the virtual
+  /// table for Derived belongs to a class derived from T.
+  void EmitVTablePtrCheckForCast(QualType T, llvm::Value *Derived,
+                                 bool MayBeNull);
+
   /// EmitVTablePtrCheckForCall - Virtual method MD is being called via VTable.
   /// If vptr CFI is enabled, emit a check that VTable is valid.
   void EmitVTablePtrCheckForCall(const CXXMethodDecl *MD, llvm::Value *VTable);
 
+  /// EmitVTablePtrCheck - Emit a check that VTable is a valid virtual table for
+  /// RD using llvm.bitset.test.
+  void EmitVTablePtrCheck(const CXXRecordDecl *RD, llvm::Value *VTable);
+
   /// CanDevirtualizeMemberFunctionCalls - Checks whether virtual calls on given
   /// expr can be devirtualized.
   bool CanDevirtualizeMemberFunctionCall(const Expr *Base,
@@ -1567,6 +1522,8 @@
   void EmitAnyExprToMem(const Expr *E, llvm::Value *Location,
                         Qualifiers Quals, bool IsInitializer);
 
+  void EmitAnyExprToExn(const Expr *E, llvm::Value *Addr);
+
   /// EmitExprAsInit - Emits the code necessary to initialize a
   /// location in memory with the given initializer.
   void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue,
@@ -2092,8 +2049,9 @@
   /// Helpers for the OpenMP loop directives.
   void EmitOMPLoopBody(const OMPLoopDirective &Directive,
                        bool SeparateIter = false);
-  void EmitOMPInnerLoop(const OMPLoopDirective &S, OMPPrivateScope &LoopScope,
-                        bool SeparateIter = false);
+  void EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup,
+                        const Expr *LoopCond, const Expr *IncExpr,
+                        const std::function<void()> &BodyGen);
   void EmitOMPSimdFinal(const OMPLoopDirective &S);
   void EmitOMPWorksharingLoop(const OMPLoopDirective &S);
   void EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index 1b6439b..f4ae684 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -323,6 +323,8 @@
 
 void CodeGenModule::clear() {
   DeferredDeclsToEmit.clear();
+  if (OpenMPRuntime)
+    OpenMPRuntime->clear();
 }
 
 void InstrProfStats::reportDiagnostics(DiagnosticsEngine &Diags,
@@ -1338,7 +1340,7 @@
 
   // If this is CUDA, be selective about which declarations we emit.
   if (LangOpts.CUDA) {
-    if (CodeGenOpts.CUDAIsDevice) {
+    if (LangOpts.CUDAIsDevice) {
       if (!Global->hasAttr<CUDADeviceAttr>() &&
           !Global->hasAttr<CUDAGlobalAttr>() &&
           !Global->hasAttr<CUDAConstantAttr>() &&
@@ -1618,16 +1620,6 @@
       // don't need it anymore).
       addDeferredDeclToEmit(F, DDI->second);
       DeferredDecls.erase(DDI);
-      
-      // Otherwise, if this is a sized deallocation function, emit a weak
-      // definition for it at the end of the translation unit (if allowed),
-      // unless the sized deallocation function is aliased.
-    } else if (D &&
-               cast<FunctionDecl>(D)
-                  ->getCorrespondingUnsizedGlobalDeallocationFunction() &&
-               getLangOpts().DefineSizedDeallocation &&
-               !D->hasAttr<AliasAttr>()) {
-      addDeferredDeclToEmit(F, GD);
 
       // Otherwise, there are cases we have to worry about where we're
       // using a declaration for which we must emit a definition but where
@@ -1849,7 +1841,8 @@
     OldGV->eraseFromParent();
   }
 
-  if (supportsCOMDAT() && GV->isWeakForLinker())
+  if (supportsCOMDAT() && GV->isWeakForLinker() &&
+      !GV->hasAvailableExternallyLinkage())
     GV->setComdat(TheModule.getOrInsertComdat(GV->getName()));
 
   return GV;
@@ -1906,7 +1899,7 @@
 
 unsigned CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D,
                                                  unsigned AddrSpace) {
-  if (LangOpts.CUDA && CodeGenOpts.CUDAIsDevice) {
+  if (LangOpts.CUDA && LangOpts.CUDAIsDevice) {
     if (D->hasAttr<CUDAConstantAttr>())
       AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_constant);
     else if (D->hasAttr<CUDASharedAttr>())
@@ -3360,15 +3353,7 @@
 
   case Decl::FileScopeAsm: {
     auto *AD = cast<FileScopeAsmDecl>(D);
-    StringRef AsmString = AD->getAsmString()->getString();
-
-    const std::string &S = getModule().getModuleInlineAsm();
-    if (S.empty())
-      getModule().setModuleInlineAsm(AsmString);
-    else if (S.end()[-1] == '\n')
-      getModule().setModuleInlineAsm(S + AsmString.str());
-    else
-      getModule().setModuleInlineAsm(S + '\n' + AsmString.str());
+    getModule().appendModuleInlineAsm(AD->getAsmString()->getString());
     break;
   }
 
@@ -3652,6 +3637,12 @@
   return llvm::ConstantStruct::getAnon(Fields);
 }
 
+llvm::Constant *
+CodeGenModule::getAddrOfCXXHandlerMapEntry(QualType Ty,
+                                           QualType CatchHandlerType) {
+  return getCXXABI().getAddrOfCXXHandlerMapEntry(Ty, CatchHandlerType);
+}
+
 llvm::Constant *CodeGenModule::GetAddrOfRTTIDescriptor(QualType Ty,
                                                        bool ForEH) {
   // Return a bogus pointer if RTTI is disabled, unless it's for EH.
diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h
index 6902d19..ce540e9 100644
--- a/lib/CodeGen/CodeGenModule.h
+++ b/lib/CodeGen/CodeGenModule.h
@@ -719,6 +719,9 @@
   /// Get the address of the RTTI descriptor for the given type.
   llvm::Constant *GetAddrOfRTTIDescriptor(QualType Ty, bool ForEH = false);
 
+  llvm::Constant *getAddrOfCXXHandlerMapEntry(QualType Ty,
+                                              QualType CatchHandlerType);
+
   /// Get the address of a uuid descriptor.
   llvm::Constant *GetAddrOfUuidDescriptor(const CXXUuidofExpr* E);
 
@@ -1108,6 +1111,9 @@
   void EmitVTableBitSetEntries(llvm::GlobalVariable *VTable,
                                const VTableLayout &VTLayout);
 
+  /// \brief Get the declaration of std::terminate for the platform.
+  llvm::Constant *getTerminateFn();
+
 private:
   llvm::Constant *
   GetOrCreateLLVMFunction(StringRef MangledName, llvm::Type *Ty, GlobalDecl D,
diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp
index a88335d..557828d 100644
--- a/lib/CodeGen/CodeGenPGO.cpp
+++ b/lib/CodeGen/CodeGenPGO.cpp
@@ -58,12 +58,16 @@
 }
 
 void CodeGenPGO::createFuncNameVar(llvm::GlobalValue::LinkageTypes Linkage) {
-  // Usually, we want to match the function's linkage, but
-  // available_externally and extern_weak both have the wrong semantics.
+  // We generally want to match the function's linkage, but available_externally
+  // and extern_weak both have the wrong semantics, and anything that doesn't
+  // need to link across compilation units doesn't need to be visible at all.
   if (Linkage == llvm::GlobalValue::ExternalWeakLinkage)
     Linkage = llvm::GlobalValue::LinkOnceAnyLinkage;
   else if (Linkage == llvm::GlobalValue::AvailableExternallyLinkage)
     Linkage = llvm::GlobalValue::LinkOnceODRLinkage;
+  else if (Linkage == llvm::GlobalValue::InternalLinkage ||
+           Linkage == llvm::GlobalValue::ExternalLinkage)
+    Linkage = llvm::GlobalValue::PrivateLinkage;
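+  // Net effect: extern_weak -> linkonce, available_externally -> linkonce_odr,
+  // internal/external -> private; any other linkage is kept as-is.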
 
   auto *Value =
       llvm::ConstantDataArray::getString(CGM.getLLVMContext(), FuncName, false);
diff --git a/lib/CodeGen/CodeGenTypes.h b/lib/CodeGen/CodeGenTypes.h
index 64c5799..26d37f3 100644
--- a/lib/CodeGen/CodeGenTypes.h
+++ b/lib/CodeGen/CodeGenTypes.h
@@ -82,6 +82,9 @@
     return StructorType::Base;
   case Ctor_Comdat:
     llvm_unreachable("not expecting a COMDAT");
+  case Ctor_CopyingClosure:
+  case Ctor_DefaultClosure:
+    llvm_unreachable("not expecting a closure");
   }
   llvm_unreachable("not a CXXCtorType");
 }
@@ -261,6 +264,8 @@
                                              const FunctionProtoType *type,
                                              RequiredArgs required);
   const CGFunctionInfo &arrangeMSMemberPointerThunk(const CXXMethodDecl *MD);
+  const CGFunctionInfo &arrangeMSCtorClosure(const CXXConstructorDecl *CD,
+                                             CXXCtorType CT);
 
   const CGFunctionInfo &arrangeFreeFunctionType(CanQual<FunctionProtoType> Ty);
   const CGFunctionInfo &arrangeFreeFunctionType(CanQual<FunctionNoProtoType> Ty);
diff --git a/lib/CodeGen/CoverageMappingGen.cpp b/lib/CodeGen/CoverageMappingGen.cpp
index 55e7334..07db6c7 100644
--- a/lib/CodeGen/CoverageMappingGen.cpp
+++ b/lib/CodeGen/CoverageMappingGen.cpp
@@ -124,7 +124,7 @@
   SourceLocation getEndOfFileOrMacro(SourceLocation Loc) {
     if (Loc.isMacroID())
       return Loc.getLocWithOffset(SM.getFileIDSize(SM.getFileID(Loc)) -
-                                  SM.getFileOffset(Loc) - 1);
+                                  SM.getFileOffset(Loc));
     return SM.getLocForEndOfFile(SM.getFileID(Loc));
   }
 
@@ -147,7 +147,7 @@
     SourceLocation Loc = S->getLocEnd();
     while (SM.isMacroArgExpansion(Loc))
       Loc = SM.getImmediateExpansionRange(Loc).first;
-    return Loc;
+    return getPreciseTokenLocEnd(Loc);
   }
 
   /// \brief Find the set of files we have regions for and assign IDs
@@ -257,7 +257,7 @@
       if (!CovFileID)
         continue;
 
-      SourceLocation LocEnd = getPreciseTokenLocEnd(Region.getEndLoc());
+      SourceLocation LocEnd = Region.getEndLoc();
       assert(SM.isWrittenInSameFile(LocStart, LocEnd) &&
              "region spans multiple files");
 
@@ -407,7 +407,7 @@
 
           SourceRegions.emplace_back(Region.getCounter(), NestedLoc, EndLoc);
 
-          EndLoc = getIncludeOrExpansionLoc(EndLoc);
+          EndLoc = getPreciseTokenLocEnd(getIncludeOrExpansionLoc(EndLoc));
           assert(!EndLoc.isInvalid() &&
                  "File exit was not handled before popRegions");
         }
diff --git a/lib/CodeGen/EHScopeStack.h b/lib/CodeGen/EHScopeStack.h
index 6535b76..363d8b8 100644
--- a/lib/CodeGen/EHScopeStack.h
+++ b/lib/CodeGen/EHScopeStack.h
@@ -17,6 +17,7 @@
 #define LLVM_CLANG_LIB_CODEGEN_EHSCOPESTACK_H
 
 #include "clang/Basic/LLVM.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Instructions.h"
@@ -181,84 +182,28 @@
     virtual void Emit(CodeGenFunction &CGF, Flags flags) = 0;
   };
 
-  /// ConditionalCleanupN stores the saved form of its N parameters,
+  /// ConditionalCleanup stores the saved form of its parameters,
   /// then restores them and performs the cleanup.
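+  /// For instance (illustrative), ConditionalCleanup<CallDtor, llvm::Value *>
+  /// -- where CallDtor is some one-argument Cleanup -- saves the value's
+  /// dominating form and, in Emit(), restores it and runs
+  /// CallDtor{restored}.Emit(CGF, flags), just as ConditionalCleanup1 did.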
-  template <class T, class A0>
-  class ConditionalCleanup1 : public Cleanup {
-    typedef typename DominatingValue<A0>::saved_type A0_saved;
-    A0_saved a0_saved;
+  template <class T, class... As> class ConditionalCleanup : public Cleanup {
+    typedef std::tuple<typename DominatingValue<As>::saved_type...> SavedTuple;
+    SavedTuple Saved;
+
+    template <std::size_t... Is>
+    T restore(CodeGenFunction &CGF, llvm::index_sequence<Is...>) {
+      // It's important that the restores are emitted in order. The braced init
+      // list guarantees that.
+      return T{DominatingValue<As>::restore(CGF, std::get<Is>(Saved))...};
+    }
 
     void Emit(CodeGenFunction &CGF, Flags flags) override {
-      A0 a0 = DominatingValue<A0>::restore(CGF, a0_saved);
-      T(a0).Emit(CGF, flags);
+      restore(CGF, llvm::index_sequence_for<As...>()).Emit(CGF, flags);
     }
 
   public:
-    ConditionalCleanup1(A0_saved a0)
-      : a0_saved(a0) {}
-  };
+    ConditionalCleanup(typename DominatingValue<As>::saved_type... A)
+        : Saved(A...) {}
 
-  template <class T, class A0, class A1>
-  class ConditionalCleanup2 : public Cleanup {
-    typedef typename DominatingValue<A0>::saved_type A0_saved;
-    typedef typename DominatingValue<A1>::saved_type A1_saved;
-    A0_saved a0_saved;
-    A1_saved a1_saved;
-
-    void Emit(CodeGenFunction &CGF, Flags flags) override {
-      A0 a0 = DominatingValue<A0>::restore(CGF, a0_saved);
-      A1 a1 = DominatingValue<A1>::restore(CGF, a1_saved);
-      T(a0, a1).Emit(CGF, flags);
-    }
-
-  public:
-    ConditionalCleanup2(A0_saved a0, A1_saved a1)
-      : a0_saved(a0), a1_saved(a1) {}
-  };
-
-  template <class T, class A0, class A1, class A2>
-  class ConditionalCleanup3 : public Cleanup {
-    typedef typename DominatingValue<A0>::saved_type A0_saved;
-    typedef typename DominatingValue<A1>::saved_type A1_saved;
-    typedef typename DominatingValue<A2>::saved_type A2_saved;
-    A0_saved a0_saved;
-    A1_saved a1_saved;
-    A2_saved a2_saved;
-
-    void Emit(CodeGenFunction &CGF, Flags flags) override {
-      A0 a0 = DominatingValue<A0>::restore(CGF, a0_saved);
-      A1 a1 = DominatingValue<A1>::restore(CGF, a1_saved);
-      A2 a2 = DominatingValue<A2>::restore(CGF, a2_saved);
-      T(a0, a1, a2).Emit(CGF, flags);
-    }
-
-  public:
-    ConditionalCleanup3(A0_saved a0, A1_saved a1, A2_saved a2)
-      : a0_saved(a0), a1_saved(a1), a2_saved(a2) {}
-  };
-
-  template <class T, class A0, class A1, class A2, class A3>
-  class ConditionalCleanup4 : public Cleanup {
-    typedef typename DominatingValue<A0>::saved_type A0_saved;
-    typedef typename DominatingValue<A1>::saved_type A1_saved;
-    typedef typename DominatingValue<A2>::saved_type A2_saved;
-    typedef typename DominatingValue<A3>::saved_type A3_saved;
-    A0_saved a0_saved;
-    A1_saved a1_saved;
-    A2_saved a2_saved;
-    A3_saved a3_saved;
-
-    void Emit(CodeGenFunction &CGF, Flags flags) override {
-      A0 a0 = DominatingValue<A0>::restore(CGF, a0_saved);
-      A1 a1 = DominatingValue<A1>::restore(CGF, a1_saved);
-      A2 a2 = DominatingValue<A2>::restore(CGF, a2_saved);
-      A3 a3 = DominatingValue<A3>::restore(CGF, a3_saved);
-      T(a0, a1, a2, a3).Emit(CGF, flags);
-    }
-
-  public:
-    ConditionalCleanup4(A0_saved a0, A1_saved a1, A2_saved a2, A3_saved a3)
-      : a0_saved(a0), a1_saved(a1), a2_saved(a2), a3_saved(a3) {}
+    ConditionalCleanup(SavedTuple Tuple) : Saved(std::move(Tuple)) {}
   };
 
 private:
@@ -319,6 +264,14 @@
     (void) Obj;
   }
 
+  /// Push a lazily-created cleanup on the stack. Tuple version.
+  template <class T, class... As>
+  void pushCleanupTuple(CleanupKind Kind, std::tuple<As...> A) {
+    void *Buffer = pushCleanup(Kind, sizeof(T));
+    Cleanup *Obj = new (Buffer) T(std::move(A));
+    (void) Obj;
+  }
+
   // Feel free to add more variants of the following:
 
   /// Push a cleanup with non-constant storage requirements on the
diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp
index e580969..f23cd9f 100644
--- a/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/lib/CodeGen/ItaniumCXXABI.cpp
@@ -19,14 +19,18 @@
 //===----------------------------------------------------------------------===//
 
 #include "CGCXXABI.h"
+#include "CGCleanup.h"
 #include "CGRecordLayout.h"
 #include "CGVTables.h"
 #include "CodeGenFunction.h"
 #include "CodeGenModule.h"
+#include "TargetInfo.h"
 #include "clang/AST/Mangle.h"
 #include "clang/AST/Type.h"
+#include "clang/AST/StmtCXX.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Value.h"
 
@@ -111,10 +115,21 @@
                                const CXXDestructorDecl *Dtor) override;
 
   void emitRethrow(CodeGenFunction &CGF, bool isNoReturn) override;
+  void emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) override;
+
+  void emitBeginCatch(CodeGenFunction &CGF, const CXXCatchStmt *C) override;
+
+  llvm::CallInst *
+  emitTerminateForUnexpectedException(CodeGenFunction &CGF,
+                                      llvm::Value *Exn) override;
 
   void EmitFundamentalRTTIDescriptor(QualType Type);
   void EmitFundamentalRTTIDescriptors();
   llvm::Constant *getAddrOfRTTIDescriptor(QualType Ty) override;
+  llvm::Constant *
+  getAddrOfCXXHandlerMapEntry(QualType Ty, QualType CatchHandlerType) override {
+    return getAddrOfRTTIDescriptor(Ty);
+  }
 
   bool shouldTypeidBeNullChecked(bool IsDeref, QualType SrcRecordTy) override;
   void EmitBadTypeidCall(CodeGenFunction &CGF) override;
@@ -906,6 +921,59 @@
     CGF.EmitRuntimeCallOrInvoke(Fn);
 }
 
+static llvm::Constant *getAllocateExceptionFn(CodeGenModule &CGM) {
+  // void *__cxa_allocate_exception(size_t thrown_size);
+
+  llvm::FunctionType *FTy =
+    llvm::FunctionType::get(CGM.Int8PtrTy, CGM.SizeTy, /*IsVarArgs=*/false);
+
+  return CGM.CreateRuntimeFunction(FTy, "__cxa_allocate_exception");
+}
+
+static llvm::Constant *getThrowFn(CodeGenModule &CGM) {
+  // void __cxa_throw(void *thrown_exception, std::type_info *tinfo,
+  //                  void (*dest) (void *));
+
+  llvm::Type *Args[3] = { CGM.Int8PtrTy, CGM.Int8PtrTy, CGM.Int8PtrTy };
+  llvm::FunctionType *FTy =
+    llvm::FunctionType::get(CGM.VoidTy, Args, /*IsVarArgs=*/false);
+
+  return CGM.CreateRuntimeFunction(FTy, "__cxa_throw");
+}
+
+void ItaniumCXXABI::emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) {
+  QualType ThrowType = E->getSubExpr()->getType();
+  // Now allocate the exception object.
+  llvm::Type *SizeTy = CGF.ConvertType(getContext().getSizeType());
+  uint64_t TypeSize = getContext().getTypeSizeInChars(ThrowType).getQuantity();
+
+  llvm::Constant *AllocExceptionFn = getAllocateExceptionFn(CGM);
+  llvm::CallInst *ExceptionPtr = CGF.EmitNounwindRuntimeCall(
+      AllocExceptionFn, llvm::ConstantInt::get(SizeTy, TypeSize), "exception");
+
+  CGF.EmitAnyExprToExn(E->getSubExpr(), ExceptionPtr);
+
+  // Now throw the exception.
+  llvm::Constant *TypeInfo = CGM.GetAddrOfRTTIDescriptor(ThrowType,
+                                                         /*ForEH=*/true);
+
+  // The address of the destructor.  If the exception type has a
+  // trivial destructor (or isn't a record), we just pass null.
+  llvm::Constant *Dtor = nullptr;
+  if (const RecordType *RecordTy = ThrowType->getAs<RecordType>()) {
+    CXXRecordDecl *Record = cast<CXXRecordDecl>(RecordTy->getDecl());
+    if (!Record->hasTrivialDestructor()) {
+      CXXDestructorDecl *DtorD = Record->getDestructor();
+      Dtor = CGM.getAddrOfCXXStructor(DtorD, StructorType::Complete);
+      Dtor = llvm::ConstantExpr::getBitCast(Dtor, CGM.Int8PtrTy);
+    }
+  }
+  if (!Dtor) Dtor = llvm::Constant::getNullValue(CGM.Int8PtrTy);
+
+  llvm::Value *args[] = { ExceptionPtr, TypeInfo, Dtor };
+  CGF.EmitNoreturnRuntimeCallOrInvoke(getThrowFn(CGM), args);
+}
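+
+// For example (a sketch), 'throw E();' becomes roughly:
+//   %exn = call i8* @__cxa_allocate_exception(i64 sizeof(E))
+//   <construct E into %exn>
+//   call void @__cxa_throw(i8* %exn, i8* @_ZTI1E, i8* <dtor or null>)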
+
 static llvm::Constant *getItaniumDynamicCastFn(CodeGenFunction &CGF) {
   // void *__dynamic_cast(const void *sub,
   //                      const abi::__class_type_info *src,
@@ -3220,3 +3288,348 @@
     CGM.maybeSetTrivialComdat(*MD, *Fn);
   }
 }
+
+static llvm::Constant *getBeginCatchFn(CodeGenModule &CGM) {
+  // void *__cxa_begin_catch(void*);
+  llvm::FunctionType *FTy = llvm::FunctionType::get(
+      CGM.Int8PtrTy, CGM.Int8PtrTy, /*IsVarArgs=*/false);
+
+  return CGM.CreateRuntimeFunction(FTy, "__cxa_begin_catch");
+}
+
+static llvm::Constant *getEndCatchFn(CodeGenModule &CGM) {
+  // void __cxa_end_catch();
+  llvm::FunctionType *FTy =
+      llvm::FunctionType::get(CGM.VoidTy, /*IsVarArgs=*/false);
+
+  return CGM.CreateRuntimeFunction(FTy, "__cxa_end_catch");
+}
+
+static llvm::Constant *getGetExceptionPtrFn(CodeGenModule &CGM) {
+  // void *__cxa_get_exception_ptr(void*);
+  llvm::FunctionType *FTy = llvm::FunctionType::get(
+      CGM.Int8PtrTy, CGM.Int8PtrTy, /*IsVarArgs=*/false);
+
+  return CGM.CreateRuntimeFunction(FTy, "__cxa_get_exception_ptr");
+}
+
+namespace {
+  /// A cleanup to call __cxa_end_catch.  In many cases, the caught
+  /// exception type lets us state definitively that the thrown exception
+  /// type does not have a destructor.  In particular:
+  ///   - Catch-alls tell us nothing, so we have to conservatively
+  ///     assume that the thrown exception might have a destructor.
+  ///   - Catches by reference behave according to their base types.
+  ///   - Catches of non-record types will only trigger for exceptions
+  ///     of non-record types, which never have destructors.
+  ///   - Catches of record types can trigger for arbitrary subclasses
+  ///     of the caught type, so we have to assume the actual thrown
+  ///     exception type might have a throwing destructor, even if the
+  ///     caught type's destructor is trivial or nothrow.
+  struct CallEndCatch : EHScopeStack::Cleanup {
+    CallEndCatch(bool MightThrow) : MightThrow(MightThrow) {}
+    bool MightThrow;
+
+    void Emit(CodeGenFunction &CGF, Flags flags) override {
+      if (!MightThrow) {
+        CGF.EmitNounwindRuntimeCall(getEndCatchFn(CGF.CGM));
+        return;
+      }
+
+      CGF.EmitRuntimeCallOrInvoke(getEndCatchFn(CGF.CGM));
+    }
+  };
+}
+
+/// Emits a call to __cxa_begin_catch and enters a cleanup to call
+/// __cxa_end_catch.
+///
+/// \param EndMightThrow - true if __cxa_end_catch might throw
+static llvm::Value *CallBeginCatch(CodeGenFunction &CGF,
+                                   llvm::Value *Exn,
+                                   bool EndMightThrow) {
+  llvm::CallInst *call =
+    CGF.EmitNounwindRuntimeCall(getBeginCatchFn(CGF.CGM), Exn);
+
+  CGF.EHStack.pushCleanup<CallEndCatch>(NormalAndEHCleanup, EndMightThrow);
+
+  return call;
+}
+
+/// A "special initializer" callback for initializing a catch
+/// parameter during catch initialization.
+static void InitCatchParam(CodeGenFunction &CGF,
+                           const VarDecl &CatchParam,
+                           llvm::Value *ParamAddr,
+                           SourceLocation Loc) {
+  // Load the exception from where the landing pad saved it.
+  llvm::Value *Exn = CGF.getExceptionFromSlot();
+
+  CanQualType CatchType =
+    CGF.CGM.getContext().getCanonicalType(CatchParam.getType());
+  llvm::Type *LLVMCatchTy = CGF.ConvertTypeForMem(CatchType);
+
+  // If we're catching by reference, we can just cast the object
+  // pointer to the appropriate pointer.
+  if (isa<ReferenceType>(CatchType)) {
+    QualType CaughtType = cast<ReferenceType>(CatchType)->getPointeeType();
+    bool EndCatchMightThrow = CaughtType->isRecordType();
+
+    // __cxa_begin_catch returns the adjusted object pointer.
+    llvm::Value *AdjustedExn = CallBeginCatch(CGF, Exn, EndCatchMightThrow);
+
+    // We have no way to tell the personality function that we're
+    // catching by reference, so if we're catching a pointer,
+    // __cxa_begin_catch will actually return that pointer by value.
+    if (const PointerType *PT = dyn_cast<PointerType>(CaughtType)) {
+      QualType PointeeType = PT->getPointeeType();
+
+      // When catching by reference, generally we should just ignore
+      // this by-value pointer and use the exception object instead.
+      if (!PointeeType->isRecordType()) {
+
+        // Exn points to the struct _Unwind_Exception header, which
+        // we have to skip past in order to reach the exception data.
+        unsigned HeaderSize =
+          CGF.CGM.getTargetCodeGenInfo().getSizeOfUnwindException();
+        AdjustedExn = CGF.Builder.CreateConstGEP1_32(Exn, HeaderSize);
+
+      // However, if we're catching a pointer-to-record type that won't
+      // work, because the personality function might have adjusted
+      // the pointer.  There's actually no way for us to fully satisfy
+      // the language/ABI contract here:  we can't use Exn because it
+      // might have the wrong adjustment, but we can't use the by-value
+      // pointer because it's off by a level of abstraction.
+      //
+      // The current solution is to dump the adjusted pointer into an
+      // alloca, which breaks language semantics (because changing the
+      // pointer doesn't change the exception) but at least works.
+      // The better solution would be to filter out non-exact matches
+      // and rethrow them, but this is tricky because the rethrow
+      // really needs to be catchable by other sites at this landing
+      // pad.  The best solution is to fix the personality function.
+      } else {
+        // Pull the pointer for the reference type off.
+        llvm::Type *PtrTy =
+          cast<llvm::PointerType>(LLVMCatchTy)->getElementType();
+
+        // Create the temporary and write the adjusted pointer into it.
+        llvm::Value *ExnPtrTmp = CGF.CreateTempAlloca(PtrTy, "exn.byref.tmp");
+        llvm::Value *Casted = CGF.Builder.CreateBitCast(AdjustedExn, PtrTy);
+        CGF.Builder.CreateStore(Casted, ExnPtrTmp);
+
+        // Bind the reference to the temporary.
+        AdjustedExn = ExnPtrTmp;
+      }
+    }
+
+    llvm::Value *ExnCast =
+      CGF.Builder.CreateBitCast(AdjustedExn, LLVMCatchTy, "exn.byref");
+    CGF.Builder.CreateStore(ExnCast, ParamAddr);
+    return;
+  }
+
+  // Scalars and complexes.
+  TypeEvaluationKind TEK = CGF.getEvaluationKind(CatchType);
+  if (TEK != TEK_Aggregate) {
+    llvm::Value *AdjustedExn = CallBeginCatch(CGF, Exn, false);
+
+    // If the catch type is a pointer type, __cxa_begin_catch returns
+    // the pointer by value.
+    if (CatchType->hasPointerRepresentation()) {
+      llvm::Value *CastExn =
+        CGF.Builder.CreateBitCast(AdjustedExn, LLVMCatchTy, "exn.casted");
+
+      switch (CatchType.getQualifiers().getObjCLifetime()) {
+      case Qualifiers::OCL_Strong:
+        CastExn = CGF.EmitARCRetainNonBlock(CastExn);
+        // fallthrough
+
+      case Qualifiers::OCL_None:
+      case Qualifiers::OCL_ExplicitNone:
+      case Qualifiers::OCL_Autoreleasing:
+        CGF.Builder.CreateStore(CastExn, ParamAddr);
+        return;
+
+      case Qualifiers::OCL_Weak:
+        CGF.EmitARCInitWeak(ParamAddr, CastExn);
+        return;
+      }
+      llvm_unreachable("bad ownership qualifier!");
+    }
+
+    // Otherwise, it returns a pointer into the exception object.
+
+    llvm::Type *PtrTy = LLVMCatchTy->getPointerTo(0); // addrspace 0 ok
+    llvm::Value *Cast = CGF.Builder.CreateBitCast(AdjustedExn, PtrTy);
+
+    LValue srcLV = CGF.MakeNaturalAlignAddrLValue(Cast, CatchType);
+    LValue destLV = CGF.MakeAddrLValue(ParamAddr, CatchType,
+                                  CGF.getContext().getDeclAlign(&CatchParam));
+    switch (TEK) {
+    case TEK_Complex:
+      CGF.EmitStoreOfComplex(CGF.EmitLoadOfComplex(srcLV, Loc), destLV,
+                             /*init*/ true);
+      return;
+    case TEK_Scalar: {
+      llvm::Value *ExnLoad = CGF.EmitLoadOfScalar(srcLV, Loc);
+      CGF.EmitStoreOfScalar(ExnLoad, destLV, /*init*/ true);
+      return;
+    }
+    case TEK_Aggregate:
+      llvm_unreachable("evaluation kind filtered out!");
+    }
+    llvm_unreachable("bad evaluation kind");
+  }
+
+  assert(isa<RecordType>(CatchType) && "unexpected catch type!");
+
+  llvm::Type *PtrTy = LLVMCatchTy->getPointerTo(0); // addrspace 0 ok
+
+  // Check for a copy expression.  If we don't have a copy expression,
+  // that means a trivial copy is okay.
+  const Expr *copyExpr = CatchParam.getInit();
+  if (!copyExpr) {
+    llvm::Value *rawAdjustedExn = CallBeginCatch(CGF, Exn, true);
+    llvm::Value *adjustedExn = CGF.Builder.CreateBitCast(rawAdjustedExn, PtrTy);
+    CGF.EmitAggregateCopy(ParamAddr, adjustedExn, CatchType);
+    return;
+  }
+
+  // We have to call __cxa_get_exception_ptr to get the adjusted
+  // pointer before copying.
+  llvm::CallInst *rawAdjustedExn =
+    CGF.EmitNounwindRuntimeCall(getGetExceptionPtrFn(CGF.CGM), Exn);
+
+  // Cast that to the appropriate type.
+  llvm::Value *adjustedExn = CGF.Builder.CreateBitCast(rawAdjustedExn, PtrTy);
+
+  // The copy expression is defined in terms of an OpaqueValueExpr.
+  // Find it and map it to the adjusted expression.
+  CodeGenFunction::OpaqueValueMapping
+    opaque(CGF, OpaqueValueExpr::findInCopyConstruct(copyExpr),
+           CGF.MakeAddrLValue(adjustedExn, CatchParam.getType()));
+
+  // Call the copy ctor in a terminate scope.
+  CGF.EHStack.pushTerminate();
+
+  // Perform the copy construction.
+  CharUnits Alignment = CGF.getContext().getDeclAlign(&CatchParam);
+  CGF.EmitAggExpr(copyExpr,
+                  AggValueSlot::forAddr(ParamAddr, Alignment, Qualifiers(),
+                                        AggValueSlot::IsNotDestructed,
+                                        AggValueSlot::DoesNotNeedGCBarriers,
+                                        AggValueSlot::IsNotAliased));
+
+  // Leave the terminate scope.
+  CGF.EHStack.popTerminate();
+
+  // Undo the opaque value mapping.
+  opaque.pop();
+
+  // Finally we can call __cxa_begin_catch.
+  CallBeginCatch(CGF, Exn, true);
+}
+
+/// Begins a catch statement by initializing the catch variable and
+/// calling __cxa_begin_catch.
+void ItaniumCXXABI::emitBeginCatch(CodeGenFunction &CGF,
+                                   const CXXCatchStmt *S) {
+  // We have to be very careful with the ordering of cleanups here:
+  //   C++ [except.throw]p4:
+  //     The destruction [of the exception temporary] occurs
+  //     immediately after the destruction of the object declared in
+  //     the exception-declaration in the handler.
+  //
+  // So the precise ordering is:
+  //   1.  Construct catch variable.
+  //   2.  __cxa_begin_catch
+  //   3.  Enter __cxa_end_catch cleanup
+  //   4.  Enter dtor cleanup
+  //
+  // We do this by using a slightly abnormal initialization process.
+  // Delegation sequence:
+  //   - ExitCXXTryStmt opens a RunCleanupsScope
+  //     - EmitAutoVarAlloca creates the variable and debug info
+  //       - InitCatchParam initializes the variable from the exception
+  //       - CallBeginCatch calls __cxa_begin_catch
+  //       - CallBeginCatch enters the __cxa_end_catch cleanup
+  //     - EmitAutoVarCleanups enters the variable destructor cleanup
+  //   - EmitCXXTryStmt emits the code for the catch body
+  //   - EmitCXXTryStmt closes the RunCleanupsScope
+
+  VarDecl *CatchParam = S->getExceptionDecl();
+  if (!CatchParam) {
+    llvm::Value *Exn = CGF.getExceptionFromSlot();
+    CallBeginCatch(CGF, Exn, true);
+    return;
+  }
+
+  // Emit the local.
+  CodeGenFunction::AutoVarEmission var = CGF.EmitAutoVarAlloca(*CatchParam);
+  InitCatchParam(CGF, *CatchParam, var.getObjectAddress(CGF), S->getLocStart());
+  CGF.EmitAutoVarCleanups(var);
+}
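+
+// For instance (a sketch), 'catch (T &t)' initializes its catch variable via:
+//   %adj = call i8* @__cxa_begin_catch(i8* %exn)  ; adjusted object pointer
+//   store %adj (suitably cast) into %t            ; bind the reference
+// with the pushed cleanup calling __cxa_end_catch() on scope exit.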
+
+/// Get or define the following function:
+///   void @__clang_call_terminate(i8* %exn) nounwind noreturn
+/// This code is used only in C++.
+static llvm::Constant *getClangCallTerminateFn(CodeGenModule &CGM) {
+  llvm::FunctionType *fnTy =
+    llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false);
+  llvm::Constant *fnRef =
+    CGM.CreateRuntimeFunction(fnTy, "__clang_call_terminate");
+
+  llvm::Function *fn = dyn_cast<llvm::Function>(fnRef);
+  if (fn && fn->empty()) {
+    fn->setDoesNotThrow();
+    fn->setDoesNotReturn();
+
+    // What we really want is to massively penalize inlining without
+    // forbidding it completely.  The difference between that and
+    // 'noinline' is negligible.
+    fn->addFnAttr(llvm::Attribute::NoInline);
+
+    // Allow this function to be shared across translation units, but
+    // we don't want it to turn into an exported symbol.
+    fn->setLinkage(llvm::Function::LinkOnceODRLinkage);
+    fn->setVisibility(llvm::Function::HiddenVisibility);
+    if (CGM.supportsCOMDAT())
+      fn->setComdat(CGM.getModule().getOrInsertComdat(fn->getName()));
+
+    // Set up the function.
+    llvm::BasicBlock *entry =
+      llvm::BasicBlock::Create(CGM.getLLVMContext(), "", fn);
+    CGBuilderTy builder(entry);
+
+    // Pull the exception pointer out of the parameter list.
+    llvm::Value *exn = &*fn->arg_begin();
+
+    // Call __cxa_begin_catch(exn).
+    llvm::CallInst *catchCall = builder.CreateCall(getBeginCatchFn(CGM), exn);
+    catchCall->setDoesNotThrow();
+    catchCall->setCallingConv(CGM.getRuntimeCC());
+
+    // Call std::terminate().
+    llvm::CallInst *termCall = builder.CreateCall(CGM.getTerminateFn());
+    termCall->setDoesNotThrow();
+    termCall->setDoesNotReturn();
+    termCall->setCallingConv(CGM.getRuntimeCC());
+
+    // std::terminate cannot return.
+    builder.CreateUnreachable();
+  }
+
+  return fnRef;
+}
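+
+// The emitted helper is roughly (a sketch):
+//   define linkonce_odr hidden void @__clang_call_terminate(i8* %exn) {
+//     call i8* @__cxa_begin_catch(i8* %exn)
+//     call void @_ZSt9terminatev()
+//     unreachable
+//   }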
+
+llvm::CallInst *
+ItaniumCXXABI::emitTerminateForUnexpectedException(CodeGenFunction &CGF,
+                                                   llvm::Value *Exn) {
+  // In C++, we want to call __cxa_begin_catch() before terminating.
+  if (Exn) {
+    assert(CGF.CGM.getLangOpts().CPlusPlus);
+    return CGF.EmitNounwindRuntimeCall(getClangCallTerminateFn(CGF.CGM), Exn);
+  }
+  return CGF.EmitNounwindRuntimeCall(CGF.CGM.getTerminateFn());
+}
diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp
index 48adf3a..f8f7845 100644
--- a/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -17,12 +17,16 @@
 #include "CGCXXABI.h"
 #include "CGVTables.h"
 #include "CodeGenModule.h"
+#include "CodeGenTypes.h"
+#include "TargetInfo.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclCXX.h"
+#include "clang/AST/StmtCXX.h"
 #include "clang/AST/VTableBuilder.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/IR/CallSite.h"
+#include "llvm/IR/Intrinsics.h"
 
 using namespace clang;
 using namespace CodeGen;
@@ -40,7 +44,8 @@
   MicrosoftCXXABI(CodeGenModule &CGM)
       : CGCXXABI(CGM), BaseClassDescriptorType(nullptr),
         ClassHierarchyDescriptorType(nullptr),
-        CompleteObjectLocatorType(nullptr) {}
+        CompleteObjectLocatorType(nullptr), CatchableTypeType(nullptr),
+        ThrowInfoType(nullptr), HandlerMapEntryType(nullptr) {}
 
   bool HasThisReturn(GlobalDecl GD) const override;
   bool hasMostDerivedReturn(GlobalDecl GD) const override;
@@ -71,11 +76,16 @@
                                const CXXDestructorDecl *Dtor) override;
 
   void emitRethrow(CodeGenFunction &CGF, bool isNoReturn) override;
+  void emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) override;
+
+  void emitBeginCatch(CodeGenFunction &CGF, const CXXCatchStmt *C) override;
 
   llvm::GlobalVariable *getMSCompleteObjectLocator(const CXXRecordDecl *RD,
                                                    const VPtrInfo *Info);
 
   llvm::Constant *getAddrOfRTTIDescriptor(QualType Ty) override;
+  llvm::Constant *
+  getAddrOfCXXHandlerMapEntry(QualType Ty, QualType CatchHandlerType) override;
 
   bool shouldTypeidBeNullChecked(bool IsDeref, QualType SrcRecordTy) override;
   void EmitBadTypeidCall(CodeGenFunction &CGF) override;
@@ -225,7 +235,7 @@
     assert(GD.getDtorType() == Dtor_Deleting &&
            "Only deleting destructor thunks are available in this ABI");
     CallArgs.add(RValue::get(getStructorImplicitParamValue(CGF)),
-                             CGM.getContext().IntTy);
+                 getContext().IntTy);
   }
 
   void emitVirtualInheritanceTables(const CXXRecordDecl *RD) override;
@@ -410,6 +420,9 @@
     if (!isImageRelative())
       return PtrVal;
 
+    if (PtrVal->isNullValue())
+      return llvm::Constant::getNullValue(CGM.IntTy);
+
     llvm::Constant *ImageBaseAsInt =
         llvm::ConstantExpr::getPtrToInt(getImageBase(), CGM.IntPtrTy);
     llvm::Constant *PtrValAsInt =
@@ -467,6 +480,10 @@
     return GetVBaseOffsetFromVBPtr(CGF, Base, VBPOffset, VBTOffset, VBPtr);
   }
 
+  std::pair<llvm::Value *, llvm::Value *>
+  performBaseAdjustment(CodeGenFunction &CGF, llvm::Value *Value,
+                        QualType SrcRecordTy);
+
   /// \brief Performs a full virtual base adjustment.  Used to dereference
   /// pointers to members of virtual bases.
   llvm::Value *AdjustVirtualBase(CodeGenFunction &CGF, const Expr *E,
@@ -556,6 +573,94 @@
 
   void emitCXXStructor(const CXXMethodDecl *MD, StructorType Type) override;
 
+  llvm::StructType *getHandlerMapEntryType() {
+    if (!HandlerMapEntryType) {
+      llvm::Type *FieldTypes[] = {
+        CGM.IntTy,                           // Flags
+        getImageRelativeType(CGM.Int8PtrTy), // TypeDescriptor
+      };
+      HandlerMapEntryType = llvm::StructType::create(
+          CGM.getLLVMContext(), FieldTypes, "eh.HandlerMapEntry");
+    }
+    return HandlerMapEntryType;
+  }
+
+  llvm::StructType *getCatchableTypeType() {
+    if (CatchableTypeType)
+      return CatchableTypeType;
+    llvm::Type *FieldTypes[] = {
+        CGM.IntTy,                           // Flags
+        getImageRelativeType(CGM.Int8PtrTy), // TypeDescriptor
+        CGM.IntTy,                           // NonVirtualAdjustment
+        CGM.IntTy,                           // OffsetToVBPtr
+        CGM.IntTy,                           // VBTableIndex
+        CGM.IntTy,                           // Size
+        getImageRelativeType(CGM.Int8PtrTy)  // CopyCtor
+    };
+    CatchableTypeType = llvm::StructType::create(
+        CGM.getLLVMContext(), FieldTypes, "eh.CatchableType");
+    return CatchableTypeType;
+  }
+
+  llvm::StructType *getCatchableTypeArrayType(uint32_t NumEntries) {
+    llvm::StructType *&CatchableTypeArrayType =
+        CatchableTypeArrayTypeMap[NumEntries];
+    if (CatchableTypeArrayType)
+      return CatchableTypeArrayType;
+
+    llvm::SmallString<23> CTATypeName("eh.CatchableTypeArray.");
+    CTATypeName += llvm::utostr(NumEntries);
+    llvm::Type *CTType =
+        getImageRelativeType(getCatchableTypeType()->getPointerTo());
+    llvm::Type *FieldTypes[] = {
+        CGM.IntTy,                               // NumEntries
+        llvm::ArrayType::get(CTType, NumEntries) // CatchableTypes
+    };
+    CatchableTypeArrayType =
+        llvm::StructType::create(CGM.getLLVMContext(), FieldTypes, CTATypeName);
+    return CatchableTypeArrayType;
+  }
+
+  llvm::StructType *getThrowInfoType() {
+    if (ThrowInfoType)
+      return ThrowInfoType;
+    llvm::Type *FieldTypes[] = {
+        CGM.IntTy,                           // Flags
+        getImageRelativeType(CGM.Int8PtrTy), // CleanupFn
+        getImageRelativeType(CGM.Int8PtrTy), // ForwardCompat
+        getImageRelativeType(CGM.Int8PtrTy)  // CatchableTypeArray
+    };
+    ThrowInfoType = llvm::StructType::create(CGM.getLLVMContext(), FieldTypes,
+                                             "eh.ThrowInfo");
+    return ThrowInfoType;
+  }
+
+  llvm::Constant *getThrowFn() {
+    // _CxxThrowException is passed an exception object and a ThrowInfo object
+    // which describes the exception.
+    llvm::Type *Args[] = {CGM.Int8PtrTy, getThrowInfoType()->getPointerTo()};
+    llvm::FunctionType *FTy =
+        llvm::FunctionType::get(CGM.VoidTy, Args, /*IsVarArgs=*/false);
+    auto *Fn = cast<llvm::Function>(
+        CGM.CreateRuntimeFunction(FTy, "_CxxThrowException"));
+    // _CxxThrowException is stdcall on 32-bit x86 platforms.
+    if (CGM.getTarget().getTriple().getArch() == llvm::Triple::x86)
+      Fn->setCallingConv(llvm::CallingConv::X86_StdCall);
+    return Fn;
+  }
+
+  llvm::Function *getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
+                                          CXXCtorType CT);
+
+  llvm::Constant *getCatchableType(QualType T,
+                                   uint32_t NVOffset = 0,
+                                   int32_t VBPtrOffset = -1,
+                                   uint32_t VBIndex = 0);
+
+  llvm::GlobalVariable *getCatchableTypeArray(QualType T);
+
+  llvm::GlobalVariable *getThrowInfo(QualType T) override;
+
 private:
   typedef std::pair<const CXXRecordDecl *, CharUnits> VFTableIdTy;
   typedef llvm::DenseMap<VFTableIdTy, llvm::GlobalVariable *> VTablesMapTy;
@@ -587,6 +692,13 @@
   llvm::StructType *BaseClassDescriptorType;
   llvm::StructType *ClassHierarchyDescriptorType;
   llvm::StructType *CompleteObjectLocatorType;
+
+  llvm::DenseMap<QualType, llvm::GlobalVariable *> CatchableTypeArrays;
+
+  llvm::StructType *CatchableTypeType;
+  llvm::DenseMap<uint32_t, llvm::StructType *> CatchableTypeArrayTypeMap;
+  llvm::StructType *ThrowInfoType;
+  llvm::StructType *HandlerMapEntryType;
 };
 
 }
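
The struct types created above mirror records consumed by the MSVC C++ runtime.
A hedged C-level sketch of how the runtime would view the two central ones,
with image-relative fields shown as 32-bit offsets as on 64-bit targets (field
names and flag bits are taken from this patch; nothing here is literal compiler
output):

    #include <cstdint>
    struct CatchableType {
      uint32_t Flags;                // 1: scalar, 4: has vbases, 16: std::bad_alloc
      int32_t  TypeDescriptor;       // image-relative
      uint32_t NonVirtualAdjustment;
      uint32_t OffsetToVBPtr;
      uint32_t VBTableIndex;
      uint32_t Size;
      int32_t  CopyCtor;             // image-relative, 0 if none
    };
    struct ThrowInfo {
      uint32_t Flags;                // 1: const, 2: volatile
      int32_t  CleanupFn;            // image-relative dtor, 0 if trivial
      int32_t  ForwardCompat;        // unused, 0
      int32_t  CatchableTypeArray;   // image-relative
    };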
@@ -667,55 +779,72 @@
     CGF.EmitDeleteCall(DE->getOperatorDelete(), MDThis, ElementType);
 }
 
-static llvm::Function *getRethrowFn(CodeGenModule &CGM) {
-  // _CxxThrowException takes two pointer width arguments: a value and a context
-  // object which points to a TypeInfo object.
-  llvm::Type *ArgTypes[] = {CGM.Int8PtrTy, CGM.Int8PtrTy};
-  llvm::FunctionType *FTy =
-      llvm::FunctionType::get(CGM.VoidTy, ArgTypes, false);
-  auto *Fn = cast<llvm::Function>(
-      CGM.CreateRuntimeFunction(FTy, "_CxxThrowException"));
-  // _CxxThrowException is stdcall on 32-bit x86 platforms.
-  if (CGM.getTarget().getTriple().getArch() == llvm::Triple::x86)
-    Fn->setCallingConv(llvm::CallingConv::X86_StdCall);
-  return Fn;
-}
-
 void MicrosoftCXXABI::emitRethrow(CodeGenFunction &CGF, bool isNoReturn) {
-  llvm::Value *Args[] = {llvm::ConstantPointerNull::get(CGM.Int8PtrTy),
-                         llvm::ConstantPointerNull::get(CGM.Int8PtrTy)};
-  auto *Fn = getRethrowFn(CGM);
+  llvm::Value *Args[] = {
+      llvm::ConstantPointerNull::get(CGM.Int8PtrTy),
+      llvm::ConstantPointerNull::get(getThrowInfoType()->getPointerTo())};
+  auto *Fn = getThrowFn();
   if (isNoReturn)
     CGF.EmitNoreturnRuntimeCallOrInvoke(Fn, Args);
   else
     CGF.EmitRuntimeCallOrInvoke(Fn, Args);
 }
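
Passing null for both the exception object and the ThrowInfo is the rethrow
convention: the runtime re-raises whatever exception is currently in flight.
Illustrative source that reaches this path:

    try { f(); } catch (...) { throw; }  // lowers to _CxxThrowException(0, 0)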
 
-/// \brief Gets the offset to the virtual base that contains the vfptr for
-/// MS-ABI polymorphic types.
-static llvm::Value *getPolymorphicOffset(CodeGenFunction &CGF,
-                                         const CXXRecordDecl *RD,
-                                         llvm::Value *Value) {
-  const ASTContext &Context = RD->getASTContext();
-  for (const CXXBaseSpecifier &Base : RD->vbases())
-    if (Context.getASTRecordLayout(Base.getType()->getAsCXXRecordDecl())
-            .hasExtendableVFPtr())
-      return CGF.CGM.getCXXABI().GetVirtualBaseClassOffset(
-          CGF, Value, RD, Base.getType()->getAsCXXRecordDecl());
-  llvm_unreachable("One of our vbases should be polymorphic.");
+namespace {
+struct CallEndCatchMSVC : EHScopeStack::Cleanup {
+  CallEndCatchMSVC() {}
+  void Emit(CodeGenFunction &CGF, Flags flags) override {
+    CGF.EmitNounwindRuntimeCall(
+        CGF.CGM.getIntrinsic(llvm::Intrinsic::eh_endcatch));
+  }
+};
 }
 
-static std::pair<llvm::Value *, llvm::Value *>
-performBaseAdjustment(CodeGenFunction &CGF, llvm::Value *Value,
-                      QualType SrcRecordTy) {
+void MicrosoftCXXABI::emitBeginCatch(CodeGenFunction &CGF,
+                                     const CXXCatchStmt *S) {
+  // In the MS ABI, the runtime handles the copy, and the catch handler is
+  // responsible for destruction.
+  VarDecl *CatchParam = S->getExceptionDecl();
+  llvm::Value *Exn = CGF.getExceptionFromSlot();
+  llvm::Function *BeginCatch =
+      CGF.CGM.getIntrinsic(llvm::Intrinsic::eh_begincatch);
+
+  if (!CatchParam) {
+    llvm::Value *Args[2] = {Exn, llvm::Constant::getNullValue(CGF.Int8PtrTy)};
+    CGF.EmitNounwindRuntimeCall(BeginCatch, Args);
+    CGF.EHStack.pushCleanup<CallEndCatchMSVC>(NormalAndEHCleanup);
+    return;
+  }
+
+  CodeGenFunction::AutoVarEmission var = CGF.EmitAutoVarAlloca(*CatchParam);
+  llvm::Value *ParamAddr =
+      CGF.Builder.CreateBitCast(var.getObjectAddress(CGF), CGF.Int8PtrTy);
+  llvm::Value *Args[2] = {Exn, ParamAddr};
+  CGF.EmitNounwindRuntimeCall(BeginCatch, Args);
+  // FIXME: Do we really need exceptional endcatch cleanups?
+  CGF.EHStack.pushCleanup<CallEndCatchMSVC>(NormalAndEHCleanup);
+  CGF.EmitAutoVarCleanups(var);
+}
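+
A hedged sketch of what emitBeginCatch produces for a by-value handler, using
the two intrinsics referenced above (IR shape abbreviated, not literal output):

    // C++:  try { mayThrow(); } catch (Foo f) { use(f); }
    //
    //   %f = alloca %struct.Foo
    //   call void @llvm.eh.begincatch(i8* %exn, i8* %f.i8) ; runtime copies in
    //   ...catch body...
    //   call void @llvm.eh.endcatch()  ; pushed for normal and EH exits alike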
+
+std::pair<llvm::Value *, llvm::Value *>
+MicrosoftCXXABI::performBaseAdjustment(CodeGenFunction &CGF, llvm::Value *Value,
+                                       QualType SrcRecordTy) {
   Value = CGF.Builder.CreateBitCast(Value, CGF.Int8PtrTy);
   const CXXRecordDecl *SrcDecl = SrcRecordTy->getAsCXXRecordDecl();
+  const ASTContext &Context = getContext();
 
-  if (CGF.getContext().getASTRecordLayout(SrcDecl).hasExtendableVFPtr())
+  if (Context.getASTRecordLayout(SrcDecl).hasExtendableVFPtr())
     return std::make_pair(Value, llvm::ConstantInt::get(CGF.Int32Ty, 0));
 
   // Perform a base adjustment.
-  llvm::Value *Offset = getPolymorphicOffset(CGF, SrcDecl, Value);
+  const CXXBaseSpecifier *PolymorphicBase = std::find_if(
+      SrcDecl->vbases_begin(), SrcDecl->vbases_end(),
+      [&](const CXXBaseSpecifier &Base) {
+        const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl();
+        return Context.getASTRecordLayout(BaseDecl).hasExtendableVFPtr();
+      });
+  llvm::Value *Offset = GetVirtualBaseClassOffset(
+      CGF, Value, SrcDecl, PolymorphicBase->getType()->getAsCXXRecordDecl());
   Value = CGF.Builder.CreateInBoundsGEP(Value, Offset);
   Offset = CGF.Builder.CreateTrunc(Offset, CGF.Int32Ty);
   return std::make_pair(Value, Offset);
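
The adjustment above only fires when the source object's vfptr lives in a
virtual base. A minimal illustrative case (hypothetical types):

    struct B { virtual ~B(); };  // owns the vfptr
    struct S : virtual B {};     // no vfptr of its own
    // dynamic_cast or typeid on an S* must first hop to the B subobject via
    // the vbtable before the vftable can be inspected.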
@@ -725,7 +854,7 @@
                                                 QualType SrcRecordTy) {
   const CXXRecordDecl *SrcDecl = SrcRecordTy->getAsCXXRecordDecl();
   return IsDeref &&
-         !CGM.getContext().getASTRecordLayout(SrcDecl).hasExtendableVFPtr();
+         !getContext().getASTRecordLayout(SrcDecl).hasExtendableVFPtr();
 }
 
 static llvm::CallSite emitRTtypeidCall(CodeGenFunction &CGF,
@@ -759,7 +888,7 @@
                                                          QualType SrcRecordTy) {
   const CXXRecordDecl *SrcDecl = SrcRecordTy->getAsCXXRecordDecl();
   return SrcIsPtr &&
-         !CGM.getContext().getASTRecordLayout(SrcDecl).hasExtendableVFPtr();
+         !getContext().getASTRecordLayout(SrcDecl).hasExtendableVFPtr();
 }
 
 llvm::Value *MicrosoftCXXABI::EmitDynamicCastCall(
@@ -817,10 +946,11 @@
 llvm::Value *MicrosoftCXXABI::GetVirtualBaseClassOffset(
     CodeGenFunction &CGF, llvm::Value *This, const CXXRecordDecl *ClassDecl,
     const CXXRecordDecl *BaseClassDecl) {
+  const ASTContext &Context = getContext();
   int64_t VBPtrChars =
-      getContext().getASTRecordLayout(ClassDecl).getVBPtrOffset().getQuantity();
+      Context.getASTRecordLayout(ClassDecl).getVBPtrOffset().getQuantity();
   llvm::Value *VBPtrOffset = llvm::ConstantInt::get(CGM.PtrDiffTy, VBPtrChars);
-  CharUnits IntSize = getContext().getTypeSizeInChars(getContext().IntTy);
+  CharUnits IntSize = Context.getTypeSizeInChars(Context.IntTy);
   CharUnits VBTableChars =
       IntSize *
       CGM.getMicrosoftVTableContext().getVBTableIndex(ClassDecl, BaseClassDecl);
@@ -947,23 +1077,44 @@
   }
 }
 
+static bool hasDefaultCXXMethodCC(ASTContext &Context,
+                                  const CXXMethodDecl *MD) {
+  CallingConv ExpectedCallingConv = Context.getDefaultCallingConvention(
+      /*IsVariadic=*/false, /*IsCXXMethod=*/true);
+  CallingConv ActualCallingConv =
+      MD->getType()->getAs<FunctionProtoType>()->getCallConv();
+  return ExpectedCallingConv == ActualCallingConv;
+}
+
 void MicrosoftCXXABI::EmitCXXConstructors(const CXXConstructorDecl *D) {
   // There's only one constructor type in this ABI.
   CGM.EmitGlobal(GlobalDecl(D, Ctor_Complete));
+
+  // Exported default constructors either have a simple call-site where they use
+  // the typical calling convention and take a single 'this' pointer argument,
+  // or they get a wrapper function which appropriately thunks to the real
+  // default constructor.  This thunk is the default constructor closure.
+  if (D->hasAttr<DLLExportAttr>() && D->isDefaultConstructor())
+    if (!hasDefaultCXXMethodCC(getContext(), D) || D->getNumParams() != 0) {
+      llvm::Function *Fn = getAddrOfCXXCtorClosure(D, Ctor_DefaultClosure);
+      Fn->setLinkage(llvm::GlobalValue::WeakODRLinkage);
+      Fn->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
+    }
 }
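
An illustrative trigger for the closure path (hypothetical code): the
constructor below is a default constructor because it is callable with no
arguments, yet it has parameters, so a dllexported closure thunk that fills in
the defaults is emitted alongside the real constructor.

    struct __declspec(dllexport) A {
      A(int x = 0, int y = 0);
    };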
 
 void MicrosoftCXXABI::EmitVBPtrStores(CodeGenFunction &CGF,
                                       const CXXRecordDecl *RD) {
   llvm::Value *ThisInt8Ptr =
     CGF.Builder.CreateBitCast(getThisValue(CGF), CGM.Int8PtrTy, "this.int8");
-  const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD);
+  const ASTContext &Context = getContext();
+  const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
 
   const VBTableGlobals &VBGlobals = enumerateVBTables(RD);
   for (unsigned I = 0, E = VBGlobals.VBTables->size(); I != E; ++I) {
     const VPtrInfo *VBT = (*VBGlobals.VBTables)[I];
     llvm::GlobalVariable *GV = VBGlobals.Globals[I];
     const ASTRecordLayout &SubobjectLayout =
-        CGM.getContext().getASTRecordLayout(VBT->BaseWithVPtr);
+        Context.getASTRecordLayout(VBT->BaseWithVPtr);
     CharUnits Offs = VBT->NonVirtualOffset;
     Offs += SubobjectLayout.getVBPtrOffset();
     if (VBT->getVBaseWithVPtr())
@@ -983,7 +1134,7 @@
   // TODO: 'for base' flag
   if (T == StructorType::Deleting) {
     // The scalar deleting destructor takes an implicit int parameter.
-    ArgTys.push_back(CGM.getContext().IntTy);
+    ArgTys.push_back(getContext().IntTy);
   }
   auto *CD = dyn_cast<CXXConstructorDecl>(MD);
   if (!CD)
@@ -996,9 +1147,9 @@
   const FunctionProtoType *FPT = CD->getType()->castAs<FunctionProtoType>();
   if (Class->getNumVBases()) {
     if (FPT->isVariadic())
-      ArgTys.insert(ArgTys.begin() + 1, CGM.getContext().IntTy);
+      ArgTys.insert(ArgTys.begin() + 1, getContext().IntTy);
     else
-      ArgTys.push_back(CGM.getContext().IntTy);
+      ArgTys.push_back(getContext().IntTy);
   }
 }
 
@@ -1038,7 +1189,7 @@
 
   if (ML.VBase) {
     const ASTRecordLayout &DerivedLayout =
-        CGM.getContext().getASTRecordLayout(MD->getParent());
+        getContext().getASTRecordLayout(MD->getParent());
     Adjustment += DerivedLayout.getVBaseClassOffset(ML.VBase);
   }
 
@@ -1280,7 +1431,7 @@
   llvm::GlobalValue *VTableAddressPoint = VFTablesMap[ID];
   if (!VTableAddressPoint) {
     assert(Base.getBase()->getNumVBases() &&
-           !CGM.getContext().getASTRecordLayout(Base.getBase()).hasOwnVFPtr());
+           !getContext().getASTRecordLayout(Base.getBase()).hasOwnVFPtr());
   }
   return VTableAddressPoint;
 }
@@ -1336,102 +1487,97 @@
 #endif
   }
 
-  for (size_t J = 0, F = VFPtrs.size(); J != F; ++J) {
-    if (VFPtrs[J]->FullOffsetInMDC != VPtrOffset)
-      continue;
-    SmallString<256> VFTableName;
-    mangleVFTableName(getMangleContext(), RD, VFPtrs[J], VFTableName);
-    StringRef VTableName = VFTableName;
+  VPtrInfo *const *VFPtrI =
+      std::find_if(VFPtrs.begin(), VFPtrs.end(), [&](VPtrInfo *VPI) {
+        return VPI->FullOffsetInMDC == VPtrOffset;
+      });
+  if (VFPtrI == VFPtrs.end()) {
+    VFTablesMap[ID] = nullptr;
+    return nullptr;
+  }
+  VPtrInfo *VFPtr = *VFPtrI;
 
-    uint64_t NumVTableSlots =
-        VTContext.getVFTableLayout(RD, VFPtrs[J]->FullOffsetInMDC)
-            .getNumVTableComponents();
-    llvm::GlobalValue::LinkageTypes VTableLinkage =
-        llvm::GlobalValue::ExternalLinkage;
-    llvm::ArrayType *VTableType =
-        llvm::ArrayType::get(CGM.Int8PtrTy, NumVTableSlots);
-    if (getContext().getLangOpts().RTTIData) {
-      VTableLinkage = llvm::GlobalValue::PrivateLinkage;
-      VTableName = "";
-    }
+  SmallString<256> VFTableName;
+  mangleVFTableName(getMangleContext(), RD, VFPtr, VFTableName);
 
-    VTable = CGM.getModule().getNamedGlobal(VFTableName);
-    if (!VTable) {
-      // Create a backing variable for the contents of VTable.  The VTable may
-      // or may not include space for a pointer to RTTI data.
-      llvm::GlobalValue *VFTable = VTable = new llvm::GlobalVariable(
-          CGM.getModule(), VTableType, /*isConstant=*/true, VTableLinkage,
-          /*Initializer=*/nullptr, VTableName);
-      VTable->setUnnamedAddr(true);
+  llvm::GlobalValue::LinkageTypes VFTableLinkage = CGM.getVTableLinkage(RD);
+  bool VFTableComesFromAnotherTU =
+      llvm::GlobalValue::isAvailableExternallyLinkage(VFTableLinkage) ||
+      llvm::GlobalValue::isExternalLinkage(VFTableLinkage);
+  bool VTableAliasIsRequired =
+      !VFTableComesFromAnotherTU && getContext().getLangOpts().RTTIData;
 
-      // Only insert a pointer into the VFTable for RTTI data if we are not
-      // importing it.  We never reference the RTTI data directly so there is no
-      // need to make room for it.
-      if (getContext().getLangOpts().RTTIData &&
-          !RD->hasAttr<DLLImportAttr>()) {
-        llvm::Value *GEPIndices[] = {llvm::ConstantInt::get(CGM.IntTy, 0),
-                                     llvm::ConstantInt::get(CGM.IntTy, 1)};
-        // Create a GEP which points just after the first entry in the VFTable,
-        // this should be the location of the first virtual method.
-        llvm::Constant *VTableGEP =
-            llvm::ConstantExpr::getInBoundsGetElementPtr(VTable, GEPIndices);
-        // The symbol for the VFTable is an alias to the GEP.  It is
-        // transparent, to other modules, what the nature of this symbol is; all
-        // that matters is that the alias be the address of the first virtual
-        // method.
-        VFTable = llvm::GlobalAlias::create(
-            cast<llvm::SequentialType>(VTableGEP->getType())->getElementType(),
-            /*AddressSpace=*/0, llvm::GlobalValue::ExternalLinkage,
-            VFTableName.str(), VTableGEP, &CGM.getModule());
-      } else {
-        // We don't need a GlobalAlias to be a symbol for the VTable if we won't
-        // be referencing any RTTI data.  The GlobalVariable will end up being
-        // an appropriate definition of the VFTable.
-        VTable->setName(VFTableName.str());
-      }
-
-      VFTable->setUnnamedAddr(true);
-      if (RD->hasAttr<DLLImportAttr>())
-        VFTable->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
-      else if (RD->hasAttr<DLLExportAttr>())
-        VFTable->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
-
-      llvm::GlobalValue::LinkageTypes VFTableLinkage = CGM.getVTableLinkage(RD);
-      if (VFTable != VTable) {
-        if (llvm::GlobalValue::isAvailableExternallyLinkage(VFTableLinkage)) {
-          // AvailableExternally implies that we grabbed the data from another
-          // executable.  No need to stick the alias in a Comdat.
-        } else if (llvm::GlobalValue::isInternalLinkage(VFTableLinkage) ||
-                   llvm::GlobalValue::isWeakODRLinkage(VFTableLinkage) ||
-                   llvm::GlobalValue::isLinkOnceODRLinkage(VFTableLinkage)) {
-          // The alias is going to be dropped into a Comdat, no need to make it
-          // weak.
-          if (!llvm::GlobalValue::isInternalLinkage(VFTableLinkage))
-            VFTableLinkage = llvm::GlobalValue::ExternalLinkage;
-          llvm::Comdat *C =
-              CGM.getModule().getOrInsertComdat(VFTable->getName());
-          // We must indicate which VFTable is larger to support linking between
-          // translation units which do and do not have RTTI data.  The largest
-          // VFTable contains the RTTI data; translation units which reference
-          // the smaller VFTable always reference it relative to the first
-          // virtual method.
-          C->setSelectionKind(llvm::Comdat::Largest);
-          VTable->setComdat(C);
-        } else {
-          llvm_unreachable("unexpected linkage for vftable!");
-        }
-      } else {
-        if (llvm::GlobalValue::isWeakForLinker(VFTableLinkage))
-          VTable->setComdat(
-              CGM.getModule().getOrInsertComdat(VTable->getName()));
-      }
-      VFTable->setLinkage(VFTableLinkage);
-      CGM.setGlobalVisibility(VFTable, RD);
-      VFTablesMap[ID] = VFTable;
-    }
-    break;
+  if (llvm::GlobalValue *VFTable =
+          CGM.getModule().getNamedGlobal(VFTableName)) {
+    VFTablesMap[ID] = VFTable;
+    return VTableAliasIsRequired
+               ? cast<llvm::GlobalVariable>(
+                     cast<llvm::GlobalAlias>(VFTable)->getBaseObject())
+               : cast<llvm::GlobalVariable>(VFTable);
   }
 
+  uint64_t NumVTableSlots =
+      VTContext.getVFTableLayout(RD, VFPtr->FullOffsetInMDC)
+          .getNumVTableComponents();
+  llvm::GlobalValue::LinkageTypes VTableLinkage =
+      VTableAliasIsRequired ? llvm::GlobalValue::PrivateLinkage : VFTableLinkage;
+
+  StringRef VTableName = VTableAliasIsRequired ? StringRef() : VFTableName.str();
+
+  llvm::ArrayType *VTableType =
+      llvm::ArrayType::get(CGM.Int8PtrTy, NumVTableSlots);
+
+  // Create a backing variable for the contents of VTable.  The VTable may
+  // or may not include space for a pointer to RTTI data.
+  llvm::GlobalValue *VFTable;
+  VTable = new llvm::GlobalVariable(CGM.getModule(), VTableType,
+                                    /*isConstant=*/true, VTableLinkage,
+                                    /*Initializer=*/nullptr, VTableName);
+  VTable->setUnnamedAddr(true);
+
+  llvm::Comdat *C = nullptr;
+  if (!VFTableComesFromAnotherTU &&
+      (llvm::GlobalValue::isWeakForLinker(VFTableLinkage) ||
+       (llvm::GlobalValue::isLocalLinkage(VFTableLinkage) &&
+        VTableAliasIsRequired)))
+    C = CGM.getModule().getOrInsertComdat(VFTableName.str());
+
+  // Only insert a pointer into the VFTable for RTTI data if we are not
+  // importing it.  We never reference the RTTI data directly so there is no
+  // need to make room for it.
+  if (VTableAliasIsRequired) {
+    llvm::Value *GEPIndices[] = {llvm::ConstantInt::get(CGM.IntTy, 0),
+                                 llvm::ConstantInt::get(CGM.IntTy, 1)};
+    // Create a GEP which points just after the first entry in the VFTable,
+    // this should be the location of the first virtual method.
+    llvm::Constant *VTableGEP =
+        llvm::ConstantExpr::getInBoundsGetElementPtr(VTable, GEPIndices);
+    if (llvm::GlobalValue::isWeakForLinker(VFTableLinkage)) {
+      VFTableLinkage = llvm::GlobalValue::ExternalLinkage;
+      if (C)
+        C->setSelectionKind(llvm::Comdat::Largest);
+    }
+    VFTable = llvm::GlobalAlias::create(
+        cast<llvm::SequentialType>(VTableGEP->getType())->getElementType(),
+        /*AddressSpace=*/0, VFTableLinkage, VFTableName.str(), VTableGEP,
+        &CGM.getModule());
+    VFTable->setUnnamedAddr(true);
+  } else {
+    // We don't need a GlobalAlias to be a symbol for the VTable if we won't
+    // be referencing any RTTI data.
+    // The GlobalVariable will end up being an appropriate definition of the
+    // VFTable.
+    VFTable = VTable;
+  }
+  if (C)
+    VTable->setComdat(C);
+
+  if (RD->hasAttr<DLLImportAttr>())
+    VFTable->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
+  else if (RD->hasAttr<DLLExportAttr>())
+    VFTable->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
+
+  VFTablesMap[ID] = VFTable;
   return VTable;
 }
 
@@ -1468,7 +1614,7 @@
   llvm::Type *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo);
   llvm::Value *Callee = getVirtualFunctionPointer(CGF, GD, This, Ty);
 
-  ASTContext &Context = CGF.getContext();
+  ASTContext &Context = getContext();
   llvm::Value *ImplicitParam = llvm::ConstantInt::get(
       llvm::IntegerType::getInt32Ty(CGF.getLLVMContext()),
       DtorType == Dtor_Deleting);
@@ -1625,9 +1771,8 @@
          "should only emit vbtables for classes with vbtables");
 
   const ASTRecordLayout &BaseLayout =
-      CGM.getContext().getASTRecordLayout(VBT.BaseWithVPtr);
-  const ASTRecordLayout &DerivedLayout =
-    CGM.getContext().getASTRecordLayout(RD);
+      getContext().getASTRecordLayout(VBT.BaseWithVPtr);
+  const ASTRecordLayout &DerivedLayout = getContext().getASTRecordLayout(RD);
 
   SmallVector<llvm::Constant *, 4> Offsets(1 + ReusingBase->getNumVBases(),
                                            nullptr);
@@ -1661,9 +1806,6 @@
     llvm::ArrayType::get(CGM.IntTy, Offsets.size());
   llvm::Constant *Init = llvm::ConstantArray::get(VBTableType, Offsets);
   GV->setInitializer(Init);
-
-  // Set the right visibility.
-  CGM.setGlobalVisibility(GV, RD);
 }
 
 llvm::Value *MicrosoftCXXABI::performThisAdjustment(CodeGenFunction &CGF,
@@ -1719,8 +1861,8 @@
 
   if (RA.Virtual.Microsoft.VBIndex) {
     assert(RA.Virtual.Microsoft.VBIndex > 0);
-    int32_t IntSize =
-        getContext().getTypeSizeInChars(getContext().IntTy).getQuantity();
+    const ASTContext &Context = getContext();
+    int32_t IntSize = Context.getTypeSizeInChars(Context.IntTy).getQuantity();
     llvm::Value *VBPtr;
     llvm::Value *VBaseOffset =
         GetVBaseOffsetFromVBPtr(CGF, V, RA.Virtual.Microsoft.VBPtrOffset,
@@ -2874,7 +3016,7 @@
   auto Type = ABI.getClassHierarchyDescriptorType();
   auto CHD = new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage,
                                       /*Initializer=*/nullptr,
-                                      MangledName.c_str());
+                                      StringRef(MangledName));
   if (CHD->isWeakForLinker())
     CHD->setComdat(CGM.getModule().getOrInsertComdat(CHD->getName()));
 
@@ -2907,9 +3049,10 @@
   llvm::Type *PtrType = ABI.getImageRelativeType(
       ABI.getBaseClassDescriptorType()->getPointerTo());
   auto *ArrType = llvm::ArrayType::get(PtrType, Classes.size() + 1);
-  auto *BCA = new llvm::GlobalVariable(
-      Module, ArrType,
-      /*Constant=*/true, Linkage, /*Initializer=*/nullptr, MangledName.c_str());
+  auto *BCA =
+      new llvm::GlobalVariable(Module, ArrType,
+                               /*Constant=*/true, Linkage,
+                               /*Initializer=*/nullptr, StringRef(MangledName));
   if (BCA->isWeakForLinker())
     BCA->setComdat(CGM.getModule().getOrInsertComdat(BCA->getName()));
 
@@ -2949,9 +3092,9 @@
 
   // Forward-declare the base class descriptor.
   auto Type = ABI.getBaseClassDescriptorType();
-  auto BCD = new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage,
-                                      /*Initializer=*/nullptr,
-                                      MangledName.c_str());
+  auto BCD =
+      new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage,
+                               /*Initializer=*/nullptr, StringRef(MangledName));
   if (BCD->isWeakForLinker())
     BCD->setComdat(CGM.getModule().getOrInsertComdat(BCD->getName()));
 
@@ -2997,7 +3140,7 @@
   // Forward-declare the complete object locator.
   llvm::StructType *Type = ABI.getCompleteObjectLocatorType();
   auto COL = new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage,
-    /*Initializer=*/nullptr, MangledName.c_str());
+    /*Initializer=*/nullptr, StringRef(MangledName));
 
   // Initialize the CompleteObjectLocator.
   llvm::Constant *Fields[] = {
@@ -3018,12 +3161,87 @@
   return COL;
 }
 
+static QualType decomposeTypeForEH(ASTContext &Context, QualType T,
+                                   bool &IsConst, bool &IsVolatile) {
+  T = Context.getExceptionObjectType(T);
+
+  // C++14 [except.handle]p3:
+  //   A handler is a match for an exception object of type E if [...]
+  //     - the handler is of type cv T or const T& where T is a pointer type and
+  //       E is a pointer type that can be converted to T by [...]
+  //         - a qualification conversion
+  IsConst = false;
+  IsVolatile = false;
+  QualType PointeeType = T->getPointeeType();
+  if (!PointeeType.isNull()) {
+    IsConst = PointeeType.isConstQualified();
+    IsVolatile = PointeeType.isVolatileQualified();
+  }
+
+  // Member pointer types like "const int A::*" are represented by having RTTI
+  // for "int A::*" and separately storing the const qualifier.
+  if (const auto *MPTy = T->getAs<MemberPointerType>())
+    T = Context.getMemberPointerType(PointeeType.getUnqualifiedType(),
+                                     MPTy->getClass());
+
+  // Pointer types like "const int * const *" are represented by having RTTI
+  // for "const int **" and separately storing the const qualifier.
+  if (T->isPointerType())
+    T = Context.getPointerType(PointeeType.getUnqualifiedType());
+
+  return T;
+}
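+
A worked reading of the decomposition above (illustrative handler type):

    // catch (const int *const *p):
    //   exception object type: const int *const *
    //   IsConst = true, IsVolatile = false   (from the outer pointee)
    //   resulting RTTI type:   const int **  (qualifiers re-encoded in Flags)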
+
+llvm::Constant *
+MicrosoftCXXABI::getAddrOfCXXHandlerMapEntry(QualType Type,
+                                             QualType CatchHandlerType) {
+  // TypeDescriptors for exceptions never have qualified pointer types;
+  // qualifiers are stored separately in order to support qualification
+  // conversions.
+  bool IsConst, IsVolatile;
+  Type = decomposeTypeForEH(getContext(), Type, IsConst, IsVolatile);
+
+  bool IsReference = CatchHandlerType->isReferenceType();
+
+  SmallString<256> MangledName;
+  {
+    llvm::raw_svector_ostream Out(MangledName);
+    getMangleContext().mangleCXXHandlerMapEntry(Type, IsConst, IsVolatile,
+                                                IsReference, Out);
+  }
+
+  if (llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(MangledName))
+    return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy);
+
+  uint32_t Flags = 0;
+  if (IsConst)
+    Flags |= 1;
+  if (IsVolatile)
+    Flags |= 2;
+  if (IsReference)
+    Flags |= 8;
+
+  llvm::Constant *Fields[] = {
+      llvm::ConstantInt::get(CGM.IntTy, Flags),                // Flags
+      getImageRelativeConstant(getAddrOfRTTIDescriptor(Type)), // TypeDescriptor
+  };
+  llvm::StructType *HandlerMapEntryType = getHandlerMapEntryType();
+  auto *Var = new llvm::GlobalVariable(
+      CGM.getModule(), HandlerMapEntryType, /*Constant=*/true,
+      llvm::GlobalValue::PrivateLinkage,
+      llvm::ConstantStruct::get(HandlerMapEntryType, Fields),
+      StringRef(MangledName));
+  Var->setUnnamedAddr(true);
+  Var->setSection("llvm.metadata");
+  return Var;
+}
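+
The Flags word written above records the qualifiers dropped during
decomposition plus how the handler binds (only the bits used in this patch;
the meaning of other bits is unknown here):

    0x01  referenced type is const
    0x02  referenced type is volatile
    0x08  handler catches by reference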
+
 /// \brief Gets a TypeDescriptor.  Returns a llvm::Constant * rather than a
 /// llvm::GlobalVariable * because different type descriptors have different
 /// types, and need to be abstracted.  They are abstracted by casting the
 /// address to an Int8PtrTy.
 llvm::Constant *MicrosoftCXXABI::getAddrOfRTTIDescriptor(QualType Type) {
-  SmallString<256> MangledName, TypeInfoString;
+  SmallString<256> MangledName;
   {
     llvm::raw_svector_ostream Out(MangledName);
     getMangleContext().mangleCXXRTTI(Type, Out);
@@ -3034,6 +3252,7 @@
     return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy);
 
   // Compute the fields for the TypeDescriptor.
+  SmallString<256> TypeInfoString;
   {
     llvm::raw_svector_ostream Out(TypeInfoString);
     getMangleContext().mangleCXXRTTIName(Type, Out);
@@ -3050,7 +3269,7 @@
       CGM.getModule(), TypeDescriptorType, /*Constant=*/false,
       getLinkageForRTTI(Type),
       llvm::ConstantStruct::get(TypeDescriptorType, Fields),
-      MangledName.c_str());
+      StringRef(MangledName));
   if (Var->isWeakForLinker())
     Var->setComdat(CGM.getModule().getOrInsertComdat(Var->getName()));
   return llvm::ConstantExpr::getBitCast(Var, CGM.Int8PtrTy);
@@ -3107,3 +3326,399 @@
   }
   emitCXXDestructor(CGM, cast<CXXDestructorDecl>(MD), Type);
 }
+
+llvm::Function *
+MicrosoftCXXABI::getAddrOfCXXCtorClosure(const CXXConstructorDecl *CD,
+                                         CXXCtorType CT) {
+  assert(CT == Ctor_CopyingClosure || CT == Ctor_DefaultClosure);
+
+  // Calculate the mangled name.
+  SmallString<256> ThunkName;
+  llvm::raw_svector_ostream Out(ThunkName);
+  getMangleContext().mangleCXXCtor(CD, CT, Out);
+  Out.flush();
+
+  // If the thunk has been generated previously, just return it.
+  if (llvm::GlobalValue *GV = CGM.getModule().getNamedValue(ThunkName))
+    return cast<llvm::Function>(GV);
+
+  // Create the llvm::Function.
+  const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeMSCtorClosure(CD, CT);
+  llvm::FunctionType *ThunkTy = CGM.getTypes().GetFunctionType(FnInfo);
+  const CXXRecordDecl *RD = CD->getParent();
+  QualType RecordTy = getContext().getRecordType(RD);
+  llvm::Function *ThunkFn = llvm::Function::Create(
+      ThunkTy, getLinkageForRTTI(RecordTy), ThunkName.str(), &CGM.getModule());
+  ThunkFn->setCallingConv(static_cast<llvm::CallingConv::ID>(
+      FnInfo.getEffectiveCallingConvention()));
+  bool IsCopy = CT == Ctor_CopyingClosure;
+
+  // Start codegen.
+  CodeGenFunction CGF(CGM);
+  CGF.CurGD = GlobalDecl(CD, Ctor_Complete);
+
+  // Build FunctionArgs.
+  FunctionArgList FunctionArgs;
+
+  // A constructor always starts with a 'this' pointer as its first argument.
+  buildThisParam(CGF, FunctionArgs);
+
+  // Following the 'this' pointer is a reference to the source object that we
+  // are copying from.
+  ImplicitParamDecl SrcParam(
+      getContext(), nullptr, SourceLocation(), &getContext().Idents.get("src"),
+      getContext().getLValueReferenceType(RecordTy,
+                                          /*SpelledAsLValue=*/true));
+  if (IsCopy)
+    FunctionArgs.push_back(&SrcParam);
+
+  // Constructors for classes which utilize virtual bases have an additional
+  // parameter which indicates whether or not it is being delegated to by a more
+  // derived constructor.
+  ImplicitParamDecl IsMostDerived(getContext(), nullptr, SourceLocation(),
+                                  &getContext().Idents.get("is_most_derived"),
+                                  getContext().IntTy);
+  // Only add the parameter to the list if the class has virtual bases.
+  if (RD->getNumVBases() > 0)
+    FunctionArgs.push_back(&IsMostDerived);
+
+  // Start defining the function.
+  CGF.StartFunction(GlobalDecl(), FnInfo.getReturnType(), ThunkFn, FnInfo,
+                    FunctionArgs, CD->getLocation(), SourceLocation());
+  EmitThisParam(CGF);
+  llvm::Value *This = getThisValue(CGF);
+
+  llvm::Value *SrcVal =
+      IsCopy ? CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&SrcParam), "src")
+             : nullptr;
+
+  CallArgList Args;
+
+  // Push the this ptr.
+  Args.add(RValue::get(This), CD->getThisType(getContext()));
+
+  // Push the src ptr.
+  if (SrcVal)
+    Args.add(RValue::get(SrcVal), SrcParam.getType());
+
+  // Add the rest of the default arguments.
+  std::vector<Stmt *> ArgVec;
+  for (unsigned I = IsCopy ? 1 : 0, E = CD->getNumParams(); I != E; ++I) {
+    Stmt *DefaultArg = getContext().getDefaultArgExprForConstructor(CD, I);
+    assert(DefaultArg && "sema forgot to instantiate default args");
+    ArgVec.push_back(DefaultArg);
+  }
+
+  CodeGenFunction::RunCleanupsScope Cleanups(CGF);
+
+  const auto *FPT = CD->getType()->castAs<FunctionProtoType>();
+  ConstExprIterator ArgBegin(ArgVec.data()),
+      ArgEnd(ArgVec.data() + ArgVec.size());
+  CGF.EmitCallArgs(Args, FPT, ArgBegin, ArgEnd, CD, IsCopy ? 1 : 0);
+
+  // Insert any ABI-specific implicit constructor arguments.
+  unsigned ExtraArgs = addImplicitConstructorArgs(CGF, CD, Ctor_Complete,
+                                                  /*ForVirtualBase=*/false,
+                                                  /*Delegating=*/false, Args);
+
+  // Call the constructor with our arguments.
+  llvm::Value *CalleeFn = CGM.getAddrOfCXXStructor(CD, StructorType::Complete);
+  const CGFunctionInfo &CalleeInfo = CGM.getTypes().arrangeCXXConstructorCall(
+      Args, CD, Ctor_Complete, ExtraArgs);
+  CGF.EmitCall(CalleeInfo, CalleeFn, ReturnValueSlot(), Args, CD);
+
+  Cleanups.ForceCleanup();
+
+  // Emit the ret instruction and remove any temporary instructions created to
+  // aid CodeGen.
+  CGF.FinishFunction(SourceLocation());
+
+  return ThunkFn;
+}
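+
Conceptually, the emitted closure behaves like the following hand-written
wrapper for the earlier dllexport example (a sketch; the name is invented and
the real thunk calls the complete-object constructor directly rather than via
placement-new):

    // struct __declspec(dllexport) A { A(int x = 0, int y = 0); };
    void A_ctor_default_closure(A *thisPtr) {  // hypothetical symbol
      new (thisPtr) A(0, 0);  // default arguments instantiated by Sema
    }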
+
+llvm::Constant *MicrosoftCXXABI::getCatchableType(QualType T,
+                                                  uint32_t NVOffset,
+                                                  int32_t VBPtrOffset,
+                                                  uint32_t VBIndex) {
+  assert(!T->isReferenceType());
+
+  CXXRecordDecl *RD = T->getAsCXXRecordDecl();
+  const CXXConstructorDecl *CD =
+      RD ? CGM.getContext().getCopyConstructorForExceptionObject(RD) : nullptr;
+  CXXCtorType CT = Ctor_Complete;
+  if (CD)
+    if (!hasDefaultCXXMethodCC(getContext(), CD) || CD->getNumParams() != 1)
+      CT = Ctor_CopyingClosure;
+
+  uint32_t Size = getContext().getTypeSizeInChars(T).getQuantity();
+  SmallString<256> MangledName;
+  {
+    llvm::raw_svector_ostream Out(MangledName);
+    getMangleContext().mangleCXXCatchableType(T, CD, CT, Size, NVOffset,
+                                              VBPtrOffset, VBIndex, Out);
+  }
+  if (llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(MangledName))
+    return getImageRelativeConstant(GV);
+
+  // The TypeDescriptor is used by the runtime to determine if a catch handler
+  // is appropriate for the exception object.
+  llvm::Constant *TD = getImageRelativeConstant(getAddrOfRTTIDescriptor(T));
+
+  // The runtime is responsible for calling the copy constructor if the
+  // exception is caught by value.
+  llvm::Constant *CopyCtor;
+  if (CD) {
+    if (CT == Ctor_CopyingClosure)
+      CopyCtor = getAddrOfCXXCtorClosure(CD, Ctor_CopyingClosure);
+    else
+      CopyCtor = CGM.getAddrOfCXXStructor(CD, StructorType::Complete);
+
+    CopyCtor = llvm::ConstantExpr::getBitCast(CopyCtor, CGM.Int8PtrTy);
+  } else {
+    CopyCtor = llvm::Constant::getNullValue(CGM.Int8PtrTy);
+  }
+  CopyCtor = getImageRelativeConstant(CopyCtor);
+
+  bool IsScalar = !RD;
+  bool HasVirtualBases = false;
+  bool IsStdBadAlloc = false; // std::bad_alloc is special for some reason.
+  QualType PointeeType = T;
+  if (T->isPointerType())
+    PointeeType = T->getPointeeType();
+  if (const CXXRecordDecl *RD = PointeeType->getAsCXXRecordDecl()) {
+    HasVirtualBases = RD->getNumVBases() > 0;
+    if (IdentifierInfo *II = RD->getIdentifier())
+      IsStdBadAlloc = II->isStr("bad_alloc") && RD->isInStdNamespace();
+  }
+
+  // Encode the relevant CatchableType properties into the Flags bitfield.
+  // FIXME: Figure out how bits 2 or 8 can get set.
+  uint32_t Flags = 0;
+  if (IsScalar)
+    Flags |= 1;
+  if (HasVirtualBases)
+    Flags |= 4;
+  if (IsStdBadAlloc)
+    Flags |= 16;
+
+  llvm::Constant *Fields[] = {
+      llvm::ConstantInt::get(CGM.IntTy, Flags),       // Flags
+      TD,                                             // TypeDescriptor
+      llvm::ConstantInt::get(CGM.IntTy, NVOffset),    // NonVirtualAdjustment
+      llvm::ConstantInt::get(CGM.IntTy, VBPtrOffset), // OffsetToVBPtr
+      llvm::ConstantInt::get(CGM.IntTy, VBIndex),     // VBTableIndex
+      llvm::ConstantInt::get(CGM.IntTy, Size),        // Size
+      CopyCtor                                        // CopyCtor
+  };
+  llvm::StructType *CTType = getCatchableTypeType();
+  auto *GV = new llvm::GlobalVariable(
+      CGM.getModule(), CTType, /*Constant=*/true, getLinkageForRTTI(T),
+      llvm::ConstantStruct::get(CTType, Fields), StringRef(MangledName));
+  GV->setUnnamedAddr(true);
+  GV->setSection(".xdata");
+  if (GV->isWeakForLinker())
+    GV->setComdat(CGM.getModule().getOrInsertComdat(GV->getName()));
+  return getImageRelativeConstant(GV);
+}
+
+llvm::GlobalVariable *MicrosoftCXXABI::getCatchableTypeArray(QualType T) {
+  assert(!T->isReferenceType());
+
+  // See if we've already generated a CatchableTypeArray for this type before.
+  llvm::GlobalVariable *&CTA = CatchableTypeArrays[T];
+  if (CTA)
+    return CTA;
+
+  // Ensure that we don't have duplicate entries in our CatchableTypeArray by
+  // using a SmallSetVector.  Duplicates may arise due to virtual bases
+  // occurring more than once in the hierarchy.
+  llvm::SmallSetVector<llvm::Constant *, 2> CatchableTypes;
+
+  // C++14 [except.handle]p3:
+  //   A handler is a match for an exception object of type E if [...]
+  //     - the handler is of type cv T or cv T& and T is an unambiguous public
+  //       base class of E, or
+  //     - the handler is of type cv T or const T& where T is a pointer type and
+  //       E is a pointer type that can be converted to T by [...]
+  //         - a standard pointer conversion (4.10) not involving conversions to
+  //           pointers to private or protected or ambiguous classes
+  const CXXRecordDecl *MostDerivedClass = nullptr;
+  bool IsPointer = T->isPointerType();
+  if (IsPointer)
+    MostDerivedClass = T->getPointeeType()->getAsCXXRecordDecl();
+  else
+    MostDerivedClass = T->getAsCXXRecordDecl();
+
+  // Collect all the unambiguous public bases of the MostDerivedClass.
+  if (MostDerivedClass) {
+    const ASTContext &Context = getContext();
+    const ASTRecordLayout &MostDerivedLayout =
+        Context.getASTRecordLayout(MostDerivedClass);
+    MicrosoftVTableContext &VTableContext = CGM.getMicrosoftVTableContext();
+    SmallVector<MSRTTIClass, 8> Classes;
+    serializeClassHierarchy(Classes, MostDerivedClass);
+    Classes.front().initialize(/*Parent=*/nullptr, /*Specifier=*/nullptr);
+    detectAmbiguousBases(Classes);
+    for (const MSRTTIClass &Class : Classes) {
+      // Skip any ambiguous or private bases.
+      if (Class.Flags &
+          (MSRTTIClass::IsPrivateOnPath | MSRTTIClass::IsAmbiguous))
+        continue;
+      // Write down how to convert from a derived pointer to a base pointer.
+      uint32_t OffsetInVBTable = 0;
+      int32_t VBPtrOffset = -1;
+      if (Class.VirtualRoot) {
+        OffsetInVBTable =
+          VTableContext.getVBTableIndex(MostDerivedClass, Class.VirtualRoot)*4;
+        VBPtrOffset = MostDerivedLayout.getVBPtrOffset().getQuantity();
+      }
+
+      // Turn our record back into a pointer if the exception object is a
+      // pointer.
+      QualType RTTITy = QualType(Class.RD->getTypeForDecl(), 0);
+      if (IsPointer)
+        RTTITy = Context.getPointerType(RTTITy);
+      CatchableTypes.insert(getCatchableType(RTTITy, Class.OffsetInVBase,
+                                             VBPtrOffset, OffsetInVBTable));
+    }
+  }
+
+  // C++14 [except.handle]p3:
+  //   A handler is a match for an exception object of type E if
+  //     - The handler is of type cv T or cv T& and E and T are the same type
+  //       (ignoring the top-level cv-qualifiers)
+  CatchableTypes.insert(getCatchableType(T));
+
+  // C++14 [except.handle]p3:
+  //   A handler is a match for an exception object of type E if
+  //     - the handler is of type cv T or const T& where T is a pointer type and
+  //       E is a pointer type that can be converted to T by [...]
+  //         - a standard pointer conversion (4.10) not involving conversions to
+  //           pointers to private or protected or ambiguous classes
+  //
+  // All pointers are convertible to pointer-to-void so ensure that it is in the
+  // CatchableTypeArray.
+  if (IsPointer)
+    CatchableTypes.insert(getCatchableType(getContext().VoidPtrTy));
+
+  // C++14 [except.handle]p3:
+  //   A handler is a match for an exception object of type E if [...]
+  //     - the handler is of type cv T or const T& where T is a pointer or
+  //       pointer to member type and E is std::nullptr_t.
+  //
+  // We cannot list every possible pointer type here, which makes this
+  // implementation incompatible with the standard.  However, MSVC includes an
+  // entry for pointer-to-void in this case.  Let's do the same.
+  if (T->isNullPtrType())
+    CatchableTypes.insert(getCatchableType(getContext().VoidPtrTy));
+
+  uint32_t NumEntries = CatchableTypes.size();
+  llvm::Type *CTType =
+      getImageRelativeType(getCatchableTypeType()->getPointerTo());
+  llvm::ArrayType *AT = llvm::ArrayType::get(CTType, NumEntries);
+  llvm::StructType *CTAType = getCatchableTypeArrayType(NumEntries);
+  llvm::Constant *Fields[] = {
+      llvm::ConstantInt::get(CGM.IntTy, NumEntries),    // NumEntries
+      llvm::ConstantArray::get(
+          AT, llvm::makeArrayRef(CatchableTypes.begin(),
+                                 CatchableTypes.end())) // CatchableTypes
+  };
+  SmallString<256> MangledName;
+  {
+    llvm::raw_svector_ostream Out(MangledName);
+    getMangleContext().mangleCXXCatchableTypeArray(T, NumEntries, Out);
+  }
+  CTA = new llvm::GlobalVariable(
+      CGM.getModule(), CTAType, /*Constant=*/true, getLinkageForRTTI(T),
+      llvm::ConstantStruct::get(CTAType, Fields), StringRef(MangledName));
+  CTA->setUnnamedAddr(true);
+  CTA->setSection(".xdata");
+  if (CTA->isWeakForLinker())
+    CTA->setComdat(CGM.getModule().getOrInsertComdat(CTA->getName()));
+  return CTA;
+}
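+
An illustrative enumeration under the rules quoted above (author's reading):

    struct B {};
    struct D : B {};
    // throw D();     CatchableTypeArray = { D, B }
    // throw (D *)p;  CatchableTypeArray = { D*, B*, void* }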
+
+llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) {
+  bool IsConst, IsVolatile;
+  T = decomposeTypeForEH(getContext(), T, IsConst, IsVolatile);
+
+  // The CatchableTypeArray enumerates the various (CV-unqualified) types that
+  // the exception object may be caught as.
+  llvm::GlobalVariable *CTA = getCatchableTypeArray(T);
+  // The first field in a CatchableTypeArray is the number of CatchableTypes.
+  // This is used as a component of the mangled name which means that we need to
+  // know what it is in order to see if we have previously generated the
+  // ThrowInfo.
+  uint32_t NumEntries =
+      cast<llvm::ConstantInt>(CTA->getInitializer()->getAggregateElement(0U))
+          ->getLimitedValue();
+
+  SmallString<256> MangledName;
+  {
+    llvm::raw_svector_ostream Out(MangledName);
+    getMangleContext().mangleCXXThrowInfo(T, IsConst, IsVolatile, NumEntries,
+                                          Out);
+  }
+
+  // Reuse a previously generated ThrowInfo if we have generated an appropriate
+  // one before.
+  if (llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(MangledName))
+    return GV;
+
+  // The RTTI TypeDescriptor uses an unqualified type but catch clauses must
+  // be at least as cv-qualified.  Encode this requirement into the Flags
+  // bitfield.
+  uint32_t Flags = 0;
+  if (IsConst)
+    Flags |= 1;
+  if (IsVolatile)
+    Flags |= 2;
+
+  // The cleanup-function (a destructor) must be called when the exception
+  // object's lifetime ends.
+  llvm::Constant *CleanupFn = llvm::Constant::getNullValue(CGM.Int8PtrTy);
+  if (const CXXRecordDecl *RD = T->getAsCXXRecordDecl())
+    if (CXXDestructorDecl *DtorD = RD->getDestructor())
+      if (!DtorD->isTrivial())
+        CleanupFn = llvm::ConstantExpr::getBitCast(
+            CGM.getAddrOfCXXStructor(DtorD, StructorType::Complete),
+            CGM.Int8PtrTy);
+  // This is unused as far as we can tell; initialize it to null.
+  llvm::Constant *ForwardCompat =
+      getImageRelativeConstant(llvm::Constant::getNullValue(CGM.Int8PtrTy));
+  llvm::Constant *PointerToCatchableTypes = getImageRelativeConstant(
+      llvm::ConstantExpr::getBitCast(CTA, CGM.Int8PtrTy));
+  llvm::StructType *TIType = getThrowInfoType();
+  llvm::Constant *Fields[] = {
+      llvm::ConstantInt::get(CGM.IntTy, Flags), // Flags
+      getImageRelativeConstant(CleanupFn),      // CleanupFn
+      ForwardCompat,                            // ForwardCompat
+      PointerToCatchableTypes                   // CatchableTypeArray
+  };
+  auto *GV = new llvm::GlobalVariable(
+      CGM.getModule(), TIType, /*Constant=*/true, getLinkageForRTTI(T),
+      llvm::ConstantStruct::get(TIType, Fields), StringRef(MangledName));
+  GV->setUnnamedAddr(true);
+  GV->setSection(".xdata");
+  if (GV->isWeakForLinker())
+    GV->setComdat(CGM.getModule().getOrInsertComdat(GV->getName()));
+  return GV;
+}
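+
The CleanupFn slot is non-null only when destroying the exception object does
real work (hedged illustration):

    struct Trivial { int x; };   // trivial dtor -> CleanupFn is null
    struct Owner { ~Owner(); };  // user dtor    -> CleanupFn is its complete dtor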
+
+void MicrosoftCXXABI::emitThrow(CodeGenFunction &CGF, const CXXThrowExpr *E) {
+  const Expr *SubExpr = E->getSubExpr();
+  QualType ThrowType = SubExpr->getType();
+  // The exception object lives on the stack and its address is passed to the
+  // runtime function.
+  llvm::AllocaInst *AI = CGF.CreateMemTemp(ThrowType);
+  CGF.EmitAnyExprToMem(SubExpr, AI, ThrowType.getQualifiers(),
+                       /*IsInit=*/true);
+
+  // The so-called ThrowInfo is used to describe how the exception object may be
+  // caught.
+  llvm::GlobalVariable *TI = getThrowInfo(ThrowType);
+
+  // Call into the runtime to throw the exception.
+  llvm::Value *Args[] = {CGF.Builder.CreateBitCast(AI, CGM.Int8PtrTy), TI};
+  CGF.EmitNoreturnRuntimeCallOrInvoke(getThrowFn(), Args);
+}
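+
End to end, 'throw Foo();' lowers to roughly the following (a sketch; the
ThrowInfo symbol name is illustrative, not the real MS mangling):

    ; %tmp = alloca %struct.Foo, then construct Foo into %tmp
    call void @_CxxThrowException(i8* %tmp.i8, %eh.ThrowInfo* @TI_Foo)
    ; stdcall on 32-bit x86; noreturn either way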
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 36f9914..43cf791 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -238,7 +238,7 @@
 /// \return The field declaration for the single non-empty field, if
 /// it exists.
 static const Type *isSingleElementStruct(QualType T, ASTContext &Context) {
-  const RecordType *RT = T->getAsStructureType();
+  const RecordType *RT = T->getAs<RecordType>();
   if (!RT)
     return nullptr;
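
With getAs<RecordType> the check now admits classes and unions rather than
only plain structs, which is all getAsStructureType matched (hedged reading of
this one-line change):

    struct S { double d; };          // matched before and after
    class  C { public: double d; };  // only matched after this change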
 
@@ -664,10 +664,6 @@
                    ('T' << 24);
     return llvm::ConstantInt::get(CGM.Int32Ty, Sig);
   }
-
-  bool hasSjLjLowering(CodeGen::CodeGenFunction &CGF) const override {
-    return true;
-  }
 };
 
 }
@@ -1613,10 +1609,6 @@
   unsigned getOpenMPSimdDefaultAlignment(QualType) const override {
     return HasAVX ? 32 : 16;
   }
-
-  bool hasSjLjLowering(CodeGen::CodeGenFunction &CGF) const override {
-    return true;
-  }
 };
 
 class PS4TargetCodeGenInfo : public X86_64TargetCodeGenInfo {
@@ -2210,20 +2202,9 @@
     Ty = QualType(InnerTy, 0);
 
   llvm::Type *IRType = CGT.ConvertType(Ty);
-
-  // If the preferred type is a 16-byte vector, prefer to pass it.
-  if (llvm::VectorType *VT = dyn_cast<llvm::VectorType>(IRType)){
-    llvm::Type *EltTy = VT->getElementType();
-    unsigned BitWidth = VT->getBitWidth();
-    if ((BitWidth >= 128 && BitWidth <= 256) &&
-        (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
-         EltTy->isIntegerTy(8) || EltTy->isIntegerTy(16) ||
-         EltTy->isIntegerTy(32) || EltTy->isIntegerTy(64) ||
-         EltTy->isIntegerTy(128)))
-      return VT;
-  }
-
-  return llvm::VectorType::get(llvm::Type::getDoubleTy(getVMContext()), 2);
+  assert(isa<llvm::VectorType>(IRType) &&
+         "Trying to return a non-vector type in a vector register!");
+  return IRType;
 }
 
 /// BitsContainNoUserData - Return true if the specified [start,end) bit range
@@ -3134,10 +3115,6 @@
   unsigned getOpenMPSimdDefaultAlignment(QualType) const override {
     return 16; // Natural alignment for Altivec vectors.
   }
-
-  bool hasSjLjLowering(CodeGen::CodeGenFunction &CGF) const override {
-    return true;
-  }
 };
 
 }
@@ -3287,13 +3264,42 @@
 private:
   static const unsigned GPRBits = 64;
   ABIKind Kind;
+  bool HasQPX;
+
+  // A vector of float or double will be promoted to <4 x float> or
+  // <4 x double> and will be passed in a QPX register.
+  bool IsQPXVectorTy(const Type *Ty) const {
+    if (!HasQPX)
+      return false;
+
+    if (const VectorType *VT = Ty->getAs<VectorType>()) {
+      unsigned NumElements = VT->getNumElements();
+      if (NumElements == 1)
+        return false;
+
+      if (VT->getElementType()->isSpecificBuiltinType(BuiltinType::Double)) {
+        if (getContext().getTypeSize(Ty) <= 256)
+          return true;
+      } else if (VT->getElementType()->
+                   isSpecificBuiltinType(BuiltinType::Float)) {
+        if (getContext().getTypeSize(Ty) <= 128)
+          return true;
+      }
+    }
+
+    return false;
+  }
+
+  bool IsQPXVectorTy(QualType Ty) const {
+    return IsQPXVectorTy(Ty.getTypePtr());
+  }
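+
Hedged examples of what this predicate accepts under -mabi=elfv1-qpx (typedef
names are illustrative):

    typedef double v4df __attribute__((vector_size(32))); // 256-bit double: QPX
    typedef float  v4sf __attribute__((vector_size(16))); // 128-bit float:  QPX
    typedef double v8df __attribute__((vector_size(64))); // 512-bit: not QPX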
 
 public:
-  PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind)
-    : DefaultABIInfo(CGT), Kind(Kind) {}
+  PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind, bool HasQPX)
+    : DefaultABIInfo(CGT), Kind(Kind), HasQPX(HasQPX) {}
 
   bool isPromotableTypeForABI(QualType Ty) const;
-  bool isAlignedParamType(QualType Ty) const;
+  bool isAlignedParamType(QualType Ty, bool &Align32) const;
 
   ABIArgInfo classifyReturnType(QualType RetTy) const;
   ABIArgInfo classifyArgumentType(QualType Ty) const;
@@ -3318,7 +3324,8 @@
       const Type *T = isSingleElementStruct(I.type, getContext());
       if (T) {
         const BuiltinType *BT = T->getAs<BuiltinType>();
-        if ((T->isVectorType() && getContext().getTypeSize(T) == 128) ||
+        if (IsQPXVectorTy(T) ||
+            (T->isVectorType() && getContext().getTypeSize(T) == 128) ||
             (BT && BT->isFloatingPoint())) {
           QualType QT(T, 0);
           I.info = ABIArgInfo::getDirectInReg(CGT.ConvertType(QT));
@@ -3334,10 +3341,13 @@
 };
 
 class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo {
+  bool HasQPX;
+
 public:
   PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT,
-                               PPC64_SVR4_ABIInfo::ABIKind Kind)
-    : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind)) {}
+                               PPC64_SVR4_ABIInfo::ABIKind Kind, bool HasQPX)
+    : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind, HasQPX)),
+      HasQPX(HasQPX) {}
 
   int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
     // This is recovered from gcc output.
@@ -3347,12 +3357,13 @@
   bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                                llvm::Value *Address) const override;
 
-  unsigned getOpenMPSimdDefaultAlignment(QualType) const override {
-    return 16; // Natural alignment for Altivec and VSX vectors.
-  }
+  unsigned getOpenMPSimdDefaultAlignment(QualType QT) const override {
+    if (HasQPX)
+      if (const PointerType *PT = QT->getAs<PointerType>())
+        if (PT->getPointeeType()->isSpecificBuiltinType(BuiltinType::Double))
+          return 32; // Natural alignment for QPX doubles.
 
-  bool hasSjLjLowering(CodeGen::CodeGenFunction &CGF) const override {
-    return true;
+    return 16; // Natural alignment for Altivec and VSX vectors.
   }
 };
 
@@ -3371,10 +3382,6 @@
   unsigned getOpenMPSimdDefaultAlignment(QualType) const override {
     return 16; // Natural alignment for Altivec vectors.
   }
-
-  bool hasSjLjLowering(CodeGen::CodeGenFunction &CGF) const override {
-    return true;
-  }
 };
 
 }
@@ -3408,15 +3415,23 @@
 /// isAlignedParamType - Determine whether a type requires 16-byte
 /// alignment in the parameter area.
 bool
-PPC64_SVR4_ABIInfo::isAlignedParamType(QualType Ty) const {
+PPC64_SVR4_ABIInfo::isAlignedParamType(QualType Ty, bool &Align32) const {
+  Align32 = false;
+
   // Complex types are passed just like their elements.
   if (const ComplexType *CTy = Ty->getAs<ComplexType>())
     Ty = CTy->getElementType();
 
   // Only vector types of size 16 bytes need alignment (larger types are
   // passed via reference, smaller types are not aligned).
-  if (Ty->isVectorType())
+  if (IsQPXVectorTy(Ty)) {
+    if (getContext().getTypeSize(Ty) > 128)
+      Align32 = true;
+
+    return true;
+  } else if (Ty->isVectorType()) {
     return getContext().getTypeSize(Ty) == 128;
+  }
 
   // For single-element float/vector structs, we consider the whole type
   // to have the same alignment requirements as its single element.
@@ -3424,7 +3439,7 @@
   const Type *EltType = isSingleElementStruct(Ty, getContext());
   if (EltType) {
     const BuiltinType *BT = EltType->getAs<BuiltinType>();
-    if ((EltType->isVectorType() &&
+    if (IsQPXVectorTy(EltType) || (EltType->isVectorType() &&
          getContext().getTypeSize(EltType) == 128) ||
         (BT && BT->isFloatingPoint()))
       AlignAsType = EltType;
@@ -3438,13 +3453,22 @@
     AlignAsType = Base;
 
   // With special case aggregates, only vector base types need alignment.
-  if (AlignAsType)
+  if (AlignAsType && IsQPXVectorTy(AlignAsType)) {
+    if (getContext().getTypeSize(AlignAsType) > 128)
+      Align32 = true;
+
+    return true;
+  } else if (AlignAsType) {
     return AlignAsType->isVectorType();
+  }
 
   // Otherwise, we only need alignment for any aggregate type that
   // has an alignment requirement of >= 16 bytes.
-  if (isAggregateTypeForABI(Ty) && getContext().getTypeAlign(Ty) >= 128)
+  if (isAggregateTypeForABI(Ty) && getContext().getTypeAlign(Ty) >= 128) {
+    if (HasQPX && getContext().getTypeAlign(Ty) >= 256)
+      Align32 = true;
     return true;
+  }
 
   return false;
 }
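
Hedged examples of the new out-parameter (note the doc comment above the
function predates the QPX case, where the required alignment can now be 32
bytes): with the v4df/v4sf typedefs sketched earlier, isAlignedParamType(v4df)
returns true with Align32 set, while isAlignedParamType(v4sf) returns true
with Align32 left false, preserving the old 16-byte behavior.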
@@ -3550,7 +3574,7 @@
       return true;
   }
   if (const VectorType *VT = Ty->getAs<VectorType>()) {
-    if (getContext().getTypeSize(VT) == 128)
+    if (getContext().getTypeSize(VT) == 128 || IsQPXVectorTy(Ty))
       return true;
   }
   return false;
@@ -3576,7 +3600,7 @@
 
   // Non-Altivec vector types are passed in GPRs (smaller than 16 bytes)
   // or via reference (larger than 16 bytes).
-  if (Ty->isVectorType()) {
+  if (Ty->isVectorType() && !IsQPXVectorTy(Ty)) {
     uint64_t Size = getContext().getTypeSize(Ty);
     if (Size > 128)
       return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
@@ -3590,7 +3614,9 @@
     if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
       return ABIArgInfo::getIndirect(0, RAA == CGCXXABI::RAA_DirectInMemory);
 
-    uint64_t ABIAlign = isAlignedParamType(Ty)? 16 : 8;
+    bool Align32;
+    uint64_t ABIAlign = isAlignedParamType(Ty, Align32) ?
+                          (Align32 ? 32 : 16) : 8;
     uint64_t TyAlign = getContext().getTypeAlign(Ty) / 8;
 
     // ELFv2 homogeneous aggregates are passed as array types.
@@ -3647,7 +3673,7 @@
 
   // Non-Altivec vector types are returned in GPRs (smaller than 16 bytes)
   // or via reference (larger than 16 bytes).
-  if (RetTy->isVectorType()) {
+  if (RetTy->isVectorType() && !IsQPXVectorTy(RetTy)) {
     uint64_t Size = getContext().getTypeSize(RetTy);
     if (Size > 128)
       return ABIArgInfo::getIndirect(0);
@@ -3704,10 +3730,13 @@
   llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur");
 
   // Handle types that require 16-byte alignment in the parameter save area.
-  if (isAlignedParamType(Ty)) {
+  bool Align32;
+  if (isAlignedParamType(Ty, Align32)) {
     llvm::Value *AddrAsInt = Builder.CreatePtrToInt(Addr, CGF.Int64Ty);
-    AddrAsInt = Builder.CreateAdd(AddrAsInt, Builder.getInt64(15));
-    AddrAsInt = Builder.CreateAnd(AddrAsInt, Builder.getInt64(-16));
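+    // Round the address up to the required boundary:
+    // (addr + align - 1) & -align.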
+    AddrAsInt = Builder.CreateAdd(AddrAsInt,
+                                  Builder.getInt64(Align32 ? 31 : 15));
+    AddrAsInt = Builder.CreateAnd(AddrAsInt,
+                                  Builder.getInt64(Align32 ? -32 : -16));
     Addr = Builder.CreateIntToPtr(AddrAsInt, BP, "ap.align");
   }
 
@@ -4485,12 +4514,6 @@
                                               llvm::AttributeSet::FunctionIndex,
                                               B));
   }
-
-  bool hasSjLjLowering(CodeGen::CodeGenFunction &CGF) const override {
-    return false;
-    // FIXME: backend implementation too restricted, even on Darwin.
-    // return CGF.getTarget().getTriple().isOSDarwin();
-  }
 };
 
 class WindowsARMTargetCodeGenInfo : public ARMTargetCodeGenInfo {
@@ -4646,14 +4669,11 @@
   uint64_t ABIAlign = 4;
   uint64_t TyAlign = getContext().getTypeAlign(Ty) / 8;
   if (getABIKind() == ARMABIInfo::AAPCS_VFP ||
       getABIKind() == ARMABIInfo::AAPCS)
     ABIAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8);
+
   if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) {
-    // Update Allocated GPRs. Since this is only used when the size of the
-    // argument is greater than 64 bytes, this will always use up any available
-    // registers (of which there are 4). We also don't care about getting the
-    // alignment right, because general-purpose registers cannot be back-filled.
-    return ABIArgInfo::getIndirect(TyAlign, /*ByVal=*/true,
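+    // Pass large types indirectly byval at the ABI alignment, realigning
+    // only when the type's natural alignment is stricter.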
+    return ABIArgInfo::getIndirect(ABIAlign, /*ByVal=*/true,
            /*Realign=*/TyAlign > ABIAlign);
   }
 
@@ -6133,12 +6153,7 @@
 
     // Check if Ty is a usable substitute for the coercion type.
     bool isUsableType(llvm::StructType *Ty) const {
-      if (Ty->getNumElements() != Elems.size())
-        return false;
-      for (unsigned i = 0, e = Elems.size(); i != e; ++i)
-        if (Elems[i] != Ty->getElementType(i))
-          return false;
-      return true;
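+      // ArrayRef equality checks length and per-element identity,
+      // replacing the manual comparison loop above.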
+      return llvm::makeArrayRef(Elems) == Ty->elements();
     }
 
     // Get the coercion type as a literal struct type.
@@ -6995,19 +7010,21 @@
       PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv1;
       if (getTarget().getABI() == "elfv2")
         Kind = PPC64_SVR4_ABIInfo::ELFv2;
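+      // The "elfv1-qpx" ABI string keeps the default ELFv1 kind and
+      // additionally enables QPX.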
+      bool HasQPX = getTarget().getABI() == "elfv1-qpx";
 
       return *(TheTargetCodeGenInfo =
-               new PPC64_SVR4_TargetCodeGenInfo(Types, Kind));
+               new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX));
     } else
       return *(TheTargetCodeGenInfo = new PPC64TargetCodeGenInfo(Types));
   case llvm::Triple::ppc64le: {
     assert(Triple.isOSBinFormatELF() && "PPC64 LE non-ELF not supported!");
     PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv2;
-    if (getTarget().getABI() == "elfv1")
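+    // On ppc64le, "elfv1-qpx" also implies the ELFv1 ABI.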
+    if (getTarget().getABI() == "elfv1" || getTarget().getABI() == "elfv1-qpx")
       Kind = PPC64_SVR4_ABIInfo::ELFv1;
+    bool HasQPX = getTarget().getABI() == "elfv1-qpx";
 
     return *(TheTargetCodeGenInfo =
-             new PPC64_SVR4_TargetCodeGenInfo(Types, Kind));
+             new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX));
   }
 
   case llvm::Triple::nvptx:
diff --git a/lib/CodeGen/TargetInfo.h b/lib/CodeGen/TargetInfo.h
index 87f1376..cc469d6 100644
--- a/lib/CodeGen/TargetInfo.h
+++ b/lib/CodeGen/TargetInfo.h
@@ -225,13 +225,6 @@
   virtual unsigned getOpenMPSimdDefaultAlignment(QualType Type) const {
     return 0;
   }
-
-  /// Control whether __builtin_longjmp / __builtin_setjmp are lowered to
-  /// llvm.eh.sjlj.longjmp / llvm.eh.sjlj.setjmp or the normal library
-  /// function.
-  virtual bool hasSjLjLowering(CodeGen::CodeGenFunction &CGF) const {
-    return false;
-  }
 };
 }