Update aosp/master clang for rebase to r235153

Change-Id: Ia94bbcb6da7c75b6e7c2afedd1001094d62a7324
diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp
index bdab637..7bc351a 100644
--- a/lib/CodeGen/BackendUtil.cpp
+++ b/lib/CodeGen/BackendUtil.cpp
@@ -27,7 +27,6 @@
 #include "llvm/IR/Verifier.h"
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/Timer.h"
@@ -97,7 +96,7 @@
 
   void CreatePasses();
 
-  /// CreateTargetMachine - Generates the TargetMachine.
+  /// Generates the TargetMachine.
   /// Returns Null if it is unable to create the target machine.
   /// Some of our clang tests specify triples which are not built
   /// into clang. This is okay because these tests check the generated
@@ -107,10 +106,10 @@
   /// the requested target.
   TargetMachine *CreateTargetMachine(bool MustCreateTM);
 
-  /// AddEmitPasses - Add passes necessary to emit assembly or LLVM IR.
+  /// Add passes necessary to emit assembly or LLVM IR.
   ///
   /// \return True on success.
-  bool AddEmitPasses(BackendAction Action, formatted_raw_ostream &OS);
+  bool AddEmitPasses(BackendAction Action, raw_pwrite_stream &OS);
 
 public:
   EmitAssemblyHelper(DiagnosticsEngine &_Diags,
@@ -133,7 +132,7 @@
 
   std::unique_ptr<TargetMachine> TM;
 
-  void EmitAssembly(BackendAction Action, raw_ostream *OS);
+  void EmitAssembly(BackendAction Action, raw_pwrite_stream *OS);
 };
 
 // We need this wrapper to access LangOpts and CGOpts from extension functions
@@ -437,8 +436,6 @@
     BackendArgs.push_back("-time-passes");
   for (unsigned i = 0, e = CodeGenOpts.BackendOptions.size(); i != e; ++i)
     BackendArgs.push_back(CodeGenOpts.BackendOptions[i].c_str());
-  if (CodeGenOpts.NoGlobalMerge)
-    BackendArgs.push_back("-enable-global-merge=false");
   BackendArgs.push_back(nullptr);
   llvm::cl::ParseCommandLineOptions(BackendArgs.size() - 1,
                                     BackendArgs.data());
@@ -549,7 +546,7 @@
 }
 
 bool EmitAssemblyHelper::AddEmitPasses(BackendAction Action,
-                                       formatted_raw_ostream &OS) {
+                                       raw_pwrite_stream &OS) {
 
   // Create the code generator passes.
   legacy::PassManager *PM = getCodeGenPasses();
@@ -586,9 +583,9 @@
   return true;
 }
 
-void EmitAssemblyHelper::EmitAssembly(BackendAction Action, raw_ostream *OS) {
+void EmitAssemblyHelper::EmitAssembly(BackendAction Action,
+                                      raw_pwrite_stream *OS) {
   TimeRegion Region(llvm::TimePassesIsEnabled ? &CodeGenerationTime : nullptr);
-  llvm::formatted_raw_ostream FormattedOS;
 
   bool UsesCodeGen = (Action != Backend_EmitNothing &&
                       Action != Backend_EmitBC &&
@@ -604,17 +601,17 @@
     break;
 
   case Backend_EmitBC:
-    getPerModulePasses()->add(createBitcodeWriterPass(*OS));
+    getPerModulePasses()->add(
+        createBitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists));
     break;
 
   case Backend_EmitLL:
-    FormattedOS.setStream(*OS, formatted_raw_ostream::PRESERVE_STREAM);
-    getPerModulePasses()->add(createPrintModulePass(FormattedOS));
+    getPerModulePasses()->add(
+        createPrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists));
     break;
 
   default:
-    FormattedOS.setStream(*OS, formatted_raw_ostream::PRESERVE_STREAM);
-    if (!AddEmitPasses(Action, FormattedOS))
+    if (!AddEmitPasses(Action, *OS))
       return;
   }
 
@@ -651,7 +648,7 @@
                               const clang::TargetOptions &TOpts,
                               const LangOptions &LOpts, StringRef TDesc,
                               Module *M, BackendAction Action,
-                              raw_ostream *OS) {
+                              raw_pwrite_stream *OS) {
   EmitAssemblyHelper AsmHelper(Diags, CGOpts, TOpts, LOpts, M);
 
   AsmHelper.EmitAssembly(Action, OS);
diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp
index b7222fc..2de9cb2 100644
--- a/lib/CodeGen/CGAtomic.cpp
+++ b/lib/CodeGen/CGAtomic.cpp
@@ -189,7 +189,7 @@
       assert(LVal.isSimple());
       llvm::Value *addr = getAtomicAddress();
       if (hasPadding())
-        addr = CGF.Builder.CreateStructGEP(addr, 0);
+        addr = CGF.Builder.CreateStructGEP(nullptr, addr, 0);
 
       return LValue::MakeAddr(addr, getValueType(), LVal.getAlignment(),
                               CGF.getContext(), LVal.getTBAAInfo());
@@ -209,7 +209,7 @@
     /// \param IsWeak true if atomic operation is weak, false otherwise.
     /// \returns Pair of values: previous value from storage (value type) and
     /// boolean flag (i1 type) with true if success and false otherwise.
-    std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchange(
+    std::pair<RValue, llvm::Value *> EmitAtomicCompareExchange(
         RValue Expected, RValue Desired,
         llvm::AtomicOrdering Success = llvm::SequentiallyConsistent,
         llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent,
@@ -235,13 +235,13 @@
     /// \brief Emits atomic load as LLVM instruction.
     llvm::Value *EmitAtomicLoadOp(llvm::AtomicOrdering AO, bool IsVolatile);
     /// \brief Emits atomic compare-and-exchange op as a libcall.
-    std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchangeLibcall(
-        llvm::Value *ExpectedAddr, llvm::Value *DesiredAddr,
+    std::pair<RValue, llvm::Value *> EmitAtomicCompareExchangeLibcall(
+        RValue Expected, RValue DesiredAddr,
         llvm::AtomicOrdering Success = llvm::SequentiallyConsistent,
         llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent);
     /// \brief Emits atomic compare-and-exchange op as LLVM instruction.
-    std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchangeOp(
-        llvm::Value *Expected, llvm::Value *Desired,
+    std::pair<RValue, llvm::Value *> EmitAtomicCompareExchangeOp(
+        RValue Expected, RValue Desired,
         llvm::AtomicOrdering Success = llvm::SequentiallyConsistent,
         llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent,
         bool IsWeak = false);
@@ -1060,19 +1060,20 @@
 
     // Drill into the padding structure if we have one.
     if (hasPadding())
-      addr = CGF.Builder.CreateStructGEP(addr, 0);
+      addr = CGF.Builder.CreateStructGEP(nullptr, addr, 0);
 
     // Otherwise, just convert the temporary to an r-value using the
     // normal conversion routine.
     return CGF.convertTempToRValue(addr, getValueType(), loc);
-  } else if (!AsValue)
+  }
+  if (!AsValue)
     // Get RValue from temp memory as atomic for non-simple lvalues
     return RValue::get(
         CGF.Builder.CreateAlignedLoad(addr, AtomicAlign.getQuantity()));
-  else if (LVal.isBitField())
+  if (LVal.isBitField())
     return CGF.EmitLoadOfBitfieldLValue(LValue::MakeBitfield(
         addr, LVal.getBitFieldInfo(), LVal.getType(), LVal.getAlignment()));
-  else if (LVal.isVectorElt())
+  if (LVal.isVectorElt())
     return CGF.EmitLoadOfLValue(LValue::MakeVectorElt(addr, LVal.getVectorIdx(),
                                                       LVal.getType(),
                                                       LVal.getAlignment()),
@@ -1291,7 +1292,7 @@
   if (RVal.isScalar() && (!hasPadding() || !LVal.isSimple())) {
     llvm::Value *Value = RVal.getScalarVal();
     if (isa<llvm::IntegerType>(Value->getType()))
-      return Value;
+      return CGF.EmitToMemory(Value, ValueTy);
     else {
       llvm::IntegerType *InputIntTy = llvm::IntegerType::get(
           CGF.getLLVMContext(),
@@ -1312,13 +1313,15 @@
                                        getAtomicAlignment().getQuantity());
 }
 
-std::pair<llvm::Value *, llvm::Value *> AtomicInfo::EmitAtomicCompareExchangeOp(
-    llvm::Value *Expected, llvm::Value *Desired, llvm::AtomicOrdering Success,
+std::pair<RValue, llvm::Value *> AtomicInfo::EmitAtomicCompareExchangeOp(
+    RValue Expected, RValue Desired, llvm::AtomicOrdering Success,
     llvm::AtomicOrdering Failure, bool IsWeak) {
   // Do the atomic store.
+  auto *ExpectedVal = convertRValueToInt(Expected);
+  auto *DesiredVal = convertRValueToInt(Desired);
   auto *Addr = emitCastToAtomicIntPointer(getAtomicAddress());
-  auto *Inst = CGF.Builder.CreateAtomicCmpXchg(Addr, Expected, Desired, Success,
-                                               Failure);
+  auto *Inst = CGF.Builder.CreateAtomicCmpXchg(Addr, ExpectedVal, DesiredVal,
+                                               Success, Failure);
   // Other decoration.
   Inst->setVolatile(LVal.isVolatileQualified());
   Inst->setWeak(IsWeak);
@@ -1326,16 +1329,20 @@
   // Okay, turn that back into the original value type.
   auto *PreviousVal = CGF.Builder.CreateExtractValue(Inst, /*Idxs=*/0);
   auto *SuccessFailureVal = CGF.Builder.CreateExtractValue(Inst, /*Idxs=*/1);
-  return std::make_pair(PreviousVal, SuccessFailureVal);
+  return std::make_pair(
+      ConvertIntToValueOrAtomic(PreviousVal, AggValueSlot::ignored(),
+                                SourceLocation(), /*AsValue=*/false),
+      SuccessFailureVal);
 }
 
-std::pair<llvm::Value *, llvm::Value *>
-AtomicInfo::EmitAtomicCompareExchangeLibcall(llvm::Value *ExpectedAddr,
-                                             llvm::Value *DesiredAddr,
+std::pair<RValue, llvm::Value *>
+AtomicInfo::EmitAtomicCompareExchangeLibcall(RValue Expected, RValue Desired,
                                              llvm::AtomicOrdering Success,
                                              llvm::AtomicOrdering Failure) {
   // bool __atomic_compare_exchange(size_t size, void *obj, void *expected,
   // void *desired, int success, int failure);
+  auto *ExpectedAddr = materializeRValue(Expected);
+  auto *DesiredAddr = materializeRValue(Desired);
   CallArgList Args;
   Args.add(RValue::get(getAtomicSizeValue()), CGF.getContext().getSizeType());
   Args.add(RValue::get(CGF.EmitCastToVoidPtr(getAtomicAddress())),
@@ -1352,12 +1359,14 @@
            CGF.getContext().IntTy);
   auto SuccessFailureRVal = emitAtomicLibcall(CGF, "__atomic_compare_exchange",
                                               CGF.getContext().BoolTy, Args);
-  auto *PreviousVal = CGF.Builder.CreateAlignedLoad(
-      ExpectedAddr, getValueAlignment().getQuantity());
-  return std::make_pair(PreviousVal, SuccessFailureRVal.getScalarVal());
+
+  return std::make_pair(
+      convertTempToRValue(ExpectedAddr, AggValueSlot::ignored(),
+                          SourceLocation(), /*AsValue=*/false),
+      SuccessFailureRVal.getScalarVal());
 }
 
-std::pair<llvm::Value *, llvm::Value *> AtomicInfo::EmitAtomicCompareExchange(
+std::pair<RValue, llvm::Value *> AtomicInfo::EmitAtomicCompareExchange(
     RValue Expected, RValue Desired, llvm::AtomicOrdering Success,
     llvm::AtomicOrdering Failure, bool IsWeak) {
   if (Failure >= Success)
@@ -1366,20 +1375,15 @@
 
   // Check whether we should use a library call.
   if (shouldUseLibcall()) {
-    auto *ExpectedAddr = materializeRValue(Expected);
     // Produce a source address.
-    auto *DesiredAddr = materializeRValue(Desired);
-    return EmitAtomicCompareExchangeLibcall(ExpectedAddr, DesiredAddr, Success,
+    return EmitAtomicCompareExchangeLibcall(Expected, Desired, Success,
                                             Failure);
   }
 
   // If we've got a scalar value of the right size, try to avoid going
   // through memory.
-  auto *ExpectedIntVal = convertRValueToInt(Expected);
-  auto *DesiredIntVal = convertRValueToInt(Desired);
-
-  return EmitAtomicCompareExchangeOp(ExpectedIntVal, DesiredIntVal, Success,
-                                     Failure, IsWeak);
+  return EmitAtomicCompareExchangeOp(Expected, Desired, Success, Failure,
+                                     IsWeak);
 }
 
 void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue lvalue,
@@ -1498,20 +1502,14 @@
       atomics.getAtomicType(), SourceLocation()));
   // Try to write new value using cmpxchg operation
   auto Pair = atomics.EmitAtomicCompareExchange(OriginalRValue, NewRValue, AO);
-  llvm::Value *OldValue = Pair.first;
-  if (!atomics.shouldUseLibcall())
-    // Convert integer value to original atomic type
-    OldValue = atomics.ConvertIntToValueOrAtomic(
-                           OldValue, AggValueSlot::ignored(), SourceLocation(),
-                           /*AsValue=*/false).getScalarVal();
-  PHI->addIncoming(OldValue, ContBB);
+  PHI->addIncoming(Pair.first.getScalarVal(), ContBB);
   Builder.CreateCondBr(Pair.second, ExitBB, ContBB);
   EmitBlock(ExitBB, /*IsFinished=*/true);
 }
 
 /// Emit a compare-and-exchange op for atomic type.
 ///
-std::pair<RValue, RValue> CodeGenFunction::EmitAtomicCompareExchange(
+std::pair<RValue, llvm::Value *> CodeGenFunction::EmitAtomicCompareExchange(
     LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc,
     llvm::AtomicOrdering Success, llvm::AtomicOrdering Failure, bool IsWeak,
     AggValueSlot Slot) {
@@ -1525,13 +1523,78 @@
              Obj.getAddress()->getType()->getPointerElementType());
   AtomicInfo Atomics(*this, Obj);
 
-  auto Pair = Atomics.EmitAtomicCompareExchange(Expected, Desired, Success,
-                                                Failure, IsWeak);
-  return std::make_pair(Atomics.shouldUseLibcall()
-                            ? RValue::get(Pair.first)
-                            : Atomics.ConvertIntToValueOrAtomic(
-                                  Pair.first, Slot, Loc, /*AsValue=*/true),
-                        RValue::get(Pair.second));
+  return Atomics.EmitAtomicCompareExchange(Expected, Desired, Success, Failure,
+                                           IsWeak);
+}
+
+void CodeGenFunction::EmitAtomicUpdate(
+    LValue LVal, llvm::AtomicOrdering AO,
+    const std::function<RValue(RValue)> &UpdateOp, bool IsVolatile) {
+  AtomicInfo Atomics(*this, LVal);
+  LValue AtomicLVal = Atomics.getAtomicLValue();
+
+  // Atomic load of prev value.
+  RValue OldRVal =
+      Atomics.EmitAtomicLoad(AggValueSlot::ignored(), SourceLocation(),
+                             /*AsValue=*/false, AO, IsVolatile);
+  bool IsScalar = OldRVal.isScalar();
+  auto *OldVal =
+      IsScalar ? OldRVal.getScalarVal() : Atomics.convertRValueToInt(OldRVal);
+  // Perform the update as a compare-and-swap loop (simple and non-simple lvalues).
+  auto *ContBB = createBasicBlock("atomic_cont");
+  auto *ExitBB = createBasicBlock("atomic_exit");
+  auto *CurBB = Builder.GetInsertBlock();
+  EmitBlock(ContBB);
+  llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(),
+                                         /*NumReservedValues=*/2);
+  PHI->addIncoming(OldVal, CurBB);
+  RValue OriginalRValue =
+      IsScalar ? RValue::get(PHI) : Atomics.ConvertIntToValueOrAtomic(
+                                        PHI, AggValueSlot::ignored(),
+                                        SourceLocation(), /*AsValue=*/false);
+  // UpdateLVal/Ptr are set only for non-simple lvalues; UpRVal feeds UpdateOp.
+  LValue UpdateLVal;
+  llvm::Value *Ptr = nullptr;
+  RValue UpRVal;
+  if (AtomicLVal.isSimple()) {
+    UpRVal = OriginalRValue;
+  } else {
+    // Build new lvalue for temp address
+    Ptr = Atomics.materializeRValue(OriginalRValue);
+    if (AtomicLVal.isBitField())
+      UpdateLVal =
+          LValue::MakeBitfield(Ptr, AtomicLVal.getBitFieldInfo(),
+                               AtomicLVal.getType(), AtomicLVal.getAlignment());
+    else if (AtomicLVal.isVectorElt())
+      UpdateLVal = LValue::MakeVectorElt(Ptr, AtomicLVal.getVectorIdx(),
+                                         AtomicLVal.getType(),
+                                         AtomicLVal.getAlignment());
+    else {
+      assert(AtomicLVal.isExtVectorElt());
+      UpdateLVal = LValue::MakeExtVectorElt(Ptr, AtomicLVal.getExtVectorElts(),
+                                            AtomicLVal.getType(),
+                                            AtomicLVal.getAlignment());
+    }
+    UpdateLVal.setTBAAInfo(LVal.getTBAAInfo());
+    UpRVal = EmitLoadOfLValue(UpdateLVal, SourceLocation());
+  }
+  // Compute the new value; for non-simple lvalues write it back into the temp.
+  RValue NewRVal = UpdateOp(UpRVal);
+  if (!AtomicLVal.isSimple()) {
+    EmitStoreThroughLValue(NewRVal, UpdateLVal);
+    // Load new value
+    NewRVal = RValue::get(
+        EmitLoadOfScalar(Ptr, AtomicLVal.isVolatile(),
+                         Atomics.getAtomicAlignment().getQuantity(),
+                         Atomics.getAtomicType(), SourceLocation()));
+  }
+  // Try to write new value using cmpxchg operation
+  auto Pair = Atomics.EmitAtomicCompareExchange(OriginalRValue, NewRVal, AO);
+  OldVal = IsScalar ? Pair.first.getScalarVal()
+                    : Atomics.convertRValueToInt(Pair.first);
+  PHI->addIncoming(OldVal, ContBB);
+  Builder.CreateCondBr(Pair.second, ExitBB, ContBB);
+  EmitBlock(ExitBB, /*IsFinished=*/true);
 }
 
 void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) {
diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp
index 7b8e839..202996b 100644
--- a/lib/CodeGen/CGBlocks.cpp
+++ b/lib/CodeGen/CGBlocks.cpp
@@ -621,8 +621,8 @@
     }
 
     // GEP down to the address.
-    llvm::Value *addr = CGF.Builder.CreateStructGEP(blockInfo.Address,
-                                                    capture.getIndex());
+    llvm::Value *addr = CGF.Builder.CreateStructGEP(
+        blockInfo.StructureType, blockInfo.Address, capture.getIndex());
 
     // We can use that GEP as the dominating IP.
     if (!blockInfo.DominatingIP)
@@ -721,6 +721,7 @@
   // Build the block descriptor.
   llvm::Constant *descriptor = buildBlockDescriptor(CGM, blockInfo);
 
+  llvm::Type *blockTy = blockInfo.StructureType;
   llvm::AllocaInst *blockAddr = blockInfo.Address;
   assert(blockAddr && "block has no address!");
 
@@ -732,14 +733,17 @@
   if (blockInfo.UsesStret) flags |= BLOCK_USE_STRET;
 
   // Initialize the block literal.
-  Builder.CreateStore(isa, Builder.CreateStructGEP(blockAddr, 0, "block.isa"));
-  Builder.CreateStore(llvm::ConstantInt::get(IntTy, flags.getBitMask()),
-                      Builder.CreateStructGEP(blockAddr, 1, "block.flags"));
-  Builder.CreateStore(llvm::ConstantInt::get(IntTy, 0),
-                      Builder.CreateStructGEP(blockAddr, 2, "block.reserved"));
-  Builder.CreateStore(blockFn, Builder.CreateStructGEP(blockAddr, 3,
-                                                       "block.invoke"));
-  Builder.CreateStore(descriptor, Builder.CreateStructGEP(blockAddr, 4,
+  Builder.CreateStore(
+      isa, Builder.CreateStructGEP(blockTy, blockAddr, 0, "block.isa"));
+  Builder.CreateStore(
+      llvm::ConstantInt::get(IntTy, flags.getBitMask()),
+      Builder.CreateStructGEP(blockTy, blockAddr, 1, "block.flags"));
+  Builder.CreateStore(
+      llvm::ConstantInt::get(IntTy, 0),
+      Builder.CreateStructGEP(blockTy, blockAddr, 2, "block.reserved"));
+  Builder.CreateStore(
+      blockFn, Builder.CreateStructGEP(blockTy, blockAddr, 3, "block.invoke"));
+  Builder.CreateStore(descriptor, Builder.CreateStructGEP(blockTy, blockAddr, 4,
                                                           "block.descriptor"));
 
   // Finally, capture all the values into the block.
@@ -747,9 +751,8 @@
 
   // First, 'this'.
   if (blockDecl->capturesCXXThis()) {
-    llvm::Value *addr = Builder.CreateStructGEP(blockAddr,
-                                                blockInfo.CXXThisIndex,
-                                                "block.captured-this.addr");
+    llvm::Value *addr = Builder.CreateStructGEP(
+        blockTy, blockAddr, blockInfo.CXXThisIndex, "block.captured-this.addr");
     Builder.CreateStore(LoadCXXThis(), addr);
   }
 
@@ -766,9 +769,8 @@
 
     // This will be a [[type]]*, except that a byref entry will just be
     // an i8**.
-    llvm::Value *blockField =
-      Builder.CreateStructGEP(blockAddr, capture.getIndex(),
-                              "block.captured");
+    llvm::Value *blockField = Builder.CreateStructGEP(
+        blockTy, blockAddr, capture.getIndex(), "block.captured");
 
     // Compute the address of the thing we're going to move into the
     // block literal.
@@ -779,7 +781,7 @@
         BlockInfo->getCapture(variable);
 
       // This is a [[type]]*, except that a byref entry wil just be an i8**.
-      src = Builder.CreateStructGEP(LoadBlockStruct(),
+      src = Builder.CreateStructGEP(BlockInfo->StructureType, LoadBlockStruct(),
                                     enclosingCapture.getIndex(),
                                     "block.capture.addr");
     } else if (blockDecl->isConversionFromLambda()) {
@@ -964,7 +966,8 @@
     Builder.CreateBitCast(Callee, BlockLiteralTy, "block.literal");
 
   // Get the function pointer from the literal.
-  llvm::Value *FuncPtr = Builder.CreateStructGEP(BlockLiteral, 3);
+  llvm::Value *FuncPtr = Builder.CreateStructGEP(
+      CGM.getGenericBlockLiteralType(), BlockLiteral, 3);
 
   BlockLiteral = Builder.CreateBitCast(BlockLiteral, VoidPtrTy);
 
@@ -1004,26 +1007,27 @@
   if (capture.isConstant()) return LocalDeclMap[variable];
 
   llvm::Value *addr =
-    Builder.CreateStructGEP(LoadBlockStruct(), capture.getIndex(),
-                            "block.capture.addr");
+      Builder.CreateStructGEP(BlockInfo->StructureType, LoadBlockStruct(),
+                              capture.getIndex(), "block.capture.addr");
 
   if (isByRef) {
     // addr should be a void** right now.  Load, then cast the result
     // to byref*.
 
     addr = Builder.CreateLoad(addr);
-    llvm::PointerType *byrefPointerType
-      = llvm::PointerType::get(BuildByRefType(variable), 0);
+    auto *byrefType = BuildByRefType(variable);
+    llvm::PointerType *byrefPointerType = llvm::PointerType::get(byrefType, 0);
     addr = Builder.CreateBitCast(addr, byrefPointerType,
                                  "byref.addr");
 
     // Follow the forwarding pointer.
-    addr = Builder.CreateStructGEP(addr, 1, "byref.forwarding");
+    addr = Builder.CreateStructGEP(byrefType, addr, 1, "byref.forwarding");
     addr = Builder.CreateLoad(addr, "byref.addr.forwarded");
 
     // Cast back to byref* and GEP over to the actual object.
     addr = Builder.CreateBitCast(addr, byrefPointerType);
-    addr = Builder.CreateStructGEP(addr, getByRefValueLLVMField(variable), 
+    addr = Builder.CreateStructGEP(byrefType, addr,
+                                   getByRefValueLLVMField(variable).second,
                                    variable->getNameAsString());
   }
 
@@ -1185,9 +1189,9 @@
   // If we have a C++ 'this' reference, go ahead and force it into
   // existence now.
   if (blockDecl->capturesCXXThis()) {
-    llvm::Value *addr = Builder.CreateStructGEP(BlockPointer,
-                                                blockInfo.CXXThisIndex,
-                                                "block.captured-this");
+    llvm::Value *addr =
+        Builder.CreateStructGEP(blockInfo.StructureType, BlockPointer,
+                                blockInfo.CXXThisIndex, "block.captured-this");
     CXXThisValue = Builder.CreateLoad(addr, "this");
   }
 
@@ -1402,8 +1406,10 @@
     }
 
     unsigned index = capture.getIndex();
-    llvm::Value *srcField = Builder.CreateStructGEP(src, index);
-    llvm::Value *dstField = Builder.CreateStructGEP(dst, index);
+    llvm::Value *srcField =
+        Builder.CreateStructGEP(blockInfo.StructureType, src, index);
+    llvm::Value *dstField =
+        Builder.CreateStructGEP(blockInfo.StructureType, dst, index);
 
     // If there's an explicit copy expression, we do that.
     if (copyExpr) {
@@ -1560,7 +1566,8 @@
     }
 
     unsigned index = capture.getIndex();
-    llvm::Value *srcField = Builder.CreateStructGEP(src, index);
+    llvm::Value *srcField =
+        Builder.CreateStructGEP(blockInfo.StructureType, src, index);
 
     // If there's an explicit copy expression, we do that.
     if (dtor) {
@@ -1799,13 +1806,15 @@
     llvm::Value *destField = CGF.GetAddrOfLocalVar(&dst);
     destField = CGF.Builder.CreateLoad(destField);
     destField = CGF.Builder.CreateBitCast(destField, byrefPtrType);
-    destField = CGF.Builder.CreateStructGEP(destField, valueFieldIndex, "x");
+    destField = CGF.Builder.CreateStructGEP(&byrefType, destField,
+                                            valueFieldIndex, "x");
 
     // src->x
     llvm::Value *srcField = CGF.GetAddrOfLocalVar(&src);
     srcField = CGF.Builder.CreateLoad(srcField);
     srcField = CGF.Builder.CreateBitCast(srcField, byrefPtrType);
-    srcField = CGF.Builder.CreateStructGEP(srcField, valueFieldIndex, "x");
+    srcField =
+        CGF.Builder.CreateStructGEP(&byrefType, srcField, valueFieldIndex, "x");
 
     byrefInfo.emitCopy(CGF, destField, srcField);
   }  
@@ -1866,7 +1875,7 @@
     llvm::Value *V = CGF.GetAddrOfLocalVar(&src);
     V = CGF.Builder.CreateLoad(V);
     V = CGF.Builder.CreateBitCast(V, byrefType.getPointerTo(0));
-    V = CGF.Builder.CreateStructGEP(V, byrefValueIndex, "x");
+    V = CGF.Builder.CreateStructGEP(&byrefType, V, byrefValueIndex, "x");
 
     byrefInfo.emitDispose(CGF, V);
   }
@@ -1923,7 +1932,7 @@
   const VarDecl &var = *emission.Variable;
   QualType type = var.getType();
 
-  unsigned byrefValueIndex = getByRefValueLLVMField(&var);
+  unsigned byrefValueIndex = getByRefValueLLVMField(&var).second;
 
   if (const CXXRecordDecl *record = type->getAsCXXRecordDecl()) {
     const Expr *copyExpr = CGM.getContext().getBlockVarCopyInits(&var);
@@ -1993,18 +2002,20 @@
   return ::buildByrefHelpers(CGM, byrefType, byrefValueIndex, byrefInfo);
 }
 
-unsigned CodeGenFunction::getByRefValueLLVMField(const ValueDecl *VD) const {
+std::pair<llvm::Type *, unsigned>
+CodeGenFunction::getByRefValueLLVMField(const ValueDecl *VD) const {
   assert(ByRefValueInfo.count(VD) && "Did not find value!");
-  
-  return ByRefValueInfo.find(VD)->second.second;
+
+  return ByRefValueInfo.find(VD)->second;
 }
 
 llvm::Value *CodeGenFunction::BuildBlockByrefAddress(llvm::Value *BaseAddr,
                                                      const VarDecl *V) {
-  llvm::Value *Loc = Builder.CreateStructGEP(BaseAddr, 1, "forwarding");
+  auto P = getByRefValueLLVMField(V);
+  llvm::Value *Loc =
+      Builder.CreateStructGEP(P.first, BaseAddr, 1, "forwarding");
   Loc = Builder.CreateLoad(Loc);
-  Loc = Builder.CreateStructGEP(Loc, getByRefValueLLVMField(V),
-                                V->getNameAsString());
+  Loc = Builder.CreateStructGEP(P.first, Loc, P.second, V->getNameAsString());
   return Loc;
 }
 
@@ -2141,11 +2152,12 @@
   if (type.isObjCGCWeak())
     isa = 1;
   V = Builder.CreateIntToPtr(Builder.getInt32(isa), Int8PtrTy, "isa");
-  Builder.CreateStore(V, Builder.CreateStructGEP(addr, 0, "byref.isa"));
+  Builder.CreateStore(V,
+                      Builder.CreateStructGEP(nullptr, addr, 0, "byref.isa"));
 
   // Store the address of the variable into its own forwarding pointer.
-  Builder.CreateStore(addr,
-                      Builder.CreateStructGEP(addr, 1, "byref.forwarding"));
+  Builder.CreateStore(
+      addr, Builder.CreateStructGEP(nullptr, addr, 1, "byref.forwarding"));
 
   // Blocks ABI:
   //   c) the flags field is set to either 0 if no helper functions are
@@ -2191,25 +2203,26 @@
       printf("\n");
     }
   }
-  
+
   Builder.CreateStore(llvm::ConstantInt::get(IntTy, flags.getBitMask()),
-                      Builder.CreateStructGEP(addr, 2, "byref.flags"));
+                      Builder.CreateStructGEP(nullptr, addr, 2, "byref.flags"));
 
   CharUnits byrefSize = CGM.GetTargetTypeStoreSize(byrefType);
   V = llvm::ConstantInt::get(IntTy, byrefSize.getQuantity());
-  Builder.CreateStore(V, Builder.CreateStructGEP(addr, 3, "byref.size"));
+  Builder.CreateStore(V,
+                      Builder.CreateStructGEP(nullptr, addr, 3, "byref.size"));
 
   if (helpers) {
-    llvm::Value *copy_helper = Builder.CreateStructGEP(addr, 4);
+    llvm::Value *copy_helper = Builder.CreateStructGEP(nullptr, addr, 4);
     Builder.CreateStore(helpers->CopyHelper, copy_helper);
 
-    llvm::Value *destroy_helper = Builder.CreateStructGEP(addr, 5);
+    llvm::Value *destroy_helper = Builder.CreateStructGEP(nullptr, addr, 5);
     Builder.CreateStore(helpers->DisposeHelper, destroy_helper);
   }
   if (ByRefHasLifetime && HasByrefExtendedLayout) {
     llvm::Constant* ByrefLayoutInfo = CGM.getObjCRuntime().BuildByrefLayout(CGM, type);
-    llvm::Value *ByrefInfoAddr = Builder.CreateStructGEP(addr, helpers ? 6 : 4,
-                                                         "byref.layout");
+    llvm::Value *ByrefInfoAddr =
+        Builder.CreateStructGEP(nullptr, addr, helpers ? 6 : 4, "byref.layout");
     // cast destination to pointer to source type.
     llvm::Type *DesTy = ByrefLayoutInfo->getType();
     DesTy = DesTy->getPointerTo();
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 35597fe..2653d7c 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -1865,6 +1865,8 @@
   case llvm::Triple::r600:
   case llvm::Triple::amdgcn:
     return EmitR600BuiltinExpr(BuiltinID, E);
+  case llvm::Triple::systemz:
+    return EmitSystemZBuiltinExpr(BuiltinID, E);
   default:
     return nullptr;
   }
@@ -3105,7 +3107,7 @@
         Indices.push_back(Builder.getInt32(i+vi));
         Indices.push_back(Builder.getInt32(i+e+vi));
       }
-      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
       SV = llvm::ConstantVector::get(Indices);
       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
       SV = Builder.CreateStore(SV, Addr);
@@ -3133,7 +3135,7 @@
       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
         Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
 
-      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
       SV = llvm::ConstantVector::get(Indices);
       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
       SV = Builder.CreateStore(SV, Addr);
@@ -3153,7 +3155,7 @@
         Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
         Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
       }
-      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
       SV = llvm::ConstantVector::get(Indices);
       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
       SV = Builder.CreateStore(SV, Addr);
@@ -5768,7 +5770,7 @@
         Indices.push_back(ConstantInt::get(Int32Ty, i+vi));
         Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi));
       }
-      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
       SV = llvm::ConstantVector::get(Indices);
       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
       SV = Builder.CreateStore(SV, Addr);
@@ -5787,7 +5789,7 @@
       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
         Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
 
-      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
       SV = llvm::ConstantVector::get(Indices);
       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
       SV = Builder.CreateStore(SV, Addr);
@@ -5807,7 +5809,7 @@
         Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
         Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
       }
-      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
       SV = llvm::ConstantVector::get(Indices);
       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
       SV = Builder.CreateStore(SV, Addr);
@@ -6371,119 +6373,6 @@
     llvm::Function *F = CGM.getIntrinsic(ID);
     return Builder.CreateCall(F, Ops, "");
   }
-
-  // P8 Crypto builtins
-  case PPC::BI__builtin_altivec_crypto_vshasigmaw:
-  case PPC::BI__builtin_altivec_crypto_vshasigmad:
-  {
-    ConstantInt *CI1 = dyn_cast<ConstantInt>(Ops[1]);
-    ConstantInt *CI2 = dyn_cast<ConstantInt>(Ops[2]);
-    assert(CI1 && CI2);
-    if (CI1->getZExtValue() > 1) {
-      CGM.Error(E->getArg(1)->getExprLoc(),
-                "argument out of range (should be 0-1).");
-      return llvm::UndefValue::get(Ops[0]->getType());
-    }
-    if (CI2->getZExtValue() > 15) {
-      CGM.Error(E->getArg(2)->getExprLoc(),
-                "argument out of range (should be 0-15).");
-      return llvm::UndefValue::get(Ops[0]->getType());
-    }
-    switch (BuiltinID) {
-    default: llvm_unreachable("Unsupported sigma intrinsic!");
-    case PPC::BI__builtin_altivec_crypto_vshasigmaw:
-      ID = Intrinsic::ppc_altivec_crypto_vshasigmaw;
-      break;
-    case PPC::BI__builtin_altivec_crypto_vshasigmad:
-      ID = Intrinsic::ppc_altivec_crypto_vshasigmad;
-      break;
-    }
-    llvm::Function *F = CGM.getIntrinsic(ID);
-    return Builder.CreateCall(F, Ops, "");
-  }
-
-  // HTM builtins
-  case PPC::BI__builtin_tbegin:
-  case PPC::BI__builtin_tend:
-  case PPC::BI__builtin_tsr: {
-    unsigned int MaxValue;
-    // The HTM instructions only accept one argument and with limited range.
-    ConstantInt *CI = dyn_cast<ConstantInt>(Ops[0]);
-    assert(CI);
-    switch (BuiltinID) {
-    case PPC::BI__builtin_tbegin:
-      ID = Intrinsic::ppc_tbegin;
-      MaxValue = 1;
-      break;
-    case PPC::BI__builtin_tend:
-      ID = Intrinsic::ppc_tend;
-      MaxValue = 1;
-      break;
-    case PPC::BI__builtin_tsr:
-      ID = Intrinsic::ppc_tsr;
-      MaxValue = 7;
-      break;
-    }
-    if (CI->getZExtValue() > MaxValue) {
-      std::stringstream ss;
-      ss << "argument out of range (should be 0 or " << MaxValue << ")";
-      CGM.Error(E->getArg(0)->getExprLoc(), ss.str());
-      return llvm::UndefValue::get(Ops[0]->getType());
-    }
-
-    llvm::Function *F = CGM.getIntrinsic(ID);
-    return Builder.CreateCall(F, Ops, "");
-  }
-  case PPC::BI__builtin_tabortdc:
-  case PPC::BI__builtin_tabortwc: {
-    // For wd and dc variant of tabort first argument must be a 5-bit constant
-    // integer
-    ConstantInt *CI = dyn_cast<ConstantInt>(Ops[0]);
-    assert(CI);
-    if (CI->getZExtValue() > 31) {
-      CGM.ErrorUnsupported(E->getArg(0), "argument out of range (should be 0-31)");
-      return llvm::UndefValue::get(Ops[0]->getType());
-    }
-    switch (BuiltinID) {
-    case PPC::BI__builtin_tabortdc:
-      ID = Intrinsic::ppc_tabortdc;
-      break;
-    case PPC::BI__builtin_tabortwc:
-      ID = Intrinsic::ppc_tabortwc;
-      break;
-    }
-    llvm::Function *F = CGM.getIntrinsic(ID);
-    return Builder.CreateCall(F, Ops, "");
-  }
-  case PPC::BI__builtin_tabortdci:
-  case PPC::BI__builtin_tabortwci: {
-    // For wd and dc variant of tabort first and third argument must be a
-    // 5-bit constant integer
-    ConstantInt *CI = dyn_cast<ConstantInt>(Ops[0]);
-    assert(CI);
-    if (CI->getZExtValue() > 31) {
-      CGM.ErrorUnsupported(E->getArg(0), "argument out of range (should be 0-31)");
-      return llvm::UndefValue::get(Ops[0]->getType());
-    }
-    CI = dyn_cast<ConstantInt>(Ops[2]);
-    assert(CI);
-    if (CI->getZExtValue() > 31) {
-      CGM.ErrorUnsupported(E->getArg(2), "argument out of range (should be 0-31)");
-      return llvm::UndefValue::get(Ops[2]->getType());
-    }
-    switch (BuiltinID) {
-    default: llvm_unreachable("Unsupported htm intrinsic!");
-    case PPC::BI__builtin_tabortdci:
-      ID = Intrinsic::ppc_tabortdci;
-      break;
-    case PPC::BI__builtin_tabortwci:
-      ID = Intrinsic::ppc_tabortwci;
-      break;
-    }
-    llvm::Function *F = CGM.getIntrinsic(ID);
-    return Builder.CreateCall(F, Ops, "");
-  }
-
   }
 }
 
@@ -6588,3 +6477,41 @@
     return nullptr;
   }
 }
+
+/// Lowers the SystemZ builtins handled below to calls to the matching s390
+/// intrinsics; returns null for any other builtin ID.
+Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
+                                               const CallExpr *E) {
+  switch (BuiltinID) {
+  case SystemZ::BI__builtin_tbegin:
+  case SystemZ::BI__builtin_tbegin_nofloat: {
+    // Same operands for both; only the intrinsic that gets called differs.
+    Value *TDBPtr = EmitScalarExpr(E->getArg(0));
+    Value *CtrlWord = llvm::ConstantInt::get(Int32Ty, 0xff0c);
+    Value *Fn = CGM.getIntrinsic(BuiltinID == SystemZ::BI__builtin_tbegin
+                                     ? Intrinsic::s390_tbegin
+                                     : Intrinsic::s390_tbegin_nofloat);
+    return Builder.CreateCall2(Fn, TDBPtr, CtrlWord);
+  }
+  case SystemZ::BI__builtin_tbeginc: {
+    Value *NullTDB = llvm::ConstantPointerNull::get(Int8PtrTy);
+    Value *CtrlWord = llvm::ConstantInt::get(Int32Ty, 0xff08);
+    Value *Fn = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
+    return Builder.CreateCall2(Fn, NullTDB, CtrlWord);
+  }
+  case SystemZ::BI__builtin_tabort: {
+    Value *Arg = EmitScalarExpr(E->getArg(0));
+    Value *Fn = CGM.getIntrinsic(Intrinsic::s390_tabort);
+    return Builder.CreateCall(Fn, Builder.CreateSExt(Arg, Int64Ty, "tabort"));
+  }
+  case SystemZ::BI__builtin_non_tx_store: {
+    // Operands are passed as (data, address), swapping the builtin's order.
+    Value *Addr = EmitScalarExpr(E->getArg(0));
+    Value *Val = EmitScalarExpr(E->getArg(1));
+    Value *Fn = CGM.getIntrinsic(Intrinsic::s390_ntstg);
+    return Builder.CreateCall2(Fn, Val, Addr);
+  }
+  default:
+    return nullptr;
+  }
+}
diff --git a/lib/CodeGen/CGCXXABI.h b/lib/CodeGen/CGCXXABI.h
index b6a94f9..2c73921 100644
--- a/lib/CodeGen/CGCXXABI.h
+++ b/lib/CodeGen/CGCXXABI.h
@@ -226,7 +226,7 @@
 
   virtual llvm::Constant *getAddrOfRTTIDescriptor(QualType Ty) = 0;
   virtual llvm::Constant *
-  getAddrOfCXXHandlerMapEntry(QualType Ty, QualType CatchHandlerType) = 0;
+  getAddrOfCXXCatchHandlerType(QualType Ty, QualType CatchHandlerType) = 0;
 
   virtual bool shouldTypeidBeNullChecked(bool IsDeref,
                                          QualType SrcRecordTy) = 0;
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index d397761..c031bd7 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -734,7 +734,8 @@
   auto Exp = getTypeExpansion(Ty, getContext());
   if (auto CAExp = dyn_cast<ConstantArrayExpansion>(Exp.get())) {
     for (int i = 0, n = CAExp->NumElts; i < n; i++) {
-      llvm::Value *EltAddr = Builder.CreateConstGEP2_32(LV.getAddress(), 0, i);
+      llvm::Value *EltAddr =
+          Builder.CreateConstGEP2_32(nullptr, LV.getAddress(), 0, i);
       LValue LV = MakeAddrLValue(EltAddr, CAExp->EltTy);
       ExpandTypeFromArgs(CAExp->EltTy, LV, AI);
     }
@@ -756,10 +757,12 @@
       ExpandTypeFromArgs(FD->getType(), SubLV, AI);
     }
   } else if (auto CExp = dyn_cast<ComplexExpansion>(Exp.get())) {
-    llvm::Value *RealAddr = Builder.CreateStructGEP(LV.getAddress(), 0, "real");
+    llvm::Value *RealAddr =
+        Builder.CreateStructGEP(nullptr, LV.getAddress(), 0, "real");
     EmitStoreThroughLValue(RValue::get(*AI++),
                            MakeAddrLValue(RealAddr, CExp->EltTy));
-    llvm::Value *ImagAddr = Builder.CreateStructGEP(LV.getAddress(), 1, "imag");
+    llvm::Value *ImagAddr =
+        Builder.CreateStructGEP(nullptr, LV.getAddress(), 1, "imag");
     EmitStoreThroughLValue(RValue::get(*AI++),
                            MakeAddrLValue(ImagAddr, CExp->EltTy));
   } else {
@@ -775,7 +778,7 @@
   if (auto CAExp = dyn_cast<ConstantArrayExpansion>(Exp.get())) {
     llvm::Value *Addr = RV.getAggregateAddr();
     for (int i = 0, n = CAExp->NumElts; i < n; i++) {
-      llvm::Value *EltAddr = Builder.CreateConstGEP2_32(Addr, 0, i);
+      llvm::Value *EltAddr = Builder.CreateConstGEP2_32(nullptr, Addr, 0, i);
       RValue EltRV =
           convertTempToRValue(EltAddr, CAExp->EltTy, SourceLocation());
       ExpandTypeToArgs(CAExp->EltTy, EltRV, IRFuncTy, IRCallArgs, IRCallArgPos);
@@ -843,7 +846,7 @@
     return SrcPtr;
 
   // GEP into the first element.
-  SrcPtr = CGF.Builder.CreateConstGEP2_32(SrcPtr, 0, 0, "coerce.dive");
+  SrcPtr = CGF.Builder.CreateConstGEP2_32(SrcSTy, SrcPtr, 0, 0, "coerce.dive");
 
   // If the first element is a struct, recurse.
   llvm::Type *SrcTy =
@@ -981,7 +984,7 @@
   if (llvm::StructType *STy =
         dyn_cast<llvm::StructType>(Val->getType())) {
     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
-      llvm::Value *EltPtr = CGF.Builder.CreateConstGEP2_32(DestPtr, 0, i);
+      llvm::Value *EltPtr = CGF.Builder.CreateConstGEP2_32(STy, DestPtr, 0, i);
       llvm::Value *Elt = CGF.Builder.CreateExtractValue(Val, i);
       llvm::StoreInst *SI = CGF.Builder.CreateStore(Elt, EltPtr,
                                                     DestIsVolatile);
@@ -1772,8 +1775,9 @@
     switch (ArgI.getKind()) {
     case ABIArgInfo::InAlloca: {
       assert(NumIRArgs == 0);
-      llvm::Value *V = Builder.CreateStructGEP(
-          ArgStruct, ArgI.getInAllocaFieldIndex(), Arg->getName());
+      llvm::Value *V =
+          Builder.CreateStructGEP(FI.getArgStruct(), ArgStruct,
+                                  ArgI.getInAllocaFieldIndex(), Arg->getName());
       ArgVals.push_back(ValueAndIsPtr(V, HavePointer));
       break;
     }
@@ -1939,7 +1943,7 @@
       // If the value is offset in memory, apply the offset now.
       if (unsigned Offs = ArgI.getDirectOffset()) {
         Ptr = Builder.CreateBitCast(Ptr, Builder.getInt8PtrTy());
-        Ptr = Builder.CreateConstGEP1_32(Ptr, Offs);
+        Ptr = Builder.CreateConstGEP1_32(Builder.getInt8Ty(), Ptr, Offs);
         Ptr = Builder.CreateBitCast(Ptr,
                           llvm::PointerType::getUnqual(ArgI.getCoerceToType()));
       }
@@ -1961,7 +1965,7 @@
           for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
             auto AI = FnArgs[FirstIRArg + i];
             AI->setName(Arg->getName() + ".coerce" + Twine(i));
-            llvm::Value *EltPtr = Builder.CreateConstGEP2_32(Ptr, 0, i);
+            llvm::Value *EltPtr = Builder.CreateConstGEP2_32(STy, Ptr, 0, i);
             Builder.CreateStore(AI, EltPtr);
           }
         } else {
@@ -1974,7 +1978,8 @@
           for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
             auto AI = FnArgs[FirstIRArg + i];
             AI->setName(Arg->getName() + ".coerce" + Twine(i));
-            llvm::Value *EltPtr = Builder.CreateConstGEP2_32(TempV, 0, i);
+            llvm::Value *EltPtr =
+                Builder.CreateConstGEP2_32(ArgI.getCoerceToType(), TempV, 0, i);
             Builder.CreateStore(AI, EltPtr);
           }
 
@@ -2269,8 +2274,8 @@
       llvm::Function::arg_iterator EI = CurFn->arg_end();
       --EI;
       llvm::Value *ArgStruct = EI;
-      llvm::Value *SRet =
-          Builder.CreateStructGEP(ArgStruct, RetAI.getInAllocaFieldIndex());
+      llvm::Value *SRet = Builder.CreateStructGEP(
+          nullptr, ArgStruct, RetAI.getInAllocaFieldIndex());
       RV = Builder.CreateLoad(SRet, "sret");
     }
     break;
@@ -2334,7 +2339,7 @@
       // If the value is offset in memory, apply the offset now.
       if (unsigned Offs = RetAI.getDirectOffset()) {
         V = Builder.CreateBitCast(V, Builder.getInt8PtrTy());
-        V = Builder.CreateConstGEP1_32(V, Offs);
+        V = Builder.CreateConstGEP1_32(Builder.getInt8Ty(), V, Offs);
         V = Builder.CreateBitCast(V,
                          llvm::PointerType::getUnqual(RetAI.getCoerceToType()));
       }
@@ -2380,7 +2385,7 @@
     Ret = Builder.CreateRetVoid();
   }
 
-  if (!RetDbgLoc.isUnknown())
+  if (RetDbgLoc)
     Ret->setDebugLoc(std::move(RetDbgLoc));
 }
 
@@ -2992,7 +2997,7 @@
   if (CGM.getLangOpts().ObjCAutoRefCount)
     AddObjCARCExceptionMetadata(Inst);
 
-  return Inst;
+  return llvm::CallSite(Inst);
 }
 
 /// \brief Store a non-aggregate value to an address to initialize it.  For
@@ -3029,7 +3034,7 @@
 
   // If we're using inalloca, insert the allocation after the stack save.
   // FIXME: Do this earlier rather than hacking it in here!
-  llvm::Value *ArgMemory = nullptr;
+  llvm::AllocaInst *ArgMemory = nullptr;
   if (llvm::StructType *ArgStruct = CallInfo.getArgStruct()) {
     llvm::Instruction *IP = CallArgs.getStackBase();
     llvm::AllocaInst *AI;
@@ -3058,7 +3063,8 @@
       IRCallArgs[IRFunctionArgs.getSRetArgNo()] = SRetPtr;
     } else {
       llvm::Value *Addr =
-          Builder.CreateStructGEP(ArgMemory, RetAI.getInAllocaFieldIndex());
+          Builder.CreateStructGEP(ArgMemory->getAllocatedType(), ArgMemory,
+                                  RetAI.getInAllocaFieldIndex());
       Builder.CreateStore(SRetPtr, Addr);
     }
   }
@@ -3092,14 +3098,16 @@
             cast<llvm::Instruction>(RV.getAggregateAddr());
         CGBuilderTy::InsertPoint IP = Builder.saveIP();
         Builder.SetInsertPoint(Placeholder);
-        llvm::Value *Addr = Builder.CreateStructGEP(
-            ArgMemory, ArgInfo.getInAllocaFieldIndex());
+        llvm::Value *Addr =
+            Builder.CreateStructGEP(ArgMemory->getAllocatedType(), ArgMemory,
+                                    ArgInfo.getInAllocaFieldIndex());
         Builder.restoreIP(IP);
         deferPlaceholderReplacement(Placeholder, Addr);
       } else {
         // Store the RValue into the argument struct.
         llvm::Value *Addr =
-            Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex());
+            Builder.CreateStructGEP(ArgMemory->getAllocatedType(), ArgMemory,
+                                    ArgInfo.getInAllocaFieldIndex());
         unsigned AS = Addr->getType()->getPointerAddressSpace();
         llvm::Type *MemType = ConvertTypeForMem(I->Ty)->getPointerTo(AS);
         // There are some cases where a trivial bitcast is not avoidable.  The
@@ -3201,7 +3209,7 @@
       // If the value is offset in memory, apply the offset now.
       if (unsigned Offs = ArgInfo.getDirectOffset()) {
         SrcPtr = Builder.CreateBitCast(SrcPtr, Builder.getInt8PtrTy());
-        SrcPtr = Builder.CreateConstGEP1_32(SrcPtr, Offs);
+        SrcPtr = Builder.CreateConstGEP1_32(Builder.getInt8Ty(), SrcPtr, Offs);
         SrcPtr = Builder.CreateBitCast(SrcPtr,
                        llvm::PointerType::getUnqual(ArgInfo.getCoerceToType()));
 
@@ -3233,7 +3241,7 @@
 
         assert(NumIRArgs == STy->getNumElements());
         for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
-          llvm::Value *EltPtr = Builder.CreateConstGEP2_32(SrcPtr, 0, i);
+          llvm::Value *EltPtr = Builder.CreateConstGEP2_32(STy, SrcPtr, 0, i);
           llvm::LoadInst *LI = Builder.CreateLoad(EltPtr);
           // We don't know what we're loading from.
           LI->setAlignment(1);
@@ -3463,7 +3471,8 @@
       llvm::Value *StorePtr = DestPtr;
       if (unsigned Offs = RetAI.getDirectOffset()) {
         StorePtr = Builder.CreateBitCast(StorePtr, Builder.getInt8PtrTy());
-        StorePtr = Builder.CreateConstGEP1_32(StorePtr, Offs);
+        StorePtr =
+            Builder.CreateConstGEP1_32(Builder.getInt8Ty(), StorePtr, Offs);
         StorePtr = Builder.CreateBitCast(StorePtr,
                            llvm::PointerType::getUnqual(RetAI.getCoerceToType()));
       }
diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp
index 84d6437..bd15c12 100644
--- a/lib/CodeGen/CGClass.cpp
+++ b/lib/CodeGen/CGClass.cpp
@@ -2088,14 +2088,6 @@
   return VTable;
 }
 
-void CodeGenFunction::EmitVTablePtrCheckForCall(const CXXMethodDecl *MD,
-                                                llvm::Value *VTable) {
-  if (!SanOpts.has(SanitizerKind::CFIVptr))
-    return;
-
-  EmitVTablePtrCheck(MD->getParent(), VTable);
-}
-
 // If a class has a single non-virtual base and does not introduce or override
 // virtual member functions or fields, it will have the same layout as its base.
 // This function returns the least derived such class.
@@ -2131,6 +2123,15 @@
       RD->bases_begin()->getType()->getAsCXXRecordDecl());
 }
 
+/// Emits a check of VTable against MD's parent class or, unless CFICastStrict
+/// is on, against the least derived class with the same layout as that class.
+void CodeGenFunction::EmitVTablePtrCheckForCall(const CXXMethodDecl *MD,
+                                                llvm::Value *VTable) {
+  const CXXRecordDecl *RD = MD->getParent();
+  bool Strict = SanOpts.has(SanitizerKind::CFICastStrict);
+  EmitVTablePtrCheck(Strict ? RD : LeastDerivedClassWithSameLayout(RD), VTable);
+}
+
 void CodeGenFunction::EmitVTablePtrCheckForCast(QualType T,
                                                 llvm::Value *Derived,
                                                 bool MayBeNull) {
diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp
index 566befc..299969a 100644
--- a/lib/CodeGen/CGCleanup.cpp
+++ b/lib/CodeGen/CGCleanup.cpp
@@ -52,8 +52,10 @@
       llvm::StructType::get(V.first->getType(), V.second->getType(),
                             (void*) nullptr);
     llvm::Value *addr = CGF.CreateTempAlloca(ComplexTy, "saved-complex");
-    CGF.Builder.CreateStore(V.first, CGF.Builder.CreateStructGEP(addr, 0));
-    CGF.Builder.CreateStore(V.second, CGF.Builder.CreateStructGEP(addr, 1));
+    CGF.Builder.CreateStore(V.first,
+                            CGF.Builder.CreateStructGEP(ComplexTy, addr, 0));
+    CGF.Builder.CreateStore(V.second,
+                            CGF.Builder.CreateStructGEP(ComplexTy, addr, 1));
     return saved_type(addr, ComplexAddress);
   }
 
@@ -82,9 +84,9 @@
     return RValue::getAggregate(CGF.Builder.CreateLoad(Value));
   case ComplexAddress: {
     llvm::Value *real =
-      CGF.Builder.CreateLoad(CGF.Builder.CreateStructGEP(Value, 0));
+        CGF.Builder.CreateLoad(CGF.Builder.CreateStructGEP(nullptr, Value, 0));
     llvm::Value *imag =
-      CGF.Builder.CreateLoad(CGF.Builder.CreateStructGEP(Value, 1));
+        CGF.Builder.CreateLoad(CGF.Builder.CreateStructGEP(nullptr, Value, 1));
     return RValue::getComplex(real, imag);
   }
   }
@@ -469,8 +471,14 @@
                         EHScopeStack::Cleanup *Fn,
                         EHScopeStack::Cleanup::Flags flags,
                         llvm::Value *ActiveFlag) {
-  // EH cleanups always occur within a terminate scope.
-  if (flags.isForEHCleanup()) CGF.EHStack.pushTerminate();
+  // Itanium EH cleanups occur within a terminate scope. Microsoft SEH doesn't
+  // have this behavior, and the Microsoft C++ runtime will call terminate for
+  // us if the cleanup throws.
+  bool PushedTerminate = false;
+  if (flags.isForEHCleanup() && !CGF.getTarget().getCXXABI().isMicrosoft()) {
+    CGF.EHStack.pushTerminate();
+    PushedTerminate = true;
+  }
 
   // If there's an active flag, load it and skip the cleanup if it's
   // false.
@@ -493,7 +501,8 @@
     CGF.EmitBlock(ContBB);
 
   // Leave the terminate scope.
-  if (flags.isForEHCleanup()) CGF.EHStack.popTerminate();
+  if (PushedTerminate)
+    CGF.EHStack.popTerminate();
 }
 
 static void ForwardPrebranchedFallthrough(llvm::BasicBlock *Exit,
diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp
index 186c522..4af49c2 100644
--- a/lib/CodeGen/CGDebugInfo.cpp
+++ b/lib/CodeGen/CGDebugInfo.cpp
@@ -92,7 +92,7 @@
     : CGF(CGF) {
   if (CGF.getDebugInfo()) {
     OriginalLocation = CGF.Builder.getCurrentDebugLocation();
-    if (!Loc.isUnknown())
+    if (Loc)
       CGF.Builder.SetCurrentDebugLocation(std::move(Loc));
   }
 }
@@ -120,20 +120,20 @@
     return;
 
   SourceManager &SM = CGM.getContext().getSourceManager();
-  llvm::DIScope Scope(LexicalBlockStack.back());
+  auto *Scope = cast<llvm::MDScope>(LexicalBlockStack.back());
   PresumedLoc PCLoc = SM.getPresumedLoc(CurLoc);
 
-  if (PCLoc.isInvalid() || Scope.getFilename() == PCLoc.getFilename())
+  if (PCLoc.isInvalid() || Scope->getFilename() == PCLoc.getFilename())
     return;
 
-  if (Scope.isLexicalBlockFile()) {
-    llvm::DILexicalBlockFile LBF = llvm::DILexicalBlockFile(Scope);
+  if (auto *LBF = dyn_cast<llvm::MDLexicalBlockFile>(Scope)) {
     llvm::DIDescriptor D = DBuilder.createLexicalBlockFile(
-        LBF.getScope(), getOrCreateFile(CurLoc));
+        LBF->getScope(), getOrCreateFile(CurLoc));
     llvm::MDNode *N = D;
     LexicalBlockStack.pop_back();
     LexicalBlockStack.emplace_back(N);
-  } else if (Scope.isLexicalBlock() || Scope.isSubprogram()) {
+  } else if (isa<llvm::MDLexicalBlock>(Scope) ||
+             isa<llvm::MDSubprogram>(Scope)) {
     llvm::DIDescriptor D =
         DBuilder.createLexicalBlockFile(Scope, getOrCreateFile(CurLoc));
     llvm::MDNode *N = D;
@@ -150,7 +150,7 @@
   auto I = RegionMap.find(Context);
   if (I != RegionMap.end()) {
     llvm::Metadata *V = I->second;
-    return llvm::DIScope(dyn_cast_or_null<llvm::MDNode>(V));
+    return dyn_cast_or_null<llvm::MDScope>(V);
   }
 
   // Check namespace.
@@ -250,14 +250,14 @@
 llvm::DIFile CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
   if (!Loc.isValid())
     // If Location is not valid then use main input file.
-    return DBuilder.createFile(TheCU.getFilename(), TheCU.getDirectory());
+    return DBuilder.createFile(TheCU->getFilename(), TheCU->getDirectory());
 
   SourceManager &SM = CGM.getContext().getSourceManager();
   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
 
   if (PLoc.isInvalid() || StringRef(PLoc.getFilename()).empty())
     // If the location is not valid then use main input file.
-    return DBuilder.createFile(TheCU.getFilename(), TheCU.getDirectory());
+    return DBuilder.createFile(TheCU->getFilename(), TheCU->getDirectory());
 
   // Cache the results.
   const char *fname = PLoc.getFilename();
@@ -266,7 +266,7 @@
   if (it != DIFileCache.end()) {
     // Verify that the information still exists.
     if (llvm::Metadata *V = it->second)
-      return llvm::DIFile(cast<llvm::MDNode>(V));
+      return cast<llvm::MDFile>(V);
   }
 
   llvm::DIFile F = DBuilder.createFile(PLoc.getFilename(), getCurrentDirname());
@@ -277,7 +277,7 @@
 
 /// getOrCreateMainFile - Get the file info for main compile unit.
 llvm::DIFile CGDebugInfo::getOrCreateMainFile() {
-  return DBuilder.createFile(TheCU.getFilename(), TheCU.getDirectory());
+  return DBuilder.createFile(TheCU->getFilename(), TheCU->getDirectory());
 }
 
 /// getLineNumber - Get line number for the location. If location is invalid
@@ -606,7 +606,7 @@
   // FIXME: ODR should apply to ObjC++ exactly the same wasy it does to C++.
   // For now, only apply ODR with C++.
   const TagDecl *TD = Ty->getDecl();
-  if (TheCU.getLanguage() != llvm::dwarf::DW_LANG_C_plus_plus ||
+  if (TheCU->getSourceLanguage() != llvm::dwarf::DW_LANG_C_plus_plus ||
       !TD->isExternallyVisible())
     return FullName;
   // Microsoft Mangler does not have support for mangleCXXRTTIName yet.
@@ -637,12 +637,12 @@
 }
 
 // Creates a forward declaration for a RecordDecl in the given context.
-llvm::DICompositeType
+llvm::MDCompositeType *
 CGDebugInfo::getOrCreateRecordFwdDecl(const RecordType *Ty,
-                                      llvm::DIDescriptor Ctx) {
+                                      llvm::MDScope *Ctx) {
   const RecordDecl *RD = Ty->getDecl();
-  if (llvm::DIType T = getTypeOrNull(CGM.getContext().getRecordType(RD)))
-    return llvm::DICompositeType(T);
+  if (llvm::MDType *T = getTypeOrNull(CGM.getContext().getRecordType(RD)))
+    return cast<llvm::MDCompositeType>(T);
   llvm::DIFile DefUnit = getOrCreateFile(RD->getLocation());
   unsigned Line = getLineNumber(RD->getLocation());
   StringRef RDName = getClassName(RD);
@@ -658,9 +658,9 @@
 
   // Create the type.
   SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU);
-  llvm::DICompositeType RetTy = DBuilder.createReplaceableCompositeType(
+  llvm::MDCompositeType *RetTy = DBuilder.createReplaceableCompositeType(
       getTagForRecord(RD), RDName, Ctx, DefUnit, Line, 0, Size, Align,
-      llvm::DIDescriptor::FlagFwdDecl, FullName);
+      llvm::DebugNode::FlagFwdDecl, FullName);
   ReplaceMap.emplace_back(
       std::piecewise_construct, std::make_tuple(Ty),
       std::make_tuple(static_cast<llvm::Metadata *>(RetTy)));
@@ -718,7 +718,7 @@
   Elements = DBuilder.getOrCreateArray(EltTys);
   EltTys.clear();
 
-  unsigned Flags = llvm::DIDescriptor::FlagAppleBlock;
+  unsigned Flags = llvm::DebugNode::FlagAppleBlock;
   unsigned LineNo = getLineNumber(CurLoc);
 
   EltTy = DBuilder.createStructType(Unit, "__block_descriptor", Unit, LineNo,
@@ -777,31 +777,21 @@
       Ty->getTemplateName().getAsTemplateDecl())->getTemplatedDecl();
 
   SourceLocation Loc = AliasDecl->getLocation();
-  llvm::DIFile File = getOrCreateFile(Loc);
-  unsigned Line = getLineNumber(Loc);
-
-  llvm::DIDescriptor Ctxt =
-      getContextDescriptor(cast<Decl>(AliasDecl->getDeclContext()));
-
-  return DBuilder.createTypedef(Src, internString(OS.str()), File, Line, Ctxt);
+  return DBuilder.createTypedef(
+      Src, internString(OS.str()), getOrCreateFile(Loc), getLineNumber(Loc),
+      getContextDescriptor(cast<Decl>(AliasDecl->getDeclContext())));
 }
 
 llvm::DIType CGDebugInfo::CreateType(const TypedefType *Ty, llvm::DIFile Unit) {
-  // Typedefs are derived from some other type.  If we have a typedef of a
-  // typedef, make sure to emit the whole chain.
-  llvm::DIType Src = getOrCreateType(Ty->getDecl()->getUnderlyingType(), Unit);
   // We don't set size information, but do specify where the typedef was
   // declared.
   SourceLocation Loc = Ty->getDecl()->getLocation();
-  llvm::DIFile File = getOrCreateFile(Loc);
-  unsigned Line = getLineNumber(Loc);
-  const TypedefNameDecl *TyDecl = Ty->getDecl();
 
-  llvm::DIDescriptor TypedefContext =
-      getContextDescriptor(cast<Decl>(Ty->getDecl()->getDeclContext()));
-
-  return DBuilder.createTypedef(Src, TyDecl->getName(), File, Line,
-                                TypedefContext);
+  // Typedefs are derived from some other type.
+  return DBuilder.createTypedef(
+      getOrCreateType(Ty->getDecl()->getUnderlyingType(), Unit),
+      Ty->getDecl()->getName(), getOrCreateFile(Loc), getLineNumber(Loc),
+      getContextDescriptor(cast<Decl>(Ty->getDecl()->getDeclContext())));
 }
 
 llvm::DIType CGDebugInfo::CreateType(const FunctionType *Ty,
@@ -841,11 +831,11 @@
 
   switch (Access) {
   case clang::AS_private:
-    return llvm::DIDescriptor::FlagPrivate;
+    return llvm::DebugNode::FlagPrivate;
   case clang::AS_protected:
-    return llvm::DIDescriptor::FlagProtected;
+    return llvm::DebugNode::FlagProtected;
   case clang::AS_public:
-    return llvm::DIDescriptor::FlagPublic;
+    return llvm::DebugNode::FlagPublic;
   case clang::AS_none:
     return 0;
   }
@@ -1003,8 +993,7 @@
         if (MI != StaticDataMemberCache.end()) {
           assert(MI->second &&
                  "Static data member declaration should still exist");
-          elements.push_back(
-              llvm::DIDerivedType(cast<llvm::MDNode>(MI->second)));
+          elements.push_back(cast<llvm::MDDerivedTypeBase>(MI->second));
         } else {
           auto Field = CreateRecordStaticField(V, RecordTy, record);
           elements.push_back(Field);
@@ -1022,27 +1011,29 @@
 /// getOrCreateMethodType - CXXMethodDecl's type is a FunctionType. This
 /// function type is not updated to include implicit "this" pointer. Use this
 /// routine to get a method type which includes "this" pointer.
-llvm::DICompositeType
+llvm::MDSubroutineType *
 CGDebugInfo::getOrCreateMethodType(const CXXMethodDecl *Method,
                                    llvm::DIFile Unit) {
   const FunctionProtoType *Func = Method->getType()->getAs<FunctionProtoType>();
   if (Method->isStatic())
-    return llvm::DICompositeType(getOrCreateType(QualType(Func, 0), Unit));
+    return cast_or_null<llvm::MDSubroutineType>(
+        getOrCreateType(QualType(Func, 0), Unit));
   return getOrCreateInstanceMethodType(Method->getThisType(CGM.getContext()),
                                        Func, Unit);
 }
 
-llvm::DICompositeType CGDebugInfo::getOrCreateInstanceMethodType(
+llvm::MDSubroutineType *CGDebugInfo::getOrCreateInstanceMethodType(
     QualType ThisPtr, const FunctionProtoType *Func, llvm::DIFile Unit) {
   // Add "this" pointer.
-  llvm::DITypeArray Args = llvm::DISubroutineType(
-      getOrCreateType(QualType(Func, 0), Unit)).getTypeArray();
-  assert(Args.getNumElements() && "Invalid number of arguments!");
+  llvm::DITypeArray Args(
+      cast<llvm::MDSubroutineType>(getOrCreateType(QualType(Func, 0), Unit))
+          ->getTypeArray());
+  assert(Args.size() && "Invalid number of arguments!");
 
   SmallVector<llvm::Metadata *, 16> Elts;
 
   // First element is always return type. For 'void' functions it is NULL.
-  Elts.push_back(Args.getElement(0));
+  Elts.push_back(Args[0]);
 
   // "this" pointer is always first argument.
   const CXXRecordDecl *RD = ThisPtr->getPointeeCXXRecordDecl();
@@ -1070,16 +1061,16 @@
   }
 
   // Copy rest of the arguments.
-  for (unsigned i = 1, e = Args.getNumElements(); i != e; ++i)
-    Elts.push_back(Args.getElement(i));
+  for (unsigned i = 1, e = Args.size(); i != e; ++i)
+    Elts.push_back(Args[i]);
 
   llvm::DITypeArray EltTypeArray = DBuilder.getOrCreateTypeArray(Elts);
 
   unsigned Flags = 0;
   if (Func->getExtProtoInfo().RefQualifier == RQ_LValue)
-    Flags |= llvm::DIDescriptor::FlagLValueReference;
+    Flags |= llvm::DebugNode::FlagLValueReference;
   if (Func->getExtProtoInfo().RefQualifier == RQ_RValue)
-    Flags |= llvm::DIDescriptor::FlagRValueReference;
+    Flags |= llvm::DebugNode::FlagRValueReference;
 
   return DBuilder.createSubroutineType(Unit, EltTypeArray, Flags);
 }
@@ -1103,7 +1094,7 @@
       isa<CXXConstructorDecl>(Method) || isa<CXXDestructorDecl>(Method);
 
   StringRef MethodName = getFunctionName(Method);
-  llvm::DICompositeType MethodTy = getOrCreateMethodType(Method, Unit);
+  llvm::MDSubroutineType *MethodTy = getOrCreateMethodType(Method, Unit);
 
   // Since a single ctor/dtor corresponds to multiple functions, it doesn't
   // make sense to give a single ctor/dtor a linkage name.
@@ -1143,29 +1134,29 @@
 
   unsigned Flags = 0;
   if (Method->isImplicit())
-    Flags |= llvm::DIDescriptor::FlagArtificial;
+    Flags |= llvm::DebugNode::FlagArtificial;
   Flags |= getAccessFlag(Method->getAccess(), Method->getParent());
   if (const CXXConstructorDecl *CXXC = dyn_cast<CXXConstructorDecl>(Method)) {
     if (CXXC->isExplicit())
-      Flags |= llvm::DIDescriptor::FlagExplicit;
+      Flags |= llvm::DebugNode::FlagExplicit;
   } else if (const CXXConversionDecl *CXXC =
                  dyn_cast<CXXConversionDecl>(Method)) {
     if (CXXC->isExplicit())
-      Flags |= llvm::DIDescriptor::FlagExplicit;
+      Flags |= llvm::DebugNode::FlagExplicit;
   }
   if (Method->hasPrototype())
-    Flags |= llvm::DIDescriptor::FlagPrototyped;
+    Flags |= llvm::DebugNode::FlagPrototyped;
   if (Method->getRefQualifier() == RQ_LValue)
-    Flags |= llvm::DIDescriptor::FlagLValueReference;
+    Flags |= llvm::DebugNode::FlagLValueReference;
   if (Method->getRefQualifier() == RQ_RValue)
-    Flags |= llvm::DIDescriptor::FlagRValueReference;
+    Flags |= llvm::DebugNode::FlagRValueReference;
 
   llvm::DIArray TParamsArray = CollectFunctionTemplateParams(Method, Unit);
   llvm::DISubprogram SP = DBuilder.createMethod(
       RecordTy, MethodName, MethodLinkageName, MethodDefUnit, MethodLine,
       MethodTy, /*isLocalToUnit=*/false,
       /* isDefinition=*/false, Virtuality, VIndex, ContainingType, Flags,
-      CGM.getLangOpts().Optimize, nullptr, TParamsArray);
+      CGM.getLangOpts().Optimize, nullptr, TParamsArray.get());
 
   SPCache[Method->getCanonicalDecl()].reset(SP);
 
@@ -1240,7 +1231,7 @@
         BaseOffset =
             4 * CGM.getMicrosoftVTableContext().getVBTableIndex(RD, Base);
       }
-      BFlags = llvm::DIDescriptor::FlagVirtual;
+      BFlags = llvm::DebugNode::FlagVirtual;
     } else
       BaseOffset = CGM.getContext().toBits(RL.getBaseClassOffset(Base));
     // FIXME: Inconsistent units for BaseOffset. It is in bytes when
@@ -1399,7 +1390,7 @@
 
 /// getOrCreateVTablePtrType - Return debug info descriptor for vtable.
 llvm::DIType CGDebugInfo::getOrCreateVTablePtrType(llvm::DIFile Unit) {
-  if (VTablePtrType.isValid())
+  if (VTablePtrType)
     return VTablePtrType;
 
   ASTContext &Context = CGM.getContext();
@@ -1438,7 +1429,7 @@
   unsigned Size = CGM.getContext().getTypeSize(CGM.getContext().VoidPtrTy);
   llvm::DIType VPTR = DBuilder.createMemberType(
       Unit, getVTableName(RD), Unit, 0, Size, 0, 0,
-      llvm::DIDescriptor::FlagArtificial, getOrCreateVTablePtrType(Unit));
+      llvm::DebugNode::FlagArtificial, getOrCreateVTablePtrType(Unit));
   EltTys.push_back(VPTR);
 }
 
@@ -1466,11 +1457,10 @@
   QualType Ty = CGM.getContext().getEnumType(ED);
   void *TyPtr = Ty.getAsOpaquePtr();
   auto I = TypeCache.find(TyPtr);
-  if (I == TypeCache.end() ||
-      !llvm::DIType(cast<llvm::MDNode>(I->second)).isForwardDecl())
+  if (I == TypeCache.end() || !cast<llvm::MDType>(I->second)->isForwardDecl())
     return;
   llvm::DIType Res = CreateTypeDefinition(Ty->castAs<EnumType>());
-  assert(!Res.isForwardDecl());
+  assert(!Res->isForwardDecl());
   TypeCache[TyPtr].reset(Res);
 }
 
@@ -1490,7 +1480,7 @@
 
   QualType Ty = CGM.getContext().getRecordType(RD);
   llvm::DIType T = getTypeOrNull(Ty);
-  if (T && T.isForwardDecl())
+  if (T && T->isForwardDecl())
     completeClassData(RD);
 }
 
@@ -1500,11 +1490,10 @@
   QualType Ty = CGM.getContext().getRecordType(RD);
   void *TyPtr = Ty.getAsOpaquePtr();
   auto I = TypeCache.find(TyPtr);
-  if (I != TypeCache.end() &&
-      !llvm::DIType(cast<llvm::MDNode>(I->second)).isForwardDecl())
+  if (I != TypeCache.end() && !cast<llvm::MDType>(I->second)->isForwardDecl())
     return;
   llvm::DIType Res = CreateTypeDefinition(Ty->castAs<RecordType>());
-  assert(!Res.isForwardDecl());
+  assert(!Res->isForwardDecl());
   TypeCache[TyPtr].reset(Res);
 }
 
@@ -1554,7 +1543,7 @@
 /// CreateType - get structure or union type.
 llvm::DIType CGDebugInfo::CreateType(const RecordType *Ty) {
   RecordDecl *RD = Ty->getDecl();
-  llvm::DICompositeType T(getTypeOrNull(QualType(Ty, 0)));
+  llvm::DIType T = cast_or_null<llvm::MDType>(getTypeOrNull(QualType(Ty, 0)));
   if (T || shouldOmitDefinition(DebugKind, RD, CGM.getLangOpts())) {
     if (!T)
       T = getOrCreateRecordFwdDecl(
@@ -1578,9 +1567,8 @@
   // may refer to the forward decl if the struct is recursive) and replace all
   // uses of the forward declaration with the final definition.
 
-  llvm::DICompositeType FwdDecl(getOrCreateLimitedType(Ty, DefUnit));
-  assert(FwdDecl.isCompositeType() &&
-         "The debug type of a RecordType should be a llvm::DICompositeType");
+  auto *FwdDecl =
+      cast<llvm::MDCompositeType>(getOrCreateLimitedType(Ty, DefUnit));
 
   const RecordDecl *D = RD->getDefinition();
   if (!D || !D->isCompleteDefinition())
@@ -1619,8 +1607,8 @@
   DBuilder.replaceArrays(FwdDecl, Elements);
 
   if (FwdDecl->isTemporary())
-    FwdDecl = llvm::DICompositeType(llvm::MDNode::replaceWithPermanent(
-      llvm::TempMDNode(FwdDecl.get())));
+    FwdDecl =
+        llvm::MDNode::replaceWithPermanent(llvm::TempMDCompositeType(FwdDecl));
 
   RegionMap[Ty->getDecl()].reset(FwdDecl);
   return FwdDecl;
@@ -1667,7 +1655,8 @@
   // Get overall information about the record type for the debug info.
   llvm::DIFile DefUnit = getOrCreateFile(ID->getLocation());
   unsigned Line = getLineNumber(ID->getLocation());
-  llvm::dwarf::SourceLanguage RuntimeLang = TheCU.getLanguage();
+  auto RuntimeLang =
+      static_cast<llvm::dwarf::SourceLanguage>(TheCU->getSourceLanguage());
 
   // If this is just a forward declaration return a special forward-declaration
   // debug type since we won't be able to lay out the entire type.
@@ -1688,7 +1677,7 @@
   ObjCInterfaceDecl *ID = Ty->getDecl();
   llvm::DIFile DefUnit = getOrCreateFile(ID->getLocation());
   unsigned Line = getLineNumber(ID->getLocation());
-  unsigned RuntimeLang = TheCU.getLanguage();
+  unsigned RuntimeLang = TheCU->getSourceLanguage();
 
   // Bit size, align and offset of the type.
   uint64_t Size = CGM.getContext().getTypeSize(Ty);
@@ -1696,9 +1685,9 @@
 
   unsigned Flags = 0;
   if (ID->getImplementation())
-    Flags |= llvm::DIDescriptor::FlagObjcClassComplete;
+    Flags |= llvm::DebugNode::FlagObjcClassComplete;
 
-  llvm::DICompositeType RealDecl = DBuilder.createStructType(
+  llvm::MDCompositeType *RealDecl = DBuilder.createStructType(
       Unit, ID->getName(), DefUnit, Line, Size, Align, Flags, llvm::DIType(),
       llvm::DIArray(), RuntimeLang);
 
@@ -1706,7 +1695,7 @@
   TypeCache[QTy.getAsOpaquePtr()].reset(RealDecl);
 
   // Push the struct on region stack.
-  LexicalBlockStack.emplace_back(static_cast<llvm::MDNode *>(RealDecl));
+  LexicalBlockStack.emplace_back(RealDecl);
   RegionMap[Ty->getDecl()].reset(RealDecl);
 
   // Convert all the elements.
@@ -1716,7 +1705,7 @@
   if (SClass) {
     llvm::DIType SClassTy =
         getOrCreateType(CGM.getContext().getObjCInterfaceType(SClass), Unit);
-    if (!SClassTy.isValid())
+    if (!SClassTy)
       return llvm::DIType();
 
     llvm::DIType InhTag = DBuilder.createInheritance(RealDecl, SClassTy, 0, 0);
@@ -1745,7 +1734,7 @@
   for (ObjCIvarDecl *Field = ID->all_declared_ivar_begin(); Field;
        Field = Field->getNextIvar(), ++FieldNo) {
     llvm::DIType FieldTy = getOrCreateType(Field->getType(), Unit);
-    if (!FieldTy.isValid())
+    if (!FieldTy)
       return llvm::DIType();
 
     StringRef FieldName = Field->getName();
@@ -1788,11 +1777,11 @@
 
     unsigned Flags = 0;
     if (Field->getAccessControl() == ObjCIvarDecl::Protected)
-      Flags = llvm::DIDescriptor::FlagProtected;
+      Flags = llvm::DebugNode::FlagProtected;
     else if (Field->getAccessControl() == ObjCIvarDecl::Private)
-      Flags = llvm::DIDescriptor::FlagPrivate;
+      Flags = llvm::DebugNode::FlagPrivate;
     else if (Field->getAccessControl() == ObjCIvarDecl::Public)
-      Flags = llvm::DIDescriptor::FlagPublic;
+      Flags = llvm::DebugNode::FlagPublic;
 
     llvm::MDNode *PropertyNode = nullptr;
     if (ObjCImplementationDecl *ImpD = ID->getImplementation()) {
@@ -1948,14 +1937,14 @@
   // If this is just a forward declaration, construct an appropriately
   // marked node and just return it.
   if (!ED->getDefinition()) {
-    llvm::DIDescriptor EDContext;
-    EDContext = getContextDescriptor(cast<Decl>(ED->getDeclContext()));
+    llvm::MDScope *EDContext =
+        getContextDescriptor(cast<Decl>(ED->getDeclContext()));
     llvm::DIFile DefUnit = getOrCreateFile(ED->getLocation());
     unsigned Line = getLineNumber(ED->getLocation());
     StringRef EDName = ED->getName();
     llvm::DIType RetTy = DBuilder.createReplaceableCompositeType(
         llvm::dwarf::DW_TAG_enumeration_type, EDName, EDContext, DefUnit, Line,
-        0, Size, Align, llvm::DIDescriptor::FlagFwdDecl, FullName);
+        0, Size, Align, llvm::DebugNode::FlagFwdDecl, FullName);
     ReplaceMap.emplace_back(
         std::piecewise_construct, std::make_tuple(Ty),
         std::make_tuple(static_cast<llvm::Metadata *>(RetTy)));
@@ -1989,7 +1978,7 @@
 
   llvm::DIFile DefUnit = getOrCreateFile(ED->getLocation());
   unsigned Line = getLineNumber(ED->getLocation());
-  llvm::DIDescriptor EnumContext =
+  llvm::MDScope *EnumContext =
       getContextDescriptor(cast<Decl>(ED->getDeclContext()));
   llvm::DIType ClassTy = ED->isFixed()
                              ? getOrCreateType(ED->getIntegerType(), DefUnit)
@@ -2066,10 +2055,10 @@
   if (it != TypeCache.end()) {
     // Verify that the debug info still exists.
     if (llvm::Metadata *V = it->second)
-      return llvm::DIType(cast<llvm::MDNode>(V));
+      return cast<llvm::MDType>(V);
   }
 
-  return llvm::DIType();
+  return nullptr;
 }
 
 void CGDebugInfo::completeTemplateDefinition(
@@ -2218,21 +2207,21 @@
                                                  llvm::DIFile Unit) {
   QualType QTy(Ty, 0);
 
-  llvm::DICompositeType T(getTypeOrNull(QTy));
+  auto *T = cast_or_null<llvm::MDCompositeTypeBase>(getTypeOrNull(QTy));
 
   // We may have cached a forward decl when we could have created
   // a non-forward decl. Go ahead and create a non-forward decl
   // now.
-  if (T && !T.isForwardDecl())
+  if (T && !T->isForwardDecl())
     return T;
 
   // Otherwise create the type.
-  llvm::DICompositeType Res = CreateLimitedType(Ty);
+  llvm::MDCompositeType *Res = CreateLimitedType(Ty);
 
   // Propagate members from the declaration to the definition
   // CreateType(const RecordType*) will overwrite this with the members in the
   // correct order if the full type is needed.
-  DBuilder.replaceArrays(Res, T ? T.getElements() : llvm::DIArray());
+  DBuilder.replaceArrays(Res, T ? T->getElements() : llvm::DIArray());
 
   // And update the type cache.
   TypeCache[QTy.getAsOpaquePtr()].reset(Res);
@@ -2240,7 +2229,7 @@
 }
 
 // TODO: Currently used for context chains when limiting debug info.
-llvm::DICompositeType CGDebugInfo::CreateLimitedType(const RecordType *Ty) {
+llvm::MDCompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) {
   RecordDecl *RD = Ty->getDecl();
 
   // Get overall information about the record type for the debug info.
@@ -2248,13 +2237,14 @@
   unsigned Line = getLineNumber(RD->getLocation());
   StringRef RDName = getClassName(RD);
 
-  llvm::DIDescriptor RDContext =
+  llvm::MDScope *RDContext =
       getContextDescriptor(cast<Decl>(RD->getDeclContext()));
 
   // If we ended up creating the type during the context chain construction,
   // just return that.
-  llvm::DICompositeType T(getTypeOrNull(CGM.getContext().getRecordType(RD)));
-  if (T && (!T.isForwardDecl() || !RD->getDefinition()))
+  auto *T = cast_or_null<llvm::MDCompositeType>(
+      getTypeOrNull(CGM.getContext().getRecordType(RD)));
+  if (T && (!T->isForwardDecl() || !RD->getDefinition()))
     return T;
 
   // If this is just a forward or incomplete declaration, construct an
@@ -2265,12 +2255,12 @@
 
   uint64_t Size = CGM.getContext().getTypeSize(Ty);
   uint64_t Align = CGM.getContext().getTypeAlign(Ty);
-  llvm::DICompositeType RealDecl;
 
   SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU);
 
-  RealDecl = DBuilder.createReplaceableCompositeType(getTagForRecord(RD),
-      RDName, RDContext, DefUnit, Line, 0, Size, Align, 0, FullName);
+  llvm::MDCompositeType *RealDecl = DBuilder.createReplaceableCompositeType(
+      getTagForRecord(RD), RDName, RDContext, DefUnit, Line, 0, Size, Align, 0,
+      FullName);
 
   RegionMap[Ty->getDecl()].reset(RealDecl);
   TypeCache[QualType(Ty, 0).getAsOpaquePtr()].reset(RealDecl);
@@ -2283,9 +2273,9 @@
 }
 
 void CGDebugInfo::CollectContainingType(const CXXRecordDecl *RD,
-                                        llvm::DICompositeType RealDecl) {
+                                        llvm::MDCompositeType *RealDecl) {
   // A class's primary base or the class itself contains the vtable.
-  llvm::DICompositeType ContainingType;
+  llvm::MDCompositeType *ContainingType = nullptr;
   const ASTRecordLayout &RL = CGM.getContext().getASTRecordLayout(RD);
   if (const CXXRecordDecl *PBase = RL.getPrimaryBase()) {
     // Seek non-virtual primary base root.
@@ -2297,7 +2287,7 @@
       else
         break;
     }
-    ContainingType = llvm::DICompositeType(
+    ContainingType = cast<llvm::MDCompositeType>(
         getOrCreateType(QualType(PBase->getTypeForDecl(), 0),
                         getOrCreateFile(RD->getLocation())));
   } else if (RD->isDynamicClass())
@@ -2318,18 +2308,15 @@
   return Ty;
 }
 
-void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD,
-                                           llvm::DIFile Unit,
-                                           StringRef &Name, StringRef &LinkageName,
-                                           llvm::DIDescriptor &FDContext,
-                                           llvm::DIArray &TParamsArray,
-                                           unsigned &Flags) {
+void CGDebugInfo::collectFunctionDeclProps(
+    GlobalDecl GD, llvm::DIFile Unit, StringRef &Name, StringRef &LinkageName,
+    llvm::MDScope *&FDContext, llvm::DIArray &TParamsArray, unsigned &Flags) {
   const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
   Name = getFunctionName(FD);
   // Use mangled name as linkage name for C/C++ functions.
   if (FD->hasPrototype()) {
     LinkageName = CGM.getMangledName(GD);
-    Flags |= llvm::DIDescriptor::FlagPrototyped;
+    Flags |= llvm::DebugNode::FlagPrototyped;
   }
   // No need to replicate the linkage name if it isn't different from the
   // subprogram name, no need to have it at all unless coverage is enabled or
@@ -2355,7 +2342,7 @@
 void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile &Unit,
                                       unsigned &LineNo, QualType &T,
                                       StringRef &Name, StringRef &LinkageName,
-                                      llvm::DIDescriptor &VDContext) {
+                                      llvm::MDScope *&VDContext) {
   Unit = getOrCreateFile(VD->getLocation());
   LineNo = getLineNumber(VD->getLocation());
 
@@ -2404,7 +2391,7 @@
   unsigned Flags = 0;
   SourceLocation Loc = FD->getLocation();
   llvm::DIFile Unit = getOrCreateFile(Loc);
-  llvm::DIDescriptor DContext(Unit);
+  llvm::MDScope *DContext = Unit;
   unsigned Line = getLineNumber(Loc);
 
   collectFunctionDeclProps(FD, Unit, Name, LinkageName, DContext,
@@ -2416,17 +2403,15 @@
   QualType FnType =
     CGM.getContext().getFunctionType(FD->getReturnType(), ArgTypes,
                                      FunctionProtoType::ExtProtoInfo());
-  llvm::DISubprogram SP =
-    DBuilder.createTempFunctionFwdDecl(DContext, Name, LinkageName, Unit, Line,
-                                       getOrCreateFunctionType(FD, FnType, Unit),
-                                       !FD->isExternallyVisible(),
-                                       false /*declaration*/, 0, Flags,
-                                       CGM.getLangOpts().Optimize, nullptr,
-                                       TParamsArray, getFunctionDeclaration(FD));
+  llvm::MDSubprogram *SP = DBuilder.createTempFunctionFwdDecl(
+      DContext, Name, LinkageName, Unit, Line,
+      getOrCreateFunctionType(FD, FnType, Unit), !FD->isExternallyVisible(),
+      false /*declaration*/, 0, Flags, CGM.getLangOpts().Optimize, nullptr,
+      TParamsArray.get(), getFunctionDeclaration(FD));
   const FunctionDecl *CanonDecl = cast<FunctionDecl>(FD->getCanonicalDecl());
-  FwdDeclReplaceMap.emplace_back(
-      std::piecewise_construct, std::make_tuple(CanonDecl),
-      std::make_tuple(static_cast<llvm::Metadata *>(SP)));
+  FwdDeclReplaceMap.emplace_back(std::piecewise_construct,
+                                 std::make_tuple(CanonDecl),
+                                 std::make_tuple(SP));
   return SP;
 }
 
@@ -2436,7 +2421,7 @@
   StringRef Name, LinkageName;
   SourceLocation Loc = VD->getLocation();
   llvm::DIFile Unit = getOrCreateFile(Loc);
-  llvm::DIDescriptor DContext(Unit);
+  llvm::MDScope *DContext = Unit;
   unsigned Line = getLineNumber(Loc);
 
   collectVarDeclProps(VD, Unit, Line, T, Name, LinkageName, DContext);
@@ -2452,7 +2437,7 @@
   return GV;
 }
 
-llvm::DIDescriptor CGDebugInfo::getDeclarationOrDefinition(const Decl *D) {
+llvm::DebugNode *CGDebugInfo::getDeclarationOrDefinition(const Decl *D) {
   // We only need a declaration (not a definition) of the type - so use whatever
   // we would otherwise do to get a type for a pointee. (forward declarations in
   // limited debug info, full definitions (if the type definition is available)
@@ -2463,7 +2448,7 @@
   auto I = DeclCache.find(D->getCanonicalDecl());
 
   if (I != DeclCache.end())
-    return llvm::DIDescriptor(dyn_cast_or_null<llvm::MDNode>(I->second));
+    return dyn_cast_or_null<llvm::DebugNode>(I->second);
 
   // No definition for now. Emit a forward definition that might be
   // merged with a potential upcoming definition.
@@ -2472,7 +2457,7 @@
   else if (const auto *VD = dyn_cast<VarDecl>(D))
     return getGlobalVariableForwardDeclaration(VD);
 
-  return llvm::DIDescriptor();
+  return nullptr;
 }
 
 /// getFunctionDeclaration - Return debug info descriptor to describe method
@@ -2492,23 +2477,23 @@
   if (MI == SPCache.end()) {
     if (const CXXMethodDecl *MD =
             dyn_cast<CXXMethodDecl>(FD->getCanonicalDecl())) {
-      llvm::DICompositeType T(S);
+      llvm::DICompositeType T = cast<llvm::MDCompositeType>(S);
       llvm::DISubprogram SP =
           CreateCXXMemberFunction(MD, getOrCreateFile(MD->getLocation()), T);
       return SP;
     }
   }
   if (MI != SPCache.end()) {
-    llvm::DISubprogram SP(dyn_cast_or_null<llvm::MDNode>(MI->second));
-    if (SP.isSubprogram() && !SP.isDefinition())
+    auto *SP = dyn_cast_or_null<llvm::MDSubprogram>(MI->second);
+    if (SP && !SP->isDefinition())
       return SP;
   }
 
   for (auto NextFD : FD->redecls()) {
     auto MI = SPCache.find(NextFD->getCanonicalDecl());
     if (MI != SPCache.end()) {
-      llvm::DISubprogram SP(dyn_cast_or_null<llvm::MDNode>(MI->second));
-      if (SP.isSubprogram() && !SP.isDefinition())
+      auto *SP = dyn_cast_or_null<llvm::MDSubprogram>(MI->second);
+      if (SP && !SP->isDefinition())
         return SP;
     }
   }
@@ -2517,9 +2502,9 @@
 
 // getOrCreateFunctionType - Construct DIType. If it is a c++ method, include
 // implicit parameter "this".
-llvm::DICompositeType CGDebugInfo::getOrCreateFunctionType(const Decl *D,
-                                                           QualType FnType,
-                                                           llvm::DIFile F) {
+llvm::MDSubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D,
+                                                             QualType FnType,
+                                                             llvm::DIFile F) {
   if (!D || DebugKind <= CodeGenOptions::DebugLineTablesOnly)
     // Create fake but valid subroutine type. Otherwise
     // llvm::DISubprogram::Verify() would return false, and
@@ -2575,7 +2560,7 @@
       return DBuilder.createSubroutineType(F, EltTypeArray);
     }
 
-  return llvm::DICompositeType(getOrCreateType(FnType, F));
+  return cast<llvm::MDSubroutineType>(getOrCreateType(FnType, F));
 }
 
 /// EmitFunctionStart - Constructs the debug code for entering a function.
@@ -2593,7 +2578,7 @@
 
   unsigned Flags = 0;
   llvm::DIFile Unit = getOrCreateFile(Loc);
-  llvm::DIDescriptor FDContext(Unit);
+  llvm::MDScope *FDContext = Unit;
   llvm::DIArray TParamsArray;
   if (!HasDecl) {
     // Use llvm function name.
@@ -2602,8 +2587,8 @@
     // If there is a DISubprogram for this function available then use it.
     auto FI = SPCache.find(FD->getCanonicalDecl());
     if (FI != SPCache.end()) {
-      llvm::DIDescriptor SP(dyn_cast_or_null<llvm::MDNode>(FI->second));
-      if (SP.isSubprogram() && llvm::DISubprogram(SP).isDefinition()) {
+      auto *SP = dyn_cast_or_null<llvm::MDSubprogram>(FI->second);
+      if (SP && SP->isDefinition()) {
         llvm::MDNode *SPN = SP;
         LexicalBlockStack.emplace_back(SPN);
         RegionMap[D].reset(SP);
@@ -2614,17 +2599,17 @@
                              TParamsArray, Flags);
   } else if (const ObjCMethodDecl *OMD = dyn_cast<ObjCMethodDecl>(D)) {
     Name = getObjCMethodName(OMD);
-    Flags |= llvm::DIDescriptor::FlagPrototyped;
+    Flags |= llvm::DebugNode::FlagPrototyped;
   } else {
     // Use llvm function name.
     Name = Fn->getName();
-    Flags |= llvm::DIDescriptor::FlagPrototyped;
+    Flags |= llvm::DebugNode::FlagPrototyped;
   }
   if (!Name.empty() && Name[0] == '\01')
     Name = Name.substr(1);
 
   if (!HasDecl || D->isImplicit()) {
-    Flags |= llvm::DIDescriptor::FlagArtificial;
+    Flags |= llvm::DebugNode::FlagArtificial;
     // Artificial functions without a location should not silently reuse CurLoc.
     if (Loc.isInvalid())
       CurLoc = SourceLocation();
@@ -2641,7 +2626,7 @@
       FDContext, Name, LinkageName, Unit, LineNo,
       getOrCreateFunctionType(D, FnType, Unit), Fn->hasInternalLinkage(),
       true /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize, Fn,
-      TParamsArray, getFunctionDeclaration(D));
+      TParamsArray.get(), getFunctionDeclaration(D));
   // We might get here with a VarDecl in the case we're generating
   // code for the initialization of globals. Do not record these decls
   // as they will overwrite the actual VarDecl Decl in the cache.
@@ -2678,7 +2663,7 @@
   if (!LexicalBlockStack.empty())
     Back = LexicalBlockStack.back().get();
   llvm::DIDescriptor D = DBuilder.createLexicalBlock(
-      llvm::DIDescriptor(Back), getOrCreateFile(CurLoc), getLineNumber(CurLoc),
+      cast<llvm::MDScope>(Back), getOrCreateFile(CurLoc), getLineNumber(CurLoc),
       getColumnNumber(CurLoc));
   llvm::MDNode *DN = D;
   LexicalBlockStack.emplace_back(DN);
@@ -2801,7 +2786,7 @@
 
   llvm::DIArray Elements = DBuilder.getOrCreateArray(EltTys);
 
-  unsigned Flags = llvm::DIDescriptor::FlagBlockByrefStruct;
+  unsigned Flags = llvm::DebugNode::FlagBlockByrefStruct;
 
   return DBuilder.createStructType(Unit, "", Unit, 0, FieldOffset, 0, Flags,
                                    llvm::DIType(), Elements);
@@ -2842,20 +2827,20 @@
   SmallVector<int64_t, 9> Expr;
   unsigned Flags = 0;
   if (VD->isImplicit())
-    Flags |= llvm::DIDescriptor::FlagArtificial;
+    Flags |= llvm::DebugNode::FlagArtificial;
   // If this is the first argument and it is implicit then
   // give it an object pointer flag.
   // FIXME: There has to be a better way to do this, but for static
   // functions there won't be an implicit param at arg1 and
   // otherwise it is 'self' or 'this'.
   if (isa<ImplicitParamDecl>(VD) && ArgNo == 1)
-    Flags |= llvm::DIDescriptor::FlagObjectPointer;
+    Flags |= llvm::DebugNode::FlagObjectPointer;
   if (llvm::Argument *Arg = dyn_cast<llvm::Argument>(Storage))
     if (Arg->getType()->isPointerTy() && !Arg->hasByValAttr() &&
         !VD->getType()->isPointerType())
       Expr.push_back(llvm::dwarf::DW_OP_deref);
 
-  llvm::MDNode *Scope = LexicalBlockStack.back();
+  auto *Scope = cast<llvm::MDScope>(LexicalBlockStack.back());
 
   StringRef Name = VD->getName();
   if (!Name.empty()) {
@@ -2874,13 +2859,12 @@
 
       // Create the descriptor for the variable.
       llvm::DIVariable D = DBuilder.createLocalVariable(
-          Tag, llvm::DIDescriptor(Scope), VD->getName(), Unit, Line, Ty, ArgNo);
+          Tag, Scope, VD->getName(), Unit, Line, Ty, ArgNo);
 
       // Insert an llvm.dbg.declare into the current block.
-      llvm::Instruction *Call =
-          DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr),
-                                 Builder.GetInsertBlock());
-      Call->setDebugLoc(llvm::DebugLoc::get(Line, Column, Scope));
+      DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr),
+                             llvm::DebugLoc::get(Line, Column, Scope),
+                             Builder.GetInsertBlock());
       return;
     } else if (isa<VariableArrayType>(VD->getType()))
       Expr.push_back(llvm::dwarf::DW_OP_deref);
@@ -2899,28 +2883,27 @@
 
         // Use VarDecl's Tag, Scope and Line number.
         llvm::DIVariable D = DBuilder.createLocalVariable(
-            Tag, llvm::DIDescriptor(Scope), FieldName, Unit, Line, FieldTy,
+            Tag, Scope, FieldName, Unit, Line, FieldTy,
             CGM.getLangOpts().Optimize, Flags, ArgNo);
 
         // Insert an llvm.dbg.declare into the current block.
-        llvm::Instruction *Call = DBuilder.insertDeclare(
-            Storage, D, DBuilder.createExpression(Expr),
-            Builder.GetInsertBlock());
-        Call->setDebugLoc(llvm::DebugLoc::get(Line, Column, Scope));
+        DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr),
+                               llvm::DebugLoc::get(Line, Column, Scope),
+                               Builder.GetInsertBlock());
       }
       return;
     }
   }
 
   // Create the descriptor for the variable.
-  llvm::DIVariable D = DBuilder.createLocalVariable(
-      Tag, llvm::DIDescriptor(Scope), Name, Unit, Line, Ty,
-      CGM.getLangOpts().Optimize, Flags, ArgNo);
+  llvm::DIVariable D =
+      DBuilder.createLocalVariable(Tag, Scope, Name, Unit, Line, Ty,
+                                   CGM.getLangOpts().Optimize, Flags, ArgNo);
 
   // Insert an llvm.dbg.declare into the current block.
-  llvm::Instruction *Call = DBuilder.insertDeclare(
-      Storage, D, DBuilder.createExpression(Expr), Builder.GetInsertBlock());
-  Call->setDebugLoc(llvm::DebugLoc::get(Line, Column, Scope));
+  DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr),
+                         llvm::DebugLoc::get(Line, Column, Scope),
+                         Builder.GetInsertBlock());
 }
 
 void CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD,
@@ -2998,19 +2981,19 @@
   }
 
   // Create the descriptor for the variable.
-  llvm::DIVariable D =
-      DBuilder.createLocalVariable(llvm::dwarf::DW_TAG_auto_variable,
-                                   llvm::DIDescriptor(LexicalBlockStack.back()),
-                                   VD->getName(), Unit, Line, Ty);
+  llvm::DIVariable D = DBuilder.createLocalVariable(
+      llvm::dwarf::DW_TAG_auto_variable,
+      cast<llvm::MDLocalScope>(LexicalBlockStack.back()), VD->getName(), Unit,
+      Line, Ty);
 
   // Insert an llvm.dbg.declare into the current block.
-  llvm::Instruction *Call = InsertPoint ?
-      DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(addr),
-                             InsertPoint)
-    : DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(addr),
-                             Builder.GetInsertBlock());
-  Call->setDebugLoc(
-      llvm::DebugLoc::get(Line, Column, LexicalBlockStack.back()));
+  auto DL = llvm::DebugLoc::get(Line, Column, LexicalBlockStack.back());
+  if (InsertPoint)
+    DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(addr), DL,
+                           InsertPoint);
+  else
+    DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(addr), DL,
+                           Builder.GetInsertBlock());
 }
 
 /// EmitDeclareOfArgVariable - Emit call to llvm.dbg.declare for an argument
@@ -3158,27 +3141,25 @@
   type = DBuilder.createPointerType(type, CGM.PointerWidthInBits);
 
   // Get overall information about the block.
-  unsigned flags = llvm::DIDescriptor::FlagArtificial;
-  llvm::MDNode *scope = LexicalBlockStack.back();
+  unsigned flags = llvm::DebugNode::FlagArtificial;
+  auto *scope = cast<llvm::MDLocalScope>(LexicalBlockStack.back());
 
   // Create the descriptor for the parameter.
   llvm::DIVariable debugVar = DBuilder.createLocalVariable(
-      llvm::dwarf::DW_TAG_arg_variable, llvm::DIDescriptor(scope),
-      Arg->getName(), tunit, line, type, CGM.getLangOpts().Optimize, flags,
-      ArgNo);
+      llvm::dwarf::DW_TAG_arg_variable, scope, Arg->getName(), tunit, line,
+      type, CGM.getLangOpts().Optimize, flags, ArgNo);
 
   if (LocalAddr) {
     // Insert an llvm.dbg.value into the current block.
-    llvm::Instruction *DbgVal = DBuilder.insertDbgValueIntrinsic(
+    DBuilder.insertDbgValueIntrinsic(
         LocalAddr, 0, debugVar, DBuilder.createExpression(),
-        Builder.GetInsertBlock());
-    DbgVal->setDebugLoc(llvm::DebugLoc::get(line, column, scope));
+        llvm::DebugLoc::get(line, column, scope), Builder.GetInsertBlock());
   }
 
   // Insert an llvm.dbg.declare into the current block.
-  llvm::Instruction *DbgDecl = DBuilder.insertDeclare(
-      Arg, debugVar, DBuilder.createExpression(), Builder.GetInsertBlock());
-  DbgDecl->setDebugLoc(llvm::DebugLoc::get(line, column, scope));
+  DBuilder.insertDeclare(Arg, debugVar, DBuilder.createExpression(),
+                         llvm::DebugLoc::get(line, column, scope),
+                         Builder.GetInsertBlock());
 }
 
 /// If D is an out-of-class definition of a static data member of a class, find
@@ -3191,24 +3172,23 @@
   auto MI = StaticDataMemberCache.find(D->getCanonicalDecl());
   if (MI != StaticDataMemberCache.end()) {
     assert(MI->second && "Static data member declaration should still exist");
-    return llvm::DIDerivedType(cast<llvm::MDNode>(MI->second));
+    return cast<llvm::MDDerivedTypeBase>(MI->second);
   }
 
   // If the member wasn't found in the cache, lazily construct and add it to the
   // type (used when a limited form of the type is emitted).
   auto DC = D->getDeclContext();
-  llvm::DICompositeType Ctxt(getContextDescriptor(cast<Decl>(DC)));
+  llvm::DICompositeType Ctxt =
+      cast<llvm::MDCompositeType>(getContextDescriptor(cast<Decl>(DC)));
   return CreateRecordStaticField(D, Ctxt, cast<RecordDecl>(DC));
 }
 
 /// Recursively collect all of the member fields of a global anonymous decl and
 /// create static variables for them. The first time this is called it needs
 /// to be on a union and then from there we can have additional unnamed fields.
-llvm::DIGlobalVariable
-CGDebugInfo::CollectAnonRecordDecls(const RecordDecl *RD, llvm::DIFile Unit,
-                                    unsigned LineNo, StringRef LinkageName,
-                                    llvm::GlobalVariable *Var,
-                                    llvm::DIDescriptor DContext) {
+llvm::DIGlobalVariable CGDebugInfo::CollectAnonRecordDecls(
+    const RecordDecl *RD, llvm::DIFile Unit, unsigned LineNo,
+    StringRef LinkageName, llvm::GlobalVariable *Var, llvm::MDScope *DContext) {
   llvm::DIGlobalVariable GV;
 
   for (const auto *Field : RD->fields()) {
@@ -3237,7 +3217,7 @@
   assert(DebugKind >= CodeGenOptions::LimitedDebugInfo);
   // Create global variable debug descriptor.
   llvm::DIFile Unit;
-  llvm::DIDescriptor DContext;
+  llvm::MDScope *DContext = nullptr;
   unsigned LineNo;
   StringRef DeclName, LinkageName;
   QualType T;
@@ -3278,7 +3258,7 @@
     Ty = getOrCreateType(QualType(ED->getTypeForDecl(), 0), Unit);
   }
   // Do not use DIGlobalVariable for enums.
-  if (Ty.getTag() == llvm::dwarf::DW_TAG_enumeration_type)
+  if (Ty->getTag() == llvm::dwarf::DW_TAG_enumeration_type)
     return;
   // Do not emit separate definitions for function local const/statics.
   if (isa<FunctionDecl>(VD->getDeclContext()))
@@ -3294,7 +3274,7 @@
     return;
   }
 
-  llvm::DIDescriptor DContext =
+  llvm::MDScope *DContext =
       getContextDescriptor(dyn_cast<Decl>(VD->getDeclContext()));
 
   auto &GV = DeclCache[VD];
@@ -3307,7 +3287,7 @@
 
 llvm::DIScope CGDebugInfo::getCurrentContextDescriptor(const Decl *D) {
   if (!LexicalBlockStack.empty())
-    return llvm::DIScope(LexicalBlockStack.back());
+    return cast<llvm::MDScope>(LexicalBlockStack.back());
   return getContextDescriptor(D);
 }
 
@@ -3328,7 +3308,7 @@
   // Emitting one decl is sufficient - debuggers can detect that this is an
   // overloaded name & provide lookup for all the overloads.
   const UsingShadowDecl &USD = **UD.shadow_begin();
-  if (llvm::DIDescriptor Target =
+  if (llvm::DebugNode *Target =
           getDeclarationOrDefinition(USD.getUnderlyingDecl()))
     DBuilder.createImportedDeclaration(
         getCurrentContextDescriptor(cast<Decl>(USD.getDeclContext())), Target,
@@ -3341,7 +3321,7 @@
     return llvm::DIImportedEntity();
   auto &VH = NamespaceAliasCache[&NA];
   if (VH)
-    return llvm::DIImportedEntity(cast<llvm::MDNode>(VH));
+    return cast<llvm::MDImportedEntity>(VH);
   llvm::DIImportedEntity R;
   if (const NamespaceAliasDecl *Underlying =
           dyn_cast<NamespaceAliasDecl>(NA.getAliasedNamespace()))
@@ -3366,12 +3346,12 @@
   NSDecl = NSDecl->getCanonicalDecl();
   auto I = NameSpaceCache.find(NSDecl);
   if (I != NameSpaceCache.end())
-    return llvm::DINameSpace(cast<llvm::MDNode>(I->second));
+    return cast<llvm::MDNamespace>(I->second);
 
   unsigned LineNo = getLineNumber(NSDecl->getLocation());
   llvm::DIFile FileD = getOrCreateFile(NSDecl->getLocation());
-  llvm::DIDescriptor Context =
-    getContextDescriptor(dyn_cast<Decl>(NSDecl->getDeclContext()));
+  llvm::MDScope *Context =
+      getContextDescriptor(dyn_cast<Decl>(NSDecl->getDeclContext()));
   llvm::DINameSpace NS =
     DBuilder.createNameSpace(Context, NSDecl->getName(), FileD, LineNo);
   NameSpaceCache[NSDecl].reset(NS);
@@ -3383,23 +3363,23 @@
   // element and the size(), so don't cache/reference them.
   for (size_t i = 0; i != ObjCInterfaceCache.size(); ++i) {
     ObjCInterfaceCacheEntry E = ObjCInterfaceCache[i];
-    E.Decl.replaceAllUsesWith(CGM.getLLVMContext(),
-                              E.Type->getDecl()->getDefinition()
-                                  ? CreateTypeDefinition(E.Type, E.Unit)
-                                  : E.Decl);
+    llvm::MDType *Ty = E.Type->getDecl()->getDefinition()
+                           ? CreateTypeDefinition(E.Type, E.Unit)
+                           : E.Decl;
+    DBuilder.replaceTemporary(llvm::TempMDType(E.Decl), Ty);
   }
 
   for (auto p : ReplaceMap) {
     assert(p.second);
-    llvm::DIType Ty(cast<llvm::MDNode>(p.second));
-    assert(Ty.isForwardDecl());
+    auto *Ty = cast<llvm::MDType>(p.second);
+    assert(Ty->isForwardDecl());
 
     auto it = TypeCache.find(p.first);
     assert(it != TypeCache.end());
     assert(it->second);
 
-    llvm::DIType RepTy(cast<llvm::MDNode>(it->second));
-    Ty.replaceAllUsesWith(CGM.getLLVMContext(), RepTy);
+    DBuilder.replaceTemporary(llvm::TempMDType(Ty),
+                               cast<llvm::MDType>(it->second));
   }
 
   for (const auto &p : FwdDeclReplaceMap) {
@@ -3416,15 +3396,15 @@
     else
       Repl = it->second;
 
-    FwdDecl.replaceAllUsesWith(CGM.getLLVMContext(),
-                               llvm::DIDescriptor(cast<llvm::MDNode>(Repl)));
+    DBuilder.replaceTemporary(llvm::TempMDNode(FwdDecl),
+                              cast<llvm::MDNode>(Repl));
   }
 
   // We keep our own list of retained types, because we need to look
   // up the final type in the type cache.
   for (std::vector<void *>::const_iterator RI = RetainedTypes.begin(),
          RE = RetainedTypes.end(); RI != RE; ++RI)
-    DBuilder.retainType(llvm::DIType(cast<llvm::MDNode>(TypeCache[*RI])));
+    DBuilder.retainType(cast<llvm::MDType>(TypeCache[*RI]));
 
   DBuilder.finalize();
 }
@@ -3432,7 +3412,8 @@
 void CGDebugInfo::EmitExplicitCastType(QualType Ty) {
   if (CGM.getCodeGenOpts().getDebugInfo() < CodeGenOptions::LimitedDebugInfo)
     return;
-  llvm::DIType DieTy = getOrCreateType(Ty, getOrCreateMainFile());
-  // Don't ignore in case of explicit cast where it is referenced indirectly.
-  DBuilder.retainType(DieTy);
+
+  if (llvm::DIType DieTy = getOrCreateType(Ty, getOrCreateMainFile()))
+    // Don't ignore in case of explicit cast where it is referenced indirectly.
+    DBuilder.retainType(DieTy);
 }
diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h
index 62ba801..6fcceed 100644
--- a/lib/CodeGen/CGDebugInfo.h
+++ b/lib/CodeGen/CGDebugInfo.h
@@ -56,7 +56,7 @@
   SourceLocation CurLoc;
   llvm::DIType VTablePtrType;
   llvm::DIType ClassTy;
-  llvm::DICompositeType ObjTy;
+  llvm::MDCompositeType *ObjTy = nullptr;
   llvm::DIType SelTy;
   llvm::DIType OCLImage1dDITy, OCLImage1dArrayDITy, OCLImage1dBufferDITy;
   llvm::DIType OCLImage2dDITy, OCLImage2dArrayDITy;
@@ -129,8 +129,9 @@
   llvm::DIType CreateType(const FunctionType *Ty, llvm::DIFile F);
   llvm::DIType CreateType(const RecordType *Tyg);
   llvm::DIType CreateTypeDefinition(const RecordType *Ty);
-  llvm::DICompositeType CreateLimitedType(const RecordType *Ty);
-  void CollectContainingType(const CXXRecordDecl *RD, llvm::DICompositeType CT);
+  llvm::MDCompositeType *CreateLimitedType(const RecordType *Ty);
+  void CollectContainingType(const CXXRecordDecl *RD,
+                             llvm::MDCompositeType *CT);
   llvm::DIType CreateType(const ObjCInterfaceType *Ty, llvm::DIFile F);
   llvm::DIType CreateTypeDefinition(const ObjCInterfaceType *Ty, llvm::DIFile F);
   llvm::DIType CreateType(const ObjCObjectType *Ty, llvm::DIFile F);
@@ -144,12 +145,13 @@
   llvm::DIType CreateTypeDefinition(const EnumType *Ty);
   llvm::DIType CreateSelfType(const QualType &QualTy, llvm::DIType Ty);
   llvm::DIType getTypeOrNull(const QualType);
-  llvm::DICompositeType getOrCreateMethodType(const CXXMethodDecl *Method,
-                                              llvm::DIFile F);
-  llvm::DICompositeType getOrCreateInstanceMethodType(
-      QualType ThisPtr, const FunctionProtoType *Func, llvm::DIFile Unit);
-  llvm::DICompositeType getOrCreateFunctionType(const Decl *D, QualType FnType,
+  llvm::MDSubroutineType *getOrCreateMethodType(const CXXMethodDecl *Method,
                                                 llvm::DIFile F);
+  llvm::MDSubroutineType *
+  getOrCreateInstanceMethodType(QualType ThisPtr, const FunctionProtoType *Func,
+                                llvm::DIFile Unit);
+  llvm::MDSubroutineType *
+  getOrCreateFunctionType(const Decl *D, QualType FnType, llvm::DIFile F);
   llvm::DIType getOrCreateVTablePtrType(llvm::DIFile F);
   llvm::DINameSpace getOrCreateNameSpace(const NamespaceDecl *N);
   llvm::DIType getOrCreateTypeDeclaration(QualType PointeeTy, llvm::DIFile F);
@@ -323,8 +325,8 @@
   llvm::DIScope getCurrentContextDescriptor(const Decl *Decl);
 
   /// \brief Create a forward decl for a RecordType in a given context.
-  llvm::DICompositeType getOrCreateRecordFwdDecl(const RecordType *,
-                                                 llvm::DIDescriptor);
+  llvm::MDCompositeType *getOrCreateRecordFwdDecl(const RecordType *,
+                                                  llvm::MDScope *);
 
   /// \brief Create a set of decls for the context chain.
   llvm::DIDescriptor createContextChain(const Decl *Decl);
@@ -363,7 +365,7 @@
 
   /// \brief Retrieve the DIDescriptor, if any, for the canonical form of this
   /// declaration.
-  llvm::DIDescriptor getDeclarationOrDefinition(const Decl *D);
+  llvm::DebugNode *getDeclarationOrDefinition(const Decl *D);
 
   /// \brief Return debug info descriptor to describe method
   /// declaration for the given method definition.
@@ -385,9 +387,9 @@
   /// Return a global variable that represents one of the collection of
   /// global variables created for an anonmyous union.
   llvm::DIGlobalVariable
-  CollectAnonRecordDecls(const RecordDecl *RD, llvm::DIFile Unit, unsigned LineNo,
-                         StringRef LinkageName, llvm::GlobalVariable *Var,
-                         llvm::DIDescriptor DContext);
+  CollectAnonRecordDecls(const RecordDecl *RD, llvm::DIFile Unit,
+                         unsigned LineNo, StringRef LinkageName,
+                         llvm::GlobalVariable *Var, llvm::MDScope *DContext);
 
   /// \brief Get function name for the given FunctionDecl. If the
   /// name is constructed on demand (e.g. C++ destructor) then the name
@@ -419,18 +421,15 @@
 
   /// \brief Collect various properties of a FunctionDecl.
   /// \param GD  A GlobalDecl whose getDecl() must return a FunctionDecl.
-  void collectFunctionDeclProps(GlobalDecl GD,
-                                llvm::DIFile Unit,
+  void collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile Unit,
                                 StringRef &Name, StringRef &LinkageName,
-                                llvm::DIDescriptor &FDContext,
-                                llvm::DIArray &TParamsArray,
-                                unsigned &Flags);
+                                llvm::MDScope *&FDContext,
+                                llvm::DIArray &TParamsArray, unsigned &Flags);
 
   /// \brief Collect various properties of a VarDecl.
   void collectVarDeclProps(const VarDecl *VD, llvm::DIFile &Unit,
-                           unsigned &LineNo, QualType &T,
-                           StringRef &Name, StringRef &LinkageName,
-                           llvm::DIDescriptor &VDContext);
+                           unsigned &LineNo, QualType &T, StringRef &Name,
+                           StringRef &LinkageName, llvm::MDScope *&VDContext);
 
   /// \brief Allocate a copy of \p A using the DebugInfoNames allocator
   /// and return a reference to it. If multiple arguments are given the strings
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp
index f79d137..f1ccb09 100644
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -635,8 +635,9 @@
     if (capturedByInit) {
       // We can use a simple GEP for this because it can't have been
       // moved yet.
-      tempLV.setAddress(Builder.CreateStructGEP(tempLV.getAddress(),
-                                   getByRefValueLLVMField(cast<VarDecl>(D))));
+      tempLV.setAddress(Builder.CreateStructGEP(
+          nullptr, tempLV.getAddress(),
+          getByRefValueLLVMField(cast<VarDecl>(D)).second));
     }
 
     llvm::PointerType *ty
@@ -797,8 +798,9 @@
 
       // If necessary, get a pointer to the element and emit it.
       if (!Elt->isNullValue() && !isa<llvm::UndefValue>(Elt))
-        emitStoresForInitAfterMemset(Elt, Builder.CreateConstGEP2_32(Loc, 0, i),
-                                     isVolatile, Builder);
+        emitStoresForInitAfterMemset(
+            Elt, Builder.CreateConstGEP2_32(Init->getType(), Loc, 0, i),
+            isVolatile, Builder);
     }
     return;
   }
@@ -811,8 +813,9 @@
 
     // If necessary, get a pointer to the element and emit it.
     if (!Elt->isNullValue() && !isa<llvm::UndefValue>(Elt))
-      emitStoresForInitAfterMemset(Elt, Builder.CreateConstGEP2_32(Loc, 0, i),
-                                   isVolatile, Builder);
+      emitStoresForInitAfterMemset(
+          Elt, Builder.CreateConstGEP2_32(Init->getType(), Loc, 0, i),
+          isVolatile, Builder);
   }
 }
 
diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp
index 236337b..eb4ddc7 100644
--- a/lib/CodeGen/CGDeclCXX.cpp
+++ b/lib/CodeGen/CGDeclCXX.cpp
@@ -298,6 +298,11 @@
 CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D,
                                             llvm::GlobalVariable *Addr,
                                             bool PerformInit) {
+  // Check if we've already initialized this decl.
+  auto I = DelayedCXXInitPosition.find(D);
+  if (I != DelayedCXXInitPosition.end() && I->second == ~0U)
+    return;
+
   llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
   SmallString<256> FnName;
   {
@@ -327,11 +332,9 @@
     CXXThreadLocalInitVars.push_back(Addr);
   } else if (PerformInit && ISA) {
     EmitPointerToInitFunc(D, Addr, Fn, ISA);
-    DelayedCXXInitPosition.erase(D);
   } else if (auto *IPA = D->getAttr<InitPriorityAttr>()) {
     OrderGlobalInits Key(IPA->getPriority(), PrioritizedCXXGlobalInits.size());
     PrioritizedCXXGlobalInits.push_back(std::make_pair(Key, Fn));
-    DelayedCXXInitPosition.erase(D);
   } else if (isTemplateInstantiation(D->getTemplateSpecializationKind())) {
     // C++ [basic.start.init]p2:
     //   Definitions of explicitly specialized class template static data
@@ -346,24 +349,24 @@
     // minor startup time optimization.  In the MS C++ ABI, there are no guard
     // variables, so this COMDAT key is required for correctness.
     AddGlobalCtor(Fn, 65535, COMDATKey);
-    DelayedCXXInitPosition.erase(D);
   } else if (D->hasAttr<SelectAnyAttr>()) {
     // SelectAny globals will be comdat-folded. Put the initializer into a
     // COMDAT group associated with the global, so the initializers get folded
     // too.
     AddGlobalCtor(Fn, 65535, COMDATKey);
-    DelayedCXXInitPosition.erase(D);
   } else {
-    llvm::DenseMap<const Decl *, unsigned>::iterator I =
-      DelayedCXXInitPosition.find(D);
+    I = DelayedCXXInitPosition.find(D); // Re-do lookup in case of re-hash.
     if (I == DelayedCXXInitPosition.end()) {
       CXXGlobalInits.push_back(Fn);
-    } else {
-      assert(CXXGlobalInits[I->second] == nullptr);
+    } else if (I->second != ~0U) {
+      assert(I->second < CXXGlobalInits.size() &&
+             CXXGlobalInits[I->second] == nullptr);
       CXXGlobalInits[I->second] = Fn;
-      DelayedCXXInitPosition.erase(I);
     }
   }
+
+  // Remember that we already emitted the initializer for this global.
+  DelayedCXXInitPosition[D] = ~0U;
 }
 
 void CodeGenModule::EmitCXXThreadLocalInitFunc() {
diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp
index 223b8f7..ff12a9a 100644
--- a/lib/CodeGen/CGException.cpp
+++ b/lib/CodeGen/CGException.cpp
@@ -19,8 +19,11 @@
 #include "clang/AST/Mangle.h"
 #include "clang/AST/StmtCXX.h"
 #include "clang/AST/StmtObjC.h"
+#include "clang/AST/StmtVisitor.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/SaveAndRestore.h"
 
 using namespace clang;
 using namespace CodeGen;
@@ -406,13 +409,6 @@
   return Builder.CreateLoad(getEHSelectorSlot(), "sel");
 }
 
-llvm::Value *CodeGenFunction::getAbnormalTerminationSlot() {
-  if (!AbnormalTerminationSlot)
-    AbnormalTerminationSlot =
-        CreateTempAlloca(Int8Ty, "abnormal.termination.slot");
-  return AbnormalTerminationSlot;
-}
-
 void CodeGenFunction::EmitCXXThrowExpr(const CXXThrowExpr *E,
                                        bool KeepInsertionPoint) {
   if (const Expr *SubExpr = E->getSubExpr()) {
@@ -458,6 +454,10 @@
       EHStack.pushTerminate();
     }
   } else if (EST == EST_Dynamic || EST == EST_DynamicNone) {
+    // TODO: Revisit exception specifications for the MS ABI.  There is a way to
+    // encode these in an object file but MSVC doesn't do anything with it.
+    if (getTarget().getCXXABI().isMicrosoft())
+      return;
     unsigned NumExceptions = Proto->getNumExceptions();
     EHFilterScope *Filter = EHStack.pushFilter(NumExceptions);
 
@@ -532,6 +532,10 @@
       EHStack.popTerminate();
     }
   } else if (EST == EST_Dynamic || EST == EST_DynamicNone) {
+    // TODO: Revisit exception specifications for the MS ABI.  There is a way to
+    // encode these in an object file but MSVC doesn't do anything with it.
+    if (getTarget().getCXXABI().isMicrosoft())
+      return;
     EHFilterScope &filterScope = cast<EHFilterScope>(*EHStack.begin());
     emitFilterDispatchBlock(*this, filterScope);
     EHStack.popFilter();
@@ -568,7 +572,7 @@
         TypeInfo = CGM.getObjCRuntime().GetEHType(CaughtType);
       else
         TypeInfo =
-            CGM.getAddrOfCXXHandlerMapEntry(CaughtType, C->getCaughtType());
+            CGM.getAddrOfCXXCatchHandlerType(CaughtType, C->getCaughtType());
       CatchScope->setHandler(I, TypeInfo, Handler);
     } else {
       // No exception decl indicates '...', a catch-all.
@@ -1277,8 +1281,7 @@
     return;
   }
 
-  SEHFinallyInfo FI;
-  EnterSEHTryStmt(S, FI);
+  EnterSEHTryStmt(S);
   {
     JumpDest TryExit = getJumpDestInCurrentScope("__try.__leave");
 
@@ -1291,109 +1294,224 @@
     else
       delete TryExit.getBlock();
   }
-  ExitSEHTryStmt(S, FI);
+  ExitSEHTryStmt(S);
 }
 
 namespace {
-struct PerformSEHFinally : EHScopeStack::Cleanup  {
-  CodeGenFunction::SEHFinallyInfo *FI;
-  PerformSEHFinally(CodeGenFunction::SEHFinallyInfo *FI) : FI(FI) {}
+struct PerformSEHFinally : EHScopeStack::Cleanup {
+  llvm::Function *OutlinedFinally;
+  PerformSEHFinally(llvm::Function *OutlinedFinally)
+      : OutlinedFinally(OutlinedFinally) {}
 
   void Emit(CodeGenFunction &CGF, Flags F) override {
-    // Cleanups are emitted at most twice: once for normal control flow and once
-    // for exception control flow. Branch into the finally block, and remember
-    // the continuation block so we can branch out later.
-    if (!FI->FinallyBB) {
-      FI->FinallyBB = CGF.createBasicBlock("__finally");
-      FI->FinallyBB->insertInto(CGF.CurFn);
-      FI->FinallyBB->moveAfter(CGF.Builder.GetInsertBlock());
-    }
+    ASTContext &Context = CGF.getContext();
+    QualType ArgTys[2] = {Context.BoolTy, Context.VoidPtrTy};
+    FunctionProtoType::ExtProtoInfo EPI;
+    const auto *FTP = cast<FunctionType>(
+        Context.getFunctionType(Context.VoidTy, ArgTys, EPI));
 
-    // Set the termination status and branch in.
-    CGF.Builder.CreateStore(
-        llvm::ConstantInt::get(CGF.Int8Ty, F.isForEHCleanup()),
-        CGF.getAbnormalTerminationSlot());
-    CGF.Builder.CreateBr(FI->FinallyBB);
+    CallArgList Args;
+    llvm::Value *IsForEH =
+        llvm::ConstantInt::get(CGF.ConvertType(ArgTys[0]), F.isForEHCleanup());
+    Args.add(RValue::get(IsForEH), ArgTys[0]);
 
-    // Create a continuation block for normal or exceptional control.
-    if (F.isForEHCleanup()) {
-      assert(!FI->ResumeBB && "double emission for EH");
-      FI->ResumeBB = CGF.createBasicBlock("__finally.resume");
-      CGF.EmitBlock(FI->ResumeBB);
-    } else {
-      assert(F.isForNormalCleanup() && !FI->ContBB && "double normal emission");
-      FI->ContBB = CGF.createBasicBlock("__finally.cont");
-      CGF.EmitBlock(FI->ContBB);
-      // Try to keep source order.
-      FI->ContBB->moveAfter(FI->FinallyBB);
-    }
+    CodeGenModule &CGM = CGF.CGM;
+    llvm::Value *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0);
+    llvm::Value *FrameAddr = CGM.getIntrinsic(llvm::Intrinsic::frameaddress);
+    llvm::Value *FP = CGF.Builder.CreateCall(FrameAddr, Zero);
+    Args.add(RValue::get(FP), ArgTys[1]);
+
+    const CGFunctionInfo &FnInfo =
+        CGM.getTypes().arrangeFreeFunctionCall(Args, FTP, /*chainCall=*/false);
+    CGF.EmitCall(FnInfo, OutlinedFinally, ReturnValueSlot(), Args);
   }
 };
 }
 
+namespace {
+/// Find all local variable captures in the statement.
+struct CaptureFinder : ConstStmtVisitor<CaptureFinder> {
+  CodeGenFunction &ParentCGF;
+  const VarDecl *ParentThis;
+  SmallVector<const VarDecl *, 4> Captures;
+  CaptureFinder(CodeGenFunction &ParentCGF, const VarDecl *ParentThis)
+      : ParentCGF(ParentCGF), ParentThis(ParentThis) {}
+
+  void Visit(const Stmt *S) {
+    // See if this is a capture, then recurse.
+    ConstStmtVisitor<CaptureFinder>::Visit(S);
+    for (const Stmt *Child : S->children())
+      if (Child)
+        Visit(Child);
+  }
+
+  void VisitDeclRefExpr(const DeclRefExpr *E) {
+    // If this is already a capture, just make sure we capture 'this'.
+    if (E->refersToEnclosingVariableOrCapture()) {
+      Captures.push_back(ParentThis);
+      return;
+    }
+
+    const auto *D = dyn_cast<VarDecl>(E->getDecl());
+    if (D && D->isLocalVarDeclOrParm() && D->hasLocalStorage())
+      Captures.push_back(D);
+  }
+
+  void VisitCXXThisExpr(const CXXThisExpr *E) {
+    Captures.push_back(ParentThis);
+  }
+};
+}
+
+void CodeGenFunction::EmitCapturedLocals(CodeGenFunction &ParentCGF,
+                                         const Stmt *OutlinedStmt,
+                                         llvm::Value *ParentFP) {
+  // Find all captures in the Stmt.
+  CaptureFinder Finder(ParentCGF, ParentCGF.CXXABIThisDecl);
+  Finder.Visit(OutlinedStmt);
+
+  // Typically there are no captures and we can exit early.
+  if (Finder.Captures.empty())
+    return;
+
+  // Prepare the llvm.framerecover declaration and its first argument.
+  llvm::Function *FrameRecoverFn = llvm::Intrinsic::getDeclaration(
+      &CGM.getModule(), llvm::Intrinsic::framerecover);
+  llvm::Constant *ParentI8Fn =
+      llvm::ConstantExpr::getBitCast(ParentCGF.CurFn, Int8PtrTy);
+
+  // Create llvm.framerecover calls for all captures.
+  for (const VarDecl *VD : Finder.Captures) {
+    if (isa<ImplicitParamDecl>(VD)) {
+      CGM.ErrorUnsupported(VD, "'this' captured by SEH");
+      CXXThisValue = llvm::UndefValue::get(ConvertTypeForMem(VD->getType()));
+      continue;
+    }
+    if (VD->getType()->isVariablyModifiedType()) {
+      CGM.ErrorUnsupported(VD, "VLA captured by SEH");
+      continue;
+    }
+    assert((isa<ImplicitParamDecl>(VD) || VD->isLocalVarDeclOrParm()) &&
+           "captured non-local variable");
+
+    // If this decl hasn't been declared yet, it will be declared in the
+    // OutlinedStmt.
+    auto I = ParentCGF.LocalDeclMap.find(VD);
+    if (I == ParentCGF.LocalDeclMap.end())
+      continue;
+    llvm::Value *ParentVar = I->second;
+
+    llvm::CallInst *RecoverCall = nullptr;
+    CGBuilderTy Builder(AllocaInsertPt);
+    if (auto *ParentAlloca = dyn_cast<llvm::AllocaInst>(ParentVar)) {
+      // Mark the variable escaped if nobody else referenced it and compute the
+      // frameescape index.
+      auto InsertPair =
+          ParentCGF.EscapedLocals.insert(std::make_pair(ParentAlloca, -1));
+      if (InsertPair.second)
+        InsertPair.first->second = ParentCGF.EscapedLocals.size() - 1;
+      int FrameEscapeIdx = InsertPair.first->second;
+      // call i8* @llvm.framerecover(i8* bitcast(@parentFn), i8* %fp, i32 N)
+      RecoverCall =
+          Builder.CreateCall3(FrameRecoverFn, ParentI8Fn, ParentFP,
+                              llvm::ConstantInt::get(Int32Ty, FrameEscapeIdx));
+
+    } else {
+      // If the parent didn't have an alloca, we're doing some nested outlining.
+      // Just clone the existing framerecover call, but tweak the FP argument to
+      // use our FP value. All other arguments are constants.
+      auto *ParentRecover =
+          cast<llvm::IntrinsicInst>(ParentVar->stripPointerCasts());
+      assert(ParentRecover->getIntrinsicID() == llvm::Intrinsic::framerecover &&
+             "expected alloca or framerecover in parent LocalDeclMap");
+      RecoverCall = cast<llvm::CallInst>(ParentRecover->clone());
+      RecoverCall->setArgOperand(1, ParentFP);
+      RecoverCall->insertBefore(AllocaInsertPt);
+    }
+
+    // Bitcast the variable, rename it, and insert it in the local decl map.
+    llvm::Value *ChildVar =
+        Builder.CreateBitCast(RecoverCall, ParentVar->getType());
+    ChildVar->setName(ParentVar->getName());
+    LocalDeclMap[VD] = ChildVar;
+  }
+}
+
+/// Arrange a function prototype that can be called by Windows exception
+/// handling personalities. On Win64, the prototype looks like:
+/// RetTy func(void *EHPtrs, void *ParentFP);
+void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF,
+                                             StringRef Name, QualType RetTy,
+                                             FunctionArgList &Args,
+                                             const Stmt *OutlinedStmt) {
+  llvm::Function *ParentFn = ParentCGF.CurFn;
+  const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeFreeFunctionDeclaration(
+      RetTy, Args, FunctionType::ExtInfo(), /*isVariadic=*/false);
+
+  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+  llvm::Function *Fn = llvm::Function::Create(
+      FnTy, llvm::GlobalValue::InternalLinkage, Name.str(), &CGM.getModule());
+  // The helper is either in the parent function's comdat, or it's internal.
+  if (llvm::Comdat *C = ParentFn->getComdat()) {
+    Fn->setComdat(C);
+  } else if (ParentFn->hasWeakLinkage() || ParentFn->hasLinkOnceLinkage()) {
+    llvm::Comdat *C = CGM.getModule().getOrInsertComdat(ParentFn->getName());
+    ParentFn->setComdat(C);
+    Fn->setComdat(C);
+  } else {
+    Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
+  }
+
+  IsOutlinedSEHHelper = true;
+
+  StartFunction(GlobalDecl(), RetTy, Fn, FnInfo, Args,
+                OutlinedStmt->getLocStart(), OutlinedStmt->getLocStart());
+
+  CGM.SetLLVMFunctionAttributes(nullptr, FnInfo, CurFn);
+
+  auto AI = Fn->arg_begin();
+  ++AI;
+  EmitCapturedLocals(ParentCGF, OutlinedStmt, &*AI);
+}
+
 /// Create a stub filter function that will ultimately hold the code of the
 /// filter expression. The EH preparation passes in LLVM will outline the code
 /// from the main function body into this stub.
 llvm::Function *
 CodeGenFunction::GenerateSEHFilterFunction(CodeGenFunction &ParentCGF,
                                            const SEHExceptStmt &Except) {
-  const Decl *ParentCodeDecl = ParentCGF.CurCodeDecl;
-  llvm::Function *ParentFn = ParentCGF.CurFn;
+  const Expr *FilterExpr = Except.getFilterExpr();
+  SourceLocation StartLoc = FilterExpr->getLocStart();
 
-  Expr *FilterExpr = Except.getFilterExpr();
+  SEHPointersDecl = ImplicitParamDecl::Create(
+      getContext(), nullptr, StartLoc,
+      &getContext().Idents.get("exception_pointers"), getContext().VoidPtrTy);
+  FunctionArgList Args;
+  Args.push_back(SEHPointersDecl);
+  Args.push_back(ImplicitParamDecl::Create(
+      getContext(), nullptr, StartLoc,
+      &getContext().Idents.get("frame_pointer"), getContext().VoidPtrTy));
 
   // Get the mangled function name.
   SmallString<128> Name;
   {
     llvm::raw_svector_ostream OS(Name);
+    const Decl *ParentCodeDecl = ParentCGF.CurCodeDecl;
     const NamedDecl *Parent = dyn_cast_or_null<NamedDecl>(ParentCodeDecl);
     assert(Parent && "FIXME: handle unnamed decls (lambdas, blocks) with SEH");
     CGM.getCXXABI().getMangleContext().mangleSEHFilterExpression(Parent, OS);
   }
 
-  // Arrange a function with the declaration:
-  // int filt(EXCEPTION_POINTERS *exception_pointers, void *frame_pointer)
-  QualType RetTy = getContext().IntTy;
-  FunctionArgList Args;
-  SEHPointersDecl = ImplicitParamDecl::Create(
-      getContext(), nullptr, FilterExpr->getLocStart(),
-      &getContext().Idents.get("exception_pointers"), getContext().VoidPtrTy);
-  Args.push_back(SEHPointersDecl);
-  Args.push_back(ImplicitParamDecl::Create(
-      getContext(), nullptr, FilterExpr->getLocStart(),
-      &getContext().Idents.get("frame_pointer"), getContext().VoidPtrTy));
-  const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeFreeFunctionDeclaration(
-      RetTy, Args, FunctionType::ExtInfo(), /*isVariadic=*/false);
-  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
-  llvm::Function *Fn = llvm::Function::Create(FnTy, ParentFn->getLinkage(),
-                                              Name.str(), &CGM.getModule());
-  // The filter is either in the same comdat as the function, or it's internal.
-  if (llvm::Comdat *C = ParentFn->getComdat()) {
-    Fn->setComdat(C);
-  } else if (ParentFn->hasWeakLinkage() || ParentFn->hasLinkOnceLinkage()) {
-    // FIXME: Unreachable with Rafael's changes?
-    llvm::Comdat *C = CGM.getModule().getOrInsertComdat(ParentFn->getName());
-    ParentFn->setComdat(C);
-    Fn->setComdat(C);
-  } else {
-    Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
-  }
+  startOutlinedSEHHelper(ParentCGF, Name, getContext().IntTy, Args, FilterExpr);
 
-  StartFunction(GlobalDecl(), RetTy, Fn, FnInfo, Args,
-                FilterExpr->getLocStart(), FilterExpr->getLocStart());
+  // Mark the outlined filter function as nounwind and noinline to make LLVM's
+  // job a little easier.
+  // FIXME: Remove these restrictions in the future.
+  CurFn->addFnAttr(llvm::Attribute::NoUnwind);
+  CurFn->addFnAttr(llvm::Attribute::NoInline);
 
   EmitSEHExceptionCodeSave();
 
-  // Insert dummy allocas for every local variable in scope. We'll initialize
-  // them and prune the unused ones after we find out which ones were
-  // referenced.
-  for (const auto &DeclPtrs : ParentCGF.LocalDeclMap) {
-    const Decl *VD = DeclPtrs.first;
-    llvm::Value *Ptr = DeclPtrs.second;
-    auto *ValTy = cast<llvm::PointerType>(Ptr->getType())->getElementType();
-    LocalDeclMap[VD] = CreateTempAlloca(ValTy, Ptr->getName() + ".filt");
-  }
-
   // Emit the original filter expression, convert to i32, and return.
   llvm::Value *R = EmitScalarExpr(FilterExpr);
   R = Builder.CreateIntCast(R, CGM.IntTy,
@@ -1402,18 +1520,42 @@
 
   FinishFunction(FilterExpr->getLocEnd());
 
-  for (const auto &DeclPtrs : ParentCGF.LocalDeclMap) {
-    const Decl *VD = DeclPtrs.first;
-    auto *Alloca = cast<llvm::AllocaInst>(LocalDeclMap[VD]);
-    if (Alloca->hasNUses(0)) {
-      Alloca->eraseFromParent();
-      continue;
-    }
-    ErrorUnsupported(FilterExpr,
-                     "SEH filter expression local variable capture");
+  return CurFn;
+}
+
+llvm::Function *
+CodeGenFunction::GenerateSEHFinallyFunction(CodeGenFunction &ParentCGF,
+                                            const SEHFinallyStmt &Finally) {
+  const Stmt *FinallyBlock = Finally.getBlock();
+  SourceLocation StartLoc = FinallyBlock->getLocStart();
+
+  FunctionArgList Args;
+  Args.push_back(ImplicitParamDecl::Create(
+      getContext(), nullptr, StartLoc,
+      &getContext().Idents.get("abnormal_termination"), getContext().BoolTy));
+  Args.push_back(ImplicitParamDecl::Create(
+      getContext(), nullptr, StartLoc,
+      &getContext().Idents.get("frame_pointer"), getContext().VoidPtrTy));
+
+  // Get the mangled function name.
+  SmallString<128> Name;
+  {
+    llvm::raw_svector_ostream OS(Name);
+    const Decl *ParentCodeDecl = ParentCGF.CurCodeDecl;
+    const NamedDecl *Parent = dyn_cast_or_null<NamedDecl>(ParentCodeDecl);
+    assert(Parent && "FIXME: handle unnamed decls (lambdas, blocks) with SEH");
+    CGM.getCXXABI().getMangleContext().mangleSEHFinallyBlock(Parent, OS);
   }
 
-  return Fn;
+  startOutlinedSEHHelper(ParentCGF, Name, getContext().VoidTy, Args,
+                         FinallyBlock);
+
+  // Emit the body of the __finally block into the outlined helper.
+  EmitStmt(FinallyBlock);
+
+  FinishFunction(FinallyBlock->getLocEnd());
+
+  return CurFn;
 }
 
 void CodeGenFunction::EmitSEHExceptionCodeSave() {
@@ -1429,7 +1571,7 @@
   llvm::Type *RecordTy = CGM.Int32Ty->getPointerTo();
   llvm::Type *PtrsTy = llvm::StructType::get(RecordTy, CGM.VoidPtrTy, nullptr);
   Ptrs = Builder.CreateBitCast(Ptrs, PtrsTy->getPointerTo());
-  llvm::Value *Rec = Builder.CreateStructGEP(Ptrs, 0);
+  llvm::Value *Rec = Builder.CreateStructGEP(PtrsTy, Ptrs, 0);
   Rec = Builder.CreateLoad(Rec);
   llvm::Value *Code = Builder.CreateLoad(Rec);
   Code = Builder.CreateZExt(Code, CGM.IntPtrTy);
@@ -1457,21 +1599,24 @@
 }
 
 llvm::Value *CodeGenFunction::EmitSEHAbnormalTermination() {
-  // Load from the abnormal termination slot. It will be uninitialized outside
-  // of __finally blocks, which we should warn or error on.
-  llvm::Value *IsEH = Builder.CreateLoad(getAbnormalTerminationSlot());
-  return Builder.CreateZExt(IsEH, Int32Ty);
+  // Abnormal termination is just the first parameter to the outlined finally
+  // helper.
+  auto AI = CurFn->arg_begin();
+  return Builder.CreateZExt(&*AI, Int32Ty);
 }
 
-void CodeGenFunction::EnterSEHTryStmt(const SEHTryStmt &S, SEHFinallyInfo &FI) {
-  if (S.getFinallyHandler()) {
+void CodeGenFunction::EnterSEHTryStmt(const SEHTryStmt &S) {
+  CodeGenFunction HelperCGF(CGM, /*suppressNewContext=*/true);
+  if (const SEHFinallyStmt *Finally = S.getFinallyHandler()) {
     // Push a cleanup for __finally blocks.
-    EHStack.pushCleanup<PerformSEHFinally>(NormalAndEHCleanup, &FI);
+    llvm::Function *FinallyFunc =
+        HelperCGF.GenerateSEHFinallyFunction(*this, *Finally);
+    EHStack.pushCleanup<PerformSEHFinally>(NormalAndEHCleanup, FinallyFunc);
     return;
   }
 
   // Otherwise, we must have an __except block.
-  SEHExceptStmt *Except = S.getExceptHandler();
+  const SEHExceptStmt *Except = S.getExceptHandler();
   assert(Except);
   EHCatchScope *CatchScope = EHStack.pushCatch(1);
 
@@ -1486,40 +1631,17 @@
 
   // In general, we have to emit an outlined filter function. Use the function
   // in place of the RTTI typeinfo global that C++ EH uses.
-  CodeGenFunction FilterCGF(CGM, /*suppressNewContext=*/true);
   llvm::Function *FilterFunc =
-      FilterCGF.GenerateSEHFilterFunction(*this, *Except);
+      HelperCGF.GenerateSEHFilterFunction(*this, *Except);
   llvm::Constant *OpaqueFunc =
       llvm::ConstantExpr::getBitCast(FilterFunc, Int8PtrTy);
   CatchScope->setHandler(0, OpaqueFunc, createBasicBlock("__except"));
 }
 
-void CodeGenFunction::ExitSEHTryStmt(const SEHTryStmt &S, SEHFinallyInfo &FI) {
+void CodeGenFunction::ExitSEHTryStmt(const SEHTryStmt &S) {
   // Just pop the cleanup if it's a __finally block.
-  if (const SEHFinallyStmt *Finally = S.getFinallyHandler()) {
+  if (S.getFinallyHandler()) {
     PopCleanupBlock();
-    assert(FI.ContBB && "did not emit normal cleanup");
-
-    // Emit the code into FinallyBB.
-    CGBuilderTy::InsertPoint SavedIP = Builder.saveIP();
-    Builder.SetInsertPoint(FI.FinallyBB);
-    EmitStmt(Finally->getBlock());
-
-    if (HaveInsertPoint()) {
-      if (FI.ResumeBB) {
-        llvm::Value *IsEH = Builder.CreateLoad(getAbnormalTerminationSlot(),
-                                               "abnormal.termination");
-        IsEH = Builder.CreateICmpEQ(IsEH, llvm::ConstantInt::get(Int8Ty, 0));
-        Builder.CreateCondBr(IsEH, FI.ContBB, FI.ResumeBB);
-      } else {
-        // There was nothing exceptional in the try body, so we only have normal
-        // control flow.
-        Builder.CreateBr(FI.ContBB);
-      }
-    }
-
-    Builder.restoreIP(SavedIP);
-
     return;
   }
 
@@ -1569,7 +1691,13 @@
   if (HaveInsertPoint())
     EmitStopPoint(&S);
 
-  assert(!SEHTryEpilogueStack.empty() &&
-         "sema should have rejected this __leave");
+  // This must be a __leave from a __finally block, which we warn on and is UB.
+  // Just emit unreachable.
+  if (!isSEHTryScope()) {
+    Builder.CreateUnreachable();
+    Builder.ClearInsertionPoint();
+    return;
+  }
+
   EmitBranchThroughCleanup(*SEHTryEpilogueStack.back());
 }
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index 5ba51cc..4147317 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -300,26 +300,26 @@
                          const MaterializeTemporaryExpr *M, const Expr *Inner) {
   switch (M->getStorageDuration()) {
   case SD_FullExpression:
-  case SD_Automatic:
+  case SD_Automatic: {
     // If we have a constant temporary array or record try to promote it into a
     // constant global under the same rules a normal constant would've been
     // promoted. This is easier on the optimizer and generally emits fewer
     // instructions.
+    QualType Ty = Inner->getType();
     if (CGF.CGM.getCodeGenOpts().MergeAllConstants &&
-        (M->getType()->isArrayType() || M->getType()->isRecordType()) &&
-        CGF.CGM.isTypeConstant(M->getType(), true))
-      if (llvm::Constant *Init =
-              CGF.CGM.EmitConstantExpr(Inner, M->getType(), &CGF)) {
+        (Ty->isArrayType() || Ty->isRecordType()) &&
+        CGF.CGM.isTypeConstant(Ty, true))
+      if (llvm::Constant *Init = CGF.CGM.EmitConstantExpr(Inner, Ty, &CGF)) {
         auto *GV = new llvm::GlobalVariable(
             CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
             llvm::GlobalValue::PrivateLinkage, Init, ".ref.tmp");
         GV->setAlignment(
-            CGF.getContext().getTypeAlignInChars(M->getType()).getQuantity());
+            CGF.getContext().getTypeAlignInChars(Ty).getQuantity());
         // FIXME: Should we put the new global into a COMDAT?
         return GV;
       }
-    return CGF.CreateMemTemp(Inner->getType(), "ref.tmp");
-
+    return CGF.CreateMemTemp(Ty, "ref.tmp");
+  }
   case SD_Thread:
   case SD_Static:
     return CGF.CGM.GetAddrOfGlobalTemporary(M, Inner);
@@ -2077,9 +2077,8 @@
     assert(E->getSubExpr()->getType()->isAnyComplexType());
 
     unsigned Idx = E->getOpcode() == UO_Imag;
-    return MakeAddrLValue(Builder.CreateStructGEP(LV.getAddress(),
-                                                  Idx, "idx"),
-                          ExprTy);
+    return MakeAddrLValue(
+        Builder.CreateStructGEP(nullptr, LV.getAddress(), Idx, "idx"), ExprTy);
   }
   case UO_PreInc:
   case UO_PreDec: {
@@ -2675,7 +2674,7 @@
     unsigned Idx = RL.getLLVMFieldNo(field);
     if (Idx != 0)
       // For structs, we GEP to the field that the record layout suggests.
-      Addr = Builder.CreateStructGEP(Addr, Idx, field->getName());
+      Addr = Builder.CreateStructGEP(nullptr, Addr, Idx, field->getName());
     // Get the access type.
     llvm::Type *PtrTy = llvm::Type::getIntNPtrTy(
       getLLVMContext(), Info.StorageSize,
@@ -2710,7 +2709,7 @@
   } else {
     // For structs, we GEP to the field that the record layout suggests.
     unsigned idx = CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field);
-    addr = Builder.CreateStructGEP(addr, idx, field->getName());
+    addr = Builder.CreateStructGEP(nullptr, addr, idx, field->getName());
 
     // If this is a reference field, load the reference right now.
     if (const ReferenceType *refType = type->getAs<ReferenceType>()) {
@@ -2789,7 +2788,7 @@
   const CGRecordLayout &RL =
     CGM.getTypes().getCGRecordLayout(Field->getParent());
   unsigned idx = RL.getLLVMFieldNo(Field);
-  llvm::Value *V = Builder.CreateStructGEP(Base.getAddress(), idx);
+  llvm::Value *V = Builder.CreateStructGEP(nullptr, Base.getAddress(), idx);
   assert(!FieldType.getObjCGCAttr() && "fields cannot have GC attrs");
 
   // Make sure that the address is pointing to the right type.  This is critical
@@ -3369,7 +3368,7 @@
       llvm::Value *CalleePrefixStruct = Builder.CreateBitCast(
           Callee, llvm::PointerType::getUnqual(PrefixStructTy));
       llvm::Value *CalleeSigPtr =
-          Builder.CreateConstGEP2_32(CalleePrefixStruct, 0, 0);
+          Builder.CreateConstGEP2_32(PrefixStructTy, CalleePrefixStruct, 0, 0);
       llvm::Value *CalleeSig = Builder.CreateLoad(CalleeSigPtr);
       llvm::Value *CalleeSigMatch = Builder.CreateICmpEQ(CalleeSig, PrefixSig);
 
@@ -3379,7 +3378,7 @@
 
       EmitBlock(TypeCheck);
       llvm::Value *CalleeRTTIPtr =
-          Builder.CreateConstGEP2_32(CalleePrefixStruct, 0, 1);
+          Builder.CreateConstGEP2_32(PrefixStructTy, CalleePrefixStruct, 0, 1);
       llvm::Value *CalleeRTTI = Builder.CreateLoad(CalleeRTTIPtr);
       llvm::Value *CalleeRTTIMatch =
           Builder.CreateICmpEQ(CalleeRTTI, FTRTTIConst);
diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp
index 5b0d9f0..6b4cf68 100644
--- a/lib/CodeGen/CGExprAgg.cpp
+++ b/lib/CodeGen/CGExprAgg.cpp
@@ -645,7 +645,7 @@
 
         // Build a GEP to refer to the subobject.
         llvm::Value *valueAddr =
-            CGF.Builder.CreateStructGEP(valueDest.getAddr(), 0);
+            CGF.Builder.CreateStructGEP(nullptr, valueDest.getAddr(), 0);
         valueDest = AggValueSlot::forAddr(valueAddr,
                                           valueDest.getAlignment(),
                                           valueDest.getQualifiers(),
@@ -666,7 +666,7 @@
     CGF.EmitAggExpr(E->getSubExpr(), atomicSlot);
 
     llvm::Value *valueAddr =
-      Builder.CreateStructGEP(atomicSlot.getAddr(), 0);
+        Builder.CreateStructGEP(nullptr, atomicSlot.getAddr(), 0);
     RValue rvalue = RValue::getAggregate(valueAddr, atomicSlot.isVolatile());
     return EmitFinalDestCopy(valueType, rvalue);
   }
diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp
index 6852d3a..4bffad3 100644
--- a/lib/CodeGen/CGExprCXX.cpp
+++ b/lib/CodeGen/CGExprCXX.cpp
@@ -256,6 +256,12 @@
   } else if (UseVirtualCall) {
     Callee = CGM.getCXXABI().getVirtualFunctionPointer(*this, MD, This, Ty);
   } else {
+    if (SanOpts.has(SanitizerKind::CFINVCall) &&
+        MD->getParent()->isDynamicClass()) {
+      llvm::Value *VTable = GetVTablePtr(This, Int8PtrTy);
+      EmitVTablePtrCheckForCall(MD, VTable);
+    }
+
     if (getLangOpts().AppleKext && MD->isVirtual() && HasQualifier)
       Callee = BuildAppleKextVirtualCall(MD, Qualifier, Ty);
     else if (!DevirtualizedMethod)
@@ -778,12 +784,10 @@
   llvm_unreachable("bad evaluation kind");
 }
 
-void
-CodeGenFunction::EmitNewArrayInitializer(const CXXNewExpr *E,
-                                         QualType ElementType,
-                                         llvm::Value *BeginPtr,
-                                         llvm::Value *NumElements,
-                                         llvm::Value *AllocSizeWithoutCookie) {
+void CodeGenFunction::EmitNewArrayInitializer(
+    const CXXNewExpr *E, QualType ElementType, llvm::Type *ElementTy,
+    llvm::Value *BeginPtr, llvm::Value *NumElements,
+    llvm::Value *AllocSizeWithoutCookie) {
   // If we have a type with trivial initialization and no initializer,
   // there's nothing to do.
   if (!E->hasInitializer())
@@ -809,7 +813,8 @@
     if (const ConstantArrayType *CAT = dyn_cast_or_null<ConstantArrayType>(
             AllocType->getAsArrayTypeUnsafe())) {
       unsigned AS = CurPtr->getType()->getPointerAddressSpace();
-      llvm::Type *AllocPtrTy = ConvertTypeForMem(AllocType)->getPointerTo(AS);
+      ElementTy = ConvertTypeForMem(AllocType);
+      llvm::Type *AllocPtrTy = ElementTy->getPointerTo(AS);
       CurPtr = Builder.CreateBitCast(CurPtr, AllocPtrTy);
       InitListElements *= getContext().getConstantArrayElementCount(CAT);
     }
@@ -839,7 +844,8 @@
       // initialization loops.
       StoreAnyExprIntoOneUnit(*this, ILE->getInit(i),
                               ILE->getInit(i)->getType(), CurPtr);
-      CurPtr = Builder.CreateConstInBoundsGEP1_32(CurPtr, 1, "array.exp.next");
+      CurPtr = Builder.CreateConstInBoundsGEP1_32(ElementTy, CurPtr, 1,
+                                                  "array.exp.next");
     }
 
     // The remaining elements are filled with the array filler expression.
@@ -1000,7 +1006,7 @@
 
   // Advance to the next element by adjusting the pointer type as necessary.
   llvm::Value *NextPtr =
-      Builder.CreateConstInBoundsGEP1_32(CurPtr, 1, "array.next");
+      Builder.CreateConstInBoundsGEP1_32(ElementTy, CurPtr, 1, "array.next");
 
   // Check whether we've gotten to the end of the array and, if so,
   // exit the loop.
@@ -1012,13 +1018,12 @@
 }
 
 static void EmitNewInitializer(CodeGenFunction &CGF, const CXXNewExpr *E,
-                               QualType ElementType,
-                               llvm::Value *NewPtr,
-                               llvm::Value *NumElements,
+                               QualType ElementType, llvm::Type *ElementTy,
+                               llvm::Value *NewPtr, llvm::Value *NumElements,
                                llvm::Value *AllocSizeWithoutCookie) {
   ApplyDebugLocation DL(CGF, E);
   if (E->isArray())
-    CGF.EmitNewArrayInitializer(E, ElementType, NewPtr, NumElements,
+    CGF.EmitNewArrayInitializer(E, ElementType, ElementTy, NewPtr, NumElements,
                                 AllocSizeWithoutCookie);
   else if (const Expr *Init = E->getInitializer())
     StoreAnyExprIntoOneUnit(CGF, Init, E->getAllocatedType(), NewPtr);
@@ -1326,11 +1331,11 @@
                                                        E, allocType);
   }
 
-  llvm::Type *elementPtrTy
-    = ConvertTypeForMem(allocType)->getPointerTo(AS);
+  llvm::Type *elementTy = ConvertTypeForMem(allocType);
+  llvm::Type *elementPtrTy = elementTy->getPointerTo(AS);
   llvm::Value *result = Builder.CreateBitCast(allocation, elementPtrTy);
 
-  EmitNewInitializer(*this, E, allocType, result, numElements,
+  EmitNewInitializer(*this, E, allocType, elementTy, result, numElements,
                      allocSizeWithoutCookie);
   if (E->isArray()) {
     // NewPtr is a pointer to the base element type.  If we're
diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp
index b2228f0..dead1b5 100644
--- a/lib/CodeGen/CGExprComplex.cpp
+++ b/lib/CodeGen/CGExprComplex.cpp
@@ -317,14 +317,14 @@
   llvm::Value *Real=nullptr, *Imag=nullptr;
 
   if (!IgnoreReal || isVolatile) {
-    llvm::Value *RealP = Builder.CreateStructGEP(SrcPtr, 0,
+    llvm::Value *RealP = Builder.CreateStructGEP(nullptr, SrcPtr, 0,
                                                  SrcPtr->getName() + ".realp");
     Real = Builder.CreateAlignedLoad(RealP, AlignR, isVolatile,
                                      SrcPtr->getName() + ".real");
   }
 
   if (!IgnoreImag || isVolatile) {
-    llvm::Value *ImagP = Builder.CreateStructGEP(SrcPtr, 1,
+    llvm::Value *ImagP = Builder.CreateStructGEP(nullptr, SrcPtr, 1,
                                                  SrcPtr->getName() + ".imagp");
     Imag = Builder.CreateAlignedLoad(ImagP, AlignI, isVolatile,
                                      SrcPtr->getName() + ".imag");
@@ -341,8 +341,8 @@
     return CGF.EmitAtomicStore(RValue::getComplex(Val), lvalue, isInit);
 
   llvm::Value *Ptr = lvalue.getAddress();
-  llvm::Value *RealPtr = Builder.CreateStructGEP(Ptr, 0, "real");
-  llvm::Value *ImagPtr = Builder.CreateStructGEP(Ptr, 1, "imag");
+  llvm::Value *RealPtr = Builder.CreateStructGEP(nullptr, Ptr, 0, "real");
+  llvm::Value *ImagPtr = Builder.CreateStructGEP(nullptr, Ptr, 1, "imag");
   unsigned AlignR = lvalue.getAlignment().getQuantity();
   ASTContext &C = CGF.getContext();
   QualType ComplexTy = lvalue.getType();
diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp
index 7406354..b1cf99c 100644
--- a/lib/CodeGen/CGExprConstant.cpp
+++ b/lib/CodeGen/CGExprConstant.cpp
@@ -1115,7 +1115,7 @@
         unsigned AS = C->getType()->getPointerAddressSpace();
         llvm::Type *CharPtrTy = Int8Ty->getPointerTo(AS);
         llvm::Constant *Casted = llvm::ConstantExpr::getBitCast(C, CharPtrTy);
-        Casted = llvm::ConstantExpr::getGetElementPtr(Casted, Offset);
+        Casted = llvm::ConstantExpr::getGetElementPtr(Int8Ty, Casted, Offset);
         C = llvm::ConstantExpr::getPointerCast(Casted, C->getType());
       }
 
diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp
index 16ce69d..658bd3e 100644
--- a/lib/CodeGen/CGExprScalar.cpp
+++ b/lib/CodeGen/CGExprScalar.cpp
@@ -1451,13 +1451,13 @@
     // anything here.
     if (!E->getType()->isVariableArrayType()) {
       assert(isa<llvm::PointerType>(V->getType()) && "Expected pointer");
+      llvm::Type *NewTy = ConvertType(E->getType());
       V = CGF.Builder.CreatePointerCast(
-          V, ConvertType(E->getType())->getPointerTo(
-            V->getType()->getPointerAddressSpace()));
+          V, NewTy->getPointerTo(V->getType()->getPointerAddressSpace()));
 
       assert(isa<llvm::ArrayType>(V->getType()->getPointerElementType()) &&
              "Expected pointer to array");
-      V = Builder.CreateStructGEP(V, 0, "arraydecay");
+      V = Builder.CreateStructGEP(NewTy, V, 0, "arraydecay");
     }
 
     // Make sure the array decay ends up being the right type.  This matters if
@@ -1845,10 +1845,9 @@
     llvm::BasicBlock *opBB = Builder.GetInsertBlock();
     llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
     auto Pair = CGF.EmitAtomicCompareExchange(
-        LV, RValue::get(atomicPHI), RValue::get(CGF.EmitToMemory(value, type)),
-        E->getExprLoc());
-    llvm::Value *old = Pair.first.getScalarVal();
-    llvm::Value *success = Pair.second.getScalarVal();
+        LV, RValue::get(atomicPHI), RValue::get(value), E->getExprLoc());
+    llvm::Value *old = CGF.EmitToMemory(Pair.first.getScalarVal(), type);
+    llvm::Value *success = Pair.second;
     atomicPHI->addIncoming(old, opBB);
     Builder.CreateCondBr(success, contBB, opBB);
     Builder.SetInsertPoint(contBB);
@@ -2189,10 +2188,9 @@
     llvm::BasicBlock *opBB = Builder.GetInsertBlock();
     llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
     auto Pair = CGF.EmitAtomicCompareExchange(
-        LHSLV, RValue::get(atomicPHI),
-        RValue::get(CGF.EmitToMemory(Result, LHSTy)), E->getExprLoc());
-    llvm::Value *old = Pair.first.getScalarVal();
-    llvm::Value *success = Pair.second.getScalarVal();
+        LHSLV, RValue::get(atomicPHI), RValue::get(Result), E->getExprLoc());
+    llvm::Value *old = CGF.EmitToMemory(Pair.first.getScalarVal(), LHSTy);
+    llvm::Value *success = Pair.second;
     atomicPHI->addIncoming(old, opBB);
     Builder.CreateCondBr(success, contBB, opBB);
     Builder.SetInsertPoint(contBB);
diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp
index 19f5ca2..dfad13a 100644
--- a/lib/CodeGen/CGObjC.cpp
+++ b/lib/CodeGen/CGObjC.cpp
@@ -102,8 +102,8 @@
                                    ArrayType::Normal, /*IndexTypeQuals=*/0);
 
   // Allocate the temporary array(s).
-  llvm::Value *Objects = CreateMemTemp(ElementArrayType, "objects");
-  llvm::Value *Keys = nullptr;
+  llvm::AllocaInst *Objects = CreateMemTemp(ElementArrayType, "objects");
+  llvm::AllocaInst *Keys = nullptr;
   if (DLE)
     Keys = CreateMemTemp(ElementArrayType, "keys");
   
@@ -119,10 +119,9 @@
     if (ALE) {
       // Emit the element and store it to the appropriate array slot.
       const Expr *Rhs = ALE->getElement(i);
-      LValue LV = LValue::MakeAddr(Builder.CreateStructGEP(Objects, i),
-                                   ElementType,
-                                   Context.getTypeAlignInChars(Rhs->getType()),
-                                   Context);
+      LValue LV = LValue::MakeAddr(
+          Builder.CreateStructGEP(Objects->getAllocatedType(), Objects, i),
+          ElementType, Context.getTypeAlignInChars(Rhs->getType()), Context);
 
       llvm::Value *value = EmitScalarExpr(Rhs);
       EmitStoreThroughLValue(RValue::get(value), LV, true);
@@ -132,19 +131,17 @@
     } else {      
       // Emit the key and store it to the appropriate array slot.
       const Expr *Key = DLE->getKeyValueElement(i).Key;
-      LValue KeyLV = LValue::MakeAddr(Builder.CreateStructGEP(Keys, i),
-                                      ElementType,
-                                    Context.getTypeAlignInChars(Key->getType()),
-                                      Context);
+      LValue KeyLV = LValue::MakeAddr(
+          Builder.CreateStructGEP(Keys->getAllocatedType(), Keys, i),
+          ElementType, Context.getTypeAlignInChars(Key->getType()), Context);
       llvm::Value *keyValue = EmitScalarExpr(Key);
       EmitStoreThroughLValue(RValue::get(keyValue), KeyLV, /*isInit=*/true);
 
       // Emit the value and store it to the appropriate array slot.
-      const Expr *Value = DLE->getKeyValueElement(i).Value;  
-      LValue ValueLV = LValue::MakeAddr(Builder.CreateStructGEP(Objects, i), 
-                                        ElementType,
-                                  Context.getTypeAlignInChars(Value->getType()),
-                                        Context);
+      const Expr *Value = DLE->getKeyValueElement(i).Value;
+      LValue ValueLV = LValue::MakeAddr(
+          Builder.CreateStructGEP(Objects->getAllocatedType(), Objects, i),
+          ElementType, Context.getTypeAlignInChars(Value->getType()), Context);
       llvm::Value *valueValue = EmitScalarExpr(Value);
       EmitStoreThroughLValue(RValue::get(valueValue), ValueLV, /*isInit=*/true);
       if (TrackNeededObjects) {
@@ -1434,7 +1431,7 @@
 
   // Fast enumeration state.
   QualType StateTy = CGM.getObjCFastEnumerationStateType();
-  llvm::Value *StatePtr = CreateMemTemp(StateTy, "state.ptr");
+  llvm::AllocaInst *StatePtr = CreateMemTemp(StateTy, "state.ptr");
   EmitNullInitialization(StatePtr, StateTy);
 
   // Number of elements in the items array.
@@ -1518,8 +1515,8 @@
   // Save the initial mutations value.  This is the value at an
   // address that was written into the state object by
   // countByEnumeratingWithState:objects:count:.
-  llvm::Value *StateMutationsPtrPtr =
-    Builder.CreateStructGEP(StatePtr, 2, "mutationsptr.ptr");
+  llvm::Value *StateMutationsPtrPtr = Builder.CreateStructGEP(
+      StatePtr->getAllocatedType(), StatePtr, 2, "mutationsptr.ptr");
   llvm::Value *StateMutationsPtr = Builder.CreateLoad(StateMutationsPtrPtr,
                                                       "mutationsptr");
 
@@ -1599,8 +1596,8 @@
   // Fetch the buffer out of the enumeration state.
   // TODO: this pointer should actually be invariant between
   // refreshes, which would help us do certain loop optimizations.
-  llvm::Value *StateItemsPtr =
-    Builder.CreateStructGEP(StatePtr, 1, "stateitems.ptr");
+  llvm::Value *StateItemsPtr = Builder.CreateStructGEP(
+      StatePtr->getAllocatedType(), StatePtr, 1, "stateitems.ptr");
   llvm::Value *EnumStateItems =
     Builder.CreateLoad(StateItemsPtr, "stateitems");
 
diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp
index da95260..981fe90 100644
--- a/lib/CodeGen/CGObjCGNU.cpp
+++ b/lib/CodeGen/CGObjCGNU.cpp
@@ -171,8 +171,9 @@
   /// where the C code specifies const char*.  
   llvm::Constant *MakeConstantString(const std::string &Str,
                                      const std::string &Name="") {
-    llvm::Constant *ConstStr = CGM.GetAddrOfConstantCString(Str, Name.c_str());
-    return llvm::ConstantExpr::getGetElementPtr(ConstStr, Zeros);
+    auto *ConstStr = CGM.GetAddrOfConstantCString(Str, Name.c_str());
+    return llvm::ConstantExpr::getGetElementPtr(ConstStr->getValueType(),
+                                                ConstStr, Zeros);
   }
   /// Emits a linkonce_odr string, whose name is the prefix followed by the
   /// string value.  This allows the linker to combine the strings between
@@ -181,13 +182,14 @@
   llvm::Constant *ExportUniqueString(const std::string &Str,
                                      const std::string prefix) {
     std::string name = prefix + Str;
-    llvm::Constant *ConstStr = TheModule.getGlobalVariable(name);
+    auto *ConstStr = TheModule.getGlobalVariable(name);
     if (!ConstStr) {
       llvm::Constant *value = llvm::ConstantDataArray::getString(VMContext,Str);
       ConstStr = new llvm::GlobalVariable(TheModule, value->getType(), true,
               llvm::GlobalValue::LinkOnceODRLinkage, value, prefix + Str);
     }
-    return llvm::ConstantExpr::getGetElementPtr(ConstStr, Zeros);
+    return llvm::ConstantExpr::getGetElementPtr(ConstStr->getValueType(),
+                                                ConstStr, Zeros);
   }
   /// Generates a global structure, initialized by the elements in the vector.
   /// The element types must match the types of the structure elements in the
@@ -237,8 +239,9 @@
       NameAndAttributes += TypeStr;
       NameAndAttributes += '\0';
       NameAndAttributes += PD->getNameAsString();
-      return llvm::ConstantExpr::getGetElementPtr(
-          CGM.GetAddrOfConstantCString(NameAndAttributes), Zeros);
+      auto *ConstStr = CGM.GetAddrOfConstantCString(NameAndAttributes);
+      return llvm::ConstantExpr::getGetElementPtr(ConstStr->getValueType(),
+                                                  ConstStr, Zeros);
     }
     return MakeConstantString(PD->getNameAsString());
   }
@@ -672,8 +675,8 @@
       slot->setMetadata(msgSendMDKind, node);
 
       // Load the imp from the slot
-      llvm::Value *imp =
-        Builder.CreateLoad(Builder.CreateStructGEP(slot.getInstruction(), 4));
+      llvm::Value *imp = Builder.CreateLoad(
+          Builder.CreateStructGEP(nullptr, slot.getInstruction(), 4));
 
       // The lookup function may have changed the receiver, so make sure we use
       // the new one.
@@ -690,7 +693,7 @@
         CGF.EmitNounwindRuntimeCall(SlotLookupSuperFn, lookupArgs);
       slot->setOnlyReadsMemory();
 
-      return Builder.CreateLoad(Builder.CreateStructGEP(slot, 4));
+      return Builder.CreateLoad(Builder.CreateStructGEP(nullptr, slot, 4));
     }
   public:
     CGObjCGNUstep(CodeGenModule &Mod) : CGObjCGNU(Mod, 9, 3) {
@@ -1013,7 +1016,7 @@
 llvm::Value *CGObjCGNU::GetClassNamed(CodeGenFunction &CGF,
                                       const std::string &Name,
                                       bool isWeak) {
-  llvm::Value *ClassName = CGM.GetAddrOfConstantCString(Name);
+  llvm::GlobalVariable *ClassNameGV = CGM.GetAddrOfConstantCString(Name);
   // With the incompatible ABI, this will need to be replaced with a direct
   // reference to the class symbol.  For the compatible nonfragile ABI we are
   // still performing this lookup at run time but emitting the symbol for the
@@ -1023,7 +1026,8 @@
   // with memoized versions or with static references if it's safe to do so.
   if (!isWeak)
     EmitClassRef(Name);
-  ClassName = CGF.Builder.CreateStructGEP(ClassName, 0);
+  llvm::Value *ClassName =
+      CGF.Builder.CreateStructGEP(ClassNameGV->getValueType(), ClassNameGV, 0);
 
   llvm::Constant *ClassLookupFn =
     CGM.CreateRuntimeFunction(llvm::FunctionType::get(IdTy, PtrToInt8Ty, true),
@@ -1143,21 +1147,22 @@
   // It's quite ugly hard-coding this.  Ideally we'd generate it using the host
   // platform's name mangling.
   const char *vtableName = "_ZTVN7gnustep7libobjc22__objc_class_type_infoE";
-  llvm::Constant *Vtable = TheModule.getGlobalVariable(vtableName);
+  auto *Vtable = TheModule.getGlobalVariable(vtableName);
   if (!Vtable) {
     Vtable = new llvm::GlobalVariable(TheModule, PtrToInt8Ty, true,
                                       llvm::GlobalValue::ExternalLinkage,
                                       nullptr, vtableName);
   }
   llvm::Constant *Two = llvm::ConstantInt::get(IntTy, 2);
-  Vtable = llvm::ConstantExpr::getGetElementPtr(Vtable, Two);
-  Vtable = llvm::ConstantExpr::getBitCast(Vtable, PtrToInt8Ty);
+  auto *BVtable = llvm::ConstantExpr::getBitCast(
+      llvm::ConstantExpr::getGetElementPtr(Vtable->getValueType(), Vtable, Two),
+      PtrToInt8Ty);
 
   llvm::Constant *typeName =
     ExportUniqueString(className, "__objc_eh_typename_");
 
   std::vector<llvm::Constant*> fields;
-  fields.push_back(Vtable);
+  fields.push_back(BVtable);
   fields.push_back(typeName);
   llvm::Constant *TI = 
       MakeGlobal(llvm::StructType::get(PtrToInt8Ty, PtrToInt8Ty,
@@ -1275,11 +1280,11 @@
     }
   }
   // Cast the pointer to a simplified version of the class structure
+  llvm::Type *CastTy = llvm::StructType::get(IdTy, IdTy, nullptr);
   ReceiverClass = Builder.CreateBitCast(ReceiverClass,
-      llvm::PointerType::getUnqual(
-        llvm::StructType::get(IdTy, IdTy, nullptr)));
+                                        llvm::PointerType::getUnqual(CastTy));
   // Get the superclass pointer
-  ReceiverClass = Builder.CreateStructGEP(ReceiverClass, 1);
+  ReceiverClass = Builder.CreateStructGEP(CastTy, ReceiverClass, 1);
   // Load the superclass pointer
   ReceiverClass = Builder.CreateLoad(ReceiverClass);
   // Construct the structure used to look up the IMP
@@ -1287,8 +1292,10 @@
       Receiver->getType(), IdTy, nullptr);
   llvm::Value *ObjCSuper = Builder.CreateAlloca(ObjCSuperTy);
 
-  Builder.CreateStore(Receiver, Builder.CreateStructGEP(ObjCSuper, 0));
-  Builder.CreateStore(ReceiverClass, Builder.CreateStructGEP(ObjCSuper, 1));
+  Builder.CreateStore(Receiver,
+                      Builder.CreateStructGEP(ObjCSuperTy, ObjCSuper, 0));
+  Builder.CreateStore(ReceiverClass,
+                      Builder.CreateStructGEP(ObjCSuperTy, ObjCSuper, 1));
 
   ObjCSuper = EnforceType(Builder, ObjCSuper, PtrToObjCSuperTy);
 
@@ -2294,7 +2301,8 @@
       offsetPointerIndexes[2] = llvm::ConstantInt::get(IndexTy, ivarIndex);
       // Get the correct ivar field
       llvm::Constant *offsetValue = llvm::ConstantExpr::getGetElementPtr(
-              IvarList, offsetPointerIndexes);
+          cast<llvm::GlobalVariable>(IvarList)->getValueType(), IvarList,
+          offsetPointerIndexes);
       // Get the existing variable, if one exists.
       llvm::GlobalVariable *offset = TheModule.getNamedGlobal(Name);
       if (offset) {
@@ -2439,8 +2447,8 @@
 
   // Number of static selectors
   Elements.push_back(llvm::ConstantInt::get(LongTy, SelectorCount));
-  llvm::Constant *SelectorList = MakeGlobalArray(SelStructTy, Selectors,
-          ".objc_selector_list");
+  llvm::GlobalVariable *SelectorList =
+      MakeGlobalArray(SelStructTy, Selectors, ".objc_selector_list");
   Elements.push_back(llvm::ConstantExpr::getBitCast(SelectorList,
     SelStructPtrTy));
 
@@ -2450,8 +2458,8 @@
     llvm::Constant *Idxs[] = {Zeros[0],
       llvm::ConstantInt::get(Int32Ty, i), Zeros[0]};
     // FIXME: We're generating redundant loads and stores here!
-    llvm::Constant *SelPtr = llvm::ConstantExpr::getGetElementPtr(SelectorList,
-        makeArrayRef(Idxs, 2));
+    llvm::Constant *SelPtr = llvm::ConstantExpr::getGetElementPtr(
+        SelectorList->getValueType(), SelectorList, makeArrayRef(Idxs, 2));
     // If selectors are defined as an opaque type, cast the pointer to this
     // type.
     SelPtr = llvm::ConstantExpr::getBitCast(SelPtr, SelectorTy);
diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp
index b9fdf73..a45446a 100644
--- a/lib/CodeGen/CGObjCMac.cpp
+++ b/lib/CodeGen/CGObjCMac.cpp
@@ -486,7 +486,6 @@
   }
 
   ObjCCommonTypesHelper(CodeGen::CodeGenModule &cgm);
-  ~ObjCCommonTypesHelper(){}
 };
 
 /// ObjCTypesHelper - Helper class that encapsulates lazy
@@ -595,7 +594,6 @@
 
 public:
   ObjCTypesHelper(CodeGen::CodeGenModule &cgm);
-  ~ObjCTypesHelper() {}
 };
 
 /// ObjCNonFragileABITypesHelper - will have all types needed by objective-c's
@@ -733,7 +731,6 @@
   llvm::Type *EHTypePtrTy;
   
   ObjCNonFragileABITypesHelper(CodeGen::CodeGenModule &cgm);
-  ~ObjCNonFragileABITypesHelper(){}
 };
 
 class CGObjCCommonMac : public CodeGen::CGObjCRuntime {
@@ -1678,14 +1675,13 @@
 
 /// getConstantGEP() - Help routine to construct simple GEPs.
 static llvm::Constant *getConstantGEP(llvm::LLVMContext &VMContext,
-                                      llvm::Constant *C,
-                                      unsigned idx0,
+                                      llvm::GlobalVariable *C, unsigned idx0,
                                       unsigned idx1) {
   llvm::Value *Idxs[] = {
     llvm::ConstantInt::get(llvm::Type::getInt32Ty(VMContext), idx0),
     llvm::ConstantInt::get(llvm::Type::getInt32Ty(VMContext), idx1)
   };
-  return llvm::ConstantExpr::getGetElementPtr(C, Idxs);
+  return llvm::ConstantExpr::getGetElementPtr(C->getValueType(), C, Idxs);
 }
 
 /// hasObjCExceptionAttribute - Return true if this class or any super
@@ -1791,8 +1787,9 @@
     CGF.CreateTempAlloca(ObjCTypes.SuperTy, "objc_super");
   llvm::Value *ReceiverAsObject =
     CGF.Builder.CreateBitCast(Receiver, ObjCTypes.ObjectPtrTy);
-  CGF.Builder.CreateStore(ReceiverAsObject,
-                          CGF.Builder.CreateStructGEP(ObjCSuper, 0));
+  CGF.Builder.CreateStore(
+      ReceiverAsObject,
+      CGF.Builder.CreateStructGEP(ObjCTypes.SuperTy, ObjCSuper, 0));
 
   // If this is a class message the metaclass is passed as the target.
   llvm::Value *Target;
@@ -1805,20 +1802,20 @@
       // the class's "isa" pointer.  The following assumes that
       // isa" is the first ivar in a class (which it must be).
       Target = EmitClassRef(CGF, Class->getSuperClass());
-      Target = CGF.Builder.CreateStructGEP(Target, 0);
+      Target = CGF.Builder.CreateStructGEP(ObjCTypes.ClassTy, Target, 0);
       Target = CGF.Builder.CreateLoad(Target);
     } else {
-      llvm::Value *MetaClassPtr = EmitMetaClassRef(Class);
-      llvm::Value *SuperPtr = CGF.Builder.CreateStructGEP(MetaClassPtr, 1);
+      llvm::Constant *MetaClassPtr = EmitMetaClassRef(Class);
+      llvm::Value *SuperPtr =
+          CGF.Builder.CreateStructGEP(ObjCTypes.ClassTy, MetaClassPtr, 1);
       llvm::Value *Super = CGF.Builder.CreateLoad(SuperPtr);
       Target = Super;
     }
-  } 
-  else if (isCategoryImpl)
+  } else if (isCategoryImpl)
     Target = EmitClassRef(CGF, Class->getSuperClass());
   else {
     llvm::Value *ClassPtr = EmitSuperClassRef(Class);
-    ClassPtr = CGF.Builder.CreateStructGEP(ClassPtr, 1);
+    ClassPtr = CGF.Builder.CreateStructGEP(ObjCTypes.ClassTy, ClassPtr, 1);
     Target = CGF.Builder.CreateLoad(ClassPtr);
   }
   // FIXME: We shouldn't need to do this cast, rectify the ASTContext and
@@ -1826,8 +1823,8 @@
   llvm::Type *ClassTy =
     CGM.getTypes().ConvertType(CGF.getContext().getObjCClassType());
   Target = CGF.Builder.CreateBitCast(Target, ClassTy);
-  CGF.Builder.CreateStore(Target,
-                          CGF.Builder.CreateStructGEP(ObjCSuper, 1));
+  CGF.Builder.CreateStore(
+      Target, CGF.Builder.CreateStructGEP(ObjCTypes.SuperTy, ObjCSuper, 1));
   return EmitMessageSend(CGF, Return, ResultType,
                          EmitSelector(CGF, Sel),
                          ObjCSuper, ObjCTypes.SuperPtrCTy,
@@ -3816,8 +3813,8 @@
   //  - Call setjmp on the exception data buffer.
   llvm::Constant *Zero = llvm::ConstantInt::get(CGF.Builder.getInt32Ty(), 0);
   llvm::Value *GEPIndexes[] = { Zero, Zero, Zero };
-  llvm::Value *SetJmpBuffer =
-    CGF.Builder.CreateGEP(ExceptionData, GEPIndexes, "setjmp_buffer");
+  llvm::Value *SetJmpBuffer = CGF.Builder.CreateGEP(
+      ObjCTypes.ExceptionDataTy, ExceptionData, GEPIndexes, "setjmp_buffer");
   llvm::CallInst *SetJmpResult = CGF.EmitNounwindRuntimeCall(
       ObjCTypes.getSetJmpFn(), SetJmpBuffer, "setjmp_result");
   SetJmpResult->setCanReturnTwice();
@@ -5264,6 +5261,7 @@
   //   const uint32_t size;  // sizeof(struct _protocol_t)
   //   const uint32_t flags;  // = 0
   //   const char ** extendedMethodTypes;
+  //   const char *demangledName;
   // }
 
   // Holder for struct _protocol_list_t *
@@ -5276,6 +5274,7 @@
                              MethodListnfABIPtrTy, MethodListnfABIPtrTy,
                              MethodListnfABIPtrTy, MethodListnfABIPtrTy,
                              PropertyListPtrTy, IntTy, IntTy, Int8PtrPtrTy,
+                             Int8PtrTy,
                              nullptr);
 
   // struct _protocol_t*
@@ -6208,6 +6207,7 @@
 ///   const uint32_t size;  // sizeof(struct _protocol_t)
 ///   const uint32_t flags;  // = 0
 ///   const char ** extendedMethodTypes;
+///   const char *demangledName;
 /// }
 /// @endcode
 ///
@@ -6259,7 +6259,7 @@
   MethodTypesExt.insert(MethodTypesExt.end(),
                         OptMethodTypesExt.begin(), OptMethodTypesExt.end());
 
-  llvm::Constant *Values[11];
+  llvm::Constant *Values[12];
   // isa is NULL
   Values[0] = llvm::Constant::getNullValue(ObjCTypes.ObjectPtrTy);
   Values[1] = GetClassName(PD->getObjCRuntimeNameAsString());
@@ -6292,6 +6292,9 @@
   Values[10] = EmitProtocolMethodTypes("\01l_OBJC_$_PROTOCOL_METHOD_TYPES_"
                                        + PD->getObjCRuntimeNameAsString(),
                                        MethodTypesExt, ObjCTypes);
+  // const char *demangledName;
+  Values[11] = llvm::Constant::getNullValue(ObjCTypes.Int8PtrTy);
+    
   llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.ProtocolnfABITy,
                                                    Values);
 
@@ -6563,7 +6566,8 @@
   args[1].RV = RValue::get(mref);
 
   // Load the function to call from the message ref table.
-  llvm::Value *callee = CGF.Builder.CreateStructGEP(mref, 0);
+  llvm::Value *callee =
+      CGF.Builder.CreateStructGEP(ObjCTypes.MessageRefTy, mref, 0);
   callee = CGF.Builder.CreateLoad(callee, "msgSend_fn");
 
   callee = CGF.Builder.CreateBitCast(callee, MSI.MessengerType);
@@ -6728,8 +6732,9 @@
 
   llvm::Value *ReceiverAsObject =
     CGF.Builder.CreateBitCast(Receiver, ObjCTypes.ObjectPtrTy);
-  CGF.Builder.CreateStore(ReceiverAsObject,
-                          CGF.Builder.CreateStructGEP(ObjCSuper, 0));
+  CGF.Builder.CreateStore(
+      ReceiverAsObject,
+      CGF.Builder.CreateStructGEP(ObjCTypes.SuperTy, ObjCSuper, 0));
 
   // If this is a class message the metaclass is passed as the target.
   llvm::Value *Target;
@@ -6743,8 +6748,8 @@
   llvm::Type *ClassTy =
     CGM.getTypes().ConvertType(CGF.getContext().getObjCClassType());
   Target = CGF.Builder.CreateBitCast(Target, ClassTy);
-  CGF.Builder.CreateStore(Target,
-                          CGF.Builder.CreateStructGEP(ObjCSuper, 1));
+  CGF.Builder.CreateStore(
+      Target, CGF.Builder.CreateStructGEP(ObjCTypes.SuperTy, ObjCSuper, 1));
 
   return (isVTableDispatchedSelector(Sel))
     ? EmitVTableMessageSend(CGF, Return, ResultType, Sel,
@@ -6993,10 +6998,10 @@
   llvm::Value *VTableIdx = llvm::ConstantInt::get(CGM.Int32Ty, 2);
 
   llvm::Constant *Values[] = {
-    llvm::ConstantExpr::getGetElementPtr(VTableGV, VTableIdx),
-    GetClassName(ID->getObjCRuntimeNameAsString()),
-    GetClassGlobal(ClassName.str())
-  };
+      llvm::ConstantExpr::getGetElementPtr(VTableGV->getValueType(), VTableGV,
+                                           VTableIdx),
+      GetClassName(ID->getObjCRuntimeNameAsString()),
+      GetClassGlobal(ClassName.str())};
   llvm::Constant *Init =
     llvm::ConstantStruct::get(ObjCTypes.EHTypeTy, Values);
 
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index ef2d214..5988c78 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -31,47 +31,73 @@
 /// \brief Base class for handling code generation inside OpenMP regions.
 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
 public:
-  CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &CS)
-      : CGCapturedStmtInfo(CS, CR_OpenMP), Directive(D) {}
+  /// \brief Kinds of OpenMP regions used in codegen.
+  enum CGOpenMPRegionKind {
+    /// \brief Region with outlined function for standalone 'parallel'
+    /// directive.
+    ParallelOutlinedRegion,
+    /// \brief Region with outlined function for standalone 'task' directive.
+    TaskOutlinedRegion,
+    /// \brief Region for constructs that do not require function outlining,
+    /// like 'for', 'sections', 'atomic' etc. directives.
+    InlinedRegion,
+  };
 
-  CGOpenMPRegionInfo(const OMPExecutableDirective &D)
-      : CGCapturedStmtInfo(CR_OpenMP), Directive(D) {}
+  CGOpenMPRegionInfo(const CapturedStmt &CS,
+                     const CGOpenMPRegionKind RegionKind,
+                     const RegionCodeGenTy &CodeGen)
+      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
+        CodeGen(CodeGen) {}
+
+  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
+                     const RegionCodeGenTy &CodeGen)
+      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind),
+        CodeGen(CodeGen) {}
 
   /// \brief Get a variable or parameter for storing global thread id
   /// inside OpenMP construct.
   virtual const VarDecl *getThreadIDVariable() const = 0;
 
+  /// \brief Emit the captured statement body.
+  virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
+
   /// \brief Get an LValue for the current ThreadID variable.
   /// \return LValue for thread id variable. This LValue always has type int32*.
   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
 
-    /// \brief Emit the captured statement body.
-  virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
+  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
 
   static bool classof(const CGCapturedStmtInfo *Info) {
     return Info->getKind() == CR_OpenMP;
   }
+
 protected:
-  /// \brief OpenMP executable directive associated with the region.
-  const OMPExecutableDirective &Directive;
+  CGOpenMPRegionKind RegionKind;
+  const RegionCodeGenTy &CodeGen;
 };
 
 /// \brief API for captured statement code generation in OpenMP constructs.
 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
 public:
-  CGOpenMPOutlinedRegionInfo(const OMPExecutableDirective &D,
-                             const CapturedStmt &CS, const VarDecl *ThreadIDVar)
-      : CGOpenMPRegionInfo(D, CS), ThreadIDVar(ThreadIDVar) {
+  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
+                             const RegionCodeGenTy &CodeGen)
+      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen),
+        ThreadIDVar(ThreadIDVar) {
     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
   }
   /// \brief Get a variable or parameter for storing global thread id
   /// inside OpenMP construct.
-  virtual const VarDecl *getThreadIDVariable() const override {
-    return ThreadIDVar;
-  }
+  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
+
   /// \brief Get the name of the capture helper.
   StringRef getHelperName() const override { return ".omp_outlined."; }
 
+  static bool classof(const CGCapturedStmtInfo *Info) {
+    return CGOpenMPRegionInfo::classof(Info) &&
+           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
+               ParallelOutlinedRegion;
+  }
+
 private:
   /// \brief A variable or parameter storing global thread id for OpenMP
   /// constructs.
@@ -81,84 +107,121 @@
 /// \brief API for captured statement code generation in OpenMP constructs.
 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
 public:
-  CGOpenMPTaskOutlinedRegionInfo(const OMPExecutableDirective &D,
-                                 const CapturedStmt &CS,
+  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                  const VarDecl *ThreadIDVar,
-                                 const VarDecl *PartIDVar)
-      : CGOpenMPRegionInfo(D, CS), ThreadIDVar(ThreadIDVar),
-        PartIDVar(PartIDVar) {
+                                 const RegionCodeGenTy &CodeGen)
+      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen),
+        ThreadIDVar(ThreadIDVar) {
     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
   }
   /// \brief Get a variable or parameter for storing global thread id
   /// inside OpenMP construct.
-  virtual const VarDecl *getThreadIDVariable() const override {
-    return ThreadIDVar;
-  }
+  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
 
   /// \brief Get an LValue for the current ThreadID variable.
-  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
-
-  /// \brief Emit the captured statement body.
-  virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
+  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
 
   /// \brief Get the name of the capture helper.
   StringRef getHelperName() const override { return ".omp_outlined."; }
 
+  static bool classof(const CGCapturedStmtInfo *Info) {
+    return CGOpenMPRegionInfo::classof(Info) &&
+           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
+               TaskOutlinedRegion;
+  }
+
 private:
   /// \brief A variable or parameter storing global thread id for OpenMP
   /// constructs.
   const VarDecl *ThreadIDVar;
-  /// \brief A variable or parameter storing part id for OpenMP tasking
-  /// constructs.
-  const VarDecl *PartIDVar;
 };
 
 /// \brief API for inlined captured statement code generation in OpenMP
 /// constructs.
 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
 public:
-  CGOpenMPInlinedRegionInfo(const OMPExecutableDirective &D,
-                            CodeGenFunction::CGCapturedStmtInfo *OldCSI)
-      : CGOpenMPRegionInfo(D), OldCSI(OldCSI),
+  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
+                            const RegionCodeGenTy &CodeGen)
+      : CGOpenMPRegionInfo(InlinedRegion, CodeGen), OldCSI(OldCSI),
         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
   // \brief Retrieve the value of the context parameter.
-  virtual llvm::Value *getContextValue() const override {
+  llvm::Value *getContextValue() const override {
     if (OuterRegionInfo)
       return OuterRegionInfo->getContextValue();
     llvm_unreachable("No context value for inlined OpenMP region");
   }
+  virtual void setContextValue(llvm::Value *V) override {
+    if (OuterRegionInfo) {
+      OuterRegionInfo->setContextValue(V);
+      return;
+    }
+    llvm_unreachable("No context value for inlined OpenMP region");
+  }
   /// \brief Lookup the captured field decl for a variable.
-  virtual const FieldDecl *lookup(const VarDecl *VD) const override {
+  const FieldDecl *lookup(const VarDecl *VD) const override {
     if (OuterRegionInfo)
       return OuterRegionInfo->lookup(VD);
-    llvm_unreachable("Trying to reference VarDecl that is neither local nor "
-                     "captured in outer OpenMP region");
+    // If there is no outer outlined region, there is no need to look up the
+    // list of captured variables; the original variable can be used.
+    return nullptr;
   }
-  virtual FieldDecl *getThisFieldDecl() const override {
+  FieldDecl *getThisFieldDecl() const override {
     if (OuterRegionInfo)
       return OuterRegionInfo->getThisFieldDecl();
     return nullptr;
   }
   /// \brief Get a variable or parameter for storing global thread id
   /// inside OpenMP construct.
-  virtual const VarDecl *getThreadIDVariable() const override {
+  const VarDecl *getThreadIDVariable() const override {
     if (OuterRegionInfo)
       return OuterRegionInfo->getThreadIDVariable();
     return nullptr;
   }
 
   /// \brief Get the name of the capture helper.
-  virtual StringRef getHelperName() const override {
+  StringRef getHelperName() const override {
+    if (auto *OuterRegionInfo = getOldCSI())
+      return OuterRegionInfo->getHelperName();
     llvm_unreachable("No helper name for inlined OpenMP construct");
   }
 
   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
 
+  static bool classof(const CGCapturedStmtInfo *Info) {
+    return CGOpenMPRegionInfo::classof(Info) &&
+           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
+  }
+
 private:
   /// \brief CodeGen info about outer OpenMP region.
   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
   CGOpenMPRegionInfo *OuterRegionInfo;
 };
+
+/// \brief RAII for emitting code of OpenMP constructs.
+class InlinedOpenMPRegionRAII {
+  CodeGenFunction &CGF;
+
+public:
+  /// \brief Constructs region for combined constructs.
+  /// \param CodeGen Code generation sequence for combined directives. Includes
+  /// a list of functions used for code generation of implicitly inlined
+  /// regions.
+  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen)
+      : CGF(CGF) {
+    // Start emission for the construct.
+    CGF.CapturedStmtInfo =
+        new CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, CodeGen);
+  }
+  ~InlinedOpenMPRegionRAII() {
+    // Restore original CapturedStmtInfo only if we're done with code emission.
+    auto *OldCSI =
+        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
+    delete CGF.CapturedStmtInfo;
+    CGF.CapturedStmtInfo = OldCSI;
+  }
+};
+
 } // namespace
 
 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
@@ -172,15 +235,18 @@
           ->getPointeeType());
 }
 
-void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
-  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
-  CGF.EmitOMPPrivateClause(Directive, PrivateScope);
-  CGF.EmitOMPFirstprivateClause(Directive, PrivateScope);
-  if (PrivateScope.Privatize())
-    // Emit implicit barrier to synchronize threads and avoid data races.
-    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, Directive.getLocStart(),
-                                               /*IsExplicit=*/false);
-  CGCapturedStmtInfo::EmitBody(CGF, S);
+void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
+  // 1.2.2 OpenMP Language Terminology
+  // Structured block - An executable statement with a single entry at the
+  // top and a single exit at the bottom.
+  // The point of exit cannot be a branch out of the structured block.
+  // longjmp() and throw() must not violate the entry/exit criteria.
+  CGF.EHStack.pushTerminate();
+  {
+    CodeGenFunction::RunCleanupsScope Scope(CGF);
+    CodeGen(CGF);
+  }
+  CGF.EHStack.popTerminate();
 }
 
 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
@@ -190,14 +256,6 @@
       getThreadIDVariable()->getType());
 }
 
-void CGOpenMPTaskOutlinedRegionInfo::EmitBody(CodeGenFunction &CGF,
-                                              const Stmt *S) {
-  if (PartIDVar) {
-    // TODO: emit code for untied tasks.
-  }
-  CGCapturedStmtInfo::EmitBody(CGF, S);
-}
-
 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
     : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) {
   IdentTy = llvm::StructType::create(
@@ -216,13 +274,14 @@
 }
 
 llvm::Value *
-CGOpenMPRuntime::emitOutlinedFunction(const OMPExecutableDirective &D,
-                                      const VarDecl *ThreadIDVar) {
+CGOpenMPRuntime::emitParallelOutlinedFunction(const OMPExecutableDirective &D,
+                                              const VarDecl *ThreadIDVar,
+                                              const RegionCodeGenTy &CodeGen) {
   assert(ThreadIDVar->getType()->isPointerType() &&
          "thread id variable must be of type kmp_int32 *");
   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
   CodeGenFunction CGF(CGM, true);
-  CGOpenMPOutlinedRegionInfo CGInfo(D, *CS, ThreadIDVar);
+  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen);
   CGF.CapturedStmtInfo = &CGInfo;
   return CGF.GenerateCapturedStmtFunction(*CS);
 }
@@ -230,12 +289,12 @@
 llvm::Value *
 CGOpenMPRuntime::emitTaskOutlinedFunction(const OMPExecutableDirective &D,
                                           const VarDecl *ThreadIDVar,
-                                          const VarDecl *PartIDVar) {
+                                          const RegionCodeGenTy &CodeGen) {
   assert(!ThreadIDVar->getType()->isPointerType() &&
          "thread id variable must be of type kmp_int32 for tasks");
   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
   CodeGenFunction CGF(CGM, true);
-  CGOpenMPTaskOutlinedRegionInfo CGInfo(D, *CS, ThreadIDVar, PartIDVar);
+  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen);
   CGF.CapturedStmtInfo = &CGInfo;
   return CGF.GenerateCapturedStmtFunction(*CS);
 }
@@ -303,8 +362,8 @@
   }
 
   // char **psource = &.kmpc_loc_<flags>.addr.psource;
-  auto *PSource =
-      CGF.Builder.CreateConstInBoundsGEP2_32(LocValue, 0, IdentField_PSource);
+  auto *PSource = CGF.Builder.CreateConstInBoundsGEP2_32(IdentTy, LocValue, 0,
+                                                         IdentField_PSource);
 
   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
   if (OMPDebugLoc == nullptr) {
@@ -595,6 +654,62 @@
     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
     break;
   }
+  case OMPRTL__kmpc_reduce: {
+    // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
+    // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
+    // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
+    llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
+    auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
+                                               /*isVarArg=*/false);
+    llvm::Type *TypeParams[] = {
+        getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
+        CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
+        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
+    break;
+  }
+  case OMPRTL__kmpc_reduce_nowait: {
+    // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
+    // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
+    // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
+    // *lck);
+    llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
+    auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
+                                               /*isVarArg=*/false);
+    llvm::Type *TypeParams[] = {
+        getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
+        CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
+        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
+    break;
+  }
+  case OMPRTL__kmpc_end_reduce: {
+    // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
+    // kmp_critical_name *lck);
+    llvm::Type *TypeParams[] = {
+        getIdentTyPointerTy(), CGM.Int32Ty,
+        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
+    break;
+  }
+  case OMPRTL__kmpc_end_reduce_nowait: {
+    // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
+    // kmp_critical_name *lck);
+    llvm::Type *TypeParams[] = {
+        getIdentTyPointerTy(), CGM.Int32Ty,
+        llvm::PointerType::getUnqual(KmpCriticalNameTy)};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+    RTLFn =
+        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
+    break;
+  }
   }
   return RTLFn;
 }
@@ -906,24 +1021,46 @@
   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
 }
 
-void CGOpenMPRuntime::emitCriticalRegion(
-    CodeGenFunction &CGF, StringRef CriticalName,
-    const std::function<void()> &CriticalOpGen, SourceLocation Loc) {
-  auto RegionLock = getCriticalRegionLock(CriticalName);
+namespace {
+class CallEndCleanup : public EHScopeStack::Cleanup {
+public:
+  typedef ArrayRef<llvm::Value *> CleanupValuesTy;
+private:
+  llvm::Value *Callee;
+  llvm::SmallVector<llvm::Value *, 8> Args;
+
+public:
+  CallEndCleanup(llvm::Value *Callee, CleanupValuesTy Args)
+      : Callee(Callee), Args(Args.begin(), Args.end()) {}
+  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
+    CGF.EmitRuntimeCall(Callee, Args);
+  }
+};
+} // namespace
+
+void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
+                                         StringRef CriticalName,
+                                         const RegionCodeGenTy &CriticalOpGen,
+                                         SourceLocation Loc) {
   // __kmpc_critical(ident_t *, gtid, Lock);
   // CriticalOpGen();
   // __kmpc_end_critical(ident_t *, gtid, Lock);
   // Prepare arguments and build a call to __kmpc_critical
-  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
-                         RegionLock};
-  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
-  CriticalOpGen();
-  // Build a call to __kmpc_end_critical
-  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
+  {
+    CodeGenFunction::RunCleanupsScope Scope(CGF);
+    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
+                           getCriticalRegionLock(CriticalName)};
+    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
+    // Build a call to __kmpc_end_critical
+    CGF.EHStack.pushCleanup<CallEndCleanup>(
+        NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
+        llvm::makeArrayRef(Args));
+    emitInlinedDirective(CGF, CriticalOpGen);
+  }
 }
 
 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
-                       const std::function<void()> &BodyOpGen) {
+                       const RegionCodeGenTy &BodyOpGen) {
   llvm::Value *CallBool = CGF.EmitScalarConversion(
       IfCond,
       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
@@ -934,14 +1071,14 @@
   // Generate the branch (If-stmt)
   CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
   CGF.EmitBlock(ThenBlock);
-  BodyOpGen();
+  CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, BodyOpGen);
   // Emit the rest of bblocks/branches
   CGF.EmitBranch(ContBlock);
   CGF.EmitBlock(ContBlock, true);
 }
 
 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
-                                       const std::function<void()> &MasterOpGen,
+                                       const RegionCodeGenTy &MasterOpGen,
                                        SourceLocation Loc) {
   // if(__kmpc_master(ident_t *, gtid)) {
   //   MasterOpGen();
@@ -951,23 +1088,12 @@
   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
   auto *IsMaster =
       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
-  emitIfStmt(CGF, IsMaster, [&]() -> void {
-    MasterOpGen();
-    // Build a call to __kmpc_end_master.
-    // OpenMP [1.2.2 OpenMP Language Terminology]
-    // For C/C++, an executable statement, possibly compound, with a single
-    // entry at the top and a single exit at the bottom, or an OpenMP construct.
-    // * Access to the structured block must not be the result of a branch.
-    // * The point of exit cannot be a branch out of the structured block.
-    // * The point of entry must not be a call to setjmp().
-    // * longjmp() and throw() must not violate the entry/exit criteria.
-    // * An expression statement, iteration statement, selection statement, or
-    // try block is considered to be a structured block if the corresponding
-    // compound statement obtained by enclosing it in { and } would be a
-    // structured block.
-    // It is analyzed in Sema, so we can just call __kmpc_end_master() on
-    // fallthrough rather than pushing a normal cleanup for it.
-    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_master), Args);
+  emitIfStmt(CGF, IsMaster, [&](CodeGenFunction &CGF) -> void {
+    CodeGenFunction::RunCleanupsScope Scope(CGF);
+    CGF.EHStack.pushCleanup<CallEndCleanup>(
+        NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
+        llvm::makeArrayRef(Args));
+    MasterOpGen(CGF);
   });
 }
 
@@ -981,8 +1107,9 @@
 }
 
 static llvm::Value *emitCopyprivateCopyFunction(
-    CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> SrcExprs,
-    ArrayRef<const Expr *> DstExprs, ArrayRef<const Expr *> AssignmentOps) {
+    CodeGenModule &CGM, llvm::Type *ArgsType,
+    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
+    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
   auto &C = CGM.getContext();
   // void copy_func(void *LHSArg, void *RHSArg);
   FunctionArgList Args;
@@ -1001,7 +1128,7 @@
   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
   CodeGenFunction CGF(CGM);
   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
-  // Dst = (void*[n])(LHSArg);
+  // Dest = (void*[n])(LHSArg);
   // Src = (void*[n])(RHSArg);
   auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
@@ -1015,36 +1142,28 @@
   // *(Type1*)Dst[1] = *(Type1*)Src[1];
   // ...
   // *(Typen*)Dst[n] = *(Typen*)Src[n];
-  CodeGenFunction::OMPPrivateScope Scope(CGF);
   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
-    Scope.addPrivate(
-        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()),
-        [&]() -> llvm::Value *{
-          return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-              CGF.Builder.CreateAlignedLoad(CGF.Builder.CreateStructGEP(RHS, I),
-                                            CGM.PointerAlignInBytes),
-              CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
-        });
-    Scope.addPrivate(
-        cast<VarDecl>(cast<DeclRefExpr>(DstExprs[I])->getDecl()),
-        [&]() -> llvm::Value *{
-          return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-              CGF.Builder.CreateAlignedLoad(CGF.Builder.CreateStructGEP(LHS, I),
-                                            CGM.PointerAlignInBytes),
-              CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
-        });
+    auto *DestAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+        CGF.Builder.CreateAlignedLoad(
+            CGF.Builder.CreateStructGEP(nullptr, LHS, I),
+            CGM.PointerAlignInBytes),
+        CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
+    auto *SrcAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+        CGF.Builder.CreateAlignedLoad(
+            CGF.Builder.CreateStructGEP(nullptr, RHS, I),
+            CGM.PointerAlignInBytes),
+        CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
+    CGF.EmitOMPCopy(CGF, CopyprivateVars[I]->getType(), DestAddr, SrcAddr,
+                    cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()),
+                    cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()),
+                    AssignmentOps[I]);
   }
-  Scope.Privatize();
-  for (auto *E : AssignmentOps) {
-    CGF.EmitIgnoredExpr(E);
-  }
-  Scope.ForceCleanup();
   CGF.FinishFunction();
   return Fn;
 }
 
 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
-                                       const std::function<void()> &SingleOpGen,
+                                       const RegionCodeGenTy &SingleOpGen,
                                        SourceLocation Loc,
                                        ArrayRef<const Expr *> CopyprivateVars,
                                        ArrayRef<const Expr *> SrcExprs,
@@ -1074,28 +1193,17 @@
   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
   auto *IsSingle =
       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
-  emitIfStmt(CGF, IsSingle, [&]() -> void {
-    SingleOpGen();
+  emitIfStmt(CGF, IsSingle, [&](CodeGenFunction &CGF) -> void {
+    CodeGenFunction::RunCleanupsScope Scope(CGF);
+    CGF.EHStack.pushCleanup<CallEndCleanup>(
+        NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
+        llvm::makeArrayRef(Args));
+    SingleOpGen(CGF);
     if (DidIt) {
       // did_it = 1;
       CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt,
                                      DidIt->getAlignment());
     }
-    // Build a call to __kmpc_end_single.
-    // OpenMP [1.2.2 OpenMP Language Terminology]
-    // For C/C++, an executable statement, possibly compound, with a single
-    // entry at the top and a single exit at the bottom, or an OpenMP construct.
-    // * Access to the structured block must not be the result of a branch.
-    // * The point of exit cannot be a branch out of the structured block.
-    // * The point of entry must not be a call to setjmp().
-    // * longjmp() and throw() must not violate the entry/exit criteria.
-    // * An expression statement, iteration statement, selection statement, or
-    // try block is considered to be a structured block if the corresponding
-    // compound statement obtained by enclosing it in { and } would be a
-    // structured block.
-    // It is analyzed in Sema, so we can just call __kmpc_end_single() on
-    // fallthrough rather than pushing a normal cleanup for it.
-    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_single), Args);
   });
   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
   // <copy_func>, did_it);
@@ -1108,7 +1216,8 @@
     auto *CopyprivateList =
         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
-      auto *Elem = CGF.Builder.CreateStructGEP(CopyprivateList, I);
+      auto *Elem = CGF.Builder.CreateStructGEP(
+          CopyprivateList->getAllocatedType(), CopyprivateList, I);
       CGF.Builder.CreateAlignedStore(
           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
               CGF.EmitLValue(CopyprivateVars[I]).getAddress(), CGF.VoidPtrTy),
@@ -1118,7 +1227,7 @@
     // threads in the corresponding parallel region.
     auto *CpyFn = emitCopyprivateCopyFunction(
         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
-        SrcExprs, DstExprs, AssignmentOps);
+        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
     auto *BufSize = CGF.Builder.getInt32(
         C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity());
     auto *CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
@@ -1138,11 +1247,23 @@
 }
 
 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
-                                      bool IsExplicit) {
+                                      OpenMPDirectiveKind Kind) {
   // Build call __kmpc_cancel_barrier(loc, thread_id);
-  auto Flags = static_cast<OpenMPLocationFlags>(
-      OMP_IDENT_KMPC |
-      (IsExplicit ? OMP_IDENT_BARRIER_EXPL : OMP_IDENT_BARRIER_IMPL));
+  OpenMPLocationFlags Flags = OMP_IDENT_KMPC;
+  if (Kind == OMPD_for) {
+    Flags =
+        static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR);
+  } else if (Kind == OMPD_sections) {
+    Flags = static_cast<OpenMPLocationFlags>(Flags |
+                                             OMP_IDENT_BARRIER_IMPL_SECTIONS);
+  } else if (Kind == OMPD_single) {
+    Flags =
+        static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE);
+  } else if (Kind == OMPD_barrier) {
+    Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL);
+  } else {
+    Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL);
+  }
   // Build call __kmpc_cancel_barrier(loc, thread_id);
   // Replace __kmpc_barrier() function by __kmpc_cancel_barrier() because this
   // one provides the same functionality and adds initial support for
@@ -1262,6 +1383,7 @@
   assert((ScheduleKind == OMPC_SCHEDULE_static ||
           ScheduleKind == OMPC_SCHEDULE_unknown) &&
          "Non-static schedule kinds are not yet implemented");
+  (void)ScheduleKind;
   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
                          getThreadID(CGF, Loc)};
@@ -1379,7 +1501,8 @@
 static llvm::Value *
 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                       QualType KmpInt32Ty, QualType KmpTaskTPtrQTy,
-                      QualType SharedsPtrTy, llvm::Value *TaskFunction) {
+                      QualType SharedsPtrTy, llvm::Value *TaskFunction,
+                      llvm::Type *KmpTaskTTy) {
   auto &C = CGM.getContext();
   FunctionArgList Args;
   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
@@ -1407,12 +1530,12 @@
   auto TaskTypeArgAddr = CGF.EmitLoadOfScalar(
       CGF.GetAddrOfLocalVar(&TaskTypeArg), /*Volatile=*/false,
       CGM.PointerAlignInBytes, KmpTaskTPtrQTy, Loc);
-  auto *PartidPtr = CGF.Builder.CreateStructGEP(TaskTypeArgAddr,
+  auto *PartidPtr = CGF.Builder.CreateStructGEP(KmpTaskTTy, TaskTypeArgAddr,
                                                 /*Idx=*/KmpTaskTPartId);
   auto *PartidParam = CGF.EmitLoadOfScalar(
       PartidPtr, /*Volatile=*/false,
       C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
-  auto *SharedsPtr = CGF.Builder.CreateStructGEP(TaskTypeArgAddr,
+  auto *SharedsPtr = CGF.Builder.CreateStructGEP(KmpTaskTTy, TaskTypeArgAddr,
                                                  /*Idx=*/KmpTaskTShareds);
   auto *SharedsParam =
       CGF.EmitLoadOfScalar(SharedsPtr, /*Volatile=*/false,
@@ -1441,14 +1564,16 @@
   auto KmpTaskQTy =
       createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy);
   QualType KmpTaskTPtrQTy = C.getPointerType(KmpTaskQTy);
-  auto KmpTaskTPtrTy = CGF.ConvertType(KmpTaskQTy)->getPointerTo();
+  auto *KmpTaskTTy = CGF.ConvertType(KmpTaskQTy);
+  auto *KmpTaskTPtrTy = KmpTaskTTy->getPointerTo();
   auto KmpTaskTySize = CGM.getSize(C.getTypeSizeInChars(KmpTaskQTy));
   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
 
   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
   // kmp_task_t *tt);
-  auto *TaskEntry = emitProxyTaskFunction(CGM, Loc, KmpInt32Ty, KmpTaskTPtrQTy,
-                                          SharedsPtrTy, TaskFunction);
+  auto *TaskEntry =
+      emitProxyTaskFunction(CGM, Loc, KmpInt32Ty, KmpTaskTPtrQTy, SharedsPtrTy,
+                            TaskFunction, KmpTaskTTy);
 
   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
@@ -1481,7 +1606,7 @@
   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty())
     CGF.EmitAggregateCopy(
         CGF.EmitLoadOfScalar(
-            CGF.Builder.CreateStructGEP(NewTaskNewTaskTTy,
+            CGF.Builder.CreateStructGEP(KmpTaskTTy, NewTaskNewTaskTTy,
                                         /*Idx=*/KmpTaskTShareds),
             /*Volatile=*/false, CGM.PointerAlignInBytes, SharedsPtrTy, Loc),
         Shareds, SharedsTy);
@@ -1490,7 +1615,7 @@
   CGF.Builder.CreateAlignedStore(
       llvm::ConstantPointerNull::get(
           cast<llvm::PointerType>(KmpRoutineEntryPtrTy)),
-      CGF.Builder.CreateStructGEP(NewTaskNewTaskTTy,
+      CGF.Builder.CreateStructGEP(KmpTaskTTy, NewTaskNewTaskTTy,
                                   /*Idx=*/KmpTaskTDestructors),
       CGM.PointerAlignInBytes);
 
@@ -1504,23 +1629,268 @@
   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
 }
 
-InlinedOpenMPRegionRAII::InlinedOpenMPRegionRAII(
-    CodeGenFunction &CGF, const OMPExecutableDirective &D)
-    : CGF(CGF) {
-  CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(D, CGF.CapturedStmtInfo);
-  // 1.2.2 OpenMP Language Terminology
-  // Structured block - An executable statement with a single entry at the
-  // top and a single exit at the bottom.
-  // The point of exit cannot be a branch out of the structured block.
-  // longjmp() and throw() must not violate the entry/exit criteria.
-  CGF.EHStack.pushTerminate();
+static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
+                                          llvm::Type *ArgsType,
+                                          ArrayRef<const Expr *> LHSExprs,
+                                          ArrayRef<const Expr *> RHSExprs,
+                                          ArrayRef<const Expr *> ReductionOps) {
+  auto &C = CGM.getContext();
+
+  // void reduction_func(void *LHSArg, void *RHSArg);
+  FunctionArgList Args;
+  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
+                           C.VoidPtrTy);
+  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
+                           C.VoidPtrTy);
+  Args.push_back(&LHSArg);
+  Args.push_back(&RHSArg);
+  FunctionType::ExtInfo EI;
+  auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
+      C.VoidTy, Args, EI, /*isVariadic=*/false);
+  auto *Fn = llvm::Function::Create(
+      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+      ".omp.reduction.reduction_func", &CGM.getModule());
+  CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
+  CodeGenFunction CGF(CGM);
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+
+  // LHS = (void*[n])(LHSArg);
+  // RHS = (void*[n])(RHSArg);
+  auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+      CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
+                                    CGF.PointerAlignInBytes),
+      ArgsType);
+  auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+      CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
+                                    CGF.PointerAlignInBytes),
+      ArgsType);
+
+  //  ...
+  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
+  //  ...
+  CodeGenFunction::OMPPrivateScope Scope(CGF);
+  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I) {
+    Scope.addPrivate(
+        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()),
+        [&]() -> llvm::Value *{
+          return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+              CGF.Builder.CreateAlignedLoad(
+                  CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, RHS, I),
+                  CGM.PointerAlignInBytes),
+              CGF.ConvertTypeForMem(C.getPointerType(RHSExprs[I]->getType())));
+        });
+    Scope.addPrivate(
+        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()),
+        [&]() -> llvm::Value *{
+          return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+              CGF.Builder.CreateAlignedLoad(
+                  CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, LHS, I),
+                  CGM.PointerAlignInBytes),
+              CGF.ConvertTypeForMem(C.getPointerType(LHSExprs[I]->getType())));
+        });
+  }
+  Scope.Privatize();
+  for (auto *E : ReductionOps) {
+    CGF.EmitIgnoredExpr(E);
+  }
+  Scope.ForceCleanup();
+  CGF.FinishFunction();
+  return Fn;
 }
 
-InlinedOpenMPRegionRAII::~InlinedOpenMPRegionRAII() {
-  CGF.EHStack.popTerminate();
-  auto *OldCSI =
-      cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
-  delete CGF.CapturedStmtInfo;
-  CGF.CapturedStmtInfo = OldCSI;
+void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
+                                    ArrayRef<const Expr *> LHSExprs,
+                                    ArrayRef<const Expr *> RHSExprs,
+                                    ArrayRef<const Expr *> ReductionOps,
+                                    bool WithNowait) {
+  // The following code should be emitted for a reduction:
+  //
+  // static kmp_critical_name lock = { 0 };
+  //
+  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
+  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
+  //  ...
+  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
+  //  *(Type<n>-1*)rhs[<n>-1]);
+  // }
+  //
+  // ...
+  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
+  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
+  // RedList, reduce_func, &<lock>)) {
+  // case 1:
+  //  ...
+  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
+  //  ...
+  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
+  // break;
+  // case 2:
+  //  ...
+  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
+  //  ...
+  // break;
+  // default:;
+  // }
+
+  auto &C = CGM.getContext();
+
+  // 1. Build a list of reduction variables.
+  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
+  llvm::APInt ArraySize(/*unsigned int numBits=*/32, RHSExprs.size());
+  QualType ReductionArrayTy =
+      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
+                             /*IndexTypeQuals=*/0);
+  auto *ReductionList =
+      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
+  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I) {
+    auto *Elem = CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, ReductionList, I);
+    CGF.Builder.CreateAlignedStore(
+        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+            CGF.EmitLValue(RHSExprs[I]).getAddress(), CGF.VoidPtrTy),
+        Elem, CGM.PointerAlignInBytes);
+  }
+
+  // 2. Emit reduce_func().
+  auto *ReductionFn = emitReductionFunction(
+      CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), LHSExprs,
+      RHSExprs, ReductionOps);
+
+  // 3. Create static kmp_critical_name lock = { 0 };
+  auto *Lock = getCriticalRegionLock(".reduction");
+
+  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
+  // RedList, reduce_func, &<lock>);
+  auto *IdentTLoc = emitUpdateLocation(
+      CGF, Loc,
+      static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE));
+  auto *ThreadId = getThreadID(CGF, Loc);
+  auto *ReductionArrayTySize = llvm::ConstantInt::get(
+      CGM.SizeTy, C.getTypeSizeInChars(ReductionArrayTy).getQuantity());
+  auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList,
+                                                             CGF.VoidPtrTy);
+  llvm::Value *Args[] = {
+      IdentTLoc,                             // ident_t *<loc>
+      ThreadId,                              // i32 <gtid>
+      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
+      ReductionArrayTySize,                  // size_type sizeof(RedList)
+      RL,                                    // void *RedList
+      ReductionFn, // void (*) (void *, void *) <reduce_func>
+      Lock         // kmp_critical_name *&<lock>
+  };
+  auto Res = CGF.EmitRuntimeCall(
+      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
+                                       : OMPRTL__kmpc_reduce),
+      Args);
+
+  // 5. Build switch(res)
+  auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
+  auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
+
+  // 6. Build case 1:
+  //  ...
+  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
+  //  ...
+  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
+  // break;
+  auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
+  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
+  CGF.EmitBlock(Case1BB);
+
+  {
+    CodeGenFunction::RunCleanupsScope Scope(CGF);
+    // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
+    llvm::Value *EndArgs[] = {
+        IdentTLoc, // ident_t *<loc>
+        ThreadId,  // i32 <gtid>
+        Lock       // kmp_critical_name *&<lock>
+    };
+    CGF.EHStack.pushCleanup<CallEndCleanup>(
+        NormalAndEHCleanup,
+        createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
+                                         : OMPRTL__kmpc_end_reduce),
+        llvm::makeArrayRef(EndArgs));
+    for (auto *E : ReductionOps) {
+      CGF.EmitIgnoredExpr(E);
+    }
+  }
+
+  CGF.EmitBranch(DefaultBB);
+
+  // 7. Build case 2:
+  //  ...
+  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
+  //  ...
+  // break;
+  auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
+  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
+  CGF.EmitBlock(Case2BB);
+
+  {
+    CodeGenFunction::RunCleanupsScope Scope(CGF);
+    auto I = LHSExprs.begin();
+    for (auto *E : ReductionOps) {
+      const Expr *XExpr = nullptr;
+      const Expr *EExpr = nullptr;
+      const Expr *UpExpr = nullptr;
+      BinaryOperatorKind BO = BO_Comma;
+      // Try to emit update expression as a simple atomic.
+      if (auto *ACO = dyn_cast<AbstractConditionalOperator>(E)) {
+        // If this is a conditional operator, analyze its condition for
+        // a min/max reduction operator.
+        E = ACO->getCond();
+      }
+      if (auto *BO = dyn_cast<BinaryOperator>(E)) {
+        if (BO->getOpcode() == BO_Assign) {
+          XExpr = BO->getLHS();
+          UpExpr = BO->getRHS();
+        }
+      }
+      // Analyze RHS part of the whole expression.
+      if (UpExpr) {
+        if (auto *BORHS =
+                dyn_cast<BinaryOperator>(UpExpr->IgnoreParenImpCasts())) {
+          EExpr = BORHS->getRHS();
+          BO = BORHS->getOpcode();
+        }
+      }
+      if (XExpr) {
+        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
+        LValue X = CGF.EmitLValue(XExpr);
+        RValue E;
+        if (EExpr)
+          E = CGF.EmitAnyExpr(EExpr);
+        CGF.EmitOMPAtomicSimpleUpdateExpr(
+            X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
+            [&CGF, UpExpr, VD](RValue XRValue) {
+              CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+              PrivateScope.addPrivate(
+                  VD, [&CGF, VD, XRValue]() -> llvm::Value *{
+                    auto *LHSTemp = CGF.CreateMemTemp(VD->getType());
+                    CGF.EmitStoreThroughLValue(
+                        XRValue,
+                        CGF.MakeNaturalAlignAddrLValue(LHSTemp, VD->getType()));
+                    return LHSTemp;
+                  });
+              (void)PrivateScope.Privatize();
+              return CGF.EmitAnyExpr(UpExpr);
+            });
+      } else {
+        // Emit as a critical region.
+        emitCriticalRegion(CGF, ".atomic_reduction", [E](CodeGenFunction &CGF) {
+          CGF.EmitIgnoredExpr(E);
+        }, Loc);
+      }
+      ++I;
+    }
+  }
+
+  CGF.EmitBranch(DefaultBB);
+  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
+}
+
+void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
+                                           const RegionCodeGenTy &CodeGen) {
+  InlinedOpenMPRegionRAII Region(CGF, CodeGen);
+  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
 }
 
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h
index f8849e6..fa59930 100644
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -43,7 +43,10 @@
 class CodeGenFunction;
 class CodeGenModule;
 
+typedef llvm::function_ref<void(CodeGenFunction &)> RegionCodeGenTy;
+
 class CGOpenMPRuntime {
+private:
   enum OpenMPRTLFunction {
     /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
     /// kmpc_micro microtask, ...);
@@ -100,6 +103,21 @@
     // kmp_int32 cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
     // kmp_int32 didit);
     OMPRTL__kmpc_copyprivate,
+    // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
+    // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
+    // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
+    OMPRTL__kmpc_reduce,
+    // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
+    // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
+    // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
+    // *lck);
+    OMPRTL__kmpc_reduce_nowait,
+    // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
+    // kmp_critical_name *lck);
+    OMPRTL__kmpc_end_reduce,
+    // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
+    // kmp_critical_name *lck);
+    OMPRTL__kmpc_end_reduce_nowait,
   };
 
   /// \brief Values for bit flags used in the ident_t to describe the fields.
@@ -284,25 +302,27 @@
   virtual ~CGOpenMPRuntime() {}
   virtual void clear();
 
-  /// \brief Emits outlined function for the specified OpenMP directive \a D.
-  /// This outlined function has type void(*)(kmp_int32 *ThreadID, kmp_int32
-  /// BoundID, struct context_vars*).
+  /// \brief Emits outlined function for the specified OpenMP parallel directive
+  /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
+  /// kmp_int32 BoundID, struct context_vars*).
   /// \param D OpenMP directive.
   /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
-  ///
-  virtual llvm::Value *emitOutlinedFunction(const OMPExecutableDirective &D,
-                                            const VarDecl *ThreadIDVar);
+  /// \param CodeGen Code generation sequence for the \a D directive.
+  virtual llvm::Value *
+  emitParallelOutlinedFunction(const OMPExecutableDirective &D,
+                               const VarDecl *ThreadIDVar,
+                               const RegionCodeGenTy &CodeGen);
 
   /// \brief Emits outlined function for the OpenMP task directive \a D. This
   /// outlined function has type void(*)(kmp_int32 ThreadID, kmp_int32
   /// PartID, struct context_vars*).
   /// \param D OpenMP directive.
   /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
-  /// \param PartIDVar If not nullptr - variable used for part id in tasks.
+  /// \param CodeGen Code generation sequence for the \a D directive.
   ///
   virtual llvm::Value *emitTaskOutlinedFunction(const OMPExecutableDirective &D,
                                                 const VarDecl *ThreadIDVar,
-                                                const VarDecl *PartIDVar);
+                                                const RegionCodeGenTy &CodeGen);
 
   /// \brief Cleans up references to the objects in finished function.
   ///
@@ -334,14 +354,14 @@
   /// \param CriticalOpGen Generator for the statement associated with the given
   /// critical region.
   virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName,
-                                  const std::function<void()> &CriticalOpGen,
+                                  const RegionCodeGenTy &CriticalOpGen,
                                   SourceLocation Loc);
 
   /// \brief Emits a master region.
   /// \param MasterOpGen Generator for the statement associated with the given
   /// master region.
   virtual void emitMasterRegion(CodeGenFunction &CGF,
-                                const std::function<void()> &MasterOpGen,
+                                const RegionCodeGenTy &MasterOpGen,
                                 SourceLocation Loc);
 
   /// \brief Emits code for a taskyield directive.
@@ -351,18 +371,19 @@
   /// \param SingleOpGen Generator for the statement associated with the given
   /// single region.
   virtual void emitSingleRegion(CodeGenFunction &CGF,
-                                const std::function<void()> &SingleOpGen,
+                                const RegionCodeGenTy &SingleOpGen,
                                 SourceLocation Loc,
                                 ArrayRef<const Expr *> CopyprivateVars,
+                                ArrayRef<const Expr *> DestExprs,
                                 ArrayRef<const Expr *> SrcExprs,
-                                ArrayRef<const Expr *> DstExprs,
                                 ArrayRef<const Expr *> AssignmentOps);
 
-  /// \brief Emits explicit barrier for OpenMP threads.
-  /// \param IsExplicit true, if it is explicitly specified barrier.
+  /// \brief Emit an implicit/explicit barrier for OpenMP threads.
+  /// \param Kind Directive for which this implicit barrier call must be
+  /// generated. Must be OMPD_barrier for explicit barrier generation.
   ///
   virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
-                               bool IsExplicit = true);
+                               OpenMPDirectiveKind Kind);
 
   /// \brief Check if the specified \a ScheduleKind is static non-chunked.
   /// This kind of worksharing directive is emitted without outer loop.
@@ -505,17 +526,56 @@
                             llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
                             llvm::Value *TaskFunction, QualType SharedsTy,
                             llvm::Value *Shareds);
+
+  /// \brief Emit code for the directive that does not require outlining.
+  ///
+  /// \param CodeGen Code generation sequence for the directive.
+  virtual void emitInlinedDirective(CodeGenFunction &CGF,
+                                    const RegionCodeGenTy &CodeGen);
+  /// \brief Emit code for a reduction clause. The following code should be
+  /// emitted for the reduction:
+  /// \code
+  ///
+  /// static kmp_critical_name lock = { 0 };
+  ///
+  /// void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
+  ///  ...
+  ///  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
+  ///  ...
+  /// }
+  ///
+  /// ...
+  /// void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
+  /// switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
+  /// RedList, reduce_func, &<lock>)) {
+  /// case 1:
+  ///  ...
+  ///  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
+  ///  ...
+  /// __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
+  /// break;
+  /// case 2:
+  ///  ...
+  ///  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
+  ///  ...
+  /// break;
+  /// default:;
+  /// }
+  /// \endcode
+  ///
+  /// \param LHSExprs List of LHS in \a ReductionOps reduction operations.
+  /// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
+  /// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
+  /// or 'operator binop(LHS, RHS)'.
+  /// \param WithNowait true if the parent directive also has a nowait clause,
+  /// false otherwise.
+  virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
+                             ArrayRef<const Expr *> LHSExprs,
+                             ArrayRef<const Expr *> RHSExprs,
+                             ArrayRef<const Expr *> ReductionOps,
+                             bool WithNowait);
 };
 
-/// \brief RAII for emitting code of CapturedStmt without function outlining.
-class InlinedOpenMPRegionRAII {
-  CodeGenFunction &CGF;
-
-public:
-  InlinedOpenMPRegionRAII(CodeGenFunction &CGF,
-                          const OMPExecutableDirective &D);
-  ~InlinedOpenMPRegionRAII();
-};
 } // namespace CodeGen
 } // namespace clang
 
diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp
index e433984..481fdbe 100644
--- a/lib/CodeGen/CGStmt.cpp
+++ b/lib/CodeGen/CGStmt.cpp
@@ -1021,6 +1021,12 @@
 /// if the function returns void, or may be missing one if the function returns
 /// non-void.  Fun stuff :).
 void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) {
+  // Returning from an outlined SEH helper is UB, and we already warn on it.
+  if (IsOutlinedSEHHelper) {
+    Builder.CreateUnreachable();
+    Builder.ClearInsertionPoint();
+  }
+
   // Emit the result value, even if unused, to evalute the side effects.
   const Expr *RV = S.getRetValue();
 
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
index 9af7474..aa53756 100644
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -23,21 +23,6 @@
 //===----------------------------------------------------------------------===//
 //                              OpenMP Directive Emission
 //===----------------------------------------------------------------------===//
-namespace {
-/// \brief RAII for inlined OpenMP regions (like 'omp for', 'omp simd', 'omp
-/// critical' etc.). Helps to generate proper debug info and provides correct
-/// code generation for such constructs.
-class InlinedOpenMPRegionScopeRAII {
-  InlinedOpenMPRegionRAII Region;
-  CodeGenFunction::LexicalScope DirectiveScope;
-
-public:
-  InlinedOpenMPRegionScopeRAII(CodeGenFunction &CGF,
-                               const OMPExecutableDirective &D)
-      : Region(CGF, D), DirectiveScope(CGF, D.getSourceRange()) {}
-};
-} // namespace
-
 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
 /// function. Here is the logic:
 /// if (Cond) {
@@ -84,117 +69,171 @@
   CGF.EmitBlock(ContBlock, /*IsFinished*/ true);
 }
 
-void CodeGenFunction::EmitOMPAggregateAssign(LValue OriginalAddr,
-                                             llvm::Value *PrivateAddr,
-                                             const Expr *AssignExpr,
-                                             QualType OriginalType,
-                                             const VarDecl *VDInit) {
-  EmitBlock(createBasicBlock(".omp.assign.begin."));
-  if (!isa<CXXConstructExpr>(AssignExpr) || isTrivialInitializer(AssignExpr)) {
-    // Perform simple memcpy.
-    EmitAggregateAssign(PrivateAddr, OriginalAddr.getAddress(),
-                        AssignExpr->getType());
-  } else {
-    // Perform element-by-element initialization.
-    QualType ElementTy;
-    auto SrcBegin = OriginalAddr.getAddress();
-    auto DestBegin = PrivateAddr;
-    auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
-    auto SrcNumElements = emitArrayLength(ArrayTy, ElementTy, SrcBegin);
-    auto DestNumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin);
-    auto SrcEnd = Builder.CreateGEP(SrcBegin, SrcNumElements);
-    auto DestEnd = Builder.CreateGEP(DestBegin, DestNumElements);
-    // The basic structure here is a do-while loop, because we don't
-    // need to check for the zero-element case.
-    auto BodyBB = createBasicBlock("omp.arraycpy.body");
-    auto DoneBB = createBasicBlock("omp.arraycpy.done");
-    auto IsEmpty =
-        Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
-    Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
+void CodeGenFunction::EmitOMPAggregateAssign(
+    llvm::Value *DestAddr, llvm::Value *SrcAddr, QualType OriginalType,
+    const llvm::function_ref<void(llvm::Value *, llvm::Value *)> &CopyGen) {
+  // Perform element-by-element initialization.
+  QualType ElementTy;
+  auto SrcBegin = SrcAddr;
+  auto DestBegin = DestAddr;
+  auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
+  auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin);
+  // Cast from pointer to array type to pointer to single element.
+  SrcBegin = Builder.CreatePointerBitCastOrAddrSpaceCast(SrcBegin,
+                                                         DestBegin->getType());
+  auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
+  // The loop is guarded by an up-front emptiness check, then runs do-while.
+  auto BodyBB = createBasicBlock("omp.arraycpy.body");
+  auto DoneBB = createBasicBlock("omp.arraycpy.done");
+  auto IsEmpty =
+      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
+  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
 
-    // Enter the loop body, making that address the current address.
-    auto EntryBB = Builder.GetInsertBlock();
-    EmitBlock(BodyBB);
-    auto SrcElementPast = Builder.CreatePHI(SrcBegin->getType(), 2,
-                                            "omp.arraycpy.srcElementPast");
-    SrcElementPast->addIncoming(SrcEnd, EntryBB);
-    auto DestElementPast = Builder.CreatePHI(DestBegin->getType(), 2,
-                                             "omp.arraycpy.destElementPast");
-    DestElementPast->addIncoming(DestEnd, EntryBB);
+  // Enter the loop body, making that address the current address.
+  auto EntryBB = Builder.GetInsertBlock();
+  EmitBlock(BodyBB);
+  auto SrcElementCurrent =
+      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
+  SrcElementCurrent->addIncoming(SrcBegin, EntryBB);
+  auto DestElementCurrent = Builder.CreatePHI(DestBegin->getType(), 2,
+                                              "omp.arraycpy.destElementPast");
+  DestElementCurrent->addIncoming(DestBegin, EntryBB);
 
-    // Shift the address back by one element.
-    auto NegativeOne = llvm::ConstantInt::get(SizeTy, -1, true);
-    auto DestElement = Builder.CreateGEP(DestElementPast, NegativeOne,
-                                         "omp.arraycpy.dest.element");
-    auto SrcElement = Builder.CreateGEP(SrcElementPast, NegativeOne,
-                                        "omp.arraycpy.src.element");
-    {
-      // Create RunCleanScope to cleanup possible temps.
-      CodeGenFunction::RunCleanupsScope Init(*this);
-      // Emit initialization for single element.
-      LocalDeclMap[VDInit] = SrcElement;
-      EmitAnyExprToMem(AssignExpr, DestElement,
-                       AssignExpr->getType().getQualifiers(),
-                       /*IsInitializer*/ false);
-      LocalDeclMap.erase(VDInit);
-    }
+  // Emit copy.
+  CopyGen(DestElementCurrent, SrcElementCurrent);
 
-    // Check whether we've reached the end.
-    auto Done =
-        Builder.CreateICmpEQ(DestElement, DestBegin, "omp.arraycpy.done");
-    Builder.CreateCondBr(Done, DoneBB, BodyBB);
-    DestElementPast->addIncoming(DestElement, Builder.GetInsertBlock());
-    SrcElementPast->addIncoming(SrcElement, Builder.GetInsertBlock());
+  // Shift the address forward by one element.
+  auto DestElementNext = Builder.CreateConstGEP1_32(
+      DestElementCurrent, /*Idx0=*/1, "omp.arraycpy.dest.element");
+  auto SrcElementNext = Builder.CreateConstGEP1_32(
+      SrcElementCurrent, /*Idx0=*/1, "omp.arraycpy.src.element");
+  // Check whether we've reached the end.
+  auto Done =
+      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
+  Builder.CreateCondBr(Done, DoneBB, BodyBB);
+  DestElementCurrent->addIncoming(DestElementNext, Builder.GetInsertBlock());
+  SrcElementCurrent->addIncoming(SrcElementNext, Builder.GetInsertBlock());
 
-    // Done.
-    EmitBlock(DoneBB, true);
-  }
-  EmitBlock(createBasicBlock(".omp.assign.end."));
+  // Done.
+  EmitBlock(DoneBB, /*IsFinished=*/true);
 }
 
-void CodeGenFunction::EmitOMPFirstprivateClause(
-    const OMPExecutableDirective &D,
-    CodeGenFunction::OMPPrivateScope &PrivateScope) {
-  auto PrivateFilter = [](const OMPClause *C) -> bool {
+void CodeGenFunction::EmitOMPCopy(CodeGenFunction &CGF,
+                                  QualType OriginalType, llvm::Value *DestAddr,
+                                  llvm::Value *SrcAddr, const VarDecl *DestVD,
+                                  const VarDecl *SrcVD, const Expr *Copy) {
+  if (OriginalType->isArrayType()) {
+    auto *BO = dyn_cast<BinaryOperator>(Copy);
+    if (BO && BO->getOpcode() == BO_Assign) {
+      // Copy is a plain '=' BinaryOperator: assign the array as one aggregate.
+      CGF.EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
+    } else {
+      // Any other copy expression is emitted once per array element via
+      // EmitOMPAggregateAssign.
+      CGF.EmitOMPAggregateAssign(
+          DestAddr, SrcAddr, OriginalType,
+          [&CGF, Copy, SrcVD, DestVD](llvm::Value *DestElement,
+                                          llvm::Value *SrcElement) {
+            // Only one element is handled here, so remap the destination and
+            // source variables to the current element addresses before
+            // emitting the copy expression.
+            CodeGenFunction::OMPPrivateScope Remap(CGF);
+            Remap.addPrivate(DestVD, [DestElement]() -> llvm::Value *{
+              return DestElement;
+            });
+            Remap.addPrivate(
+                SrcVD, [SrcElement]() -> llvm::Value *{ return SrcElement; });
+            (void)Remap.Privatize();
+            CGF.EmitIgnoredExpr(Copy);
+          });
+    }
+  } else {
+    // Remap the source and destination variables to the given addresses.
+    CodeGenFunction::OMPPrivateScope Remap(CGF);
+    Remap.addPrivate(SrcVD, [SrcAddr]() -> llvm::Value *{ return SrcAddr; });
+    Remap.addPrivate(DestVD, [DestAddr]() -> llvm::Value *{ return DestAddr; });
+    (void)Remap.Privatize();
+    // Emit the copy expression for the whole variable.
+    CGF.EmitIgnoredExpr(Copy);
+  }
+}
+
+bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
+                                                OMPPrivateScope &PrivateScope) {
+  auto FirstprivateFilter = [](const OMPClause *C) -> bool {
     return C->getClauseKind() == OMPC_firstprivate;
   };
-  for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
-           I(D.clauses(), PrivateFilter); I; ++I) {
+  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
+  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
+           FirstprivateFilter)> I(D.clauses(), FirstprivateFilter);
+       I; ++I) {
     auto *C = cast<OMPFirstprivateClause>(*I);
     auto IRef = C->varlist_begin();
     auto InitsRef = C->inits().begin();
     for (auto IInit : C->private_copies()) {
       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
-      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
-      bool IsRegistered;
-      if (*InitsRef != nullptr) {
-        // Emit VarDecl with copy init for arrays.
-        auto *FD = CapturedStmtInfo->lookup(OrigVD);
-        LValue Base = MakeNaturalAlignAddrLValue(
-            CapturedStmtInfo->getContextValue(),
-            getContext().getTagDeclType(FD->getParent()));
-        auto OriginalAddr = EmitLValueForField(Base, FD);
-        auto VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
-        IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
-          auto Emission = EmitAutoVarAlloca(*VD);
-          // Emit initialization of aggregate firstprivate vars.
-          EmitOMPAggregateAssign(OriginalAddr, Emission.getAllocatedAddress(),
-                                 VD->getInit(), (*IRef)->getType(), VDInit);
-          EmitAutoVarCleanups(Emission);
-          return Emission.getAllocatedAddress();
-        });
-      } else
-        IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
-          // Emit private VarDecl with copy init.
-          EmitDecl(*VD);
-          return GetAddrOfLocalVar(VD);
-        });
-      assert(IsRegistered && "firstprivate var already registered as private");
-      // Silence the warning about unused variable.
-      (void)IsRegistered;
+      if (EmittedAsFirstprivate.count(OrigVD) == 0) {
+        EmittedAsFirstprivate.insert(OrigVD);
+        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
+        auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
+        bool IsRegistered;
+        DeclRefExpr DRE(
+            const_cast<VarDecl *>(OrigVD),
+            /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
+                OrigVD) != nullptr,
+            (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
+        auto *OriginalAddr = EmitLValue(&DRE).getAddress();
+        if (OrigVD->getType()->isArrayType()) {
+          // Emit VarDecl with copy init for arrays.
+          // OriginalAddr (computed above) is the address of the original
+          // variable as seen in the current captured region.
+          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
+            auto Emission = EmitAutoVarAlloca(*VD);
+            auto *Init = VD->getInit();
+            if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
+              // Perform simple memcpy.
+              EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
+                                  (*IRef)->getType());
+            } else {
+              EmitOMPAggregateAssign(
+                  Emission.getAllocatedAddress(), OriginalAddr,
+                  (*IRef)->getType(),
+                  [this, VDInit, Init](llvm::Value *DestElement,
+                                       llvm::Value *SrcElement) {
+                    // Clean up any temporaries needed by the initialization.
+                    RunCleanupsScope InitScope(*this);
+                    // Emit initialization for single element.
+                    LocalDeclMap[VDInit] = SrcElement;
+                    EmitAnyExprToMem(Init, DestElement,
+                                     Init->getType().getQualifiers(),
+                                     /*IsInitializer*/ false);
+                    LocalDeclMap.erase(VDInit);
+                  });
+            }
+            EmitAutoVarCleanups(Emission);
+            return Emission.getAllocatedAddress();
+          });
+        } else {
+          IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
+            // Emit private VarDecl with copy init.
+            // Remap the temporary VDInit variable to the address of the
+            // original variable (for proper handling of captured global
+            // variables).
+            LocalDeclMap[VDInit] = OriginalAddr;
+            EmitDecl(*VD);
+            LocalDeclMap.erase(VDInit);
+            return GetAddrOfLocalVar(VD);
+          });
+        }
+        assert(IsRegistered &&
+               "firstprivate var already registered as private");
+        // Silence the warning about unused variable.
+        (void)IsRegistered;
+      }
       ++IRef, ++InitsRef;
     }
   }
+  return !EmittedAsFirstprivate.empty();
 }
 
 void CodeGenFunction::EmitOMPPrivateClause(
@@ -224,9 +263,225 @@
   }
 }
 
+bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
+  // For each 'copyin' variable emit (on threads other than the master):
+  //   threadprivate_var1 = master_threadprivate_var1;
+  //   operator=(threadprivate_var2, master_threadprivate_var2);
+  // Returns true if a copy was emitted. No barrier call is emitted here.
+  auto CopyinFilter = [](const OMPClause *C) -> bool {
+    return C->getClauseKind() == OMPC_copyin;
+  };
+  llvm::DenseSet<const VarDecl *> CopiedVars;
+  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
+  for (OMPExecutableDirective::filtered_clause_iterator<decltype(CopyinFilter)>
+           I(D.clauses(), CopyinFilter);
+       I; ++I) {
+    auto *C = cast<OMPCopyinClause>(*I);
+    auto IRef = C->varlist_begin();
+    auto ISrcRef = C->source_exprs().begin();
+    auto IDestRef = C->destination_exprs().begin();
+    for (auto *AssignOp : C->assignment_ops()) {
+      auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
+      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
+        // Get the address of the master variable.
+        auto *MasterAddr = VD->isStaticLocal()
+                               ? CGM.getStaticLocalDeclAddress(VD)
+                               : CGM.GetAddrOfGlobal(VD);
+        // Get the address of the threadprivate variable.
+        auto *PrivateAddr = EmitLValue(*IRef).getAddress();
+        if (CopiedVars.size() == 1) {
+          // Before the first copy, compare the master and threadprivate
+          // addresses; if they are equal (master thread) skip all the copies.
+          CopyBegin = createBasicBlock("copyin.not.master");
+          CopyEnd = createBasicBlock("copyin.not.master.end");
+          Builder.CreateCondBr(
+              Builder.CreateICmpNE(
+                  Builder.CreatePtrToInt(MasterAddr, CGM.IntPtrTy),
+                  Builder.CreatePtrToInt(PrivateAddr, CGM.IntPtrTy)),
+              CopyBegin, CopyEnd);
+          EmitBlock(CopyBegin);
+        }
+        auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
+        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
+        EmitOMPCopy(*this, (*IRef)->getType(), PrivateAddr, MasterAddr, DestVD,
+                    SrcVD, AssignOp);
+      }
+      ++IRef;
+      ++ISrcRef;
+      ++IDestRef;
+    }
+  }
+  if (CopyEnd) {
+    // End of the copy region; the master-thread branch joins here.
+    EmitBlock(CopyEnd, /*IsFinished=*/true);
+    return true;
+  }
+  return false;
+}
+
+bool CodeGenFunction::EmitOMPLastprivateClauseInit(
+    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
+  auto LastprivateFilter = [](const OMPClause *C) -> bool {
+    return C->getClauseKind() == OMPC_lastprivate;
+  };
+  bool HasAtLeastOneLastprivate = false;
+  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
+  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
+           LastprivateFilter)> I(D.clauses(), LastprivateFilter);
+       I; ++I) {
+    auto *C = cast<OMPLastprivateClause>(*I);
+    auto IRef = C->varlist_begin();
+    auto IDestRef = C->destination_exprs().begin();
+    for (auto *IInit : C->private_copies()) {
+      // Remember the original variable's address for the end-of-loop update.
+      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
+      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
+        auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
+        PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> llvm::Value *{
+          DeclRefExpr DRE(
+              const_cast<VarDecl *>(OrigVD),
+              /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
+                  OrigVD) != nullptr,
+              (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
+          return EmitLValue(&DRE).getAddress();
+        });
+        // A null IInit means the variable is also firstprivate: its private
+        // copy is initialized by the 'firstprivate' clause codegen instead.
+        if (IInit) {
+          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
+          bool IsRegistered =
+              PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
+                // Emit private VarDecl with copy init.
+                EmitDecl(*VD);
+                return GetAddrOfLocalVar(VD);
+              });
+          assert(IsRegistered &&
+                 "lastprivate var already registered as private");
+          HasAtLeastOneLastprivate = HasAtLeastOneLastprivate || IsRegistered;
+        }
+      }
+      // Advance on every iteration so the iterators stay in step with IInit.
+      ++IRef, ++IDestRef;
+    }
+  }
+  return HasAtLeastOneLastprivate;
+}
+
+void CodeGenFunction::EmitOMPLastprivateClauseFinal(
+    const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) {
+  // Emit the following code (each variable is copied back at most once):
+  // if (<IsLastIterCond>) {
+  //   orig_var1 = private_orig_var1;
+  //   ...
+  //   orig_varn = private_orig_varn;
+  // }
+  auto *ThenBB = createBasicBlock(".omp.lastprivate.then");
+  auto *DoneBB = createBasicBlock(".omp.lastprivate.done");
+  Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
+  EmitBlock(ThenBB);
+  {
+    auto LastprivateFilter = [](const OMPClause *C) -> bool {
+      return C->getClauseKind() == OMPC_lastprivate;
+    };
+    llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
+    for (OMPExecutableDirective::filtered_clause_iterator<decltype(
+             LastprivateFilter)> I(D.clauses(), LastprivateFilter);
+         I; ++I) {
+      auto *C = cast<OMPLastprivateClause>(*I);
+      auto IRef = C->varlist_begin();
+      auto ISrcRef = C->source_exprs().begin();
+      auto IDestRef = C->destination_exprs().begin();
+      for (auto *AssignOp : C->assignment_ops()) {
+        auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
+        if (AlreadyEmittedVars.insert(PrivateVD->getCanonicalDecl()).second) {
+          auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
+          auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
+          // Address of the original variable, looked up via DestVD.
+          auto *OriginalAddr = GetAddrOfLocalVar(DestVD);
+          // Address of the private copy, looked up via the listed variable.
+          auto *PrivateAddr = GetAddrOfLocalVar(PrivateVD);
+          EmitOMPCopy(*this, (*IRef)->getType(), OriginalAddr, PrivateAddr,
+                      DestVD, SrcVD, AssignOp);
+        }
+        ++IRef;
+        ++ISrcRef;
+        ++IDestRef;
+      }
+    }
+  }
+  EmitBlock(DoneBB, /*IsFinished=*/true);
+}
+
+void CodeGenFunction::EmitOMPReductionClauseInit(
+    const OMPExecutableDirective &D,
+    CodeGenFunction::OMPPrivateScope &PrivateScope) {
+  auto ReductionFilter = [](const OMPClause *C) -> bool {
+    return C->getClauseKind() == OMPC_reduction;
+  };
+  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
+           ReductionFilter)> I(D.clauses(), ReductionFilter);
+       I; ++I) {
+    auto *C = cast<OMPReductionClause>(*I);
+    auto ILHS = C->lhs_exprs().begin();
+    auto IRHS = C->rhs_exprs().begin();
+    for (auto IRef : C->varlists()) {
+      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
+      auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
+      auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
+      // Map the implicit LHS variable to the address of the original
+      // variable.
+      PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> llvm::Value *{
+        DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
+                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
+                        IRef->getType(), VK_LValue, IRef->getExprLoc());
+        return EmitLValue(&DRE).getAddress();
+      });
+      // Map the original variable to a freshly emitted private copy.
+      bool IsRegistered =
+          PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> llvm::Value *{
+            // Emit private VarDecl with reduction init.
+            EmitDecl(*PrivateVD);
+            return GetAddrOfLocalVar(PrivateVD);
+          });
+      assert(IsRegistered && "private var already registered as private");
+      // Silence the warning about unused variable.
+      (void)IsRegistered;
+      ++ILHS, ++IRHS;
+    }
+  }
+}
+
+void CodeGenFunction::EmitOMPReductionClauseFinal(
+    const OMPExecutableDirective &D) {
+  llvm::SmallVector<const Expr *, 8> LHSExprs;
+  llvm::SmallVector<const Expr *, 8> RHSExprs;
+  llvm::SmallVector<const Expr *, 8> ReductionOps;
+  auto ReductionFilter = [](const OMPClause *C) -> bool {
+    return C->getClauseKind() == OMPC_reduction;
+  };
+  bool HasAtLeastOneReduction = false;
+  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
+           ReductionFilter)> I(D.clauses(), ReductionFilter);
+       I; ++I) {
+    HasAtLeastOneReduction = true;
+    auto *C = cast<OMPReductionClause>(*I);
+    LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+    RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
+    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
+  }
+  if (HasAtLeastOneReduction) {
+    // Emit a nowait reduction if a 'nowait' clause is present or the
+    // directive is a parallel directive (it always has an implicit barrier).
+    CGM.getOpenMPRuntime().emitReduction(
+        *this, D.getLocEnd(), LHSExprs, RHSExprs, ReductionOps,
+        D.getSingleClause(OMPC_nowait) ||
+            isOpenMPParallelDirective(D.getDirectiveKind()));
+  }
+}
+
 /// \brief Emits code for OpenMP parallel directive in the parallel region.
-static void EmitOMPParallelCall(CodeGenFunction &CGF,
-                                const OMPParallelDirective &S,
+static void emitOMPParallelCall(CodeGenFunction &CGF,
+                                const OMPExecutableDirective &S,
                                 llvm::Value *OutlinedFn,
                                 llvm::Value *CapturedStruct) {
   if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads)) {
@@ -241,22 +496,51 @@
                                               CapturedStruct);
 }
 
-void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
+static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
+                                           const OMPExecutableDirective &S,
+                                           const RegionCodeGenTy &CodeGen) {
   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
-  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
-  auto OutlinedFn = CGM.getOpenMPRuntime().emitOutlinedFunction(
-      S, *CS->getCapturedDecl()->param_begin());
+  auto CapturedStruct = CGF.GenerateCapturedStmtArgument(*CS);
+  auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
+      S, *CS->getCapturedDecl()->param_begin(), CodeGen);
   if (auto C = S.getSingleClause(/*K*/ OMPC_if)) {
     auto Cond = cast<OMPIfClause>(C)->getCondition();
-    EmitOMPIfClause(*this, Cond, [&](bool ThenBlock) {
+    EmitOMPIfClause(CGF, Cond, [&](bool ThenBlock) {
       if (ThenBlock)
-        EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
+        emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
       else
-        CGM.getOpenMPRuntime().emitSerialCall(*this, S.getLocStart(),
-                                              OutlinedFn, CapturedStruct);
+        CGF.CGM.getOpenMPRuntime().emitSerialCall(CGF, S.getLocStart(),
+                                                  OutlinedFn, CapturedStruct);
     });
   } else
-    EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct);
+    emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
+}
+
+void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
+  LexicalScope Scope(*this, S.getSourceRange());
+  // Emit the parallel region as a standalone region.
+  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+    OMPPrivateScope PrivateScope(CGF);
+    bool Copyins = CGF.EmitOMPCopyinClause(S);
+    bool Firstprivates = CGF.EmitOMPFirstprivateClause(S, PrivateScope);
+    if (Copyins || Firstprivates) {
+      // Emit an implicit barrier to synchronize threads and avoid data races
+      // while firstprivate variables are initialized or while the master
+      // thread's values of threadprivate variables are propagated to the
+      // local instances of those variables in all other implicit threads.
+      CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
+                                                 OMPD_unknown);
+    }
+    CGF.EmitOMPPrivateClause(S, PrivateScope);
+    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
+    (void)PrivateScope.Privatize();
+    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+    CGF.EmitOMPReductionClauseFinal(S);
+    // Emit the implicit barrier at the end of the 'parallel' directive.
+    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
+                                               OMPD_unknown);
+  };
+  emitCommonOMPParallelDirective(*this, S, CodeGen);
+}
 
 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S,
@@ -289,10 +573,10 @@
   }
 }
 
-void CodeGenFunction::EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup,
-                                       const Expr *LoopCond,
-                                       const Expr *IncExpr,
-                                       const std::function<void()> &BodyGen) {
+void CodeGenFunction::EmitOMPInnerLoop(
+    const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
+    const Expr *IncExpr,
+    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen) {
   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
   auto Cnt = getPGORegionCounter(&S);
 
@@ -323,7 +607,7 @@
   auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
 
-  BodyGen();
+  BodyGen(*this);
 
   // Emit "IV = IV + 1" and a back-edge to the condition block.
   EmitBlock(Continue.getBlock());
@@ -414,129 +698,132 @@
 }
 
 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
-  // Pragma 'simd' code depends on presence of 'lastprivate'.
-  // If present, we have to separate last iteration of the loop:
-  //
-  // if (LastIteration != 0) {
-  //   for (IV in 0..LastIteration-1) BODY;
-  //   BODY with updates of lastprivate vars;
-  //   <Final counter/linear vars updates>;
-  // }
-  //
-  // otherwise (when there's no lastprivate):
-  //
-  //   for (IV in 0..LastIteration) BODY;
-  //   <Final counter/linear vars updates>;
-  //
+  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+    // Pragma 'simd' code depends on presence of 'lastprivate'.
+    // If present, we have to separate last iteration of the loop:
+    //
+    // if (LastIteration != 0) {
+    //   for (IV in 0..LastIteration-1) BODY;
+    //   BODY with updates of lastprivate vars;
+    //   <Final counter/linear vars updates>;
+    // }
+    //
+    // otherwise (when there's no lastprivate):
+    //
+    //   for (IV in 0..LastIteration) BODY;
+    //   <Final counter/linear vars updates>;
+    //
 
-  // Walk clauses and process safelen/lastprivate.
-  bool SeparateIter = false;
-  LoopStack.setParallel();
-  LoopStack.setVectorizerEnable(true);
-  for (auto C : S.clauses()) {
-    switch (C->getClauseKind()) {
-    case OMPC_safelen: {
-      RValue Len = EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(),
-                               AggValueSlot::ignored(), true);
-      llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
-      LoopStack.setVectorizerWidth(Val->getZExtValue());
-      // In presence of finite 'safelen', it may be unsafe to mark all
-      // the memory instructions parallel, because loop-carried
-      // dependences of 'safelen' iterations are possible.
-      LoopStack.setParallel(false);
-      break;
-    }
-    case OMPC_aligned:
-      EmitOMPAlignedClause(*this, CGM, cast<OMPAlignedClause>(*C));
-      break;
-    case OMPC_lastprivate:
-      SeparateIter = true;
-      break;
-    default:
-      // Not handled yet
-      ;
-    }
-  }
-
-  InlinedOpenMPRegionScopeRAII Region(*this, S);
-
-  // Emit inits for the linear variables.
-  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
-    for (auto Init : C->inits()) {
-      auto *D = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
-      EmitVarDecl(*D);
-    }
-  }
-
-  // Emit the loop iteration variable.
-  const Expr *IVExpr = S.getIterationVariable();
-  const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
-  EmitVarDecl(*IVDecl);
-  EmitIgnoredExpr(S.getInit());
-
-  // Emit the iterations count variable.
-  // If it is not a variable, Sema decided to calculate iterations count on each
-  // iteration (e.g., it is foldable into a constant).
-  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
-    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
-    // Emit calculation of the iterations count.
-    EmitIgnoredExpr(S.getCalcLastIteration());
-  }
-
-  // Emit the linear steps for the linear clauses.
-  // If a step is not constant, it is pre-calculated before the loop.
-  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
-    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
-      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
-        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
-        // Emit calculation of the linear step.
-        EmitIgnoredExpr(CS);
+    // Walk clauses and process safelen/lastprivate.
+    bool SeparateIter = false;
+    CGF.LoopStack.setParallel();
+    CGF.LoopStack.setVectorizerEnable(true);
+    for (auto C : S.clauses()) {
+      switch (C->getClauseKind()) {
+      case OMPC_safelen: {
+        RValue Len = CGF.EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(),
+                                     AggValueSlot::ignored(), true);
+        llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
+        CGF.LoopStack.setVectorizerWidth(Val->getZExtValue());
+        // In presence of finite 'safelen', it may be unsafe to mark all
+        // the memory instructions parallel, because loop-carried
+        // dependences of 'safelen' iterations are possible.
+        CGF.LoopStack.setParallel(false);
+        break;
       }
-  }
+      case OMPC_aligned:
+        EmitOMPAlignedClause(CGF, CGF.CGM, cast<OMPAlignedClause>(*C));
+        break;
+      case OMPC_lastprivate:
+        SeparateIter = true;
+        break;
+      default:
+        // Not handled yet
+        ;
+      }
+    }
 
-  if (SeparateIter) {
-    // Emit: if (LastIteration > 0) - begin.
-    RegionCounter Cnt = getPGORegionCounter(&S);
-    auto ThenBlock = createBasicBlock("simd.if.then");
-    auto ContBlock = createBasicBlock("simd.if.end");
-    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
-    EmitBlock(ThenBlock);
-    Cnt.beginRegion(Builder);
-    // Emit 'then' code.
-    {
-      OMPPrivateScope LoopScope(*this);
-      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
-      EmitPrivateLinearVars(*this, S, LoopScope);
-      EmitOMPPrivateClause(S, LoopScope);
-      (void)LoopScope.Privatize();
-      EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
-                       S.getCond(/*SeparateIter=*/true), S.getInc(),
-                       [&S, this]() {
-                         EmitOMPLoopBody(S);
-                         EmitStopPoint(&S);
-                       });
-      EmitOMPLoopBody(S, /* SeparateIter */ true);
+    // Emit inits for the linear variables.
+    for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
+      for (auto Init : C->inits()) {
+        auto *D = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
+        CGF.EmitVarDecl(*D);
+      }
     }
-    EmitOMPSimdFinal(S);
-    // Emit: if (LastIteration != 0) - end.
-    EmitBranch(ContBlock);
-    EmitBlock(ContBlock, true);
-  } else {
-    {
-      OMPPrivateScope LoopScope(*this);
-      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
-      EmitPrivateLinearVars(*this, S, LoopScope);
-      EmitOMPPrivateClause(S, LoopScope);
-      (void)LoopScope.Privatize();
-      EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
-                       S.getCond(/*SeparateIter=*/false), S.getInc(),
-                       [&S, this]() {
-                         EmitOMPLoopBody(S);
-                         EmitStopPoint(&S);
-                       });
+
+    // Emit the loop iteration variable.
+    const Expr *IVExpr = S.getIterationVariable();
+    const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
+    CGF.EmitVarDecl(*IVDecl);
+    CGF.EmitIgnoredExpr(S.getInit());
+
+    // Emit the iterations count variable.
+    // If it is not a variable, Sema decided to calculate the
+    // iterations count on each iteration (e.g., it is foldable
+    // into a constant).
+    if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
+      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
+      // Emit calculation of the iterations count.
+      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
     }
-    EmitOMPSimdFinal(S);
-  }
+
+    // Emit the linear steps for the linear clauses.
+    // If a step is not constant, it is pre-calculated before the loop.
+    for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
+      if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
+        if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
+          CGF.EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
+          // Emit calculation of the linear step.
+          CGF.EmitIgnoredExpr(CS);
+        }
+    }
+
+    if (SeparateIter) {
+      // Emit: if (LastIteration > 0) - begin.
+      RegionCounter Cnt = CGF.getPGORegionCounter(&S);
+      auto ThenBlock = CGF.createBasicBlock("simd.if.then");
+      auto ContBlock = CGF.createBasicBlock("simd.if.end");
+      CGF.EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock,
+                               Cnt.getCount());
+      CGF.EmitBlock(ThenBlock);
+      Cnt.beginRegion(CGF.Builder);
+      // Emit 'then' code.
+      {
+        OMPPrivateScope LoopScope(CGF);
+        EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
+        EmitPrivateLinearVars(CGF, S, LoopScope);
+        CGF.EmitOMPPrivateClause(S, LoopScope);
+        (void)LoopScope.Privatize();
+        CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+                             S.getCond(/*SeparateIter=*/true), S.getInc(),
+                             [&S](CodeGenFunction &CGF) {
+                               CGF.EmitOMPLoopBody(S);
+                               CGF.EmitStopPoint(&S);
+                             });
+        CGF.EmitOMPLoopBody(S, /* SeparateIter */ true);
+      }
+      CGF.EmitOMPSimdFinal(S);
+      // Emit: if (LastIteration != 0) - end.
+      CGF.EmitBranch(ContBlock);
+      CGF.EmitBlock(ContBlock, true);
+    } else {
+      {
+        OMPPrivateScope LoopScope(CGF);
+        EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
+        EmitPrivateLinearVars(CGF, S, LoopScope);
+        CGF.EmitOMPPrivateClause(S, LoopScope);
+        (void)LoopScope.Privatize();
+        CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+                             S.getCond(/*SeparateIter=*/false), S.getInc(),
+                             [&S](CodeGenFunction &CGF) {
+                               CGF.EmitOMPLoopBody(S);
+                               CGF.EmitStopPoint(&S);
+                             });
+      }
+      CGF.EmitOMPSimdFinal(S);
+    }
+  };
+  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
 }
 
 void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
@@ -654,9 +941,10 @@
   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
 
   EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
-                   S.getCond(/*SeparateIter=*/false), S.getInc(), [&S, this]() {
-                     EmitOMPLoopBody(S);
-                     EmitStopPoint(&S);
+                   S.getCond(/*SeparateIter=*/false), S.getInc(),
+                   [&S](CodeGenFunction &CGF) {
+                     CGF.EmitOMPLoopBody(S);
+                     CGF.EmitStopPoint(&S);
                    });
 
   EmitBlock(Continue.getBlock());
@@ -686,7 +974,7 @@
   return CGF.EmitLValue(Helper);
 }
 
-void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
+bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
   // Emit the loop iteration variable.
   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
   auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
@@ -703,6 +991,7 @@
 
   auto &RT = CGM.getOpenMPRuntime();
 
+  bool HasLastprivateClause;
   // Check pre-condition.
   {
     // Skip the entire loop if we don't meet the precondition.
@@ -725,6 +1014,13 @@
           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
 
       OMPPrivateScope LoopScope(*this);
+      if (EmitOMPFirstprivateClause(S, LoopScope)) {
+        // Emit implicit barrier to synchronize threads and avoid data races on
+        // initialization of firstprivate variables.
+        CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
+                                               OMPD_unknown);
+      }
+      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
       EmitPrivateLoopCounters(*this, LoopScope, S.counters());
       (void)LoopScope.Privatize();
 
@@ -759,9 +1055,9 @@
         // while (idx <= UB) { BODY; ++idx; }
         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                          S.getCond(/*SeparateIter=*/false), S.getInc(),
-                         [&S, this]() {
-                           EmitOMPLoopBody(S);
-                           EmitStopPoint(&S);
+                         [&S](CodeGenFunction &CGF) {
+                           CGF.EmitOMPLoopBody(S);
+                           CGF.EmitStopPoint(&S);
                          });
         // Tell the runtime we are done.
         RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
@@ -772,21 +1068,30 @@
                             UB.getAddress(), ST.getAddress(), IL.getAddress(),
                             Chunk);
       }
+      // Emit final copy of the lastprivate variables if IsLastIter != 0.
+      if (HasLastprivateClause)
+        EmitOMPLastprivateClauseFinal(
+            S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
     }
     // We're now done with the loop, so jump to the continuation block.
     EmitBranch(ContBlock);
     EmitBlock(ContBlock, true);
   }
+  return HasLastprivateClause;
 }
 
 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
-  InlinedOpenMPRegionScopeRAII Region(*this, S);
-
-  EmitOMPWorksharingLoop(S);
+  LexicalScope Scope(*this, S.getSourceRange());
+  bool HasLastprivates = false;
+  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
+    HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
+  };
+  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
 
   // Emit an implicit barrier at the end.
-  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
-                                         /*IsExplicit*/ false);
+  if (!S.getSingleClause(OMPC_nowait) || HasLastprivates) {
+    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
+  }
 }
 
 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
@@ -802,107 +1107,122 @@
   return LVal;
 }
 
-void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
-  InlinedOpenMPRegionScopeRAII Region(*this, S);
-
+static OpenMPDirectiveKind emitSections(CodeGenFunction &CGF,
+                                        const OMPExecutableDirective &S) {
   auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
   auto *CS = dyn_cast<CompoundStmt>(Stmt);
   if (CS && CS->size() > 1) {
-    auto &C = CGM.getContext();
-    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
-    // Emit helper vars inits.
-    LValue LB = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.lb.",
-                                  Builder.getInt32(0));
-    auto *GlobalUBVal = Builder.getInt32(CS->size() - 1);
-    LValue UB =
-        createSectionLVal(*this, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
-    LValue ST = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.st.",
-                                  Builder.getInt32(1));
-    LValue IL = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.il.",
-                                  Builder.getInt32(0));
-    // Loop counter.
-    LValue IV = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.iv.");
-    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
-    OpaqueValueMapping OpaqueIV(*this, &IVRefExpr, IV);
-    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
-    OpaqueValueMapping OpaqueUB(*this, &UBRefExpr, UB);
-    // Generate condition for loop.
-    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
-                        OK_Ordinary, S.getLocStart(), /*fpContractable=*/false);
-    // Increment for loop counter.
-    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
-                      S.getLocStart());
-    auto BodyGen = [this, CS, &S, &IV]() {
-      // Iterate through all sections and emit a switch construct:
-      // switch (IV) {
-      //   case 0:
-      //     <SectionStmt[0]>;
-      //     break;
-      // ...
-      //   case <NumSection> - 1:
-      //     <SectionStmt[<NumSection> - 1]>;
-      //     break;
-      // }
-      // .omp.sections.exit:
-      auto *ExitBB = createBasicBlock(".omp.sections.exit");
-      auto *SwitchStmt = Builder.CreateSwitch(
-          EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
-          CS->size());
-      unsigned CaseNumber = 0;
-      for (auto C = CS->children(); C; ++C, ++CaseNumber) {
-        auto CaseBB = createBasicBlock(".omp.sections.case");
-        EmitBlock(CaseBB);
-        SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB);
-        EmitStmt(*C);
-        EmitBranch(ExitBB);
-      }
-      EmitBlock(ExitBB, /*IsFinished=*/true);
+    auto &&CodeGen = [&S, CS](CodeGenFunction &CGF) {
+      auto &C = CGF.CGM.getContext();
+      auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
+      // Emit helper vars inits.
+      LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
+                                    CGF.Builder.getInt32(0));
+      auto *GlobalUBVal = CGF.Builder.getInt32(CS->size() - 1);
+      LValue UB =
+          createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
+      LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
+                                    CGF.Builder.getInt32(1));
+      LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
+                                    CGF.Builder.getInt32(0));
+      // Loop counter.
+      LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
+      OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
+      CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
+      OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
+      CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
+      // Generate condition for loop.
+      BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
+                          OK_Ordinary, S.getLocStart(),
+                          /*fpContractable=*/false);
+      // Increment for loop counter.
+      UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue,
+                        OK_Ordinary, S.getLocStart());
+      auto BodyGen = [CS, &S, &IV](CodeGenFunction &CGF) {
+        // Iterate through all sections and emit a switch construct:
+        // switch (IV) {
+        //   case 0:
+        //     <SectionStmt[0]>;
+        //     break;
+        // ...
+        //   case <NumSection> - 1:
+        //     <SectionStmt[<NumSection> - 1]>;
+        //     break;
+        // }
+        // .omp.sections.exit:
+        auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
+        auto *SwitchStmt = CGF.Builder.CreateSwitch(
+            CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
+            CS->size());
+        unsigned CaseNumber = 0;
+        for (auto C = CS->children(); C; ++C, ++CaseNumber) {
+          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
+          CGF.EmitBlock(CaseBB);
+          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
+          CGF.EmitStmt(*C);
+          CGF.EmitBranch(ExitBB);
+        }
+        CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
+      };
+      // Emit static non-chunked loop.
+      CGF.CGM.getOpenMPRuntime().emitForInit(
+          CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
+          /*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(),
+          ST.getAddress());
+      // UB = min(UB, GlobalUB);
+      auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
+      auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
+          CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
+      CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
+      // IV = LB;
+      CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
+      // while (idx <= UB) { BODY; ++idx; }
+      CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen);
+      // Tell the runtime we are done.
+      CGF.CGM.getOpenMPRuntime().emitForFinish(CGF, S.getLocStart(),
+                                               OMPC_SCHEDULE_static);
     };
-    // Emit static non-chunked loop.
-    CGM.getOpenMPRuntime().emitForInit(
-        *this, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
-        /*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(),
-        ST.getAddress());
-    // UB = min(UB, GlobalUB);
-    auto *UBVal = EmitLoadOfScalar(UB, S.getLocStart());
-    auto *MinUBGlobalUB = Builder.CreateSelect(
-        Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
-    EmitStoreOfScalar(MinUBGlobalUB, UB);
-    // IV = LB;
-    EmitStoreOfScalar(EmitLoadOfScalar(LB, S.getLocStart()), IV);
-    // while (idx <= UB) { BODY; ++idx; }
-    EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen);
-    // Tell the runtime we are done.
-    CGM.getOpenMPRuntime().emitForFinish(*this, S.getLocStart(),
-                                         OMPC_SCHEDULE_static);
-  } else {
-    // If only one section is found - no need to generate loop, emit as a single
-    // region.
-    CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void {
-      InlinedOpenMPRegionScopeRAII Region(*this, S);
-      EmitStmt(Stmt);
-      EnsureInsertPoint();
-    }, S.getLocStart(), llvm::None, llvm::None, llvm::None, llvm::None);
-  }
 
+    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, CodeGen);
+    return OMPD_sections;
+  }
+  // If only one section is found - no need to generate loop, emit as a single
+  // region.
+  auto &&CodeGen = [Stmt](CodeGenFunction &CGF) {
+    CGF.EmitStmt(Stmt);
+    CGF.EnsureInsertPoint();
+  };
+  CGF.CGM.getOpenMPRuntime().emitSingleRegion(CGF, CodeGen, S.getLocStart(),
+                                              llvm::None, llvm::None,
+                                              llvm::None, llvm::None);
+  return OMPD_single;
+}
+
+void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
+  LexicalScope Scope(*this, S.getSourceRange());
+  OpenMPDirectiveKind EmittedAs = emitSections(*this, S);
   // Emit an implicit barrier at the end.
-  if (!S.getSingleClause(OMPC_nowait))
-    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
-                                           /*IsExplicit=*/false);
+  if (!S.getSingleClause(OMPC_nowait)) {
+    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs);
+  }
 }
 
 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
-  InlinedOpenMPRegionScopeRAII Region(*this, S);
-  EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
-  EnsureInsertPoint();
+  LexicalScope Scope(*this, S.getSourceRange());
+  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+    CGF.EnsureInsertPoint();
+  };
+  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
 }
 
 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
+  llvm::SmallVector<const Expr *, 8> DestExprs;
   llvm::SmallVector<const Expr *, 8> SrcExprs;
-  llvm::SmallVector<const Expr *, 8> DstExprs;
   llvm::SmallVector<const Expr *, 8> AssignmentOps;
-  // Check if there are any 'copyprivate' clauses associated with this 'single'
+  // Check if there are any 'copyprivate' clauses associated with this
+  // 'single'
   // construct.
   auto CopyprivateFilter = [](const OMPClause *C) -> bool {
     return C->getClauseKind() == OMPC_copyprivate;
@@ -914,44 +1234,60 @@
   for (CopyprivateIter I(S.clauses(), CopyprivateFilter); I; ++I) {
     auto *C = cast<OMPCopyprivateClause>(*I);
     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
+    DestExprs.append(C->destination_exprs().begin(),
+                     C->destination_exprs().end());
     SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
-    DstExprs.append(C->destination_exprs().begin(),
-                    C->destination_exprs().end());
     AssignmentOps.append(C->assignment_ops().begin(),
                          C->assignment_ops().end());
   }
+  LexicalScope Scope(*this, S.getSourceRange());
   // Emit code for 'single' region along with 'copyprivate' clauses
-  CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void {
-    InlinedOpenMPRegionScopeRAII Region(*this, S);
-    EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
-    EnsureInsertPoint();
-  }, S.getLocStart(), CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
+  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+    CGF.EnsureInsertPoint();
+  };
+  CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
+                                          CopyprivateVars, DestExprs, SrcExprs,
+                                          AssignmentOps);
   // Emit an implicit barrier at the end.
-  if (!S.getSingleClause(OMPC_nowait))
-    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
-                                           /*IsExplicit=*/false);
+  if (!S.getSingleClause(OMPC_nowait)) {
+    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_single);
+  }
 }
 
 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
-  CGM.getOpenMPRuntime().emitMasterRegion(*this, [&]() -> void {
-    InlinedOpenMPRegionScopeRAII Region(*this, S);
-    EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
-    EnsureInsertPoint();
-  }, S.getLocStart());
+  LexicalScope Scope(*this, S.getSourceRange());
+  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+    CGF.EnsureInsertPoint();
+  };
+  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
 }
 
 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
+  LexicalScope Scope(*this, S.getSourceRange());
+  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+    CGF.EnsureInsertPoint();
+  };
   CGM.getOpenMPRuntime().emitCriticalRegion(
-      *this, S.getDirectiveName().getAsString(), [&]() -> void {
-        InlinedOpenMPRegionScopeRAII Region(*this, S);
-        EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
-        EnsureInsertPoint();
-      }, S.getLocStart());
+      *this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart());
 }
 
-void
-CodeGenFunction::EmitOMPParallelForDirective(const OMPParallelForDirective &) {
-  llvm_unreachable("CodeGen for 'omp parallel for' is not supported yet.");
+void CodeGenFunction::EmitOMPParallelForDirective(
+    const OMPParallelForDirective &S) {
+  // Emit directive as a combined directive that consists of two implicit
+  // directives: 'parallel' with 'for' directive.
+  LexicalScope Scope(*this, S.getSourceRange());
+  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+    CGF.EmitOMPWorksharingLoop(S);
+    // Emit implicit barrier at the end of parallel region, but this barrier
+    // is at the end of 'for' directive, so emit it as the implicit barrier for
+    // this 'for' directive.
+    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
+                                               OMPD_parallel);
+  };
+  emitCommonOMPParallelDirective(*this, S, CodeGen);
 }
 
 void CodeGenFunction::EmitOMPParallelForSimdDirective(
@@ -960,19 +1296,36 @@
 }
 
 void CodeGenFunction::EmitOMPParallelSectionsDirective(
-    const OMPParallelSectionsDirective &) {
-  llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet.");
+    const OMPParallelSectionsDirective &S) {
+  // Emit directive as a combined directive that consists of two implicit
+  // directives: 'parallel' with 'sections' directive.
+  LexicalScope Scope(*this, S.getSourceRange());
+  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+    (void)emitSections(CGF, S);
+    // Emit implicit barrier at the end of parallel region.
+    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
+                                               OMPD_parallel);
+  };
+  emitCommonOMPParallelDirective(*this, S, CodeGen);
 }
 
 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
   // Emit outlined function for task construct.
+  LexicalScope Scope(*this, S.getSourceRange());
   auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
   auto *I = CS->getCapturedDecl()->param_begin();
+  auto *PartId = std::next(I);
   // The first function argument for tasks is a thread id, the second one is a
   // part id (0 for tied tasks, >=0 for untied task).
+  auto &&CodeGen = [PartId, &S](CodeGenFunction &CGF) {
+    if (*PartId) {
+      // TODO: emit code for untied tasks.
+    }
+    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+  };
   auto OutlinedFn =
-      CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, *std::next(I));
+      CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, CodeGen);
   // Check if we should emit tied or untied task.
   bool Tied = !S.getSingleClause(OMPC_untied);
   // Check if the task is final
@@ -1001,7 +1354,7 @@
 }
 
 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
-  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart());
+  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
 }
 
 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &) {
@@ -1116,9 +1469,156 @@
     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
 }
 
+bool emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, RValue Update,
+                      BinaryOperatorKind BO, llvm::AtomicOrdering AO,
+                      bool IsXLHSInRHSPart) {
+  auto &Context = CGF.CGM.getContext();
+  // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
+  // expression is simple and atomic is allowed for the given type for the
+  // target platform.
+  if (BO == BO_Comma || !Update.isScalar() ||
+      !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
+      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
+       (Update.getScalarVal()->getType() !=
+        X.getAddress()->getType()->getPointerElementType())) ||
+      !Context.getTargetInfo().hasBuiltinAtomic(
+          Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
+    return false;
+
+  llvm::AtomicRMWInst::BinOp RMWOp;
+  switch (BO) {
+  case BO_Add:
+    RMWOp = llvm::AtomicRMWInst::Add;
+    break;
+  case BO_Sub:
+    if (!IsXLHSInRHSPart)
+      return false;
+    RMWOp = llvm::AtomicRMWInst::Sub;
+    break;
+  case BO_And:
+    RMWOp = llvm::AtomicRMWInst::And;
+    break;
+  case BO_Or:
+    RMWOp = llvm::AtomicRMWInst::Or;
+    break;
+  case BO_Xor:
+    RMWOp = llvm::AtomicRMWInst::Xor;
+    break;
+  case BO_LT:
+    RMWOp = X.getType()->hasSignedIntegerRepresentation()
+                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
+                                   : llvm::AtomicRMWInst::Max)
+                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
+                                   : llvm::AtomicRMWInst::UMax);
+    break;
+  case BO_GT:
+    RMWOp = X.getType()->hasSignedIntegerRepresentation()
+                ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
+                                   : llvm::AtomicRMWInst::Min)
+                : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
+                                   : llvm::AtomicRMWInst::UMin);
+    break;
+  case BO_Mul:
+  case BO_Div:
+  case BO_Rem:
+  case BO_Shl:
+  case BO_Shr:
+  case BO_LAnd:
+  case BO_LOr:
+    return false;
+  case BO_PtrMemD:
+  case BO_PtrMemI:
+  case BO_LE:
+  case BO_GE:
+  case BO_EQ:
+  case BO_NE:
+  case BO_Assign:
+  case BO_AddAssign:
+  case BO_SubAssign:
+  case BO_AndAssign:
+  case BO_OrAssign:
+  case BO_XorAssign:
+  case BO_MulAssign:
+  case BO_DivAssign:
+  case BO_RemAssign:
+  case BO_ShlAssign:
+  case BO_ShrAssign:
+  case BO_Comma:
+    llvm_unreachable("Unsupported atomic update operation");
+  }
+  auto *UpdateVal = Update.getScalarVal();
+  if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
+    UpdateVal = CGF.Builder.CreateIntCast(
+        IC, X.getAddress()->getType()->getPointerElementType(),
+        X.getType()->hasSignedIntegerRepresentation());
+  }
+  CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO);
+  return true;
+}
+
+void CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
+    LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
+    llvm::AtomicOrdering AO, SourceLocation Loc,
+    const llvm::function_ref<RValue(RValue)> &CommonGen) {
+  // Update expressions are allowed to have the following forms:
+  // x binop= expr; -> xrval binop expr;
+  // x++, ++x -> xrval + 1;
+  // x--, --x -> xrval - 1;
+  // x = x binop expr; -> xrval binop expr;
+  // x = expr binop x; -> expr binop xrval;
+  if (!emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart)) {
+    if (X.isGlobalReg()) {
+      // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
+      // 'xrval'.
+      EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
+    } else {
+      // Perform compare-and-swap procedure.
+      EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
+    }
+  }
+}
+
+static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
+                                    const Expr *X, const Expr *E,
+                                    const Expr *UE, bool IsXLHSInRHSPart,
+                                    SourceLocation Loc) {
+  assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
+         "Update expr in 'atomic update' must be a binary operator.");
+  auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
+  // Update expressions are allowed to have the following forms:
+  // x binop= expr; -> xrval binop expr;
+  // x++, ++x -> xrval + 1;
+  // x--, --x -> xrval - 1;
+  // x = x binop expr; -> xrval binop expr;
+  // x = expr binop x; -> expr binop xrval;
+  assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
+  LValue XLValue = CGF.EmitLValue(X);
+  RValue ExprRValue = CGF.EmitAnyExpr(E);
+  auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
+  auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
+  auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
+  auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
+  auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
+  auto Gen =
+      [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
+        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
+        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
+        return CGF.EmitAnyExpr(UE);
+      };
+  CGF.EmitOMPAtomicSimpleUpdateExpr(XLValue, ExprRValue, BOUE->getOpcode(),
+                                    IsXLHSInRHSPart, AO, Loc, Gen);
+  // OpenMP, 2.12.6, atomic Construct
+  // Any atomic construct with a seq_cst clause forces the atomically
+  // performed operation to include an implicit flush operation without a
+  // list.
+  if (IsSeqCst)
+    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
+}
+
 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                               bool IsSeqCst, const Expr *X, const Expr *V,
-                              const Expr *E, SourceLocation Loc) {
+                              const Expr *E, const Expr *UE,
+                              bool IsXLHSInRHSPart, SourceLocation Loc) {
   switch (Kind) {
   case OMPC_read:
     EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
@@ -1126,7 +1626,10 @@
   case OMPC_write:
     EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
     break;
+  case OMPC_unknown:
   case OMPC_update:
+    EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
+    break;
   case OMPC_capture:
     llvm_unreachable("CodeGen for 'omp atomic clause' is not supported yet.");
   case OMPC_if:
@@ -1153,7 +1656,6 @@
   case OMPC_untied:
   case OMPC_threadprivate:
   case OMPC_mergeable:
-  case OMPC_unknown:
     llvm_unreachable("Clause is not allowed in 'omp atomic'.");
   }
 }
@@ -1173,10 +1675,13 @@
       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
   if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS))
     enterFullExpression(EWC);
-  InlinedOpenMPRegionScopeRAII Region(*this, S);
 
-  EmitOMPAtomicExpr(*this, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
-                    S.getLocStart());
+  LexicalScope Scope(*this, S.getSourceRange());
+  auto &&CodeGen = [&S, Kind, IsSeqCst](CodeGenFunction &CGF) {
+    EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
+                      S.getUpdateExpr(), S.isXLHSInRHSPart(), S.getLocStart());
+  };
+  CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
 }
 
 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
diff --git a/lib/CodeGen/CGVTT.cpp b/lib/CodeGen/CGVTT.cpp
index 81bd651..895afd7 100644
--- a/lib/CodeGen/CGVTT.cpp
+++ b/lib/CodeGen/CGVTT.cpp
@@ -18,7 +18,7 @@
 using namespace clang;
 using namespace CodeGen;
 
-static llvm::Constant *
+static llvm::GlobalVariable *
 GetAddrOfVTTVTable(CodeGenVTables &CGVT, CodeGenModule &CGM,
                    const CXXRecordDecl *MostDerivedClass,
                    const VTTVTable &VTable,
@@ -47,8 +47,8 @@
   llvm::Type *Int8PtrTy = CGM.Int8PtrTy, *Int64Ty = CGM.Int64Ty;
   llvm::ArrayType *ArrayType = 
     llvm::ArrayType::get(Int8PtrTy, Builder.getVTTComponents().size());
-  
-  SmallVector<llvm::Constant *, 8> VTables;
+
+  SmallVector<llvm::GlobalVariable *, 8> VTables;
   SmallVector<VTableAddressPointsMapTy, 8> VTableAddressPoints;
   for (const VTTVTable *i = Builder.getVTTVTables().begin(),
                        *e = Builder.getVTTVTables().end(); i != e; ++i) {
@@ -61,7 +61,7 @@
   for (const VTTComponent *i = Builder.getVTTComponents().begin(),
                           *e = Builder.getVTTComponents().end(); i != e; ++i) {
     const VTTVTable &VTTVT = Builder.getVTTVTables()[i->VTableIndex];
-    llvm::Constant *VTable = VTables[i->VTableIndex];
+    llvm::GlobalVariable *VTable = VTables[i->VTableIndex];
     uint64_t AddressPoint;
     if (VTTVT.getBase() == RD) {
       // Just get the address point for the regular vtable.
@@ -79,8 +79,8 @@
        llvm::ConstantInt::get(Int64Ty, AddressPoint)
      };
 
-     llvm::Constant *Init = 
-       llvm::ConstantExpr::getInBoundsGetElementPtr(VTable, Idxs);
+     llvm::Constant *Init = llvm::ConstantExpr::getInBoundsGetElementPtr(
+         VTable->getValueType(), VTable, Idxs);
 
      Init = llvm::ConstantExpr::getBitCast(Init, Int8PtrTy);
 
diff --git a/lib/CodeGen/CGVTables.cpp b/lib/CodeGen/CGVTables.cpp
index 372db7a..57370a6 100644
--- a/lib/CodeGen/CGVTables.cpp
+++ b/lib/CodeGen/CGVTables.cpp
@@ -842,7 +842,10 @@
 
 void CodeGenModule::EmitVTableBitSetEntries(llvm::GlobalVariable *VTable,
                                             const VTableLayout &VTLayout) {
-  if (!LangOpts.Sanitize.has(SanitizerKind::CFIVptr))
+  if (!LangOpts.Sanitize.has(SanitizerKind::CFIVCall) &&
+      !LangOpts.Sanitize.has(SanitizerKind::CFINVCall) &&
+      !LangOpts.Sanitize.has(SanitizerKind::CFIDerivedCast) &&
+      !LangOpts.Sanitize.has(SanitizerKind::CFIUnrelatedCast))
     return;
 
   llvm::Metadata *VTableMD = llvm::ConstantAsMetadata::get(VTable);
diff --git a/lib/CodeGen/CGValue.h b/lib/CodeGen/CGValue.h
index 82cd949..9205591 100644
--- a/lib/CodeGen/CGValue.h
+++ b/lib/CodeGen/CGValue.h
@@ -19,6 +19,7 @@
 #include "clang/AST/CharUnits.h"
 #include "clang/AST/Type.h"
 #include "llvm/IR/Value.h"
+#include "llvm/IR/Type.h"
 
 namespace llvm {
   class Constant;
@@ -299,6 +300,7 @@
 
     LValue R;
     R.LVType = Simple;
+    assert(address->getType()->isPointerTy());
     R.V = address;
     R.Initialize(type, qs, alignment, TBAAInfo);
     return R;
diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp
index b5ed12a..60aac07 100644
--- a/lib/CodeGen/CodeGenAction.cpp
+++ b/lib/CodeGen/CodeGenAction.cpp
@@ -46,7 +46,7 @@
     const CodeGenOptions &CodeGenOpts;
     const TargetOptions &TargetOpts;
     const LangOptions &LangOpts;
-    raw_ostream *AsmOutStream;
+    raw_pwrite_stream *AsmOutStream;
     ASTContext *Context;
 
     Timer LLVMIRGeneration;
@@ -61,7 +61,7 @@
                     const TargetOptions &targetopts,
                     const LangOptions &langopts, bool TimePasses,
                     const std::string &infile, llvm::Module *LinkModule,
-                    raw_ostream *OS, LLVMContext &C,
+                    raw_pwrite_stream *OS, LLVMContext &C,
                     CoverageSourceInfo *CoverageInfo = nullptr)
         : Diags(_Diags), Action(action), CodeGenOpts(compopts),
           TargetOpts(targetopts), LangOpts(langopts), AsmOutStream(OS),
@@ -601,9 +601,8 @@
   return VMContext;
 }
 
-static raw_ostream *GetOutputStream(CompilerInstance &CI,
-                                    StringRef InFile,
-                                    BackendAction Action) {
+static raw_pwrite_stream *
+GetOutputStream(CompilerInstance &CI, StringRef InFile, BackendAction Action) {
   switch (Action) {
   case Backend_EmitAssembly:
     return CI.createDefaultOutputFile(false, InFile, "s");
@@ -625,7 +624,7 @@
 std::unique_ptr<ASTConsumer>
 CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
   BackendAction BA = static_cast<BackendAction>(Act);
-  std::unique_ptr<raw_ostream> OS(GetOutputStream(CI, InFile, BA));
+  std::unique_ptr<raw_pwrite_stream> OS(GetOutputStream(CI, InFile, BA));
   if (BA != Backend_EmitNothing && !OS)
     return nullptr;
 
@@ -678,7 +677,7 @@
   if (getCurrentFileKind() == IK_LLVM_IR) {
     BackendAction BA = static_cast<BackendAction>(Act);
     CompilerInstance &CI = getCompilerInstance();
-    raw_ostream *OS = GetOutputStream(CI, getCurrentFile(), BA);
+    raw_pwrite_stream *OS = GetOutputStream(CI, getCurrentFile(), BA);
     if (BA != Backend_EmitNothing && !OS)
       return;
 
diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp
index 9e80f0a..42c3a42 100644
--- a/lib/CodeGen/CodeGenFunction.cpp
+++ b/lib/CodeGen/CodeGenFunction.cpp
@@ -40,7 +40,7 @@
       CurFn(nullptr), CapturedStmtInfo(nullptr),
       SanOpts(CGM.getLangOpts().Sanitize), IsSanitizerScope(false),
       CurFuncIsThunk(false), AutoreleaseResult(false), SawAsmBlock(false),
-      BlockInfo(nullptr), BlockPointer(nullptr),
+      IsOutlinedSEHHelper(false), BlockInfo(nullptr), BlockPointer(nullptr),
       LambdaThisCaptureField(nullptr), NormalCleanupDest(nullptr),
       NextCleanupDestIndex(1), FirstBlockInfo(nullptr), EHResumeBlock(nullptr),
       ExceptionSlot(nullptr), EHSelectorSlot(nullptr),
@@ -70,6 +70,9 @@
   if (CGM.getCodeGenOpts().NoSignedZeros) {
     FMF.setNoSignedZeros();
   }
+  if (CGM.getCodeGenOpts().ReciprocalMath) {
+    FMF.setAllowReciprocal();
+  }
   Builder.SetFastMathFlags(FMF);
 }
 
@@ -279,6 +282,20 @@
     Builder.ClearInsertionPoint();
   }
 
+  // If some of our locals escaped, insert a call to llvm.frameescape in the
+  // entry block.
+  if (!EscapedLocals.empty()) {
+    // Invert the map from local to index into a simple vector. There should be
+    // no holes.
+    SmallVector<llvm::Value *, 4> EscapeArgs;
+    EscapeArgs.resize(EscapedLocals.size());
+    for (auto &Pair : EscapedLocals)
+      EscapeArgs[Pair.second] = Pair.first;
+    llvm::Function *FrameEscapeFn = llvm::Intrinsic::getDeclaration(
+        &CGM.getModule(), llvm::Intrinsic::frameescape);
+    CGBuilderTy(AllocaInsertPt).CreateCall(FrameEscapeFn, EscapeArgs);
+  }
+
   // Remove the AllocaInsertPt instruction, which is just a convenience for us.
   llvm::Instruction *Ptr = AllocaInsertPt;
   AllocaInsertPt = nullptr;
@@ -680,7 +697,7 @@
     unsigned Idx = CurFnInfo->getReturnInfo().getInAllocaFieldIndex();
     llvm::Function::arg_iterator EI = CurFn->arg_end();
     --EI;
-    llvm::Value *Addr = Builder.CreateStructGEP(EI, Idx);
+    llvm::Value *Addr = Builder.CreateStructGEP(nullptr, EI, Idx);
     ReturnValue = Builder.CreateLoad(Addr, "agg.result");
   } else {
     ReturnValue = CreateIRTemp(RetTy, "retval");
@@ -1229,7 +1246,8 @@
                        /*volatile*/ false);
 
   // Go to the next element.
-  llvm::Value *next = Builder.CreateConstInBoundsGEP1_32(cur, 1, "vla.next");
+  llvm::Value *next = Builder.CreateConstInBoundsGEP1_32(Builder.getInt8Ty(),
+                                                         cur, 1, "vla.next");
 
   // Leave if that's the end of the VLA.
   llvm::Value *done = Builder.CreateICmpEQ(next, end, "vla-init.isdone");
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 151eb7e..4e7a7e2 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -192,7 +192,7 @@
 
     CapturedRegionKind getKind() const { return Kind; }
 
-    void setContextValue(llvm::Value *V) { ThisValue = V; }
+    virtual void setContextValue(llvm::Value *V) { ThisValue = V; }
     // \brief Retrieve the value of the context parameter.
     virtual llvm::Value *getContextValue() const { return ThisValue; }
 
@@ -263,6 +263,10 @@
   /// potentially set the return value.
   bool SawAsmBlock;
 
+  /// True if the current function is an outlined SEH helper. This can be a
+  /// finally block or filter expression.
+  bool IsOutlinedSEHHelper;
+
   const CodeGen::CGBlockInfo *BlockInfo;
   llvm::Value *BlockPointer;
 
@@ -351,17 +355,6 @@
     void exit(CodeGenFunction &CGF);
   };
 
-  /// Cleanups can be emitted for two reasons: normal control leaving a region
-  /// exceptional control flow leaving a region.
-  struct SEHFinallyInfo {
-    SEHFinallyInfo()
-        : FinallyBB(nullptr), ContBB(nullptr), ResumeBB(nullptr) {}
-
-    llvm::BasicBlock *FinallyBB;
-    llvm::BasicBlock *ContBB;
-    llvm::BasicBlock *ResumeBB;
-  };
-
   /// Returns true inside SEH __try blocks.
   bool isSEHTryScope() const { return !SEHTryEpilogueStack.empty(); }
 
@@ -851,7 +844,8 @@
   
   /// getByrefValueFieldNumber - Given a declaration, returns the LLVM field
   /// number that holds the value.
-  unsigned getByRefValueLLVMField(const ValueDecl *VD) const;
+  std::pair<llvm::Type *, unsigned>
+  getByRefValueLLVMField(const ValueDecl *VD) const;
 
   /// BuildBlockByrefAddress - Computes address location of the
   /// variable which is declared as __block.
@@ -876,6 +870,10 @@
   typedef llvm::DenseMap<const Decl*, llvm::Value*> DeclMapTy;
   DeclMapTy LocalDeclMap;
 
+  /// Track escaped local variables with auto storage. Used during SEH
+  /// outlining to produce a call to llvm.frameescape.
+  llvm::DenseMap<llvm::AllocaInst *, int> EscapedLocals;
+
   /// LabelMap - This keeps track of the LLVM basic block for each C label.
   llvm::DenseMap<const LabelDecl*, JumpDest> LabelMap;
 
@@ -1047,10 +1045,6 @@
   llvm::Value *getExceptionSlot();
   llvm::Value *getEHSelectorSlot();
 
-  /// Stack slot that contains whether a __finally block is being executed as an
-  /// EH cleanup or as a normal cleanup.
-  llvm::Value *getAbnormalTerminationSlot();
-
   /// Returns the contents of the function's exception object and selector
   /// slots.
   llvm::Value *getExceptionFromSlot();
@@ -1729,7 +1723,8 @@
                              llvm::Value *This);
 
   void EmitNewArrayInitializer(const CXXNewExpr *E, QualType elementType,
-                               llvm::Value *NewPtr, llvm::Value *NumElements,
+                               llvm::Type *ElementTy, llvm::Value *NewPtr,
+                               llvm::Value *NumElements,
                                llvm::Value *AllocSizeWithoutCookie);
 
   void EmitCXXTemporary(const CXXTemporary *Temporary, QualType TempType,
@@ -1895,8 +1890,8 @@
     llvm::Value *getObjectAddress(CodeGenFunction &CGF) const {
       if (!IsByRef) return Address;
 
-      return CGF.Builder.CreateStructGEP(Address,
-                                         CGF.getByRefValueLLVMField(Variable),
+      auto F = CGF.getByRefValueLLVMField(Variable);
+      return CGF.Builder.CreateStructGEP(F.first, Address, F.second,
                                          Variable->getNameAsString());
     }
   };
@@ -1994,17 +1989,30 @@
   void EmitCXXTryStmt(const CXXTryStmt &S);
   void EmitSEHTryStmt(const SEHTryStmt &S);
   void EmitSEHLeaveStmt(const SEHLeaveStmt &S);
-  void EnterSEHTryStmt(const SEHTryStmt &S, SEHFinallyInfo &FI);
-  void ExitSEHTryStmt(const SEHTryStmt &S, SEHFinallyInfo &FI);
+  void EnterSEHTryStmt(const SEHTryStmt &S);
+  void ExitSEHTryStmt(const SEHTryStmt &S);
+
+  void startOutlinedSEHHelper(CodeGenFunction &ParentCGF, StringRef Name,
+                              QualType RetTy, FunctionArgList &Args,
+                              const Stmt *OutlinedStmt);
 
   llvm::Function *GenerateSEHFilterFunction(CodeGenFunction &ParentCGF,
                                             const SEHExceptStmt &Except);
 
+  llvm::Function *GenerateSEHFinallyFunction(CodeGenFunction &ParentCGF,
+                                             const SEHFinallyStmt &Finally);
+
   void EmitSEHExceptionCodeSave();
   llvm::Value *EmitSEHExceptionCode();
   llvm::Value *EmitSEHExceptionInfo();
   llvm::Value *EmitSEHAbnormalTermination();
 
+  /// Scan the outlined statement for captures from the parent function. For
+  /// each capture, mark the capture as escaped and emit a call to
+  /// llvm.framerecover. Insert the framerecover result into the LocalDeclMap.
+  void EmitCapturedLocals(CodeGenFunction &ParentCGF, const Stmt *OutlinedStmt,
+                          llvm::Value *ParentFP);
+
   void EmitCXXForRangeStmt(const CXXForRangeStmt &S,
                            ArrayRef<const Attr *> Attrs = None);
 
@@ -2014,13 +2022,100 @@
   llvm::Function *GenerateCapturedStmtFunctionEpilog(const CapturedStmt &S);
   llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S);
   llvm::Value *GenerateCapturedStmtArgument(const CapturedStmt &S);
-  void EmitOMPAggregateAssign(LValue OriginalAddr, llvm::Value *PrivateAddr,
-                              const Expr *AssignExpr, QualType Type,
-                              const VarDecl *VDInit);
-  void EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
+  /// \brief Perform element by element copying of arrays with type \a
+  /// OriginalType from \a SrcAddr to \a DestAddr using copying procedure
+  /// generated by \a CopyGen.
+  ///
+  /// \param DestAddr Address of the destination array.
+  /// \param SrcAddr Address of the source array.
+  /// \param OriginalType Type of destination and source arrays.
+  /// \param CopyGen Copying procedure that copies value of single array element
+  /// to another single array element.
+  void EmitOMPAggregateAssign(
+      llvm::Value *DestAddr, llvm::Value *SrcAddr, QualType OriginalType,
+      const llvm::function_ref<void(llvm::Value *, llvm::Value *)> &CopyGen);
+  /// \brief Emit proper copying of data from one variable to another.
+  ///
+  /// \param OriginalType Original type of the copied variables.
+  /// \param DestAddr Destination address.
+  /// \param SrcAddr Source address.
+  /// \param DestVD Destination variable used in \a CopyExpr (for arrays, has
+  /// type of the base array element).
+  /// \param SrcVD Source variable used in \a CopyExpr (for arrays, has type of
+  /// the base array element).
+  /// \param Copy Actual copying expression for copying data from \a SrcVD to \a
+  /// DestVD.
+  void EmitOMPCopy(CodeGenFunction &CGF, QualType OriginalType,
+                   llvm::Value *DestAddr, llvm::Value *SrcAddr,
+                   const VarDecl *DestVD, const VarDecl *SrcVD,
+                   const Expr *Copy);
+  /// \brief Emit atomic update code for constructs: \a X = \a X \a BO \a E or
+  /// \a X = \a E \a BO \a X.
+  ///
+  /// \param X Value to be updated.
+  /// \param E Update value.
+  /// \param BO Binary operation for update operation.
+  /// \param IsXLHSInRHSPart true if \a X is LHS in RHS part of the update
+  /// expression, false otherwise.
+  /// \param AO Atomic ordering of the generated atomic instructions.
+  /// \param CommonGen Code generator for complex expressions that cannot be
+  /// expressed through atomicrmw instruction.
+  void EmitOMPAtomicSimpleUpdateExpr(
+      LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
+      llvm::AtomicOrdering AO, SourceLocation Loc,
+      const llvm::function_ref<RValue(RValue)> &CommonGen);
+  bool EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                  OMPPrivateScope &PrivateScope);
   void EmitOMPPrivateClause(const OMPExecutableDirective &D,
                             OMPPrivateScope &PrivateScope);
+  /// \brief Emit code for copyin clause in \a D directive. The next code is
+  /// generated at the start of outlined functions for directives:
+  /// \code
+  /// threadprivate_var1 = master_threadprivate_var1;
+  /// operator=(threadprivate_var2, master_threadprivate_var2);
+  /// ...
+  /// __kmpc_barrier(&loc, global_tid);
+  /// \endcode
+  ///
+  /// \param D OpenMP directive possibly with 'copyin' clause(s).
+  /// \returns true if at least one copyin variable is found, false otherwise.
+  bool EmitOMPCopyinClause(const OMPExecutableDirective &D);
+  /// \brief Emit initial code for lastprivate variables. If some variable is
+  /// not also firstprivate, then the default initialization is used. Otherwise
+  /// initialization of this variable is performed by EmitOMPFirstprivateClause
+  /// method.
+  ///
+  /// \param D Directive that may have 'lastprivate' clauses.
+  /// \param PrivateScope Private scope for capturing lastprivate variables for
+  /// proper codegen in internal captured statement.
+  ///
+  /// \returns true if there is at least one lastprivate variable, false
+  /// otherwise.
+  bool EmitOMPLastprivateClauseInit(const OMPExecutableDirective &D,
+                                    OMPPrivateScope &PrivateScope);
+  /// \brief Emit final copying of lastprivate values to original variables at
+  /// the end of the worksharing or simd directive.
+  ///
+  /// \param D Directive that has at least one 'lastprivate' clause.
+  /// \param IsLastIterCond Boolean condition that must be set to 'i1 true' if
+  /// it is the last iteration of the loop code in associated directive, or to
+  /// 'i1 false' otherwise.
+  void EmitOMPLastprivateClauseFinal(const OMPExecutableDirective &D,
+                                     llvm::Value *IsLastIterCond);
+  /// \brief Emit initial code for reduction variables. Creates reduction copies
+  /// and initializes them with the values according to OpenMP standard.
+  ///
+  /// \param D Directive (possibly) with the 'reduction' clause.
+  /// \param PrivateScope Private scope for capturing reduction variables for
+  /// proper codegen in internal captured statement.
+  ///
+  void EmitOMPReductionClauseInit(const OMPExecutableDirective &D,
+                                  OMPPrivateScope &PrivateScope);
+  /// \brief Emit final update of reduction values to original variables at
+  /// the end of the directive.
+  ///
+  /// \param D Directive that has at least one 'reduction' clause.
+  void EmitOMPReductionClauseFinal(const OMPExecutableDirective &D);
 
   void EmitOMPParallelDirective(const OMPParallelDirective &S);
   void EmitOMPSimdDirective(const OMPSimdDirective &S);
@@ -2044,16 +2139,21 @@
   void EmitOMPTargetDirective(const OMPTargetDirective &S);
   void EmitOMPTeamsDirective(const OMPTeamsDirective &S);
 
+  void
+  EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
+                   const Expr *IncExpr,
+                   const llvm::function_ref<void(CodeGenFunction &)> &BodyGen);
+
 private:
 
   /// Helpers for the OpenMP loop directives.
   void EmitOMPLoopBody(const OMPLoopDirective &Directive,
                        bool SeparateIter = false);
-  void EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup,
-                        const Expr *LoopCond, const Expr *IncExpr,
-                        const std::function<void()> &BodyGen);
   void EmitOMPSimdFinal(const OMPLoopDirective &S);
-  void EmitOMPWorksharingLoop(const OMPLoopDirective &S);
+  /// \brief Emit code for the worksharing loop-based directive.
+  /// \return true, if this construct has any lastprivate clause, false -
+  /// otherwise.
+  bool EmitOMPWorksharingLoop(const OMPLoopDirective &S);
   void EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                            const OMPLoopDirective &S,
                            OMPPrivateScope &LoopScope, llvm::Value *LB,
@@ -2123,12 +2223,16 @@
   void EmitAtomicStore(RValue rvalue, LValue lvalue, llvm::AtomicOrdering AO,
                        bool IsVolatile, bool isInit);
 
-  std::pair<RValue, RValue> EmitAtomicCompareExchange(
+  std::pair<RValue, llvm::Value *> EmitAtomicCompareExchange(
       LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc,
       llvm::AtomicOrdering Success = llvm::SequentiallyConsistent,
       llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent,
       bool IsWeak = false, AggValueSlot Slot = AggValueSlot::ignored());
 
+  void EmitAtomicUpdate(LValue LVal, llvm::AtomicOrdering AO,
+                        const std::function<RValue(RValue)> &UpdateOp,
+                        bool IsVolatile);
+
   /// EmitToMemory - Change a scalar value from its value
   /// representation to its in-memory representation.
   llvm::Value *EmitToMemory(llvm::Value *Value, QualType Ty);
@@ -2430,6 +2534,7 @@
   llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitR600BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+  llvm::Value *EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
 
   llvm::Value *EmitObjCProtocolExpr(const ObjCProtocolExpr *E);
   llvm::Value *EmitObjCStringLiteral(const ObjCStringLiteral *E);
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index f4ae684..17b7ddc 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -147,8 +147,8 @@
       unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
                                               "Could not read profile: %0");
       getDiags().Report(DiagID) << EC.message();
-    }
-    PGOReader = std::move(ReaderOrErr.get());
+    } else
+      PGOReader = std::move(ReaderOrErr.get());
   }
 
   // If coverage mapping generation is enabled, create the
@@ -2134,7 +2134,8 @@
 }
 
 static bool isVarDeclStrongDefinition(const ASTContext &Context,
-                                      const VarDecl *D, bool NoCommon) {
+                                      CodeGenModule &CGM, const VarDecl *D,
+                                      bool NoCommon) {
   // Don't give variables common linkage if -fno-common was specified unless it
   // was overridden by a NoCommon attribute.
   if ((NoCommon || D->hasAttr<NoCommonAttr>()) && !D->hasAttr<CommonAttr>())
@@ -2159,6 +2160,10 @@
   if (D->hasAttr<WeakImportAttr>())
     return true;
 
+  // A variable cannot be both common and exist in a comdat.
+  if (shouldBeInCOMDAT(CGM, *D))
+    return true;
+
   // Declarations with a required alignment do not have common linakge in MSVC
   // mode.
   if (Context.getLangOpts().MSVCCompat) {
@@ -2227,7 +2232,7 @@
   // C++ doesn't have tentative definitions and thus cannot have common
   // linkage.
   if (!getLangOpts().CPlusPlus && isa<VarDecl>(D) &&
-      !isVarDeclStrongDefinition(Context, cast<VarDecl>(D),
+      !isVarDeclStrongDefinition(Context, *this, cast<VarDecl>(D),
                                  CodeGenOpts.NoCommon))
     return llvm::GlobalVariable::CommonLinkage;
 
@@ -2350,7 +2355,7 @@
       callSite->replaceAllUsesWith(newCall.getInstruction());
 
     // Copy debug location attached to CI.
-    if (!callSite->getDebugLoc().isUnknown())
+    if (callSite->getDebugLoc())
       newCall->setDebugLoc(callSite->getDebugLoc());
     callSite->eraseFromParent();
   }
@@ -2564,12 +2569,10 @@
                                          Tys);
 }
 
-static llvm::StringMapEntry<llvm::Constant*> &
-GetConstantCFStringEntry(llvm::StringMap<llvm::Constant*> &Map,
-                         const StringLiteral *Literal,
-                         bool TargetIsLSB,
-                         bool &IsUTF16,
-                         unsigned &StringLength) {
+static llvm::StringMapEntry<llvm::GlobalVariable *> &
+GetConstantCFStringEntry(llvm::StringMap<llvm::GlobalVariable *> &Map,
+                         const StringLiteral *Literal, bool TargetIsLSB,
+                         bool &IsUTF16, unsigned &StringLength) {
   StringRef String = Literal->getString();
   unsigned NumBytes = String.size();
 
@@ -2601,10 +2604,9 @@
                          nullptr)).first;
 }
 
-static llvm::StringMapEntry<llvm::Constant*> &
-GetConstantStringEntry(llvm::StringMap<llvm::Constant*> &Map,
-                       const StringLiteral *Literal,
-                       unsigned &StringLength) {
+static llvm::StringMapEntry<llvm::GlobalVariable *> &
+GetConstantStringEntry(llvm::StringMap<llvm::GlobalVariable *> &Map,
+                       const StringLiteral *Literal, unsigned &StringLength) {
   StringRef String = Literal->getString();
   StringLength = String.size();
   return *Map.insert(std::make_pair(String, nullptr)).first;
@@ -2614,10 +2616,10 @@
 CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) {
   unsigned StringLength = 0;
   bool isUTF16 = false;
-  llvm::StringMapEntry<llvm::Constant*> &Entry =
-    GetConstantCFStringEntry(CFConstantStringMap, Literal,
-                             getDataLayout().isLittleEndian(),
-                             isUTF16, StringLength);
+  llvm::StringMapEntry<llvm::GlobalVariable *> &Entry =
+      GetConstantCFStringEntry(CFConstantStringMap, Literal,
+                               getDataLayout().isLittleEndian(), isUTF16,
+                               StringLength);
 
   if (auto *C = Entry.second)
     return C;
@@ -2633,7 +2635,7 @@
     llvm::Constant *GV = CreateRuntimeVariable(Ty,
                                            "__CFConstantStringClassReference");
     // Decay array -> ptr
-    V = llvm::ConstantExpr::getGetElementPtr(GV, Zeros);
+    V = llvm::ConstantExpr::getGetElementPtr(Ty, GV, Zeros);
     CFConstantStringClassRef = V;
   }
   else
@@ -2686,7 +2688,7 @@
   }
 
   // String.
-  Fields[2] = llvm::ConstantExpr::getGetElementPtr(GV, Zeros);
+  Fields[2] = llvm::ConstantExpr::getGetElementPtr(GV->getType(), GV, Zeros);
 
   if (isUTF16)
     // Cast the UTF16 string to the correct type.
@@ -2707,11 +2709,11 @@
   return GV;
 }
 
-llvm::Constant *
+llvm::GlobalVariable *
 CodeGenModule::GetAddrOfConstantString(const StringLiteral *Literal) {
   unsigned StringLength = 0;
-  llvm::StringMapEntry<llvm::Constant*> &Entry =
-    GetConstantStringEntry(CFConstantStringMap, Literal, StringLength);
+  llvm::StringMapEntry<llvm::GlobalVariable *> &Entry =
+      GetConstantStringEntry(CFConstantStringMap, Literal, StringLength);
 
   if (auto *C = Entry.second)
     return C;
@@ -2740,11 +2742,10 @@
       llvm::Type *PTy = llvm::ArrayType::get(Ty, 0);
       GV = CreateRuntimeVariable(PTy, str);
       // Decay array -> ptr
-      V = llvm::ConstantExpr::getGetElementPtr(GV, Zeros);
+      V = llvm::ConstantExpr::getGetElementPtr(PTy, GV, Zeros);
       ConstantStringClassRef = V;
     }
-  }
-  else
+  } else
     V = ConstantStringClassRef;
 
   if (!NSConstantStringType) {
@@ -2800,8 +2801,9 @@
   // of the string is via this class initializer.
   CharUnits Align = getContext().getTypeAlignInChars(getContext().CharTy);
   GV->setAlignment(Align.getQuantity());
-  Fields[1] = llvm::ConstantExpr::getGetElementPtr(GV, Zeros);
-  
+  Fields[1] =
+      llvm::ConstantExpr::getGetElementPtr(GV->getValueType(), GV, Zeros);
+
   // String length.
   llvm::Type *Ty = getTypes().ConvertType(getContext().UnsignedIntTy);
   Fields[2] = llvm::ConstantInt::get(Ty, StringLength);
@@ -3433,7 +3435,7 @@
 
 void CodeGenModule::EmitDeferredUnusedCoverageMappings() {
   std::vector<const Decl *> DeferredDecls;
-  for (const auto I : DeferredEmptyCoverageMappingDecls) {
+  for (const auto &I : DeferredEmptyCoverageMappingDecls) {
     if (!I.second)
       continue;
     DeferredDecls.push_back(I.first);
@@ -3638,9 +3640,9 @@
 }
 
 llvm::Constant *
-CodeGenModule::getAddrOfCXXHandlerMapEntry(QualType Ty,
-                                           QualType CatchHandlerType) {
-  return getCXXABI().getAddrOfCXXHandlerMapEntry(Ty, CatchHandlerType);
+CodeGenModule::getAddrOfCXXCatchHandlerType(QualType Ty,
+                                            QualType CatchHandlerType) {
+  return getCXXABI().getAddrOfCXXCatchHandlerType(Ty, CatchHandlerType);
 }
 
 llvm::Constant *CodeGenModule::GetAddrOfRTTIDescriptor(QualType Ty,
diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h
index ce540e9..feef6c2 100644
--- a/lib/CodeGen/CodeGenModule.h
+++ b/lib/CodeGen/CodeGenModule.h
@@ -366,7 +366,7 @@
   /// Map used to get unique annotation strings.
   llvm::StringMap<llvm::Constant*> AnnotationStrings;
 
-  llvm::StringMap<llvm::Constant*> CFConstantStringMap;
+  llvm::StringMap<llvm::GlobalVariable *> CFConstantStringMap;
 
   llvm::DenseMap<llvm::Constant *, llvm::GlobalVariable *> ConstantStringMap;
   llvm::DenseMap<const Decl*, llvm::Constant *> StaticLocalDeclMap;
@@ -400,7 +400,8 @@
   /// When a C++ decl with an initializer is deferred, null is
   /// appended to CXXGlobalInits, and the index of that null is placed
   /// here so that the initializer will be performed in the correct
-  /// order.
+  /// order. Once the decl is emitted, the index is replaced with ~0U to ensure
+  /// that we don't re-emit the initializer.
   llvm::DenseMap<const Decl*, unsigned> DelayedCXXInitPosition;
   
   typedef std::pair<OrderGlobalInits, llvm::Function*> GlobalInitData;
@@ -719,8 +720,8 @@
   /// Get the address of the RTTI descriptor for the given type.
   llvm::Constant *GetAddrOfRTTIDescriptor(QualType Ty, bool ForEH = false);
 
-  llvm::Constant *getAddrOfCXXHandlerMapEntry(QualType Ty,
-                                              QualType CatchHandlerType);
+  llvm::Constant *getAddrOfCXXCatchHandlerType(QualType Ty,
+                                               QualType CatchHandlerType);
 
   /// Get the address of a uuid descriptor .
   llvm::Constant *GetAddrOfUuidDescriptor(const CXXUuidofExpr* E);
@@ -786,7 +787,7 @@
   /// Return a pointer to a constant NSString object for the given string. Or a
   /// user defined String object as defined via
   /// -fconstant-string-class=class_name option.
-  llvm::Constant *GetAddrOfConstantString(const StringLiteral *Literal);
+  llvm::GlobalVariable *GetAddrOfConstantString(const StringLiteral *Literal);
 
   /// Return a constant array for the given string.
   llvm::Constant *GetConstantArrayFromStringLiteral(const StringLiteral *E);
diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp
index 557828d..cc6ac20 100644
--- a/lib/CodeGen/CodeGenPGO.cpp
+++ b/lib/CodeGen/CodeGenPGO.cpp
@@ -880,12 +880,10 @@
   if (!haveRegionCounts())
     return nullptr;
   uint64_t LoopCount = Cnt.getCount();
-  uint64_t CondCount = 0;
-  bool Found = getStmtCount(Cond, CondCount);
-  assert(Found && "missing expected loop condition count");
-  (void)Found;
-  if (CondCount == 0)
+  Optional<uint64_t> CondCount = getStmtCount(Cond);
+  assert(CondCount.hasValue() && "missing expected loop condition count");
+  if (*CondCount == 0)
     return nullptr;
   return createBranchWeights(LoopCount,
-                             std::max(CondCount, LoopCount) - LoopCount);
+                             std::max(*CondCount, LoopCount) - LoopCount);
 }
diff --git a/lib/CodeGen/CodeGenPGO.h b/lib/CodeGen/CodeGenPGO.h
index 431c850..c92a057 100644
--- a/lib/CodeGen/CodeGenPGO.h
+++ b/lib/CodeGen/CodeGenPGO.h
@@ -69,23 +69,20 @@
 
   /// Check if an execution count is known for a given statement. If so, return
   /// true and put the value in Count; else return false.
-  bool getStmtCount(const Stmt *S, uint64_t &Count) {
+  Optional<uint64_t> getStmtCount(const Stmt *S) {
     if (!StmtCountMap)
-      return false;
-    llvm::DenseMap<const Stmt*, uint64_t>::const_iterator
-      I = StmtCountMap->find(S);
+      return None;
+    auto I = StmtCountMap->find(S);
     if (I == StmtCountMap->end())
-      return false;
-    Count = I->second;
-    return true;
+      return None;
+    return I->second;
   }
 
   /// If the execution count for the current statement is known, record that
   /// as the current count.
   void setCurrentStmt(const Stmt *S) {
-    uint64_t Count;
-    if (getStmtCount(S, Count))
-      setCurrentRegionCount(Count);
+    if (auto Count = getStmtCount(S))
+      setCurrentRegionCount(*Count);
   }
 
   /// Calculate branch weights appropriate for PGO data
diff --git a/lib/CodeGen/CoverageMappingGen.cpp b/lib/CodeGen/CoverageMappingGen.cpp
index 07db6c7..d26eced 100644
--- a/lib/CodeGen/CoverageMappingGen.cpp
+++ b/lib/CodeGen/CoverageMappingGen.cpp
@@ -748,7 +748,7 @@
         size_t Index =
             pushRegion(Counter::getZero(), getStart(CS->body_front()),
                        getEnd(CS->body_back()));
-        for (const auto &Child : CS->children())
+        for (const auto *Child : CS->children())
           Visit(Child);
         popRegions(Index);
       }
diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp
index f23cd9f..eb7ab1d 100644
--- a/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/lib/CodeGen/ItaniumCXXABI.cpp
@@ -127,7 +127,8 @@
   void EmitFundamentalRTTIDescriptors();
   llvm::Constant *getAddrOfRTTIDescriptor(QualType Ty) override;
   llvm::Constant *
-  getAddrOfCXXHandlerMapEntry(QualType Ty, QualType CatchHandlerType) override {
+  getAddrOfCXXCatchHandlerType(QualType Ty,
+                               QualType CatchHandlerType) override {
     return getAddrOfRTTIDescriptor(Ty);
   }
 
@@ -1387,7 +1388,7 @@
 
 llvm::Constant *ItaniumCXXABI::getVTableAddressPointForConstExpr(
     BaseSubobject Base, const CXXRecordDecl *VTableClass) {
-  llvm::Constant *VTable = getAddrOfVTable(VTableClass, CharUnits());
+  auto *VTable = getAddrOfVTable(VTableClass, CharUnits());
 
   // Find the appropriate vtable within the vtable group.
   uint64_t AddressPoint = CGM.getItaniumVTableContext()
@@ -1398,7 +1399,8 @@
     llvm::ConstantInt::get(CGM.Int64Ty, AddressPoint)
   };
 
-  return llvm::ConstantExpr::getInBoundsGetElementPtr(VTable, Indices);
+  return llvm::ConstantExpr::getInBoundsGetElementPtr(VTable->getValueType(),
+                                                      VTable, Indices);
 }
 
 llvm::GlobalVariable *ItaniumCXXABI::getAddrOfVTable(const CXXRecordDecl *RD,
@@ -1442,7 +1444,8 @@
   Ty = Ty->getPointerTo()->getPointerTo();
   llvm::Value *VTable = CGF.GetVTablePtr(This, Ty);
 
-  CGF.EmitVTablePtrCheckForCall(cast<CXXMethodDecl>(GD.getDecl()), VTable);
+  if (CGF.SanOpts.has(SanitizerKind::CFIVCall))
+    CGF.EmitVTablePtrCheckForCall(cast<CXXMethodDecl>(GD.getDecl()), VTable);
 
   uint64_t VTableIndex = CGM.getItaniumVTableContext().getMethodVTableIndex(GD);
   llvm::Value *VFuncPtr =
@@ -1666,7 +1669,7 @@
   CGF.Builder.CreateStore(elementSize, cookie);
 
   // The second element is the element count.
-  cookie = CGF.Builder.CreateConstInBoundsGEP1_32(cookie, 1);
+  cookie = CGF.Builder.CreateConstInBoundsGEP1_32(CGF.SizeTy, cookie, 1);
   CGF.Builder.CreateStore(numElements, cookie);
 
   // Finally, compute a pointer to the actual data buffer by skipping
@@ -2604,7 +2607,8 @@
 
   // The vtable address point is 2.
   llvm::Constant *Two = llvm::ConstantInt::get(PtrDiffTy, 2);
-  VTable = llvm::ConstantExpr::getInBoundsGetElementPtr(VTable, Two);
+  VTable =
+      llvm::ConstantExpr::getInBoundsGetElementPtr(CGM.Int8PtrTy, VTable, Two);
   VTable = llvm::ConstantExpr::getBitCast(VTable, CGM.Int8PtrTy);
 
   Fields.push_back(VTable);
diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp
index f8f7845..f00cd9c 100644
--- a/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -45,7 +45,7 @@
       : CGCXXABI(CGM), BaseClassDescriptorType(nullptr),
         ClassHierarchyDescriptorType(nullptr),
         CompleteObjectLocatorType(nullptr), CatchableTypeType(nullptr),
-        ThrowInfoType(nullptr), HandlerMapEntryType(nullptr) {}
+        ThrowInfoType(nullptr), CatchHandlerTypeType(nullptr) {}
 
   bool HasThisReturn(GlobalDecl GD) const override;
   bool hasMostDerivedReturn(GlobalDecl GD) const override;
@@ -85,7 +85,7 @@
 
   llvm::Constant *getAddrOfRTTIDescriptor(QualType Ty) override;
   llvm::Constant *
-  getAddrOfCXXHandlerMapEntry(QualType Ty, QualType CatchHandlerType) override;
+  getAddrOfCXXCatchHandlerType(QualType Ty, QualType CatchHandlerType) override;
 
   bool shouldTypeidBeNullChecked(bool IsDeref, QualType SrcRecordTy) override;
   void EmitBadTypeidCall(CodeGenFunction &CGF) override;
@@ -573,16 +573,16 @@
 
   void emitCXXStructor(const CXXMethodDecl *MD, StructorType Type) override;
 
-  llvm::StructType *getHandlerMapEntryType() {
-    if (!HandlerMapEntryType) {
+  llvm::StructType *getCatchHandlerTypeType() {
+    if (!CatchHandlerTypeType) {
       llvm::Type *FieldTypes[] = {
-        CGM.IntTy,                           // Flags
-        getImageRelativeType(CGM.Int8PtrTy), // TypeDescriptor
+          CGM.IntTy,     // Flags
+          CGM.Int8PtrTy, // TypeDescriptor
       };
-      HandlerMapEntryType = llvm::StructType::create(
-          CGM.getLLVMContext(), FieldTypes, "eh.HandlerMapEntry");
+      CatchHandlerTypeType = llvm::StructType::create(
+          CGM.getLLVMContext(), FieldTypes, "eh.CatchHandlerType");
     }
-    return HandlerMapEntryType;
+    return CatchHandlerTypeType;
   }
 
   llvm::StructType *getCatchableTypeType() {
@@ -698,7 +698,7 @@
   llvm::StructType *CatchableTypeType;
   llvm::DenseMap<uint32_t, llvm::StructType *> CatchableTypeArrayTypeMap;
   llvm::StructType *ThrowInfoType;
-  llvm::StructType *HandlerMapEntryType;
+  llvm::StructType *CatchHandlerTypeType;
 };
 
 }
@@ -809,7 +809,9 @@
   llvm::Function *BeginCatch =
       CGF.CGM.getIntrinsic(llvm::Intrinsic::eh_begincatch);
 
-  if (!CatchParam) {
+  // If this is a catch-all or the catch parameter is unnamed, we don't need to
+  // emit an alloca to the object.
+  if (!CatchParam || !CatchParam->getDeclName()) {
     llvm::Value *Args[2] = {Exn, llvm::Constant::getNullValue(CGF.Int8PtrTy)};
     CGF.EmitNounwindRuntimeCall(BeginCatch, Args);
     CGF.EHStack.pushCleanup<CallEndCatchMSVC>(NormalAndEHCleanup);
@@ -1121,7 +1123,8 @@
       Offs += Layout.getVBaseClassOffset(VBT->getVBaseWithVPtr());
     llvm::Value *VBPtr =
         CGF.Builder.CreateConstInBoundsGEP1_64(ThisInt8Ptr, Offs.getQuantity());
-    llvm::Value *GVPtr = CGF.Builder.CreateConstInBoundsGEP2_32(GV, 0, 0);
+    llvm::Value *GVPtr =
+        CGF.Builder.CreateConstInBoundsGEP2_32(GV->getValueType(), GV, 0, 0);
     VBPtr = CGF.Builder.CreateBitCast(VBPtr, GVPtr->getType()->getPointerTo(0),
                                       "vbptr." + VBT->ReusingBase->getName());
     CGF.Builder.CreateStore(GVPtr, VBPtr);
@@ -1255,7 +1258,7 @@
       // FIXME: Update the code that emits this adjustment in thunks prologues.
       This = CGF.Builder.CreateConstGEP1_32(This, StaticOffset.getQuantity());
     } else {
-      This = CGF.Builder.CreateConstInBoundsGEP1_32(This,
+      This = CGF.Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, This,
                                                     StaticOffset.getQuantity());
     }
   }
@@ -1310,8 +1313,8 @@
 
   This = CGF.Builder.CreateBitCast(This, charPtrTy);
   assert(Adjustment.isPositive());
-  This =
-      CGF.Builder.CreateConstInBoundsGEP1_32(This, -Adjustment.getQuantity());
+  This = CGF.Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, This,
+                                                -Adjustment.getQuantity());
   return CGF.Builder.CreateBitCast(This, thisTy);
 }
 
@@ -1550,8 +1553,8 @@
                                  llvm::ConstantInt::get(CGM.IntTy, 1)};
     // Create a GEP which points just after the first entry in the VFTable,
     // this should be the location of the first virtual method.
-    llvm::Constant *VTableGEP =
-        llvm::ConstantExpr::getInBoundsGetElementPtr(VTable, GEPIndices);
+    llvm::Constant *VTableGEP = llvm::ConstantExpr::getInBoundsGetElementPtr(
+        VTable->getValueType(), VTable, GEPIndices);
     if (llvm::GlobalValue::isWeakForLinker(VFTableLinkage)) {
       VFTableLinkage = llvm::GlobalValue::ExternalLinkage;
       if (C)
@@ -1871,7 +1874,7 @@
   }
 
   if (RA.NonVirtual)
-    V = CGF.Builder.CreateConstInBoundsGEP1_32(V, RA.NonVirtual);
+    V = CGF.Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, V, RA.NonVirtual);
 
   // Cast back to the original type.
   return CGF.Builder.CreateBitCast(V, Ret->getType());
@@ -3020,13 +3023,15 @@
   if (CHD->isWeakForLinker())
     CHD->setComdat(CGM.getModule().getOrInsertComdat(CHD->getName()));
 
+  auto *Bases = getBaseClassArray(Classes);
+
   // Initialize the base class ClassHierarchyDescriptor.
   llvm::Constant *Fields[] = {
       llvm::ConstantInt::get(CGM.IntTy, 0), // Unknown
       llvm::ConstantInt::get(CGM.IntTy, Flags),
       llvm::ConstantInt::get(CGM.IntTy, Classes.size()),
       ABI.getImageRelativeConstant(llvm::ConstantExpr::getInBoundsGetElementPtr(
-          getBaseClassArray(Classes),
+          Bases->getValueType(), Bases,
           llvm::ArrayRef<llvm::Value *>(GEPIndices))),
   };
   CHD->setInitializer(llvm::ConstantStruct::get(Type, Fields));
@@ -3193,8 +3198,8 @@
 }
 
 llvm::Constant *
-MicrosoftCXXABI::getAddrOfCXXHandlerMapEntry(QualType Type,
-                                             QualType CatchHandlerType) {
+MicrosoftCXXABI::getAddrOfCXXCatchHandlerType(QualType Type,
+                                              QualType CatchHandlerType) {
   // TypeDescriptors for exceptions never have qualified pointer types,
-  // qualifiers are stored seperately in order to support qualification
+  // qualifiers are stored separately in order to support qualification
   // conversions.
@@ -3203,16 +3208,6 @@
 
   bool IsReference = CatchHandlerType->isReferenceType();
 
-  SmallString<256> MangledName;
-  {
-    llvm::raw_svector_ostream Out(MangledName);
-    getMangleContext().mangleCXXHandlerMapEntry(Type, IsConst, IsVolatile,
-                                                IsReference, Out);
-  }
-
-  if (llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(MangledName))
-    return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy);
-
   uint32_t Flags = 0;
   if (IsConst)
     Flags |= 1;
@@ -3221,15 +3216,24 @@
   if (IsReference)
     Flags |= 8;
 
+  SmallString<256> MangledName;
+  {
+    llvm::raw_svector_ostream Out(MangledName);
+    getMangleContext().mangleCXXCatchHandlerType(Type, Flags, Out);
+  }
+
+  if (llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(MangledName))
+    return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy);
+
   llvm::Constant *Fields[] = {
-      llvm::ConstantInt::get(CGM.IntTy, Flags),                // Flags
-      getImageRelativeConstant(getAddrOfRTTIDescriptor(Type)), // TypeDescriptor
+      llvm::ConstantInt::get(CGM.IntTy, Flags), // Flags
+      getAddrOfRTTIDescriptor(Type),            // TypeDescriptor
   };
-  llvm::StructType *HandlerMapEntryType = getHandlerMapEntryType();
+  llvm::StructType *CatchHandlerTypeType = getCatchHandlerTypeType();
   auto *Var = new llvm::GlobalVariable(
-      CGM.getModule(), HandlerMapEntryType, /*Constant=*/true,
+      CGM.getModule(), CatchHandlerTypeType, /*Constant=*/true,
       llvm::GlobalValue::PrivateLinkage,
-      llvm::ConstantStruct::get(HandlerMapEntryType, Fields),
+      llvm::ConstantStruct::get(CatchHandlerTypeType, Fields),
       StringRef(MangledName));
   Var->setUnnamedAddr(true);
   Var->setSection("llvm.metadata");
@@ -3596,9 +3600,10 @@
   //         - a standard pointer conversion (4.10) not involving conversions to
   //           pointers to private or protected or ambiguous classes
   //
-  // All pointers are convertible to pointer-to-void so ensure that it is in the
-  // CatchableTypeArray.
-  if (IsPointer)
+  // C++14 [conv.ptr]p2:
+  //   A prvalue of type "pointer to cv T," where T is an object type, can be
+  //   converted to a prvalue of type "pointer to cv void".
+  if (IsPointer && T->getPointeeType()->isObjectType())
     CatchableTypes.insert(getCatchableType(getContext().VoidPtrTy));
 
   // C++14 [except.handle]p3:
diff --git a/lib/CodeGen/ModuleBuilder.cpp b/lib/CodeGen/ModuleBuilder.cpp
index 03771e1..25e5740 100644
--- a/lib/CodeGen/ModuleBuilder.cpp
+++ b/lib/CodeGen/ModuleBuilder.cpp
@@ -63,7 +63,7 @@
         CoverageInfo(CoverageInfo),
         M(new llvm::Module(ModuleName, C)) {}
 
-    virtual ~CodeGeneratorImpl() {
+    ~CodeGeneratorImpl() override {
       // There should normally not be any leftover inline method definitions.
       assert(DeferredInlineMethodDefinitions.empty() ||
              Diags.hasErrorOccurred());
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 43cf791..48c85e6 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -37,7 +37,8 @@
                                unsigned LastIndex) {
   // Alternatively, we could emit this as a loop in the source.
   for (unsigned I = FirstIndex; I <= LastIndex; ++I) {
-    llvm::Value *Cell = Builder.CreateConstInBoundsGEP1_32(Array, I);
+    llvm::Value *Cell =
+        Builder.CreateConstInBoundsGEP1_32(Builder.getInt8Ty(), Array, I);
     Builder.CreateStore(Value, Cell);
   }
 }
@@ -339,9 +340,15 @@
   //
   // FIXME: This needs to be generalized to handle classes as well.
   const RecordDecl *RD = RT->getDecl();
-  if (!RD->isStruct() || isa<CXXRecordDecl>(RD))
+  if (!RD->isStruct())
     return false;
 
+  // A CXXRecordDecl can be expanded only if it is C-like.
+  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
+    if (!CXXRD->isCLike())
+      return false;
+  }
+
   uint64_t Size = 0;
 
   for (const auto *FD : RD->fields()) {
@@ -1352,7 +1359,8 @@
   } else {
     // 9 is %eflags, which doesn't get a size on Darwin for some
     // reason.
-    Builder.CreateStore(Four8, Builder.CreateConstInBoundsGEP1_32(Address, 9));
+    Builder.CreateStore(
+        Four8, Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, Address, 9));
 
     // 11-16 are st(0..5).  Not sure why we stop at 5.
     // These have size 12, which is sizeof(long double) on
@@ -1617,7 +1625,7 @@
     : X86_64TargetCodeGenInfo(CGT, HasAVX) {}
 
   void getDependentLibraryOption(llvm::StringRef Lib,
-                                 llvm::SmallString<24> &Opt) const {
+                                 llvm::SmallString<24> &Opt) const override {
     Opt = "\01";
     Opt += Lib;
   }
@@ -2766,8 +2774,8 @@
 static llvm::Value *EmitVAArgFromMemory(llvm::Value *VAListAddr,
                                         QualType Ty,
                                         CodeGenFunction &CGF) {
-  llvm::Value *overflow_arg_area_p =
-    CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_p");
+  llvm::Value *overflow_arg_area_p = CGF.Builder.CreateStructGEP(
+      nullptr, VAListAddr, 2, "overflow_arg_area_p");
   llvm::Value *overflow_arg_area =
     CGF.Builder.CreateLoad(overflow_arg_area_p, "overflow_arg_area");
 
@@ -2847,14 +2855,16 @@
   llvm::Value *gp_offset_p = nullptr, *gp_offset = nullptr;
   llvm::Value *fp_offset_p = nullptr, *fp_offset = nullptr;
   if (neededInt) {
-    gp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "gp_offset_p");
+    gp_offset_p =
+        CGF.Builder.CreateStructGEP(nullptr, VAListAddr, 0, "gp_offset_p");
     gp_offset = CGF.Builder.CreateLoad(gp_offset_p, "gp_offset");
     InRegs = llvm::ConstantInt::get(CGF.Int32Ty, 48 - neededInt * 8);
     InRegs = CGF.Builder.CreateICmpULE(gp_offset, InRegs, "fits_in_gp");
   }
 
   if (neededSSE) {
-    fp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 1, "fp_offset_p");
+    fp_offset_p =
+        CGF.Builder.CreateStructGEP(nullptr, VAListAddr, 1, "fp_offset_p");
     fp_offset = CGF.Builder.CreateLoad(fp_offset_p, "fp_offset");
     llvm::Value *FitsInFP =
       llvm::ConstantInt::get(CGF.Int32Ty, 176 - neededSSE * 16);
@@ -2882,9 +2892,8 @@
   // simple assembling of a structure from scattered addresses has many more
   // loads than necessary. Can we clean this up?
   llvm::Type *LTy = CGF.ConvertTypeForMem(Ty);
-  llvm::Value *RegAddr =
-    CGF.Builder.CreateLoad(CGF.Builder.CreateStructGEP(VAListAddr, 3),
-                           "reg_save_area");
+  llvm::Value *RegAddr = CGF.Builder.CreateLoad(
+      CGF.Builder.CreateStructGEP(nullptr, VAListAddr, 3), "reg_save_area");
   if (neededInt && neededSSE) {
     // FIXME: Cleanup.
     assert(AI.isDirect() && "Unexpected ABI info for mixed regs");
@@ -2904,9 +2913,9 @@
     llvm::Value *RegHiAddr = TyLo->isFPOrFPVectorTy() ? GPAddr : FPAddr;
     llvm::Value *V =
       CGF.Builder.CreateLoad(CGF.Builder.CreateBitCast(RegLoAddr, PTyLo));
-    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0));
+    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(ST, Tmp, 0));
     V = CGF.Builder.CreateLoad(CGF.Builder.CreateBitCast(RegHiAddr, PTyHi));
-    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));
+    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(ST, Tmp, 1));
 
     RegAddr = CGF.Builder.CreateBitCast(Tmp,
                                         llvm::PointerType::getUnqual(LTy));
@@ -2943,10 +2952,10 @@
     Tmp = CGF.Builder.CreateBitCast(Tmp, ST->getPointerTo());
     V = CGF.Builder.CreateLoad(CGF.Builder.CreateBitCast(RegAddrLo,
                                                          DblPtrTy));
-    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0));
+    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(ST, Tmp, 0));
     V = CGF.Builder.CreateLoad(CGF.Builder.CreateBitCast(RegAddrHi,
                                                          DblPtrTy));
-    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));
+    CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(ST, Tmp, 1));
     RegAddr = CGF.Builder.CreateBitCast(Tmp,
                                         llvm::PointerType::getUnqual(LTy));
   }
@@ -3781,10 +3790,12 @@
     ImagAddr = Builder.CreateIntToPtr(ImagAddr, PBaseTy);
     llvm::Value *Real = Builder.CreateLoad(RealAddr, false, ".vareal");
     llvm::Value *Imag = Builder.CreateLoad(ImagAddr, false, ".vaimag");
-    llvm::Value *Ptr = CGF.CreateTempAlloca(CGT.ConvertTypeForMem(Ty),
-                                            "vacplx");
-    llvm::Value *RealPtr = Builder.CreateStructGEP(Ptr, 0, ".real");
-    llvm::Value *ImagPtr = Builder.CreateStructGEP(Ptr, 1, ".imag");
+    llvm::AllocaInst *Ptr =
+        CGF.CreateTempAlloca(CGT.ConvertTypeForMem(Ty), "vacplx");
+    llvm::Value *RealPtr =
+        Builder.CreateStructGEP(Ptr->getAllocatedType(), Ptr, 0, ".real");
+    llvm::Value *ImagPtr =
+        Builder.CreateStructGEP(Ptr->getAllocatedType(), Ptr, 1, ".imag");
     Builder.CreateStore(Real, RealPtr, false);
     Builder.CreateStore(Imag, ImagPtr, false);
     return Ptr;
@@ -3904,8 +3915,8 @@
   llvm::Value *EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty,
                               CodeGenFunction &CGF) const;
 
-  virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
-                                 CodeGenFunction &CGF) const override {
+  llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
+                         CodeGenFunction &CGF) const override {
     return isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF)
                          : EmitAAPCSVAArg(VAListAddr, Ty, CGF);
   }
@@ -3916,13 +3927,15 @@
   AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind Kind)
       : TargetCodeGenInfo(new AArch64ABIInfo(CGT, Kind)) {}
 
-  StringRef getARCRetainAutoreleasedReturnValueMarker() const {
+  StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
     return "mov\tfp, fp\t\t; marker for objc_retainAutoreleaseReturnValue";
   }
 
-  int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const { return 31; }
+  int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
+    return 31;
+  }
 
-  virtual bool doesReturnSlotInterfereWithArgs() const { return false; }
+  bool doesReturnSlotInterfereWithArgs() const override { return false; }
 };
 }
 
@@ -4119,13 +4132,15 @@
   int RegSize = IsIndirect ? 8 : getContext().getTypeSize(Ty) / 8;
   if (!IsFPR) {
     // 3 is the field number of __gr_offs
-    reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p");
+    reg_offs_p =
+        CGF.Builder.CreateStructGEP(nullptr, VAListAddr, 3, "gr_offs_p");
     reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs");
     reg_top_index = 1; // field number for __gr_top
     RegSize = llvm::RoundUpToAlignment(RegSize, 8);
   } else {
     // 4 is the field number of __vr_offs.
-    reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p");
+    reg_offs_p =
+        CGF.Builder.CreateStructGEP(nullptr, VAListAddr, 4, "vr_offs_p");
     reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs");
     reg_top_index = 2; // field number for __vr_top
     RegSize = 16 * NumRegs;
@@ -4186,8 +4201,8 @@
   CGF.EmitBlock(InRegBlock);
 
   llvm::Value *reg_top_p = nullptr, *reg_top = nullptr;
-  reg_top_p =
-      CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, "reg_top_p");
+  reg_top_p = CGF.Builder.CreateStructGEP(nullptr, VAListAddr, reg_top_index,
+                                          "reg_top_p");
   reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top");
   llvm::Value *BaseAddr = CGF.Builder.CreateGEP(reg_top, reg_offs);
   llvm::Value *RegAddr = nullptr;
@@ -4210,7 +4225,7 @@
     assert(!IsIndirect && "Homogeneous aggregates should be passed directly");
     llvm::Type *BaseTy = CGF.ConvertType(QualType(Base, 0));
     llvm::Type *HFATy = llvm::ArrayType::get(BaseTy, NumMembers);
-    llvm::Value *Tmp = CGF.CreateTempAlloca(HFATy);
+    llvm::AllocaInst *Tmp = CGF.CreateTempAlloca(HFATy);
     int Offset = 0;
 
     if (CGF.CGM.getDataLayout().isBigEndian() && Ctx.getTypeSize(Base) < 128)
@@ -4221,7 +4236,8 @@
       llvm::Value *LoadAddr = CGF.Builder.CreateGEP(BaseAddr, BaseOffset);
       LoadAddr = CGF.Builder.CreateBitCast(
           LoadAddr, llvm::PointerType::getUnqual(BaseTy));
-      llvm::Value *StoreAddr = CGF.Builder.CreateStructGEP(Tmp, i);
+      llvm::Value *StoreAddr =
+          CGF.Builder.CreateStructGEP(Tmp->getAllocatedType(), Tmp, i);
 
       llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr);
       CGF.Builder.CreateStore(Elem, StoreAddr);
@@ -4254,7 +4270,7 @@
   CGF.EmitBlock(OnStackBlock);
 
   llvm::Value *stack_p = nullptr, *OnStackAddr = nullptr;
-  stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "stack_p");
+  stack_p = CGF.Builder.CreateStructGEP(nullptr, VAListAddr, 0, "stack_p");
   OnStackAddr = CGF.Builder.CreateLoad(stack_p, "stack");
 
-  // Again, stack arguments may need realigmnent. In this case both integer and
+  // Again, stack arguments may need realignment. In this case both integer and
@@ -5171,7 +5187,9 @@
 }
 
 bool SystemZABIInfo::isCompoundType(QualType Ty) const {
-  return Ty->isAnyComplexType() || isAggregateTypeForABI(Ty);
+  return (Ty->isAnyComplexType() ||
+          Ty->isVectorType() ||
+          isAggregateTypeForABI(Ty));
 }
 
 bool SystemZABIInfo::isFPArgumentType(QualType Ty) const {
@@ -5206,11 +5224,12 @@
 
     // Check the fields.
     for (const auto *FD : RD->fields()) {
-      // Empty bitfields don't affect things either way.
+      // For compatibility with GCC, ignore empty bitfields in C++ mode.
       // Unlike isSingleElementStruct(), empty structure and array fields
       // do count.  So do anonymous bitfields that aren't zero-sized.
-      if (FD->isBitField() && FD->getBitWidthValue(getContext()) == 0)
-        return true;
+      if (getContext().getLangOpts().CPlusPlus &&
+          FD->isBitField() && FD->getBitWidthValue(getContext()) == 0)
+        continue;
 
       // Unlike isSingleElementStruct(), arrays do not count.
       // Nested isFPArgumentType structures still do though.
@@ -5242,17 +5261,21 @@
   // Every argument occupies 8 bytes and is passed by preference in either
   // GPRs or FPRs.
   Ty = CGF.getContext().getCanonicalType(Ty);
+  llvm::Type *ArgTy = CGF.ConvertTypeForMem(Ty);
+  llvm::Type *APTy = llvm::PointerType::getUnqual(ArgTy);
   ABIArgInfo AI = classifyArgumentType(Ty);
-  bool InFPRs = isFPArgumentType(Ty);
-
-  llvm::Type *APTy = llvm::PointerType::getUnqual(CGF.ConvertTypeForMem(Ty));
   bool IsIndirect = AI.isIndirect();
+  bool InFPRs = false;
   unsigned UnpaddedBitSize;
   if (IsIndirect) {
     APTy = llvm::PointerType::getUnqual(APTy);
     UnpaddedBitSize = 64;
-  } else
+  } else {
+    if (AI.getCoerceToType())
+      ArgTy = AI.getCoerceToType();
+    InFPRs = ArgTy->isFloatTy() || ArgTy->isDoubleTy();
     UnpaddedBitSize = getContext().getTypeSize(Ty);
+  }
   unsigned PaddedBitSize = 64;
   assert((UnpaddedBitSize <= PaddedBitSize) && "Invalid argument size.");
 
@@ -5272,8 +5295,8 @@
     RegPadding = Padding; // values are passed in the low bits of a GPR
   }
 
-  llvm::Value *RegCountPtr =
-    CGF.Builder.CreateStructGEP(VAListAddr, RegCountField, "reg_count_ptr");
+  llvm::Value *RegCountPtr = CGF.Builder.CreateStructGEP(
+      nullptr, VAListAddr, RegCountField, "reg_count_ptr");
   llvm::Value *RegCount = CGF.Builder.CreateLoad(RegCountPtr, "reg_count");
   llvm::Type *IndexTy = RegCount->getType();
   llvm::Value *MaxRegsV = llvm::ConstantInt::get(IndexTy, MaxRegs);
@@ -5297,7 +5320,7 @@
   llvm::Value *RegOffset =
     CGF.Builder.CreateAdd(ScaledRegCount, RegBase, "reg_offset");
   llvm::Value *RegSaveAreaPtr =
-    CGF.Builder.CreateStructGEP(VAListAddr, 3, "reg_save_area_ptr");
+      CGF.Builder.CreateStructGEP(nullptr, VAListAddr, 3, "reg_save_area_ptr");
   llvm::Value *RegSaveArea =
     CGF.Builder.CreateLoad(RegSaveAreaPtr, "reg_save_area");
   llvm::Value *RawRegAddr =
@@ -5316,8 +5339,8 @@
   CGF.EmitBlock(InMemBlock);
 
   // Work out the address of a stack argument.
-  llvm::Value *OverflowArgAreaPtr =
-    CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_ptr");
+  llvm::Value *OverflowArgAreaPtr = CGF.Builder.CreateStructGEP(
+      nullptr, VAListAddr, 2, "overflow_arg_area_ptr");
   llvm::Value *OverflowArgArea =
     CGF.Builder.CreateLoad(OverflowArgAreaPtr, "overflow_arg_area");
   llvm::Value *PaddingV = llvm::ConstantInt::get(IndexTy, Padding);