Update Clang for 3.5 rebase (r209713).

Change-Id: I8c9133b0f8f776dc915f270b60f94962e771bc83
diff --git a/lib/CodeGen/Android.mk b/lib/CodeGen/Android.mk
index 1c4a0b7..85805e4 100644
--- a/lib/CodeGen/Android.mk
+++ b/lib/CodeGen/Android.mk
@@ -40,9 +40,12 @@
   CGObjCMac.cpp \
   CGObjCRuntime.cpp \
   CGOpenCLRuntime.cpp \
+  CGOpenMPRuntime.cpp \
+  CGLoopInfo.cpp \
   CGRTTI.cpp \
   CGRecordLayoutBuilder.cpp \
   CGStmt.cpp \
+  CGStmtOpenMP.cpp \
   CGVTT.cpp \
   CGVTables.cpp \
   CodeGenABITypes.cpp \
@@ -54,6 +57,7 @@
   CodeGenTypes.cpp \
   ItaniumCXXABI.cpp \
   MicrosoftCXXABI.cpp \
+  MicrosoftRTTI.cpp \
   ModuleBuilder.cpp \
   TargetInfo.cpp
 
diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp
index 17f5f4d..fc6c594 100644
--- a/lib/CodeGen/BackendUtil.cpp
+++ b/lib/CodeGen/BackendUtil.cpp
@@ -14,6 +14,7 @@
 #include "clang/Frontend/CodeGenOptions.h"
 #include "clang/Frontend/FrontendDiagnostic.h"
 #include "clang/Frontend/Utils.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/Bitcode/BitcodeWriterPass.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
@@ -112,7 +113,8 @@
                      Module *M)
     : Diags(_Diags), CodeGenOpts(CGOpts), TargetOpts(TOpts), LangOpts(LOpts),
       TheModule(M), CodeGenerationTime("Code Generation Time"),
-      CodeGenPasses(0), PerModulePasses(0), PerFunctionPasses(0) {}
+      CodeGenPasses(nullptr), PerModulePasses(nullptr),
+      PerFunctionPasses(nullptr) {}
 
   ~EmitAssemblyHelper() {
     delete CodeGenPasses;
@@ -247,6 +249,7 @@
   PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP;
   PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop;
 
+  PMBuilder.DisableTailCalls = CodeGenOpts.DisableTailCalls;
   PMBuilder.DisableUnitAtATime = !CodeGenOpts.UnitAtATime;
   PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops;
   PMBuilder.RerollLoops = CodeGenOpts.RerollLoops;
@@ -334,6 +337,8 @@
 
   // Set up the per-module pass manager.
   PassManager *MPM = getPerModulePasses();
+  if (CodeGenOpts.VerifyModule)
+    MPM->add(createDebugInfoVerifierPass());
 
   if (!CodeGenOpts.DisableGCov &&
       (CodeGenOpts.EmitGcovArcs || CodeGenOpts.EmitGcovNotes)) {
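(Context for the two added lines above: they schedule LLVM's debug-info verifier on the per-module pass manager. A minimal sketch of the 3.5-era legacy pass-manager calls involved — an illustrative helper, not part of the patch; createDebugInfoVerifierPass is the pass the hunk adds, createVerifierPass its IR-level counterpart:

    #include "llvm/IR/Verifier.h"
    #include "llvm/PassManager.h"

    // Illustrative sketch: schedule both debug-info and IR verification on a
    // legacy PassManager, mirroring what the hunk above does conditionally.
    static void addVerification(llvm::PassManager &MPM) {
      MPM.add(llvm::createDebugInfoVerifierPass()); // checks debug metadata
      MPM.add(llvm::createVerifierPass());          // checks IR well-formedness
    }
)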
@@ -363,32 +368,19 @@
   if (!TheTarget) {
     if (MustCreateTM)
       Diags.Report(diag::err_fe_unable_to_create_target) << Error;
-    return 0;
+    return nullptr;
   }
 
-  // FIXME: Expose these capabilities via actual APIs!!!! Aside from just
-  // being gross, this is also totally broken if we ever care about
-  // concurrency.
-
-  TargetMachine::setAsmVerbosityDefault(CodeGenOpts.AsmVerbose);
-
-  TargetMachine::setFunctionSections(CodeGenOpts.FunctionSections);
-  TargetMachine::setDataSections    (CodeGenOpts.DataSections);
-
-  // FIXME: Parse this earlier.
-  llvm::CodeModel::Model CM;
-  if (CodeGenOpts.CodeModel == "small") {
-    CM = llvm::CodeModel::Small;
-  } else if (CodeGenOpts.CodeModel == "kernel") {
-    CM = llvm::CodeModel::Kernel;
-  } else if (CodeGenOpts.CodeModel == "medium") {
-    CM = llvm::CodeModel::Medium;
-  } else if (CodeGenOpts.CodeModel == "large") {
-    CM = llvm::CodeModel::Large;
-  } else {
-    assert(CodeGenOpts.CodeModel.empty() && "Invalid code model!");
-    CM = llvm::CodeModel::Default;
-  }
+  unsigned CodeModel =
+    llvm::StringSwitch<unsigned>(CodeGenOpts.CodeModel)
+      .Case("small", llvm::CodeModel::Small)
+      .Case("kernel", llvm::CodeModel::Kernel)
+      .Case("medium", llvm::CodeModel::Medium)
+      .Case("large", llvm::CodeModel::Large)
+      .Case("default", llvm::CodeModel::Default)
+      .Default(~0u);
+  assert(CodeModel != ~0u && "invalid code model!");
+  llvm::CodeModel::Model CM = static_cast<llvm::CodeModel::Model>(CodeModel);
 
   SmallVector<const char *, 16> BackendArgs;
   BackendArgs.push_back("clang"); // Fake program name.
@@ -406,7 +398,7 @@
     BackendArgs.push_back(CodeGenOpts.BackendOptions[i].c_str());
   if (CodeGenOpts.NoGlobalMerge)
     BackendArgs.push_back("-global-merge=false");
-  BackendArgs.push_back(0);
+  BackendArgs.push_back(nullptr);
   llvm::cl::ParseCommandLineOptions(BackendArgs.size() - 1,
                                     BackendArgs.data());
 
@@ -491,23 +483,19 @@
   Options.DisableTailCalls = CodeGenOpts.DisableTailCalls;
   Options.TrapFuncName = CodeGenOpts.TrapFuncName;
   Options.PositionIndependentExecutable = LangOpts.PIELevel != 0;
-  Options.EnableSegmentedStacks = CodeGenOpts.EnableSegmentedStacks;
+  Options.FunctionSections = CodeGenOpts.FunctionSections;
+  Options.DataSections = CodeGenOpts.DataSections;
+
+  Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll;
+  Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels;
+  Options.MCOptions.MCUseDwarfDirectory = !CodeGenOpts.NoDwarfDirectoryAsm;
+  Options.MCOptions.MCNoExecStack = CodeGenOpts.NoExecStack;
+  Options.MCOptions.AsmVerbose = CodeGenOpts.AsmVerbose;
 
   TargetMachine *TM = TheTarget->createTargetMachine(Triple, TargetOpts.CPU,
                                                      FeaturesStr, Options,
                                                      RM, CM, OptLevel);
 
-  if (CodeGenOpts.RelaxAll)
-    TM->setMCRelaxAll(true);
-  if (CodeGenOpts.SaveTempLabels)
-    TM->setMCSaveTempLabels(true);
-  if (CodeGenOpts.NoDwarf2CFIAsm)
-    TM->setMCUseCFI(false);
-  if (!CodeGenOpts.NoDwarfDirectoryAsm)
-    TM->setMCUseDwarfDirectory(true);
-  if (CodeGenOpts.NoExecStack)
-    TM->setMCNoExecStack(true);
-
   return TM;
 }
 
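(The hunk above is an API migration: the MC-level knobs formerly poked into the TargetMachine after construction via TM->setMC* now travel inside TargetOptions::MCOptions, so everything is fixed at creation time. A sketch of the resulting sequence, assuming the LLVM 3.5 createTargetMachine signature; the flag values are illustrative:

    #include "llvm/Support/CodeGen.h"
    #include "llvm/Support/TargetRegistry.h"
    #include "llvm/Target/TargetMachine.h"
    #include "llvm/Target/TargetOptions.h"

    // Sketch: MC options are configured up front rather than being set on
    // the TargetMachine afterwards.
    static llvm::TargetMachine *createTM(const llvm::Target *TheTarget,
                                         llvm::StringRef Triple) {
      llvm::TargetOptions Options;
      Options.FunctionSections = true;      // was TargetMachine::setFunctionSections
      Options.MCOptions.MCRelaxAll = false; // was TM->setMCRelaxAll
      Options.MCOptions.AsmVerbose = true;  // was setAsmVerbosityDefault
      return TheTarget->createTargetMachine(
          Triple, /*CPU=*/"", /*Features=*/"", Options, llvm::Reloc::Default,
          llvm::CodeModel::Default, llvm::CodeGenOpt::Default);
    }
)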
@@ -554,7 +542,7 @@
 }
 
 void EmitAssemblyHelper::EmitAssembly(BackendAction Action, raw_ostream *OS) {
-  TimeRegion Region(llvm::TimePassesIsEnabled ? &CodeGenerationTime : 0);
+  TimeRegion Region(llvm::TimePassesIsEnabled ? &CodeGenerationTime : nullptr);
   llvm::formatted_raw_ostream FormattedOS;
 
   bool UsesCodeGen = (Action != Backend_EmitNothing &&
diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp
index 7c7c80c..18eb065 100644
--- a/lib/CodeGen/CGAtomic.cpp
+++ b/lib/CodeGen/CGAtomic.cpp
@@ -255,7 +255,8 @@
   }
 
   // Create all the relevant BB's
-  llvm::BasicBlock *MonotonicBB = 0, *AcquireBB = 0, *SeqCstBB = 0;
+  llvm::BasicBlock *MonotonicBB = nullptr, *AcquireBB = nullptr,
+                   *SeqCstBB = nullptr;
   MonotonicBB = CGF.createBasicBlock("monotonic_fail", CGF.CurFn);
   if (SuccessOrder != llvm::Monotonic && SuccessOrder != llvm::Release)
     AcquireBB = CGF.createBasicBlock("acquire_fail", CGF.CurFn);
@@ -452,17 +453,17 @@
   bool UseLibcall = (Size != Align ||
                      getContext().toBits(sizeChars) > MaxInlineWidthInBits);
 
-  llvm::Value *Ptr, *Order, *OrderFail = 0, *Val1 = 0, *Val2 = 0;
-  Ptr = EmitScalarExpr(E->getPtr());
+  llvm::Value *OrderFail = nullptr, *Val1 = nullptr, *Val2 = nullptr;
+  llvm::Value *Ptr = EmitScalarExpr(E->getPtr());
 
   if (E->getOp() == AtomicExpr::AO__c11_atomic_init) {
     assert(!Dest && "Init does not return a value");
     LValue lvalue = LValue::MakeAddr(Ptr, AtomicTy, alignChars, getContext());
     EmitAtomicInit(E->getVal1(), lvalue);
-    return RValue::get(0);
+    return RValue::get(nullptr);
   }
 
-  Order = EmitScalarExpr(E->getOrder());
+  llvm::Value *Order = EmitScalarExpr(E->getOrder());
 
   switch (E->getOp()) {
   case AtomicExpr::AO__c11_atomic_init:
@@ -696,7 +697,7 @@
     if (!RetTy->isVoidType())
       return Res;
     if (E->getType()->isVoidType())
-      return RValue::get(0);
+      return RValue::get(nullptr);
     return convertTempToRValue(Dest, E->getType(), E->getExprLoc());
   }
 
@@ -751,15 +752,16 @@
       break;
     }
     if (E->getType()->isVoidType())
-      return RValue::get(0);
+      return RValue::get(nullptr);
     return convertTempToRValue(OrigDest, E->getType(), E->getExprLoc());
   }
 
   // Long case, when Order isn't obviously constant.
 
   // Create all the relevant BB's
-  llvm::BasicBlock *MonotonicBB = 0, *AcquireBB = 0, *ReleaseBB = 0,
-                   *AcqRelBB = 0, *SeqCstBB = 0;
+  llvm::BasicBlock *MonotonicBB = nullptr, *AcquireBB = nullptr,
+                   *ReleaseBB = nullptr, *AcqRelBB = nullptr,
+                   *SeqCstBB = nullptr;
   MonotonicBB = createBasicBlock("monotonic", CurFn);
   if (!IsStore)
     AcquireBB = createBasicBlock("acquire", CurFn);
@@ -818,7 +820,7 @@
   // Cleanup and return
   Builder.SetInsertPoint(ContBB);
   if (E->getType()->isVoidType())
-    return RValue::get(0);
+    return RValue::get(nullptr);
   return convertTempToRValue(OrigDest, E->getType(), E->getExprLoc());
 }
 
@@ -896,7 +898,7 @@
 
   // If we're ignoring an aggregate return, don't do anything.
   if (atomics.getEvaluationKind() == TEK_Aggregate && resultSlot.isIgnored())
-    return RValue::getAggregate(0, false);
+    return RValue::getAggregate(nullptr, false);
 
   // The easiest way to do this is to go through memory, but we
   // try not to in some easy cases.
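(The "long case" in the hunks above handles a memory order that is not a compile-time constant by emitting one basic block per legal ordering and dispatching through a switch. When the order does fold to a constant, the mapping from C11 memory_order values is direct; a sketch under that assumption — a hypothetical helper mirroring the switches above:

    #include "llvm/IR/Instructions.h"

    // Map a constant C11 memory_order value (0..5) to the LLVM atomic
    // ordering; orderings illegal for the operation fall back to monotonic,
    // matching how invalid constants are treated above.
    static llvm::AtomicOrdering orderingForC11(int Ord, bool IsStore,
                                               bool IsLoad) {
      switch (Ord) {
      case 1: // memory_order_consume (lowered as acquire)
      case 2: // memory_order_acquire
        if (!IsStore)
          return llvm::Acquire;
        break;
      case 3: // memory_order_release
        if (!IsLoad)
          return llvm::Release;
        break;
      case 4: // memory_order_acq_rel
        if (!IsLoad && !IsStore)
          return llvm::AcquireRelease;
        break;
      case 5: // memory_order_seq_cst
        return llvm::SequentiallyConsistent;
      }
      return llvm::Monotonic; // memory_order_relaxed and invalid orders
    }
)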
diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp
index 15b08d4..21896da 100644
--- a/lib/CodeGen/CGBlocks.cpp
+++ b/lib/CodeGen/CGBlocks.cpp
@@ -30,9 +30,9 @@
 CGBlockInfo::CGBlockInfo(const BlockDecl *block, StringRef name)
   : Name(name), CXXThisIndex(0), CanBeGlobal(false), NeedsCopyDispose(false),
     HasCXXObject(false), UsesStret(false), HasCapturedVariableLayout(false),
-    StructureType(0), Block(block),
-    DominatingIP(0) {
-    
+    StructureType(nullptr), Block(block),
+    DominatingIP(nullptr) {
+
   // Skip asm prefix, if any.  'name' is usually taken directly from
   // the mangled name of the enclosing function.
   if (!name.empty() && name[0] == '\01')
@@ -246,7 +246,7 @@
   // Only records can be unsafe.
   if (!recordType) return true;
 
-  const CXXRecordDecl *record = cast<CXXRecordDecl>(recordType->getDecl());
+  const auto *record = cast<CXXRecordDecl>(recordType->getDecl());
 
   // Maintain semantics for classes with non-trivial dtors or copy ctors.
   if (!record->hasTrivialDestructor()) return false;
@@ -269,7 +269,7 @@
   QualType type = var->getType();
 
   // We can only do this if the variable is const.
-  if (!type.isConstQualified()) return 0;
+  if (!type.isConstQualified()) return nullptr;
 
   // Furthermore, in C++ we have to worry about mutable fields:
   // C++ [dcl.type.cv]p4:
@@ -277,13 +277,13 @@
   //   modified, any attempt to modify a const object during its
   //   lifetime results in undefined behavior.
   if (CGM.getLangOpts().CPlusPlus && !isSafeForCXXConstantCapture(type))
-    return 0;
+    return nullptr;
 
   // If the variable doesn't have any initializer (shouldn't this be
   // invalid?), it's not clear what we should do.  Maybe capture as
   // zero?
   const Expr *init = var->getInit();
-  if (!init) return 0;
+  if (!init) return nullptr;
 
   return CGM.EmitConstantInit(*var, CGF);
 }
@@ -366,7 +366,7 @@
 
     layout.push_back(BlockLayoutChunk(tinfo.second, tinfo.first,
                                       Qualifiers::OCL_None,
-                                      0, llvmType));
+                                      nullptr, llvmType));
   }
 
   // Next, all the block captures.
@@ -661,7 +661,7 @@
     CGBlockInfo *cur = head;
     head = cur->NextBlockInfo;
     delete cur;
-  } while (head != 0);
+  } while (head != nullptr);
 }
 
 /// Emit a block literal expression in the current function.
@@ -769,7 +769,7 @@
     } else if (blockDecl->isConversionFromLambda()) {
       // The lambda capture in a lambda's conversion-to-block-pointer is
       // special; we'll simply emit it directly.
-      src = 0;
+      src = nullptr;
     } else {
       // Just look it up in the locals map, which will give us back a
       // [[type]]*.  If that doesn't work, do the more elaborate DRE
@@ -841,8 +841,9 @@
     } else {
       // Fake up a new variable so that EmitScalarInit doesn't think
       // we're referring to the variable in its own initializer.
-      ImplicitParamDecl blockFieldPseudoVar(/*DC*/ 0, SourceLocation(),
-                                            /*name*/ 0, type);
+      ImplicitParamDecl blockFieldPseudoVar(getContext(), /*DC*/ nullptr,
+                                            SourceLocation(), /*name*/ nullptr,
+                                            type);
 
       // We use one of these or the other depending on whether the
       // reference is nested.
@@ -1019,7 +1020,7 @@
   blockInfo.BlockExpression = blockExpr;
 
   // Compute information about the layout, etc., of this block.
-  computeBlockInfo(*this, 0, blockInfo);
+  computeBlockInfo(*this, nullptr, blockInfo);
 
   // Using that metadata, generate the actual block function.
   llvm::Constant *blockFn;
@@ -1093,7 +1094,7 @@
   // to be local to this function as well, in case they're directly
   // referenced in a block.
   for (DeclMapTy::const_iterator i = ldm.begin(), e = ldm.end(); i != e; ++i) {
-    const VarDecl *var = dyn_cast<VarDecl>(i->first);
+    const auto *var = dyn_cast<VarDecl>(i->first);
     if (var && !var->hasLocalStorage())
       LocalDeclMap[var] = i->second;
   }
@@ -1108,7 +1109,7 @@
   QualType selfTy = getContext().VoidPtrTy;
   IdentifierInfo *II = &CGM.getContext().Idents.get(".block_descriptor");
 
-  ImplicitParamDecl selfDecl(const_cast<BlockDecl*>(blockDecl),
+  ImplicitParamDecl selfDecl(getContext(), const_cast<BlockDecl*>(blockDecl),
                              SourceLocation(), II, selfTy);
   args.push_back(&selfDecl);
 
@@ -1135,6 +1136,7 @@
 
   // Begin generating the function.
   StartFunction(blockDecl, fnType->getReturnType(), fn, fnInfo, args,
+                blockDecl->getLocation(),
                 blockInfo.getBlockExpr()->getBody()->getLocStart());
 
   // Okay.  Undo some of what StartFunction did.
@@ -1236,7 +1238,7 @@
   }
 
   // And resume where we left off.
-  if (resume == 0)
+  if (resume == nullptr)
     Builder.ClearInsertionPoint();
   else
     Builder.SetInsertPoint(resume);
@@ -1278,9 +1280,11 @@
   ASTContext &C = getContext();
 
   FunctionArgList args;
-  ImplicitParamDecl dstDecl(0, SourceLocation(), 0, C.VoidPtrTy);
+  ImplicitParamDecl dstDecl(getContext(), nullptr, SourceLocation(), nullptr,
+                            C.VoidPtrTy);
   args.push_back(&dstDecl);
-  ImplicitParamDecl srcDecl(0, SourceLocation(), 0, C.VoidPtrTy);
+  ImplicitParamDecl srcDecl(getContext(), nullptr, SourceLocation(), nullptr,
+                            C.VoidPtrTy);
   args.push_back(&srcDecl);
 
   const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
@@ -1300,13 +1304,13 @@
   FunctionDecl *FD = FunctionDecl::Create(C,
                                           C.getTranslationUnitDecl(),
                                           SourceLocation(),
-                                          SourceLocation(), II, C.VoidTy, 0,
-                                          SC_Static,
+                                          SourceLocation(), II, C.VoidTy,
+                                          nullptr, SC_Static,
                                           false,
                                           false);
   // Create a scope with an artificial location for the body of this function.
   ArtificialLocation AL(*this, Builder);
-  StartFunction(FD, C.VoidTy, Fn, FI, args, SourceLocation());
+  StartFunction(FD, C.VoidTy, Fn, FI, args);
   AL.Emit();
 
   llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo();
@@ -1395,7 +1399,7 @@
         // storeStrong doesn't over-release) and then call storeStrong.
         // This is a workaround to not having an initStrong call.
         if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
-          llvm::PointerType *ty = cast<llvm::PointerType>(srcValue->getType());
+          auto *ty = cast<llvm::PointerType>(srcValue->getType());
           llvm::Value *null = llvm::ConstantPointerNull::get(ty);
           Builder.CreateStore(null, dstField);
           EmitARCStoreStrongCall(dstField, srcValue, true);
@@ -1452,7 +1456,8 @@
   ASTContext &C = getContext();
 
   FunctionArgList args;
-  ImplicitParamDecl srcDecl(0, SourceLocation(), 0, C.VoidPtrTy);
+  ImplicitParamDecl srcDecl(getContext(), nullptr, SourceLocation(), nullptr,
+                            C.VoidPtrTy);
   args.push_back(&srcDecl);
 
   const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
@@ -1471,12 +1476,12 @@
 
   FunctionDecl *FD = FunctionDecl::Create(C, C.getTranslationUnitDecl(),
                                           SourceLocation(),
-                                          SourceLocation(), II, C.VoidTy, 0,
-                                          SC_Static,
+                                          SourceLocation(), II, C.VoidTy,
+                                          nullptr, SC_Static,
                                           false, false);
   // Create a scope with an artificial location for the body of this function.
   ArtificialLocation AL(*this, Builder);
-  StartFunction(FD, C.VoidTy, Fn, FI, args, SourceLocation());
+  StartFunction(FD, C.VoidTy, Fn, FI, args);
   AL.Emit();
 
   llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo();
@@ -1497,7 +1502,7 @@
     if (capture.isConstant()) continue;
 
     BlockFieldFlags flags;
-    const CXXDestructorDecl *dtor = 0;
+    const CXXDestructorDecl *dtor = nullptr;
 
     bool useARCWeakDestroy = false;
     bool useARCStrongDestroy = false;
@@ -1708,7 +1713,7 @@
                   const Expr *copyExpr)
     : ByrefHelpers(alignment), VarType(type), CopyExpr(copyExpr) {}
 
-  bool needsCopy() const override { return CopyExpr != 0; }
+  bool needsCopy() const override { return CopyExpr != nullptr; }
   void emitCopy(CodeGenFunction &CGF, llvm::Value *destField,
                 llvm::Value *srcField) override {
     if (!CopyExpr) return;
@@ -1737,10 +1742,12 @@
   QualType R = Context.VoidTy;
 
   FunctionArgList args;
-  ImplicitParamDecl dst(0, SourceLocation(), 0, Context.VoidPtrTy);
+  ImplicitParamDecl dst(CGF.getContext(), nullptr, SourceLocation(), nullptr,
+                        Context.VoidPtrTy);
   args.push_back(&dst);
 
-  ImplicitParamDecl src(0, SourceLocation(), 0, Context.VoidPtrTy);
+  ImplicitParamDecl src(CGF.getContext(), nullptr, SourceLocation(), nullptr,
+                        Context.VoidPtrTy);
   args.push_back(&src);
 
   const CGFunctionInfo &FI = CGF.CGM.getTypes().arrangeFreeFunctionDeclaration(
@@ -1761,11 +1768,11 @@
   FunctionDecl *FD = FunctionDecl::Create(Context,
                                           Context.getTranslationUnitDecl(),
                                           SourceLocation(),
-                                          SourceLocation(), II, R, 0,
+                                          SourceLocation(), II, R, nullptr,
                                           SC_Static,
                                           false, false);
 
-  CGF.StartFunction(FD, R, Fn, FI, args, SourceLocation());
+  CGF.StartFunction(FD, R, Fn, FI, args);
 
   if (byrefInfo.needsCopy()) {
     llvm::Type *byrefPtrType = byrefType.getPointerTo(0);
@@ -1809,7 +1816,8 @@
   QualType R = Context.VoidTy;
 
   FunctionArgList args;
-  ImplicitParamDecl src(0, SourceLocation(), 0, Context.VoidPtrTy);
+  ImplicitParamDecl src(CGF.getContext(), nullptr, SourceLocation(), nullptr,
+                        Context.VoidPtrTy);
   args.push_back(&src);
 
   const CGFunctionInfo &FI = CGF.CGM.getTypes().arrangeFreeFunctionDeclaration(
@@ -1831,10 +1839,10 @@
   FunctionDecl *FD = FunctionDecl::Create(Context,
                                           Context.getTranslationUnitDecl(),
                                           SourceLocation(),
-                                          SourceLocation(), II, R, 0,
+                                          SourceLocation(), II, R, nullptr,
                                           SC_Static,
                                           false, false);
-  CGF.StartFunction(FD, R, Fn, FI, args, SourceLocation());
+  CGF.StartFunction(FD, R, Fn, FI, args);
 
   if (byrefInfo.needsDispose()) {
     llvm::Value *V = CGF.GetAddrOfLocalVar(&src);
@@ -1901,7 +1909,7 @@
 
   if (const CXXRecordDecl *record = type->getAsCXXRecordDecl()) {
     const Expr *copyExpr = CGM.getContext().getBlockVarCopyInits(&var);
-    if (!copyExpr && record->hasTrivialDestructor()) return 0;
+    if (!copyExpr && record->hasTrivialDestructor()) return nullptr;
 
     CXXByrefHelpers byrefInfo(emission.Alignment, type, copyExpr);
     return ::buildByrefHelpers(CGM, byrefType, byrefValueIndex, byrefInfo);
@@ -1909,7 +1917,7 @@
 
   // Otherwise, if we don't have a retainable type, there's nothing to do.
   // Note that the runtime does extra copies.
-  if (!type->isObjCRetainableType()) return 0;
+  if (!type->isObjCRetainableType()) return nullptr;
 
   Qualifiers qs = type.getQualifiers();
 
@@ -1923,7 +1931,7 @@
     // These are just bits as far as the runtime is concerned.
     case Qualifiers::OCL_ExplicitNone:
     case Qualifiers::OCL_Autoreleasing:
-      return 0;
+      return nullptr;
 
     // Tell the runtime that this is ARC __weak, called by the
     // byref routines.
@@ -1957,7 +1965,7 @@
              type->isObjCObjectPointerType()) {
     flags |= BLOCK_FIELD_IS_OBJECT;
   } else {
-    return 0;
+    return nullptr;
   }
 
   if (type.isObjCGCWeak())
@@ -2230,9 +2238,8 @@
                                          llvm::Constant *C) {
   if (!CGM.getLangOpts().BlocksRuntimeOptional) return;
 
-  llvm::GlobalValue *GV = cast<llvm::GlobalValue>(C->stripPointerCasts());
-  if (GV->isDeclaration() &&
-      GV->getLinkage() == llvm::GlobalValue::ExternalLinkage)
+  auto *GV = cast<llvm::GlobalValue>(C->stripPointerCasts());
+  if (GV->isDeclaration() && GV->hasExternalLinkage())
     GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage);
 }
 
@@ -2265,7 +2272,8 @@
     return NSConcreteGlobalBlock;
 
   NSConcreteGlobalBlock = GetOrCreateLLVMGlobal("_NSConcreteGlobalBlock",
-                                                Int8PtrTy->getPointerTo(), 0);
+                                                Int8PtrTy->getPointerTo(),
+                                                nullptr);
   configureBlocksRuntimeObject(*this, NSConcreteGlobalBlock);
   return NSConcreteGlobalBlock;
 }
@@ -2275,7 +2283,8 @@
     return NSConcreteStackBlock;
 
   NSConcreteStackBlock = GetOrCreateLLVMGlobal("_NSConcreteStackBlock",
-                                               Int8PtrTy->getPointerTo(), 0);
+                                               Int8PtrTy->getPointerTo(),
+                                               nullptr);
   configureBlocksRuntimeObject(*this, NSConcreteStackBlock);
   return NSConcreteStackBlock;  
 }
diff --git a/lib/CodeGen/CGBlocks.h b/lib/CodeGen/CGBlocks.h
index 020638a..0031e32 100644
--- a/lib/CodeGen/CGBlocks.h
+++ b/lib/CodeGen/CGBlocks.h
@@ -28,15 +28,15 @@
 #include "llvm/IR/Module.h"
 
 namespace llvm {
-  class Module;
-  class Constant;
-  class Function;
-  class GlobalValue;
-  class DataLayout;
-  class FunctionType;
-  class PointerType;
-  class Value;
-  class LLVMContext;
+class Module;
+class Constant;
+class Function;
+class GlobalValue;
+class DataLayout;
+class FunctionType;
+class PointerType;
+class Value;
+class LLVMContext;
 }
 
 namespace clang {
diff --git a/lib/CodeGen/CGBuilder.h b/lib/CodeGen/CGBuilder.h
index fd21e7e..f113b97 100644
--- a/lib/CodeGen/CGBuilder.h
+++ b/lib/CodeGen/CGBuilder.h
@@ -15,12 +15,39 @@
 namespace clang {
 namespace CodeGen {
 
+class CodeGenFunction;
+
+/// \brief This is an IRBuilder insertion helper that forwards to
+/// CodeGenFunction::InsertHelper, which adds necessary metadata to
+/// instructions.
+template <bool PreserveNames>
+class CGBuilderInserter
+  : protected llvm::IRBuilderDefaultInserter<PreserveNames> {
+public:
+  CGBuilderInserter() : CGF(nullptr) {}
+  explicit CGBuilderInserter(CodeGenFunction *CGF) : CGF(CGF) {}
+
+protected:
+  /// \brief This forwards to CodeGenFunction::InsertHelper.
+  void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name,
+                    llvm::BasicBlock *BB,
+                    llvm::BasicBlock::iterator InsertPt) const;
+private:
+  void operator=(const CGBuilderInserter &) LLVM_DELETED_FUNCTION;
+
+  CodeGenFunction *CGF;
+};
+
 // Don't preserve names on values in an optimized build.
 #ifdef NDEBUG
-typedef llvm::IRBuilder<false> CGBuilderTy;
+#define PreserveNames false
 #else
-typedef llvm::IRBuilder<> CGBuilderTy;
+#define PreserveNames true
 #endif
+typedef CGBuilderInserter<PreserveNames> CGBuilderInserterTy;
+typedef llvm::IRBuilder<PreserveNames, llvm::ConstantFolder,
+                        CGBuilderInserterTy> CGBuilderTy;
+#undef PreserveNames
 
 }  // end namespace CodeGen
 }  // end namespace clang
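(The new CGBuilderInserter above hooks IRBuilder's insertion step so that every instruction the builder emits is routed through CodeGenFunction::InsertHelper — which is what lets the newly added CGLoopInfo.cpp attach loop metadata centrally. A self-contained sketch of the same pattern, with an illustrative class name:

    #include "llvm/IR/IRBuilder.h"

    // Sketch: a custom inserter sees each instruction as it is placed into a
    // block, so cross-cutting metadata can be attached in one place.
    template <bool PreserveNames>
    class MDTaggingInserter
        : protected llvm::IRBuilderDefaultInserter<PreserveNames> {
    protected:
      void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name,
                        llvm::BasicBlock *BB,
                        llvm::BasicBlock::iterator InsertPt) const {
        // Default behavior: actually insert (and possibly name) the instruction.
        llvm::IRBuilderDefaultInserter<PreserveNames>::InsertHelper(I, Name, BB,
                                                                    InsertPt);
        // Cross-cutting hook: e.g. tag I with loop metadata here.
      }
    };

    typedef llvm::IRBuilder<true, llvm::ConstantFolder, MDTaggingInserter<true>>
        MDTaggingBuilderTy;
)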
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 5a86bdd..c6ac3cc 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -212,7 +212,7 @@
   default: break;  // Handle intrinsics and libm functions below.
   case Builtin::BI__builtin___CFStringMakeConstantString:
   case Builtin::BI__builtin___NSStringMakeConstantString:
-    return RValue::get(CGM.EmitConstantExpr(E, E->getType(), 0));
+    return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
   case Builtin::BI__builtin_stdarg_start:
   case Builtin::BI__builtin_va_start:
   case Builtin::BI__va_start:
@@ -457,7 +457,7 @@
     // We do need to preserve an insertion point.
     EmitBlock(createBasicBlock("unreachable.cont"));
 
-    return RValue::get(0);
+    return RValue::get(nullptr);
   }
 
   case Builtin::BI__builtin_powi:
@@ -790,7 +790,7 @@
     // We do need to preserve an insertion point.
     EmitBlock(createBasicBlock("builtin_eh_return.cont"));
 
-    return RValue::get(0);
+    return RValue::get(nullptr);
   }
   case Builtin::BI__builtin_unwind_init: {
     Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
@@ -856,7 +856,7 @@
     // We do need to preserve an insertion point.
     EmitBlock(createBasicBlock("longjmp.cont"));
 
-    return RValue::get(0);
+    return RValue::get(nullptr);
   }
   case Builtin::BI__sync_fetch_and_add:
   case Builtin::BI__sync_fetch_and_sub:
@@ -1037,7 +1037,7 @@
       Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
     Store->setAlignment(StoreSize.getQuantity());
     Store->setAtomic(llvm::Release);
-    return RValue::get(0);
+    return RValue::get(nullptr);
   }
 
   case Builtin::BI__sync_synchronize: {
@@ -1049,7 +1049,7 @@
     // to use it with non-atomic loads and stores to get acquire/release
     // semantics.
     Builder.CreateFence(llvm::SequentiallyConsistent);
-    return RValue::get(0);
+    return RValue::get(nullptr);
   }
 
   case Builtin::BI__c11_atomic_is_lock_free:
@@ -1090,7 +1090,7 @@
     Value *Order = EmitScalarExpr(E->getArg(1));
     if (isa<llvm::ConstantInt>(Order)) {
       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
-      AtomicRMWInst *Result = 0;
+      AtomicRMWInst *Result = nullptr;
       switch (ord) {
       case 0:  // memory_order_relaxed
       default: // invalid order
@@ -1190,7 +1190,7 @@
         Store->setOrdering(llvm::SequentiallyConsistent);
         break;
       }
-      return RValue::get(0);
+      return RValue::get(nullptr);
     }
 
     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
@@ -1220,7 +1220,7 @@
     SI->addCase(Builder.getInt32(5), BBs[2]);
 
     Builder.SetInsertPoint(ContBB);
-    return RValue::get(0);
+    return RValue::get(nullptr);
   }
 
   case Builtin::BI__atomic_thread_fence:
@@ -1254,7 +1254,7 @@
         Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
         break;
       }
-      return RValue::get(0);
+      return RValue::get(nullptr);
     }
 
     llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
@@ -1289,7 +1289,7 @@
     SI->addCase(Builder.getInt32(5), SeqCstBB);
 
     Builder.SetInsertPoint(ContBB);
-    return RValue::get(0);
+    return RValue::get(nullptr);
   }
 
     // Library functions with special handling.
@@ -1509,7 +1509,7 @@
   case Builtin::BI__builtin_addressof:
     return RValue::get(EmitLValue(E->getArg(0)).getAddress());
   case Builtin::BI__noop:
-    return RValue::get(0);
+    return RValue::get(nullptr);
   case Builtin::BI_InterlockedCompareExchange: {
     AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
         EmitScalarExpr(E->getArg(0)),
@@ -1637,16 +1637,16 @@
 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {
   switch (getTarget().getTriple().getArch()) {
-  case llvm::Triple::aarch64:
-  case llvm::Triple::aarch64_be:
-    return EmitAArch64BuiltinExpr(BuiltinID, E);
   case llvm::Triple::arm:
   case llvm::Triple::armeb:
   case llvm::Triple::thumb:
   case llvm::Triple::thumbeb:
     return EmitARMBuiltinExpr(BuiltinID, E);
+  case llvm::Triple::aarch64:
+  case llvm::Triple::aarch64_be:
   case llvm::Triple::arm64:
-    return EmitARM64BuiltinExpr(BuiltinID, E);
+  case llvm::Triple::arm64_be:
+    return EmitAArch64BuiltinExpr(BuiltinID, E);
   case llvm::Triple::x86:
   case llvm::Triple::x86_64:
     return EmitX86BuiltinExpr(BuiltinID, E);
@@ -1655,7 +1655,7 @@
   case llvm::Triple::ppc64le:
     return EmitPPCBuiltinExpr(BuiltinID, E);
   default:
-    return 0;
+    return nullptr;
   }
 }
 
@@ -1882,354 +1882,6 @@
       Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
       #NameBase, TypeModifier }
 
-static const NeonIntrinsicInfo AArch64SISDIntrinsicInfo[] = {
-  NEONMAP1(vabdd_f64, aarch64_neon_vabd, AddRetType),
-  NEONMAP1(vabds_f32, aarch64_neon_vabd, AddRetType),
-  NEONMAP1(vabsd_s64, aarch64_neon_vabs, 0),
-  NEONMAP1(vaddd_s64, aarch64_neon_vaddds, 0),
-  NEONMAP1(vaddd_u64, aarch64_neon_vadddu, 0),
-  NEONMAP1(vaddlv_s16, aarch64_neon_saddlv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddlv_s8, aarch64_neon_saddlv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddlv_u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddlv_u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddlvq_s16, aarch64_neon_saddlv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddlvq_s8, aarch64_neon_saddlv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddlvq_u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddlvq_u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddv_f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
-  NEONMAP1(vaddv_s16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddv_s32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddv_s8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddv_u16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddv_u32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddv_u8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddvq_f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
-  NEONMAP1(vaddvq_f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
-  NEONMAP1(vaddvq_s16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddvq_s32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddvq_s64, aarch64_neon_vaddv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddvq_s8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddvq_u16, aarch64_neon_vaddv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddvq_u32, aarch64_neon_vaddv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddvq_u64, aarch64_neon_vaddv, VectorRet | Add1ArgType),
-  NEONMAP1(vaddvq_u8, aarch64_neon_vaddv, VectorRet | Add1ArgType),
-  NEONMAP1(vcaged_f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
-  NEONMAP1(vcages_f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
-  NEONMAP1(vcagtd_f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
-  NEONMAP1(vcagts_f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
-  NEONMAP1(vcaled_f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
-  NEONMAP1(vcales_f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes),
-  NEONMAP1(vcaltd_f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
-  NEONMAP1(vcalts_f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes),
-  NEONMAP1(vceqd_f64, aarch64_neon_fceq, VectorRet | Add2ArgTypes),
-  NEONMAP1(vceqd_s64, aarch64_neon_vceq, VectorRetGetArgs01),
-  NEONMAP1(vceqd_u64, aarch64_neon_vceq, VectorRetGetArgs01),
-  NEONMAP1(vceqs_f32, aarch64_neon_fceq, VectorRet | Add2ArgTypes),
-  NEONMAP1(vceqzd_f64, aarch64_neon_fceq, FpCmpzModifiers),
-  NEONMAP1(vceqzd_s64, aarch64_neon_vceq, VectorRetGetArgs01),
-  NEONMAP1(vceqzd_u64, aarch64_neon_vceq, VectorRetGetArgs01),
-  NEONMAP1(vceqzs_f32, aarch64_neon_fceq, FpCmpzModifiers),
-  NEONMAP1(vcged_f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
-  NEONMAP1(vcged_s64, aarch64_neon_vcge, VectorRetGetArgs01),
-  NEONMAP1(vcged_u64, aarch64_neon_vchs, VectorRetGetArgs01),
-  NEONMAP1(vcges_f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
-  NEONMAP1(vcgezd_f64, aarch64_neon_fcge, FpCmpzModifiers),
-  NEONMAP1(vcgezd_s64, aarch64_neon_vcge, VectorRetGetArgs01),
-  NEONMAP1(vcgezs_f32, aarch64_neon_fcge, FpCmpzModifiers),
-  NEONMAP1(vcgtd_f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
-  NEONMAP1(vcgtd_s64, aarch64_neon_vcgt, VectorRetGetArgs01),
-  NEONMAP1(vcgtd_u64, aarch64_neon_vchi, VectorRetGetArgs01),
-  NEONMAP1(vcgts_f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
-  NEONMAP1(vcgtzd_f64, aarch64_neon_fcgt, FpCmpzModifiers),
-  NEONMAP1(vcgtzd_s64, aarch64_neon_vcgt, VectorRetGetArgs01),
-  NEONMAP1(vcgtzs_f32, aarch64_neon_fcgt, FpCmpzModifiers),
-  NEONMAP1(vcled_f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
-  NEONMAP1(vcled_s64, aarch64_neon_vcge, VectorRetGetArgs01),
-  NEONMAP1(vcled_u64, aarch64_neon_vchs, VectorRetGetArgs01),
-  NEONMAP1(vcles_f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes),
-  NEONMAP1(vclezd_f64, aarch64_neon_fclez, FpCmpzModifiers),
-  NEONMAP1(vclezd_s64, aarch64_neon_vclez, VectorRetGetArgs01),
-  NEONMAP1(vclezs_f32, aarch64_neon_fclez, FpCmpzModifiers),
-  NEONMAP1(vcltd_f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
-  NEONMAP1(vcltd_s64, aarch64_neon_vcgt, VectorRetGetArgs01),
-  NEONMAP1(vcltd_u64, aarch64_neon_vchi, VectorRetGetArgs01),
-  NEONMAP1(vclts_f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes),
-  NEONMAP1(vcltzd_f64, aarch64_neon_fcltz, FpCmpzModifiers),
-  NEONMAP1(vcltzd_s64, aarch64_neon_vcltz, VectorRetGetArgs01),
-  NEONMAP1(vcltzs_f32, aarch64_neon_fcltz, FpCmpzModifiers),
-  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, VectorRet | Add1ArgType),
-  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, VectorRet | Add1ArgType),
-  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, VectorRet | Add1ArgType),
-  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, VectorRet | Add1ArgType),
-  NEONMAP1(vcvtd_f64_s64, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType),
-  NEONMAP1(vcvtd_f64_u64, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType),
-  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType),
-  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType),
-  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType),
-  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType),
-  NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, VectorRet | Add1ArgType),
-  NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, VectorRet | Add1ArgType),
-  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, VectorRet | Add1ArgType),
-  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, VectorRet | Add1ArgType),
-  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, VectorRet | Add1ArgType),
-  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, VectorRet | Add1ArgType),
-  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, VectorRet | Add1ArgType),
-  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, VectorRet | Add1ArgType),
-  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, VectorRet | Add1ArgType),
-  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, VectorRet | Add1ArgType),
-  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, VectorRet | Add1ArgType),
-  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, VectorRet | Add1ArgType),
-  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, VectorRet | Add1ArgType),
-  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, VectorRet | Add1ArgType),
-  NEONMAP1(vcvts_f32_s32, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType),
-  NEONMAP1(vcvts_f32_u32, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType),
-  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType),
-  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType),
-  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType),
-  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType),
-  NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, VectorRet | Add1ArgType),
-  NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, VectorRet | Add1ArgType),
-  NEONMAP1(vcvtxd_f32_f64, aarch64_neon_fcvtxn, 0),
-  NEONMAP0(vdupb_lane_i8),
-  NEONMAP0(vdupb_laneq_i8),
-  NEONMAP0(vdupd_lane_f64),
-  NEONMAP0(vdupd_lane_i64),
-  NEONMAP0(vdupd_laneq_f64),
-  NEONMAP0(vdupd_laneq_i64),
-  NEONMAP0(vduph_lane_i16),
-  NEONMAP0(vduph_laneq_i16),
-  NEONMAP0(vdups_lane_f32),
-  NEONMAP0(vdups_lane_i32),
-  NEONMAP0(vdups_laneq_f32),
-  NEONMAP0(vdups_laneq_i32),
-  NEONMAP0(vfmad_lane_f64),
-  NEONMAP0(vfmad_laneq_f64),
-  NEONMAP0(vfmas_lane_f32),
-  NEONMAP0(vfmas_laneq_f32),
-  NEONMAP0(vget_lane_f32),
-  NEONMAP0(vget_lane_f64),
-  NEONMAP0(vget_lane_i16),
-  NEONMAP0(vget_lane_i32),
-  NEONMAP0(vget_lane_i64),
-  NEONMAP0(vget_lane_i8),
-  NEONMAP0(vgetq_lane_f32),
-  NEONMAP0(vgetq_lane_f64),
-  NEONMAP0(vgetq_lane_i16),
-  NEONMAP0(vgetq_lane_i32),
-  NEONMAP0(vgetq_lane_i64),
-  NEONMAP0(vgetq_lane_i8),
-  NEONMAP1(vmaxnmv_f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
-  NEONMAP1(vmaxnmvq_f32, aarch64_neon_vmaxnmv, 0),
-  NEONMAP1(vmaxnmvq_f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
-  NEONMAP1(vmaxv_f32, aarch64_neon_vpmax, AddRetType | Add1ArgType),
-  NEONMAP1(vmaxv_s16, aarch64_neon_smaxv, VectorRet | Add1ArgType),
-  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, VectorRet | Add1ArgType),
-  NEONMAP1(vmaxv_s8, aarch64_neon_smaxv, VectorRet | Add1ArgType),
-  NEONMAP1(vmaxv_u16, aarch64_neon_umaxv, VectorRet | Add1ArgType),
-  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, VectorRet | Add1ArgType),
-  NEONMAP1(vmaxv_u8, aarch64_neon_umaxv, VectorRet | Add1ArgType),
-  NEONMAP1(vmaxvq_f32, aarch64_neon_vmaxv, 0),
-  NEONMAP1(vmaxvq_f64, aarch64_neon_vpmax, AddRetType | Add1ArgType),
-  NEONMAP1(vmaxvq_s16, aarch64_neon_smaxv, VectorRet | Add1ArgType),
-  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, VectorRet | Add1ArgType),
-  NEONMAP1(vmaxvq_s8, aarch64_neon_smaxv, VectorRet | Add1ArgType),
-  NEONMAP1(vmaxvq_u16, aarch64_neon_umaxv, VectorRet | Add1ArgType),
-  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, VectorRet | Add1ArgType),
-  NEONMAP1(vmaxvq_u8, aarch64_neon_umaxv, VectorRet | Add1ArgType),
-  NEONMAP1(vminnmv_f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
-  NEONMAP1(vminnmvq_f32, aarch64_neon_vminnmv, 0),
-  NEONMAP1(vminnmvq_f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
-  NEONMAP1(vminv_f32, aarch64_neon_vpmin, AddRetType | Add1ArgType),
-  NEONMAP1(vminv_s16, aarch64_neon_sminv, VectorRet | Add1ArgType),
-  NEONMAP1(vminv_s32, aarch64_neon_sminv, VectorRet | Add1ArgType),
-  NEONMAP1(vminv_s8, aarch64_neon_sminv, VectorRet | Add1ArgType),
-  NEONMAP1(vminv_u16, aarch64_neon_uminv, VectorRet | Add1ArgType),
-  NEONMAP1(vminv_u32, aarch64_neon_uminv, VectorRet | Add1ArgType),
-  NEONMAP1(vminv_u8, aarch64_neon_uminv, VectorRet | Add1ArgType),
-  NEONMAP1(vminvq_f32, aarch64_neon_vminv, 0),
-  NEONMAP1(vminvq_f64, aarch64_neon_vpmin, AddRetType | Add1ArgType),
-  NEONMAP1(vminvq_s16, aarch64_neon_sminv, VectorRet | Add1ArgType),
-  NEONMAP1(vminvq_s32, aarch64_neon_sminv, VectorRet | Add1ArgType),
-  NEONMAP1(vminvq_s8, aarch64_neon_sminv, VectorRet | Add1ArgType),
-  NEONMAP1(vminvq_u16, aarch64_neon_uminv, VectorRet | Add1ArgType),
-  NEONMAP1(vminvq_u32, aarch64_neon_uminv, VectorRet | Add1ArgType),
-  NEONMAP1(vminvq_u8, aarch64_neon_uminv, VectorRet | Add1ArgType),
-  NEONMAP0(vmul_n_f64),
-  NEONMAP1(vmull_p64, aarch64_neon_vmull_p64, 0),
-  NEONMAP0(vmulxd_f64),
-  NEONMAP0(vmulxs_f32),
-  NEONMAP1(vnegd_s64, aarch64_neon_vneg, 0),
-  NEONMAP1(vpaddd_f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
-  NEONMAP1(vpaddd_s64, aarch64_neon_vpadd, 0),
-  NEONMAP1(vpaddd_u64, aarch64_neon_vpadd, 0),
-  NEONMAP1(vpadds_f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType),
-  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
-  NEONMAP1(vpmaxnms_f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType),
-  NEONMAP1(vpmaxqd_f64, aarch64_neon_vpmax, AddRetType | Add1ArgType),
-  NEONMAP1(vpmaxs_f32, aarch64_neon_vpmax, AddRetType | Add1ArgType),
-  NEONMAP1(vpminnmqd_f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
-  NEONMAP1(vpminnms_f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType),
-  NEONMAP1(vpminqd_f64, aarch64_neon_vpmin, AddRetType | Add1ArgType),
-  NEONMAP1(vpmins_f32, aarch64_neon_vpmin, AddRetType | Add1ArgType),
-  NEONMAP1(vqabsb_s8, arm_neon_vqabs, VectorRet),
-  NEONMAP1(vqabsd_s64, arm_neon_vqabs, VectorRet),
-  NEONMAP1(vqabsh_s16, arm_neon_vqabs, VectorRet),
-  NEONMAP1(vqabss_s32, arm_neon_vqabs, VectorRet),
-  NEONMAP1(vqaddb_s8, arm_neon_vqadds, VectorRet),
-  NEONMAP1(vqaddb_u8, arm_neon_vqaddu, VectorRet),
-  NEONMAP1(vqaddd_s64, arm_neon_vqadds, VectorRet),
-  NEONMAP1(vqaddd_u64, arm_neon_vqaddu, VectorRet),
-  NEONMAP1(vqaddh_s16, arm_neon_vqadds, VectorRet),
-  NEONMAP1(vqaddh_u16, arm_neon_vqaddu, VectorRet),
-  NEONMAP1(vqadds_s32, arm_neon_vqadds, VectorRet),
-  NEONMAP1(vqadds_u32, arm_neon_vqaddu, VectorRet),
-  NEONMAP0(vqdmlalh_lane_s16),
-  NEONMAP0(vqdmlalh_laneq_s16),
-  NEONMAP1(vqdmlalh_s16, aarch64_neon_vqdmlal, VectorRet),
-  NEONMAP0(vqdmlals_lane_s32),
-  NEONMAP0(vqdmlals_laneq_s32),
-  NEONMAP1(vqdmlals_s32, aarch64_neon_vqdmlal, VectorRet),
-  NEONMAP0(vqdmlslh_lane_s16),
-  NEONMAP0(vqdmlslh_laneq_s16),
-  NEONMAP1(vqdmlslh_s16, aarch64_neon_vqdmlsl, VectorRet),
-  NEONMAP0(vqdmlsls_lane_s32),
-  NEONMAP0(vqdmlsls_laneq_s32),
-  NEONMAP1(vqdmlsls_s32, aarch64_neon_vqdmlsl, VectorRet),
-  NEONMAP1(vqdmulhh_s16, arm_neon_vqdmulh, VectorRet),
-  NEONMAP1(vqdmulhs_s32, arm_neon_vqdmulh, VectorRet),
-  NEONMAP1(vqdmullh_s16, arm_neon_vqdmull, VectorRet),
-  NEONMAP1(vqdmulls_s32, arm_neon_vqdmull, VectorRet),
-  NEONMAP1(vqmovnd_s64, arm_neon_vqmovns, VectorRet),
-  NEONMAP1(vqmovnd_u64, arm_neon_vqmovnu, VectorRet),
-  NEONMAP1(vqmovnh_s16, arm_neon_vqmovns, VectorRet),
-  NEONMAP1(vqmovnh_u16, arm_neon_vqmovnu, VectorRet),
-  NEONMAP1(vqmovns_s32, arm_neon_vqmovns, VectorRet),
-  NEONMAP1(vqmovns_u32, arm_neon_vqmovnu, VectorRet),
-  NEONMAP1(vqmovund_s64, arm_neon_vqmovnsu, VectorRet),
-  NEONMAP1(vqmovunh_s16, arm_neon_vqmovnsu, VectorRet),
-  NEONMAP1(vqmovuns_s32, arm_neon_vqmovnsu, VectorRet),
-  NEONMAP1(vqnegb_s8, arm_neon_vqneg, VectorRet),
-  NEONMAP1(vqnegd_s64, arm_neon_vqneg, VectorRet),
-  NEONMAP1(vqnegh_s16, arm_neon_vqneg, VectorRet),
-  NEONMAP1(vqnegs_s32, arm_neon_vqneg, VectorRet),
-  NEONMAP1(vqrdmulhh_s16, arm_neon_vqrdmulh, VectorRet),
-  NEONMAP1(vqrdmulhs_s32, arm_neon_vqrdmulh, VectorRet),
-  NEONMAP1(vqrshlb_s8, aarch64_neon_vqrshls, VectorRet),
-  NEONMAP1(vqrshlb_u8, aarch64_neon_vqrshlu, VectorRet),
-  NEONMAP1(vqrshld_s64, aarch64_neon_vqrshls, VectorRet),
-  NEONMAP1(vqrshld_u64, aarch64_neon_vqrshlu, VectorRet),
-  NEONMAP1(vqrshlh_s16, aarch64_neon_vqrshls, VectorRet),
-  NEONMAP1(vqrshlh_u16, aarch64_neon_vqrshlu, VectorRet),
-  NEONMAP1(vqrshls_s32, aarch64_neon_vqrshls, VectorRet),
-  NEONMAP1(vqrshls_u32, aarch64_neon_vqrshlu, VectorRet),
-  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_vsqrshrn, VectorRet),
-  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_vuqrshrn, VectorRet),
-  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_vsqrshrn, VectorRet),
-  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_vuqrshrn, VectorRet),
-  NEONMAP1(vqrshrns_n_s32, aarch64_neon_vsqrshrn, VectorRet),
-  NEONMAP1(vqrshrns_n_u32, aarch64_neon_vuqrshrn, VectorRet),
-  NEONMAP1(vqrshrund_n_s64, aarch64_neon_vsqrshrun, VectorRet),
-  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_vsqrshrun, VectorRet),
-  NEONMAP1(vqrshruns_n_s32, aarch64_neon_vsqrshrun, VectorRet),
-  NEONMAP1(vqshlb_n_s8, aarch64_neon_vqshls_n, VectorRet),
-  NEONMAP1(vqshlb_n_u8, aarch64_neon_vqshlu_n, VectorRet),
-  NEONMAP1(vqshlb_s8, aarch64_neon_vqshls, VectorRet),
-  NEONMAP1(vqshlb_u8, aarch64_neon_vqshlu, VectorRet),
-  NEONMAP1(vqshld_n_s64, aarch64_neon_vqshls_n, VectorRet),
-  NEONMAP1(vqshld_n_u64, aarch64_neon_vqshlu_n, VectorRet),
-  NEONMAP1(vqshld_s64, aarch64_neon_vqshls, VectorRet),
-  NEONMAP1(vqshld_u64, aarch64_neon_vqshlu, VectorRet),
-  NEONMAP1(vqshlh_n_s16, aarch64_neon_vqshls_n, VectorRet),
-  NEONMAP1(vqshlh_n_u16, aarch64_neon_vqshlu_n, VectorRet),
-  NEONMAP1(vqshlh_s16, aarch64_neon_vqshls, VectorRet),
-  NEONMAP1(vqshlh_u16, aarch64_neon_vqshlu, VectorRet),
-  NEONMAP1(vqshls_n_s32, aarch64_neon_vqshls_n, VectorRet),
-  NEONMAP1(vqshls_n_u32, aarch64_neon_vqshlu_n, VectorRet),
-  NEONMAP1(vqshls_s32, aarch64_neon_vqshls, VectorRet),
-  NEONMAP1(vqshls_u32, aarch64_neon_vqshlu, VectorRet),
-  NEONMAP1(vqshlub_n_s8, aarch64_neon_vsqshlu, VectorRet),
-  NEONMAP1(vqshlud_n_s64, aarch64_neon_vsqshlu, VectorRet),
-  NEONMAP1(vqshluh_n_s16, aarch64_neon_vsqshlu, VectorRet),
-  NEONMAP1(vqshlus_n_s32, aarch64_neon_vsqshlu, VectorRet),
-  NEONMAP1(vqshrnd_n_s64, aarch64_neon_vsqshrn, VectorRet),
-  NEONMAP1(vqshrnd_n_u64, aarch64_neon_vuqshrn, VectorRet),
-  NEONMAP1(vqshrnh_n_s16, aarch64_neon_vsqshrn, VectorRet),
-  NEONMAP1(vqshrnh_n_u16, aarch64_neon_vuqshrn, VectorRet),
-  NEONMAP1(vqshrns_n_s32, aarch64_neon_vsqshrn, VectorRet),
-  NEONMAP1(vqshrns_n_u32, aarch64_neon_vuqshrn, VectorRet),
-  NEONMAP1(vqshrund_n_s64, aarch64_neon_vsqshrun, VectorRet),
-  NEONMAP1(vqshrunh_n_s16, aarch64_neon_vsqshrun, VectorRet),
-  NEONMAP1(vqshruns_n_s32, aarch64_neon_vsqshrun, VectorRet),
-  NEONMAP1(vqsubb_s8, arm_neon_vqsubs, VectorRet),
-  NEONMAP1(vqsubb_u8, arm_neon_vqsubu, VectorRet),
-  NEONMAP1(vqsubd_s64, arm_neon_vqsubs, VectorRet),
-  NEONMAP1(vqsubd_u64, arm_neon_vqsubu, VectorRet),
-  NEONMAP1(vqsubh_s16, arm_neon_vqsubs, VectorRet),
-  NEONMAP1(vqsubh_u16, arm_neon_vqsubu, VectorRet),
-  NEONMAP1(vqsubs_s32, arm_neon_vqsubs, VectorRet),
-  NEONMAP1(vqsubs_u32, arm_neon_vqsubu, VectorRet),
-  NEONMAP1(vrecped_f64, aarch64_neon_vrecpe, AddRetType),
-  NEONMAP1(vrecpes_f32, aarch64_neon_vrecpe, AddRetType),
-  NEONMAP1(vrecpsd_f64, aarch64_neon_vrecps, AddRetType),
-  NEONMAP1(vrecpss_f32, aarch64_neon_vrecps, AddRetType),
-  NEONMAP1(vrecpxd_f64, aarch64_neon_vrecpx, AddRetType),
-  NEONMAP1(vrecpxs_f32, aarch64_neon_vrecpx, AddRetType),
-  NEONMAP1(vrshld_s64, aarch64_neon_vrshlds, 0),
-  NEONMAP1(vrshld_u64, aarch64_neon_vrshldu, 0),
-  NEONMAP1(vrshrd_n_s64, aarch64_neon_vsrshr, VectorRet),
-  NEONMAP1(vrshrd_n_u64, aarch64_neon_vurshr, VectorRet),
-  NEONMAP1(vrsqrted_f64, aarch64_neon_vrsqrte, AddRetType),
-  NEONMAP1(vrsqrtes_f32, aarch64_neon_vrsqrte, AddRetType),
-  NEONMAP1(vrsqrtsd_f64, aarch64_neon_vrsqrts, AddRetType),
-  NEONMAP1(vrsqrtss_f32, aarch64_neon_vrsqrts, AddRetType),
-  NEONMAP1(vrsrad_n_s64, aarch64_neon_vrsrads_n, 0),
-  NEONMAP1(vrsrad_n_u64, aarch64_neon_vrsradu_n, 0),
-  NEONMAP0(vset_lane_f32),
-  NEONMAP0(vset_lane_f64),
-  NEONMAP0(vset_lane_i16),
-  NEONMAP0(vset_lane_i32),
-  NEONMAP0(vset_lane_i64),
-  NEONMAP0(vset_lane_i8),
-  NEONMAP0(vsetq_lane_f32),
-  NEONMAP0(vsetq_lane_f64),
-  NEONMAP0(vsetq_lane_i16),
-  NEONMAP0(vsetq_lane_i32),
-  NEONMAP0(vsetq_lane_i64),
-  NEONMAP0(vsetq_lane_i8),
-  NEONMAP1(vsha1cq_u32, arm_neon_sha1c, 0),
-  NEONMAP1(vsha1h_u32, arm_neon_sha1h, 0),
-  NEONMAP1(vsha1mq_u32, arm_neon_sha1m, 0),
-  NEONMAP1(vsha1pq_u32, arm_neon_sha1p, 0),
-  NEONMAP1(vshld_n_s64, aarch64_neon_vshld_n, 0),
-  NEONMAP1(vshld_n_u64, aarch64_neon_vshld_n, 0),
-  NEONMAP1(vshld_s64, aarch64_neon_vshlds, 0),
-  NEONMAP1(vshld_u64, aarch64_neon_vshldu, 0),
-  NEONMAP1(vshrd_n_s64, aarch64_neon_vshrds_n, 0),
-  NEONMAP1(vshrd_n_u64, aarch64_neon_vshrdu_n, 0),
-  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, VectorRet),
-  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, VectorRet),
-  NEONMAP1(vsqaddb_u8, aarch64_neon_vsqadd, VectorRet),
-  NEONMAP1(vsqaddd_u64, aarch64_neon_vsqadd, VectorRet),
-  NEONMAP1(vsqaddh_u16, aarch64_neon_vsqadd, VectorRet),
-  NEONMAP1(vsqadds_u32, aarch64_neon_vsqadd, VectorRet),
-  NEONMAP1(vsrad_n_s64, aarch64_neon_vsrads_n, 0),
-  NEONMAP1(vsrad_n_u64, aarch64_neon_vsradu_n, 0),
-  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, VectorRet),
-  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, VectorRet),
-  NEONMAP1(vsubd_s64, aarch64_neon_vsubds, 0),
-  NEONMAP1(vsubd_u64, aarch64_neon_vsubdu, 0),
-  NEONMAP1(vtstd_s64, aarch64_neon_vtstd, VectorRetGetArgs01),
-  NEONMAP1(vtstd_u64, aarch64_neon_vtstd, VectorRetGetArgs01),
-  NEONMAP1(vuqaddb_s8, aarch64_neon_vuqadd, VectorRet),
-  NEONMAP1(vuqaddd_s64, aarch64_neon_vuqadd, VectorRet),
-  NEONMAP1(vuqaddh_s16, aarch64_neon_vuqadd, VectorRet),
-  NEONMAP1(vuqadds_s32, aarch64_neon_vuqadd, VectorRet)
-};
-
 static NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
   NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
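(Context for the tables being rewritten in this file: each NEONMAP* entry pairs a NEON builtin ID with an LLVM intrinsic and type-modifier flags, and the tables are kept sorted by builtin ID so lookup can binary-search them. A standalone sketch of that lookup shape — assumed; the patch's actual helper lies outside these hunks:

    #include <algorithm>

    // Minimal stand-in for a NeonIntrinsicInfo row: builtin ID, target
    // intrinsic, and modifier flags (real entries also carry a name and an
    // alternate intrinsic).
    struct IntrinsicEntry {
      unsigned BuiltinID;
      unsigned LLVMIntrinsic;
      unsigned TypeModifier;
      bool operator<(unsigned RHSBuiltinID) const {
        return BuiltinID < RHSBuiltinID;
      }
    };

    static const IntrinsicEntry *lookupIntrinsic(const IntrinsicEntry *Begin,
                                                 const IntrinsicEntry *End,
                                                 unsigned BuiltinID) {
      // Requires the table to be sorted by BuiltinID.
      const IntrinsicEntry *It = std::lower_bound(Begin, End, BuiltinID);
      if (It != End && It->BuiltinID == BuiltinID)
        return It;
      return nullptr; // builtin not covered by this table
    }
)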
@@ -2427,109 +2079,109 @@
   NEONMAP0(vzipq_v)
 };
 
-static NeonIntrinsicInfo ARM64SIMDIntrinsicMap[] = {
-  NEONMAP1(vabs_v, arm64_neon_abs, 0),
-  NEONMAP1(vabsq_v, arm64_neon_abs, 0),
+static NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
+  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
+  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
   NEONMAP0(vaddhn_v),
-  NEONMAP1(vaesdq_v, arm64_crypto_aesd, 0),
-  NEONMAP1(vaeseq_v, arm64_crypto_aese, 0),
-  NEONMAP1(vaesimcq_v, arm64_crypto_aesimc, 0),
-  NEONMAP1(vaesmcq_v, arm64_crypto_aesmc, 0),
-  NEONMAP1(vcage_v, arm64_neon_facge, 0),
-  NEONMAP1(vcageq_v, arm64_neon_facge, 0),
-  NEONMAP1(vcagt_v, arm64_neon_facgt, 0),
-  NEONMAP1(vcagtq_v, arm64_neon_facgt, 0),
-  NEONMAP1(vcale_v, arm64_neon_facge, 0),
-  NEONMAP1(vcaleq_v, arm64_neon_facge, 0),
-  NEONMAP1(vcalt_v, arm64_neon_facgt, 0),
-  NEONMAP1(vcaltq_v, arm64_neon_facgt, 0),
-  NEONMAP1(vcls_v, arm64_neon_cls, Add1ArgType),
-  NEONMAP1(vclsq_v, arm64_neon_cls, Add1ArgType),
+  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
+  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
+  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
+  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
+  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
+  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
+  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
+  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
+  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
+  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
+  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
+  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
+  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
+  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
   NEONMAP1(vclz_v, ctlz, Add1ArgType),
   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
-  NEONMAP1(vcvt_f16_v, arm64_neon_vcvtfp2hf, 0),
-  NEONMAP1(vcvt_f32_f16, arm64_neon_vcvthf2fp, 0),
+  NEONMAP1(vcvt_f16_v, aarch64_neon_vcvtfp2hf, 0),
+  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
   NEONMAP0(vcvt_f32_v),
-  NEONMAP2(vcvt_n_f32_v, arm64_neon_vcvtfxu2fp, arm64_neon_vcvtfxs2fp, 0),
-  NEONMAP2(vcvt_n_f64_v, arm64_neon_vcvtfxu2fp, arm64_neon_vcvtfxs2fp, 0),
-  NEONMAP1(vcvt_n_s32_v, arm64_neon_vcvtfp2fxs, 0),
-  NEONMAP1(vcvt_n_s64_v, arm64_neon_vcvtfp2fxs, 0),
-  NEONMAP1(vcvt_n_u32_v, arm64_neon_vcvtfp2fxu, 0),
-  NEONMAP1(vcvt_n_u64_v, arm64_neon_vcvtfp2fxu, 0),
+  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
+  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
+  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
+  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
+  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
+  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
   NEONMAP0(vcvtq_f32_v),
-  NEONMAP2(vcvtq_n_f32_v, arm64_neon_vcvtfxu2fp, arm64_neon_vcvtfxs2fp, 0),
-  NEONMAP2(vcvtq_n_f64_v, arm64_neon_vcvtfxu2fp, arm64_neon_vcvtfxs2fp, 0),
-  NEONMAP1(vcvtq_n_s32_v, arm64_neon_vcvtfp2fxs, 0),
-  NEONMAP1(vcvtq_n_s64_v, arm64_neon_vcvtfp2fxs, 0),
-  NEONMAP1(vcvtq_n_u32_v, arm64_neon_vcvtfp2fxu, 0),
-  NEONMAP1(vcvtq_n_u64_v, arm64_neon_vcvtfp2fxu, 0),
-  NEONMAP1(vcvtx_f32_v, arm64_neon_fcvtxn, AddRetType | Add1ArgType),
+  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
+  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
+  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
+  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
+  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
+  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
+  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
   NEONMAP0(vext_v),
   NEONMAP0(vextq_v),
   NEONMAP0(vfma_v),
   NEONMAP0(vfmaq_v),
-  NEONMAP2(vhadd_v, arm64_neon_uhadd, arm64_neon_shadd, Add1ArgType | UnsignedAlts),
-  NEONMAP2(vhaddq_v, arm64_neon_uhadd, arm64_neon_shadd, Add1ArgType | UnsignedAlts),
-  NEONMAP2(vhsub_v, arm64_neon_uhsub, arm64_neon_shsub, Add1ArgType | UnsignedAlts),
-  NEONMAP2(vhsubq_v, arm64_neon_uhsub, arm64_neon_shsub, Add1ArgType | UnsignedAlts),
+  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
+  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
+  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
+  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
   NEONMAP0(vmovl_v),
   NEONMAP0(vmovn_v),
-  NEONMAP1(vmul_v, arm64_neon_pmul, Add1ArgType),
-  NEONMAP1(vmulq_v, arm64_neon_pmul, Add1ArgType),
-  NEONMAP1(vpadd_v, arm64_neon_addp, Add1ArgType),
-  NEONMAP2(vpaddl_v, arm64_neon_uaddlp, arm64_neon_saddlp, UnsignedAlts),
-  NEONMAP2(vpaddlq_v, arm64_neon_uaddlp, arm64_neon_saddlp, UnsignedAlts),
-  NEONMAP1(vpaddq_v, arm64_neon_addp, Add1ArgType),
-  NEONMAP1(vqabs_v, arm64_neon_sqabs, Add1ArgType),
-  NEONMAP1(vqabsq_v, arm64_neon_sqabs, Add1ArgType),
-  NEONMAP2(vqadd_v, arm64_neon_uqadd, arm64_neon_sqadd, Add1ArgType | UnsignedAlts),
-  NEONMAP2(vqaddq_v, arm64_neon_uqadd, arm64_neon_sqadd, Add1ArgType | UnsignedAlts),
-  NEONMAP2(vqdmlal_v, arm64_neon_sqdmull, arm64_neon_sqadd, 0),
-  NEONMAP2(vqdmlsl_v, arm64_neon_sqdmull, arm64_neon_sqsub, 0),
-  NEONMAP1(vqdmulh_v, arm64_neon_sqdmulh, Add1ArgType),
-  NEONMAP1(vqdmulhq_v, arm64_neon_sqdmulh, Add1ArgType),
-  NEONMAP1(vqdmull_v, arm64_neon_sqdmull, Add1ArgType),
-  NEONMAP2(vqmovn_v, arm64_neon_uqxtn, arm64_neon_sqxtn, Add1ArgType | UnsignedAlts),
-  NEONMAP1(vqmovun_v, arm64_neon_sqxtun, Add1ArgType),
-  NEONMAP1(vqneg_v, arm64_neon_sqneg, Add1ArgType),
-  NEONMAP1(vqnegq_v, arm64_neon_sqneg, Add1ArgType),
-  NEONMAP1(vqrdmulh_v, arm64_neon_sqrdmulh, Add1ArgType),
-  NEONMAP1(vqrdmulhq_v, arm64_neon_sqrdmulh, Add1ArgType),
-  NEONMAP2(vqrshl_v, arm64_neon_uqrshl, arm64_neon_sqrshl, Add1ArgType | UnsignedAlts),
-  NEONMAP2(vqrshlq_v, arm64_neon_uqrshl, arm64_neon_sqrshl, Add1ArgType | UnsignedAlts),
-  NEONMAP2(vqshl_n_v, arm64_neon_uqshl, arm64_neon_sqshl, UnsignedAlts),
-  NEONMAP2(vqshl_v, arm64_neon_uqshl, arm64_neon_sqshl, Add1ArgType | UnsignedAlts),
-  NEONMAP2(vqshlq_n_v, arm64_neon_uqshl, arm64_neon_sqshl,UnsignedAlts),
-  NEONMAP2(vqshlq_v, arm64_neon_uqshl, arm64_neon_sqshl, Add1ArgType | UnsignedAlts),
-  NEONMAP2(vqsub_v, arm64_neon_uqsub, arm64_neon_sqsub, Add1ArgType | UnsignedAlts),
-  NEONMAP2(vqsubq_v, arm64_neon_uqsub, arm64_neon_sqsub, Add1ArgType | UnsignedAlts),
-  NEONMAP1(vraddhn_v, arm64_neon_raddhn, Add1ArgType),
-  NEONMAP2(vrecpe_v, arm64_neon_frecpe, arm64_neon_urecpe, 0),
-  NEONMAP2(vrecpeq_v, arm64_neon_frecpe, arm64_neon_urecpe, 0),
-  NEONMAP1(vrecps_v, arm64_neon_frecps, Add1ArgType),
-  NEONMAP1(vrecpsq_v, arm64_neon_frecps, Add1ArgType),
-  NEONMAP2(vrhadd_v, arm64_neon_urhadd, arm64_neon_srhadd, Add1ArgType | UnsignedAlts),
-  NEONMAP2(vrhaddq_v, arm64_neon_urhadd, arm64_neon_srhadd, Add1ArgType | UnsignedAlts),
-  NEONMAP2(vrshl_v, arm64_neon_urshl, arm64_neon_srshl, Add1ArgType | UnsignedAlts),
-  NEONMAP2(vrshlq_v, arm64_neon_urshl, arm64_neon_srshl, Add1ArgType | UnsignedAlts),
-  NEONMAP2(vrsqrte_v, arm64_neon_frsqrte, arm64_neon_ursqrte, 0),
-  NEONMAP2(vrsqrteq_v, arm64_neon_frsqrte, arm64_neon_ursqrte, 0),
-  NEONMAP1(vrsqrts_v, arm64_neon_frsqrts, Add1ArgType),
-  NEONMAP1(vrsqrtsq_v, arm64_neon_frsqrts, Add1ArgType),
-  NEONMAP1(vrsubhn_v, arm64_neon_rsubhn, Add1ArgType),
-  NEONMAP1(vsha1su0q_v, arm64_crypto_sha1su0, 0),
-  NEONMAP1(vsha1su1q_v, arm64_crypto_sha1su1, 0),
-  NEONMAP1(vsha256h2q_v, arm64_crypto_sha256h2, 0),
-  NEONMAP1(vsha256hq_v, arm64_crypto_sha256h, 0),
-  NEONMAP1(vsha256su0q_v, arm64_crypto_sha256su0, 0),
-  NEONMAP1(vsha256su1q_v, arm64_crypto_sha256su1, 0),
+  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
+  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
+  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
+  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
+  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
+  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
+  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
+  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
+  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
+  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
+  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
+  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
+  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
+  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
+  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
+  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
+  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
+  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
+  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
+  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
+  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
+  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
+  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
+  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
+  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
+  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
+  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
+  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
+  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
+  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
+  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
+  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
+  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
+  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
+  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
+  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
+  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
+  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
+  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
+  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
+  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
+  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
+  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
+  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
+  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
+  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
+  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
+  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
+  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
   NEONMAP0(vshl_n_v),
-  NEONMAP2(vshl_v, arm64_neon_ushl, arm64_neon_sshl, Add1ArgType | UnsignedAlts),
+  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
   NEONMAP0(vshll_n_v),
   NEONMAP0(vshlq_n_v),
-  NEONMAP2(vshlq_v, arm64_neon_ushl, arm64_neon_sshl, Add1ArgType | UnsignedAlts),
+  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
   NEONMAP0(vshr_n_v),
   NEONMAP0(vshrn_n_v),
   NEONMAP0(vshrq_n_v),
@@ -2538,199 +2190,199 @@
   NEONMAP0(vtstq_v),
 };
 
-static NeonIntrinsicInfo ARM64SISDIntrinsicMap[] = {
-  NEONMAP1(vabdd_f64, arm64_sisd_fabd, Add1ArgType),
-  NEONMAP1(vabds_f32, arm64_sisd_fabd, Add1ArgType),
-  NEONMAP1(vabsd_s64, arm64_neon_abs, Add1ArgType),
-  NEONMAP1(vaddlv_s32, arm64_neon_saddlv, AddRetType | Add1ArgType),
-  NEONMAP1(vaddlv_u32, arm64_neon_uaddlv, AddRetType | Add1ArgType),
-  NEONMAP1(vaddlvq_s32, arm64_neon_saddlv, AddRetType | Add1ArgType),
-  NEONMAP1(vaddlvq_u32, arm64_neon_uaddlv, AddRetType | Add1ArgType),
-  NEONMAP1(vaddv_f32, arm64_neon_faddv, AddRetType | Add1ArgType),
-  NEONMAP1(vaddv_s32, arm64_neon_saddv, AddRetType | Add1ArgType),
-  NEONMAP1(vaddv_u32, arm64_neon_uaddv, AddRetType | Add1ArgType),
-  NEONMAP1(vaddvq_f32, arm64_neon_faddv, AddRetType | Add1ArgType),
-  NEONMAP1(vaddvq_f64, arm64_neon_faddv, AddRetType | Add1ArgType),
-  NEONMAP1(vaddvq_s32, arm64_neon_saddv, AddRetType | Add1ArgType),
-  NEONMAP1(vaddvq_s64, arm64_neon_saddv, AddRetType | Add1ArgType),
-  NEONMAP1(vaddvq_u32, arm64_neon_uaddv, AddRetType | Add1ArgType),
-  NEONMAP1(vaddvq_u64, arm64_neon_uaddv, AddRetType | Add1ArgType),
-  NEONMAP1(vcaged_f64, arm64_neon_facge, AddRetType | Add1ArgType),
-  NEONMAP1(vcages_f32, arm64_neon_facge, AddRetType | Add1ArgType),
-  NEONMAP1(vcagtd_f64, arm64_neon_facgt, AddRetType | Add1ArgType),
-  NEONMAP1(vcagts_f32, arm64_neon_facgt, AddRetType | Add1ArgType),
-  NEONMAP1(vcaled_f64, arm64_neon_facge, AddRetType | Add1ArgType),
-  NEONMAP1(vcales_f32, arm64_neon_facge, AddRetType | Add1ArgType),
-  NEONMAP1(vcaltd_f64, arm64_neon_facgt, AddRetType | Add1ArgType),
-  NEONMAP1(vcalts_f32, arm64_neon_facgt, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtad_s64_f64, arm64_neon_fcvtas, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtad_u64_f64, arm64_neon_fcvtau, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtas_s32_f32, arm64_neon_fcvtas, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtas_u32_f32, arm64_neon_fcvtau, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtd_n_f64_s64, arm64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtd_n_f64_u64, arm64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtd_n_s64_f64, arm64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtd_n_u64_f64, arm64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtmd_s64_f64, arm64_neon_fcvtms, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtmd_u64_f64, arm64_neon_fcvtmu, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtms_s32_f32, arm64_neon_fcvtms, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtms_u32_f32, arm64_neon_fcvtmu, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtnd_s64_f64, arm64_neon_fcvtns, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtnd_u64_f64, arm64_neon_fcvtnu, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtns_s32_f32, arm64_neon_fcvtns, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtns_u32_f32, arm64_neon_fcvtnu, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtpd_s64_f64, arm64_neon_fcvtps, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtpd_u64_f64, arm64_neon_fcvtpu, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtps_s32_f32, arm64_neon_fcvtps, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtps_u32_f32, arm64_neon_fcvtpu, AddRetType | Add1ArgType),
-  NEONMAP1(vcvts_n_f32_s32, arm64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
-  NEONMAP1(vcvts_n_f32_u32, arm64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
-  NEONMAP1(vcvts_n_s32_f32, arm64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
-  NEONMAP1(vcvts_n_u32_f32, arm64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtxd_f32_f64, arm64_sisd_fcvtxn, 0),
-  NEONMAP1(vmaxnmv_f32, arm64_neon_fmaxnmv, AddRetType | Add1ArgType),
-  NEONMAP1(vmaxnmvq_f32, arm64_neon_fmaxnmv, AddRetType | Add1ArgType),
-  NEONMAP1(vmaxnmvq_f64, arm64_neon_fmaxnmv, AddRetType | Add1ArgType),
-  NEONMAP1(vmaxv_f32, arm64_neon_fmaxv, AddRetType | Add1ArgType),
-  NEONMAP1(vmaxv_s32, arm64_neon_smaxv, AddRetType | Add1ArgType),
-  NEONMAP1(vmaxv_u32, arm64_neon_umaxv, AddRetType | Add1ArgType),
-  NEONMAP1(vmaxvq_f32, arm64_neon_fmaxv, AddRetType | Add1ArgType),
-  NEONMAP1(vmaxvq_f64, arm64_neon_fmaxv, AddRetType | Add1ArgType),
-  NEONMAP1(vmaxvq_s32, arm64_neon_smaxv, AddRetType | Add1ArgType),
-  NEONMAP1(vmaxvq_u32, arm64_neon_umaxv, AddRetType | Add1ArgType),
-  NEONMAP1(vminnmv_f32, arm64_neon_fminnmv, AddRetType | Add1ArgType),
-  NEONMAP1(vminnmvq_f32, arm64_neon_fminnmv, AddRetType | Add1ArgType),
-  NEONMAP1(vminnmvq_f64, arm64_neon_fminnmv, AddRetType | Add1ArgType),
-  NEONMAP1(vminv_f32, arm64_neon_fminv, AddRetType | Add1ArgType),
-  NEONMAP1(vminv_s32, arm64_neon_sminv, AddRetType | Add1ArgType),
-  NEONMAP1(vminv_u32, arm64_neon_uminv, AddRetType | Add1ArgType),
-  NEONMAP1(vminvq_f32, arm64_neon_fminv, AddRetType | Add1ArgType),
-  NEONMAP1(vminvq_f64, arm64_neon_fminv, AddRetType | Add1ArgType),
-  NEONMAP1(vminvq_s32, arm64_neon_sminv, AddRetType | Add1ArgType),
-  NEONMAP1(vminvq_u32, arm64_neon_uminv, AddRetType | Add1ArgType),
-  NEONMAP1(vmull_p64, arm64_neon_pmull64, 0),
-  NEONMAP1(vmulxd_f64, arm64_neon_fmulx, Add1ArgType),
-  NEONMAP1(vmulxs_f32, arm64_neon_fmulx, Add1ArgType),
-  NEONMAP1(vpaddd_s64, arm64_neon_uaddv, AddRetType | Add1ArgType),
-  NEONMAP1(vpaddd_u64, arm64_neon_uaddv, AddRetType | Add1ArgType),
-  NEONMAP1(vpmaxnmqd_f64, arm64_neon_fmaxnmv, AddRetType | Add1ArgType),
-  NEONMAP1(vpmaxnms_f32, arm64_neon_fmaxnmv, AddRetType | Add1ArgType),
-  NEONMAP1(vpmaxqd_f64, arm64_neon_fmaxv, AddRetType | Add1ArgType),
-  NEONMAP1(vpmaxs_f32, arm64_neon_fmaxv, AddRetType | Add1ArgType),
-  NEONMAP1(vpminnmqd_f64, arm64_neon_fminnmv, AddRetType | Add1ArgType),
-  NEONMAP1(vpminnms_f32, arm64_neon_fminnmv, AddRetType | Add1ArgType),
-  NEONMAP1(vpminqd_f64, arm64_neon_fminv, AddRetType | Add1ArgType),
-  NEONMAP1(vpmins_f32, arm64_neon_fminv, AddRetType | Add1ArgType),
-  NEONMAP1(vqabsb_s8, arm64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqabsd_s64, arm64_neon_sqabs, Add1ArgType),
-  NEONMAP1(vqabsh_s16, arm64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqabss_s32, arm64_neon_sqabs, Add1ArgType),
-  NEONMAP1(vqaddb_s8, arm64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqaddb_u8, arm64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqaddd_s64, arm64_neon_sqadd, Add1ArgType),
-  NEONMAP1(vqaddd_u64, arm64_neon_uqadd, Add1ArgType),
-  NEONMAP1(vqaddh_s16, arm64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqaddh_u16, arm64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqadds_s32, arm64_neon_sqadd, Add1ArgType),
-  NEONMAP1(vqadds_u32, arm64_neon_uqadd, Add1ArgType),
-  NEONMAP1(vqdmulhh_s16, arm64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqdmulhs_s32, arm64_neon_sqdmulh, Add1ArgType),
-  NEONMAP1(vqdmullh_s16, arm64_neon_sqdmull, VectorRet | Use128BitVectors),
-  NEONMAP1(vqdmulls_s32, arm64_neon_sqdmulls_scalar, 0),
-  NEONMAP1(vqmovnd_s64, arm64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
-  NEONMAP1(vqmovnd_u64, arm64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
-  NEONMAP1(vqmovnh_s16, arm64_neon_sqxtn, VectorRet | Use64BitVectors),
-  NEONMAP1(vqmovnh_u16, arm64_neon_uqxtn, VectorRet | Use64BitVectors),
-  NEONMAP1(vqmovns_s32, arm64_neon_sqxtn, VectorRet | Use64BitVectors),
-  NEONMAP1(vqmovns_u32, arm64_neon_uqxtn, VectorRet | Use64BitVectors),
-  NEONMAP1(vqmovund_s64, arm64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
-  NEONMAP1(vqmovunh_s16, arm64_neon_sqxtun, VectorRet | Use64BitVectors),
-  NEONMAP1(vqmovuns_s32, arm64_neon_sqxtun, VectorRet | Use64BitVectors),
-  NEONMAP1(vqnegb_s8, arm64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqnegd_s64, arm64_neon_sqneg, Add1ArgType),
-  NEONMAP1(vqnegh_s16, arm64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqnegs_s32, arm64_neon_sqneg, Add1ArgType),
-  NEONMAP1(vqrdmulhh_s16, arm64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqrdmulhs_s32, arm64_neon_sqrdmulh, Add1ArgType),
-  NEONMAP1(vqrshlb_s8, arm64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqrshlb_u8, arm64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqrshld_s64, arm64_neon_sqrshl, Add1ArgType),
-  NEONMAP1(vqrshld_u64, arm64_neon_uqrshl, Add1ArgType),
-  NEONMAP1(vqrshlh_s16, arm64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqrshlh_u16, arm64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqrshls_s32, arm64_neon_sqrshl, Add1ArgType),
-  NEONMAP1(vqrshls_u32, arm64_neon_uqrshl, Add1ArgType),
-  NEONMAP1(vqrshrnd_n_s64, arm64_neon_sqrshrn, AddRetType),
-  NEONMAP1(vqrshrnd_n_u64, arm64_neon_uqrshrn, AddRetType),
-  NEONMAP1(vqrshrnh_n_s16, arm64_neon_sqrshrn, VectorRet | Use64BitVectors),
-  NEONMAP1(vqrshrnh_n_u16, arm64_neon_uqrshrn, VectorRet | Use64BitVectors),
-  NEONMAP1(vqrshrns_n_s32, arm64_neon_sqrshrn, VectorRet | Use64BitVectors),
-  NEONMAP1(vqrshrns_n_u32, arm64_neon_uqrshrn, VectorRet | Use64BitVectors),
-  NEONMAP1(vqrshrund_n_s64, arm64_neon_sqrshrun, AddRetType),
-  NEONMAP1(vqrshrunh_n_s16, arm64_neon_sqrshrun, VectorRet | Use64BitVectors),
-  NEONMAP1(vqrshruns_n_s32, arm64_neon_sqrshrun, VectorRet | Use64BitVectors),
-  NEONMAP1(vqshlb_n_s8, arm64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqshlb_n_u8, arm64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqshlb_s8, arm64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqshlb_u8, arm64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqshld_s64, arm64_neon_sqshl, Add1ArgType),
-  NEONMAP1(vqshld_u64, arm64_neon_uqshl, Add1ArgType),
-  NEONMAP1(vqshlh_n_s16, arm64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqshlh_n_u16, arm64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqshlh_s16, arm64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqshlh_u16, arm64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqshls_n_s32, arm64_neon_sqshl, Add1ArgType),
-  NEONMAP1(vqshls_n_u32, arm64_neon_uqshl, Add1ArgType),
-  NEONMAP1(vqshls_s32, arm64_neon_sqshl, Add1ArgType),
-  NEONMAP1(vqshls_u32, arm64_neon_uqshl, Add1ArgType),
-  NEONMAP1(vqshlub_n_s8, arm64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqshluh_n_s16, arm64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqshlus_n_s32, arm64_neon_sqshlu, Add1ArgType),
-  NEONMAP1(vqshrnd_n_s64, arm64_neon_sqshrn, AddRetType),
-  NEONMAP1(vqshrnd_n_u64, arm64_neon_uqshrn, AddRetType),
-  NEONMAP1(vqshrnh_n_s16, arm64_neon_sqshrn, VectorRet | Use64BitVectors),
-  NEONMAP1(vqshrnh_n_u16, arm64_neon_uqshrn, VectorRet | Use64BitVectors),
-  NEONMAP1(vqshrns_n_s32, arm64_neon_sqshrn, VectorRet | Use64BitVectors),
-  NEONMAP1(vqshrns_n_u32, arm64_neon_uqshrn, VectorRet | Use64BitVectors),
-  NEONMAP1(vqshrund_n_s64, arm64_neon_sqshrun, AddRetType),
-  NEONMAP1(vqshrunh_n_s16, arm64_neon_sqshrun, VectorRet | Use64BitVectors),
-  NEONMAP1(vqshruns_n_s32, arm64_neon_sqshrun, VectorRet | Use64BitVectors),
-  NEONMAP1(vqsubb_s8, arm64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqsubb_u8, arm64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqsubd_s64, arm64_neon_sqsub, Add1ArgType),
-  NEONMAP1(vqsubd_u64, arm64_neon_uqsub, Add1ArgType),
-  NEONMAP1(vqsubh_s16, arm64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqsubh_u16, arm64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vqsubs_s32, arm64_neon_sqsub, Add1ArgType),
-  NEONMAP1(vqsubs_u32, arm64_neon_uqsub, Add1ArgType),
-  NEONMAP1(vrecped_f64, arm64_neon_frecpe, Add1ArgType),
-  NEONMAP1(vrecpes_f32, arm64_neon_frecpe, Add1ArgType),
-  NEONMAP1(vrecpxd_f64, arm64_neon_frecpx, Add1ArgType),
-  NEONMAP1(vrecpxs_f32, arm64_neon_frecpx, Add1ArgType),
-  NEONMAP1(vrshld_s64, arm64_neon_srshl, Add1ArgType),
-  NEONMAP1(vrshld_u64, arm64_neon_urshl, Add1ArgType),
-  NEONMAP1(vrsqrted_f64, arm64_neon_frsqrte, Add1ArgType),
-  NEONMAP1(vrsqrtes_f32, arm64_neon_frsqrte, Add1ArgType),
-  NEONMAP1(vrsqrtsd_f64, arm64_neon_frsqrts, Add1ArgType),
-  NEONMAP1(vrsqrtss_f32, arm64_neon_frsqrts, Add1ArgType),
-  NEONMAP1(vsha1cq_u32, arm64_crypto_sha1c, 0),
-  NEONMAP1(vsha1h_u32, arm64_crypto_sha1h, 0),
-  NEONMAP1(vsha1mq_u32, arm64_crypto_sha1m, 0),
-  NEONMAP1(vsha1pq_u32, arm64_crypto_sha1p, 0),
-  NEONMAP1(vshld_s64, arm64_neon_sshl, Add1ArgType),
-  NEONMAP1(vshld_u64, arm64_neon_ushl, Add1ArgType),
-  NEONMAP1(vslid_n_s64, arm64_neon_vsli, Vectorize1ArgType),
-  NEONMAP1(vslid_n_u64, arm64_neon_vsli, Vectorize1ArgType),
-  NEONMAP1(vsqaddb_u8, arm64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vsqaddd_u64, arm64_neon_usqadd, Add1ArgType),
-  NEONMAP1(vsqaddh_u16, arm64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vsqadds_u32, arm64_neon_usqadd, Add1ArgType),
-  NEONMAP1(vsrid_n_s64, arm64_neon_vsri, Vectorize1ArgType),
-  NEONMAP1(vsrid_n_u64, arm64_neon_vsri, Vectorize1ArgType),
-  NEONMAP1(vuqaddb_s8, arm64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vuqaddd_s64, arm64_neon_suqadd, Add1ArgType),
-  NEONMAP1(vuqaddh_s16, arm64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
-  NEONMAP1(vuqadds_s32, arm64_neon_suqadd, Add1ArgType),
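+// Scalar (SISD) builtins: each entry maps a NEON scalar builtin to the
+// AArch64 LLVM intrinsic that implements it, plus type-modifier flags.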
+static NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
+  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
+  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
+  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
+  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
+  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
+  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
+  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
+  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
+  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
+  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
+  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
+  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
+  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
+  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
+  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
+  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
+  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
+  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
+  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
+  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
+  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
+  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
+  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
+  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
+  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
+  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
+  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
+  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
+  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
+  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
+  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
+  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
+  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
+  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
+  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
+  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
+  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
+  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
+  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
+  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
+  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
+  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
+  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
+  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
+  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
+  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
+  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
+  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
+  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
+  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
+  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
+  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
+  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
+  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
+  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
+  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
+  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
+  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
+  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
+  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
+  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
+  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
+  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
+  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
+  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
+  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
+  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
+  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
+  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
+  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
+  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
+  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
+  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
+  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
+  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
+  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
+  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
+  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
+  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
+  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
+  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
+  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
+  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
+  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
+  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
+  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
+  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
+  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
+  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
+  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
+  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
+  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
+  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
+  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
+  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
+  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
+  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
+  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
+  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
+  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
+  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
+  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
+  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
+  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
+  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
+  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
+  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
+  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
+  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
+  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
+  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
+  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
+  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
+  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
+  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
+  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
+  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
+  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
+  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
+  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
+  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
+  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
+  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
+  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
+  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
+  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
+  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
+  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
+  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
+  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
+  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
+  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
+  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
+  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
+  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
+  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
+  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
+  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
+  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
+  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
 };
 
 #undef NEONMAP0
@@ -2738,10 +2390,9 @@
 #undef NEONMAP2
 
 static bool NEONSIMDIntrinsicsProvenSorted = false;
-static bool AArch64SISDIntrinsicInfoProvenSorted = false;
 
-static bool ARM64SIMDIntrinsicsProvenSorted = false;
-static bool ARM64SISDIntrinsicsProvenSorted = false;
+static bool AArch64SIMDIntrinsicsProvenSorted = false;
+static bool AArch64SISDIntrinsicsProvenSorted = false;
 
 
 static const NeonIntrinsicInfo *
@@ -2763,7 +2414,7 @@
   if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
     return Builtin;
 
-  return 0;
+  return nullptr;
 }
 
 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
@@ -2868,169 +2519,6 @@
   return CGF.Builder.CreateBitCast(Result, ResultType, s);
 }
 
-static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
-                                           const NeonIntrinsicInfo &SISDInfo,
-                                           const CallExpr *E) {
-  unsigned BuiltinID = SISDInfo.BuiltinID;
-  unsigned int Int = SISDInfo.LLVMIntrinsic;
-  const char *s = SISDInfo.NameHint;
-
-  SmallVector<Value *, 4> Ops;
-  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
-    Ops.push_back(CGF.EmitScalarExpr(E->getArg(i)));
-  }
-
-  // AArch64 scalar builtins are not overloaded, they do not have an extra
-  // argument that specifies the vector type, need to handle each case.
-  switch (BuiltinID) {
-  default: break;
-  case NEON::BI__builtin_neon_vdups_lane_f32:
-  case NEON::BI__builtin_neon_vdupd_lane_f64:
-  case NEON::BI__builtin_neon_vdups_laneq_f32:
-  case NEON::BI__builtin_neon_vdupd_laneq_f64: {
-    return CGF.Builder.CreateExtractElement(Ops[0], Ops[1], "vdup_lane");
-  }
-  case NEON::BI__builtin_neon_vdupb_lane_i8:
-  case NEON::BI__builtin_neon_vduph_lane_i16:
-  case NEON::BI__builtin_neon_vdups_lane_i32:
-  case NEON::BI__builtin_neon_vdupd_lane_i64:
-  case NEON::BI__builtin_neon_vdupb_laneq_i8:
-  case NEON::BI__builtin_neon_vduph_laneq_i16:
-  case NEON::BI__builtin_neon_vdups_laneq_i32:
-  case NEON::BI__builtin_neon_vdupd_laneq_i64: {
-    // The backend treats Neon scalar types as v1ix types
-    // So we want to dup lane from any vector to v1ix vector
-    // with shufflevector
-    s = "vdup_lane";
-    Value* SV = llvm::ConstantVector::getSplat(1, cast<ConstantInt>(Ops[1]));
-    Value *Result = CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], SV, s);
-    llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
-    // AArch64 intrinsic one-element vector type cast to
-    // scalar type expected by the builtin
-    return CGF.Builder.CreateBitCast(Result, Ty, s);
-  }
-  case NEON::BI__builtin_neon_vqdmlalh_lane_s16 :
-  case NEON::BI__builtin_neon_vqdmlalh_laneq_s16 :
-  case NEON::BI__builtin_neon_vqdmlals_lane_s32 :
-  case NEON::BI__builtin_neon_vqdmlals_laneq_s32 :
-  case NEON::BI__builtin_neon_vqdmlslh_lane_s16 :
-  case NEON::BI__builtin_neon_vqdmlslh_laneq_s16 :
-  case NEON::BI__builtin_neon_vqdmlsls_lane_s32 :
-  case NEON::BI__builtin_neon_vqdmlsls_laneq_s32 : {
-    Int = Intrinsic::arm_neon_vqadds;
-    if (BuiltinID == NEON::BI__builtin_neon_vqdmlslh_lane_s16 ||
-        BuiltinID == NEON::BI__builtin_neon_vqdmlslh_laneq_s16 ||
-        BuiltinID == NEON::BI__builtin_neon_vqdmlsls_lane_s32 ||
-        BuiltinID == NEON::BI__builtin_neon_vqdmlsls_laneq_s32) {
-      Int = Intrinsic::arm_neon_vqsubs;
-    }
-    // create vqdmull call with b * c[i]
-    llvm::Type *Ty = CGF.ConvertType(E->getArg(1)->getType());
-    llvm::VectorType *OpVTy = llvm::VectorType::get(Ty, 1);
-    Ty = CGF.ConvertType(E->getArg(0)->getType());
-    llvm::VectorType *ResVTy = llvm::VectorType::get(Ty, 1);
-    Value *F = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, ResVTy);
-    Value *V = UndefValue::get(OpVTy);
-    llvm::Constant *CI = ConstantInt::get(CGF.Int32Ty, 0);
-    SmallVector<Value *, 2> MulOps;
-    MulOps.push_back(Ops[1]);
-    MulOps.push_back(Ops[2]);
-    MulOps[0] = CGF.Builder.CreateInsertElement(V, MulOps[0], CI);
-    MulOps[1] = CGF.Builder.CreateExtractElement(MulOps[1], Ops[3], "extract");
-    MulOps[1] = CGF.Builder.CreateInsertElement(V, MulOps[1], CI);
-    Value *MulRes = CGF.Builder.CreateCall2(F, MulOps[0], MulOps[1]);
-    // create vqadds call with a +/- vqdmull result
-    F = CGF.CGM.getIntrinsic(Int, ResVTy);
-    SmallVector<Value *, 2> AddOps;
-    AddOps.push_back(Ops[0]);
-    AddOps.push_back(MulRes);
-    V = UndefValue::get(ResVTy);
-    AddOps[0] = CGF.Builder.CreateInsertElement(V, AddOps[0], CI);
-    Value *AddRes = CGF.Builder.CreateCall2(F, AddOps[0], AddOps[1]);
-    return CGF.Builder.CreateBitCast(AddRes, Ty);
-  }
-  case NEON::BI__builtin_neon_vfmas_lane_f32:
-  case NEON::BI__builtin_neon_vfmas_laneq_f32:
-  case NEON::BI__builtin_neon_vfmad_lane_f64:
-  case NEON::BI__builtin_neon_vfmad_laneq_f64: {
-    llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
-    Value *F = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
-    Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
-    return CGF.Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
-  }
-  // Scalar Floating-point Multiply Extended
-  case NEON::BI__builtin_neon_vmulxs_f32:
-  case NEON::BI__builtin_neon_vmulxd_f64: {
-    Int = Intrinsic::aarch64_neon_vmulx;
-    llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
-    return CGF.EmitNeonCall(CGF.CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
-  }
-  case NEON::BI__builtin_neon_vmul_n_f64: {
-    // v1f64 vmul_n_f64  should be mapped to Neon scalar mul lane
-    llvm::Type *VTy = GetNeonType(&CGF,
-      NeonTypeFlags(NeonTypeFlags::Float64, false, false));
-    Ops[0] = CGF.Builder.CreateBitCast(Ops[0], VTy);
-    llvm::Value *Idx = llvm::ConstantInt::get(CGF.Int32Ty, 0);
-    Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], Idx, "extract");
-    Value *Result = CGF.Builder.CreateFMul(Ops[0], Ops[1]);
-    return CGF.Builder.CreateBitCast(Result, VTy);
-  }
-  case NEON::BI__builtin_neon_vget_lane_i8:
-  case NEON::BI__builtin_neon_vget_lane_i16:
-  case NEON::BI__builtin_neon_vget_lane_i32:
-  case NEON::BI__builtin_neon_vget_lane_i64:
-  case NEON::BI__builtin_neon_vget_lane_f32:
-  case NEON::BI__builtin_neon_vget_lane_f64:
-  case NEON::BI__builtin_neon_vgetq_lane_i8:
-  case NEON::BI__builtin_neon_vgetq_lane_i16:
-  case NEON::BI__builtin_neon_vgetq_lane_i32:
-  case NEON::BI__builtin_neon_vgetq_lane_i64:
-  case NEON::BI__builtin_neon_vgetq_lane_f32:
-  case NEON::BI__builtin_neon_vgetq_lane_f64:
-    return CGF.EmitARMBuiltinExpr(NEON::BI__builtin_neon_vget_lane_i8, E);
-  case NEON::BI__builtin_neon_vset_lane_i8:
-  case NEON::BI__builtin_neon_vset_lane_i16:
-  case NEON::BI__builtin_neon_vset_lane_i32:
-  case NEON::BI__builtin_neon_vset_lane_i64:
-  case NEON::BI__builtin_neon_vset_lane_f32:
-  case NEON::BI__builtin_neon_vset_lane_f64:
-  case NEON::BI__builtin_neon_vsetq_lane_i8:
-  case NEON::BI__builtin_neon_vsetq_lane_i16:
-  case NEON::BI__builtin_neon_vsetq_lane_i32:
-  case NEON::BI__builtin_neon_vsetq_lane_i64:
-  case NEON::BI__builtin_neon_vsetq_lane_f32:
-  case NEON::BI__builtin_neon_vsetq_lane_f64:
-    return CGF.EmitARMBuiltinExpr(NEON::BI__builtin_neon_vset_lane_i8, E);
-
-  case NEON::BI__builtin_neon_vceqzd_s64:
-  case NEON::BI__builtin_neon_vceqzd_u64:
-  case NEON::BI__builtin_neon_vcgezd_s64:
-  case NEON::BI__builtin_neon_vcgtzd_s64:
-  case NEON::BI__builtin_neon_vclezd_s64:
-  case NEON::BI__builtin_neon_vcltzd_s64:
-    // Add implicit zero operand.
-    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
-    break;
-  case NEON::BI__builtin_neon_vceqzs_f32:
-  case NEON::BI__builtin_neon_vceqzd_f64:
-  case NEON::BI__builtin_neon_vcgezs_f32:
-  case NEON::BI__builtin_neon_vcgezd_f64:
-  case NEON::BI__builtin_neon_vcgtzs_f32:
-  case NEON::BI__builtin_neon_vcgtzd_f64:
-  case NEON::BI__builtin_neon_vclezs_f32:
-  case NEON::BI__builtin_neon_vclezd_f64:
-  case NEON::BI__builtin_neon_vcltzs_f32:
-  case NEON::BI__builtin_neon_vcltzd_f64:
-    // Add implicit zero operand.
-    Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy));
-    break;
-  }
-
-  // It didn't need any handling specific to the AArch64 backend, so defer to
-  // common code.
-  return EmitCommonNeonSISDBuiltinExpr(CGF, SISDInfo, Ops, E);
-}
-
 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
     unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
     const char *NameHint, unsigned Modifier, const CallExpr *E,
@@ -3039,7 +2527,7 @@
   llvm::APSInt NeonTypeConst;
   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
   if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
-    return 0;
+    return nullptr;
 
   // Determine the type of this overloaded NEON intrinsic.
   NeonTypeFlags Type(NeonTypeConst.getZExtValue());
@@ -3049,7 +2537,7 @@
   llvm::VectorType *VTy = GetNeonType(this, Type);
   llvm::Type *Ty = VTy;
   if (!Ty)
-    return 0;
+    return nullptr;
 
   unsigned Int = LLVMIntrinsic;
   if ((Modifier & UnsignedAlts) && !Usgn)
@@ -3396,7 +2884,7 @@
     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
-    Value *SV = 0;
+    Value *SV = nullptr;
 
     for (unsigned vi = 0; vi != 2; ++vi) {
       SmallVector<Constant*, 16> Indices;
@@ -3425,7 +2913,7 @@
     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
-    Value *SV = 0;
+    Value *SV = nullptr;
 
     for (unsigned vi = 0; vi != 2; ++vi) {
       SmallVector<Constant*, 16> Indices;
@@ -3444,7 +2932,7 @@
     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
-    Value *SV = 0;
+    Value *SV = nullptr;
 
     for (unsigned vi = 0; vi != 2; ++vi) {
       SmallVector<Constant*, 16> Indices;
@@ -3533,799 +3021,33 @@
   return CGF.EmitNeonCall(TblF, TblOps, Name);
 }
 
-static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF,
-                                        unsigned BuiltinID,
-                                        const CallExpr *E) {
-  unsigned int Int = 0;
-  const char *s = NULL;
-
+Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
+                                           const CallExpr *E) {
+  unsigned HintID = static_cast<unsigned>(-1);
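+  // Map the ARM hint builtins (__yield, __wfe, __wfi, __sev, __sevl) to the
+  // immediate operand expected by the llvm.arm.hint intrinsic.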
   switch (BuiltinID) {
-  default:
-    return 0;
-  case NEON::BI__builtin_neon_vtbl1_v:
-  case NEON::BI__builtin_neon_vqtbl1_v:
-  case NEON::BI__builtin_neon_vqtbl1q_v:
-  case NEON::BI__builtin_neon_vtbl2_v:
-  case NEON::BI__builtin_neon_vqtbl2_v:
-  case NEON::BI__builtin_neon_vqtbl2q_v:
-  case NEON::BI__builtin_neon_vtbl3_v:
-  case NEON::BI__builtin_neon_vqtbl3_v:
-  case NEON::BI__builtin_neon_vqtbl3q_v:
-  case NEON::BI__builtin_neon_vtbl4_v:
-  case NEON::BI__builtin_neon_vqtbl4_v:
-  case NEON::BI__builtin_neon_vqtbl4q_v:
-  case NEON::BI__builtin_neon_vtbx1_v:
-  case NEON::BI__builtin_neon_vqtbx1_v:
-  case NEON::BI__builtin_neon_vqtbx1q_v:
-  case NEON::BI__builtin_neon_vtbx2_v:
-  case NEON::BI__builtin_neon_vqtbx2_v:
-  case NEON::BI__builtin_neon_vqtbx2q_v:
-  case NEON::BI__builtin_neon_vtbx3_v:
-  case NEON::BI__builtin_neon_vqtbx3_v:
-  case NEON::BI__builtin_neon_vqtbx3q_v:
-  case NEON::BI__builtin_neon_vtbx4_v:
-  case NEON::BI__builtin_neon_vqtbx4_v:
-  case NEON::BI__builtin_neon_vqtbx4q_v:
+  default: break;
+  case ARM::BI__yield:
+    HintID = 1;
+    break;
+  case ARM::BI__wfe:
+    HintID = 2;
+    break;
+  case ARM::BI__wfi:
+    HintID = 3;
+    break;
+  case ARM::BI__sev:
+    HintID = 4;
+    break;
+  case ARM::BI__sevl:
+    HintID = 5;
     break;
   }
 
-  assert(E->getNumArgs() >= 3);
-
-  // Get the last argument, which specifies the vector type.
-  llvm::APSInt Result;
-  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
-  if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
-    return 0;
-
-  // Determine the type of this overloaded NEON intrinsic.
-  NeonTypeFlags Type(Result.getZExtValue());
-  llvm::VectorType *VTy = GetNeonType(&CGF, Type);
-  llvm::Type *Ty = VTy;
-  if (!Ty)
-    return 0;
-
-  SmallVector<Value *, 4> Ops;
-  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
-    Ops.push_back(CGF.EmitScalarExpr(E->getArg(i)));
+  if (HintID != static_cast<unsigned>(-1)) {
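+    // One of the hint builtins matched; lower it to llvm.arm.hint and return.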
+    Function *F = CGM.getIntrinsic(Intrinsic::arm_hint);
+    return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
   }
 
-  unsigned nElts = VTy->getNumElements();  
-
-  // AArch64 scalar builtins are not overloaded, they do not have an extra
-  // argument that specifies the vector type, need to handle each case.
-  SmallVector<Value *, 2> TblOps;
-  switch (BuiltinID) {
-  case NEON::BI__builtin_neon_vtbl1_v: {
-    TblOps.push_back(Ops[0]);
-    return packTBLDVectorList(CGF, TblOps, 0, Ops[1], Ty,
-                              Intrinsic::aarch64_neon_vtbl1, "vtbl1");
-  }
-  case NEON::BI__builtin_neon_vtbl2_v: {
-    TblOps.push_back(Ops[0]);
-    TblOps.push_back(Ops[1]);
-    return packTBLDVectorList(CGF, TblOps, 0, Ops[2], Ty,
-                              Intrinsic::aarch64_neon_vtbl1, "vtbl1");
-  }
-  case NEON::BI__builtin_neon_vtbl3_v: {
-    TblOps.push_back(Ops[0]);
-    TblOps.push_back(Ops[1]);
-    TblOps.push_back(Ops[2]);
-    return packTBLDVectorList(CGF, TblOps, 0, Ops[3], Ty,
-                              Intrinsic::aarch64_neon_vtbl2, "vtbl2");
-  }
-  case NEON::BI__builtin_neon_vtbl4_v: {
-    TblOps.push_back(Ops[0]);
-    TblOps.push_back(Ops[1]);
-    TblOps.push_back(Ops[2]);
-    TblOps.push_back(Ops[3]);
-    return packTBLDVectorList(CGF, TblOps, 0, Ops[4], Ty,
-                              Intrinsic::aarch64_neon_vtbl2, "vtbl2");
-  }
-  case NEON::BI__builtin_neon_vtbx1_v: {
-    TblOps.push_back(Ops[1]);
-    Value *TblRes = packTBLDVectorList(CGF, TblOps, 0, Ops[2], Ty,
-                                    Intrinsic::aarch64_neon_vtbl1, "vtbl1");
-
-    llvm::Constant *Eight = ConstantInt::get(VTy->getElementType(), 8);
-    Value* EightV = llvm::ConstantVector::getSplat(nElts, Eight);
-    Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
-    CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty);
-
-    SmallVector<Value *, 4> BslOps;
-    BslOps.push_back(CmpRes);
-    BslOps.push_back(Ops[0]);
-    BslOps.push_back(TblRes);
-    Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty);
-    return CGF.EmitNeonCall(BslF, BslOps, "vbsl");
-  }
-  case NEON::BI__builtin_neon_vtbx2_v: {
-    TblOps.push_back(Ops[1]);
-    TblOps.push_back(Ops[2]);
-    return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[3], Ty,
-                              Intrinsic::aarch64_neon_vtbx1, "vtbx1");
-  }
-  case NEON::BI__builtin_neon_vtbx3_v: {
-    TblOps.push_back(Ops[1]);
-    TblOps.push_back(Ops[2]);
-    TblOps.push_back(Ops[3]);
-    Value *TblRes = packTBLDVectorList(CGF, TblOps, 0, Ops[4], Ty,
-                                       Intrinsic::aarch64_neon_vtbl2, "vtbl2");
-
-    llvm::Constant *TwentyFour = ConstantInt::get(VTy->getElementType(), 24);
-    Value* TwentyFourV = llvm::ConstantVector::getSplat(nElts, TwentyFour);
-    Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
-                                           TwentyFourV);
-    CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty);
-  
-    SmallVector<Value *, 4> BslOps;
-    BslOps.push_back(CmpRes);
-    BslOps.push_back(Ops[0]);
-    BslOps.push_back(TblRes);
-    Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty);
-    return CGF.EmitNeonCall(BslF, BslOps, "vbsl");
-  }
-  case NEON::BI__builtin_neon_vtbx4_v: {
-    TblOps.push_back(Ops[1]);
-    TblOps.push_back(Ops[2]);
-    TblOps.push_back(Ops[3]);
-    TblOps.push_back(Ops[4]);
-    return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[5], Ty,
-                              Intrinsic::aarch64_neon_vtbx2, "vtbx2");
-  }
-  case NEON::BI__builtin_neon_vqtbl1_v:
-  case NEON::BI__builtin_neon_vqtbl1q_v:
-    Int = Intrinsic::aarch64_neon_vtbl1; s = "vtbl1"; break;
-  case NEON::BI__builtin_neon_vqtbl2_v:
-  case NEON::BI__builtin_neon_vqtbl2q_v: {
-    Int = Intrinsic::aarch64_neon_vtbl2; s = "vtbl2"; break;
-  case NEON::BI__builtin_neon_vqtbl3_v:
-  case NEON::BI__builtin_neon_vqtbl3q_v:
-    Int = Intrinsic::aarch64_neon_vtbl3; s = "vtbl3"; break;
-  case NEON::BI__builtin_neon_vqtbl4_v:
-  case NEON::BI__builtin_neon_vqtbl4q_v:
-    Int = Intrinsic::aarch64_neon_vtbl4; s = "vtbl4"; break;
-  case NEON::BI__builtin_neon_vqtbx1_v:
-  case NEON::BI__builtin_neon_vqtbx1q_v:
-    Int = Intrinsic::aarch64_neon_vtbx1; s = "vtbx1"; break;
-  case NEON::BI__builtin_neon_vqtbx2_v:
-  case NEON::BI__builtin_neon_vqtbx2q_v:
-    Int = Intrinsic::aarch64_neon_vtbx2; s = "vtbx2"; break;
-  case NEON::BI__builtin_neon_vqtbx3_v:
-  case NEON::BI__builtin_neon_vqtbx3q_v:
-    Int = Intrinsic::aarch64_neon_vtbx3; s = "vtbx3"; break;
-  case NEON::BI__builtin_neon_vqtbx4_v:
-  case NEON::BI__builtin_neon_vqtbx4q_v:
-    Int = Intrinsic::aarch64_neon_vtbx4; s = "vtbx4"; break;
-  }
-  }
-
-  if (!Int)
-    return 0;
-
-  Function *F = CGF.CGM.getIntrinsic(Int, Ty);
-  return CGF.EmitNeonCall(F, Ops, s);
-}
-
-Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
-                                               const CallExpr *E) {
-
-  // Process AArch64 scalar builtins
-  llvm::ArrayRef<NeonIntrinsicInfo> SISDInfo(AArch64SISDIntrinsicInfo);
-  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
-      SISDInfo, BuiltinID, AArch64SISDIntrinsicInfoProvenSorted);
-
-  if (Builtin) {
-    Value *Result = EmitAArch64ScalarBuiltinExpr(*this, *Builtin, E);
-    assert(Result && "SISD intrinsic should have been handled");
-    return Result;
-  }
-
-  // Process AArch64 table lookup builtins
-  if (Value *Result = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E))
-    return Result;
-
-  if (BuiltinID == AArch64::BI__clear_cache) {
-    assert(E->getNumArgs() == 2 &&
-           "Variadic __clear_cache slipped through on AArch64");
-
-    const FunctionDecl *FD = E->getDirectCallee();
-    SmallVector<Value *, 2> Ops;
-    for (unsigned i = 0; i < E->getNumArgs(); i++)
-      Ops.push_back(EmitScalarExpr(E->getArg(i)));
-    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
-    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
-    StringRef Name = FD->getName();
-    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
-  }
-
-  SmallVector<Value *, 4> Ops;
-  llvm::Value *Align = 0; // Alignment for load/store
-
-  if (BuiltinID == NEON::BI__builtin_neon_vldrq_p128) {
-   Value *Op = EmitScalarExpr(E->getArg(0));
-   unsigned addressSpace =
-     cast<llvm::PointerType>(Op->getType())->getAddressSpace();
-   llvm::Type *Ty = llvm::Type::getFP128PtrTy(getLLVMContext(), addressSpace);
-   Op = Builder.CreateBitCast(Op, Ty);
-   Op = Builder.CreateLoad(Op);
-   Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
-   return Builder.CreateBitCast(Op, Ty);
-  }
-  if (BuiltinID == NEON::BI__builtin_neon_vstrq_p128) {
-    Value *Op0 = EmitScalarExpr(E->getArg(0));
-    unsigned addressSpace =
-      cast<llvm::PointerType>(Op0->getType())->getAddressSpace();
-    llvm::Type *PTy = llvm::Type::getFP128PtrTy(getLLVMContext(), addressSpace);
-    Op0 = Builder.CreateBitCast(Op0, PTy);
-    Value *Op1 = EmitScalarExpr(E->getArg(1));
-    llvm::Type *Ty = llvm::Type::getFP128Ty(getLLVMContext());
-    Op1 = Builder.CreateBitCast(Op1, Ty);
-    return Builder.CreateStore(Op1, Op0);
-  }
-  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
-    if (i == 0) {
-      switch (BuiltinID) {
-      case NEON::BI__builtin_neon_vld1_v:
-      case NEON::BI__builtin_neon_vld1q_v:
-      case NEON::BI__builtin_neon_vst1_v:
-      case NEON::BI__builtin_neon_vst1q_v:
-      case NEON::BI__builtin_neon_vst2_v:
-      case NEON::BI__builtin_neon_vst2q_v:
-      case NEON::BI__builtin_neon_vst3_v:
-      case NEON::BI__builtin_neon_vst3q_v:
-      case NEON::BI__builtin_neon_vst4_v:
-      case NEON::BI__builtin_neon_vst4q_v:
-      case NEON::BI__builtin_neon_vst1_x2_v:
-      case NEON::BI__builtin_neon_vst1q_x2_v:
-      case NEON::BI__builtin_neon_vst1_x3_v:
-      case NEON::BI__builtin_neon_vst1q_x3_v:
-      case NEON::BI__builtin_neon_vst1_x4_v:
-      case NEON::BI__builtin_neon_vst1q_x4_v:
-      // Handle ld1/st1 lane in this function a little different from ARM.
-      case NEON::BI__builtin_neon_vld1_lane_v:
-      case NEON::BI__builtin_neon_vld1q_lane_v:
-      case NEON::BI__builtin_neon_vst1_lane_v:
-      case NEON::BI__builtin_neon_vst1q_lane_v:
-      case NEON::BI__builtin_neon_vst2_lane_v:
-      case NEON::BI__builtin_neon_vst2q_lane_v:
-      case NEON::BI__builtin_neon_vst3_lane_v:
-      case NEON::BI__builtin_neon_vst3q_lane_v:
-      case NEON::BI__builtin_neon_vst4_lane_v:
-      case NEON::BI__builtin_neon_vst4q_lane_v:
-      case NEON::BI__builtin_neon_vld1_dup_v:
-      case NEON::BI__builtin_neon_vld1q_dup_v:
-        // Get the alignment for the argument in addition to the value;
-        // we'll use it later.
-        std::pair<llvm::Value *, unsigned> Src =
-            EmitPointerWithAlignment(E->getArg(0));
-        Ops.push_back(Src.first);
-        Align = Builder.getInt32(Src.second);
-        continue;
-      }
-    }
-    if (i == 1) {
-      switch (BuiltinID) {
-      case NEON::BI__builtin_neon_vld2_v:
-      case NEON::BI__builtin_neon_vld2q_v:
-      case NEON::BI__builtin_neon_vld3_v:
-      case NEON::BI__builtin_neon_vld3q_v:
-      case NEON::BI__builtin_neon_vld4_v:
-      case NEON::BI__builtin_neon_vld4q_v:
-      case NEON::BI__builtin_neon_vld1_x2_v:
-      case NEON::BI__builtin_neon_vld1q_x2_v:
-      case NEON::BI__builtin_neon_vld1_x3_v:
-      case NEON::BI__builtin_neon_vld1q_x3_v:
-      case NEON::BI__builtin_neon_vld1_x4_v:
-      case NEON::BI__builtin_neon_vld1q_x4_v:
-      // Handle ld1/st1 dup lane in this function a little different from ARM.
-      case NEON::BI__builtin_neon_vld2_dup_v:
-      case NEON::BI__builtin_neon_vld2q_dup_v:
-      case NEON::BI__builtin_neon_vld3_dup_v:
-      case NEON::BI__builtin_neon_vld3q_dup_v:
-      case NEON::BI__builtin_neon_vld4_dup_v:
-      case NEON::BI__builtin_neon_vld4q_dup_v:
-      case NEON::BI__builtin_neon_vld2_lane_v:
-      case NEON::BI__builtin_neon_vld2q_lane_v:
-      case NEON::BI__builtin_neon_vld3_lane_v:
-      case NEON::BI__builtin_neon_vld3q_lane_v:
-      case NEON::BI__builtin_neon_vld4_lane_v:
-      case NEON::BI__builtin_neon_vld4q_lane_v:
-        // Get the alignment for the argument in addition to the value;
-        // we'll use it later.
-        std::pair<llvm::Value *, unsigned> Src =
-            EmitPointerWithAlignment(E->getArg(1));
-        Ops.push_back(Src.first);
-        Align = Builder.getInt32(Src.second);
-        continue;
-      }
-    }
-    Ops.push_back(EmitScalarExpr(E->getArg(i)));
-  }
-
-  // Get the last argument, which specifies the vector type.
-  llvm::APSInt Result;
-  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
-  if (!Arg->isIntegerConstantExpr(Result, getContext()))
-    return 0;
-
-  // Determine the type of this overloaded NEON intrinsic.
-  NeonTypeFlags Type(Result.getZExtValue());
-  bool usgn = Type.isUnsigned();
-  bool quad = Type.isQuad();
-
-  llvm::VectorType *VTy = GetNeonType(this, Type);
-  llvm::Type *Ty = VTy;
-  if (!Ty)
-    return 0;
-
-
-  // Many NEON builtins have identical semantics and uses in ARM and
-  // AArch64. Emit these in a single function.
-  llvm::ArrayRef<NeonIntrinsicInfo> IntrinsicMap(ARMSIMDIntrinsicMap);
-  Builtin = findNeonIntrinsicInMap(IntrinsicMap, BuiltinID,
-                                   NEONSIMDIntrinsicsProvenSorted);
-  if (Builtin)
-    return EmitCommonNeonBuiltinExpr(
-        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
-        Builtin->NameHint, Builtin->TypeModifier, E, Ops, Align);
-
-  unsigned Int;
-  switch (BuiltinID) {
-  default:
-    return 0;
-
-  // AArch64 builtins mapping to legacy ARM v7 builtins.
-  // FIXME: the mapped builtins listed correspond to what has been tested
-  // in aarch64-neon-intrinsics.c so far.
-
-  // Shift by immediate
-  case NEON::BI__builtin_neon_vrshr_n_v:
-  case NEON::BI__builtin_neon_vrshrq_n_v:
-    Int = usgn ? Intrinsic::aarch64_neon_vurshr
-               : Intrinsic::aarch64_neon_vsrshr;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n");
-  case NEON::BI__builtin_neon_vsra_n_v:
-    if (VTy->getElementType()->isIntegerTy(64)) {
-      Int = usgn ? Intrinsic::aarch64_neon_vsradu_n
-                 : Intrinsic::aarch64_neon_vsrads_n;
-      return EmitNeonCall(CGM.getIntrinsic(Int), Ops, "vsra_n");
-    }
-    return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vsra_n_v, E);
-  case NEON::BI__builtin_neon_vsraq_n_v:
-    return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vsraq_n_v, E);
-  case NEON::BI__builtin_neon_vrsra_n_v:
-    if (VTy->getElementType()->isIntegerTy(64)) {
-      Int = usgn ? Intrinsic::aarch64_neon_vrsradu_n
-                 : Intrinsic::aarch64_neon_vrsrads_n;
-      return EmitNeonCall(CGM.getIntrinsic(Int), Ops, "vrsra_n");
-    }
-    // fall through
-  case NEON::BI__builtin_neon_vrsraq_n_v: {
-    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-    Int = usgn ? Intrinsic::aarch64_neon_vurshr
-               : Intrinsic::aarch64_neon_vsrshr;
-    Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]);
-    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
-  }
-  case NEON::BI__builtin_neon_vqshlu_n_v:
-  case NEON::BI__builtin_neon_vqshluq_n_v:
-    Int = Intrinsic::aarch64_neon_vsqshlu;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n");
-  case NEON::BI__builtin_neon_vsri_n_v:
-  case NEON::BI__builtin_neon_vsriq_n_v:
-    Int = Intrinsic::aarch64_neon_vsri;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsri_n");
-  case NEON::BI__builtin_neon_vsli_n_v:
-  case NEON::BI__builtin_neon_vsliq_n_v:
-    Int = Intrinsic::aarch64_neon_vsli;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsli_n");
-  case NEON::BI__builtin_neon_vqshrun_n_v:
-    Int = Intrinsic::aarch64_neon_vsqshrun;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
-  case NEON::BI__builtin_neon_vrshrn_n_v:
-    Int = Intrinsic::aarch64_neon_vrshrn;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
-  case NEON::BI__builtin_neon_vqrshrun_n_v:
-    Int = Intrinsic::aarch64_neon_vsqrshrun;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
-  case NEON::BI__builtin_neon_vqshrn_n_v:
-    Int = usgn ? Intrinsic::aarch64_neon_vuqshrn
-               : Intrinsic::aarch64_neon_vsqshrn;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
-  case NEON::BI__builtin_neon_vqrshrn_n_v:
-    Int = usgn ? Intrinsic::aarch64_neon_vuqrshrn
-               : Intrinsic::aarch64_neon_vsqrshrn;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
-
-  // Convert
-  case NEON::BI__builtin_neon_vcvt_n_f64_v:
-  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
-    llvm::Type *FloatTy =
-        GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
-    llvm::Type *Tys[2] = { FloatTy, Ty };
-    Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp
-               : Intrinsic::arm_neon_vcvtfxs2fp;
-    Function *F = CGM.getIntrinsic(Int, Tys);
-    return EmitNeonCall(F, Ops, "vcvt_n");
-  }
-
-  // Load/Store
-  case NEON::BI__builtin_neon_vld1_x2_v:
-  case NEON::BI__builtin_neon_vld1q_x2_v:
-  case NEON::BI__builtin_neon_vld1_x3_v:
-  case NEON::BI__builtin_neon_vld1q_x3_v:
-  case NEON::BI__builtin_neon_vld1_x4_v:
-  case NEON::BI__builtin_neon_vld1q_x4_v: {
-    unsigned Int;
-    switch (BuiltinID) {
-    case NEON::BI__builtin_neon_vld1_x2_v:
-    case NEON::BI__builtin_neon_vld1q_x2_v:
-      Int = Intrinsic::aarch64_neon_vld1x2;
-      break;
-    case NEON::BI__builtin_neon_vld1_x3_v:
-    case NEON::BI__builtin_neon_vld1q_x3_v:
-      Int = Intrinsic::aarch64_neon_vld1x3;
-      break;
-    case NEON::BI__builtin_neon_vld1_x4_v:
-    case NEON::BI__builtin_neon_vld1q_x4_v:
-      Int = Intrinsic::aarch64_neon_vld1x4;
-      break;
-    }
-    Function *F = CGM.getIntrinsic(Int, Ty);
-    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld1xN");
-    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
-    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-    return Builder.CreateStore(Ops[1], Ops[0]);
-  }
-  case NEON::BI__builtin_neon_vst1_x2_v:
-  case NEON::BI__builtin_neon_vst1q_x2_v:
-  case NEON::BI__builtin_neon_vst1_x3_v:
-  case NEON::BI__builtin_neon_vst1q_x3_v:
-  case NEON::BI__builtin_neon_vst1_x4_v:
-  case NEON::BI__builtin_neon_vst1q_x4_v: {
-    Ops.push_back(Align);
-    unsigned Int;
-    switch (BuiltinID) {
-    case NEON::BI__builtin_neon_vst1_x2_v:
-    case NEON::BI__builtin_neon_vst1q_x2_v:
-      Int = Intrinsic::aarch64_neon_vst1x2;
-      break;
-    case NEON::BI__builtin_neon_vst1_x3_v:
-    case NEON::BI__builtin_neon_vst1q_x3_v:
-      Int = Intrinsic::aarch64_neon_vst1x3;
-      break;
-    case NEON::BI__builtin_neon_vst1_x4_v:
-    case NEON::BI__builtin_neon_vst1q_x4_v:
-      Int = Intrinsic::aarch64_neon_vst1x4;
-      break;
-    }
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "");
-  }
-  case NEON::BI__builtin_neon_vld1_lane_v:
-  case NEON::BI__builtin_neon_vld1q_lane_v: {
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
-    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-    LoadInst *Ld = Builder.CreateLoad(Ops[0]);
-    Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
-    return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
-  }
-  case NEON::BI__builtin_neon_vst1_lane_v:
-  case NEON::BI__builtin_neon_vst1q_lane_v: {
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
-    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
-    StoreInst *St =
-        Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty));
-    St->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
-    return St;
-  }
-  case NEON::BI__builtin_neon_vld2_dup_v:
-  case NEON::BI__builtin_neon_vld2q_dup_v:
-  case NEON::BI__builtin_neon_vld3_dup_v:
-  case NEON::BI__builtin_neon_vld3q_dup_v:
-  case NEON::BI__builtin_neon_vld4_dup_v:
-  case NEON::BI__builtin_neon_vld4q_dup_v: {
-    // Handle 64-bit x 1 elements as a special case; there is no "dup" needed.
-    if (VTy->getElementType()->getPrimitiveSizeInBits() == 64 &&
-        VTy->getNumElements() == 1) {
-      switch (BuiltinID) {
-      case NEON::BI__builtin_neon_vld2_dup_v:
-        Int = Intrinsic::arm_neon_vld2;
-        break;
-      case NEON::BI__builtin_neon_vld3_dup_v:
-        Int = Intrinsic::arm_neon_vld3;
-        break;
-      case NEON::BI__builtin_neon_vld4_dup_v:
-        Int = Intrinsic::arm_neon_vld4;
-        break;
-      default:
-        llvm_unreachable("unknown vld_dup intrinsic?");
-      }
-      Function *F = CGM.getIntrinsic(Int, Ty);
-      Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup");
-      Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
-      Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-      return Builder.CreateStore(Ops[1], Ops[0]);
-    }
-    switch (BuiltinID) {
-    case NEON::BI__builtin_neon_vld2_dup_v:
-    case NEON::BI__builtin_neon_vld2q_dup_v:
-      Int = Intrinsic::arm_neon_vld2lane;
-      break;
-    case NEON::BI__builtin_neon_vld3_dup_v:
-    case NEON::BI__builtin_neon_vld3q_dup_v:
-      Int = Intrinsic::arm_neon_vld3lane;
-      break;
-    case NEON::BI__builtin_neon_vld4_dup_v:
-    case NEON::BI__builtin_neon_vld4q_dup_v:
-      Int = Intrinsic::arm_neon_vld4lane;
-      break;
-    }
-    Function *F = CGM.getIntrinsic(Int, Ty);
-    llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
-
-    SmallVector<Value *, 6> Args;
-    Args.push_back(Ops[1]);
-    Args.append(STy->getNumElements(), UndefValue::get(Ty));
-
-    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
-    Args.push_back(CI);
-    Args.push_back(Align);
-
-    Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
-    // Splat lane 0 to all elements in each vector of the result.
-    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
-      Value *Val = Builder.CreateExtractValue(Ops[1], i);
-      Value *Elt = Builder.CreateBitCast(Val, Ty);
-      Elt = EmitNeonSplat(Elt, CI);
-      Elt = Builder.CreateBitCast(Elt, Val->getType());
-      Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
-    }
-    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
-    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-    return Builder.CreateStore(Ops[1], Ops[0]);
-  }
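For reference, not part of the patch: the removed vldN_dup path above emits a single lane load at index 0 and then splats lane 0 across every vector of the result. A plain-C++ model of the intended semantics for the 8-lane u8 case of vld2_dup (illustration only):

```cpp
#include <cstdint>

// vld2_dup_u8 reference model: read two consecutive elements, then
// broadcast each one across all lanes of its result vector.
void vld2_dup_u8_ref(const uint8_t *p, uint8_t out0[8], uint8_t out1[8]) {
  for (int lane = 0; lane < 8; ++lane) {
    out0[lane] = p[0]; // structure element 0, duplicated to every lane
    out1[lane] = p[1]; // structure element 1, duplicated to every lane
  }
}
```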
-
-  case NEON::BI__builtin_neon_vmul_lane_v:
-  case NEON::BI__builtin_neon_vmul_laneq_v: {
-    // v1f64 vmul_lane should be mapped to Neon scalar mul lane
-    bool Quad = false;
-    if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
-      Quad = true;
-    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
-    llvm::Type *VTy = GetNeonType(this,
-      NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
-    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
-    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
-    Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
-    return Builder.CreateBitCast(Result, Ty);
-  }
-
-  // AArch64-only builtins
-  case NEON::BI__builtin_neon_vfmaq_laneq_v: {
-    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
-    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-
-    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
-    Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
-    return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
-  }
-  case NEON::BI__builtin_neon_vfmaq_lane_v: {
-    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
-    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-
-    llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
-    llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
-                                            VTy->getNumElements() / 2);
-    Ops[2] = Builder.CreateBitCast(Ops[2], STy);
-    Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
-                                               cast<ConstantInt>(Ops[3]));
-    Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
-
-    return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
-  }
-  case NEON::BI__builtin_neon_vfma_lane_v: {
-    llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
-    // v1f64 fma should be mapped to Neon scalar f64 fma
-    if (VTy && VTy->getElementType() == DoubleTy) {
-      Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
-      Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
-      llvm::Type *VTy = GetNeonType(this,
-        NeonTypeFlags(NeonTypeFlags::Float64, false, false));
-      Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
-      Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
-      Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
-      Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
-      return Builder.CreateBitCast(Result, Ty);
-    }
-    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
-    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-
-    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
-    Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
-    return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
-  }
-  case NEON::BI__builtin_neon_vfma_laneq_v: {
-    llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
-    // v1f64 fma should be mapped to Neon scalar f64 fma
-    if (VTy && VTy->getElementType() == DoubleTy) {
-      Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
-      Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
-      llvm::Type *VTy = GetNeonType(this,
-        NeonTypeFlags(NeonTypeFlags::Float64, false, true));
-      Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
-      Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
-      Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
-      Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
-      return Builder.CreateBitCast(Result, Ty);
-    }
-    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
-    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-
-    llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
-                                            VTy->getNumElements() * 2);
-    Ops[2] = Builder.CreateBitCast(Ops[2], STy);
-    Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
-                                               cast<ConstantInt>(Ops[3]));
-    Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
-
-    return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
-  }
-  case NEON::BI__builtin_neon_vfms_v:
-  case NEON::BI__builtin_neon_vfmsq_v: {
-    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
-    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-    Ops[1] = Builder.CreateFNeg(Ops[1]);
-    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
-
-    // LLVM's fma intrinsic puts the accumulator in the last position, but the
-    // AArch64 intrinsic has it first.
-    return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
-  }
-  case NEON::BI__builtin_neon_vmaxnm_v:
-  case NEON::BI__builtin_neon_vmaxnmq_v: {
-    Int = Intrinsic::aarch64_neon_vmaxnm;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
-  }
-  case NEON::BI__builtin_neon_vminnm_v:
-  case NEON::BI__builtin_neon_vminnmq_v: {
-    Int = Intrinsic::aarch64_neon_vminnm;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
-  }
-  case NEON::BI__builtin_neon_vpmaxnm_v:
-  case NEON::BI__builtin_neon_vpmaxnmq_v: {
-    Int = Intrinsic::aarch64_neon_vpmaxnm;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
-  }
-  case NEON::BI__builtin_neon_vpminnm_v:
-  case NEON::BI__builtin_neon_vpminnmq_v: {
-    Int = Intrinsic::aarch64_neon_vpminnm;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
-  }
-  case NEON::BI__builtin_neon_vpmaxq_v: {
-    Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
-  }
-  case NEON::BI__builtin_neon_vpminq_v: {
-    Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
-  }
-  case NEON::BI__builtin_neon_vmulx_v:
-  case NEON::BI__builtin_neon_vmulxq_v: {
-    Int = Intrinsic::aarch64_neon_vmulx;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
-  }
-  case NEON::BI__builtin_neon_vsqadd_v:
-  case NEON::BI__builtin_neon_vsqaddq_v: {
-    Int = Intrinsic::aarch64_neon_usqadd;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
-  }
-  case NEON::BI__builtin_neon_vuqadd_v:
-  case NEON::BI__builtin_neon_vuqaddq_v: {
-    Int = Intrinsic::aarch64_neon_suqadd;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
-  }
-  case NEON::BI__builtin_neon_vrbit_v:
-  case NEON::BI__builtin_neon_vrbitq_v:
-    Int = Intrinsic::aarch64_neon_rbit;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
-  case NEON::BI__builtin_neon_vcvt_f32_f64: {
-    NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
-    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
-    return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
-  }
-  case NEON::BI__builtin_neon_vcvtx_f32_v: {
-    llvm::Type *EltTy = FloatTy;
-    llvm::Type *ResTy = llvm::VectorType::get(EltTy, 2);
-    llvm::Type *Tys[2] = { ResTy, Ty };
-    Int = Intrinsic::aarch64_neon_vcvtxn;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtx_f32_f64");
-  }
-  case NEON::BI__builtin_neon_vcvt_f64_f32: {
-    llvm::Type *OpTy =
-        GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, false));
-    Ops[0] = Builder.CreateBitCast(Ops[0], OpTy);
-    return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
-  }
-  case NEON::BI__builtin_neon_vcvt_f64_v:
-  case NEON::BI__builtin_neon_vcvtq_f64_v: {
-    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
-    return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
-                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
-  }
-  case NEON::BI__builtin_neon_vrndn_v:
-  case NEON::BI__builtin_neon_vrndnq_v: {
-    Int = Intrinsic::aarch64_neon_frintn;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
-  }
-  case NEON::BI__builtin_neon_vrnda_v:
-  case NEON::BI__builtin_neon_vrndaq_v: {
-    Int = Intrinsic::round;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
-  }
-  case NEON::BI__builtin_neon_vrndp_v:
-  case NEON::BI__builtin_neon_vrndpq_v: {
-    Int = Intrinsic::ceil;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
-  }
-  case NEON::BI__builtin_neon_vrndm_v:
-  case NEON::BI__builtin_neon_vrndmq_v: {
-    Int = Intrinsic::floor;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
-  }
-  case NEON::BI__builtin_neon_vrndx_v:
-  case NEON::BI__builtin_neon_vrndxq_v: {
-    Int = Intrinsic::rint;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
-  }
-  case NEON::BI__builtin_neon_vrnd_v:
-  case NEON::BI__builtin_neon_vrndq_v: {
-    Int = Intrinsic::trunc;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd");
-  }
-  case NEON::BI__builtin_neon_vrndi_v:
-  case NEON::BI__builtin_neon_vrndiq_v: {
-    Int = Intrinsic::nearbyint;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
-  }
-  case NEON::BI__builtin_neon_vsqrt_v:
-  case NEON::BI__builtin_neon_vsqrtq_v: {
-    Int = Intrinsic::sqrt;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
-  }
-  case NEON::BI__builtin_neon_vceqz_v:
-  case NEON::BI__builtin_neon_vceqzq_v:
-    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
-                                         ICmpInst::ICMP_EQ, "vceqz");
-  case NEON::BI__builtin_neon_vcgez_v:
-  case NEON::BI__builtin_neon_vcgezq_v:
-    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
-                                         ICmpInst::ICMP_SGE, "vcgez");
-  case NEON::BI__builtin_neon_vclez_v:
-  case NEON::BI__builtin_neon_vclezq_v:
-    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
-                                         ICmpInst::ICMP_SLE, "vclez");
-  case NEON::BI__builtin_neon_vcgtz_v:
-  case NEON::BI__builtin_neon_vcgtzq_v:
-    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
-                                         ICmpInst::ICMP_SGT, "vcgtz");
-  case NEON::BI__builtin_neon_vcltz_v:
-  case NEON::BI__builtin_neon_vcltzq_v:
-    return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
-                                         ICmpInst::ICMP_SLT, "vcltz");
-  }
-}
-
-Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
-                                           const CallExpr *E) {
   if (BuiltinID == ARM::BI__clear_cache) {
     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
     const FunctionDecl *FD = E->getDirectCallee();
@@ -4422,11 +3144,6 @@
     return Builder.CreateCall(F);
   }
 
-  if (BuiltinID == ARM::BI__builtin_arm_sevl) {
-    Function *F = CGM.getIntrinsic(Intrinsic::arm_sevl);
-    return Builder.CreateCall(F);
-  }
-
   // CRC32
   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
   switch (BuiltinID) {
@@ -4471,7 +3188,7 @@
   }
 
   SmallVector<Value*, 4> Ops;
-  llvm::Value *Align = 0;
+  llvm::Value *Align = nullptr;
   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
     if (i == 0) {
       switch (BuiltinID) {
@@ -4587,7 +3304,7 @@
   llvm::APSInt Result;
   const Expr *Arg = E->getArg(E->getNumArgs()-1);
   if (!Arg->isIntegerConstantExpr(Result, getContext()))
-    return 0;
+    return nullptr;
 
   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
@@ -4615,7 +3332,7 @@
   llvm::VectorType *VTy = GetNeonType(this, Type);
   llvm::Type *Ty = VTy;
   if (!Ty)
-    return 0;
+    return nullptr;
 
   // Many NEON builtins have identical semantics and uses in ARM and
   // AArch64. Emit these in a single function.
@@ -4629,7 +3346,7 @@
 
   unsigned Int;
   switch (BuiltinID) {
-  default: return 0;
+  default: return nullptr;
   case NEON::BI__builtin_neon_vld1q_lane_v:
     // Handle 64-bit integer elements as a special case.  Use shuffles of
     // one-element vectors to avoid poor code for i64 in the backend.
@@ -4817,16 +3534,15 @@
   }
 }
 
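For reference, not part of the patch: EmitAArch64TblBuiltinExpr below (via the packTBLDVectorList helper) must preserve the architectural TBL/TBX lane semantics, which per lane are:

```cpp
#include <cstdint>

// Per-lane TBL/TBX reference model (illustration only): TBL yields 0 for an
// out-of-range index, while TBX keeps the existing destination lane instead.
static uint8_t tblLane(const uint8_t *table, unsigned tableBytes,
                       uint8_t destLane, uint8_t index, bool isTbx) {
  if (index < tableBytes)
    return table[index];
  return isTbx ? destLane : 0;
}
```

This difference is why the vtbx1/vtbx3 cases below emit a tbl call followed by an explicit compare-and-select on the index (against 8 or 24) rather than a tbx on the packed table: indices out of range for the narrow source table would otherwise read the packing padding instead of preserving the destination lane.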
-static Value *EmitARM64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
+static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
                                       const CallExpr *E,
                                       SmallVectorImpl<Value *> &Ops) {
   unsigned int Int = 0;
-  const char *s = NULL;
+  const char *s = nullptr;
 
-  unsigned TblPos;
   switch (BuiltinID) {
   default:
-    return 0;
+    return nullptr;
   case NEON::BI__builtin_neon_vtbl1_v:
   case NEON::BI__builtin_neon_vqtbl1_v:
   case NEON::BI__builtin_neon_vqtbl1q_v:
@@ -4839,7 +3555,6 @@
   case NEON::BI__builtin_neon_vtbl4_v:
   case NEON::BI__builtin_neon_vqtbl4_v:
   case NEON::BI__builtin_neon_vqtbl4q_v:
-    TblPos = 0;
     break;
   case NEON::BI__builtin_neon_vtbx1_v:
   case NEON::BI__builtin_neon_vqtbx1_v:
@@ -4853,7 +3568,6 @@
   case NEON::BI__builtin_neon_vtbx4_v:
   case NEON::BI__builtin_neon_vqtbx4_v:
   case NEON::BI__builtin_neon_vqtbx4q_v:
-    TblPos = 1;
     break;
   }
 
@@ -4863,16 +3577,15 @@
   llvm::APSInt Result;
   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
   if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
-    return 0;
+    return nullptr;
 
   // Determine the type of this overloaded NEON intrinsic.
   NeonTypeFlags Type(Result.getZExtValue());
   llvm::VectorType *VTy = GetNeonType(&CGF, Type);
   llvm::Type *Ty = VTy;
   if (!Ty)
-    return 0;
+    return nullptr;
 
-  Arg = E->getArg(TblPos);
   unsigned nElts = VTy->getNumElements();
 
   CodeGen::CGBuilderTy &Builder = CGF.Builder;
@@ -4883,34 +3596,34 @@
   switch (BuiltinID) {
   case NEON::BI__builtin_neon_vtbl1_v: {
     TblOps.push_back(Ops[0]);
-    return packTBLDVectorList(CGF, TblOps, 0, Ops[1], Ty,
-                              Intrinsic::arm64_neon_tbl1, "vtbl1");
+    return packTBLDVectorList(CGF, TblOps, nullptr, Ops[1], Ty,
+                              Intrinsic::aarch64_neon_tbl1, "vtbl1");
   }
   case NEON::BI__builtin_neon_vtbl2_v: {
     TblOps.push_back(Ops[0]);
     TblOps.push_back(Ops[1]);
-    return packTBLDVectorList(CGF, TblOps, 0, Ops[2], Ty,
-                              Intrinsic::arm64_neon_tbl1, "vtbl1");
+    return packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty,
+                              Intrinsic::aarch64_neon_tbl1, "vtbl1");
   }
   case NEON::BI__builtin_neon_vtbl3_v: {
     TblOps.push_back(Ops[0]);
     TblOps.push_back(Ops[1]);
     TblOps.push_back(Ops[2]);
-    return packTBLDVectorList(CGF, TblOps, 0, Ops[3], Ty,
-                              Intrinsic::arm64_neon_tbl2, "vtbl2");
+    return packTBLDVectorList(CGF, TblOps, nullptr, Ops[3], Ty,
+                              Intrinsic::aarch64_neon_tbl2, "vtbl2");
   }
   case NEON::BI__builtin_neon_vtbl4_v: {
     TblOps.push_back(Ops[0]);
     TblOps.push_back(Ops[1]);
     TblOps.push_back(Ops[2]);
     TblOps.push_back(Ops[3]);
-    return packTBLDVectorList(CGF, TblOps, 0, Ops[4], Ty,
-                              Intrinsic::arm64_neon_tbl2, "vtbl2");
+    return packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty,
+                              Intrinsic::aarch64_neon_tbl2, "vtbl2");
   }
   case NEON::BI__builtin_neon_vtbx1_v: {
     TblOps.push_back(Ops[1]);
-    Value *TblRes = packTBLDVectorList(CGF, TblOps, 0, Ops[2], Ty,
-                                    Intrinsic::arm64_neon_tbl1, "vtbl1");
+    Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty,
+                                       Intrinsic::aarch64_neon_tbl1, "vtbl1");
 
     llvm::Constant *Eight = ConstantInt::get(VTy->getElementType(), 8);
     Value* EightV = llvm::ConstantVector::getSplat(nElts, Eight);
@@ -4925,14 +3638,14 @@
     TblOps.push_back(Ops[1]);
     TblOps.push_back(Ops[2]);
     return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[3], Ty,
-                              Intrinsic::arm64_neon_tbx1, "vtbx1");
+                              Intrinsic::aarch64_neon_tbx1, "vtbx1");
   }
   case NEON::BI__builtin_neon_vtbx3_v: {
     TblOps.push_back(Ops[1]);
     TblOps.push_back(Ops[2]);
     TblOps.push_back(Ops[3]);
-    Value *TblRes = packTBLDVectorList(CGF, TblOps, 0, Ops[4], Ty,
-                                       Intrinsic::arm64_neon_tbl2, "vtbl2");
+    Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty,
+                                       Intrinsic::aarch64_neon_tbl2, "vtbl2");
 
     llvm::Constant *TwentyFour = ConstantInt::get(VTy->getElementType(), 24);
     Value* TwentyFourV = llvm::ConstantVector::getSplat(nElts, TwentyFour);
@@ -4950,37 +3663,37 @@
     TblOps.push_back(Ops[3]);
     TblOps.push_back(Ops[4]);
     return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[5], Ty,
-                              Intrinsic::arm64_neon_tbx2, "vtbx2");
+                              Intrinsic::aarch64_neon_tbx2, "vtbx2");
   }
   case NEON::BI__builtin_neon_vqtbl1_v:
   case NEON::BI__builtin_neon_vqtbl1q_v:
-    Int = Intrinsic::arm64_neon_tbl1; s = "vtbl1"; break;
+    Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
   case NEON::BI__builtin_neon_vqtbl2_v:
   case NEON::BI__builtin_neon_vqtbl2q_v:
-    Int = Intrinsic::arm64_neon_tbl2; s = "vtbl2"; break;
+    Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
   case NEON::BI__builtin_neon_vqtbl3_v:
   case NEON::BI__builtin_neon_vqtbl3q_v:
-    Int = Intrinsic::arm64_neon_tbl3; s = "vtbl3"; break;
+    Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
   case NEON::BI__builtin_neon_vqtbl4_v:
   case NEON::BI__builtin_neon_vqtbl4q_v:
-    Int = Intrinsic::arm64_neon_tbl4; s = "vtbl4"; break;
+    Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
   case NEON::BI__builtin_neon_vqtbx1_v:
   case NEON::BI__builtin_neon_vqtbx1q_v:
-    Int = Intrinsic::arm64_neon_tbx1; s = "vtbx1"; break;
+    Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
   case NEON::BI__builtin_neon_vqtbx2_v:
   case NEON::BI__builtin_neon_vqtbx2q_v:
-    Int = Intrinsic::arm64_neon_tbx2; s = "vtbx2"; break;
+    Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
   case NEON::BI__builtin_neon_vqtbx3_v:
   case NEON::BI__builtin_neon_vqtbx3q_v:
-    Int = Intrinsic::arm64_neon_tbx3; s = "vtbx3"; break;
+    Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
   case NEON::BI__builtin_neon_vqtbx4_v:
   case NEON::BI__builtin_neon_vqtbx4q_v:
-    Int = Intrinsic::arm64_neon_tbx4; s = "vtbx4"; break;
+    Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
   }
 
   if (!Int)
-    return 0;
+    return nullptr;
 
   Function *F = CGF.CGM.getIntrinsic(Int, Ty);
   return CGF.EmitNeonCall(F, Ops, s);
@@ -5007,7 +3720,7 @@
 Value *CodeGenFunction::
 emitVectorWrappedScalar8Intrinsic(unsigned Int, SmallVectorImpl<Value*> &Ops,
                                   const char *Name) {
-  // i8 is not a legal types for ARM64, so we can't just use
+  // i8 is not a legal type for AArch64, so we can't just use
   // a normal overloaded intrinsic call for these scalar types. Instead
   // we'll build 64-bit vectors w/ lane zero being our input values and
   // perform the operation on that. The back end can pattern match directly
@@ -5023,7 +3736,7 @@
 Value *CodeGenFunction::
 emitVectorWrappedScalar16Intrinsic(unsigned Int, SmallVectorImpl<Value*> &Ops,
                                    const char *Name) {
-  // i16 is not a legal types for ARM64, so we can't just use
+  // i16 is not a legal type for AArch64, so we can't just use
   // a normal overloaded intrinsic call for these scalar types. Instead
   // we'll build 64-bit vectors w/ lane zero being our input values and
   // perform the operation on that. The back end can pattern match directly
@@ -5036,9 +3749,9 @@
   return Builder.CreateExtractElement(V, CI, "lane0");
 }
 
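For reference, not part of the patch: a runnable plain-C++ model of the wrap-scalar trick the two helpers above implement (illustration only; the real code does this with IRBuilder on v8i8/v4i16):

```cpp
#include <cstdint>

// Reference model of emitVectorWrappedScalar8Intrinsic: run the operation on
// a legal 8-lane vector with the scalar in lane 0, then read lane 0 back.
template <typename VectorOp> // VectorOp mutates all 8 lanes in place
uint8_t wrappedScalar8(VectorOp op, uint8_t x) {
  uint8_t lanes[8] = {x}; // lane 0 = x; the rest are zero-initialized here
                          // (don't-care lanes in the real lowering)
  op(lanes);              // the legal 64-bit vector operation
  return lanes[0];        // extract lane 0 as the scalar result
}
```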
-Value *CodeGenFunction::EmitARM64BuiltinExpr(unsigned BuiltinID,
-                                             const CallExpr *E) {
-  if (BuiltinID == ARM64::BI__clear_cache) {
+Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
+                                               const CallExpr *E) {
+  if (BuiltinID == AArch64::BI__clear_cache) {
     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
     const FunctionDecl *FD = E->getDirectCallee();
     SmallVector<Value*, 2> Ops;
@@ -5050,9 +3763,9 @@
     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
   }
 
-  if (BuiltinID == ARM64::BI__builtin_arm_ldrex &&
+  if (BuiltinID == AArch64::BI__builtin_arm_ldrex &&
       getContext().getTypeSize(E->getType()) == 128) {
-    Function *F = CGM.getIntrinsic(Intrinsic::arm64_ldxp);
+    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ldxp);
 
     Value *LdPtr = EmitScalarExpr(E->getArg(0));
     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
@@ -5068,7 +3781,7 @@
     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
     Val = Builder.CreateOr(Val, Val1);
     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
-  } else if (BuiltinID == ARM64::BI__builtin_arm_ldrex) {
+  } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex) {
     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
 
     QualType Ty = E->getType();
@@ -5077,7 +3790,7 @@
                                                   getContext().getTypeSize(Ty));
     LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
 
-    Function *F = CGM.getIntrinsic(Intrinsic::arm64_ldxr, LoadAddr->getType());
+    Function *F =
+        CGM.getIntrinsic(Intrinsic::aarch64_ldxr, LoadAddr->getType());
     Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
 
     if (RealResTy->isPointerTy())
@@ -5087,9 +3800,9 @@
     return Builder.CreateBitCast(Val, RealResTy);
   }
 
-  if (BuiltinID == ARM64::BI__builtin_arm_strex &&
+  if (BuiltinID == AArch64::BI__builtin_arm_strex &&
       getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
-    Function *F = CGM.getIntrinsic(Intrinsic::arm64_stxp);
+    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_stxp);
     llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, NULL);
 
     Value *One = llvm::ConstantInt::get(Int32Ty, 1);
@@ -5106,7 +3819,7 @@
     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
                                          Int8PtrTy);
     return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "stxp");
-  } else if (BuiltinID == ARM64::BI__builtin_arm_strex) {
+  } else if (BuiltinID == AArch64::BI__builtin_arm_strex) {
     Value *StoreVal = EmitScalarExpr(E->getArg(0));
     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
 
@@ -5122,34 +3835,34 @@
       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
     }
 
-    Function *F = CGM.getIntrinsic(Intrinsic::arm64_stxr, StoreAddr->getType());
+    Function *F =
+        CGM.getIntrinsic(Intrinsic::aarch64_stxr, StoreAddr->getType());
     return Builder.CreateCall2(F, StoreVal, StoreAddr, "stxr");
   }
 
-  if (BuiltinID == ARM64::BI__builtin_arm_clrex) {
-    Function *F = CGM.getIntrinsic(Intrinsic::arm64_clrex);
+  if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
+    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
     return Builder.CreateCall(F);
   }
 
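For illustration only (not part of the patch; assumes a Clang AArch64 target where these builtins are available): a 64-bit atomic exchange written in terms of the exclusive builtins that the code above lowers to aarch64_ldxr/aarch64_stxr:

```cpp
#include <cstdint>

// Retry loop on the load-/store-exclusive pair; __builtin_arm_strex returns
// 0 when the store-exclusive succeeds.
static inline uint64_t exclusive_exchange(uint64_t *p, uint64_t v) {
  uint64_t old;
  do {
    old = __builtin_arm_ldrex(p);      // LDXR
  } while (__builtin_arm_strex(v, p)); // STXR
  return old;
}
```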
   // CRC32
   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
   switch (BuiltinID) {
-  case ARM64::BI__builtin_arm_crc32b:
-    CRCIntrinsicID = Intrinsic::arm64_crc32b; break;
-  case ARM64::BI__builtin_arm_crc32cb:
-    CRCIntrinsicID = Intrinsic::arm64_crc32cb; break;
-  case ARM64::BI__builtin_arm_crc32h:
-    CRCIntrinsicID = Intrinsic::arm64_crc32h; break;
-  case ARM64::BI__builtin_arm_crc32ch:
-    CRCIntrinsicID = Intrinsic::arm64_crc32ch; break;
-  case ARM64::BI__builtin_arm_crc32w:
-    CRCIntrinsicID = Intrinsic::arm64_crc32w; break;
-  case ARM64::BI__builtin_arm_crc32cw:
-    CRCIntrinsicID = Intrinsic::arm64_crc32cw; break;
-  case ARM64::BI__builtin_arm_crc32d:
-    CRCIntrinsicID = Intrinsic::arm64_crc32x; break;
-  case ARM64::BI__builtin_arm_crc32cd:
-    CRCIntrinsicID = Intrinsic::arm64_crc32cx; break;
+  case AArch64::BI__builtin_arm_crc32b:
+    CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
+  case AArch64::BI__builtin_arm_crc32cb:
+    CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
+  case AArch64::BI__builtin_arm_crc32h:
+    CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
+  case AArch64::BI__builtin_arm_crc32ch:
+    CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
+  case AArch64::BI__builtin_arm_crc32w:
+    CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
+  case AArch64::BI__builtin_arm_crc32cw:
+    CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
+  case AArch64::BI__builtin_arm_crc32d:
+    CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
+  case AArch64::BI__builtin_arm_crc32cd:
+    CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
   }
 
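For illustration only (not part of the patch; assumes a target with the CRC extension): folding a byte buffer into a CRC-32C with the builtins mapped above. The 'c' variants select the Castagnoli polynomial; the plain variants use the CRC-32 polynomial.

```cpp
#include <cstdint>

uint32_t crc32c_bytes(uint32_t crc, const uint8_t *p, unsigned n) {
  for (unsigned i = 0; i != n; ++i)
    crc = __builtin_arm_crc32cb(crc, p[i]); // lowers to llvm.aarch64.crc32cb
  return crc;
}
```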
   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
@@ -5167,9 +3880,9 @@
   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++)
     Ops.push_back(EmitScalarExpr(E->getArg(i)));
 
-  llvm::ArrayRef<NeonIntrinsicInfo> SISDMap(ARM64SISDIntrinsicMap);
+  llvm::ArrayRef<NeonIntrinsicInfo> SISDMap(AArch64SISDIntrinsicMap);
   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
-      SISDMap, BuiltinID, ARM64SISDIntrinsicsProvenSorted);
+      SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
 
   if (Builtin) {
     Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
@@ -5513,50 +4226,46 @@
     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
     ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
-    Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_sqdmull, VTy),
+    Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
                           ProductOps, "vqdmlXl");
     Constant *CI = ConstantInt::get(Int32Ty, 0);
     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
 
     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
-                                        ? Intrinsic::arm64_neon_sqadd
-                                        : Intrinsic::arm64_neon_sqsub;
+                                        ? Intrinsic::aarch64_neon_sqadd
+                                        : Intrinsic::aarch64_neon_sqsub;
     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
   }
   case NEON::BI__builtin_neon_vqshlud_n_s64: {
     Ops.push_back(EmitScalarExpr(E->getArg(1)));
     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
-    llvm::Type *VTy = llvm::VectorType::get(Int64Ty, 1);
-    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_sqshlu, VTy),
-                          Ops, "vqshlu_n");
-    return Builder.CreateBitCast(Ops[0], Int64Ty);
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
+                        Ops, "vqshlu_n");
   }
   case NEON::BI__builtin_neon_vqshld_n_u64:
   case NEON::BI__builtin_neon_vqshld_n_s64: {
     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
-                                   ? Intrinsic::arm64_neon_uqshl
-                                   : Intrinsic::arm64_neon_sqshl;
+                                   ? Intrinsic::aarch64_neon_uqshl
+                                   : Intrinsic::aarch64_neon_sqshl;
     Ops.push_back(EmitScalarExpr(E->getArg(1)));
     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
-    llvm::Type *VTy = llvm::VectorType::get(Int64Ty, 1);
-    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, VTy), Ops, "vqshl_n");
-    return Builder.CreateBitCast(Ops[0], Int64Ty);
+    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
   }
   case NEON::BI__builtin_neon_vrshrd_n_u64:
   case NEON::BI__builtin_neon_vrshrd_n_s64: {
     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
-                                   ? Intrinsic::arm64_neon_urshl
-                                   : Intrinsic::arm64_neon_srshl;
+                                   ? Intrinsic::aarch64_neon_urshl
+                                   : Intrinsic::aarch64_neon_srshl;
     Ops.push_back(EmitScalarExpr(E->getArg(1)));
-    llvm::Type *VTy = llvm::VectorType::get(Int64Ty, 1);
-    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, VTy), Ops, "vrshr_n", 1, true);
-    return Builder.CreateBitCast(Ops[0], Int64Ty);
+    int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
+    Ops[1] = ConstantInt::get(Int64Ty, -SV);
+    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
   }
   case NEON::BI__builtin_neon_vrsrad_n_u64:
   case NEON::BI__builtin_neon_vrsrad_n_s64: {
     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
-                                   ? Intrinsic::arm64_neon_urshl
-                                   : Intrinsic::arm64_neon_srshl;
+                                   ? Intrinsic::aarch64_neon_urshl
+                                   : Intrinsic::aarch64_neon_srshl;
     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
     Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
     Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Int64Ty), Ops[1],
@@ -5567,38 +4276,41 @@
   case NEON::BI__builtin_neon_vshld_n_u64: {
     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
     return Builder.CreateShl(
-        Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
-                                                   Amt->getZExtValue())),
-        "vshr_n");
+        Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
   }
   case NEON::BI__builtin_neon_vshrd_n_s64: {
     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
     return Builder.CreateAShr(
         Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
                                                    Amt->getZExtValue())),
-        "vshr_n");
+        "shrd_n");
   }
   case NEON::BI__builtin_neon_vshrd_n_u64: {
     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
-    return Builder.CreateLShr(
-        Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
-                                                   Amt->getZExtValue())),
-        "vshr_n");
+    uint64_t ShiftAmt = Amt->getZExtValue();
+    // Right-shifting an unsigned value by its full width yields 0, but an
+    // LLVM lshr by >= the bit width is undefined, so fold it to a constant.
+    if (ShiftAmt == 64)
+      return ConstantInt::get(Int64Ty, 0);
+    return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
+                              "shrd_n");
   }
   case NEON::BI__builtin_neon_vsrad_n_s64: {
     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
     Ops[1] = Builder.CreateAShr(
         Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
                                                    Amt->getZExtValue())),
-        "vshr_n");
+        "shrd_n");
     return Builder.CreateAdd(Ops[0], Ops[1]);
   }
   case NEON::BI__builtin_neon_vsrad_n_u64: {
     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
-    Ops[1] = Builder.CreateLShr(
-        Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
-                                                   Amt->getZExtValue())),
-        "vshr_n");
+    uint64_t ShiftAmt = Amt->getZExtValue();
+    // As above, a right shift by the full width yields 0, which an LLVM
+    // lshr cannot express; and since Op + 0 == Op, return Ops[0] directly.
+    if (ShiftAmt == 64)
+      return Ops[0];
+    Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
+                                "shrd_n");
     return Builder.CreateAdd(Ops[0], Ops[1]);
   }
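For reference, not part of the patch: the scalar shift builtins accept shift amounts of 1..64, which a plain IR shift cannot express, hence the folds above. Their intended semantics in plain C++ (illustration only):

```cpp
#include <cstdint>

uint64_t vshrd_n_u64_ref(uint64_t x, unsigned n) { // valid n: 1..64
  return n == 64 ? 0 : x >> n; // a shift by the full width is defined as 0
}
int64_t vshrd_n_s64_ref(int64_t x, unsigned n) {   // valid n: 1..64
  return x >> (n > 63 ? 63 : n); // the arithmetic shift clamps at 63
}
```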
   case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
@@ -5611,7 +4323,7 @@
     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
     ProductOps.push_back(vectorWrapScalar16(Ops[2]));
     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
-    Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_sqdmull, VTy),
+    Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
                           ProductOps, "vqdmlXl");
     Constant *CI = ConstantInt::get(Int32Ty, 0);
     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
@@ -5619,8 +4331,8 @@
 
     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
                        BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
-                          ? Intrinsic::arm64_neon_sqadd
-                          : Intrinsic::arm64_neon_sqsub;
+                          ? Intrinsic::aarch64_neon_sqadd
+                          : Intrinsic::aarch64_neon_sqsub;
     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
   }
   case NEON::BI__builtin_neon_vqdmlals_s32:
@@ -5629,12 +4341,12 @@
     ProductOps.push_back(Ops[1]);
     ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
     Ops[1] =
-        EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_sqdmulls_scalar),
+        EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
                      ProductOps, "vqdmlXl");
 
     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
-                                        ? Intrinsic::arm64_neon_sqadd
-                                        : Intrinsic::arm64_neon_sqsub;
+                                        ? Intrinsic::aarch64_neon_sqadd
+                                        : Intrinsic::aarch64_neon_sqsub;
     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
   }
   case NEON::BI__builtin_neon_vqdmlals_lane_s32:
@@ -5647,14 +4359,14 @@
     ProductOps.push_back(Ops[1]);
     ProductOps.push_back(Ops[2]);
     Ops[1] =
-        EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_sqdmulls_scalar),
+        EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
                      ProductOps, "vqdmlXl");
     Ops.pop_back();
 
     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
                        BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
-                          ? Intrinsic::arm64_neon_sqadd
-                          : Intrinsic::arm64_neon_sqsub;
+                          ? Intrinsic::aarch64_neon_sqadd
+                          : Intrinsic::aarch64_neon_sqsub;
     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
   }
   }
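For reference, not part of the patch: the scalar saturating multiply-accumulate cases above all follow one shape, a saturating doubled widening multiply (sqdmull/sqdmulls) followed by a saturating add or subtract into the accumulator. A plain-C++ model of vqdmlals_s32 (illustration only; __int128 is a GCC/Clang extension):

```cpp
#include <cstdint>

int64_t vqdmlals_ref(int64_t acc, int32_t a, int32_t b) {
  __int128 prod = (__int128)a * b * 2; // sqdmulls: doubled widening product
  const __int128 kMax = INT64_MAX, kMin = INT64_MIN;
  if (prod > kMax) prod = kMax;        // saturate the product to i64
  if (prod < kMin) prod = kMin;
  __int128 sum = (__int128)acc + prod; // sqadd: saturating accumulate
  if (sum > kMax) sum = kMax;
  if (sum < kMin) sum = kMin;
  return (int64_t)sum;
}
```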
@@ -5662,24 +4374,24 @@
   llvm::VectorType *VTy = GetNeonType(this, Type);
   llvm::Type *Ty = VTy;
   if (!Ty)
-    return 0;
+    return nullptr;
 
-  // Not all intrinsics handled by the common case work for ARM64 yet, so only
+  // Not all intrinsics handled by the common case work for AArch64 yet, so only
   // defer to common code if it's been added to our special map.
-  Builtin = findNeonIntrinsicInMap(ARM64SIMDIntrinsicMap, BuiltinID,
-                                   ARM64SIMDIntrinsicsProvenSorted);
+  Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
+                                   AArch64SIMDIntrinsicsProvenSorted);
 
   if (Builtin)
     return EmitCommonNeonBuiltinExpr(
         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
-        Builtin->NameHint, Builtin->TypeModifier, E, Ops, 0);
+        Builtin->NameHint, Builtin->TypeModifier, E, Ops, nullptr);
 
-  if (Value *V = EmitARM64TblBuiltinExpr(*this, BuiltinID, E, Ops))
+  if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
     return V;
 
   unsigned Int;
   switch (BuiltinID) {
-  default: return 0;
+  default: return nullptr;
   case NEON::BI__builtin_neon_vbsl_v:
   case NEON::BI__builtin_neon_vbslq_v: {
     llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
@@ -5780,26 +4492,26 @@
   }
   case NEON::BI__builtin_neon_vmull_v:
     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
-    Int = usgn ? Intrinsic::arm64_neon_umull : Intrinsic::arm64_neon_smull;
-    if (Type.isPoly()) Int = Intrinsic::arm64_neon_pmull;
+    Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
+    if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
   case NEON::BI__builtin_neon_vmax_v:
   case NEON::BI__builtin_neon_vmaxq_v:
     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
-    Int = usgn ? Intrinsic::arm64_neon_umax : Intrinsic::arm64_neon_smax;
-    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::arm64_neon_fmax;
+    Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
+    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
   case NEON::BI__builtin_neon_vmin_v:
   case NEON::BI__builtin_neon_vminq_v:
     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
-    Int = usgn ? Intrinsic::arm64_neon_umin : Intrinsic::arm64_neon_smin;
-    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::arm64_neon_fmin;
+    Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
+    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
   case NEON::BI__builtin_neon_vabd_v:
   case NEON::BI__builtin_neon_vabdq_v:
     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
-    Int = usgn ? Intrinsic::arm64_neon_uabd : Intrinsic::arm64_neon_sabd;
-    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::arm64_neon_fabd;
+    Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
+    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
   case NEON::BI__builtin_neon_vpadal_v:
   case NEON::BI__builtin_neon_vpadalq_v: {
@@ -5809,7 +4521,7 @@
     llvm::Type *ArgTy = llvm::VectorType::get(
         llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
     llvm::Type* Tys[2] = { VTy, ArgTy };
-    Int = usgn ? Intrinsic::arm64_neon_uaddlp : Intrinsic::arm64_neon_saddlp;
+    Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
     SmallVector<llvm::Value*, 1> TmpOps;
     TmpOps.push_back(Ops[1]);
     Function *F = CGM.getIntrinsic(Int, Tys);
@@ -5820,33 +4532,33 @@
   case NEON::BI__builtin_neon_vpmin_v:
   case NEON::BI__builtin_neon_vpminq_v:
     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
-    Int = usgn ? Intrinsic::arm64_neon_uminp : Intrinsic::arm64_neon_sminp;
-    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::arm64_neon_fminp;
+    Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
+    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
   case NEON::BI__builtin_neon_vpmax_v:
   case NEON::BI__builtin_neon_vpmaxq_v:
     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
-    Int = usgn ? Intrinsic::arm64_neon_umaxp : Intrinsic::arm64_neon_smaxp;
-    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::arm64_neon_fmaxp;
+    Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
+    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
   case NEON::BI__builtin_neon_vminnm_v:
   case NEON::BI__builtin_neon_vminnmq_v:
-    Int = Intrinsic::arm64_neon_fminnm;
+    Int = Intrinsic::aarch64_neon_fminnm;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
   case NEON::BI__builtin_neon_vmaxnm_v:
   case NEON::BI__builtin_neon_vmaxnmq_v:
-    Int = Intrinsic::arm64_neon_fmaxnm;
+    Int = Intrinsic::aarch64_neon_fmaxnm;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
   case NEON::BI__builtin_neon_vrecpss_f32: {
     llvm::Type *f32Type = llvm::Type::getFloatTy(getLLVMContext());
     Ops.push_back(EmitScalarExpr(E->getArg(1)));
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_frecps, f32Type),
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, f32Type),
                         Ops, "vrecps");
   }
   case NEON::BI__builtin_neon_vrecpsd_f64: {
     llvm::Type *f64Type = llvm::Type::getDoubleTy(getLLVMContext());
     Ops.push_back(EmitScalarExpr(E->getArg(1)));
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_frecps, f64Type),
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, f64Type),
                         Ops, "vrecps");
   }
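For illustration only (not part of the patch; uses the public arm_neon.h intrinsics on an AArch64 target): FRECPS computes 2.0f - a*b, the Newton-Raphson correction step, which is why vrecps takes both the original value and the current estimate:

```cpp
#include <arm_neon.h>

float recip_nr(float x) {
  float r = vrecpes_f32(x);  // FRECPE: initial reciprocal estimate
  r = vrecpss_f32(x, r) * r; // one refinement step: r *= (2 - x*r)
  r = vrecpss_f32(x, r) * r; // second step for near-full precision
  return r;
}
```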
   case NEON::BI__builtin_neon_vrshr_n_v:
@@ -5854,34 +4566,34 @@
     // FIXME: this can be shared with 32-bit ARM, but not AArch64 at the
     // moment. After the final merge it should be added to
     // EmitCommonNeonBuiltinExpr.
-    Int = usgn ? Intrinsic::arm64_neon_urshl : Intrinsic::arm64_neon_srshl;
+    Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true);
   case NEON::BI__builtin_neon_vqshlu_n_v:
   case NEON::BI__builtin_neon_vqshluq_n_v:
     // FIXME: AArch64 and ARM use different intrinsics for this, but are
     // essentially compatible. It should be in EmitCommonNeonBuiltinExpr after
     // the final merge.
-    Int = Intrinsic::arm64_neon_sqshlu;
+    Int = Intrinsic::aarch64_neon_sqshlu;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n", 1, false);
   case NEON::BI__builtin_neon_vqshrun_n_v:
     // FIXME: as above
-    Int = Intrinsic::arm64_neon_sqshrun;
+    Int = Intrinsic::aarch64_neon_sqshrun;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
   case NEON::BI__builtin_neon_vqrshrun_n_v:
     // FIXME: and again.
-    Int = Intrinsic::arm64_neon_sqrshrun;
+    Int = Intrinsic::aarch64_neon_sqrshrun;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
   case NEON::BI__builtin_neon_vqshrn_n_v:
     // FIXME: guess
-    Int = usgn ? Intrinsic::arm64_neon_uqshrn : Intrinsic::arm64_neon_sqshrn;
+    Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
   case NEON::BI__builtin_neon_vrshrn_n_v:
     // FIXME: there might be a pattern here.
-    Int = Intrinsic::arm64_neon_rshrn;
+    Int = Intrinsic::aarch64_neon_rshrn;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
   case NEON::BI__builtin_neon_vqrshrn_n_v:
     // FIXME: another one
-    Int = usgn ? Intrinsic::arm64_neon_uqrshrn : Intrinsic::arm64_neon_sqrshrn;
+    Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
   case NEON::BI__builtin_neon_vrnda_v:
   case NEON::BI__builtin_neon_vrndaq_v: {
@@ -5900,7 +4612,7 @@
   }
   case NEON::BI__builtin_neon_vrndn_v:
   case NEON::BI__builtin_neon_vrndnq_v: {
-    Int = Intrinsic::arm64_neon_frintn;
+    Int = Intrinsic::aarch64_neon_frintn;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
   }
   case NEON::BI__builtin_neon_vrndp_v:
@@ -5987,7 +4699,7 @@
   case NEON::BI__builtin_neon_vcvtaq_s64_v:
   case NEON::BI__builtin_neon_vcvta_u64_v:
   case NEON::BI__builtin_neon_vcvtaq_u64_v: {
-    Int = usgn ? Intrinsic::arm64_neon_fcvtau : Intrinsic::arm64_neon_fcvtas;
+    Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
     bool Double =
       (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
     llvm::Type *InTy =
@@ -6005,7 +4717,7 @@
   case NEON::BI__builtin_neon_vcvtmq_s64_v:
   case NEON::BI__builtin_neon_vcvtm_u64_v:
   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
-    Int = usgn ? Intrinsic::arm64_neon_fcvtmu : Intrinsic::arm64_neon_fcvtms;
+    Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
     bool Double =
       (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
     llvm::Type *InTy =
@@ -6023,7 +4735,7 @@
   case NEON::BI__builtin_neon_vcvtnq_s64_v:
   case NEON::BI__builtin_neon_vcvtn_u64_v:
   case NEON::BI__builtin_neon_vcvtnq_u64_v: {
-    Int = usgn ? Intrinsic::arm64_neon_fcvtnu : Intrinsic::arm64_neon_fcvtns;
+    Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
     bool Double =
       (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
     llvm::Type *InTy =
@@ -6041,7 +4753,7 @@
   case NEON::BI__builtin_neon_vcvtpq_s64_v:
   case NEON::BI__builtin_neon_vcvtp_u64_v:
   case NEON::BI__builtin_neon_vcvtpq_u64_v: {
-    Int = usgn ? Intrinsic::arm64_neon_fcvtpu : Intrinsic::arm64_neon_fcvtps;
+    Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
     bool Double =
       (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
     llvm::Type *InTy =
@@ -6053,7 +4765,7 @@
   }
   case NEON::BI__builtin_neon_vmulx_v:
   case NEON::BI__builtin_neon_vmulxq_v: {
-    Int = Intrinsic::arm64_neon_fmulx;
+    Int = Intrinsic::aarch64_neon_fmulx;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
   }
   case NEON::BI__builtin_neon_vmul_lane_v:
@@ -6074,12 +4786,12 @@
     return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
   case NEON::BI__builtin_neon_vpmaxnm_v:
   case NEON::BI__builtin_neon_vpmaxnmq_v: {
-    Int = Intrinsic::arm64_neon_fmaxnmp;
+    Int = Intrinsic::aarch64_neon_fmaxnmp;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
   }
   case NEON::BI__builtin_neon_vpminnm_v:
   case NEON::BI__builtin_neon_vpminnmq_v: {
-    Int = Intrinsic::arm64_neon_fminnmp;
+    Int = Intrinsic::aarch64_neon_fminnmp;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
   }
   case NEON::BI__builtin_neon_vsqrt_v:
@@ -6090,7 +4802,7 @@
   }
   case NEON::BI__builtin_neon_vrbit_v:
   case NEON::BI__builtin_neon_vrbitq_v: {
-    Int = Intrinsic::arm64_neon_rbit;
+    Int = Intrinsic::aarch64_neon_rbit;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
   }
   case NEON::BI__builtin_neon_vaddv_u8:
@@ -6098,7 +4810,7 @@
     usgn = true;
     // FALLTHROUGH
   case NEON::BI__builtin_neon_vaddv_s8: {
-    Int = usgn ? Intrinsic::arm64_neon_uaddv : Intrinsic::arm64_neon_saddv;
+    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8);
@@ -6112,7 +4824,7 @@
     usgn = true;
     // FALLTHROUGH
   case NEON::BI__builtin_neon_vaddv_s16: {
-    Int = usgn ? Intrinsic::arm64_neon_uaddv : Intrinsic::arm64_neon_saddv;
+    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4);
@@ -6126,7 +4838,7 @@
     usgn = true;
     // FALLTHROUGH
   case NEON::BI__builtin_neon_vaddvq_s8: {
-    Int = usgn ? Intrinsic::arm64_neon_uaddv : Intrinsic::arm64_neon_saddv;
+    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16);
@@ -6140,7 +4852,7 @@
     usgn = true;
     // FALLTHROUGH
   case NEON::BI__builtin_neon_vaddvq_s16: {
-    Int = usgn ? Intrinsic::arm64_neon_uaddv : Intrinsic::arm64_neon_saddv;
+    Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8);
@@ -6151,7 +4863,7 @@
              llvm::IntegerType::get(getLLVMContext(), 16));
   }
   case NEON::BI__builtin_neon_vmaxv_u8: {
-    Int = Intrinsic::arm64_neon_umaxv;
+    Int = Intrinsic::aarch64_neon_umaxv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8);
@@ -6162,7 +4874,7 @@
              llvm::IntegerType::get(getLLVMContext(), 8));
   }
   case NEON::BI__builtin_neon_vmaxv_u16: {
-    Int = Intrinsic::arm64_neon_umaxv;
+    Int = Intrinsic::aarch64_neon_umaxv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4);
@@ -6173,7 +4885,7 @@
              llvm::IntegerType::get(getLLVMContext(), 16));
   }
   case NEON::BI__builtin_neon_vmaxvq_u8: {
-    Int = Intrinsic::arm64_neon_umaxv;
+    Int = Intrinsic::aarch64_neon_umaxv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16);
@@ -6184,7 +4896,7 @@
              llvm::IntegerType::get(getLLVMContext(), 8));
   }
   case NEON::BI__builtin_neon_vmaxvq_u16: {
-    Int = Intrinsic::arm64_neon_umaxv;
+    Int = Intrinsic::aarch64_neon_umaxv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8);
@@ -6195,7 +4907,7 @@
              llvm::IntegerType::get(getLLVMContext(), 16));
   }
   case NEON::BI__builtin_neon_vmaxv_s8: {
-    Int = Intrinsic::arm64_neon_smaxv;
+    Int = Intrinsic::aarch64_neon_smaxv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8);
@@ -6206,7 +4918,7 @@
              llvm::IntegerType::get(getLLVMContext(), 8));
   }
   case NEON::BI__builtin_neon_vmaxv_s16: {
-    Int = Intrinsic::arm64_neon_smaxv;
+    Int = Intrinsic::aarch64_neon_smaxv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4);
@@ -6217,7 +4929,7 @@
              llvm::IntegerType::get(getLLVMContext(), 16));
   }
   case NEON::BI__builtin_neon_vmaxvq_s8: {
-    Int = Intrinsic::arm64_neon_smaxv;
+    Int = Intrinsic::aarch64_neon_smaxv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16);
@@ -6228,7 +4940,7 @@
              llvm::IntegerType::get(getLLVMContext(), 8));
   }
   case NEON::BI__builtin_neon_vmaxvq_s16: {
-    Int = Intrinsic::arm64_neon_smaxv;
+    Int = Intrinsic::aarch64_neon_smaxv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8);
@@ -6239,7 +4951,7 @@
              llvm::IntegerType::get(getLLVMContext(), 16));
   }
   case NEON::BI__builtin_neon_vminv_u8: {
-    Int = Intrinsic::arm64_neon_uminv;
+    Int = Intrinsic::aarch64_neon_uminv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8);
@@ -6250,7 +4962,7 @@
              llvm::IntegerType::get(getLLVMContext(), 8));
   }
   case NEON::BI__builtin_neon_vminv_u16: {
-    Int = Intrinsic::arm64_neon_uminv;
+    Int = Intrinsic::aarch64_neon_uminv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4);
@@ -6261,7 +4973,7 @@
              llvm::IntegerType::get(getLLVMContext(), 16));
   }
   case NEON::BI__builtin_neon_vminvq_u8: {
-    Int = Intrinsic::arm64_neon_uminv;
+    Int = Intrinsic::aarch64_neon_uminv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16);
@@ -6272,7 +4984,7 @@
              llvm::IntegerType::get(getLLVMContext(), 8));
   }
   case NEON::BI__builtin_neon_vminvq_u16: {
-    Int = Intrinsic::arm64_neon_uminv;
+    Int = Intrinsic::aarch64_neon_uminv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8);
@@ -6283,7 +4995,7 @@
              llvm::IntegerType::get(getLLVMContext(), 16));
   }
   case NEON::BI__builtin_neon_vminv_s8: {
-    Int = Intrinsic::arm64_neon_sminv;
+    Int = Intrinsic::aarch64_neon_sminv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8);
@@ -6294,7 +5006,7 @@
              llvm::IntegerType::get(getLLVMContext(), 8));
   }
   case NEON::BI__builtin_neon_vminv_s16: {
-    Int = Intrinsic::arm64_neon_sminv;
+    Int = Intrinsic::aarch64_neon_sminv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4);
@@ -6305,7 +5017,7 @@
              llvm::IntegerType::get(getLLVMContext(), 16));
   }
   case NEON::BI__builtin_neon_vminvq_s8: {
-    Int = Intrinsic::arm64_neon_sminv;
+    Int = Intrinsic::aarch64_neon_sminv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16);
@@ -6316,7 +5028,7 @@
              llvm::IntegerType::get(getLLVMContext(), 8));
   }
   case NEON::BI__builtin_neon_vminvq_s16: {
-    Int = Intrinsic::arm64_neon_sminv;
+    Int = Intrinsic::aarch64_neon_sminv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8);
@@ -6332,7 +5044,7 @@
     return Builder.CreateFMul(Ops[0], RHS);
   }
   case NEON::BI__builtin_neon_vaddlv_u8: {
-    Int = Intrinsic::arm64_neon_uaddlv;
+    Int = Intrinsic::aarch64_neon_uaddlv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8);
@@ -6343,7 +5055,7 @@
              llvm::IntegerType::get(getLLVMContext(), 16));
   }
   case NEON::BI__builtin_neon_vaddlv_u16: {
-    Int = Intrinsic::arm64_neon_uaddlv;
+    Int = Intrinsic::aarch64_neon_uaddlv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4);
@@ -6352,7 +5064,7 @@
     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
   }
   case NEON::BI__builtin_neon_vaddlvq_u8: {
-    Int = Intrinsic::arm64_neon_uaddlv;
+    Int = Intrinsic::aarch64_neon_uaddlv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16);
@@ -6363,7 +5075,7 @@
              llvm::IntegerType::get(getLLVMContext(), 16));
   }
   case NEON::BI__builtin_neon_vaddlvq_u16: {
-    Int = Intrinsic::arm64_neon_uaddlv;
+    Int = Intrinsic::aarch64_neon_uaddlv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8);
@@ -6372,7 +5084,7 @@
     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
   }
   case NEON::BI__builtin_neon_vaddlv_s8: {
-    Int = Intrinsic::arm64_neon_saddlv;
+    Int = Intrinsic::aarch64_neon_saddlv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8);
@@ -6383,7 +5095,7 @@
              llvm::IntegerType::get(getLLVMContext(), 16));
   }
   case NEON::BI__builtin_neon_vaddlv_s16: {
-    Int = Intrinsic::arm64_neon_saddlv;
+    Int = Intrinsic::aarch64_neon_saddlv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4);
@@ -6392,7 +5104,7 @@
     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
   }
   case NEON::BI__builtin_neon_vaddlvq_s8: {
-    Int = Intrinsic::arm64_neon_saddlv;
+    Int = Intrinsic::aarch64_neon_saddlv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16);
@@ -6403,7 +5115,7 @@
              llvm::IntegerType::get(getLLVMContext(), 16));
   }
   case NEON::BI__builtin_neon_vaddlvq_s16: {
-    Int = Intrinsic::arm64_neon_saddlv;
+    Int = Intrinsic::aarch64_neon_saddlv;
     Ty = llvm::IntegerType::get(getLLVMContext(), 32);
     VTy =
       llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8);
@@ -6413,13 +5125,13 @@
   }
   case NEON::BI__builtin_neon_vsri_n_v:
   case NEON::BI__builtin_neon_vsriq_n_v: {
-    Int = Intrinsic::arm64_neon_vsri;
+    Int = Intrinsic::aarch64_neon_vsri;
     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
     return EmitNeonCall(Intrin, Ops, "vsri_n");
   }
   case NEON::BI__builtin_neon_vsli_n_v:
   case NEON::BI__builtin_neon_vsliq_n_v: {
-    Int = Intrinsic::arm64_neon_vsli;
+    Int = Intrinsic::aarch64_neon_vsli;
     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
     return EmitNeonCall(Intrin, Ops, "vsli_n");
   }
@@ -6430,7 +5142,7 @@
     return Builder.CreateAdd(Ops[0], Ops[1]);
   case NEON::BI__builtin_neon_vrsra_n_v:
   case NEON::BI__builtin_neon_vrsraq_n_v: {
-    Int = usgn ? Intrinsic::arm64_neon_urshl : Intrinsic::arm64_neon_srshl;
+    Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
     SmallVector<llvm::Value*,2> TmpOps;
     TmpOps.push_back(Ops[1]);
     TmpOps.push_back(Ops[2]);
@@ -6454,15 +5166,15 @@
     switch (BuiltinID) {
     case NEON::BI__builtin_neon_vld1_x2_v:
     case NEON::BI__builtin_neon_vld1q_x2_v:
-      Int = Intrinsic::arm64_neon_ld1x2;
+      Int = Intrinsic::aarch64_neon_ld1x2;
       break;
     case NEON::BI__builtin_neon_vld1_x3_v:
     case NEON::BI__builtin_neon_vld1q_x3_v:
-      Int = Intrinsic::arm64_neon_ld1x3;
+      Int = Intrinsic::aarch64_neon_ld1x3;
       break;
     case NEON::BI__builtin_neon_vld1_x4_v:
     case NEON::BI__builtin_neon_vld1q_x4_v:
-      Int = Intrinsic::arm64_neon_ld1x4;
+      Int = Intrinsic::aarch64_neon_ld1x4;
       break;
     }
     Function *F = CGM.getIntrinsic(Int, Tys);
@@ -6483,15 +5195,15 @@
     switch (BuiltinID) {
     case NEON::BI__builtin_neon_vst1_x2_v:
     case NEON::BI__builtin_neon_vst1q_x2_v:
-      Int = Intrinsic::arm64_neon_st1x2;
+      Int = Intrinsic::aarch64_neon_st1x2;
       break;
     case NEON::BI__builtin_neon_vst1_x3_v:
     case NEON::BI__builtin_neon_vst1q_x3_v:
-      Int = Intrinsic::arm64_neon_st1x3;
+      Int = Intrinsic::aarch64_neon_st1x3;
       break;
     case NEON::BI__builtin_neon_vst1_x4_v:
     case NEON::BI__builtin_neon_vst1q_x4_v:
-      Int = Intrinsic::arm64_neon_st1x4;
+      Int = Intrinsic::aarch64_neon_st1x4;
       break;
     }
     SmallVector<Value *, 4> IntOps(Ops.begin()+1, Ops.end());
@@ -6535,7 +5247,7 @@
     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
     llvm::Type *Tys[2] = { VTy, PTy };
-    Function *F = CGM.getIntrinsic(Intrinsic::arm64_neon_ld2, Tys);
+    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
     Ops[0] = Builder.CreateBitCast(Ops[0],
                 llvm::PointerType::getUnqual(Ops[1]->getType()));
@@ -6546,7 +5258,7 @@
     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
     llvm::Type *Tys[2] = { VTy, PTy };
-    Function *F = CGM.getIntrinsic(Intrinsic::arm64_neon_ld3, Tys);
+    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
     Ops[0] = Builder.CreateBitCast(Ops[0],
                 llvm::PointerType::getUnqual(Ops[1]->getType()));
@@ -6557,7 +5269,7 @@
     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
     llvm::Type *Tys[2] = { VTy, PTy };
-    Function *F = CGM.getIntrinsic(Intrinsic::arm64_neon_ld4, Tys);
+    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
     Ops[0] = Builder.CreateBitCast(Ops[0],
                 llvm::PointerType::getUnqual(Ops[1]->getType()));
@@ -6569,7 +5281,7 @@
       llvm::PointerType::getUnqual(VTy->getElementType());
     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
     llvm::Type *Tys[2] = { VTy, PTy };
-    Function *F = CGM.getIntrinsic(Intrinsic::arm64_neon_ld2r, Tys);
+    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
     Ops[0] = Builder.CreateBitCast(Ops[0],
                 llvm::PointerType::getUnqual(Ops[1]->getType()));
@@ -6581,7 +5293,7 @@
       llvm::PointerType::getUnqual(VTy->getElementType());
     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
     llvm::Type *Tys[2] = { VTy, PTy };
-    Function *F = CGM.getIntrinsic(Intrinsic::arm64_neon_ld3r, Tys);
+    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
     Ops[0] = Builder.CreateBitCast(Ops[0],
                 llvm::PointerType::getUnqual(Ops[1]->getType()));
@@ -6593,7 +5305,7 @@
       llvm::PointerType::getUnqual(VTy->getElementType());
     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
     llvm::Type *Tys[2] = { VTy, PTy };
-    Function *F = CGM.getIntrinsic(Intrinsic::arm64_neon_ld4r, Tys);
+    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
     Ops[0] = Builder.CreateBitCast(Ops[0],
                 llvm::PointerType::getUnqual(Ops[1]->getType()));
@@ -6602,7 +5314,7 @@
   case NEON::BI__builtin_neon_vld2_lane_v:
   case NEON::BI__builtin_neon_vld2q_lane_v: {
     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
-    Function *F = CGM.getIntrinsic(Intrinsic::arm64_neon_ld2lane, Tys);
+    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
     Ops.push_back(Ops[1]);
     Ops.erase(Ops.begin()+1);
     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
@@ -6618,7 +5330,7 @@
   case NEON::BI__builtin_neon_vld3_lane_v:
   case NEON::BI__builtin_neon_vld3q_lane_v: {
     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
-    Function *F = CGM.getIntrinsic(Intrinsic::arm64_neon_ld3lane, Tys);
+    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
     Ops.push_back(Ops[1]);
     Ops.erase(Ops.begin()+1);
     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
@@ -6635,7 +5347,7 @@
   case NEON::BI__builtin_neon_vld4_lane_v:
   case NEON::BI__builtin_neon_vld4q_lane_v: {
     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
-    Function *F = CGM.getIntrinsic(Intrinsic::arm64_neon_ld4lane, Tys);
+    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
     Ops.push_back(Ops[1]);
     Ops.erase(Ops.begin()+1);
     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
@@ -6655,7 +5367,7 @@
     Ops.push_back(Ops[0]);
     Ops.erase(Ops.begin());
     llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_st2, Tys),
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
                         Ops, "");
   }
   case NEON::BI__builtin_neon_vst2_lane_v:
@@ -6665,7 +5377,7 @@
     Ops[2] = Builder.CreateZExt(Ops[2],
                 llvm::IntegerType::get(getLLVMContext(), 64));
     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_st2lane, Tys),
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
                         Ops, "");
   }
   case NEON::BI__builtin_neon_vst3_v:
@@ -6673,7 +5385,7 @@
     Ops.push_back(Ops[0]);
     Ops.erase(Ops.begin());
     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_st3, Tys),
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
                         Ops, "");
   }
   case NEON::BI__builtin_neon_vst3_lane_v:
@@ -6683,7 +5395,7 @@
     Ops[3] = Builder.CreateZExt(Ops[3],
                 llvm::IntegerType::get(getLLVMContext(), 64));
     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_st3lane, Tys),
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
                         Ops, "");
   }
   case NEON::BI__builtin_neon_vst4_v:
@@ -6691,7 +5403,7 @@
     Ops.push_back(Ops[0]);
     Ops.erase(Ops.begin());
     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_st4, Tys),
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
                         Ops, "");
   }
   case NEON::BI__builtin_neon_vst4_lane_v:
@@ -6701,7 +5413,7 @@
     Ops[4] = Builder.CreateZExt(Ops[4],
                 llvm::IntegerType::get(getLLVMContext(), 64));
     llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_st4lane, Tys),
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
                         Ops, "");
   }
   case NEON::BI__builtin_neon_vtrn_v:
@@ -6709,7 +5421,7 @@
     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
-    Value *SV = 0;
+    Value *SV = nullptr;
 
     for (unsigned vi = 0; vi != 2; ++vi) {
       SmallVector<Constant*, 16> Indices;
@@ -6729,7 +5441,7 @@
     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
-    Value *SV = 0;
+    Value *SV = nullptr;
 
     for (unsigned vi = 0; vi != 2; ++vi) {
       SmallVector<Constant*, 16> Indices;
@@ -6748,7 +5460,7 @@
     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
-    Value *SV = 0;
+    Value *SV = nullptr;
 
     for (unsigned vi = 0; vi != 2; ++vi) {
       SmallVector<Constant*, 16> Indices;
@@ -6764,45 +5476,45 @@
     return SV;
   }
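+  // Note: TBL writes zero to a lane whose index is out of range, while TBX
+  // leaves the corresponding lane of the destination unchanged.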
   case NEON::BI__builtin_neon_vqtbl1q_v: {
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_tbl1, Ty),
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
                         Ops, "vtbl1");
   }
   case NEON::BI__builtin_neon_vqtbl2q_v: {
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_tbl2, Ty),
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
                         Ops, "vtbl2");
   }
   case NEON::BI__builtin_neon_vqtbl3q_v: {
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_tbl3, Ty),
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
                         Ops, "vtbl3");
   }
   case NEON::BI__builtin_neon_vqtbl4q_v: {
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_tbl4, Ty),
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
                         Ops, "vtbl4");
   }
   case NEON::BI__builtin_neon_vqtbx1q_v: {
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_tbx1, Ty),
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
                         Ops, "vtbx1");
   }
   case NEON::BI__builtin_neon_vqtbx2q_v: {
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_tbx2, Ty),
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
                         Ops, "vtbx2");
   }
   case NEON::BI__builtin_neon_vqtbx3q_v: {
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_tbx3, Ty),
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
                         Ops, "vtbx3");
   }
   case NEON::BI__builtin_neon_vqtbx4q_v: {
-    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm64_neon_tbx4, Ty),
+    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
                         Ops, "vtbx4");
   }
   case NEON::BI__builtin_neon_vsqadd_v:
   case NEON::BI__builtin_neon_vsqaddq_v: {
-    Int = Intrinsic::arm64_neon_usqadd;
+    Int = Intrinsic::aarch64_neon_usqadd;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
   }
   case NEON::BI__builtin_neon_vuqadd_v:
   case NEON::BI__builtin_neon_vuqaddq_v: {
-    Int = Intrinsic::arm64_neon_suqadd;
+    Int = Intrinsic::aarch64_neon_suqadd;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
   }
   }
@@ -6860,7 +5572,7 @@
   }
 
   switch (BuiltinID) {
-  default: return 0;
+  default: return nullptr;
   case X86::BI_mm_prefetch: {
     Value *Address = EmitScalarExpr(E->getArg(0));
     Value *RW = ConstantInt::get(Int32Ty, 0);
@@ -7037,7 +5749,7 @@
   // 3DNow!
   case X86::BI__builtin_ia32_pswapdsf:
   case X86::BI__builtin_ia32_pswapdsi: {
-    const char *name = 0;
+    const char *name = nullptr;
     Intrinsic::ID ID = Intrinsic::not_intrinsic;
     switch(BuiltinID) {
     default: llvm_unreachable("Unsupported intrinsic!");
@@ -7106,7 +5818,7 @@
   Intrinsic::ID ID = Intrinsic::not_intrinsic;
 
   switch (BuiltinID) {
-  default: return 0;
+  default: return nullptr;
 
   // vec_ld, vec_lvsl, vec_lvsr
   case PPC::BI__builtin_altivec_lvx:
diff --git a/lib/CodeGen/CGCUDARuntime.cpp b/lib/CodeGen/CGCUDARuntime.cpp
index 54a28f5..29e0a91 100644
--- a/lib/CodeGen/CGCUDARuntime.cpp
+++ b/lib/CodeGen/CGCUDARuntime.cpp
@@ -37,7 +37,7 @@
   eval.begin(CGF);
   CGF.EmitBlock(ConfigOKBlock);
 
-  const Decl *TargetDecl = 0;
+  const Decl *TargetDecl = nullptr;
   if (const ImplicitCastExpr *CE = dyn_cast<ImplicitCastExpr>(E->getCallee())) {
     if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(CE->getSubExpr())) {
       TargetDecl = DRE->getDecl();
@@ -52,5 +52,5 @@
   CGF.EmitBlock(ContBlock);
   eval.end(CGF);
 
-  return RValue::get(0);
+  return RValue::get(nullptr);
 }
diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp
index ef29af7..55c06f6 100644
--- a/lib/CodeGen/CGCXX.cpp
+++ b/lib/CodeGen/CGCXX.cpp
@@ -61,15 +61,15 @@
       return true;
 
   // Try to find a unique base class with a non-trivial destructor.
-  const CXXRecordDecl *UniqueBase = 0;
+  const CXXRecordDecl *UniqueBase = nullptr;
   for (const auto &I : Class->bases()) {
 
     // We're in the base destructor, so skip virtual bases.
     if (I.isVirtual()) continue;
 
     // Skip base classes with trivial destructors.
-    const CXXRecordDecl *Base
-      = cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl());
+    const auto *Base =
+        cast<CXXRecordDecl>(I.getType()->getAs<RecordType>()->getDecl());
     if (Base->hasTrivialDestructor()) continue;
 
     // If we've already found a base class with a non-trivial
@@ -137,13 +137,13 @@
   // Find the referent.  Some aliases might require a bitcast, in
   // which case the caller is responsible for ensuring the soundness
   // of these semantics.
-  llvm::GlobalValue *Ref = cast<llvm::GlobalValue>(GetAddrOfGlobal(TargetDecl));
-  llvm::Constant *Aliasee = Ref;
-  if (Ref->getType() != AliasType)
-    Aliasee = llvm::ConstantExpr::getBitCast(Ref, AliasType);
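+  // An alias needs a GlobalObject to point at; if the referent is itself an
+  // alias, reuse that alias's own aliasee.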
+  auto *Ref = cast<llvm::GlobalValue>(GetAddrOfGlobal(TargetDecl));
+  auto *Aliasee = dyn_cast<llvm::GlobalObject>(Ref);
+  if (!Aliasee)
+    Aliasee = cast<llvm::GlobalAlias>(Ref)->getAliasee();
 
   // Instead of creating as alias to a linkonce_odr, replace all of the uses
-  // of the aliassee.
+  // of the aliasee.
   if (llvm::GlobalValue::isDiscardableIfUnused(Linkage) &&
      (TargetLinkage != llvm::GlobalValue::AvailableExternallyLinkage ||
       !TargetDecl.getDecl()->hasAttr<AlwaysInlineAttr>())) {
@@ -152,7 +152,10 @@
     // members with attribute "AlwaysInline" and expect no reference to
     // be generated. It is desirable to reenable this optimisation after
     // corresponding LLVM changes.
-    Replacements[MangledName] = Aliasee;
+    llvm::Constant *Replacement = Aliasee;
+    if (Aliasee->getType() != AliasType)
+      Replacement = llvm::ConstantExpr::getBitCast(Aliasee, AliasType);
+    Replacements[MangledName] = Replacement;
     return false;
   }
 
@@ -160,7 +163,7 @@
     /// If we don't have a definition for the destructor yet, don't
     /// emit.  We can't emit aliases to declarations; that's just not
     /// how aliases work.
-    if (Ref->isDeclaration())
+    if (Aliasee->isDeclaration())
       return true;
   }
 
@@ -172,8 +175,8 @@
     return true;
 
   // Create the alias with no name.
-  llvm::GlobalAlias *Alias = 
-    new llvm::GlobalAlias(AliasType, Linkage, "", Aliasee, &getModule());
+  auto *Alias = llvm::GlobalAlias::create(AliasType->getElementType(), 0,
+                                          Linkage, "", Aliasee);
 
   // Switch any previous uses to the alias.
   if (Entry) {
@@ -211,13 +214,13 @@
   const CGFunctionInfo &fnInfo =
     getTypes().arrangeCXXConstructorDeclaration(ctor, ctorType);
 
-  llvm::Function *fn = cast<llvm::Function>(
+  auto *fn = cast<llvm::Function>(
       GetAddrOfCXXConstructor(ctor, ctorType, &fnInfo, true));
   setFunctionLinkage(GlobalDecl(ctor, ctorType), fn);
 
   CodeGenFunction(*this).GenerateCode(GlobalDecl(ctor, ctorType), fn, fnInfo);
 
-  SetFunctionDefinitionAttributes(ctor, fn);
+  setFunctionDefinitionAttributes(ctor, fn);
   SetLLVMFunctionAttributesForDefinition(ctor, fn);
 }
 
@@ -268,13 +271,13 @@
   const CGFunctionInfo &fnInfo =
     getTypes().arrangeCXXDestructor(dtor, dtorType);
 
-  llvm::Function *fn = cast<llvm::Function>(
-      GetAddrOfCXXDestructor(dtor, dtorType, &fnInfo, 0, true));
+  auto *fn = cast<llvm::Function>(
+      GetAddrOfCXXDestructor(dtor, dtorType, &fnInfo, nullptr, true));
   setFunctionLinkage(GlobalDecl(dtor, dtorType), fn);
 
   CodeGenFunction(*this).GenerateCode(GlobalDecl(dtor, dtorType), fn, fnInfo);
 
-  SetFunctionDefinitionAttributes(dtor, fn);
+  setFunctionDefinitionAttributes(dtor, fn);
   SetLLVMFunctionAttributesForDefinition(dtor, fn);
 }
 
@@ -335,9 +338,9 @@
   QualType T = QualType(QTy, 0);
   const RecordType *RT = T->getAs<RecordType>();
   assert(RT && "BuildAppleKextVirtualCall - Qual type must be record");
-  const CXXRecordDecl *RD = cast<CXXRecordDecl>(RT->getDecl());
-  
-  if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(MD))
+  const auto *RD = cast<CXXRecordDecl>(RT->getDecl());
+
+  if (const auto *DD = dyn_cast<CXXDestructorDecl>(MD))
     return BuildAppleKextVirtualDestructorCall(DD, Dtor_Complete, RD);
 
   return ::BuildAppleKextVirtualCall(*this, MD, Ty, RD);
@@ -350,7 +353,7 @@
                                             const CXXDestructorDecl *DD,
                                             CXXDtorType Type,
                                             const CXXRecordDecl *RD) {
-  const CXXMethodDecl *MD = cast<CXXMethodDecl>(DD);
+  const auto *MD = cast<CXXMethodDecl>(DD);
  // FIXME: The Dtor_Base dtor is always direct!
  // It needs to be inline expanded into the caller somehow.
  // -O does that, but we need to support -O0 as well.
@@ -361,5 +364,5 @@
     llvm::Type *Ty = CGM.getTypes().GetFunctionType(FInfo);
     return ::BuildAppleKextVirtualCall(*this, GlobalDecl(DD, Type), Ty, RD);
   }
-  return 0;
+  return nullptr;
 }
diff --git a/lib/CodeGen/CGCXXABI.cpp b/lib/CodeGen/CGCXXABI.cpp
index 2bb3907..1f0de31 100644
--- a/lib/CodeGen/CGCXXABI.cpp
+++ b/lib/CodeGen/CGCXXABI.cpp
@@ -28,6 +28,41 @@
     << S;
 }
 
+bool CGCXXABI::canCopyArgument(const CXXRecordDecl *RD) const {
+  // If RD has a non-trivial move or copy constructor, we cannot copy the
+  // argument.
+  if (RD->hasNonTrivialCopyConstructor() || RD->hasNonTrivialMoveConstructor())
+    return false;
+
+  // If RD has a non-trivial destructor, we cannot copy the argument.
+  if (RD->hasNonTrivialDestructor())
+    return false;
+
+  // We can only copy the argument if there exists at least one trivial,
+  // non-deleted copy or move constructor.
+  // FIXME: This assumes that all lazily declared copy and move constructors are
+  // not deleted.  This assumption might not be true in some corner cases.
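+  //
+  // For example, a type like
+  //   struct S { S(const S &) = delete; S(S &&) = delete; };
+  // declares only deleted copy/move constructors and so cannot be copied,
+  // while a plain aggregate such as  struct T { int x; };  can.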
+  bool CopyDeleted = false;
+  bool MoveDeleted = false;
+  for (const CXXConstructorDecl *CD : RD->ctors()) {
+    if (CD->isCopyConstructor() || CD->isMoveConstructor()) {
+      assert(CD->isTrivial());
+      // We had at least one undeleted trivial copy or move ctor.  Return
+      // directly.
+      if (!CD->isDeleted())
+        return true;
+      if (CD->isCopyConstructor())
+        CopyDeleted = true;
+      else
+        MoveDeleted = true;
+    }
+  }
+
+  // If all trivial copy and move constructors are deleted, we cannot copy the
+  // argument.
+  return !(CopyDeleted && MoveDeleted);
+}
+
 llvm::Constant *CGCXXABI::GetBogusMemberPointer(QualType T) {
   return llvm::Constant::getNullValue(CGM.getTypes().ConvertType(T));
 }
@@ -121,7 +156,7 @@
   // FIXME: I'm not entirely sure I like using a fake decl just for code
   // generation. Maybe we can come up with a better way?
   ImplicitParamDecl *ThisDecl
-    = ImplicitParamDecl::Create(CGM.getContext(), 0, MD->getLocation(),
+    = ImplicitParamDecl::Create(CGM.getContext(), nullptr, MD->getLocation(),
                                 &CGM.getContext().Idents.get("this"),
                                 MD->getThisType(CGM.getContext()));
   params.push_back(ThisDecl);
@@ -159,7 +194,7 @@
                                              QualType ElementType) {
   // Should never be called.
   ErrorUnsupportedABI(CGF, "array cookie initialization");
-  return 0;
+  return nullptr;
 }
 
 bool CGCXXABI::requiresArrayCookie(const CXXDeleteExpr *expr,
@@ -193,7 +228,7 @@
   // If we don't need an array cookie, bail out early.
   if (!requiresArrayCookie(expr, eltTy)) {
     allocPtr = ptr;
-    numElements = 0;
+    numElements = nullptr;
     cookieSize = CharUnits::Zero();
     return;
   }
@@ -272,7 +307,7 @@
     llvm_unreachable("shouldn't be called in this ABI");
 
   ErrorUnsupportedABI(CGF, "complete object detection in ctor");
-  return 0;
+  return nullptr;
 }
 
 void CGCXXABI::EmitThreadLocalInitFuncs(
diff --git a/lib/CodeGen/CGCXXABI.h b/lib/CodeGen/CGCXXABI.h
index beaec2c..615ec23 100644
--- a/lib/CodeGen/CGCXXABI.h
+++ b/lib/CodeGen/CGCXXABI.h
@@ -19,23 +19,23 @@
 #include "clang/Basic/LLVM.h"
 
 namespace llvm {
-  class Constant;
-  class Type;
-  class Value;
+class Constant;
+class Type;
+class Value;
 }
 
 namespace clang {
-  class CastExpr;
-  class CXXConstructorDecl;
-  class CXXDestructorDecl;
-  class CXXMethodDecl;
-  class CXXRecordDecl;
-  class FieldDecl;
-  class MangleContext;
+class CastExpr;
+class CXXConstructorDecl;
+class CXXDestructorDecl;
+class CXXMethodDecl;
+class CXXRecordDecl;
+class FieldDecl;
+class MangleContext;
 
 namespace CodeGen {
-  class CodeGenFunction;
-  class CodeGenModule;
+class CodeGenFunction;
+class CodeGenModule;
 
 /// \brief Implements C++ ABI-specific code generation functions.
 class CGCXXABI {
@@ -93,8 +93,9 @@
   /// when called virtually, and code generation does not support the case.
   virtual bool HasThisReturn(GlobalDecl GD) const { return false; }
 
-  /// Returns true if the given record type should be returned indirectly.
-  virtual bool isReturnTypeIndirect(const CXXRecordDecl *RD) const = 0;
+  /// If the C++ ABI requires the given type be returned in a particular way,
+  /// this method sets RetAI and returns true.
+  virtual bool classifyReturnType(CGFunctionInfo &FI) const = 0;
 
   /// Specify how one should pass an argument of a record type.
   enum RecordArgABI {
@@ -111,9 +112,17 @@
     RAA_Indirect
   };
 
+  /// Returns true if C++ allows us to copy the memory of an object of type RD
+  /// when it is passed as an argument.
+  bool canCopyArgument(const CXXRecordDecl *RD) const;
+
   /// Returns how an argument of the given record type should be passed.
   virtual RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const = 0;
 
+  /// Returns true if the implicit 'sret' parameter comes after the implicit
+  /// 'this' parameter of C++ instance methods.
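+  /// (This is how the Microsoft C++ ABI works: an instance method returning
+  /// its result indirectly still receives 'this' as the first parameter.)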
+  virtual bool isSRetParameterAfterThis() const { return false; }
+
   /// Find the LLVM type used to represent the given member pointer
   /// type.
   virtual llvm::Type *
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index e26d6b2d..d0849d3 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -158,6 +158,23 @@
   return CC_C;
 }
 
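+/// Returns true if the given calling convention passes floating-point values
+/// in VFP registers, i.e. uses AAPCS-VFP: either the convention is explicitly
+/// ARM_AAPCS_VFP, or it is the default C convention on a hard-float
+/// environment (a *-eabihf or *-gnueabihf triple).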
+static bool isAAPCSVFP(const CGFunctionInfo &FI, const TargetInfo &Target) {
+  switch (FI.getEffectiveCallingConvention()) {
+  case llvm::CallingConv::C:
+    switch (Target.getTriple().getEnvironment()) {
+    case llvm::Triple::EABIHF:
+    case llvm::Triple::GNUEABIHF:
+      return true;
+    default:
+      return false;
+    }
+  case llvm::CallingConv::ARM_AAPCS_VFP:
+    return true;
+  default:
+    return false;
+  }
+}
+
 /// Arrange the argument and result information for a call to an
 /// unknown C++ non-static member function of the given abstract type.
 /// (Zero value of RD means we don't have any meaningful "this" argument type,
@@ -474,7 +491,7 @@
   CGFunctionInfo::Profile(ID, IsInstanceMethod, info, required, resultType,
                           argTypes);
 
-  void *insertPos = 0;
+  void *insertPos = nullptr;
   CGFunctionInfo *FI = FunctionInfos.FindNodeOrInsertPos(ID, insertPos);
   if (FI)
     return *FI;
@@ -494,11 +511,11 @@
   // them are direct or extend without a specified coerce type, specify the
   // default now.
   ABIArgInfo &retInfo = FI->getReturnInfo();
-  if (retInfo.canHaveCoerceToType() && retInfo.getCoerceToType() == 0)
+  if (retInfo.canHaveCoerceToType() && retInfo.getCoerceToType() == nullptr)
     retInfo.setCoerceToType(ConvertType(FI->getReturnType()));
 
   for (auto &I : FI->arguments())
-    if (I.info.canHaveCoerceToType() && I.info.getCoerceToType() == 0)
+    if (I.info.canHaveCoerceToType() && I.info.getCoerceToType() == nullptr)
       I.info.setCoerceToType(ConvertType(I.type));
 
   bool erased = FunctionsBeingProcessed.erase(FI); (void)erased;
@@ -525,7 +542,7 @@
   FI->Required = required;
   FI->HasRegParm = info.getHasRegParm();
   FI->RegParm = info.getRegParm();
-  FI->ArgStruct = 0;
+  FI->ArgStruct = nullptr;
   FI->NumArgs = argTypes.size();
   FI->getArgsBuffer()[0].type = resultType;
   for (unsigned i = 0, e = argTypes.size(); i != e; ++i)
@@ -548,7 +565,7 @@
     if (RD->isUnion()) {
      // Unions can be here only in degenerate cases - all the fields are the
      // same after flattening. Thus we have to use the "largest" field.
-      const FieldDecl *LargestFD = 0;
+      const FieldDecl *LargestFD = nullptr;
       CharUnits UnionSize = CharUnits::Zero();
 
       for (const auto *FD : RD->fields()) {
@@ -596,7 +613,7 @@
     if (RD->isUnion()) {
      // Unions can be here only in degenerate cases - all the fields are the
      // same after flattening. Thus we have to use the "largest" field.
-      const FieldDecl *LargestFD = 0;
+      const FieldDecl *LargestFD = nullptr;
       CharUnits UnionSize = CharUnits::Zero();
 
       for (const auto *FD : RD->fields()) {
@@ -700,8 +717,9 @@
     if (DL.isBigEndian()) {
       // Preserve the high bits on big-endian targets.
       // That is what memory coercion does.
-      uint64_t SrcSize = DL.getTypeAllocSizeInBits(Val->getType());
-      uint64_t DstSize = DL.getTypeAllocSizeInBits(DestIntTy);
+      uint64_t SrcSize = DL.getTypeSizeInBits(Val->getType());
+      uint64_t DstSize = DL.getTypeSizeInBits(DestIntTy);
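+      // getTypeSizeInBits rather than getTypeAllocSizeInBits: the alloc size
+      // includes padding (an i24, say, is allocated as 32 bits), and shifting
+      // by the padded width would discard the wrong bits.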
+
       if (SrcSize > DstSize) {
         Val = CGF.Builder.CreateLShr(Val, SrcSize - DstSize, "coerce.highbits");
         Val = CGF.Builder.CreateTrunc(Val, DestIntTy, "coerce.val.ii");
@@ -922,8 +940,9 @@
   bool Inserted = FunctionsBeingProcessed.insert(&FI); (void)Inserted;
   assert(Inserted && "Recursively being processed?");
   
+  bool SwapThisWithSRet = false;
   SmallVector<llvm::Type*, 8> argTypes;
-  llvm::Type *resultType = 0;
+  llvm::Type *resultType = nullptr;
 
   const ABIArgInfo &retAI = FI.getReturnInfo();
   switch (retAI.getKind()) {
@@ -955,6 +974,8 @@
     llvm::Type *ty = ConvertType(ret);
     unsigned addressSpace = Context.getTargetAddressSpace(ret);
     argTypes.push_back(llvm::PointerType::get(ty, addressSpace));
+
+    SwapThisWithSRet = retAI.isSRetAfterThis();
     break;
   }
 
@@ -994,8 +1015,11 @@
       // If the coerce-to type is a first class aggregate, flatten it.  Either
       // way is semantically identical, but fast-isel and the optimizer
      // generally like scalar values better than FCAs.
+      // We cannot do this for functions using the AAPCS calling convention,
+      // as structures are treated differently by that calling convention.
       llvm::Type *argType = argAI.getCoerceToType();
-      if (llvm::StructType *st = dyn_cast<llvm::StructType>(argType)) {
+      llvm::StructType *st = dyn_cast<llvm::StructType>(argType);
+      if (st && !isAAPCSVFP(FI, getTarget())) {
         for (unsigned i = 0, e = st->getNumElements(); i != e; ++i)
           argTypes.push_back(st->getElementType(i));
       } else {
@@ -1014,6 +1038,9 @@
   if (llvm::StructType *ArgStruct = FI.getArgStruct())
     argTypes.push_back(ArgStruct->getPointerTo());
 
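+  // If the ABI puts the sret parameter after 'this', the first two argument
+  // types were collected in the opposite order; swap them into place.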
+  if (SwapThisWithSRet)
+    std::swap(argTypes[0], argTypes[1]);
+
   bool Erased = FunctionsBeingProcessed.erase(&FI); (void)Erased;
   assert(Erased && "Not in set?");
   
@@ -1090,6 +1117,9 @@
     FuncAttrs.addAttribute(llvm::Attribute::NoRedZone);
   if (CodeGenOpts.NoImplicitFloat)
     FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat);
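+  // -fsplit-stack: request segmented-stack codegen for every function unless
+  // it opted out via the no_split_stack attribute.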
+  if (CodeGenOpts.EnableSegmentedStacks &&
+      !(TargetDecl && TargetDecl->hasAttr<NoSplitStackAttr>()))
+    FuncAttrs.addAttribute("split-stack");
 
   if (AttrOnCallSite) {
     // Attributes that should go on the call site only.
@@ -1126,6 +1156,7 @@
 
   QualType RetTy = FI.getReturnType();
   unsigned Index = 1;
+  bool SwapThisWithSRet = false;
   const ABIArgInfo &RetAI = FI.getReturnInfo();
   switch (RetAI.getKind()) {
   case ABIArgInfo::Extend:
@@ -1153,10 +1184,12 @@
     SRETAttrs.addAttribute(llvm::Attribute::StructRet);
     if (RetAI.getInReg())
       SRETAttrs.addAttribute(llvm::Attribute::InReg);
-    PAL.push_back(llvm::
-                  AttributeSet::get(getLLVMContext(), Index, SRETAttrs));
+    SwapThisWithSRet = RetAI.isSRetAfterThis();
+    PAL.push_back(llvm::AttributeSet::get(
+        getLLVMContext(), SwapThisWithSRet ? 2 : Index, SRETAttrs));
 
-    ++Index;
+    if (!SwapThisWithSRet)
+      ++Index;
     // sret disables readnone and readonly
     FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly)
       .removeAttribute(llvm::Attribute::ReadNone);
@@ -1178,6 +1211,11 @@
     const ABIArgInfo &AI = I.info;
     llvm::AttrBuilder Attrs;
 
+    // Skip over the sret parameter when it comes second.  We already handled it
+    // above.
+    if (Index == 2 && SwapThisWithSRet)
+      ++Index;
+
     if (AI.getPaddingType()) {
       if (AI.getPaddingInReg())
         PAL.push_back(llvm::AttributeSet::get(getLLVMContext(), Index,
@@ -1196,14 +1234,15 @@
       else if (ParamType->isUnsignedIntegerOrEnumerationType())
         Attrs.addAttribute(llvm::Attribute::ZExt);
       // FALL THROUGH
-    case ABIArgInfo::Direct:
+    case ABIArgInfo::Direct: {
       if (AI.getInReg())
         Attrs.addAttribute(llvm::Attribute::InReg);
 
       // FIXME: handle sseregparm someday...
 
-      if (llvm::StructType *STy =
-          dyn_cast<llvm::StructType>(AI.getCoerceToType())) {
+      llvm::StructType *STy =
+          dyn_cast<llvm::StructType>(AI.getCoerceToType());
+      if (!isAAPCSVFP(FI, getTarget()) && STy) {
         unsigned Extra = STy->getNumElements()-1;  // 1 will be added below.
         if (Attrs.hasAttributes())
           for (unsigned I = 0; I < Extra; ++I)
@@ -1212,7 +1251,7 @@
         Index += Extra;
       }
       break;
-
+    }
     case ABIArgInfo::Indirect:
       if (AI.getInReg())
         Attrs.addAttribute(llvm::Attribute::InReg);
@@ -1312,7 +1351,7 @@
 
   // If we're using inalloca, all the memory arguments are GEPs off of the last
   // parameter, which is a pointer to the complete memory area.
-  llvm::Value *ArgStruct = 0;
+  llvm::Value *ArgStruct = nullptr;
   if (FI.usesInAlloca()) {
     llvm::Function::arg_iterator EI = Fn->arg_end();
     --EI;
@@ -1320,13 +1359,20 @@
     assert(ArgStruct->getType() == FI.getArgStruct()->getPointerTo());
   }
 
-  // Name the struct return argument.
-  if (CGM.ReturnTypeUsesSRet(FI)) {
+  // Name the struct return parameter, which can come first or second.
+  const ABIArgInfo &RetAI = FI.getReturnInfo();
+  bool SwapThisWithSRet = false;
+  if (RetAI.isIndirect()) {
+    SwapThisWithSRet = RetAI.isSRetAfterThis();
+    if (SwapThisWithSRet)
+      ++AI;
     AI->setName("agg.result");
-    AI->addAttr(llvm::AttributeSet::get(getLLVMContext(),
-                                        AI->getArgNo() + 1,
+    AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), AI->getArgNo() + 1,
                                         llvm::Attribute::NoAlias));
-    ++AI;
+    if (SwapThisWithSRet)
+      --AI;  // Go back to the beginning for 'this'.
+    else
+      ++AI;  // Skip the sret parameter.
   }
 
   // Track if we received the parameter as a pointer (indirect, byval, or
@@ -1471,8 +1517,10 @@
       // If the coerce-to type is a first class aggregate, we flatten it and
       // pass the elements. Either way is semantically identical, but fast-isel
      // and the optimizer generally like scalar values better than FCAs.
+      // We cannot do this for functions using the AAPCS calling convention,
+      // as structures are treated differently by that calling convention.
       llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgI.getCoerceToType());
-      if (STy && STy->getNumElements() > 1) {
+      if (!isAAPCSVFP(FI, getTarget()) && STy && STy->getNumElements() > 1) {
         uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(STy);
         llvm::Type *DstTy =
           cast<llvm::PointerType>(Ptr->getType())->getElementType();
@@ -1554,6 +1602,9 @@
     }
 
     ++AI;
+
+    if (ArgNo == 1 && SwapThisWithSRet)
+      ++AI;  // Skip the sret parameter.
   }
 
   if (FI.usesInAlloca())
@@ -1587,8 +1638,8 @@
                                                     llvm::Value *result) {
  // We must be immediately followed by the cast.
   llvm::BasicBlock *BB = CGF.Builder.GetInsertBlock();
-  if (BB->empty()) return 0;
-  if (&BB->back() != result) return 0;
+  if (BB->empty()) return nullptr;
+  if (&BB->back() != result) return nullptr;
 
   llvm::Type *resultType = result->getType();
 
@@ -1606,7 +1657,7 @@
 
     // Require the generator to be immediately followed by the cast.
     if (generator->getNextNode() != bitcast)
-      return 0;
+      return nullptr;
 
     insnsToKill.push_back(bitcast);
   }
@@ -1616,7 +1667,7 @@
   // or
   //   %generator = call i8* @objc_retainAutoreleasedReturnValue(i8* %originalResult)
   llvm::CallInst *call = dyn_cast<llvm::CallInst>(generator);
-  if (!call) return 0;
+  if (!call) return nullptr;
 
   bool doRetainAutorelease;
 
@@ -1644,7 +1695,7 @@
       insnsToKill.push_back(prev);
     }
   } else {
-    return 0;
+    return nullptr;
   }
 
   result = call->getArgOperand(0);
@@ -1677,16 +1728,16 @@
   // This is only applicable to a method with an immutable 'self'.
   const ObjCMethodDecl *method =
     dyn_cast_or_null<ObjCMethodDecl>(CGF.CurCodeDecl);
-  if (!method) return 0;
+  if (!method) return nullptr;
   const VarDecl *self = method->getSelfDecl();
-  if (!self->getType().isConstQualified()) return 0;
+  if (!self->getType().isConstQualified()) return nullptr;
 
   // Look for a retain call.
   llvm::CallInst *retainCall =
     dyn_cast<llvm::CallInst>(result->stripPointerCasts());
   if (!retainCall ||
       retainCall->getCalledValue() != CGF.CGM.getARCEntrypoints().objc_retain)
-    return 0;
+    return nullptr;
 
   // Look for an ordinary load of 'self'.
   llvm::Value *retainedValue = retainCall->getArgOperand(0);
@@ -1694,7 +1745,7 @@
     dyn_cast<llvm::LoadInst>(retainedValue->stripPointerCasts());
   if (!load || load->isAtomic() || load->isVolatile() || 
       load->getPointerOperand() != CGF.GetAddrOfLocalVar(self))
-    return 0;
+    return nullptr;
 
   // Okay!  Burn it all down.  This relies for correctness on the
   // assumption that the retain is emitted as part of the return and
@@ -1736,17 +1787,17 @@
   // with noreturn cleanups.
   if (!CGF.ReturnValue->hasOneUse()) {
     llvm::BasicBlock *IP = CGF.Builder.GetInsertBlock();
-    if (IP->empty()) return 0;
+    if (IP->empty()) return nullptr;
     llvm::StoreInst *store = dyn_cast<llvm::StoreInst>(&IP->back());
-    if (!store) return 0;
-    if (store->getPointerOperand() != CGF.ReturnValue) return 0;
+    if (!store) return nullptr;
+    if (store->getPointerOperand() != CGF.ReturnValue) return nullptr;
     assert(!store->isAtomic() && !store->isVolatile()); // see below
     return store;
   }
 
   llvm::StoreInst *store =
     dyn_cast<llvm::StoreInst>(CGF.ReturnValue->user_back());
-  if (!store) return 0;
+  if (!store) return nullptr;
 
   // These aren't actually possible for non-coerced returns, and we
   // only care about non-coerced returns on this code path.
@@ -1758,7 +1809,7 @@
   llvm::BasicBlock *IP = CGF.Builder.GetInsertBlock();
   while (IP != StoreBB) {
     if (!(IP = IP->getSinglePredecessor()))
-      return 0;
+      return nullptr;
   }
 
   // Okay, the store's basic block dominates the insertion point; we
@@ -1770,13 +1821,13 @@
                                          bool EmitRetDbgLoc,
                                          SourceLocation EndLoc) {
   // Functions with no result always return void.
-  if (ReturnValue == 0) {
+  if (!ReturnValue) {
     Builder.CreateRetVoid();
     return;
   }
 
   llvm::DebugLoc RetDbgLoc;
-  llvm::Value *RV = 0;
+  llvm::Value *RV = nullptr;
   QualType RetTy = FI.getReturnType();
   const ABIArgInfo &RetAI = FI.getReturnInfo();
 
@@ -1796,13 +1847,15 @@
     break;
 
   case ABIArgInfo::Indirect: {
+    auto AI = CurFn->arg_begin();
+    if (RetAI.isSRetAfterThis())
+      ++AI;
     switch (getEvaluationKind(RetTy)) {
     case TEK_Complex: {
       ComplexPairTy RT =
         EmitLoadOfComplex(MakeNaturalAlignAddrLValue(ReturnValue, RetTy),
                           EndLoc);
-      EmitStoreOfComplex(RT,
-                       MakeNaturalAlignAddrLValue(CurFn->arg_begin(), RetTy),
+      EmitStoreOfComplex(RT, MakeNaturalAlignAddrLValue(AI, RetTy),
                          /*isInit*/ true);
       break;
     }
@@ -1811,7 +1864,7 @@
       break;
     case TEK_Scalar:
       EmitStoreOfScalar(Builder.CreateLoad(ReturnValue),
-                        MakeNaturalAlignAddrLValue(CurFn->arg_begin(), RetTy),
+                        MakeNaturalAlignAddrLValue(AI, RetTy),
                         /*isInit*/ true);
       break;
     }
@@ -1840,7 +1893,7 @@
         // If that was the only use of the return value, nuke it as well now.
         if (ReturnValue->use_empty() && isa<llvm::AllocaInst>(ReturnValue)) {
           cast<llvm::AllocaInst>(ReturnValue)->eraseFromParent();
-          ReturnValue = 0;
+          ReturnValue = nullptr;
         }
 
       // Otherwise, we have to do a simple load.
@@ -1958,7 +2011,7 @@
   assert(!isProvablyNull(srcAddr) &&
          "shouldn't have writeback for provably null argument");
 
-  llvm::BasicBlock *contBB = 0;
+  llvm::BasicBlock *contBB = nullptr;
 
   // If the argument wasn't provably non-null, we need to null check
   // before doing the store.
@@ -2039,7 +2092,7 @@
   if (const UnaryOperator *uop = dyn_cast<UnaryOperator>(E->IgnoreParens()))
     if (uop->getOpcode() == UO_AddrOf)
       return uop->getSubExpr();
-  return 0;
+  return nullptr;
 }
 
 /// Emit an argument that's being passed call-by-writeback.  That is,
@@ -2093,9 +2146,9 @@
         cast<llvm::PointerType>(destType->getElementType()));
     CGF.Builder.CreateStore(null, temp);
   }
-  
-  llvm::BasicBlock *contBB = 0;
-  llvm::BasicBlock *originBB = 0;
+
+  llvm::BasicBlock *contBB = nullptr;
+  llvm::BasicBlock *originBB = nullptr;
 
   // If the address is *not* known to be non-null, we need to switch.
   llvm::Value *finalArgument;
@@ -2122,7 +2175,7 @@
     }
   }
 
-  llvm::Value *valueToUse = 0;
+  llvm::Value *valueToUse = nullptr;
 
   // Perform a copy if necessary.
   if (shouldCopy) {
@@ -2284,16 +2337,28 @@
   // In the Microsoft C++ ABI, aggregate arguments are destructed by the callee.
   // However, we still have to push an EH-only cleanup in case we unwind before
   // we make it to the call.
-  if (HasAggregateEvalKind && args.isUsingInAlloca()) {
-    assert(getTarget().getTriple().getArch() == llvm::Triple::x86);
-    AggValueSlot Slot = createPlaceholderSlot(*this, type);
-    Slot.setExternallyDestructed();
+  if (HasAggregateEvalKind &&
+      CGM.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) {
+    // If we're using inalloca, use the argument memory.  Otherwise, use a
+    // temporary.
+    AggValueSlot Slot;
+    if (args.isUsingInAlloca())
+      Slot = createPlaceholderSlot(*this, type);
+    else
+      Slot = CreateAggTemp(type, "agg.tmp");
+
+    const CXXRecordDecl *RD = type->getAsCXXRecordDecl();
+    bool DestroyedInCallee =
+        RD && RD->hasNonTrivialDestructor() &&
+        CGM.getCXXABI().getRecordArgABI(RD) != CGCXXABI::RAA_Default;
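+    // In such ABIs (e.g. Microsoft) the callee destroys by-value arguments
+    // that have non-trivial destructors, so the caller's cleanup must fire
+    // only when unwinding past the call.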
+    if (DestroyedInCallee)
+      Slot.setExternallyDestructed();
+
     EmitAggExpr(E, Slot);
     RValue RV = Slot.asRValue();
     args.add(RV, type);
 
-    const CXXRecordDecl *RD = type->getAsCXXRecordDecl();
-    if (RD->hasNonTrivialDestructor()) {
+    if (DestroyedInCallee) {
       // Create a no-op GEP between the placeholder and the cleanup so we can
       // RAUW it successfully.  It also serves as a marker of the first
       // instruction where the cleanup is active.
@@ -2467,7 +2532,7 @@
     LValue LV = MakeAddrLValue(RV.getAggregateAddr(), Ty);
 
     if (RD->isUnion()) {
-      const FieldDecl *LargestFD = 0;
+      const FieldDecl *LargestFD = nullptr;
       CharUnits UnionSize = CharUnits::Zero();
 
       for (const auto *FD : RD->fields()) {
@@ -2544,10 +2609,16 @@
 
   // If we're using inalloca, insert the allocation after the stack save.
   // FIXME: Do this earlier rather than hacking it in here!
-  llvm::Value *ArgMemory = 0;
+  llvm::Value *ArgMemory = nullptr;
   if (llvm::StructType *ArgStruct = CallInfo.getArgStruct()) {
-    llvm::AllocaInst *AI = new llvm::AllocaInst(
-        ArgStruct, "argmem", CallArgs.getStackBase()->getNextNode());
+    llvm::Instruction *IP = CallArgs.getStackBase();
+    llvm::AllocaInst *AI;
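+    // If we have a stack save point, put the allocation right after it so the
+    // matching stack restore frees the memory; otherwise fall back to an
+    // ordinary temporary in the entry block.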
+    if (IP) {
+      IP = IP->getNextNode();
+      AI = new llvm::AllocaInst(ArgStruct, "argmem", IP);
+    } else {
+      AI = CreateTempAlloca(ArgStruct, "argmem");
+    }
     AI->setUsedWithInAlloca(true);
     assert(AI->isUsedWithInAlloca() && !AI->isStaticAlloca());
     ArgMemory = AI;
@@ -2555,14 +2626,20 @@
 
   // If the call returns a temporary with struct return, create a temporary
   // alloca to hold the result, unless one is given to us.
-  llvm::Value *SRetPtr = 0;
-  if (CGM.ReturnTypeUsesSRet(CallInfo) || RetAI.isInAlloca()) {
+  llvm::Value *SRetPtr = nullptr;
+  bool SwapThisWithSRet = false;
+  if (RetAI.isIndirect() || RetAI.isInAlloca()) {
     SRetPtr = ReturnValue.getValue();
     if (!SRetPtr)
       SRetPtr = CreateMemTemp(RetTy);
-    if (CGM.ReturnTypeUsesSRet(CallInfo)) {
+    if (RetAI.isIndirect()) {
       Args.push_back(SRetPtr);
+      SwapThisWithSRet = RetAI.isSRetAfterThis();
+      if (SwapThisWithSRet)
+        IRArgNo = 1;
       checkArgMatches(SRetPtr, IRArgNo, IRFuncTy);
+      if (SwapThisWithSRet)
+        IRArgNo = 0;
     } else {
       llvm::Value *Addr =
           Builder.CreateStructGEP(ArgMemory, RetAI.getInAllocaFieldIndex());
@@ -2578,6 +2655,10 @@
     const ABIArgInfo &ArgInfo = info_it->info;
     RValue RV = I->RV;
 
+    // Skip 'sret' if it came second.
+    if (IRArgNo == 1 && SwapThisWithSRet)
+      ++IRArgNo;
+
     CharUnits TypeAlign = getContext().getTypeAlignInChars(I->Ty);
 
     // Insert a padding argument to ensure proper alignment.
@@ -2714,8 +2795,11 @@
       // If the coerce-to type is a first class aggregate, we flatten it and
       // pass the elements. Either way is semantically identical, but fast-isel
      // and the optimizer generally like scalar values better than FCAs.
-      if (llvm::StructType *STy =
-            dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType())) {
+      // We cannot do this for functions using the AAPCS calling convention,
+      // as structures are treated differently by that calling convention.
+      llvm::StructType *STy =
+            dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType());
+      if (STy && !isAAPCSVFP(CallInfo, getTarget())) {
         llvm::Type *SrcTy =
           cast<llvm::PointerType>(SrcPtr->getType())->getElementType();
         uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);
@@ -2764,6 +2848,9 @@
     }
   }
 
+  if (SwapThisWithSRet)
+    std::swap(Args[0], Args[1]);
+
   if (ArgMemory) {
     llvm::Value *Arg = ArgMemory;
     llvm::Type *LastParamTy =
@@ -2832,7 +2919,7 @@
   llvm::AttributeSet Attrs = llvm::AttributeSet::get(getLLVMContext(),
                                                      AttributeList);
 
-  llvm::BasicBlock *InvokeDest = 0;
+  llvm::BasicBlock *InvokeDest = nullptr;
   if (!Attrs.hasAttribute(llvm::AttributeSet::FunctionIndex,
                           llvm::Attribute::NoUnwind))
     InvokeDest = getInvokeDest();
@@ -2848,6 +2935,12 @@
   if (callOrInvoke)
     *callOrInvoke = CS.getInstruction();
 
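+  // __attribute__((flatten)) on the calling function: force-inline every call
+  // emitted from its body unless the callee is explicitly noinline.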
+  if (CurCodeDecl && CurCodeDecl->hasAttr<FlattenAttr>() &&
+      !CS.hasFnAttr(llvm::Attribute::NoInline))
+    Attrs =
+        Attrs.addAttribute(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
+                           llvm::Attribute::AlwaysInline);
+
   CS.setAttributes(Attrs);
   CS.setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
 
diff --git a/lib/CodeGen/CGCall.h b/lib/CodeGen/CGCall.h
index 2e43d1d..9510a1c 100644
--- a/lib/CodeGen/CGCall.h
+++ b/lib/CodeGen/CGCall.h
@@ -56,7 +56,7 @@
   class CallArgList :
     public SmallVector<CallArg, 16> {
   public:
-    CallArgList() : StackBase(0), StackBaseMem(0) {}
+    CallArgList() : StackBase(nullptr), StackBaseMem(nullptr) {}
 
     struct Writeback {
       /// The original argument.  Note that the argument l-value
diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp
index 071f8b3..97bea97 100644
--- a/lib/CodeGen/CGClass.cpp
+++ b/lib/CodeGen/CGClass.cpp
@@ -66,8 +66,8 @@
     ComputeNonVirtualBaseClassOffset(getContext(), ClassDecl,
                                      PathBegin, PathEnd);
   if (Offset.isZero())
-    return 0;
-  
+    return nullptr;
+
   llvm::Type *PtrDiffTy = 
   Types.ConvertType(getContext().getPointerDiffType());
   
@@ -114,7 +114,7 @@
                                 CharUnits nonVirtualOffset,
                                 llvm::Value *virtualOffset) {
   // Assert that we have something to do.
-  assert(!nonVirtualOffset.isZero() || virtualOffset != 0);
+  assert(!nonVirtualOffset.isZero() || virtualOffset != nullptr);
 
   // Compute the offset from the static and dynamic components.
   llvm::Value *baseOffset;
@@ -143,8 +143,8 @@
   assert(PathBegin != PathEnd && "Base path should not be empty!");
 
   CastExpr::path_const_iterator Start = PathBegin;
-  const CXXRecordDecl *VBase = 0;
-  
+  const CXXRecordDecl *VBase = nullptr;
+
   // Sema has done some convenient canonicalization here: if the
   // access path involved any virtual steps, the conversion path will
   // *start* with a step down to the correct virtual base subobject,
@@ -169,7 +169,7 @@
     const ASTRecordLayout &layout = getContext().getASTRecordLayout(Derived);
     CharUnits vBaseOffset = layout.getVBaseClassOffset(VBase);
     NonVirtualOffset += vBaseOffset;
-    VBase = 0; // we no longer have a virtual step
+    VBase = nullptr; // we no longer have a virtual step
   }
 
   // Get the base pointer type.
@@ -180,11 +180,11 @@
   // just do a bitcast; null checks are unnecessary.
   if (NonVirtualOffset.isZero() && !VBase) {
     return Builder.CreateBitCast(Value, BasePtrTy);
-  }    
+  }
 
-  llvm::BasicBlock *origBB = 0;
-  llvm::BasicBlock *endBB = 0;
-  
+  llvm::BasicBlock *origBB = nullptr;
+  llvm::BasicBlock *endBB = nullptr;
+
   // Skip over the offset (and the vtable load) if we're supposed to
   // null-check the pointer.
   if (NullCheckValue) {
@@ -198,7 +198,7 @@
   }
 
   // Compute the virtual offset.
-  llvm::Value *VirtualOffset = 0;
+  llvm::Value *VirtualOffset = nullptr;
   if (VBase) {
     VirtualOffset =
       CGM.getCXXABI().GetVirtualBaseClassOffset(*this, Value, Derived, VBase);
@@ -246,11 +246,11 @@
     // No offset, we can just cast back.
     return Builder.CreateBitCast(Value, DerivedPtrTy);
   }
-  
-  llvm::BasicBlock *CastNull = 0;
-  llvm::BasicBlock *CastNotNull = 0;
-  llvm::BasicBlock *CastEnd = 0;
-  
+
+  llvm::BasicBlock *CastNull = nullptr;
+  llvm::BasicBlock *CastNotNull = nullptr;
+  llvm::BasicBlock *CastEnd = nullptr;
+
   if (NullCheckValue) {
     CastNull = createBasicBlock("cast.null");
     CastNotNull = createBasicBlock("cast.notnull");
@@ -290,7 +290,7 @@
                                               bool Delegating) {
   if (!CGM.getCXXABI().NeedsVTTParameter(GD)) {
     // This constructor/destructor does not need a VTT parameter.
-    return 0;
+    return nullptr;
   }
   
   const CXXRecordDecl *RD = cast<CXXMethodDecl>(CurCodeDecl)->getParent();
@@ -454,7 +454,7 @@
 
     switch (CGF.getEvaluationKind(T)) {
     case TEK_Scalar:
-      CGF.EmitScalarInit(Init, /*decl*/ 0, LV, false);
+      CGF.EmitScalarInit(Init, /*decl*/ nullptr, LV, false);
       break;
     case TEK_Complex:
       CGF.EmitComplexExprIntoLValue(Init, LV, /*isInit*/ true);
@@ -607,7 +607,7 @@
     EmitComplexExprIntoLValue(Init, LHS, /*isInit*/ true);
     break;
   case TEK_Aggregate: {
-    llvm::Value *ArrayIndexVar = 0;
+    llvm::Value *ArrayIndexVar = nullptr;
     if (ArrayIndexes.size()) {
       llvm::Type *SizeTy = ConvertType(getContext().getSizeType());
       
@@ -777,8 +777,8 @@
                     const VarDecl *SrcRec)
       : CGF(CGF), ClassDecl(ClassDecl), SrcRec(SrcRec), 
         RecLayout(CGF.getContext().getASTRecordLayout(ClassDecl)),
-        FirstField(0), LastField(0), FirstFieldOffset(0), LastFieldOffset(0),
-        LastAddedFieldIndex(0) { }
+        FirstField(nullptr), LastField(nullptr), FirstFieldOffset(0),
+        LastFieldOffset(0), LastAddedFieldIndex(0) {}
 
     static bool isMemcpyableField(FieldDecl *F) {
       Qualifiers Qual = F->getType().getQualifiers();
@@ -788,7 +788,7 @@
     }
 
     void addMemcpyableField(FieldDecl *F) {
-      if (FirstField == 0)
+      if (!FirstField)
         addInitialField(F);
       else
         addNextField(F);
@@ -810,7 +810,7 @@
     void emitMemcpy() {
       // Give the subclass a chance to bail out if it feels the memcpy isn't
       // worth it (e.g. Hasn't aggregated enough data).
-      if (FirstField == 0) {
+      if (!FirstField) {
         return;
       }
 
@@ -844,7 +844,7 @@
     }
 
     void reset() {
-      FirstField = 0;
+      FirstField = nullptr;
     }
 
   protected:
@@ -917,7 +917,7 @@
                                                FunctionArgList &Args) {
       if (CD->isCopyOrMoveConstructor() && CD->isDefaulted())
         return Args[Args.size() - 1];
-      return 0; 
+      return nullptr;
     }
 
     // Returns true if a CXXCtorInitializer represents a member initialization
@@ -926,7 +926,7 @@
       if (!MemcpyableCtor)
         return false;
       FieldDecl *Field = MemberInit->getMember();
-      assert(Field != 0 && "No field for member init.");
+      assert(Field && "No field for member init.");
       QualType FieldType = Field->getType();
       CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(MemberInit->getInit());
 
@@ -1014,71 +1014,71 @@
     // exists. Otherwise returns null.
     FieldDecl *getMemcpyableField(Stmt *S) {
       if (!AssignmentsMemcpyable)
-        return 0;
+        return nullptr;
       if (BinaryOperator *BO = dyn_cast<BinaryOperator>(S)) {
         // Recognise trivial assignments.
         if (BO->getOpcode() != BO_Assign)
-          return 0;
+          return nullptr;
         MemberExpr *ME = dyn_cast<MemberExpr>(BO->getLHS());
         if (!ME)
-          return 0;
+          return nullptr;
         FieldDecl *Field = dyn_cast<FieldDecl>(ME->getMemberDecl());
         if (!Field || !isMemcpyableField(Field))
-          return 0;
+          return nullptr;
         Stmt *RHS = BO->getRHS();
         if (ImplicitCastExpr *EC = dyn_cast<ImplicitCastExpr>(RHS))
           RHS = EC->getSubExpr();
         if (!RHS)
-          return 0;
+          return nullptr;
         MemberExpr *ME2 = dyn_cast<MemberExpr>(RHS);
         if (dyn_cast<FieldDecl>(ME2->getMemberDecl()) != Field)
-          return 0;
+          return nullptr;
         return Field;
       } else if (CXXMemberCallExpr *MCE = dyn_cast<CXXMemberCallExpr>(S)) {
         CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(MCE->getCalleeDecl());
         if (!(MD && (MD->isCopyAssignmentOperator() ||
                        MD->isMoveAssignmentOperator()) &&
               MD->isTrivial()))
-          return 0;
+          return nullptr;
         MemberExpr *IOA = dyn_cast<MemberExpr>(MCE->getImplicitObjectArgument());
         if (!IOA)
-          return 0;
+          return nullptr;
         FieldDecl *Field = dyn_cast<FieldDecl>(IOA->getMemberDecl());
         if (!Field || !isMemcpyableField(Field))
-          return 0;
+          return nullptr;
         MemberExpr *Arg0 = dyn_cast<MemberExpr>(MCE->getArg(0));
         if (!Arg0 || Field != dyn_cast<FieldDecl>(Arg0->getMemberDecl()))
-          return 0;
+          return nullptr;
         return Field;
       } else if (CallExpr *CE = dyn_cast<CallExpr>(S)) {
         FunctionDecl *FD = dyn_cast<FunctionDecl>(CE->getCalleeDecl());
         if (!FD || FD->getBuiltinID() != Builtin::BI__builtin_memcpy)
-          return 0;
+          return nullptr;
         Expr *DstPtr = CE->getArg(0);
         if (ImplicitCastExpr *DC = dyn_cast<ImplicitCastExpr>(DstPtr))
           DstPtr = DC->getSubExpr();
         UnaryOperator *DUO = dyn_cast<UnaryOperator>(DstPtr);
         if (!DUO || DUO->getOpcode() != UO_AddrOf)
-          return 0;
+          return nullptr;
         MemberExpr *ME = dyn_cast<MemberExpr>(DUO->getSubExpr());
         if (!ME)
-          return 0;
+          return nullptr;
         FieldDecl *Field = dyn_cast<FieldDecl>(ME->getMemberDecl());
         if (!Field || !isMemcpyableField(Field))
-          return 0;
+          return nullptr;
         Expr *SrcPtr = CE->getArg(1);
         if (ImplicitCastExpr *SC = dyn_cast<ImplicitCastExpr>(SrcPtr))
           SrcPtr = SC->getSubExpr();
         UnaryOperator *SUO = dyn_cast<UnaryOperator>(SrcPtr);
         if (!SUO || SUO->getOpcode() != UO_AddrOf)
-          return 0;
+          return nullptr;
         MemberExpr *ME2 = dyn_cast<MemberExpr>(SUO->getSubExpr());
         if (!ME2 || Field != dyn_cast<FieldDecl>(ME2->getMemberDecl()))
-          return 0;
+          return nullptr;
         return Field;
       }
 
-      return 0;
+      return nullptr;
     }
 
     bool AssignmentsMemcpyable;
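
For reference, getMemcpyableField above recognizes three shapes of per-field copy that the memcpyizer can coalesce: a trivial member-to-member assignment, a call to a trivial copy/move assignment operator, and a __builtin_memcpy between corresponding members. A sketch of a type whose defaulted assignment feeds this path (illustrative only):

    struct Packet {
      int id;
      char payload[64];
      // Trivial, defaulted copy assignment: the per-field copies of 'id'
      // and 'payload' are candidates to be folded into a single memcpy.
      Packet &operator=(const Packet &) = default;
    };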
@@ -1137,7 +1137,7 @@
   CXXConstructorDecl::init_const_iterator B = CD->init_begin(),
                                           E = CD->init_end();
 
-  llvm::BasicBlock *BaseCtorContinueBB = 0;
+  llvm::BasicBlock *BaseCtorContinueBB = nullptr;
   if (ClassDecl->getNumVBases() &&
       !CGM.getTarget().getCXXABI().hasConstructorVariants()) {
     // The ABIs that don't have constructor variants need to put a branch
@@ -1374,7 +1374,7 @@
   public:
     CallDtorDeleteConditional(llvm::Value *ShouldDeleteCondition)
       : ShouldDeleteCondition(ShouldDeleteCondition) {
-      assert(ShouldDeleteCondition != NULL);
+      assert(ShouldDeleteCondition != nullptr);
     }
 
     void Emit(CodeGenFunction &CGF, Flags flags) override {
@@ -1552,7 +1552,7 @@
   // because of GCC extensions that permit zero-length arrays.  There
   // are probably legitimate places where we could assume that this
   // doesn't happen, but it's not clear that it's worth it.
-  llvm::BranchInst *zeroCheckBranch = 0;
+  llvm::BranchInst *zeroCheckBranch = nullptr;
 
   // Optimize for a constant count.
   llvm::ConstantInt *constantCount
@@ -1870,7 +1870,7 @@
     return;
 
   // Compute where to store the address point.
-  llvm::Value *VirtualOffset = 0;
+  llvm::Value *VirtualOffset = nullptr;
   CharUnits NonVirtualOffset = CharUnits::Zero();
   
   if (NeedsVirtualOffset) {
@@ -1968,7 +1968,7 @@
   // Initialize the vtable pointers for this class and all of its bases.
   VisitedVirtualBasesSetTy VBases;
   InitializeVTablePointers(BaseSubobject(RD, CharUnits::Zero()), 
-                           /*NearestVBase=*/0, 
+                           /*NearestVBase=*/nullptr,
                            /*OffsetFromNearestVBase=*/CharUnits::Zero(),
                            /*BaseIsNonVirtualPrimaryBase=*/false, RD, VBases);
 
@@ -2177,7 +2177,7 @@
     assert(MD->isFunctionTemplateSpecialization());
     const TemplateArgumentList *TAL = MD->getTemplateSpecializationArgs();
     FunctionTemplateDecl *CallOpTemplate = CallOp->getDescribedFunctionTemplate();
-    void *InsertPos = 0;
+    void *InsertPos = nullptr;
     FunctionDecl *CorrespondingCallOpSpecialization = 
         CallOpTemplate->findSpecialization(TAL->data(), TAL->size(), InsertPos); 
     assert(CorrespondingCallOpSpecialization);
diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp
index 8748224..ed9f96d 100644
--- a/lib/CodeGen/CGCleanup.cpp
+++ b/lib/CodeGen/CGCleanup.cpp
@@ -50,7 +50,7 @@
     CodeGenFunction::ComplexPairTy V = rv.getComplexVal();
     llvm::Type *ComplexTy =
       llvm::StructType::get(V.first->getType(), V.second->getType(),
-                            (void*) 0);
+                            (void*) nullptr);
     llvm::Value *addr = CGF.CreateTempAlloca(ComplexTy, "saved-complex");
     CGF.Builder.CreateStore(V.first, CGF.Builder.CreateStructGEP(addr, 0));
     CGF.Builder.CreateStore(V.second, CGF.Builder.CreateStructGEP(addr, 1));
@@ -245,7 +245,7 @@
   assert(BranchFixups.size() >= MinSize && "fixup stack out of order");
 
   while (BranchFixups.size() > MinSize &&
-         BranchFixups.back().Destination == 0)
+         BranchFixups.back().Destination == nullptr)
     BranchFixups.pop_back();
 }
 
@@ -263,7 +263,7 @@
 
   // Set that as the active flag in the cleanup.
   EHCleanupScope &cleanup = cast<EHCleanupScope>(*EHStack.begin());
-  assert(cleanup.getActiveFlag() == 0 && "cleanup already has active flag?");
+  assert(!cleanup.getActiveFlag() && "cleanup already has active flag?");
   cleanup.setActiveFlag(active);
 
   if (cleanup.isNormalCleanup()) cleanup.setTestFlagInNormalCleanup();
@@ -283,7 +283,7 @@
   for (unsigned I = 0, E = CGF.EHStack.getNumBranchFixups(); I != E; ++I) {
     // Skip this fixup if its destination isn't set.
     BranchFixup &Fixup = CGF.EHStack.getBranchFixup(I);
-    if (Fixup.Destination == 0) continue;
+    if (Fixup.Destination == nullptr) continue;
 
     // If there isn't an OptimisticBranchBlock, then InitialBranch is
     // still pointing directly to its destination; forward it to the
@@ -293,7 +293,7 @@
     //   lbl:
     // i.e. where there's an unresolved fixup inside a single cleanup
     // entry which we're currently popping.
-    if (Fixup.OptimisticBranchBlock == 0) {
+    if (Fixup.OptimisticBranchBlock == nullptr) {
       new llvm::StoreInst(CGF.Builder.getInt32(Fixup.DestinationIndex),
                           CGF.getNormalCleanupDestSlot(),
                           Fixup.InitialBranch);
@@ -347,7 +347,7 @@
     BranchFixup &Fixup = EHStack.getBranchFixup(I);
     if (Fixup.Destination != Block) continue;
 
-    Fixup.Destination = 0;
+    Fixup.Destination = nullptr;
     ResolvedAny = true;
 
     // If it doesn't have an optimistic branch block, LatestBranch is
@@ -473,7 +473,7 @@
 
   // If there's an active flag, load it and skip the cleanup if it's
   // false.
-  llvm::BasicBlock *ContBB = 0;
+  llvm::BasicBlock *ContBB = nullptr;
   if (ActiveFlag) {
     ContBB = CGF.createBasicBlock("cleanup.done");
     llvm::BasicBlock *CleanupBB = CGF.createBasicBlock("cleanup.action");
@@ -568,15 +568,15 @@
   // Remember activation information.
   bool IsActive = Scope.isActive();
   llvm::Value *NormalActiveFlag =
-    Scope.shouldTestFlagInNormalCleanup() ? Scope.getActiveFlag() : 0;
+    Scope.shouldTestFlagInNormalCleanup() ? Scope.getActiveFlag() : nullptr;
   llvm::Value *EHActiveFlag = 
-    Scope.shouldTestFlagInEHCleanup() ? Scope.getActiveFlag() : 0;
+    Scope.shouldTestFlagInEHCleanup() ? Scope.getActiveFlag() : nullptr;
 
   // Check whether we need an EH cleanup.  This is only true if we've
   // generated a lazy EH cleanup block.
   llvm::BasicBlock *EHEntry = Scope.getCachedEHDispatchBlock();
-  assert(Scope.hasEHBranches() == (EHEntry != 0));
-  bool RequiresEHCleanup = (EHEntry != 0);
+  assert(Scope.hasEHBranches() == (EHEntry != nullptr));
+  bool RequiresEHCleanup = (EHEntry != nullptr);
   EHScopeStack::stable_iterator EHParent = Scope.getEnclosingEHScope();
 
   // Check the three conditions which might require a normal cleanup:
@@ -590,7 +590,7 @@
 
   // - whether there's a fallthrough
   llvm::BasicBlock *FallthroughSource = Builder.GetInsertBlock();
-  bool HasFallthrough = (FallthroughSource != 0 && IsActive);
+  bool HasFallthrough = (FallthroughSource != nullptr && IsActive);
 
   // Branch-through fall-throughs leave the insertion point set to the
   // end of the last cleanup, which points to the current scope.  The
@@ -720,7 +720,7 @@
       //   - if fall-through is a branch-through
       //   - if there are fixups that will be optimistically forwarded
       //     to the enclosing cleanup
-      llvm::BasicBlock *BranchThroughDest = 0;
+      llvm::BasicBlock *BranchThroughDest = nullptr;
       if (Scope.hasBranchThroughs() ||
           (FallthroughSource && FallthroughIsBranchThrough) ||
           (HasFixups && HasEnclosingCleanups)) {
@@ -729,7 +729,7 @@
         BranchThroughDest = CreateNormalEntry(*this, cast<EHCleanupScope>(S));
       }
 
-      llvm::BasicBlock *FallthroughDest = 0;
+      llvm::BasicBlock *FallthroughDest = nullptr;
       SmallVector<llvm::Instruction*, 2> InstsToAppend;
 
       // If there's exactly one branch-after and no other threads,
@@ -943,7 +943,7 @@
     Fixup.Destination = Dest.getBlock();
     Fixup.DestinationIndex = Dest.getDestIndex();
     Fixup.InitialBranch = BI;
-    Fixup.OptimisticBranchBlock = 0;
+    Fixup.OptimisticBranchBlock = nullptr;
 
     Builder.ClearInsertionPoint();
     return;
diff --git a/lib/CodeGen/CGCleanup.h b/lib/CodeGen/CGCleanup.h
index fed75bc..066cdb2 100644
--- a/lib/CodeGen/CGCleanup.h
+++ b/lib/CodeGen/CGCleanup.h
@@ -96,7 +96,7 @@
   enum Kind { Cleanup, Catch, Terminate, Filter };
 
   EHScope(Kind kind, EHScopeStack::stable_iterator enclosingEHScope)
-    : CachedLandingPad(0), CachedEHDispatchBlock(0),
+    : CachedLandingPad(nullptr), CachedEHDispatchBlock(nullptr),
       EnclosingEHScope(enclosingEHScope) {
     CommonBits.Kind = kind;
   }
@@ -150,7 +150,7 @@
     /// The catch handler for this type.
     llvm::BasicBlock *Block;
 
-    bool isCatchAll() const { return Type == 0; }
+    bool isCatchAll() const { return Type == nullptr; }
   };
 
 private:
@@ -180,7 +180,7 @@
   }
 
   void setCatchAllHandler(unsigned I, llvm::BasicBlock *Block) {
-    setHandler(I, /*catchall*/ 0, Block);
+    setHandler(I, /*catchall*/ nullptr, Block);
   }
 
   void setHandler(unsigned I, llvm::Value *Type, llvm::BasicBlock *Block) {
@@ -268,7 +268,7 @@
                  EHScopeStack::stable_iterator enclosingNormal,
                  EHScopeStack::stable_iterator enclosingEH)
     : EHScope(EHScope::Cleanup, enclosingEH), EnclosingNormal(enclosingNormal),
-      NormalBlock(0), ActiveFlag(0), ExtInfo(0) {
+      NormalBlock(nullptr), ActiveFlag(nullptr), ExtInfo(nullptr) {
     CleanupBits.IsNormalCleanup = isNormal;
     CleanupBits.IsEHCleanup = isEH;
     CleanupBits.IsActive = isActive;
@@ -455,7 +455,7 @@
   explicit iterator(char *Ptr) : Ptr(Ptr) {}
 
 public:
-  iterator() : Ptr(0) {}
+  iterator() : Ptr(nullptr) {}
 
   EHScope *get() const { 
     return reinterpret_cast<EHScope*>(Ptr);
diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp
index 0e94b51..e2ba70a 100644
--- a/lib/CodeGen/CGDebugInfo.cpp
+++ b/lib/CodeGen/CGDebugInfo.cpp
@@ -112,17 +112,14 @@
   if (LexicalBlockStack.empty()) return;
 
   SourceManager &SM = CGM.getContext().getSourceManager();
+  llvm::DIScope Scope(LexicalBlockStack.back());
   PresumedLoc PCLoc = SM.getPresumedLoc(CurLoc);
-  PresumedLoc PPLoc = SM.getPresumedLoc(PrevLoc);
 
-  if (PCLoc.isInvalid() || PPLoc.isInvalid() ||
-      !strcmp(PPLoc.getFilename(), PCLoc.getFilename()))
+  if (PCLoc.isInvalid() || Scope.getFilename() == PCLoc.getFilename())
     return;
 
-  llvm::MDNode *LB = LexicalBlockStack.back();
-  llvm::DIScope Scope = llvm::DIScope(LB);
   if (Scope.isLexicalBlockFile()) {
-    llvm::DILexicalBlockFile LBF = llvm::DILexicalBlockFile(LB);
+    llvm::DILexicalBlockFile LBF = llvm::DILexicalBlockFile(Scope);
     llvm::DIDescriptor D
       = DBuilder.createLexicalBlockFile(LBF.getScope(),
                                         getOrCreateFile(CurLoc));
@@ -317,11 +314,18 @@
 /// CreateCompileUnit - Create new compile unit.
 void CGDebugInfo::CreateCompileUnit() {
 
+  // Should we be asking the SourceManager for the main file name, instead of
+  // accepting it as an argument? This just causes the main file name to
+  // mismatch with source locations and create extra lexical scopes or
+  // mismatched debug info (a CU with a DW_AT_file of "-", because that's what
+  // the driver passed, but functions/other things have DW_AT_file of "<stdin>"
+  // because that's what the SourceManager says).
+
   // Get absolute path name.
   SourceManager &SM = CGM.getContext().getSourceManager();
   std::string MainFileName = CGM.getCodeGenOpts().MainFileName;
   if (MainFileName.empty())
-    MainFileName = "<unknown>";
+    MainFileName = "<stdin>";
 
   // The main file name provided via the "-main-file-name" option contains just
   // the file name itself with no path information. This file name may have had
@@ -344,7 +348,7 @@
   std::string SplitDwarfFile = CGM.getCodeGenOpts().SplitDwarfFile;
   StringRef SplitDwarfFilename = internString(SplitDwarfFile);
 
-  unsigned LangTag;
+  llvm::dwarf::SourceLanguage LangTag;
   const LangOptions &LO = CGM.getLangOpts();
   if (LO.CPlusPlus) {
     if (LO.ObjC1)
@@ -379,7 +383,7 @@
 /// CreateType - Get the Basic type from the cache or create a new
 /// one if necessary.
 llvm::DIType CGDebugInfo::CreateType(const BuiltinType *BT) {
-  unsigned Encoding = 0;
+  llvm::dwarf::TypeKind Encoding;
   StringRef BTName;
   switch (BT->getKind()) {
 #define BUILTIN_TYPE(Id, SingletonId)
@@ -393,11 +397,10 @@
   case BuiltinType::Void:
     return llvm::DIType();
   case BuiltinType::ObjCClass:
-    if (ClassTy)
-      return ClassTy;
-    ClassTy = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
-                                         "objc_class", TheCU,
-                                         getOrCreateMainFile(), 0);
+    if (!ClassTy)
+      ClassTy = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
+                                           "objc_class", TheCU,
+                                           getOrCreateMainFile(), 0);
     return ClassTy;
   case BuiltinType::ObjCId: {
     // typedef struct objc_class *Class;
@@ -426,12 +429,10 @@
     return ObjTy;
   }
   case BuiltinType::ObjCSel: {
-    if (SelTy)
-      return SelTy;
-    SelTy =
-      DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
-                                 "objc_selector", TheCU, getOrCreateMainFile(),
-                                 0);
+    if (!SelTy)
+      SelTy = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
+                                         "objc_selector", TheCU,
+                                         getOrCreateMainFile(), 0);
     return SelTy;
   }
 
@@ -506,7 +507,7 @@
 
 llvm::DIType CGDebugInfo::CreateType(const ComplexType *Ty) {
   // Bit size, align and offset of the type.
-  unsigned Encoding = llvm::dwarf::DW_ATE_complex_float;
+  llvm::dwarf::TypeKind Encoding = llvm::dwarf::DW_ATE_complex_float;
   if (Ty->isComplexIntegerType())
     Encoding = llvm::dwarf::DW_ATE_lo_user;
 
@@ -531,7 +532,7 @@
 
   // We will create one Derived type for one qualifier and recurse to handle any
   // additional ones.
-  unsigned Tag;
+  llvm::dwarf::Tag Tag;
   if (Qc.hasConst()) {
     Tag = llvm::dwarf::DW_TAG_const_type;
     Qc.removeConst();
@@ -611,7 +612,7 @@
   unsigned Line = getLineNumber(RD->getLocation());
   StringRef RDName = getClassName(RD);
 
-  unsigned Tag = 0;
+  llvm::dwarf::Tag Tag;
   if (RD->isStruct() || RD->isInterface())
     Tag = llvm::dwarf::DW_TAG_structure_type;
   else if (RD->isUnion())
@@ -623,11 +624,13 @@
 
   // Create the type.
   SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU);
-  return DBuilder.createForwardDecl(Tag, RDName, Ctx, DefUnit, Line, 0, 0, 0,
-                                    FullName);
+  llvm::DICompositeType RetTy = DBuilder.createReplaceableForwardDecl(
+      Tag, RDName, Ctx, DefUnit, Line, 0, 0, 0, FullName);
+  ReplaceMap.push_back(std::make_pair(Ty, static_cast<llvm::Value *>(RetTy)));
+  return RetTy;
 }
 
-llvm::DIType CGDebugInfo::CreatePointerLikeType(unsigned Tag,
+llvm::DIType CGDebugInfo::CreatePointerLikeType(llvm::dwarf::Tag Tag,
                                                 const Type *Ty,
                                                 QualType PointeeTy,
                                                 llvm::DIFile Unit) {
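
The createReplaceableForwardDecl/ReplaceMap pairing above lets a record that is merely referenced get a placeholder node that finalize() later RAUWs with the real definition. The source pattern this serves, as a sketch:

    struct Node;                  // declaration only: replaceable forward decl
    Node *head;                   // the pointer type references the placeholder
    struct Node { Node *next; };  // definition; finalize() swaps it in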
@@ -719,12 +722,35 @@
   return BlockLiteralGeneric;
 }
 
+llvm::DIType CGDebugInfo::CreateType(const TemplateSpecializationType *Ty,
+                                     llvm::DIFile Unit) {
+  assert(Ty->isTypeAlias());
+  llvm::DIType Src = getOrCreateType(Ty->getAliasedType(), Unit);
+
+  SmallString<128> NS;
+  llvm::raw_svector_ostream OS(NS);
+  Ty->getTemplateName().print(OS, CGM.getContext().getPrintingPolicy(),
+                              /*qualified*/ false);
+
+  TemplateSpecializationType::PrintTemplateArgumentList(
+      OS, Ty->getArgs(), Ty->getNumArgs(),
+      CGM.getContext().getPrintingPolicy());
+
+  TypeAliasDecl *AliasDecl =
+      cast<TypeAliasTemplateDecl>(Ty->getTemplateName().getAsTemplateDecl())
+          ->getTemplatedDecl();
+
+  SourceLocation Loc = AliasDecl->getLocation();
+  llvm::DIFile File = getOrCreateFile(Loc);
+  unsigned Line = getLineNumber(Loc);
+
+  llvm::DIDescriptor Ctxt =
+      getContextDescriptor(cast<Decl>(AliasDecl->getDeclContext()));
+
+  return DBuilder.createTypedef(Src, internString(OS.str()), File, Line, Ctxt);
+}
+
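The new CreateType overload above gives alias-template specializations their own typedef debug node instead of desugaring them away (matching the UnwrapTypeForDebugInfo change further down). A sketch of the construct it handles, with hypothetical names:

    template <typename T> struct Box { T value; };
    template <typename T> using BoxAlias = Box<T>;  // TypeAliasTemplateDecl

    BoxAlias<int> b;  // debug info: typedef "BoxAlias<int>" -> Box<int>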
 llvm::DIType CGDebugInfo::CreateType(const TypedefType *Ty, llvm::DIFile Unit) {
   // Typedefs are derived from some other type.  If we have a typedef of a
   // typedef, make sure to emit the whole chain.
   llvm::DIType Src = getOrCreateType(Ty->getDecl()->getUnderlyingType(), Unit);
-  if (!Src)
-    return llvm::DIType();
   // We don't set size information, but do specify where the typedef was
   // declared.
   SourceLocation Loc = Ty->getDecl()->getLocation();
@@ -808,7 +834,7 @@
   unsigned fieldno = 0;
   for (CXXRecordDecl::capture_const_iterator I = CXXDecl->captures_begin(),
          E = CXXDecl->captures_end(); I != E; ++I, ++Field, ++fieldno) {
-    const LambdaExpr::Capture C = *I;
+    const LambdaCapture &C = *I;
     if (C.capturesVariable()) {
       VarDecl *V = C.getCapturedVar();
       llvm::DIFile VUnit = getOrCreateFile(C.getLocation());
@@ -852,7 +878,7 @@
 
   unsigned LineNumber = getLineNumber(Var->getLocation());
   StringRef VName = Var->getName();
-  llvm::Constant *C = NULL;
+  llvm::Constant *C = nullptr;
   if (Var->getInit()) {
     const APValue *Value = Var->evaluateValue();
     if (Value) {
@@ -1096,7 +1122,7 @@
                           MethodTy, /*isLocalToUnit=*/false,
                           /* isDefinition=*/ false,
                           Virtuality, VIndex, ContainingType,
-                          Flags, CGM.getLangOpts().Optimize, NULL,
+                          Flags, CGM.getLangOpts().Optimize, nullptr,
                           TParamsArray);
 
   SPCache[Method->getCanonicalDecl()] = llvm::WeakVH(SP);
@@ -1224,7 +1250,7 @@
                                                ->getTypeForDecl())
                        : CGM.getContext().getPointerType(D->getType());
       llvm::DIType TTy = getOrCreateType(T, Unit);
-      llvm::Value *V = 0;
+      llvm::Value *V = nullptr;
       // Variable pointer template parameters have a value that is the address
       // of the variable.
       if (const VarDecl *VD = dyn_cast<VarDecl>(D))
@@ -1256,7 +1282,7 @@
     case TemplateArgument::NullPtr: {
       QualType T = TA.getNullPtrType();
       llvm::DIType TTy = getOrCreateType(T, Unit);
-      llvm::Value *V = 0;
+      llvm::Value *V = nullptr;
       // Special case member data pointer null values since they're actually -1
       // instead of zero.
       if (const MemberPointerType *MPT =
@@ -1286,7 +1312,7 @@
       llvm::DITemplateValueParameter TVP =
           DBuilder.createTemplateParameterPack(
               TheCU, Name, llvm::DIType(),
-              CollectTemplateParams(NULL, TA.getPackAsArray(), Unit));
+              CollectTemplateParams(nullptr, TA.getPackAsArray(), Unit));
       TemplateParams.push_back(TVP);
     } break;
     case TemplateArgument::Expression: {
@@ -1330,14 +1356,11 @@
 llvm::DIArray CGDebugInfo::
 CollectCXXTemplateParams(const ClassTemplateSpecializationDecl *TSpecial,
                          llvm::DIFile Unit) {
-  llvm::PointerUnion<ClassTemplateDecl *,
-                     ClassTemplatePartialSpecializationDecl *>
-    PU = TSpecial->getSpecializedTemplateOrPartial();
-
-  TemplateParameterList *TPList = PU.is<ClassTemplateDecl *>() ?
-    PU.get<ClassTemplateDecl *>()->getTemplateParameters() :
-    PU.get<ClassTemplatePartialSpecializationDecl *>()->getTemplateParameters();
-  const TemplateArgumentList &TAList = TSpecial->getTemplateInstantiationArgs();
+  // Always get the full list of parameters, not just the ones from
+  // the specialization.
+  TemplateParameterList *TPList =
+    TSpecial->getSpecializedTemplate()->getTemplateParameters();
+  const TemplateArgumentList &TAList = TSpecial->getTemplateArgs();
   return CollectTemplateParams(TPList, TAList.asArray(), Unit);
 }
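
With this change the parameter list always comes from the primary template while the arguments come from the specialization itself, which matters for partial specializations. A sketch:

    template <typename T, int N> struct Arr {};  // primary: params <T, N>
    template <typename T> struct Arr<T, 0> {};   // partial specialization
    Arr<int, 0> a;  // debug info now pairs <T, N> with the args <int, 0>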
 
@@ -1408,6 +1431,21 @@
   return T;
 }
 
+void CGDebugInfo::completeType(const EnumDecl *ED) {
+  if (DebugKind <= CodeGenOptions::DebugLineTablesOnly)
+    return;
+  QualType Ty = CGM.getContext().getEnumType(ED);
+  void* TyPtr = Ty.getAsOpaquePtr();
+  auto I = TypeCache.find(TyPtr);
+  if (I == TypeCache.end() ||
+      !llvm::DIType(cast<llvm::MDNode>(static_cast<llvm::Value *>(I->second)))
+           .isForwardDecl())
+    return;
+  llvm::DIType Res = CreateTypeDefinition(Ty->castAs<EnumType>());
+  assert(!Res.isForwardDecl());
+  TypeCache[TyPtr] = Res;
+}
+
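completeType(EnumDecl) upgrades a cached forward declaration of an enum to its full definition once one is available. The pattern it targets, as a sketch:

    enum class Color : int;          // opaque declaration: forward-decl node
    enum class Color : int { R, G }; // definition: the cache entry is replaced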
 void CGDebugInfo::completeType(const RecordDecl *RD) {
   if (DebugKind > CodeGenOptions::LimitedDebugInfo ||
       !CGM.getLangOpts().CPlusPlus)
@@ -1433,11 +1471,13 @@
     return;
   QualType Ty = CGM.getContext().getRecordType(RD);
   void* TyPtr = Ty.getAsOpaquePtr();
-  if (CompletedTypeCache.count(TyPtr))
+  auto I = TypeCache.find(TyPtr);
+  if (I != TypeCache.end() &&
+      !llvm::DIType(cast<llvm::MDNode>(static_cast<llvm::Value *>(I->second)))
+           .isForwardDecl())
     return;
   llvm::DIType Res = CreateTypeDefinition(Ty->castAs<RecordType>());
   assert(!Res.isForwardDecl());
-  CompletedTypeCache[TyPtr] = Res;
   TypeCache[TyPtr] = Res;
 }
 
@@ -1525,9 +1565,6 @@
   LexicalBlockStack.push_back(&*FwdDecl);
   RegionMap[Ty->getDecl()] = llvm::WeakVH(FwdDecl);
 
-  // Add this to the completed-type cache while we're completing it recursively.
-  CompletedTypeCache[QualType(Ty, 0).getAsOpaquePtr()] = FwdDecl;
-
   // Convert all the elements.
   SmallVector<llvm::Value *, 16> EltTys;
   // what about nested types?
@@ -1599,20 +1636,28 @@
   // Get overall information about the record type for the debug info.
   llvm::DIFile DefUnit = getOrCreateFile(ID->getLocation());
   unsigned Line = getLineNumber(ID->getLocation());
-  unsigned RuntimeLang = TheCU.getLanguage();
+  llvm::dwarf::SourceLanguage RuntimeLang = TheCU.getLanguage();
 
   // If this is just a forward declaration return a special forward-declaration
   // debug type since we won't be able to lay out the entire type.
   ObjCInterfaceDecl *Def = ID->getDefinition();
-  if (!Def) {
-    llvm::DIType FwdDecl =
-      DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
-                                 ID->getName(), TheCU, DefUnit, Line,
-                                 RuntimeLang);
+  if (!Def || !Def->getImplementation()) {
+    llvm::DIType FwdDecl = DBuilder.createReplaceableForwardDecl(
+        llvm::dwarf::DW_TAG_structure_type, ID->getName(), TheCU, DefUnit, Line,
+        RuntimeLang);
+    ObjCInterfaceCache.push_back(ObjCInterfaceCacheEntry(Ty, FwdDecl, Unit));
     return FwdDecl;
   }
 
-  ID = Def;
+  return CreateTypeDefinition(Ty, Unit);
+}
+
+llvm::DIType CGDebugInfo::CreateTypeDefinition(const ObjCInterfaceType *Ty,
+                                               llvm::DIFile Unit) {
+  ObjCInterfaceDecl *ID = Ty->getDecl();
+  llvm::DIFile DefUnit = getOrCreateFile(ID->getLocation());
+  unsigned Line = getLineNumber(ID->getLocation());
+  llvm::dwarf::SourceLanguage RuntimeLang = TheCU.getLanguage();
 
   // Bit size, align and offset of the type.
   uint64_t Size = CGM.getContext().getTypeSize(Ty);
@@ -1627,10 +1672,8 @@
                               Line, Size, Align, Flags,
                               llvm::DIType(), llvm::DIArray(), RuntimeLang);
 
-  // Otherwise, insert it into the CompletedTypeCache so that recursive uses
-  // will find it and we're emitting the complete type.
-  QualType QualTy = QualType(Ty, 0);
-  CompletedTypeCache[QualTy.getAsOpaquePtr()] = RealDecl;
+  QualType QTy(Ty, 0);
+  TypeCache[QTy.getAsOpaquePtr()] = RealDecl;
 
   // Push the struct on region stack.
   LexicalBlockStack.push_back(static_cast<llvm::MDNode*>(RealDecl));
@@ -1722,7 +1765,7 @@
     else if (Field->getAccessControl() == ObjCIvarDecl::Private)
       Flags = llvm::DIDescriptor::FlagPrivate;
 
-    llvm::MDNode *PropertyNode = NULL;
+    llvm::MDNode *PropertyNode = nullptr;
     if (ObjCImplementationDecl *ImpD = ID->getImplementation()) {
       if (ObjCPropertyImplDecl *PImpD =
           ImpD->FindPropertyImplIvarDecl(Field->getIdentifier())) {
@@ -1754,12 +1797,6 @@
   llvm::DIArray Elements = DBuilder.getOrCreateArray(EltTys);
   RealDecl.setTypeArray(Elements);
 
-  // If the implementation is not yet set, we do not want to mark it
-  // as complete. An implementation may declare additional
-  // private ivars that we would miss otherwise.
-  if (ID->getImplementation() == 0)
-    CompletedTypeCache.erase(QualTy.getAsOpaquePtr());
-
   LexicalBlockStack.pop_back();
   return RealDecl;
 }
@@ -1890,11 +1927,27 @@
     llvm::DIFile DefUnit = getOrCreateFile(ED->getLocation());
     unsigned Line = getLineNumber(ED->getLocation());
     StringRef EDName = ED->getName();
-    return DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_enumeration_type,
-                                      EDName, EDContext, DefUnit, Line, 0,
-                                      Size, Align, FullName);
+    llvm::DIType RetTy = DBuilder.createReplaceableForwardDecl(
+        llvm::dwarf::DW_TAG_enumeration_type, EDName, EDContext, DefUnit, Line,
+        0, Size, Align, FullName);
+    ReplaceMap.push_back(std::make_pair(Ty, static_cast<llvm::Value *>(RetTy)));
+    return RetTy;
   }
 
+  return CreateTypeDefinition(Ty);
+}
+
+llvm::DIType CGDebugInfo::CreateTypeDefinition(const EnumType *Ty) {
+  const EnumDecl *ED = Ty->getDecl();
+  uint64_t Size = 0;
+  uint64_t Align = 0;
+  if (!ED->getTypeForDecl()->isIncompleteType()) {
+    Size = CGM.getContext().getTypeSize(ED->getTypeForDecl());
+    Align = CGM.getContext().getTypeAlign(ED->getTypeForDecl());
+  }
+
+  SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU);
+
   // Create DIEnumerator elements for each enumerator.
   SmallVector<llvm::Value *, 16> Enumerators;
   ED = ED->getDefinition();
@@ -1932,9 +1985,12 @@
     switch (T->getTypeClass()) {
     default:
       return C.getQualifiedType(T.getTypePtr(), Quals);
-    case Type::TemplateSpecialization:
-      T = cast<TemplateSpecializationType>(T)->desugar();
-      break;
+    case Type::TemplateSpecialization: {
+      const auto *Spec = cast<TemplateSpecializationType>(T);
+      if (Spec->isTypeAlias())
+        return C.getQualifiedType(T.getTypePtr(), Quals);
+      T = Spec->desugar();
+      break; }
     case Type::TypeOfExpr:
       T = cast<TypeOfExprType>(T)->getUnderlyingExpr()->getType();
       break;
@@ -1979,16 +2035,7 @@
   // Unwrap the type as needed for debug information.
   Ty = UnwrapTypeForDebugInfo(Ty, CGM.getContext());
 
-  // Check for existing entry.
-  if (Ty->getTypeClass() == Type::ObjCInterface) {
-    llvm::Value *V = getCachedInterfaceTypeOrNull(Ty);
-    if (V)
-      return llvm::DIType(cast<llvm::MDNode>(V));
-    else return llvm::DIType();
-  }
-
-  llvm::DenseMap<void *, llvm::WeakVH>::iterator it =
-    TypeCache.find(Ty.getAsOpaquePtr());
+  auto it = TypeCache.find(Ty.getAsOpaquePtr());
   if (it != TypeCache.end()) {
     // Verify that the debug info still exists.
     if (llvm::Value *V = it->second)
@@ -1998,27 +2045,6 @@
   return llvm::DIType();
 }
 
-/// getCompletedTypeOrNull - Get the type from the cache or return null if it
-/// doesn't exist.
-llvm::DIType CGDebugInfo::getCompletedTypeOrNull(QualType Ty) {
-
-  // Unwrap the type as needed for debug information.
-  Ty = UnwrapTypeForDebugInfo(Ty, CGM.getContext());
-
-  // Check for existing entry.
-  llvm::Value *V = 0;
-  llvm::DenseMap<void *, llvm::WeakVH>::iterator it =
-    CompletedTypeCache.find(Ty.getAsOpaquePtr());
-  if (it != CompletedTypeCache.end())
-    V = it->second;
-  else {
-    V = getCachedInterfaceTypeOrNull(Ty);
-  }
-
-  // Verify that any cached debug info still exists.
-  return llvm::DIType(cast_or_null<llvm::MDNode>(V));
-}
-
 void CGDebugInfo::completeTemplateDefinition(
     const ClassTemplateSpecializationDecl &SD) {
   if (DebugKind <= CodeGenOptions::DebugLineTablesOnly)
@@ -2030,22 +2056,6 @@
   RetainedTypes.push_back(CGM.getContext().getRecordType(&SD).getAsOpaquePtr());
 }
 
-/// getCachedInterfaceTypeOrNull - Get the type from the interface
-/// cache, unless it needs to regenerated. Otherwise return null.
-llvm::Value *CGDebugInfo::getCachedInterfaceTypeOrNull(QualType Ty) {
-  // Is there a cached interface that hasn't changed?
-  llvm::DenseMap<void *, std::pair<llvm::WeakVH, unsigned > >
-    ::iterator it1 = ObjCInterfaceCache.find(Ty.getAsOpaquePtr());
-
-  if (it1 != ObjCInterfaceCache.end())
-    if (ObjCInterfaceDecl* Decl = getObjCInterfaceDecl(Ty))
-      if (Checksum(Decl) == it1->second.second)
-        // Return cached forward declaration.
-        return it1->second.first;
-
-  return 0;
-}
-
 /// getOrCreateType - Get the type from the cache or create a new
 /// one if necessary.
 llvm::DIType CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile Unit) {
@@ -2055,7 +2065,7 @@
   // Unwrap the type as needed for debug information.
   Ty = UnwrapTypeForDebugInfo(Ty, CGM.getContext());
 
-  if (llvm::DIType T = getCompletedTypeOrNull(Ty))
+  if (llvm::DIType T = getTypeOrNull(Ty))
     return T;
 
   // Otherwise create the type.
@@ -2065,39 +2075,6 @@
   // And update the type cache.
   TypeCache[TyPtr] = Res;
 
-  // FIXME: this getTypeOrNull call seems silly when we just inserted the type
-  // into the cache - but getTypeOrNull has a special case for cached interface
-  // types. We should probably just pull that out as a special case for the
-  // "else" block below & skip the otherwise needless lookup.
-  llvm::DIType TC = getTypeOrNull(Ty);
-  if (TC && TC.isForwardDecl())
-    ReplaceMap.push_back(std::make_pair(TyPtr, static_cast<llvm::Value*>(TC)));
-  else if (ObjCInterfaceDecl* Decl = getObjCInterfaceDecl(Ty)) {
-    // Interface types may have elements added to them by a
-    // subsequent implementation or extension, so we keep them in
-    // the ObjCInterfaceCache together with a checksum. Instead of
-    // the (possibly) incomplete interface type, we return a forward
-    // declaration that gets RAUW'd in CGDebugInfo::finalize().
-    std::pair<llvm::WeakVH, unsigned> &V = ObjCInterfaceCache[TyPtr];
-    if (V.first)
-      return llvm::DIType(cast<llvm::MDNode>(V.first));
-    TC = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
-                                    Decl->getName(), TheCU, Unit,
-                                    getLineNumber(Decl->getLocation()),
-                                    TheCU.getLanguage());
-    // Store the forward declaration in the cache.
-    V.first = TC;
-    V.second = Checksum(Decl);
-
-    // Register the type for replacement in finalize().
-    ReplaceMap.push_back(std::make_pair(TyPtr, static_cast<llvm::Value*>(TC)));
-
-    return TC;
-  }
-
-  if (!Res.isForwardDecl())
-    CompletedTypeCache[TyPtr] = Res;
-
   return Res;
 }
 
@@ -2109,7 +2086,7 @@
   // a checksum.
   unsigned Sum = 0;
   for (const ObjCIvarDecl *Ivar = ID->all_declared_ivar_begin();
-       Ivar != 0; Ivar = Ivar->getNextIvar())
+       Ivar != nullptr; Ivar = Ivar->getNextIvar())
     ++Sum;
 
   return Sum;
@@ -2123,7 +2100,7 @@
   case Type::ObjCInterface:
     return cast<ObjCInterfaceType>(Ty)->getDecl();
   default:
-    return 0;
+    return nullptr;
   }
 }
 
@@ -2133,7 +2110,7 @@
   if (Ty.hasLocalQualifiers())
     return CreateQualifiedType(Ty, Unit);
 
-  const char *Diag = 0;
+  const char *Diag = nullptr;
 
   // Work out details of type.
   switch (Ty->getTypeClass()) {
@@ -2191,8 +2168,10 @@
   case Type::Atomic:
     return CreateType(cast<AtomicType>(Ty), Unit);
 
-  case Type::Attributed:
   case Type::TemplateSpecialization:
+    return CreateType(cast<TemplateSpecializationType>(Ty), Unit);
+
+  case Type::Attributed:
   case Type::Elaborated:
   case Type::Paren:
   case Type::SubstTemplateTypeParm:
@@ -2236,10 +2215,6 @@
   // correct order if the full type is needed.
   Res.setTypeArray(T.getTypeArray());
 
-  if (T && T.isForwardDecl())
-    ReplaceMap.push_back(
-        std::make_pair(QTy.getAsOpaquePtr(), static_cast<llvm::Value *>(T)));
-
   // And update the type cache.
   TypeCache[QTy.getAsOpaquePtr()] = Res;
   return Res;
@@ -2259,13 +2234,6 @@
 
   // If we ended up creating the type during the context chain construction,
   // just return that.
-  // FIXME: this could be dealt with better if the type was recorded as
-  // completed before we started this (see the CompletedTypeCache usage in
-  // CGDebugInfo::CreateTypeDefinition(const RecordType*) - that would need to
-  // be pushed to before context creation, but after it was known to be
-  // destined for completion (might still have an issue if this caller only
-  // required a declaration but the context construction ended up creating a
-  // definition)
   llvm::DICompositeType T(getTypeOrNull(CGM.getContext().getRecordType(RD)));
   if (T && (!T.isForwardDecl() || !RD->getDefinition()))
       return T;
@@ -2473,7 +2441,10 @@
 }
 
 /// EmitFunctionStart - Constructs the debug code for entering a function.
-void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, QualType FnType,
+void CGDebugInfo::EmitFunctionStart(GlobalDecl GD,
+                                    SourceLocation Loc,
+                                    SourceLocation ScopeLoc,
+                                    QualType FnType,
                                     llvm::Function *Fn,
                                     CGBuilderTy &Builder) {
 
@@ -2483,24 +2454,7 @@
   FnBeginRegionCount.push_back(LexicalBlockStack.size());
 
   const Decl *D = GD.getDecl();
-
-  // Use the location of the start of the function to determine where
-  // the function definition is located. By default use the location
-  // of the declaration as the location for the subprogram. A function
-  // may lack a declaration in the source code if it is created by code
-  // gen. (examples: _GLOBAL__I_a, __cxx_global_array_dtor, thunk).
-  bool HasDecl = (D != 0);
-  SourceLocation Loc;
-  if (HasDecl) {
-    Loc = D->getLocation();
-
-    // If this is a function specialization then use the pattern body
-    // as the location for the function.
-    if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D))
-      if (const FunctionDecl *SpecDecl = FD->getTemplateInstantiationPattern())
-        if (SpecDecl->hasBody(SpecDecl))
-          Loc = SpecDecl->getLocation();
-  }
+  bool HasDecl = (D != nullptr);
 
   unsigned Flags = 0;
   llvm::DIFile Unit = getOrCreateFile(Loc);
@@ -2560,9 +2514,14 @@
   if (!Name.empty() && Name[0] == '\01')
     Name = Name.substr(1);
 
-  unsigned LineNo = getLineNumber(Loc);
-  if (!HasDecl || D->isImplicit())
+  if (!HasDecl || D->isImplicit()) {
     Flags |= llvm::DIDescriptor::FlagArtificial;
+    // Artificial functions without a location should not silently reuse CurLoc.
+    if (Loc.isInvalid())
+      CurLoc = SourceLocation();
+  }
+  unsigned LineNo = getLineNumber(Loc);
+  unsigned ScopeLine = getLineNumber(ScopeLoc);
 
   // FIXME: The function declaration we're constructing here is mostly reusing
   // declarations from CXXMethodDecl and not constructing new ones for arbitrary
@@ -2573,7 +2532,7 @@
       DBuilder.createFunction(FDContext, Name, LinkageName, Unit, LineNo,
                               getOrCreateFunctionType(D, FnType, Unit),
                               Fn->hasInternalLinkage(), true /*definition*/,
-                              getLineNumber(CurLoc), Flags,
+                              ScopeLine, Flags,
                               CGM.getLangOpts().Optimize, Fn, TParamsArray,
                               getFunctionDeclaration(D));
   if (HasDecl)
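
EmitFunctionStart now receives the header location (Loc) and body location (ScopeLoc) from its caller and uses ScopeLoc for the subprogram's scope line instead of CurLoc. For a definition such as the following sketch, the two lines can differ:

    int answer()   // Loc: the function header, used for the decl line
    {              // ScopeLoc: where the body opens, used as ScopeLine
      return 42;
    }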
@@ -2731,7 +2690,7 @@
   }
 
   FType = Type;
-  llvm::DIType FieldTy = CGDebugInfo::getOrCreateType(FType, Unit);
+  llvm::DIType FieldTy = getOrCreateType(FType, Unit);
   FieldSize = CGM.getContext().getTypeSize(FType);
   FieldAlign = CGM.getContext().toBits(Align);
 
@@ -2751,7 +2710,7 @@
 }
 
 /// EmitDeclare - Emit local variable declaration debug info.
-void CGDebugInfo::EmitDeclare(const VarDecl *VD, unsigned Tag,
+void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::dwarf::LLVMConstants Tag,
                               llvm::Value *Storage,
                               unsigned ArgNo, CGBuilderTy &Builder) {
   assert(DebugKind >= CodeGenOptions::LimitedDebugInfo);
@@ -2888,7 +2847,6 @@
                                          llvm::DIType Ty) {
   llvm::DIType CachedTy = getTypeOrNull(QualTy);
   if (CachedTy) Ty = CachedTy;
-  else DEBUG(llvm::dbgs() << "No cached type for self.");
   return DBuilder.createObjectPointerType(Ty);
 }
 
@@ -2899,7 +2857,7 @@
   assert(DebugKind >= CodeGenOptions::LimitedDebugInfo);
   assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!");
 
-  if (Builder.GetInsertBlock() == 0)
+  if (Builder.GetInsertBlock() == nullptr)
     return;
 
   bool isByRef = VD->hasAttr<BlocksAttr>();
@@ -3029,7 +2987,7 @@
     BlockLayoutChunk chunk;
     chunk.OffsetInBits =
       blockLayout->getElementOffsetInBits(block.CXXThisIndex);
-    chunk.Capture = 0;
+    chunk.Capture = nullptr;
     chunks.push_back(chunk);
   }
 
@@ -3149,6 +3107,37 @@
   return T;
 }
 
+/// Recursively collect all of the member fields of a global anonymous decl and
+/// create static variables for them. The first time this is called it needs
+/// to be on a union and then from there we can have additional unnamed fields.
+llvm::DIGlobalVariable
+CGDebugInfo::CollectAnonRecordDecls(const RecordDecl *RD, llvm::DIFile Unit,
+                                    unsigned LineNo, StringRef LinkageName,
+                                    llvm::GlobalVariable *Var,
+                                    llvm::DIDescriptor DContext) {
+  llvm::DIGlobalVariable GV;
+
+  for (const auto *Field : RD->fields()) {
+    llvm::DIType FieldTy = getOrCreateType(Field->getType(), Unit);
+    StringRef FieldName = Field->getName();
+
+    // Ignore unnamed fields, but recurse into anonymous records.
+    if (FieldName.empty()) {
+      const RecordType *RT = dyn_cast<RecordType>(Field->getType());
+      if (RT)
+        GV = CollectAnonRecordDecls(RT->getDecl(), Unit, LineNo, LinkageName,
+                                    Var, DContext);
+      continue;
+    }
+    // Use VarDecl's Tag, Scope and Line number.
+    GV = DBuilder.createStaticVariable(DContext, FieldName, LinkageName, Unit,
+                                       LineNo, FieldTy,
+                                       Var->hasInternalLinkage(), Var,
+                                       llvm::DIDerivedType());
+  }
+  return GV;
+}
+
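CollectAnonRecordDecls walks the named fields of a global anonymous union, recursing through any further unnamed records, and emits one static variable per member so each name stays findable in a debugger. The case it exists for, as a sketch:

    // File-scope anonymous union: both 'i' and 'c' get their own
    // DIGlobalVariable entry sharing the same storage.
    static union {
      int i;
      char c;
    };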
 /// EmitGlobalVariable - Emit information about a global variable.
 void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var,
                                      const VarDecl *D) {
@@ -3169,19 +3158,35 @@
     T = CGM.getContext().getConstantArrayType(ET, ConstVal,
                                               ArrayType::Normal, 0);
   }
+
   StringRef DeclName = D->getName();
   StringRef LinkageName;
-  if (D->getDeclContext() && !isa<FunctionDecl>(D->getDeclContext())
-      && !isa<ObjCMethodDecl>(D->getDeclContext()))
+  if (D->getDeclContext() && !isa<FunctionDecl>(D->getDeclContext()) &&
+      !isa<ObjCMethodDecl>(D->getDeclContext()))
     LinkageName = Var->getName();
   if (LinkageName == DeclName)
     LinkageName = StringRef();
+
   llvm::DIDescriptor DContext =
     getContextDescriptor(dyn_cast<Decl>(D->getDeclContext()));
-  llvm::DIGlobalVariable GV = DBuilder.createStaticVariable(
-      DContext, DeclName, LinkageName, Unit, LineNo, getOrCreateType(T, Unit),
-      Var->hasInternalLinkage(), Var,
-      getOrCreateStaticDataMemberDeclarationOrNull(D));
+
+  // Attempt to store one global variable for the declaration - even if we
+  // emit a lot of fields.
+  llvm::DIGlobalVariable GV;
+
+  // If this is an anonymous union then we'll want to emit a global
+  // variable for each member of the anonymous union so that it's possible
+  // to find the name of any field in the union.
+  if (T->isUnionType() && DeclName.empty()) {
+    const RecordDecl *RD = cast<RecordType>(T)->getDecl();
+    assert(RD->isAnonymousStructOrUnion() &&
+           "unnamed non-anonymous struct or union?");
+    GV = CollectAnonRecordDecls(RD, Unit, LineNo, LinkageName, Var, DContext);
+  } else {
+    GV = DBuilder.createStaticVariable(
+        DContext, DeclName, LinkageName, Unit, LineNo, getOrCreateType(T, Unit),
+        Var->hasInternalLinkage(), Var,
+        getOrCreateStaticDataMemberDeclarationOrNull(D));
+  }
   DeclCache.insert(std::make_pair(D->getCanonicalDecl(), llvm::WeakVH(GV)));
 }
 
@@ -3227,10 +3232,20 @@
   // Do not use DIGlobalVariable for enums.
   if (Ty.getTag() == llvm::dwarf::DW_TAG_enumeration_type)
     return;
+  // Do not emit separate definitions for function local const/statics.
+  if (isa<FunctionDecl>(VD->getDeclContext()))
+    return;
+  VD = cast<ValueDecl>(VD->getCanonicalDecl());
+  auto pair = DeclCache.insert(std::make_pair(VD, llvm::WeakVH()));
+  if (!pair.second)
+    return;
+  llvm::DIDescriptor DContext =
+      getContextDescriptor(dyn_cast<Decl>(VD->getDeclContext()));
   llvm::DIGlobalVariable GV = DBuilder.createStaticVariable(
-      Unit, Name, Name, Unit, getLineNumber(VD->getLocation()), Ty, true, Init,
+      DContext, Name, StringRef(), Unit, getLineNumber(VD->getLocation()), Ty,
+      true, Init,
       getOrCreateStaticDataMemberDeclarationOrNull(cast<VarDecl>(VD)));
-  DeclCache.insert(std::make_pair(VD->getCanonicalDecl(), llvm::WeakVH(GV)));
+  pair.first->second = llvm::WeakVH(GV);
 }
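
The constant-variable path above now skips function-local const/statics and dedups through DeclCache so a folded constant is described at most once. The duplicate it avoids, as a sketch:

    int f() {
      static const int k = 42;  // no longer also emitted as a separate
      return k;                 // file-scope DIGlobalVariable when folded
    }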
 
 llvm::DIScope CGDebugInfo::getCurrentContextDescriptor(const Decl *D) {
@@ -3266,20 +3281,20 @@
 llvm::DIImportedEntity
 CGDebugInfo::EmitNamespaceAlias(const NamespaceAliasDecl &NA) {
   if (CGM.getCodeGenOpts().getDebugInfo() < CodeGenOptions::LimitedDebugInfo)
-    return llvm::DIImportedEntity(0);
+    return llvm::DIImportedEntity(nullptr);
   llvm::WeakVH &VH = NamespaceAliasCache[&NA];
   if (VH)
     return llvm::DIImportedEntity(cast<llvm::MDNode>(VH));
-  llvm::DIImportedEntity R(0);
+  llvm::DIImportedEntity R(nullptr);
   if (const NamespaceAliasDecl *Underlying =
           dyn_cast<NamespaceAliasDecl>(NA.getAliasedNamespace()))
     // This could cache & dedup here rather than relying on metadata deduping.
-    R = DBuilder.createImportedModule(
+    R = DBuilder.createImportedDeclaration(
         getCurrentContextDescriptor(cast<Decl>(NA.getDeclContext())),
         EmitNamespaceAlias(*Underlying), getLineNumber(NA.getLocation()),
         NA.getName());
   else
-    R = DBuilder.createImportedModule(
+    R = DBuilder.createImportedDeclaration(
         getCurrentContextDescriptor(cast<Decl>(NA.getDeclContext())),
         getOrCreateNameSpace(cast<NamespaceDecl>(NA.getAliasedNamespace())),
         getLineNumber(NA.getLocation()), NA.getName());
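
Namespace aliases are now emitted with createImportedDeclaration rather than createImportedModule, i.e. as DW_TAG_imported_declaration instead of DW_TAG_imported_module. The construct in question, as a sketch:

    namespace detail { int value; }
    namespace d = detail;  // recorded as an imported declaration of 'detail'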
@@ -3308,23 +3323,27 @@
 }
 
 void CGDebugInfo::finalize() {
-  for (std::vector<std::pair<void *, llvm::WeakVH> >::const_iterator VI
-         = ReplaceMap.begin(), VE = ReplaceMap.end(); VI != VE; ++VI) {
-    llvm::DIType Ty, RepTy;
-    // Verify that the debug info still exists.
-    if (llvm::Value *V = VI->second)
-      Ty = llvm::DIType(cast<llvm::MDNode>(V));
+  // Creating types might create further types - invalidating the current
+  // element and the size(), so don't cache/reference them.
+  for (size_t i = 0; i != ObjCInterfaceCache.size(); ++i) {
+    ObjCInterfaceCacheEntry E = ObjCInterfaceCache[i];
+    E.Decl.replaceAllUsesWith(CGM.getLLVMContext(),
+                              E.Type->getDecl()->getDefinition()
+                                  ? CreateTypeDefinition(E.Type, E.Unit)
+                                  : E.Decl);
+  }
 
-    llvm::DenseMap<void *, llvm::WeakVH>::iterator it =
-      TypeCache.find(VI->first);
-    if (it != TypeCache.end()) {
-      // Verify that the debug info still exists.
-      if (llvm::Value *V = it->second)
-        RepTy = llvm::DIType(cast<llvm::MDNode>(V));
-    }
+  for (auto p : ReplaceMap) {
+    assert(p.second);
+    llvm::DIType Ty(cast<llvm::MDNode>(p.second));
+    assert(Ty.isForwardDecl());
 
-    if (Ty && Ty.isForwardDecl() && RepTy)
-      Ty.replaceAllUsesWith(RepTy);
+    auto it = TypeCache.find(p.first);
+    assert(it != TypeCache.end());
+    assert(it->second);
+
+    llvm::DIType RepTy(cast<llvm::MDNode>(it->second));
+    Ty.replaceAllUsesWith(CGM.getLLVMContext(), RepTy);
   }
 
   // We keep our own list of retained types, because we need to look
diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h
index 52784da..0da6179 100644
--- a/lib/CodeGen/CGDebugInfo.h
+++ b/lib/CodeGen/CGDebugInfo.h
@@ -65,21 +65,27 @@
   llvm::DIType BlockLiteralGeneric;
 
   /// TypeCache - Cache of previously constructed Types.
-  llvm::DenseMap<void *, llvm::WeakVH> TypeCache;
+  llvm::DenseMap<const void *, llvm::WeakVH> TypeCache;
+
+  struct ObjCInterfaceCacheEntry {
+    const ObjCInterfaceType *Type;
+    llvm::DIType Decl;
+    llvm::DIFile Unit;
+    ObjCInterfaceCacheEntry(const ObjCInterfaceType *Type, llvm::DIType Decl,
+                            llvm::DIFile Unit)
+        : Type(Type), Decl(Decl), Unit(Unit) {}
+  };
 
   /// ObjCInterfaceCache - Cache of previously constructed interfaces
-  /// which may change. Storing a pair of DIType and checksum.
-  llvm::DenseMap<void *, std::pair<llvm::WeakVH, unsigned> > ObjCInterfaceCache;
+  /// which may change.
+  llvm::SmallVector<ObjCInterfaceCacheEntry, 32> ObjCInterfaceCache;
 
   /// RetainedTypes - list of interfaces we want to keep even if orphaned.
   std::vector<void *> RetainedTypes;
 
-  /// CompleteTypeCache - Cache of previously constructed complete RecordTypes.
-  llvm::DenseMap<void *, llvm::WeakVH> CompletedTypeCache;
-
   /// ReplaceMap - Cache of forward declared types to RAUW at the end of
   /// compilation.
-  std::vector<std::pair<void *, llvm::WeakVH> >ReplaceMap;
+  std::vector<std::pair<const TagType *, llvm::WeakVH>> ReplaceMap;
 
   // LexicalBlockStack - Keep track of our current nested lexical block.
   std::vector<llvm::TrackingVH<llvm::MDNode> > LexicalBlockStack;
@@ -109,6 +115,7 @@
   llvm::DIType CreateType(const ComplexType *Ty);
   llvm::DIType CreateQualifiedType(QualType Ty, llvm::DIFile Fg);
   llvm::DIType CreateType(const TypedefType *Ty, llvm::DIFile Fg);
+  llvm::DIType CreateType(const TemplateSpecializationType *Ty, llvm::DIFile Fg);
   llvm::DIType CreateType(const ObjCObjectPointerType *Ty,
                           llvm::DIFile F);
   llvm::DIType CreateType(const PointerType *Ty, llvm::DIFile F);
@@ -119,6 +126,7 @@
   llvm::DICompositeType CreateLimitedType(const RecordType *Ty);
   void CollectContainingType(const CXXRecordDecl *RD, llvm::DICompositeType CT);
   llvm::DIType CreateType(const ObjCInterfaceType *Ty, llvm::DIFile F);
+  llvm::DIType CreateTypeDefinition(const ObjCInterfaceType *Ty, llvm::DIFile F);
   llvm::DIType CreateType(const ObjCObjectType *Ty, llvm::DIFile F);
   llvm::DIType CreateType(const VectorType *Ty, llvm::DIFile F);
   llvm::DIType CreateType(const ArrayType *Ty, llvm::DIFile F);
@@ -127,9 +135,9 @@
   llvm::DIType CreateType(const MemberPointerType *Ty, llvm::DIFile F);
   llvm::DIType CreateType(const AtomicType *Ty, llvm::DIFile F);
   llvm::DIType CreateEnumType(const EnumType *Ty);
+  llvm::DIType CreateTypeDefinition(const EnumType *Ty);
   llvm::DIType CreateSelfType(const QualType &QualTy, llvm::DIType Ty);
   llvm::DIType getTypeOrNull(const QualType);
-  llvm::DIType getCompletedTypeOrNull(const QualType);
   llvm::DICompositeType getOrCreateMethodType(const CXXMethodDecl *Method,
                                               llvm::DIFile F);
   llvm::DICompositeType getOrCreateInstanceMethodType(
@@ -139,7 +147,7 @@
   llvm::DIType getOrCreateVTablePtrType(llvm::DIFile F);
   llvm::DINameSpace getOrCreateNameSpace(const NamespaceDecl *N);
   llvm::DIType getOrCreateTypeDeclaration(QualType PointeeTy, llvm::DIFile F);
-  llvm::DIType CreatePointerLikeType(unsigned Tag,
+  llvm::DIType CreatePointerLikeType(llvm::dwarf::Tag Tag,
                                      const Type *Ty, QualType PointeeTy,
                                      llvm::DIFile F);
 
@@ -219,8 +227,12 @@
 
   /// EmitFunctionStart - Emit a call to llvm.dbg.function.start to indicate
   /// start of a new function.
-  void EmitFunctionStart(GlobalDecl GD, QualType FnType,
-                         llvm::Function *Fn, CGBuilderTy &Builder);
+  /// \param Loc       The location of the function header.
+  /// \param ScopeLoc  The location of the function body.
+  void EmitFunctionStart(GlobalDecl GD,
+                         SourceLocation Loc, SourceLocation ScopeLoc,
+                         QualType FnType, llvm::Function *Fn,
+                         CGBuilderTy &Builder);
 
   /// EmitFunctionEnd - Constructs the debug code for exiting a function.
   void EmitFunctionEnd(CGBuilderTy &Builder);
@@ -284,6 +296,7 @@
   llvm::DIType getOrCreateInterfaceType(QualType Ty,
                                         SourceLocation Loc);
 
+  void completeType(const EnumDecl *ED);
   void completeType(const RecordDecl *RD);
   void completeRequiredType(const RecordDecl *RD);
   void completeClassData(const RecordDecl *RD);
@@ -292,8 +305,10 @@
 
 private:
   /// EmitDeclare - Emit call to llvm.dbg.declare for a variable declaration.
-  void EmitDeclare(const VarDecl *decl, unsigned Tag, llvm::Value *AI,
-                   unsigned ArgNo, CGBuilderTy &Builder);
+  /// Tag accepts the custom LLVM constants DW_TAG_arg_variable and
+  /// DW_TAG_auto_variable; otherwise it would be of type llvm::dwarf::Tag.
+  void EmitDeclare(const VarDecl *decl, llvm::dwarf::LLVMConstants Tag,
+                   llvm::Value *AI, unsigned ArgNo, CGBuilderTy &Builder);
 
   // EmitTypeForVarWithBlocksAttr - Build up structure info for the byref.
   // See BuildByRefType.
@@ -357,6 +372,13 @@
   llvm::DIDerivedType
   getOrCreateStaticDataMemberDeclarationOrNull(const VarDecl *D);
 
+  /// Return a global variable that represents one of the collection of
+  /// global variables created for an anonymous union.
+  llvm::DIGlobalVariable
+  CollectAnonRecordDecls(const RecordDecl *RD, llvm::DIFile Unit, unsigned LineNo,
+                         StringRef LinkageName, llvm::GlobalVariable *Var,
+                         llvm::DIDescriptor DContext);
+
   /// getFunctionName - Get function name for the given FunctionDecl. If the
   /// name is constructed on demand (e.g. C++ destructor) then the name
   /// is stored on the side.
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp
index 9db3c91..1869c1c 100644
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -126,17 +126,11 @@
 void CodeGenFunction::EmitVarDecl(const VarDecl &D) {
   if (D.isStaticLocal()) {
     llvm::GlobalValue::LinkageTypes Linkage =
-      llvm::GlobalValue::InternalLinkage;
+        CGM.getLLVMLinkageVarDefinition(&D, /*isConstant=*/false);
 
-    // If the variable is externally visible, it must have weak linkage so it
-    // can be uniqued.
-    if (D.isExternallyVisible()) {
-      Linkage = llvm::GlobalValue::LinkOnceODRLinkage;
-
-      // FIXME: We need to force the emission/use of a guard variable for
-      // some variables even if we can constant-evaluate them because
-      // we can't guarantee every translation unit will constant-evaluate them.
-    }
+    // FIXME: We need to force the emission/use of a guard variable for
+    // some variables even if we can constant-evaluate them because
+    // we can't guarantee every translation unit will constant-evaluate them.
 
     return EmitStaticVarDecl(D, Linkage);
   }
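In practice this means an externally-visible static local now gets a linkage
computed from its context instead of a hard-coded one. A minimal sketch
(mangled name abbreviated):

  // in a header
  inline int counter() {
    static int n = 0;   // now emitted linkonce_odr, e.g. @_ZZ7countervE1n,
    return ++n;         // so all TUs share a single copy
  }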
@@ -203,7 +197,7 @@
   llvm::GlobalVariable *GV =
     new llvm::GlobalVariable(CGM.getModule(), LTy,
                              Ty.isConstant(getContext()), Linkage,
-                             CGM.EmitNullConstant(D.getType()), Name, 0,
+                             CGM.EmitNullConstant(D.getType()), Name, nullptr,
                              llvm::GlobalVariable::NotThreadLocal,
                              AddrSpace);
   GV->setAlignment(getContext().getDeclAlign(&D).getQuantity());
@@ -297,7 +291,7 @@
 void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D,
                                       llvm::GlobalValue::LinkageTypes Linkage) {
   llvm::Value *&DMEntry = LocalDeclMap[&D];
-  assert(DMEntry == 0 && "Decl already exists in localdeclmap!");
+  assert(!DMEntry && "Decl already exists in localdeclmap!");
 
   // Check to see if we already have a global variable for this
   // declaration.  This can happen when double-emitting function
@@ -394,7 +388,7 @@
       // Along the exceptions path we always execute the dtor.
       bool NRVO = flags.isForNormalCleanup() && NRVOFlag;
 
-      llvm::BasicBlock *SkipDtorBB = 0;
+      llvm::BasicBlock *SkipDtorBB = nullptr;
       if (NRVO) {
         // If we exited via NRVO, we skip the destructor call.
         llvm::BasicBlock *RunDtorBB = CGF.createBasicBlock("nrvo.unused");
@@ -621,7 +615,7 @@
   }
 
   // Emit the initializer.
-  llvm::Value *value = 0;
+  llvm::Value *value = nullptr;
 
   switch (lifetime) {
   case Qualifiers::OCL_None:
@@ -874,7 +868,7 @@
           CGM.isTypeConstant(Ty, true)) {
         EmitStaticVarDecl(D, llvm::GlobalValue::InternalLinkage);
 
-        emission.Address = 0; // signal this condition to later callbacks
+        emission.Address = nullptr; // signal this condition to later callbacks
         assert(emission.wasEmittedAsGlobal());
         return emission;
       }
@@ -969,7 +963,7 @@
   }
 
   llvm::Value *&DMEntry = LocalDeclMap[&D];
-  assert(DMEntry == 0 && "Decl already exists in localdeclmap!");
+  assert(!DMEntry && "Decl already exists in localdeclmap!");
   DMEntry = DeclPtr;
   emission.Address = DeclPtr;
 
@@ -1089,7 +1083,7 @@
   llvm::Value *Loc =
     capturedByInit ? emission.Address : emission.getObjectAddress(*this);
 
-  llvm::Constant *constant = 0;
+  llvm::Constant *constant = nullptr;
   if (emission.IsConstantAggregate || D.isConstexpr()) {
     assert(!capturedByInit && "constant init contains a capturing block?");
     constant = CGM.EmitConstantInit(D, this);
@@ -1216,7 +1210,7 @@
   QualType type = var->getType();
 
   CleanupKind cleanupKind = NormalAndEHCleanup;
-  CodeGenFunction::Destroyer *destroyer = 0;
+  CodeGenFunction::Destroyer *destroyer = nullptr;
 
   switch (dtorKind) {
   case QualType::DK_none:
@@ -1626,7 +1620,7 @@
     // The only implicit argument a block has is its literal.
     if (BlockInfo) {
       LocalDeclMap[&D] = Arg;
-      llvm::Value *LocalAddr = 0;
+      llvm::Value *LocalAddr = nullptr;
       if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
         // Allocate a stack slot to let the debug info survive the RA.
         llvm::AllocaInst *Alloc = CreateTempAlloca(ConvertTypeForMem(Ty),
@@ -1740,7 +1734,7 @@
     EmitStoreOfScalar(Arg, lv, /* isInitialization */ true);
 
   llvm::Value *&DMEntry = LocalDeclMap[&D];
-  assert(DMEntry == 0 && "Decl already exists in localdeclmap!");
+  assert(!DMEntry && "Decl already exists in localdeclmap!");
   DMEntry = DeclPtr;
 
   // Emit debug info for param declaration.
diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp
index bc8620d..c287740 100644
--- a/lib/CodeGen/CGDeclCXX.cpp
+++ b/lib/CodeGen/CGDeclCXX.cpp
@@ -17,6 +17,7 @@
 #include "clang/Frontend/CodeGenOptions.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/Path.h"
 
 using namespace clang;
 using namespace CodeGen;
@@ -89,7 +90,7 @@
 
   // Special-case non-array C++ destructors, where there's a function
   // with the right signature that we can just call.
-  const CXXRecordDecl *record = 0;
+  const CXXRecordDecl *record = nullptr;
   if (dtorKind == QualType::DK_cxx_destructor &&
       (record = type->getAsCXXRecordDecl())) {
     assert(!record->hasTrivialDestructor());
@@ -178,8 +179,7 @@
   CodeGenFunction CGF(CGM);
 
   CGF.StartFunction(&VD, CGM.getContext().VoidTy, fn,
-                    CGM.getTypes().arrangeNullaryFunction(), FunctionArgList(),
-                    SourceLocation());
+                    CGM.getTypes().arrangeNullaryFunction(), FunctionArgList());
 
   llvm::CallInst *call = CGF.Builder.CreateCall(dtor, addr);
  
@@ -294,10 +294,12 @@
     //   have unordered initialization.
     //
     // As a consequence, we can put them into their own llvm.global_ctors entry.
-    // This should allow GlobalOpt to fire more often, and allow us to implement
-    // the Microsoft C++ ABI, which uses COMDAT elimination to avoid double
-    // initializaiton.
-    AddGlobalCtor(Fn);
+    //
+    // In addition, put the initializer into a COMDAT group with the global
+    // being initialized.  On most platforms, this is a minor startup time
+    // optimization.  In the MS C++ ABI, there are no guard variables, so this
+    // COMDAT key is required for correctness.
+    AddGlobalCtor(Fn, 65535, Addr);
     DelayedCXXInitPosition.erase(D);
   } else {
     llvm::DenseMap<const Decl *, unsigned>::iterator I =
@@ -305,7 +307,7 @@
     if (I == DelayedCXXInitPosition.end()) {
       CXXGlobalInits.push_back(Fn);
     } else {
-      assert(CXXGlobalInits[I->second] == 0);
+      assert(CXXGlobalInits[I->second] == nullptr);
       CXXGlobalInits[I->second] = Fn;
       DelayedCXXInitPosition.erase(I);
     }
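With the extra argument, the generated ctor entry carries the initialized
global as its COMDAT key. Roughly, in the typed-pointer IR of this era (names
illustrative):

  @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }]
    [{ i32, void ()*, i8* } { i32 65535, void ()* @__cxx_global_var_init,
                              i8* bitcast (i32* @x to i8*) }]

If @x is dropped as a duplicate COMDAT member at link time, its initializer
call goes with it, which is what makes this correct for the MS C++ ABI's
guard-less model.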
@@ -313,7 +315,7 @@
 }
 
 void CodeGenModule::EmitCXXThreadLocalInitFunc() {
-  llvm::Function *InitFn = 0;
+  llvm::Function *InitFn = nullptr;
   if (!CXXThreadLocalInits.empty()) {
     // Generate a guarded initialization function.
     llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
@@ -363,7 +365,7 @@
       // Compute the function suffix from priority. Prepend with zeroes to make
       // sure the function names are also ordered as priorities.
       std::string PrioritySuffix = llvm::utostr(Priority);
-      // Priority is always <= 65535 (enforced by sema)..
+      // Priority is always <= 65535 (enforced by sema).
       PrioritySuffix = std::string(6-PrioritySuffix.size(), '0')+PrioritySuffix;
       llvm::Function *Fn = 
         CreateGlobalInitOrDestructFunction(*this, FTy,
@@ -377,8 +379,20 @@
     }
   }
   
-  llvm::Function *Fn = 
-    CreateGlobalInitOrDestructFunction(*this, FTy, "_GLOBAL__I_a");
+  // Include the filename in the symbol name. Including "sub_" matches gcc and
+  // makes sure these symbols appear lexicographically behind the symbols with
+  // priority emitted above.
+  SourceManager &SM = Context.getSourceManager();
+  SmallString<128> FileName(llvm::sys::path::filename(
+      SM.getFileEntryForID(SM.getMainFileID())->getName()));
+  for (size_t i = 0; i < FileName.size(); ++i) {
+    // Replace everything that's not [a-zA-Z0-9._] with '_'. This set happens
+    // to be the characters that can make up the body of a C preprocessing
+    // number.
+    if (!isPreprocessingNumberBody(FileName[i]))
+      FileName[i] = '_';
+  }
+  llvm::Function *Fn = CreateGlobalInitOrDestructFunction(
+      *this, FTy, llvm::Twine("_GLOBAL__sub_I_", FileName));
 
   CodeGenFunction(*this).GenerateCXXGlobalInitFunc(Fn, CXXGlobalInits);
   AddGlobalCtor(Fn);
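A worked example of the sanitization, assuming the main file is foo-bar.cpp:
'-' is not in [a-zA-Z0-9._] and becomes '_', while '.' is kept, yielding

  _GLOBAL__sub_I_foo_bar.cpp

which sorts lexicographically after the zero-padded priority functions emitted
above.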
@@ -408,17 +422,17 @@
                                                        bool PerformInit) {
   // Check if we need to emit debug info for variable initializer.
   if (D->hasAttr<NoDebugAttr>())
-    DebugInfo = NULL; // disable debug info indefinitely for this function
+    DebugInfo = nullptr; // disable debug info indefinitely for this function
 
   StartFunction(GlobalDecl(D), getContext().VoidTy, Fn,
                 getTypes().arrangeNullaryFunction(),
-                FunctionArgList(), D->getInit()->getExprLoc());
+                FunctionArgList(), D->getLocation(),
+                D->getInit()->getExprLoc());
 
   // Use guarded initialization if the global variable is weak. This
   // occurs for, e.g., instantiated static data members and
   // definitions explicitly marked weak.
-  if (Addr->getLinkage() == llvm::GlobalValue::WeakODRLinkage ||
-      Addr->getLinkage() == llvm::GlobalValue::WeakAnyLinkage) {
+  if (Addr->hasWeakLinkage() || Addr->hasLinkOnceLinkage()) {
     EmitCXXGuardedInit(*D, Addr, PerformInit);
   } else {
     EmitCXXGlobalVarDeclInit(*D, Addr, PerformInit);
@@ -431,44 +445,49 @@
 CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn,
                                            ArrayRef<llvm::Constant *> Decls,
                                            llvm::GlobalVariable *Guard) {
-  StartFunction(GlobalDecl(), getContext().VoidTy, Fn,
-                getTypes().arrangeNullaryFunction(),
-                FunctionArgList(), SourceLocation());
+  {
+    ArtificialLocation AL(*this, Builder);
+    StartFunction(GlobalDecl(), getContext().VoidTy, Fn,
+                  getTypes().arrangeNullaryFunction(), FunctionArgList());
+    // Emit an artificial location for this function.
+    AL.Emit();
 
-  llvm::BasicBlock *ExitBlock = 0;
-  if (Guard) {
-    // If we have a guard variable, check whether we've already performed these
-    // initializations. This happens for TLS initialization functions.
-    llvm::Value *GuardVal = Builder.CreateLoad(Guard);
-    llvm::Value *Uninit = Builder.CreateIsNull(GuardVal, "guard.uninitialized");
-    // Mark as initialized before initializing anything else. If the
-    // initializers use previously-initialized thread_local vars, that's
-    // probably supposed to be OK, but the standard doesn't say.
-    Builder.CreateStore(llvm::ConstantInt::get(GuardVal->getType(), 1), Guard);
-    llvm::BasicBlock *InitBlock = createBasicBlock("init");
-    ExitBlock = createBasicBlock("exit");
-    Builder.CreateCondBr(Uninit, InitBlock, ExitBlock);
-    EmitBlock(InitBlock);
-  }
+    llvm::BasicBlock *ExitBlock = nullptr;
+    if (Guard) {
+      // If we have a guard variable, check whether we've already performed
+      // these initializations. This happens for TLS initialization functions.
+      llvm::Value *GuardVal = Builder.CreateLoad(Guard);
+      llvm::Value *Uninit = Builder.CreateIsNull(GuardVal,
+                                                 "guard.uninitialized");
+      // Mark as initialized before initializing anything else. If the
+      // initializers use previously-initialized thread_local vars, that's
+      // probably supposed to be OK, but the standard doesn't say.
+      Builder.CreateStore(llvm::ConstantInt::get(GuardVal->getType(),1), Guard);
+      llvm::BasicBlock *InitBlock = createBasicBlock("init");
+      ExitBlock = createBasicBlock("exit");
+      Builder.CreateCondBr(Uninit, InitBlock, ExitBlock);
+      EmitBlock(InitBlock);
+    }
 
-  RunCleanupsScope Scope(*this);
+    RunCleanupsScope Scope(*this);
 
-  // When building in Objective-C++ ARC mode, create an autorelease pool
-  // around the global initializers.
-  if (getLangOpts().ObjCAutoRefCount && getLangOpts().CPlusPlus) {    
-    llvm::Value *token = EmitObjCAutoreleasePoolPush();
-    EmitObjCAutoreleasePoolCleanup(token);
-  }
+    // When building in Objective-C++ ARC mode, create an autorelease pool
+    // around the global initializers.
+    if (getLangOpts().ObjCAutoRefCount && getLangOpts().CPlusPlus) {
+      llvm::Value *token = EmitObjCAutoreleasePoolPush();
+      EmitObjCAutoreleasePoolCleanup(token);
+    }
 
-  for (unsigned i = 0, e = Decls.size(); i != e; ++i)
-    if (Decls[i])
-      EmitRuntimeCall(Decls[i]);
+    for (unsigned i = 0, e = Decls.size(); i != e; ++i)
+      if (Decls[i])
+        EmitRuntimeCall(Decls[i]);
 
-  Scope.ForceCleanup();
+    Scope.ForceCleanup();
 
-  if (ExitBlock) {
-    Builder.CreateBr(ExitBlock);
-    EmitBlock(ExitBlock);
+    if (ExitBlock) {
+      Builder.CreateBr(ExitBlock);
+      EmitBlock(ExitBlock);
+    }
   }
 
   FinishFunction();
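The TLS guard logic above lowers to a simple flag test; roughly, in the load
syntax of the period (guard type illustrative):

  %GuardVal = load i8* @__tls_guard
  %guard.uninitialized = icmp eq i8 %GuardVal, 0
  store i8 1, i8* @__tls_guard
  br i1 %guard.uninitialized, label %init, label %exit

Note that the flag is stored before the branch, matching the builder calls
above, so initializers that touch already-initialized thread_locals see the
guard already set.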
@@ -477,18 +496,22 @@
 void CodeGenFunction::GenerateCXXGlobalDtorsFunc(llvm::Function *Fn,
                   const std::vector<std::pair<llvm::WeakVH, llvm::Constant*> >
                                                 &DtorsAndObjects) {
-  StartFunction(GlobalDecl(), getContext().VoidTy, Fn,
-                getTypes().arrangeNullaryFunction(),
-                FunctionArgList(), SourceLocation());
+  {
+    ArtificialLocation AL(*this, Builder);
+    StartFunction(GlobalDecl(), getContext().VoidTy, Fn,
+                  getTypes().arrangeNullaryFunction(), FunctionArgList());
+    // Emit an artificial location for this function.
+    AL.Emit();
 
-  // Emit the dtors, in reverse order from construction.
-  for (unsigned i = 0, e = DtorsAndObjects.size(); i != e; ++i) {
-    llvm::Value *Callee = DtorsAndObjects[e - i - 1].first;
-    llvm::CallInst *CI = Builder.CreateCall(Callee,
-                                            DtorsAndObjects[e - i - 1].second);
-    // Make sure the call and the callee agree on calling convention.
-    if (llvm::Function *F = dyn_cast<llvm::Function>(Callee))
-      CI->setCallingConv(F->getCallingConv());
+    // Emit the dtors, in reverse order from construction.
+    for (unsigned i = 0, e = DtorsAndObjects.size(); i != e; ++i) {
+      llvm::Value *Callee = DtorsAndObjects[e - i - 1].first;
+      llvm::CallInst *CI = Builder.CreateCall(Callee,
+                                          DtorsAndObjects[e - i - 1].second);
+      // Make sure the call and the callee agree on calling convention.
+      if (llvm::Function *F = dyn_cast<llvm::Function>(Callee))
+        CI->setCallingConv(F->getCallingConv());
+    }
   }
 
   FinishFunction();
@@ -500,7 +523,8 @@
     llvm::Constant *addr, QualType type, Destroyer *destroyer,
     bool useEHCleanupForArray, const VarDecl *VD) {
   FunctionArgList args;
-  ImplicitParamDecl dst(0, SourceLocation(), 0, getContext().VoidPtrTy);
+  ImplicitParamDecl dst(getContext(), nullptr, SourceLocation(), nullptr,
+                        getContext().VoidPtrTy);
   args.push_back(&dst);
 
   const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
@@ -509,7 +533,7 @@
   llvm::Function *fn = 
     CreateGlobalInitOrDestructFunction(CGM, FTy, "__cxx_global_array_dtor");
 
-  StartFunction(VD, getContext().VoidTy, fn, FI, args, SourceLocation());
+  StartFunction(VD, getContext().VoidTy, fn, FI, args);
 
   emitDestroy(addr, type, destroyer, useEHCleanupForArray);
   
diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp
index ca31717..9fa478c 100644
--- a/lib/CodeGen/CGException.cpp
+++ b/lib/CodeGen/CGException.cpp
@@ -164,18 +164,21 @@
   };
 }
 
-const EHPersonality EHPersonality::GNU_C = { "__gcc_personality_v0", 0 };
-const EHPersonality EHPersonality::GNU_C_SJLJ = { "__gcc_personality_sj0", 0 };
-const EHPersonality EHPersonality::NeXT_ObjC = { "__objc_personality_v0", 0 };
-const EHPersonality EHPersonality::GNU_CPlusPlus = { "__gxx_personality_v0", 0};
+const EHPersonality EHPersonality::GNU_C = { "__gcc_personality_v0", nullptr };
 const EHPersonality
-EHPersonality::GNU_CPlusPlus_SJLJ = { "__gxx_personality_sj0", 0 };
+EHPersonality::GNU_C_SJLJ = { "__gcc_personality_sj0", nullptr };
+const EHPersonality
+EHPersonality::NeXT_ObjC = { "__objc_personality_v0", nullptr };
+const EHPersonality
+EHPersonality::GNU_CPlusPlus = { "__gxx_personality_v0", nullptr };
+const EHPersonality
+EHPersonality::GNU_CPlusPlus_SJLJ = { "__gxx_personality_sj0", nullptr };
 const EHPersonality
 EHPersonality::GNU_ObjC = {"__gnu_objc_personality_v0", "objc_exception_throw"};
 const EHPersonality
-EHPersonality::GNU_ObjCXX = { "__gnustep_objcxx_personality_v0", 0 };
+EHPersonality::GNU_ObjCXX = { "__gnustep_objcxx_personality_v0", nullptr };
 const EHPersonality
-EHPersonality::GNUstep_ObjC = { "__gnustep_objc_personality_v0", 0 };
+EHPersonality::GNUstep_ObjC = { "__gnustep_objc_personality_v0", nullptr };
 
 static const EHPersonality &getCPersonality(const LangOptions &L) {
   if (L.SjLjExceptions)
@@ -418,6 +421,11 @@
 
 void CodeGenFunction::EmitCXXThrowExpr(const CXXThrowExpr *E,
                                        bool KeepInsertionPoint) {
+  if (CGM.getTarget().getTriple().isWindowsMSVCEnvironment()) {
+    ErrorUnsupported(E, "throw expression");
+    return;
+  }
+
   if (!E->getSubExpr()) {
     EmitNoreturnRuntimeCallOrInvoke(getReThrowFn(CGM),
                                     ArrayRef<llvm::Value*>());
@@ -462,7 +470,7 @@
 
   // The address of the destructor.  If the exception type has a
   // trivial destructor (or isn't a record), we just pass null.
-  llvm::Constant *Dtor = 0;
+  llvm::Constant *Dtor = nullptr;
   if (const RecordType *RecordTy = ThrowType->getAs<RecordType>()) {
     CXXRecordDecl *Record = cast<CXXRecordDecl>(RecordTy->getDecl());
     if (!Record->hasTrivialDestructor()) {
@@ -487,10 +495,16 @@
     return;
   
   const FunctionDecl* FD = dyn_cast_or_null<FunctionDecl>(D);
-  if (FD == 0)
+  if (!FD) {
+    // Check if CapturedDecl is nothrow and create terminate scope for it.
+    if (const CapturedDecl* CD = dyn_cast_or_null<CapturedDecl>(D)) {
+      if (CD->isNothrow())
+        EHStack.pushTerminate();
+    }
     return;
+  }
   const FunctionProtoType *Proto = FD->getType()->getAs<FunctionProtoType>();
-  if (Proto == 0)
+  if (!Proto)
     return;
 
   ExceptionSpecificationType EST = Proto->getExceptionSpecType();
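The CapturedDecl case arises for bodies outlined from a CapturedStmt, as used
by the OpenMP support added elsewhere in this rebase. Sketch of the kind of
construct involved (assuming Sema marks the region nothrow):

  #pragma omp parallel  // body becomes a CapturedDecl; with isNothrow() set,
  {                     // the terminate scope turns escaping exceptions into
    work();             // a call to std::terminate
  }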
@@ -555,10 +569,16 @@
     return;
   
   const FunctionDecl* FD = dyn_cast_or_null<FunctionDecl>(D);
-  if (FD == 0)
+  if (!FD) {
+    // Check if CapturedDecl is nothrow and pop terminate scope for it.
+    if (const CapturedDecl* CD = dyn_cast_or_null<CapturedDecl>(D)) {
+      if (CD->isNothrow())
+        EHStack.popTerminate();
+    }
     return;
+  }
   const FunctionProtoType *Proto = FD->getType()->getAs<FunctionProtoType>();
-  if (Proto == 0)
+  if (!Proto)
     return;
 
   ExceptionSpecificationType EST = Proto->getExceptionSpecType();
@@ -574,6 +594,11 @@
 }
 
 void CodeGenFunction::EmitCXXTryStmt(const CXXTryStmt &S) {
+  if (CGM.getTarget().getTriple().isWindowsMSVCEnvironment()) {
+    ErrorUnsupported(&S, "try statement");
+    return;
+  }
+
   EnterCXXTryStmt(S);
   EmitStmt(S.getTryBlock());
   ExitCXXTryStmt(S);
@@ -597,7 +622,7 @@
       QualType CaughtType = C->getCaughtType();
       CaughtType = CaughtType.getNonReferenceType().getUnqualifiedType();
 
-      llvm::Value *TypeInfo = 0;
+      llvm::Value *TypeInfo = nullptr;
       if (CaughtType->isObjCObjectPointerType())
         TypeInfo = CGM.getObjCRuntime().GetEHType(CaughtType);
       else
@@ -675,7 +700,7 @@
   assert(!EHStack.empty());
 
   if (!CGM.getLangOpts().Exceptions)
-    return 0;
+    return nullptr;
 
   // Check the innermost scope for a cached landing pad.  If this is
   // a non-EH cleanup, we'll check enclosing scopes in EmitLandingPad.
@@ -1422,7 +1447,7 @@
                                          llvm::Constant *beginCatchFn,
                                          llvm::Constant *endCatchFn,
                                          llvm::Constant *rethrowFn) {
-  assert((beginCatchFn != 0) == (endCatchFn != 0) &&
+  assert((beginCatchFn != nullptr) == (endCatchFn != nullptr) &&
          "begin/end catch functions not paired");
   assert(rethrowFn && "rethrow function is required");
 
@@ -1437,7 +1462,7 @@
   llvm::FunctionType *rethrowFnTy =
     cast<llvm::FunctionType>(
       cast<llvm::PointerType>(rethrowFn->getType())->getElementType());
-  SavedExnVar = 0;
+  SavedExnVar = nullptr;
   if (rethrowFnTy->getNumParams())
     SavedExnVar = CGF.CreateTempAlloca(CGF.Int8PtrTy, "finally.exn");
 
@@ -1487,7 +1512,7 @@
     CGBuilderTy::InsertPoint savedIP = CGF.Builder.saveAndClearIP();
     CGF.EmitBlock(catchBB);
 
-    llvm::Value *exn = 0;
+    llvm::Value *exn = nullptr;
 
     // If there's a begin-catch function, call it.
     if (BeginCatchFn) {
@@ -1651,7 +1676,7 @@
   // This can always be a call because we necessarily didn't find
   // anything on the EH stack which needs our help.
   const char *RethrowName = Personality.CatchallRethrowFn;
-  if (RethrowName != 0 && !isCleanup) {
+  if (RethrowName != nullptr && !isCleanup) {
     EmitRuntimeCall(getCatchallRethrowFn(CGM, RethrowName),
                       getExceptionFromSlot())
       ->setDoesNotReturn();
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index 1bdd094..966edc4 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -21,6 +21,7 @@
 #include "TargetInfo.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/DeclObjC.h"
+#include "clang/AST/Attr.h"
 #include "clang/Frontend/CodeGenOptions.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/IR/DataLayout.h"
@@ -53,13 +54,13 @@
 llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty,
                                                     const Twine &Name) {
   if (!Builder.isNamePreserving())
-    return new llvm::AllocaInst(Ty, 0, "", AllocaInsertPt);
-  return new llvm::AllocaInst(Ty, 0, Name, AllocaInsertPt);
+    return new llvm::AllocaInst(Ty, nullptr, "", AllocaInsertPt);
+  return new llvm::AllocaInst(Ty, nullptr, Name, AllocaInsertPt);
 }
 
 void CodeGenFunction::InitTempAlloca(llvm::AllocaInst *Var,
                                      llvm::Value *Init) {
-  llvm::StoreInst *Store = new llvm::StoreInst(Init, Var);
+  auto *Store = new llvm::StoreInst(Init, Var);
   llvm::BasicBlock *Block = AllocaInsertPt->getParent();
   Block->getInstList().insertAfter(&*AllocaInsertPt, Store);
 }
@@ -241,11 +242,11 @@
     }
   }
 
-  CXXDestructorDecl *ReferenceTemporaryDtor = 0;
+  CXXDestructorDecl *ReferenceTemporaryDtor = nullptr;
   if (const RecordType *RT =
           E->getType()->getBaseElementTypeUnsafe()->getAs<RecordType>()) {
     // Get the destructor for the reference temporary.
-    CXXRecordDecl *ClassDecl = cast<CXXRecordDecl>(RT->getDecl());
+    auto *ClassDecl = cast<CXXRecordDecl>(RT->getDecl());
     if (!ClassDecl->hasTrivialDestructor())
       ReferenceTemporaryDtor = ClassDecl->getDestructor();
   }
@@ -323,7 +324,7 @@
     llvm::Value *Object = createReferenceTemporary(*this, M, E);
     LValue RefTempDst = MakeAddrLValue(Object, M->getType());
 
-    if (llvm::GlobalVariable *Var = dyn_cast<llvm::GlobalVariable>(Object)) {
+    if (auto *Var = dyn_cast<llvm::GlobalVariable>(Object)) {
       // We should not have emitted the initializer for this temporary as a
       // constant.
       assert(!Var->hasInitializer());
@@ -343,7 +344,7 @@
   for (unsigned I = 0, N = CommaLHSs.size(); I != N; ++I)
     EmitIgnoredExpr(CommaLHSs[I]);
 
-  if (const OpaqueValueExpr *opaque = dyn_cast<OpaqueValueExpr>(E)) {
+  if (const auto *opaque = dyn_cast<OpaqueValueExpr>(E)) {
     if (opaque->getType()->isRecordType()) {
       assert(Adjustments.empty());
       return EmitOpaqueValueLValue(opaque);
@@ -352,7 +353,7 @@
 
   // Create and initialize the reference temporary.
   llvm::Value *Object = createReferenceTemporary(*this, M, E);
-  if (llvm::GlobalVariable *Var = dyn_cast<llvm::GlobalVariable>(Object)) {
+  if (auto *Var = dyn_cast<llvm::GlobalVariable>(Object)) {
     // If the temporary is a global and has a constant initializer, we may
     // have already initialized it.
     if (!Var->hasInitializer()) {
@@ -452,8 +453,8 @@
   if (Address->getType()->getPointerAddressSpace())
     return;
 
-  llvm::Value *Cond = 0;
-  llvm::BasicBlock *Done = 0;
+  llvm::Value *Cond = nullptr;
+  llvm::BasicBlock *Done = nullptr;
 
   if (SanOpts->Null) {
     // The glvalue must not be an empty glvalue.
@@ -467,7 +468,7 @@
       llvm::BasicBlock *Rest = createBasicBlock("not.null");
       Builder.CreateCondBr(Cond, Rest, Done);
       EmitBlock(Rest);
-      Cond = 0;
+      Cond = nullptr;
     }
   }
 
@@ -590,7 +591,7 @@
   // For compatibility with existing code, we treat arrays of length 0 or
   // 1 as flexible array members.
   const ArrayType *AT = E->getType()->castAsArrayTypeUnsafe();
-  if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT)) {
+  if (const auto *CAT = dyn_cast<ConstantArrayType>(AT)) {
     if (CAT->getSize().ugt(1))
       return false;
   } else if (!isa<IncompleteArrayType>(AT))
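Concretely, the compatibility rule exempts the pre-C99 idiom from bounds
checks:

  struct S {
    int len;
    char data[1];  // length 0 or 1: treated as a flexible array member
  };

whereas a trailing char data[2] would still be bounds-checked as an ordinary
array.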
@@ -599,10 +600,10 @@
   E = E->IgnoreParens();
 
   // A flexible array member must be the last member in the class.
-  if (const MemberExpr *ME = dyn_cast<MemberExpr>(E)) {
+  if (const auto *ME = dyn_cast<MemberExpr>(E)) {
     // FIXME: If the base type of the member expr is not FD->getParent(),
     // this should not be treated as a flexible array member access.
-    if (const FieldDecl *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) {
+    if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) {
       RecordDecl::field_iterator FI(
           DeclContext::decl_iterator(const_cast<FieldDecl *>(FD)));
       return ++FI == FD->getParent()->field_end();
@@ -624,19 +625,19 @@
 
   Base = Base->IgnoreParens();
 
-  if (const CastExpr *CE = dyn_cast<CastExpr>(Base)) {
+  if (const auto *CE = dyn_cast<CastExpr>(Base)) {
     if (CE->getCastKind() == CK_ArrayToPointerDecay &&
         !isFlexibleArrayMemberExpr(CE->getSubExpr())) {
       IndexedType = CE->getSubExpr()->getType();
       const ArrayType *AT = IndexedType->castAsArrayTypeUnsafe();
-      if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT))
+      if (const auto *CAT = dyn_cast<ConstantArrayType>(AT))
         return CGF.Builder.getInt(CAT->getSize());
-      else if (const VariableArrayType *VAT = dyn_cast<VariableArrayType>(AT))
+      else if (const auto *VAT = dyn_cast<VariableArrayType>(AT))
         return CGF.getVLASize(VAT).first;
     }
   }
 
-  return 0;
+  return nullptr;
 }
 
 void CodeGenFunction::EmitBoundsCheck(const Expr *E, const Expr *Base,
@@ -705,7 +706,7 @@
 
 RValue CodeGenFunction::GetUndefRValue(QualType Ty) {
   if (Ty->isVoidType())
-    return RValue::get(0);
+    return RValue::get(nullptr);
 
   switch (getEvaluationKind(Ty)) {
   case TEK_Complex: {
@@ -820,7 +821,7 @@
     return EmitLambdaLValue(cast<LambdaExpr>(E));
 
   case Expr::ExprWithCleanupsClass: {
-    const ExprWithCleanups *cleanups = cast<ExprWithCleanups>(E);
+    const auto *cleanups = cast<ExprWithCleanups>(E);
     enterFullExpression(cleanups);
     RunCleanupsScope Scope(*this);
     return EmitLValue(cleanups->getSubExpr());
@@ -888,8 +889,8 @@
 
   // Otherwise, all object types satisfy this except C++ classes with
   // mutable subobjects or non-trivial copy/destroy behavior.
-  if (const RecordType *RT = dyn_cast<RecordType>(type))
-    if (const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl()))
+  if (const auto *RT = dyn_cast<RecordType>(type))
+    if (const auto *RD = dyn_cast<CXXRecordDecl>(RT->getDecl()))
       if (RD->hasMutableFields() || !RD->isTrivial())
         return false;
 
@@ -911,7 +912,7 @@
 };
 static ConstantEmissionKind checkVarTypeForConstantEmission(QualType type) {
   type = type.getCanonicalType();
-  if (const ReferenceType *ref = dyn_cast<ReferenceType>(type)) {
+  if (const auto *ref = dyn_cast<ReferenceType>(type)) {
     if (isConstantEmittableObjectType(ref->getPointeeType()))
       return CEK_AsValueOrReference;
     return CEK_AsReferenceOnly;
@@ -934,7 +935,7 @@
   ConstantEmissionKind CEK;
   if (isa<ParmVarDecl>(value)) {
     CEK = CEK_None;
-  } else if (VarDecl *var = dyn_cast<VarDecl>(value)) {
+  } else if (auto *var = dyn_cast<VarDecl>(value)) {
     CEK = checkVarTypeForConstantEmission(var->getType());
   } else if (isa<EnumConstantDecl>(value)) {
     CEK = CEK_AsValueOnly;
@@ -1047,7 +1048,7 @@
   llvm::APInt Min, End;
   if (!getRangeForType(*this, Ty, Min, End,
                        CGM.getCodeGenOpts().StrictEnums))
-    return 0;
+    return nullptr;
 
   llvm::MDBuilder MDHelper(getLLVMContext());
   return MDHelper.createRange(Min, End);
@@ -1065,7 +1066,7 @@
     const llvm::Type *EltTy =
     cast<llvm::PointerType>(Addr->getType())->getElementType();
 
-    const llvm::VectorType *VTy = cast<llvm::VectorType>(EltTy);
+    const auto *VTy = cast<llvm::VectorType>(EltTy);
 
     // Handle vectors of size 3, like size 4 for better performance.
     if (VTy->getNumElements() == 3) {
@@ -1181,7 +1182,7 @@
   // Handle vectors differently to get better performance.
   if (Ty->isVectorType()) {
     llvm::Type *SrcTy = Value->getType();
-    llvm::VectorType *VecTy = cast<llvm::VectorType>(SrcTy);
+    auto *VecTy = cast<llvm::VectorType>(SrcTy);
     // Handle vec3 special.
     if (VecTy->getNumElements() == 3) {
       llvm::LLVMContext &VMContext = getLLVMContext();
@@ -1202,7 +1203,7 @@
                                           MaskV, "extractVec");
       SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4);
     }
-    llvm::PointerType *DstPtr = cast<llvm::PointerType>(Addr->getType());
+    auto *DstPtr = cast<llvm::PointerType>(Addr->getType());
     if (DstPtr->getElementType() != SrcTy) {
       llvm::Type *MemTy =
       llvm::PointerType::get(SrcTy, DstPtr->getAddressSpace());
@@ -1276,6 +1277,10 @@
   if (LV.isExtVectorElt())
     return EmitLoadOfExtVectorElementLValue(LV);
 
+  // Global register variables are always loaded via intrinsics.
+  if (LV.isGlobalReg())
+    return EmitLoadOfGlobalRegLValue(LV);
+
   assert(LV.isBitField() && "Unknown LValue type!");
   return EmitLoadOfBitfieldLValue(LV);
 }
@@ -1343,6 +1348,16 @@
   return RValue::get(Vec);
 }
 
+/// @brief Loads of global named registers are always calls to intrinsics.
+RValue CodeGenFunction::EmitLoadOfGlobalRegLValue(LValue LV) {
+  assert(LV.getType()->isIntegerType() && "Bad type for register variable");
+  llvm::MDNode *RegName = dyn_cast<llvm::MDNode>(LV.getGlobalReg());
+  assert(RegName && "Register LValue is not metadata");
+  llvm::Type *Types[] = { CGM.getTypes().ConvertType(LV.getType()) };
+  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
+  llvm::Value *Call = Builder.CreateCall(F, RegName);
+  return RValue::get(Call);
+}
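An end-to-end sketch of the load path (GNU named-register extension; the
register name must be valid for the target, "sp" here is illustrative):

  // C source
  register unsigned long stack_ptr __asm__("sp");
  unsigned long get_sp(void) { return stack_ptr; }

  ; roughly the resulting call, in 3.5-era IR
  %0 = call i64 @llvm.read_register.i64(metadata !0)

where !0 is the metadata node holding the register name, created by
EmitGlobalNamedRegister further down in this file.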
 
 
 /// EmitStoreThroughLValue - Store the specified rvalue into the specified
@@ -1370,6 +1385,9 @@
     if (Dst.isExtVectorElt())
       return EmitStoreThroughExtVectorComponentLValue(Src, Dst);
 
+    if (Dst.isGlobalReg())
+      return EmitStoreThroughGlobalRegLValue(Src, Dst);
+
     assert(Dst.isBitField() && "Unknown LValue type");
     return EmitStoreThroughBitfieldLValue(Src, Dst);
   }
@@ -1581,7 +1599,18 @@
   Store->setAlignment(Dst.getAlignment().getQuantity());
 }
 
-// setObjCGCLValueClass - sets class of he lvalue for the purpose of
+/// @brief Stores of global named registers are always calls to intrinsics.
+void CodeGenFunction::EmitStoreThroughGlobalRegLValue(RValue Src, LValue Dst) {
+  assert(Dst.getType()->isIntegerType() && "Bad type for register variable");
+  llvm::MDNode *RegName = dyn_cast<llvm::MDNode>(Dst.getGlobalReg());
+  assert(RegName && "Register LValue is not metadata");
+  llvm::Type *Types[] = { CGM.getTypes().ConvertType(Dst.getType()) };
+  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
+  llvm::Value *Value = Src.getScalarVal();
+  Builder.CreateCall2(F, RegName, Value);
+}
+
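Correspondingly, a store such as stack_ptr = v; becomes, as a sketch:

  call void @llvm.write_register.i64(metadata !0, i64 %v)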
+// setObjCGCLValueClass - sets class of the lvalue for the purpose of
 // generating write-barriers API. It is currently a global, ivar,
 // or neither.
 static void setObjCGCLValueClass(const ASTContext &Ctx, const Expr *E,
@@ -1603,14 +1632,14 @@
       }
     }
     LV.setObjCIvar(true);
-    ObjCIvarRefExpr *Exp = cast<ObjCIvarRefExpr>(const_cast<Expr*>(E));
+    auto *Exp = cast<ObjCIvarRefExpr>(const_cast<Expr *>(E));
     LV.setBaseIvarExp(Exp->getBase());
     LV.setObjCArray(E->getType()->isArrayType());
     return;
   }
 
-  if (const DeclRefExpr *Exp = dyn_cast<DeclRefExpr>(E)) {
-    if (const VarDecl *VD = dyn_cast<VarDecl>(Exp->getDecl())) {
+  if (const auto *Exp = dyn_cast<DeclRefExpr>(E)) {
+    if (const auto *VD = dyn_cast<VarDecl>(Exp->getDecl())) {
       if (VD->hasGlobalStorage()) {
         LV.setGlobalObjCRef(true);
         LV.setThreadLocalRef(VD->getTLSKind() != VarDecl::TLS_None);
@@ -1620,12 +1649,12 @@
     return;
   }
 
-  if (const UnaryOperator *Exp = dyn_cast<UnaryOperator>(E)) {
+  if (const auto *Exp = dyn_cast<UnaryOperator>(E)) {
     setObjCGCLValueClass(Ctx, Exp->getSubExpr(), LV, IsMemberAccess);
     return;
   }
 
-  if (const ParenExpr *Exp = dyn_cast<ParenExpr>(E)) {
+  if (const auto *Exp = dyn_cast<ParenExpr>(E)) {
     setObjCGCLValueClass(Ctx, Exp->getSubExpr(), LV, IsMemberAccess);
     if (LV.isObjCIvar()) {
       // If cast is to a structure pointer, follow gcc's behavior and make it
@@ -1639,27 +1668,27 @@
     return;
   }
 
-  if (const GenericSelectionExpr *Exp = dyn_cast<GenericSelectionExpr>(E)) {
+  if (const auto *Exp = dyn_cast<GenericSelectionExpr>(E)) {
     setObjCGCLValueClass(Ctx, Exp->getResultExpr(), LV);
     return;
   }
 
-  if (const ImplicitCastExpr *Exp = dyn_cast<ImplicitCastExpr>(E)) {
+  if (const auto *Exp = dyn_cast<ImplicitCastExpr>(E)) {
     setObjCGCLValueClass(Ctx, Exp->getSubExpr(), LV, IsMemberAccess);
     return;
   }
 
-  if (const CStyleCastExpr *Exp = dyn_cast<CStyleCastExpr>(E)) {
+  if (const auto *Exp = dyn_cast<CStyleCastExpr>(E)) {
     setObjCGCLValueClass(Ctx, Exp->getSubExpr(), LV, IsMemberAccess);
     return;
   }
 
-  if (const ObjCBridgedCastExpr *Exp = dyn_cast<ObjCBridgedCastExpr>(E)) {
+  if (const auto *Exp = dyn_cast<ObjCBridgedCastExpr>(E)) {
     setObjCGCLValueClass(Ctx, Exp->getSubExpr(), LV, IsMemberAccess);
     return;
   }
 
-  if (const ArraySubscriptExpr *Exp = dyn_cast<ArraySubscriptExpr>(E)) {
+  if (const auto *Exp = dyn_cast<ArraySubscriptExpr>(E)) {
     setObjCGCLValueClass(Ctx, Exp->getBase(), LV);
     if (LV.isObjCIvar() && !LV.isObjCArray())
       // Using array syntax to assigning to what an ivar points to is not
@@ -1672,7 +1701,7 @@
     return;
   }
 
-  if (const MemberExpr *Exp = dyn_cast<MemberExpr>(E)) {
+  if (const auto *Exp = dyn_cast<MemberExpr>(E)) {
     setObjCGCLValueClass(Ctx, Exp->getBase(), LV, true);
     // We don't know if member is an 'ivar', but this flag is looked at
     // only in the context of LV.isObjCIvar().
@@ -1740,14 +1769,44 @@
   return CGF.EmitLValueForField(LV, FD);
 }
 
+/// Named Registers are named metadata pointing to the register name
+/// which will be read from/written to as an argument to the intrinsic
+/// @llvm.read/write_register.
+/// So far, only the name is being passed down, but other options such as
+/// register type, allocation type or even optimization options could be
+/// passed down via the metadata node.
+static LValue EmitGlobalNamedRegister(const VarDecl *VD,
+                                      CodeGenModule &CGM,
+                                      CharUnits Alignment) {
+  SmallString<64> Name("llvm.named.register.");
+  AsmLabelAttr *Asm = VD->getAttr<AsmLabelAttr>();
+  assert(Asm->getLabel().size() < 64-Name.size() &&
+      "Register name too big");
+  Name.append(Asm->getLabel());
+  llvm::NamedMDNode *M =
+    CGM.getModule().getOrInsertNamedMetadata(Name);
+  if (M->getNumOperands() == 0) {
+    llvm::MDString *Str = llvm::MDString::get(CGM.getLLVMContext(),
+                                              Asm->getLabel());
+    llvm::Value *Ops[] = { Str };
+    M->addOperand(llvm::MDNode::get(CGM.getLLVMContext(), Ops));
+  }
+  return LValue::MakeGlobalReg(M->getOperand(0), VD->getType(), Alignment);
+}
+
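The named metadata built here appears once per register at module level, e.g.
(3.5-era syntax, in which metadata still carries the 'metadata' keyword):

  !llvm.named.register.sp = !{!0}
  !0 = metadata !{metadata !"sp"}

Reusing operand 0 on later lookups keeps every reference to a given register
pointing at the same node.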
 LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) {
   const NamedDecl *ND = E->getDecl();
   CharUnits Alignment = getContext().getDeclAlign(ND);
   QualType T = E->getType();
 
-  // A DeclRefExpr for a reference initialized by a constant expression can
-  // appear without being odr-used. Directly emit the constant initializer.
-  if (const VarDecl *VD = dyn_cast<VarDecl>(ND)) {
+  if (const auto *VD = dyn_cast<VarDecl>(ND)) {
+    // Global named registers are accessed via intrinsics only.
+    if (VD->getStorageClass() == SC_Register &&
+        VD->hasAttr<AsmLabelAttr>() && !VD->isLocalVarDecl())
+      return EmitGlobalNamedRegister(VD, CGM, Alignment);
+
+    // A DeclRefExpr for a reference initialized by a constant expression can
+    // appear without being odr-used. Directly emit the constant initializer.
     const Expr *Init = VD->getAnyInitializer(VD);
     if (Init && !isa<ParmVarDecl>(VD) && VD->getType()->isReferenceType() &&
         VD->isUsableInConstantExpressions(getContext()) &&
@@ -1768,12 +1827,12 @@
          "Should not use decl without marking it used!");
 
   if (ND->hasAttr<WeakRefAttr>()) {
-    const ValueDecl *VD = cast<ValueDecl>(ND);
+    const auto *VD = cast<ValueDecl>(ND);
     llvm::Constant *Aliasee = CGM.GetWeakRefReference(VD);
     return MakeAddrLValue(Aliasee, T, Alignment);
   }
 
-  if (const VarDecl *VD = dyn_cast<VarDecl>(ND)) {
+  if (const auto *VD = dyn_cast<VarDecl>(ND)) {
     // Check if this is a global variable.
     if (VD->hasLinkage() || VD->isStaticDataMember())
       return EmitGlobalVarDeclLValue(*this, E, VD);
@@ -1832,7 +1891,7 @@
     return LV;
   }
 
-  if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ND))
+  if (const auto *FD = dyn_cast<FunctionDecl>(ND))
     return EmitFunctionDeclLValue(*this, E, FD);
 
   llvm_unreachable("Unhandled DeclRefExpr");
@@ -1922,11 +1981,9 @@
                                             /*Pascal = */false,
                                             Ty, Loc);
   llvm::Constant *C = CGM.GetConstantArrayFromStringLiteral(SL);
-  llvm::GlobalVariable *GV =
-    new llvm::GlobalVariable(CGM.getModule(), C->getType(),
-                             !CGM.getLangOpts().WritableStrings,
-                             llvm::GlobalValue::PrivateLinkage,
-                             C, GlobalName);
+  auto *GV = new llvm::GlobalVariable(
+      CGM.getModule(), C->getType(), !CGM.getLangOpts().WritableStrings,
+      llvm::GlobalValue::PrivateLinkage, C, GlobalName);
   const unsigned WideAlignment =
     Context.getTypeAlignInChars(Ty).getQuantity();
   GV->setAlignment(WideAlignment);
@@ -1953,37 +2010,31 @@
   case PredefinedExpr::Function:
   case PredefinedExpr::LFunction:
   case PredefinedExpr::FuncDName:
+  case PredefinedExpr::FuncSig:
   case PredefinedExpr::PrettyFunction: {
     PredefinedExpr::IdentType IdentType = E->getIdentType();
-    std::string GlobalVarName;
+    std::string GVName;
 
+    // FIXME: We should use the string literal mangling for the Microsoft C++
+    // ABI so that strings get merged.
     switch (IdentType) {
     default: llvm_unreachable("Invalid type");
-    case PredefinedExpr::Func:
-      GlobalVarName = "__func__.";
-      break;
-    case PredefinedExpr::Function:
-      GlobalVarName = "__FUNCTION__.";
-      break;
-    case PredefinedExpr::FuncDName:
-      GlobalVarName = "__FUNCDNAME__.";
-      break;
-    case PredefinedExpr::LFunction:
-      GlobalVarName = "L__FUNCTION__.";
-      break;
-    case PredefinedExpr::PrettyFunction:
-      GlobalVarName = "__PRETTY_FUNCTION__.";
-      break;
+    case PredefinedExpr::Func:           GVName = "__func__."; break;
+    case PredefinedExpr::Function:       GVName = "__FUNCTION__."; break;
+    case PredefinedExpr::FuncDName:      GVName = "__FUNCDNAME__."; break;
+    case PredefinedExpr::FuncSig:        GVName = "__FUNCSIG__."; break;
+    case PredefinedExpr::LFunction:      GVName = "L__FUNCTION__."; break;
+    case PredefinedExpr::PrettyFunction: GVName = "__PRETTY_FUNCTION__."; break;
     }
 
     StringRef FnName = CurFn->getName();
     if (FnName.startswith("\01"))
       FnName = FnName.substr(1);
-    GlobalVarName += FnName;
+    GVName += FnName;
 
     // If this is outside of a function use the top level decl.
     const Decl *CurDecl = CurCodeDecl;
-    if (CurDecl == 0 || isa<VarDecl>(CurDecl))
+    if (!CurDecl || isa<VarDecl>(CurDecl))
       CurDecl = getContext().getTranslationUnitDecl();
 
     const Type *ElemType = E->getType()->getArrayElementTypeNoTypeQual();
@@ -2010,15 +2061,13 @@
           getContext().getTypeSizeInChars(ElemType).getQuantity(),
           FunctionName, RawChars);
       C = GetAddrOfConstantWideString(RawChars,
-                                      GlobalVarName.c_str(),
+                                      GVName.c_str(),
                                       getContext(),
                                       E->getType(),
                                       E->getLocation(),
                                       CGM);
     } else {
-      C = CGM.GetAddrOfConstantCString(FunctionName,
-                                       GlobalVarName.c_str(),
-                                       1);
+      C = CGM.GetAddrOfConstantCString(FunctionName, GVName.c_str(), 1);
     }
     return MakeAddrLValue(C, E->getType());
   }
@@ -2036,7 +2085,7 @@
 /// integer, 1 for a floating point value, and -1 for anything else.
 llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) {
   // Only emit each type's descriptor once.
-  if (llvm::Constant *C = CGM.getTypeDescriptor(T))
+  if (llvm::Constant *C = CGM.getTypeDescriptorFromMap(T))
     return C;
 
   uint16_t TypeKind = -1;
@@ -2056,7 +2105,7 @@
   SmallString<32> Buffer;
   CGM.getDiags().ConvertArgToString(DiagnosticsEngine::ak_qualtype,
                                     (intptr_t)T.getAsOpaquePtr(),
-                                    0, 0, 0, 0, 0, 0, Buffer,
+                                    nullptr, 0, nullptr, 0, nullptr, 0, Buffer,
                                     ArrayRef<intptr_t>());
 
   llvm::Constant *Components[] = {
@@ -2065,15 +2114,13 @@
   };
   llvm::Constant *Descriptor = llvm::ConstantStruct::getAnon(Components);
 
-  llvm::GlobalVariable *GV =
-    new llvm::GlobalVariable(CGM.getModule(), Descriptor->getType(),
-                             /*isConstant=*/true,
-                             llvm::GlobalVariable::PrivateLinkage,
-                             Descriptor);
+  auto *GV = new llvm::GlobalVariable(
+      CGM.getModule(), Descriptor->getType(),
+      /*isConstant=*/true, llvm::GlobalVariable::PrivateLinkage, Descriptor);
   GV->setUnnamedAddr(true);
 
   // Remember the descriptor for this type.
-  CGM.setTypeDescriptor(T, GV);
+  CGM.setTypeDescriptorInMap(T, GV);
 
   return GV;
 }
@@ -2153,7 +2200,7 @@
   EmitBlock(Handler);
 
   llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs);
-  llvm::GlobalValue *InfoPtr =
+  auto *InfoPtr =
       new llvm::GlobalVariable(CGM.getModule(), Info->getType(), false,
                                llvm::GlobalVariable::PrivateLinkage, Info);
   InfoPtr->setUnnamedAddr(true);
@@ -2232,14 +2279,14 @@
 /// array to pointer, return the array subexpression.
 static const Expr *isSimpleArrayDecayOperand(const Expr *E) {
   // If this isn't just an array->pointer decay, bail out.
-  const CastExpr *CE = dyn_cast<CastExpr>(E);
-  if (CE == 0 || CE->getCastKind() != CK_ArrayToPointerDecay)
+  const auto *CE = dyn_cast<CastExpr>(E);
+  if (!CE || CE->getCastKind() != CK_ArrayToPointerDecay)
     return 0;
 
   // If this is a decay from variable width array, bail out.
   const Expr *SubExpr = CE->getSubExpr();
   if (SubExpr->getType()->isVariableArrayType())
-    return 0;
+    return nullptr;
 
   return SubExpr;
 }
@@ -2271,7 +2318,7 @@
 
   // We know that the pointer points to a type of the correct size, unless the
   // size is a VLA or Objective-C interface.
-  llvm::Value *Address = 0;
+  llvm::Value *Address = nullptr;
   CharUnits ArrayAlignment;
   if (const VariableArrayType *vla =
         getContext().getAsVariableArrayType(E->getType())) {
@@ -2317,7 +2364,7 @@
     LValue ArrayLV;
     // For simple multidimensional array indexing, set the 'accessed' flag for
     // better bounds-checking of the base expression.
-    if (const ArraySubscriptExpr *ASE = dyn_cast<ArraySubscriptExpr>(Array))
+    if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Array))
       ArrayLV = EmitArraySubscriptExpr(ASE, /*Accessed*/ true);
     else
       ArrayLV = EmitLValue(Array);
@@ -2444,16 +2491,16 @@
     BaseLV = EmitCheckedLValue(BaseExpr, TCK_MemberAccess);
 
   NamedDecl *ND = E->getMemberDecl();
-  if (FieldDecl *Field = dyn_cast<FieldDecl>(ND)) {
+  if (auto *Field = dyn_cast<FieldDecl>(ND)) {
     LValue LV = EmitLValueForField(BaseLV, Field);
     setObjCGCLValueClass(getContext(), E, LV);
     return LV;
   }
 
-  if (VarDecl *VD = dyn_cast<VarDecl>(ND))
+  if (auto *VD = dyn_cast<VarDecl>(ND))
     return EmitGlobalVarDeclLValue(*this, E, VD);
 
-  if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ND))
+  if (const auto *FD = dyn_cast<FunctionDecl>(ND))
     return EmitFunctionDeclLValue(*this, E, FD);
 
   llvm_unreachable("Unhandled member declaration!");
@@ -2769,7 +2816,7 @@
   case CK_Dynamic: {
     LValue LV = EmitLValue(E->getSubExpr());
     llvm::Value *V = LV.getAddress();
-    const CXXDynamicCastExpr *DCE = cast<CXXDynamicCastExpr>(E);
+    const auto *DCE = cast<CXXDynamicCastExpr>(E);
     return MakeAddrLValue(EmitDynamicCast(V, DCE), E->getType());
   }
 
@@ -2785,8 +2832,7 @@
   case CK_DerivedToBase: {
     const RecordType *DerivedClassTy =
       E->getSubExpr()->getType()->getAs<RecordType>();
-    CXXRecordDecl *DerivedClassDecl =
-      cast<CXXRecordDecl>(DerivedClassTy->getDecl());
+    auto *DerivedClassDecl = cast<CXXRecordDecl>(DerivedClassTy->getDecl());
 
     LValue LV = EmitLValue(E->getSubExpr());
     llvm::Value *This = LV.getAddress();
@@ -2803,8 +2849,7 @@
     return EmitAggExprToLValue(E);
   case CK_BaseToDerived: {
     const RecordType *DerivedClassTy = E->getType()->getAs<RecordType>();
-    CXXRecordDecl *DerivedClassDecl =
-      cast<CXXRecordDecl>(DerivedClassTy->getDecl());
+    auto *DerivedClassDecl = cast<CXXRecordDecl>(DerivedClassTy->getDecl());
 
     LValue LV = EmitLValue(E->getSubExpr());
 
@@ -2824,7 +2869,7 @@
   }
   case CK_LValueBitCast: {
     // This must be a reinterpret_cast (or c-style equivalent).
-    const ExplicitCastExpr *CE = cast<ExplicitCastExpr>(E);
+    const auto *CE = cast<ExplicitCastExpr>(E);
 
     LValue LV = EmitLValue(E->getSubExpr());
     llvm::Value *V = Builder.CreateBitCast(LV.getAddress(),
@@ -2876,6 +2921,10 @@
     SourceLocation Loc = E->getLocStart();
     // Force column info to be generated so we can differentiate
     // multiple call sites on the same line in the debug info.
+    // FIXME: This is insufficient. Two calls coming from the same macro
+    // expansion will still get the same line/column and break debug info. It's
+    // possible that LLVM can be fixed to not rely on this uniqueness, at which
+    // point this workaround can be removed.
     const FunctionDecl* Callee = E->getDirectCallee();
     bool ForceColumnInfo = Callee && Callee->isInlineSpecified();
     DI->EmitLocation(Builder, Loc, ForceColumnInfo);
@@ -2885,10 +2934,10 @@
   if (E->getCallee()->getType()->isBlockPointerType())
     return EmitBlockCallExpr(E, ReturnValue);
 
-  if (const CXXMemberCallExpr *CE = dyn_cast<CXXMemberCallExpr>(E))
+  if (const auto *CE = dyn_cast<CXXMemberCallExpr>(E))
     return EmitCXXMemberCallExpr(CE, ReturnValue);
 
-  if (const CUDAKernelCallExpr *CE = dyn_cast<CUDAKernelCallExpr>(E))
+  if (const auto *CE = dyn_cast<CUDAKernelCallExpr>(E))
     return EmitCUDAKernelCallExpr(CE, ReturnValue);
 
   const Decl *TargetDecl = E->getCalleeDecl();
@@ -2897,12 +2946,12 @@
       return EmitBuiltinExpr(FD, builtinID, E);
   }
 
-  if (const CXXOperatorCallExpr *CE = dyn_cast<CXXOperatorCallExpr>(E))
+  if (const auto *CE = dyn_cast<CXXOperatorCallExpr>(E))
     if (const CXXMethodDecl *MD = dyn_cast_or_null<CXXMethodDecl>(TargetDecl))
       return EmitCXXOperatorMemberCallExpr(CE, MD, ReturnValue);
 
-  if (const CXXPseudoDestructorExpr *PseudoDtor
-          = dyn_cast<CXXPseudoDestructorExpr>(E->getCallee()->IgnoreParens())) {
+  if (const auto *PseudoDtor =
+          dyn_cast<CXXPseudoDestructorExpr>(E->getCallee()->IgnoreParens())) {
     QualType DestroyedType = PseudoDtor->getDestroyedType();
     if (getLangOpts().ObjCAutoRefCount &&
         DestroyedType->isObjCLifetimeType() &&
@@ -2912,7 +2961,7 @@
       //   If the pseudo-expression names a retainable object with weak or
       //   strong lifetime, the object shall be released.
       Expr *BaseExpr = PseudoDtor->getBase();
-      llvm::Value *BaseValue = NULL;
+      llvm::Value *BaseValue = nullptr;
       Qualifiers BaseQuals;
 
       // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a scalar.
@@ -2952,7 +3001,7 @@
       EmitScalarExpr(E->getCallee());
     }
 
-    return RValue::get(0);
+    return RValue::get(nullptr);
   }
 
   llvm::Value *Callee = EmitScalarExpr(E->getCallee());
@@ -3098,7 +3147,7 @@
 
 LValue CodeGenFunction::EmitObjCIvarRefLValue(const ObjCIvarRefExpr *E) {
   // FIXME: A lot of the code below could be shared with EmitMemberExpr.
-  llvm::Value *BaseValue = 0;
+  llvm::Value *BaseValue = nullptr;
   const Expr *BaseExpr = E->getBase();
   Qualifiers BaseQuals;
   QualType ObjectTy;
@@ -3140,11 +3189,15 @@
 
   CalleeType = getContext().getCanonicalType(CalleeType);
 
-  const FunctionType *FnType
-    = cast<FunctionType>(cast<PointerType>(CalleeType)->getPointeeType());
+  const auto *FnType =
+      cast<FunctionType>(cast<PointerType>(CalleeType)->getPointeeType());
 
   // Force column info to differentiate multiple inlined call sites on
   // the same line, analogous to EmitCallExpr.
+  // FIXME: This is insufficient. Two calls coming from the same macro expansion
+  // will still get the same line/column and break debug info. It's possible
+  // that LLVM can be fixed to not rely on this uniqueness, at which point this
+  // workaround can be removed.
   bool ForceColumnInfo = false;
   if (const FunctionDecl* FD = dyn_cast_or_null<const FunctionDecl>(TargetDecl))
     ForceColumnInfo = FD->isInlineSpecified();
@@ -3297,7 +3350,7 @@
 
     // If this semantic expression is an opaque value, bind it
     // to the result of its source expression.
-    if (const OpaqueValueExpr *ov = dyn_cast<OpaqueValueExpr>(semantic)) {
+    if (const auto *ov = dyn_cast<OpaqueValueExpr>(semantic)) {
 
       // If this is the result expression, we may need to evaluate
       // directly into the slot.
diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp
index 6c50521..760e6f1 100644
--- a/lib/CodeGen/CGExprAgg.cpp
+++ b/lib/CodeGen/CGExprAgg.cpp
@@ -389,9 +389,9 @@
   // already-constructed members if an initializer throws.
   // For that, we'll need an EH cleanup.
   QualType::DestructionKind dtorKind = elementType.isDestructedType();
-  llvm::AllocaInst *endOfInit = 0;
+  llvm::AllocaInst *endOfInit = nullptr;
   EHScopeStack::stable_iterator cleanup;
-  llvm::Instruction *cleanupDominator = 0;
+  llvm::Instruction *cleanupDominator = nullptr;
   if (CGF.needsEHCleanup(dtorKind)) {
     // In principle we could tell the cleanup where we are more
     // directly, but the control flow can get so varied here that it
@@ -539,7 +539,7 @@
       if (castE->getCastKind() == CK_NoOp)
         continue;
     }
-    return 0;
+    return nullptr;
   }
 }
 
@@ -1044,7 +1044,7 @@
     return;
   case TEK_Scalar:
     if (LV.isSimple()) {
-      CGF.EmitScalarInit(E, /*D=*/0, LV, /*Captured=*/false);
+      CGF.EmitScalarInit(E, /*D=*/nullptr, LV, /*Captured=*/false);
     } else {
       CGF.EmitStoreThroughLValue(RValue::get(CGF.EmitScalarExpr(E)), LV);
     }
@@ -1165,7 +1165,7 @@
   // We'll need to enter cleanup scopes in case any of the member
   // initializers throw an exception.
   SmallVector<EHScopeStack::stable_iterator, 16> cleanups;
-  llvm::Instruction *cleanupDominator = 0;
+  llvm::Instruction *cleanupDominator = nullptr;
 
   // Here we iterate over the fields; this makes it simpler to both
   // default-initialize fields and skip over unnamed fields.
@@ -1252,7 +1252,7 @@
   // If this is an initlist expr, sum up the size of sizes of the (present)
   // elements.  If this is something weird, assume the whole thing is non-zero.
   const InitListExpr *ILE = dyn_cast<InitListExpr>(E);
-  if (ILE == 0 || !CGF.getTypes().isZeroInitializable(ILE->getType()))
+  if (!ILE || !CGF.getTypes().isZeroInitializable(ILE->getType()))
     return CGF.getContext().getTypeSizeInChars(E->getType());
   
   // InitListExprs for structs have to be handled carefully.  If there are
@@ -1301,7 +1301,8 @@
                                      CodeGenFunction &CGF) {
   // If the slot is already known to be zeroed, nothing to do.  Don't mess with
   // volatile stores.
-  if (Slot.isZeroed() || Slot.isVolatile() || Slot.getAddr() == 0) return;
+  if (Slot.isZeroed() || Slot.isVolatile() || Slot.getAddr() == nullptr)
+    return;
 
   // C++ objects with a user-declared constructor don't need zero'ing.
   if (CGF.getLangOpts().CPlusPlus)
@@ -1348,7 +1349,7 @@
 void CodeGenFunction::EmitAggExpr(const Expr *E, AggValueSlot Slot) {
   assert(E && hasAggregateEvaluationKind(E->getType()) &&
          "Invalid aggregate expression to emit");
-  assert((Slot.getAddr() != 0 || Slot.isIgnored()) &&
+  assert((Slot.getAddr() != nullptr || Slot.isIgnored()) &&
          "slot has bits but no address");
 
   // Optimize the slot if possible.
@@ -1468,10 +1469,10 @@
   // memcpy, as well as the TBAA tags for the members of the struct, in case
   // the optimizer wishes to expand it in to scalar memory operations.
   llvm::MDNode *TBAAStructTag = CGM.getTBAAStructInfo(Ty);
-  
+
   Builder.CreateMemCpy(DestPtr, SrcPtr,
                        llvm::ConstantInt::get(IntPtrTy, 
                                               TypeInfo.first.getQuantity()),
                        alignment.getQuantity(), isVolatile,
-                       /*TBAATag=*/0, TBAAStructTag);
+                       /*TBAATag=*/nullptr, TBAAStructTag);
 }
diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp
index f71a3de..548cd48 100644
--- a/lib/CodeGen/CGExprCXX.cpp
+++ b/lib/CodeGen/CGExprCXX.cpp
@@ -95,7 +95,7 @@
   const Expr *Base = ME->getBase();
   bool CanUseVirtualCall = MD->isVirtual() && !ME->hasQualifier();
 
-  const CXXMethodDecl *DevirtualizedMethod = NULL;
+  const CXXMethodDecl *DevirtualizedMethod = nullptr;
   if (CanUseVirtualCall && CanDevirtualizeMemberFunctionCall(Base, MD)) {
     const CXXRecordDecl *BestDynamicDecl = Base->getBestDynamicClassType();
     DevirtualizedMethod = MD->getCorrespondingMethodInClass(BestDynamicDecl);
@@ -111,7 +111,7 @@
       // one or the one of the full expression, we would have to build
       // a derived-to-base cast to compute the correct this pointer, but
       // we don't have support for that yet, so do a virtual call.
-      DevirtualizedMethod = NULL;
+      DevirtualizedMethod = nullptr;
     }
     // If the return types are not the same, this might be a case where more
     // code needs to run to compensate for it. For example, the derived
@@ -121,7 +121,7 @@
     if (DevirtualizedMethod &&
         DevirtualizedMethod->getReturnType().getCanonicalType() !=
             MD->getReturnType().getCanonicalType())
-      DevirtualizedMethod = NULL;
+      DevirtualizedMethod = nullptr;
   }
 
   llvm::Value *This;
@@ -132,10 +132,10 @@
 
 
   if (MD->isTrivial()) {
-    if (isa<CXXDestructorDecl>(MD)) return RValue::get(0);
+    if (isa<CXXDestructorDecl>(MD)) return RValue::get(nullptr);
     if (isa<CXXConstructorDecl>(MD) && 
         cast<CXXConstructorDecl>(MD)->isDefaultConstructor())
-      return RValue::get(0);
+      return RValue::get(nullptr);
 
     if (MD->isCopyAssignmentOperator() || MD->isMoveAssignmentOperator()) {
       // We don't like to generate the trivial copy/move assignment operator
@@ -158,7 +158,7 @@
 
   // Compute the function type we're calling.
   const CXXMethodDecl *CalleeDecl = DevirtualizedMethod ? DevirtualizedMethod : MD;
-  const CGFunctionInfo *FInfo = 0;
+  const CGFunctionInfo *FInfo = nullptr;
   if (const CXXDestructorDecl *Dtor = dyn_cast<CXXDestructorDecl>(CalleeDecl))
     FInfo = &CGM.getTypes().arrangeCXXDestructor(Dtor,
                                                  Dtor_Complete);
@@ -199,9 +199,9 @@
         Callee = CGM.GetAddrOfFunction(GlobalDecl(DDtor, Dtor_Complete), Ty);
       }
       EmitCXXMemberCall(MD, CE->getExprLoc(), Callee, ReturnValue, This,
-                        /*ImplicitParam=*/0, QualType(), 0, 0);
+                        /*ImplicitParam=*/nullptr, QualType(), nullptr,
+                        nullptr);
     }
-    return RValue::get(0);
+    return RValue::get(nullptr);
   }
   
   if (const CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(MD)) {
@@ -226,7 +226,7 @@
   }
 
   return EmitCXXMemberCall(MD, CE->getExprLoc(), Callee, ReturnValue, This,
-                           /*ImplicitParam=*/0, QualType(),
+                           /*ImplicitParam=*/nullptr, QualType(),
                            CE->arg_begin(), CE->arg_end());
 }
 
@@ -299,7 +299,7 @@
 
   llvm::Value *Callee = EmitCXXOperatorMemberCallee(E, MD, This);
   return EmitCXXMemberCall(MD, E->getExprLoc(), Callee, ReturnValue, This,
-                           /*ImplicitParam=*/0, QualType(),
+                           /*ImplicitParam=*/nullptr, QualType(),
                            E->arg_begin() + 1, E->arg_end());
 }
 
@@ -586,7 +586,7 @@
     //      size := sizeWithoutCookie + cookieSize
     //    and check whether it overflows.
 
-    llvm::Value *hasOverflow = 0;
+    llvm::Value *hasOverflow = nullptr;
 
     // If numElementsWidth > sizeWidth, then one way or another, we're
     // going to have to do a comparison for (2), and this happens to
@@ -726,8 +726,8 @@
   CharUnits Alignment = CGF.getContext().getTypeAlignInChars(AllocType);
   switch (CGF.getEvaluationKind(AllocType)) {
   case TEK_Scalar:
-    CGF.EmitScalarInit(Init, 0, CGF.MakeAddrLValue(NewPtr, AllocType,
-                                                   Alignment),
+    CGF.EmitScalarInit(Init, nullptr, CGF.MakeAddrLValue(NewPtr, AllocType,
+                                                         Alignment),
                        false);
     return;
   case TEK_Complex:
@@ -764,10 +764,10 @@
   unsigned initializerElements = 0;
 
   const Expr *Init = E->getInitializer();
-  llvm::AllocaInst *endOfInit = 0;
+  llvm::AllocaInst *endOfInit = nullptr;
   QualType::DestructionKind dtorKind = elementType.isDestructedType();
   EHScopeStack::stable_iterator cleanup;
-  llvm::Instruction *cleanupDominator = 0;
+  llvm::Instruction *cleanupDominator = nullptr;
 
   // If the initializer is an initializer list, first do the explicit elements.
   if (const InitListExpr *ILE = dyn_cast<InitListExpr>(Init)) {
@@ -814,20 +814,22 @@
     explicitPtr = Builder.CreateBitCast(explicitPtr, beginPtr->getType());
   }
 
+  llvm::ConstantInt *constNum = dyn_cast<llvm::ConstantInt>(numElements);
+
+  // If all elements have already been initialized, skip the whole loop.
+  if (constNum && constNum->getZExtValue() <= initializerElements) {
+    // If there was a cleanup, deactivate it.
+    if (cleanupDominator)
+      DeactivateCleanupBlock(cleanup, cleanupDominator);
+    return;
+  }
+
   // Create the continuation block.
   llvm::BasicBlock *contBB = createBasicBlock("new.loop.end");
 
   // If the number of elements isn't constant, we now have to check whether
   // there is anything left to initialize.
-  if (llvm::ConstantInt *constNum = dyn_cast<llvm::ConstantInt>(numElements)) {
-    // If all elements have already been initialized, skip the whole loop.
-    if (constNum->getZExtValue() <= initializerElements) {
-      // If there was a cleanup, deactivate it.
-      if (cleanupDominator)
-        DeactivateCleanupBlock(cleanup, cleanupDominator);
-      return;
-    }
-  } else {
+  if (!constNum) {
     llvm::BasicBlock *nonEmptyBB = createBasicBlock("new.loop.nonempty");
     llvm::Value *isEmpty = Builder.CreateICmpEQ(explicitPtr, endPtr,
                                                 "array.isempty");
@@ -1159,8 +1161,8 @@
       minElements = ILE->getNumInits();
   }
 
-  llvm::Value *numElements = 0;
-  llvm::Value *allocSizeWithoutCookie = 0;
+  llvm::Value *numElements = nullptr;
+  llvm::Value *allocSizeWithoutCookie = nullptr;
   llvm::Value *allocSize =
     EmitCXXNewAllocSize(*this, E, minElements, numElements,
                         allocSizeWithoutCookie);
@@ -1194,8 +1196,8 @@
   bool nullCheck = allocatorType->isNothrow(getContext()) &&
     (!allocType.isPODType(getContext()) || E->hasInitializer());
 
-  llvm::BasicBlock *nullCheckBB = 0;
-  llvm::BasicBlock *contBB = 0;
+  llvm::BasicBlock *nullCheckBB = nullptr;
+  llvm::BasicBlock *contBB = nullptr;
 
   llvm::Value *allocation = RV.getScalarVal();
   unsigned AS = allocation->getType()->getPointerAddressSpace();
@@ -1219,7 +1221,7 @@
   // If there's an operator delete, enter a cleanup to call it if an
   // exception is thrown.
   EHScopeStack::stable_iterator operatorDeleteCleanup;
-  llvm::Instruction *cleanupDominator = 0;
+  llvm::Instruction *cleanupDominator = nullptr;
   if (E->getOperatorDelete() &&
       !E->getOperatorDelete()->isReservedGlobalPlacementOperator()) {
     EnterNewDeleteCleanup(*this, E, allocation, allocSize, allocatorArgs);
@@ -1286,7 +1288,7 @@
   CallArgList DeleteArgs;
 
   // Check if we need to pass the size to the delete operator.
-  llvm::Value *Size = 0;
+  llvm::Value *Size = nullptr;
   QualType SizeTy;
   if (DeleteFTy->getNumParams() == 2) {
     SizeTy = DeleteFTy->getParamType(1);
@@ -1332,7 +1334,7 @@
                              bool UseGlobalDelete) {
   // Find the destructor for the type, if applicable.  If the
   // destructor is virtual, we'll just emit the vcall and return.
-  const CXXDestructorDecl *Dtor = 0;
+  const CXXDestructorDecl *Dtor = nullptr;
   if (const RecordType *RT = ElementType->getAs<RecordType>()) {
     CXXRecordDecl *RD = cast<CXXRecordDecl>(RT->getDecl());
     if (RD->hasDefinition() && !RD->hasTrivialDestructor()) {
@@ -1469,8 +1471,8 @@
                             const CXXDeleteExpr *E,
                             llvm::Value *deletedPtr,
                             QualType elementType) {
-  llvm::Value *numElements = 0;
-  llvm::Value *allocatedPtr = 0;
+  llvm::Value *numElements = nullptr;
+  llvm::Value *allocatedPtr = nullptr;
   CharUnits cookieSize;
   CGF.CGM.getCXXABI().ReadArrayCookie(CGF, deletedPtr, E, elementType,
                                       numElements, allocatedPtr, cookieSize);
@@ -1818,9 +1820,9 @@
   //   If the value of v is a null pointer value in the pointer case, the result
   //   is the null pointer value of type T.
   bool ShouldNullCheckSrcValue = SrcTy->isPointerType();
-  
-  llvm::BasicBlock *CastNull = 0;
-  llvm::BasicBlock *CastNotNull = 0;
+
+  llvm::BasicBlock *CastNull = nullptr;
+  llvm::BasicBlock *CastNotNull = nullptr;
   llvm::BasicBlock *CastEnd = createBasicBlock("dynamic_cast.end");
   
   if (ShouldNullCheckSrcValue) {
diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp
index 1f84c86..7244b9e 100644
--- a/lib/CodeGen/CGExprComplex.cpp
+++ b/lib/CodeGen/CGExprComplex.cpp
@@ -306,7 +306,7 @@
   unsigned ComplexAlign = C.getTypeAlignInChars(ComplexTy).getQuantity();
   unsigned AlignI = std::min(AlignR, ComplexAlign);
 
-  llvm::Value *Real=0, *Imag=0;
+  llvm::Value *Real=nullptr, *Imag=nullptr;
 
   if (!IgnoreReal || isVolatile) {
     llvm::Value *RealP = Builder.CreateStructGEP(SrcPtr, 0,
diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp
index 82382dd..b2f08c3 100644
--- a/lib/CodeGen/CGExprConstant.cpp
+++ b/lib/CodeGen/CGExprConstant.cpp
@@ -530,7 +530,7 @@
   ConstStructBuilder Builder(CGM, CGF);
 
   if (!Builder.Build(ILE))
-    return 0;
+    return nullptr;
 
   return Builder.Finalize(ILE->getType());
 }
@@ -572,7 +572,7 @@
   //===--------------------------------------------------------------------===//
 
   llvm::Constant *VisitStmt(Stmt *S) {
-    return 0;
+    return nullptr;
   }
 
   llvm::Constant *VisitParenExpr(ParenExpr *PE) {
@@ -599,7 +599,7 @@
   llvm::Constant *VisitCastExpr(CastExpr* E) {
     Expr *subExpr = E->getSubExpr();
     llvm::Constant *C = CGM.EmitConstantExpr(subExpr, subExpr->getType(), CGF);
-    if (!C) return 0;
+    if (!C) return nullptr;
 
     llvm::Type *destType = ConvertType(E->getType());
 
@@ -660,7 +660,7 @@
     case CK_ARCReclaimReturnedObject:
     case CK_ARCExtendBlockObject:
     case CK_CopyAndAutoreleaseBlockObject:
-      return 0;
+      return nullptr;
 
     // These don't need to be handled here because Evaluate knows how to
     // evaluate them in the cases where they can be folded.
@@ -701,7 +701,7 @@
     case CK_FloatingToBoolean:
     case CK_FloatingCast:
     case CK_ZeroToOCLEvent:
-      return 0;
+      return nullptr;
     }
     llvm_unreachable("Invalid CastKind");
   }
@@ -743,7 +743,7 @@
       Expr *Init = ILE->getInit(i);
       llvm::Constant *C = CGM.EmitConstantExpr(Init, Init->getType(), CGF);
       if (!C)
-        return 0;
+        return nullptr;
       RewriteType |= (C->getType() != ElemTy);
       Elts.push_back(C);
     }
@@ -756,7 +756,7 @@
     else
       fillC = llvm::Constant::getNullValue(ElemTy);
     if (!fillC)
-      return 0;
+      return nullptr;
     RewriteType |= (fillC->getType() != ElemTy);
     Elts.resize(NumElements, fillC);
 
@@ -789,12 +789,12 @@
     if (ILE->getType()->isRecordType())
       return EmitRecordInitialization(ILE);
 
-    return 0;
+    return nullptr;
   }
 
   llvm::Constant *VisitCXXConstructExpr(CXXConstructExpr *E) {
     if (!E->getConstructor()->isTrivial())
-      return 0;
+      return nullptr;
 
     QualType Ty = E->getType();
 
@@ -806,8 +806,8 @@
     // If the class doesn't have a trivial destructor, we can't emit it as a
     // constant expr.
     if (!RD->hasTrivialDestructor())
-      return 0;
-    
+      return nullptr;
+
     // Only copy and default constructors can be trivial.
 
 
@@ -836,7 +836,10 @@
     // as an inline array.
     std::string Str;
     CGM.getContext().getObjCEncodingForType(E->getEncodedType(), Str);
-    const ConstantArrayType *CAT = cast<ConstantArrayType>(E->getType());
+    QualType T = E->getType();
+    if (T->getTypeClass() == Type::TypeOfExpr)
+      T = cast<TypeOfExprType>(T)->getUnderlyingExpr()->getType();
+    const ConstantArrayType *CAT = cast<ConstantArrayType>(T);
 
     // Resize the string to the right size, adding zeros at the end, or
     // truncating as needed.
@@ -869,7 +872,7 @@
             return CGM.getStaticLocalDeclAddress(VD);
         }
       }
-      return 0;
+      return nullptr;
     }
 
     Expr *E = const_cast<Expr*>(LVBase.get<const Expr*>());
@@ -886,7 +889,7 @@
         C = new llvm::GlobalVariable(CGM.getModule(), C->getType(),
                                      E->getType().isConstant(CGM.getContext()),
                                      llvm::GlobalValue::InternalLinkage,
-                                     C, ".compoundliteral", 0,
+                                     C, ".compoundliteral", nullptr,
                                      llvm::GlobalVariable::NotThreadLocal,
                           CGM.getContext().getTargetAddressSpace(E->getType()));
       return C;
@@ -967,7 +970,7 @@
     }
     }
 
-    return 0;
+    return nullptr;
   }
 };
 
@@ -1001,7 +1004,7 @@
   // interprets that as the (pointer) value of the reference, rather than the
   // desired value of the referee.
   if (D.getType()->isReferenceType())
-    return 0;
+    return nullptr;
 
   const Expr *E = D.getInit();
   assert(E && "No initializer to emit");
@@ -1026,7 +1029,7 @@
   else
     Success = E->EvaluateAsRValue(Result, Context);
 
-  llvm::Constant *C = 0;
+  llvm::Constant *C = nullptr;
   if (Success && !Result.HasSideEffects)
     C = EmitConstantValue(Result.Val, DestType, CGF);
   else
@@ -1063,7 +1066,9 @@
 
       // Apply offset if necessary.
       if (!Offset->isNullValue()) {
-        llvm::Constant *Casted = llvm::ConstantExpr::getBitCast(C, Int8PtrTy);
+        unsigned AS = C->getType()->getPointerAddressSpace();
+        llvm::Type *CharPtrTy = Int8Ty->getPointerTo(AS);
+        llvm::Constant *Casted = llvm::ConstantExpr::getBitCast(C, CharPtrTy);
         Casted = llvm::ConstantExpr::getGetElementPtr(Casted, Offset);
         C = llvm::ConstantExpr::getPointerCast(Casted, C->getType());
       }
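
One hunk here is more than nullptr cleanup: when a constant pointer needs a byte offset applied, bitcasting through plain i8* silently moves the value into address space 0, which is wrong for globals living in another address space (OpenCL and GPU-like targets). A hedged restatement of the corrected pattern, with C, Offset, and Int8Ty as in the surrounding code:

    // Byte-offset a constant pointer without leaving its address space.
    unsigned AS = C->getType()->getPointerAddressSpace();
    llvm::Type *CharPtrTy = Int8Ty->getPointerTo(AS); // i8 addrspace(AS)*
    llvm::Constant *Casted = llvm::ConstantExpr::getBitCast(C, CharPtrTy);
    Casted = llvm::ConstantExpr::getGetElementPtr(Casted, Offset);
    C = llvm::ConstantExpr::getPointerCast(Casted, C->getType());
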
@@ -1169,13 +1174,13 @@
     Elts.reserve(NumElements);
 
     // Emit array filler, if there is one.
-    llvm::Constant *Filler = 0;
+    llvm::Constant *Filler = nullptr;
     if (Value.hasArrayFiller())
       Filler = EmitConstantValueForMemory(Value.getArrayFiller(),
                                           CAT->getElementType(), CGF);
 
     // Emit initializer elements.
-    llvm::Type *CommonElementType = 0;
+    llvm::Type *CommonElementType = nullptr;
     for (unsigned I = 0; I < NumElements; ++I) {
       llvm::Constant *C = Filler;
       if (I < NumInitElts)
@@ -1186,7 +1191,7 @@
       if (I == 0)
         CommonElementType = C->getType();
       else if (C->getType() != CommonElementType)
-        CommonElementType = 0;
+        CommonElementType = nullptr;
       Elts.push_back(C);
     }
 
@@ -1225,7 +1230,7 @@
 llvm::Constant *
 CodeGenModule::GetAddrOfConstantCompoundLiteral(const CompoundLiteralExpr *E) {
   assert(E->isFileScope() && "not a file-scope compound literal expr");
-  return ConstExprEmitter(*this, 0).EmitLValue(E);
+  return ConstExprEmitter(*this, nullptr).EmitLValue(E);
 }
 
 llvm::Constant *
diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp
index 5f932b0..1ee3ce8 100644
--- a/lib/CodeGen/CGExprScalar.cpp
+++ b/lib/CodeGen/CGExprScalar.cpp
@@ -365,7 +365,7 @@
   }
   Value *VisitCXXDeleteExpr(const CXXDeleteExpr *E) {
     CGF.EmitCXXDeleteExpr(E);
-    return 0;
+    return nullptr;
   }
 
   Value *VisitTypeTraitExpr(const TypeTraitExpr *E) {
@@ -387,7 +387,7 @@
     //   effect is the evaluation of the postfix-expression before the dot or
     //   arrow.
     CGF.EmitScalarExpr(E->getBase());
-    return 0;
+    return nullptr;
   }
 
   Value *VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *E) {
@@ -396,7 +396,7 @@
 
   Value *VisitCXXThrowExpr(const CXXThrowExpr *E) {
     CGF.EmitCXXThrowExpr(E);
-    return 0;
+    return nullptr;
   }
 
   Value *VisitCXXNoexceptExpr(const CXXNoexceptExpr *E) {
@@ -559,7 +559,7 @@
 
   llvm::Type *SrcTy = Src->getType();
 
-  llvm::Value *Check = 0;
+  llvm::Value *Check = nullptr;
   if (llvm::IntegerType *IntTy = dyn_cast<llvm::IntegerType>(SrcTy)) {
     // Integer to floating-point. This can fail for unsigned short -> __half
     // or unsigned __int128 -> float.
@@ -693,7 +693,7 @@
   DstType = CGF.getContext().getCanonicalType(DstType);
   if (SrcType == DstType) return Src;
 
-  if (DstType->isVoidType()) return 0;
+  if (DstType->isVoidType()) return nullptr;
 
   llvm::Value *OrigSrc = Src;
   QualType OrigSrcType = SrcType;
@@ -758,7 +758,7 @@
     return Builder.CreateBitCast(Src, DstTy, "conv");
 
   // Finally, we have the arithmetic types: real int/float.
-  Value *Res = NULL;
+  Value *Res = nullptr;
   llvm::Type *ResTy = DstTy;
 
   // An overflowing conversion has undefined behavior if either the source type
@@ -887,7 +887,7 @@
 Value *ScalarExprEmitter::VisitExpr(Expr *E) {
   CGF.ErrorUnsupported(E, "scalar expression");
   if (E->getType()->isVoidType())
-    return 0;
+    return nullptr;
   return llvm::UndefValue::get(CGF.ConvertType(E->getType()));
 }
 
@@ -1013,7 +1013,7 @@
   }
 
   // We have the arithmetic types: real int/float.
-  Value *Res = NULL;
+  Value *Res = nullptr;
 
   if (isa<llvm::IntegerType>(SrcEltTy)) {
     bool InputSigned = SrcEltType->isSignedIntegerOrEnumerationType();
@@ -1133,7 +1133,7 @@
 
         if (EI->getVectorOperandType()->getNumElements() == ResElts) {
           llvm::ConstantInt *C = cast<llvm::ConstantInt>(EI->getIndexOperand());
-          Value *LHS = 0, *RHS = 0;
+          Value *LHS = nullptr, *RHS = nullptr;
           if (CurIdx == 0) {
             // insert into undef -> shuffle (src, undef)
             Args.push_back(C);
@@ -1450,7 +1450,7 @@
 
   case CK_ToVoid: {
     CGF.EmitIgnoredExpr(E);
-    return 0;
+    return nullptr;
   }
   case CK_VectorSplat: {
     llvm::Type *DstTy = ConvertType(DestTy);
@@ -1460,7 +1460,7 @@
 
     // Splat the element across to all elements
     unsigned NumElements = cast<llvm::VectorType>(DstTy)->getNumElements();
-    return Builder.CreateVectorSplat(NumElements, Elt, "splat");;
+    return Builder.CreateVectorSplat(NumElements, Elt, "splat");
   }
 
   case CK_IntegralCast:
@@ -1507,7 +1507,7 @@
   llvm::Value *RetAlloca = CGF.EmitCompoundStmt(*E->getSubStmt(),
                                                 !E->getType()->isVoidType());
   if (!RetAlloca)
-    return 0;
+    return nullptr;
   return CGF.EmitLoadOfScalar(CGF.MakeAddrLValue(RetAlloca, E->getType()),
                               E->getExprLoc());
 }
@@ -1545,7 +1545,7 @@
                                            bool isInc, bool isPre) {
 
   QualType type = E->getSubExpr()->getType();
-  llvm::PHINode *atomicPHI = 0;
+  llvm::PHINode *atomicPHI = nullptr;
   llvm::Value *value;
   llvm::Value *input;
 
@@ -1819,7 +1819,7 @@
   QualType CurrentType = E->getTypeSourceInfo()->getType();
   for (unsigned i = 0; i != n; ++i) {
     OffsetOfExpr::OffsetOfNode ON = E->getComponent(i);
-    llvm::Value *Offset = 0;
+    llvm::Value *Offset = nullptr;
     switch (ON.getKind()) {
     case OffsetOfExpr::OffsetOfNode::Array: {
       // Compute the index
@@ -2009,7 +2009,7 @@
   // Load/convert the LHS.
   LValue LHSLV = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store);
 
-  llvm::PHINode *atomicPHI = 0;
+  llvm::PHINode *atomicPHI = nullptr;
   if (const AtomicType *atomicTy = LHSTy->getAs<AtomicType>()) {
     QualType type = atomicTy->getValueType();
     if (!type->isBooleanType() && type->isIntegerType() &&
@@ -2108,7 +2108,7 @@
 
   // If the result is clearly ignored, return now.
   if (Ignore)
-    return 0;
+    return nullptr;
 
   // The result of an assignment in C is the assigned r-value.
   if (!CGF.getLangOpts().CPlusPlus)
@@ -2124,7 +2124,7 @@
 
 void ScalarExprEmitter::EmitUndefinedBehaviorIntegerDivAndRemCheck(
     const BinOpInfo &Ops, llvm::Value *Zero, bool isDiv) {
-  llvm::Value *Cond = 0;
+  llvm::Value *Cond = nullptr;
 
   if (CGF.SanOpts->IntegerDivideByZero)
     Cond = Builder.CreateICmpNE(Ops.RHS, Zero);
@@ -2428,12 +2428,12 @@
 
   // Check whether this op is marked as fusable.
   if (!op.FPContractable)
-    return 0;
+    return nullptr;
 
   // Check whether -ffp-contract=on. (If -ffp-contract=off/fast, fusing is
   // either disabled, or handled entirely by the LLVM backend).
   if (CGF.CGM.getCodeGenOpts().getFPContractMode() != CodeGenOptions::FPC_On)
-    return 0;
+    return nullptr;
 
   // We have a potentially fusable op. Look for a mul on one of the operands.
   if (llvm::BinaryOperator* LHSBinOp = dyn_cast<llvm::BinaryOperator>(op.LHS)) {
@@ -2451,7 +2451,7 @@
     }
   }
 
-  return 0;
+  return nullptr;
 }
 
 Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) {
@@ -2533,7 +2533,7 @@
   const BinaryOperator *expr = cast<BinaryOperator>(op.E);
   QualType elementType = expr->getLHS()->getType()->getPointeeType();
 
-  llvm::Value *divisor = 0;
+  llvm::Value *divisor = nullptr;
 
   // For a variable-length array, this is going to be non-constant.
   if (const VariableArrayType *vla
@@ -2861,7 +2861,7 @@
 
   // If the result is clearly ignored, return now.
   if (Ignore)
-    return 0;
+    return nullptr;
 
   // The result of an assignment in C is the assigned r-value.
   if (!CGF.getLangOpts().CPlusPlus)
@@ -3152,7 +3152,7 @@
     if (!LHS) {
       // If the conditional has void type, make sure we return a null Value*.
       assert(!RHS && "LHS and RHS types must match");
-      return 0;
+      return nullptr;
     }
     return Builder.CreateSelect(CondV, LHS, RHS, "cond");
   }
@@ -3199,6 +3199,10 @@
 }
 
 Value *ScalarExprEmitter::VisitVAArgExpr(VAArgExpr *VE) {
+  QualType Ty = VE->getType();
+  if (Ty->isVariablyModifiedType())
+    CGF.EmitVariablyModifiedType(Ty);
+
   llvm::Value *ArgValue = CGF.EmitVAListRef(VE->getSubExpr());
   llvm::Value *ArgPtr = CGF.EmitVAArg(ArgValue, VE->getType());
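
The new guard in VisitVAArgExpr covers variably modified types: their size expressions must be evaluated before the va_arg sequence itself is lowered. A hedged example of source code that reaches this path (plain C reaches it natively; compiling it as C++ relies on Clang's VLA extension):

    #include <cstdarg>

    // Hypothetical caller: the va_arg type int (*)[n] is variably
    // modified, so CodeGen must evaluate 'n' before lowering va_arg.
    void sum_rows(int n, ...) {
      va_list ap;
      va_start(ap, n);
      int (*row)[n] = va_arg(ap, int (*)[n]);
      (void)row;
      va_end(ap);
    }
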
 
@@ -3345,7 +3349,7 @@
 LValue CodeGenFunction::EmitCompoundAssignmentLValue(
                                             const CompoundAssignOperator *E) {
   ScalarExprEmitter Scalar(*this);
-  Value *Result = 0;
+  Value *Result = nullptr;
   switch (E->getOpcode()) {
 #define COMPOUND_OP(Op)                                                       \
     case BO_##Op##Assign:                                                     \
diff --git a/lib/CodeGen/CGLoopInfo.cpp b/lib/CodeGen/CGLoopInfo.cpp
new file mode 100644
index 0000000..7b154b2
--- /dev/null
+++ b/lib/CodeGen/CGLoopInfo.cpp
@@ -0,0 +1,112 @@
+//===---- CGLoopInfo.cpp - LLVM CodeGen for loop metadata -*- C++ -*-------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CGLoopInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
+using namespace clang;
+using namespace CodeGen;
+using namespace llvm;
+
+static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs) {
+
+  if (!Attrs.IsParallel && Attrs.VectorizerWidth == 0 &&
+      Attrs.VectorizerUnroll == 0 &&
+      Attrs.VectorizerEnable == LoopAttributes::VecUnspecified)
+    return nullptr;
+
+  SmallVector<Value *, 4> Args;
+  // Reserve operand 0 for loop id self reference.
+  MDNode *TempNode = MDNode::getTemporary(Ctx, None);
+  Args.push_back(TempNode);
+
+  // Setting vectorizer.width
+  if (Attrs.VectorizerWidth > 0) {
+    Value *Vals[] = { MDString::get(Ctx, "llvm.vectorizer.width"),
+                      ConstantInt::get(Type::getInt32Ty(Ctx),
+                                       Attrs.VectorizerWidth) };
+    Args.push_back(MDNode::get(Ctx, Vals));
+  }
+
+  // Setting vectorizer.unroll
+  if (Attrs.VectorizerUnroll > 0) {
+    Value *Vals[] = { MDString::get(Ctx, "llvm.vectorizer.unroll"),
+                      ConstantInt::get(Type::getInt32Ty(Ctx),
+                                       Attrs.VectorizerUnroll) };
+    Args.push_back(MDNode::get(Ctx, Vals));
+  }
+
+  // Setting vectorizer.enable
+  if (Attrs.VectorizerEnable != LoopAttributes::VecUnspecified) {
+    Value *Vals[] = { MDString::get(Ctx, "llvm.vectorizer.enable"),
+                      ConstantInt::get(Type::getInt1Ty(Ctx),
+                                       (Attrs.VectorizerEnable ==
+                                        LoopAttributes::VecEnable)) };
+    Args.push_back(MDNode::get(Ctx, Vals));
+  }
+
+  MDNode *LoopID = MDNode::get(Ctx, Args);
+  assert(LoopID->use_empty() && "LoopID should not be used");
+
+  // Set the first operand to itself.
+  LoopID->replaceOperandWith(0, LoopID);
+  MDNode::deleteTemporary(TempNode);
+  return LoopID;
+}
+
+LoopAttributes::LoopAttributes(bool IsParallel)
+    : IsParallel(IsParallel), VectorizerEnable(LoopAttributes::VecUnspecified),
+      VectorizerWidth(0), VectorizerUnroll(0) {}
+
+void LoopAttributes::clear() {
+  IsParallel = false;
+  VectorizerWidth = 0;
+  VectorizerUnroll = 0;
+  VectorizerEnable = LoopAttributes::VecUnspecified;
+}
+
+LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs)
+    : LoopID(nullptr), Header(Header), Attrs(Attrs) {
+  LoopID = createMetadata(Header->getContext(), Attrs);
+}
+
+void LoopInfoStack::push(BasicBlock *Header) {
+  Active.push_back(LoopInfo(Header, StagedAttrs));
+  // Clear the attributes so nested loops do not inherit them.
+  StagedAttrs.clear();
+}
+
+void LoopInfoStack::pop() {
+  assert(!Active.empty() && "No active loops to pop");
+  Active.pop_back();
+}
+
+void LoopInfoStack::InsertHelper(Instruction *I) const {
+  if (!hasInfo())
+    return;
+
+  const LoopInfo &L = getInfo();
+  if (!L.getLoopID())
+    return;
+
+  if (TerminatorInst *TI = dyn_cast<TerminatorInst>(I)) {
+    for (unsigned i = 0, ie = TI->getNumSuccessors(); i < ie; ++i)
+      if (TI->getSuccessor(i) == L.getHeader()) {
+        TI->setMetadata("llvm.loop", L.getLoopID());
+        break;
+      }
+    return;
+  }
+
+  if (L.getAttributes().IsParallel && I->mayReadOrWriteMemory())
+    I->setMetadata("llvm.mem.parallel_loop_access", L.getLoopID());
+}
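
Two details of the new file are easy to miss: createMetadata returns nullptr when nothing was staged, so unannotated loops carry no loop metadata at all, and the loop ID becomes a unique identifier by replacing its own operand 0 with itself. A hedged sketch of exercising this through the public LoopInfo type:

    #include "CGLoopInfo.h"
    #include "llvm/IR/BasicBlock.h"

    // Illustrative helper, not part of the patch: build the loop ID for
    // a header whose loop a pragma marked parallel.
    llvm::MDNode *loopIDForParallel(llvm::BasicBlock *Header) {
      clang::CodeGen::LoopAttributes Attrs(/*IsParallel=*/true);
      clang::CodeGen::LoopInfo LI(Header, Attrs);
      return LI.getLoopID(); // operand 0 of this node is the node itself
    }
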
diff --git a/lib/CodeGen/CGLoopInfo.h b/lib/CodeGen/CGLoopInfo.h
new file mode 100644
index 0000000..f4a8dca
--- /dev/null
+++ b/lib/CodeGen/CGLoopInfo.h
@@ -0,0 +1,136 @@
+//===---- CGLoopInfo.h - LLVM CodeGen for loop metadata -*- C++ -*---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the internal state used for LLVM translation of loop statement
+// metadata.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_CODEGEN_CGLOOPINFO_H
+#define CLANG_CODEGEN_CGLOOPINFO_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+class BasicBlock;
+class Instruction;
+class MDNode;
+} // end namespace llvm
+
+namespace clang {
+namespace CodeGen {
+
+/// \brief Attributes that may be specified on loops.
+struct LoopAttributes {
+  explicit LoopAttributes(bool IsParallel = false);
+  void clear();
+
+  /// \brief Generate llvm.loop.parallel metadata for loads and stores.
+  bool IsParallel;
+
+  /// \brief Values of llvm.vectorizer.enable metadata.
+  enum LVEnableState { VecUnspecified, VecEnable, VecDisable };
+
+  /// \brief llvm.vectorizer.enable
+  LVEnableState VectorizerEnable;
+
+  /// \brief llvm.vectorizer.width
+  unsigned VectorizerWidth;
+
+  /// \brief llvm.vectorizer.unroll
+  unsigned VectorizerUnroll;
+};
+
+/// \brief Information used when generating a structured loop.
+class LoopInfo {
+public:
+  /// \brief Construct a new LoopInfo for the loop with entry Header.
+  LoopInfo(llvm::BasicBlock *Header, const LoopAttributes &Attrs);
+
+  /// \brief Get the loop id metadata for this loop.
+  llvm::MDNode *getLoopID() const { return LoopID; }
+
+  /// \brief Get the header block of this loop.
+  llvm::BasicBlock *getHeader() const { return Header; }
+
+  /// \brief Get the set of attributes active for this loop.
+  const LoopAttributes &getAttributes() const { return Attrs; }
+
+private:
+  /// \brief Loop ID metadata.
+  llvm::MDNode *LoopID;
+  /// \brief Header block of this loop.
+  llvm::BasicBlock *Header;
+  /// \brief The attributes for this loop.
+  LoopAttributes Attrs;
+};
+
+/// \brief A stack of loop information corresponding to loop nesting levels.
+/// This stack can be used to prepare attributes which are applied when a loop
+/// is emitted.
+class LoopInfoStack {
+  LoopInfoStack(const LoopInfoStack &) LLVM_DELETED_FUNCTION;
+  void operator=(const LoopInfoStack &) LLVM_DELETED_FUNCTION;
+
+public:
+  LoopInfoStack() {}
+
+  /// \brief Begin a new structured loop. The set of staged attributes will be
+  /// applied to the loop and then cleared.
+  void push(llvm::BasicBlock *Header);
+
+  /// \brief End the current loop.
+  void pop();
+
+  /// \brief Return the top loop id metadata.
+  llvm::MDNode *getCurLoopID() const { return getInfo().getLoopID(); }
+
+  /// \brief Return true if the top loop is parallel.
+  bool getCurLoopParallel() const {
+    return hasInfo() ? getInfo().getAttributes().IsParallel : false;
+  }
+
+  /// \brief Function called by the CodeGenFunction when an instruction is
+  /// created.
+  void InsertHelper(llvm::Instruction *I) const;
+
+  /// \brief Set the next pushed loop as parallel.
+  void setParallel(bool Enable = true) { StagedAttrs.IsParallel = Enable; }
+
+  /// \brief Set the next pushed loop 'vectorizer.enable'
+  void setVectorizerEnable(bool Enable = true) {
+    StagedAttrs.VectorizerEnable =
+        Enable ? LoopAttributes::VecEnable : LoopAttributes::VecDisable;
+  }
+
+  /// \brief Set the vectorizer width for the next loop pushed.
+  void setVectorizerWidth(unsigned W) { StagedAttrs.VectorizerWidth = W; }
+
+  /// \brief Set the vectorizer unroll for the next loop pushed.
+  void setVectorizerUnroll(unsigned U) { StagedAttrs.VectorizerUnroll = U; }
+
+private:
+  /// \brief Returns true if there is LoopInfo on the stack.
+  bool hasInfo() const { return !Active.empty(); }
+  /// \brief Return the LoopInfo for the current loop. hasInfo() should be
+  /// called first to ensure LoopInfo is present.
+  const LoopInfo &getInfo() const { return Active.back(); }
+  /// \brief The set of attributes that will be applied to the next pushed loop.
+  LoopAttributes StagedAttrs;
+  /// \brief Stack of active loops.
+  llvm::SmallVector<LoopInfo, 4> Active;
+};
+
+} // end namespace CodeGen
+} // end namespace clang
+
+#endif // CLANG_CODEGEN_CGLOOPINFO_H
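
To see how the pieces compose, here is a hedged sketch of the intended call protocol from a loop emitter; the driver function is illustrative and not part of this patch:

    #include "CGLoopInfo.h"
    #include "llvm/IR/BasicBlock.h"

    void emitAnnotatedLoop(clang::CodeGen::LoopInfoStack &LoopStack,
                           llvm::BasicBlock *Header) {
      // Stage attributes for the *next* loop, then push its header.
      LoopStack.setVectorizerEnable();  // llvm.vectorizer.enable = true
      LoopStack.setVectorizerWidth(4);  // llvm.vectorizer.width = 4
      LoopStack.push(Header);           // staged attrs consumed and cleared
      // ... emit the body; InsertHelper() runs for each new instruction,
      // tagging branches back to Header with !llvm.loop ...
      LoopStack.pop();                  // leave this nesting level
    }
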
diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp
index f78bb0b..8c54bba 100644
--- a/lib/CodeGen/CGObjC.cpp
+++ b/lib/CodeGen/CGObjC.cpp
@@ -90,7 +90,7 @@
 llvm::Value *CodeGenFunction::EmitObjCCollectionLiteral(const Expr *E,
                                     const ObjCMethodDecl *MethodWithObjects) {
   ASTContext &Context = CGM.getContext();
-  const ObjCDictionaryLiteral *DLE = 0;
+  const ObjCDictionaryLiteral *DLE = nullptr;
   const ObjCArrayLiteral *ALE = dyn_cast<ObjCArrayLiteral>(E);
   if (!ALE)
     DLE = cast<ObjCDictionaryLiteral>(E);
@@ -106,8 +106,8 @@
                                    ArrayType::Normal, /*IndexTypeQuals=*/0);
 
   // Allocate the temporary array(s).
-  llvm::Value *Objects = CreateMemTemp(ElementArrayType, "objects");  
-  llvm::Value *Keys = 0;
+  llvm::Value *Objects = CreateMemTemp(ElementArrayType, "objects");
+  llvm::Value *Keys = nullptr;
   if (DLE)
     Keys = CreateMemTemp(ElementArrayType, "keys");
   
@@ -314,10 +314,10 @@
   CGObjCRuntime &Runtime = CGM.getObjCRuntime();
   bool isSuperMessage = false;
   bool isClassMessage = false;
-  ObjCInterfaceDecl *OID = 0;
+  ObjCInterfaceDecl *OID = nullptr;
   // Find the receiver
   QualType ReceiverType;
-  llvm::Value *Receiver = 0;
+  llvm::Value *Receiver = nullptr;
   switch (E->getReceiverKind()) {
   case ObjCMessageExpr::Instance:
     ReceiverType = E->getInstanceReceiver()->getType();
@@ -466,7 +466,7 @@
   FunctionArgList args;
   // Check if we should generate debug info for this method.
   if (OMD->hasAttr<NoDebugAttr>())
-    DebugInfo = NULL; // disable debug info indefinitely for this function
+    DebugInfo = nullptr; // disable debug info indefinitely for this function
 
   llvm::Function *Fn = CGM.getObjCRuntime().GenerateMethod(OMD, CD);
 
@@ -481,7 +481,8 @@
 
   CurGD = OMD;
 
-  StartFunction(OMD, OMD->getReturnType(), Fn, FI, args, StartLoc);
+  StartFunction(OMD, OMD->getReturnType(), Fn, FI, args,
+                OMD->getLocation(), StartLoc);
 
   // In ARC, certain methods get an extra cleanup.
   if (CGM.getLangOpts().ObjCAutoRefCount &&
@@ -819,7 +820,7 @@
   if (!hasTrivialGetExpr(propImpl)) {
     if (!AtomicHelperFn) {
       ReturnStmt ret(SourceLocation(), propImpl->getGetterCXXConstructor(),
-                     /*nrvo*/ 0);
+                     /*nrvo*/ nullptr);
       EmitReturnStmt(ret);
     }
     else {
@@ -900,7 +901,7 @@
     RValue RV = EmitCall(getTypes().arrangeFreeFunctionCall(propType, args,
                                                        FunctionType::ExtInfo(),
                                                             RequiredArgs::All),
-                         getPropertyFn, ReturnValueSlot(), args, 0,
+                         getPropertyFn, ReturnValueSlot(), args, nullptr,
                          &CallInstruction);
     if (llvm::CallInst *call = dyn_cast<llvm::CallInst>(CallInstruction))
       call->setTailCall();
@@ -1145,9 +1146,9 @@
 
   case PropertyImplStrategy::GetSetProperty:
   case PropertyImplStrategy::SetPropertyAndExpressionGet: {
-  
-    llvm::Value *setOptimizedPropertyFn = 0;
-    llvm::Value *setPropertyFn = 0;
+
+    llvm::Value *setOptimizedPropertyFn = nullptr;
+    llvm::Value *setPropertyFn = nullptr;
     if (UseOptimizedSetter(CGM)) {
       // 10.8 and iOS 6.0 code and GC is off
       setOptimizedPropertyFn = 
@@ -1330,7 +1331,7 @@
     QualType::DestructionKind dtorKind = type.isDestructedType();
     if (!dtorKind) continue;
 
-    CodeGenFunction::Destroyer *destroyer = 0;
+    CodeGenFunction::Destroyer *destroyer = nullptr;
 
     // Use a call to objc_storeStrong to destroy strong ivars, for the
     // general benefit of the tools.
@@ -1870,7 +1871,7 @@
   };
   llvm::CallInst *result = CGF.EmitNounwindRuntimeCall(fn, args);
 
-  if (ignored) return 0;
+  if (ignored) return nullptr;
 
   return CGF.Builder.CreateBitCast(result, origType);
 }
@@ -2068,7 +2069,7 @@
   };
   EmitNounwindRuntimeCall(fn, args);
 
-  if (ignored) return 0;
+  if (ignored) return nullptr;
   return value;
 }
 
@@ -2563,7 +2564,7 @@
 
   // The desired result type, if it differs from the type of the
   // ultimate opaque expression.
-  llvm::Type *resultType = 0;
+  llvm::Type *resultType = nullptr;
 
   while (true) {
     e = e->IgnoreParens();
@@ -2869,16 +2870,16 @@
                                         const ObjCPropertyImplDecl *PID) {
   if (!getLangOpts().CPlusPlus ||
       !getLangOpts().ObjCRuntime.hasAtomicCopyHelper())
-    return 0;
+    return nullptr;
   QualType Ty = PID->getPropertyIvarDecl()->getType();
   if (!Ty->isRecordType())
-    return 0;
+    return nullptr;
   const ObjCPropertyDecl *PD = PID->getPropertyDecl();
   if ((!(PD->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_atomic)))
-    return 0;
-  llvm::Constant * HelperFn = 0;
+    return nullptr;
+  llvm::Constant *HelperFn = nullptr;
   if (hasTrivialSetExpr(PID))
-    return 0;
+    return nullptr;
   assert(PID->getSetterCXXAssignment() && "SetterCXXAssignment - null");
   if ((HelperFn = CGM.getAtomicSetterHelperFnMap(Ty)))
     return HelperFn;
@@ -2889,20 +2890,20 @@
   FunctionDecl *FD = FunctionDecl::Create(C,
                                           C.getTranslationUnitDecl(),
                                           SourceLocation(),
-                                          SourceLocation(), II, C.VoidTy, 0,
-                                          SC_Static,
+                                          SourceLocation(), II, C.VoidTy,
+                                          nullptr, SC_Static,
                                           false,
                                           false);
-  
+
   QualType DestTy = C.getPointerType(Ty);
   QualType SrcTy = Ty;
   SrcTy.addConst();
   SrcTy = C.getPointerType(SrcTy);
   
   FunctionArgList args;
-  ImplicitParamDecl dstDecl(FD, SourceLocation(), 0, DestTy);
+  ImplicitParamDecl dstDecl(getContext(), FD, SourceLocation(), nullptr,
+                            DestTy);
   args.push_back(&dstDecl);
-  ImplicitParamDecl srcDecl(FD, SourceLocation(), 0, SrcTy);
+  ImplicitParamDecl srcDecl(getContext(), FD, SourceLocation(), nullptr, SrcTy);
   args.push_back(&srcDecl);
 
   const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
@@ -2915,7 +2916,7 @@
                            "__assign_helper_atomic_property_",
                            &CGM.getModule());
   
-  StartFunction(FD, C.VoidTy, Fn, FI, args, SourceLocation());
+  StartFunction(FD, C.VoidTy, Fn, FI, args);
   
   DeclRefExpr DstExpr(&dstDecl, false, DestTy,
                       VK_RValue, SourceLocation());
@@ -2946,17 +2947,17 @@
                                             const ObjCPropertyImplDecl *PID) {
   if (!getLangOpts().CPlusPlus ||
       !getLangOpts().ObjCRuntime.hasAtomicCopyHelper())
-    return 0;
+    return nullptr;
   const ObjCPropertyDecl *PD = PID->getPropertyDecl();
   QualType Ty = PD->getType();
   if (!Ty->isRecordType())
-    return 0;
+    return nullptr;
   if ((!(PD->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_atomic)))
-    return 0;
-  llvm::Constant * HelperFn = 0;
-  
+    return nullptr;
+  llvm::Constant *HelperFn = nullptr;
+
   if (hasTrivialGetExpr(PID))
-    return 0;
+    return nullptr;
   assert(PID->getGetterCXXConstructor() && "getGetterCXXConstructor - null");
   if ((HelperFn = CGM.getAtomicGetterHelperFnMap(Ty)))
     return HelperFn;
@@ -2968,20 +2969,20 @@
   FunctionDecl *FD = FunctionDecl::Create(C,
                                           C.getTranslationUnitDecl(),
                                           SourceLocation(),
-                                          SourceLocation(), II, C.VoidTy, 0,
-                                          SC_Static,
+                                          SourceLocation(), II, C.VoidTy,
+                                          nullptr, SC_Static,
                                           false,
                                           false);
-  
+
   QualType DestTy = C.getPointerType(Ty);
   QualType SrcTy = Ty;
   SrcTy.addConst();
   SrcTy = C.getPointerType(SrcTy);
   
   FunctionArgList args;
-  ImplicitParamDecl dstDecl(FD, SourceLocation(), 0, DestTy);
+  ImplicitParamDecl dstDecl(getContext(), FD, SourceLocation(), nullptr,
+                            DestTy);
   args.push_back(&dstDecl);
-  ImplicitParamDecl srcDecl(FD, SourceLocation(), 0, SrcTy);
+  ImplicitParamDecl srcDecl(getContext(), FD, SourceLocation(), nullptr, SrcTy);
   args.push_back(&srcDecl);
 
   const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
@@ -2993,7 +2994,7 @@
   llvm::Function::Create(LTy, llvm::GlobalValue::InternalLinkage,
                          "__copy_helper_atomic_property_", &CGM.getModule());
   
-  StartFunction(FD, C.VoidTy, Fn, FI, args, SourceLocation());
+  StartFunction(FD, C.VoidTy, Fn, FI, args);
   
   DeclRefExpr SrcExpr(&srcDecl, false, SrcTy,
                       VK_RValue, SourceLocation());
@@ -3058,11 +3059,11 @@
   RValue Result;
   Result = Runtime.GenerateMessageSend(*this, ReturnValueSlot(),
                                        Ty, CopySelector,
-                                       Val, CallArgList(), 0, 0);
+                                       Val, CallArgList(), nullptr, nullptr);
   Val = Result.getScalarVal();
   Result = Runtime.GenerateMessageSend(*this, ReturnValueSlot(),
                                        Ty, AutoreleaseSelector,
-                                       Val, CallArgList(), 0, 0);
+                                       Val, CallArgList(), nullptr, nullptr);
   Val = Result.getScalarVal();
   return Val;
 }
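
The StartFunction churn in this file tracks a signature change in the rebase: the declaration location and the body's start location are now separate parameters, both defaulted. A hedged reading of the new shape (reconstructed, not quoted from this patch):

    // Assumed declaration from CodeGenFunction.h in the rebased tree.
    void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn,
                       const CGFunctionInfo &FnInfo,
                       const FunctionArgList &Args,
                       SourceLocation Loc = SourceLocation(),
                       SourceLocation StartLoc = SourceLocation());

This is why the ObjC method emitter above now passes OMD->getLocation() and StartLoc, while the atomic helper sites simply drop their explicit SourceLocation() arguments and take the defaults.
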
diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp
index 2689d7b..3ca0378 100644
--- a/lib/CodeGen/CGObjCGNU.cpp
+++ b/lib/CodeGen/CGObjCGNU.cpp
@@ -53,7 +53,8 @@
     /// Constructor leaves this class uninitialized, because it is intended to
     /// be used as a field in another class and not all of the types that are
     /// used as arguments will necessarily be available at construction time.
-    LazyRuntimeFunction() : CGM(0), FunctionName(0), Function(0) {}
+    LazyRuntimeFunction()
+      : CGM(nullptr), FunctionName(nullptr), Function(nullptr) {}
 
     /// Initialises the lazy function with the name, return type, and the types
     /// of the arguments.
@@ -62,7 +63,7 @@
         llvm::Type *RetTy, ...) {
        CGM =Mod;
        FunctionName = name;
-       Function = 0;
+       Function = nullptr;
        ArgTys.clear();
        va_list Args;
        va_start(Args, RetTy);
@@ -76,7 +77,7 @@
    /// LLVM constant.
    operator llvm::Constant*() {
      if (!Function) {
-       if (0 == FunctionName) return 0;
+       if (!FunctionName) return nullptr;
        // We put the return type on the end of the vector, so pop it back off
        llvm::Type *RetTy = ArgTys.back();
        ArgTys.pop_back();
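
Beyond style, using nullptr for these sentinels is a small correctness point: init() consumes its type list with va_arg until it reads a null pointer (as the call sites throughout this file imply), and an integer-literal NULL pushed through '...' need not be pointer-sized on every ABI. A hypothetical registration, with the type values assumed from elsewhere in the file:

    LazyRuntimeFunction MsgLookupFn;
    // IMP objc_msg_lookup(id, SEL); the trailing nullptr ends the list.
    MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy,
                     nullptr);
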
@@ -564,7 +565,7 @@
 
   llvm::GlobalVariable *GetClassGlobal(const std::string &Name,
                                        bool Weak = false) override {
-    return 0;
+    return nullptr;
   }
 };
 /// Class representing the legacy GCC Objective-C ABI.  This is the default when
@@ -605,10 +606,11 @@
   public:
     CGObjCGCC(CodeGenModule &Mod) : CGObjCGNU(Mod, 8, 2) {
       // IMP objc_msg_lookup(id, SEL);
-      MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy, NULL);
+      MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy,
+                       nullptr);
       // IMP objc_msg_lookup_super(struct objc_super*, SEL);
       MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy,
-              PtrToObjCSuperTy, SelectorTy, NULL);
+              PtrToObjCSuperTy, SelectorTy, nullptr);
     }
 };
 /// Class used when targeting the new GNUstep runtime ABI.
@@ -696,51 +698,51 @@
       const ObjCRuntime &R = CGM.getLangOpts().ObjCRuntime;
 
       llvm::StructType *SlotStructTy = llvm::StructType::get(PtrTy,
-          PtrTy, PtrTy, IntTy, IMPTy, NULL);
+          PtrTy, PtrTy, IntTy, IMPTy, nullptr);
       SlotTy = llvm::PointerType::getUnqual(SlotStructTy);
       // Slot_t objc_msg_lookup_sender(id *receiver, SEL selector, id sender);
       SlotLookupFn.init(&CGM, "objc_msg_lookup_sender", SlotTy, PtrToIdTy,
-          SelectorTy, IdTy, NULL);
+          SelectorTy, IdTy, nullptr);
       // Slot_t objc_msg_lookup_super(struct objc_super*, SEL);
       SlotLookupSuperFn.init(&CGM, "objc_slot_lookup_super", SlotTy,
-              PtrToObjCSuperTy, SelectorTy, NULL);
+              PtrToObjCSuperTy, SelectorTy, nullptr);
       // If we're in ObjC++ mode, then we want to make use of the C++
       // exception-handling runtime functions (__cxa_begin_catch and friends).
       if (CGM.getLangOpts().CPlusPlus) {
         llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext);
         // void *__cxa_begin_catch(void *e)
-        EnterCatchFn.init(&CGM, "__cxa_begin_catch", PtrTy, PtrTy, NULL);
+        EnterCatchFn.init(&CGM, "__cxa_begin_catch", PtrTy, PtrTy, nullptr);
         // void __cxa_end_catch(void)
-        ExitCatchFn.init(&CGM, "__cxa_end_catch", VoidTy, NULL);
+        ExitCatchFn.init(&CGM, "__cxa_end_catch", VoidTy, nullptr);
         // void _Unwind_Resume_or_Rethrow(void*)
         ExceptionReThrowFn.init(&CGM, "_Unwind_Resume_or_Rethrow", VoidTy,
-            PtrTy, NULL);
+            PtrTy, nullptr);
       } else if (R.getVersion() >= VersionTuple(1, 7)) {
         llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext);
         // id objc_begin_catch(void *e)
-        EnterCatchFn.init(&CGM, "objc_begin_catch", IdTy, PtrTy, NULL);
+        EnterCatchFn.init(&CGM, "objc_begin_catch", IdTy, PtrTy, nullptr);
         // void objc_end_catch(void)
-        ExitCatchFn.init(&CGM, "objc_end_catch", VoidTy, NULL);
+        ExitCatchFn.init(&CGM, "objc_end_catch", VoidTy, nullptr);
         // void _Unwind_Resume_or_Rethrow(void*)
         ExceptionReThrowFn.init(&CGM, "objc_exception_rethrow", VoidTy,
-            PtrTy, NULL);
+            PtrTy, nullptr);
       }
       llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext);
       SetPropertyAtomic.init(&CGM, "objc_setProperty_atomic", VoidTy, IdTy,
-          SelectorTy, IdTy, PtrDiffTy, NULL);
+          SelectorTy, IdTy, PtrDiffTy, nullptr);
       SetPropertyAtomicCopy.init(&CGM, "objc_setProperty_atomic_copy", VoidTy,
-          IdTy, SelectorTy, IdTy, PtrDiffTy, NULL);
+          IdTy, SelectorTy, IdTy, PtrDiffTy, nullptr);
       SetPropertyNonAtomic.init(&CGM, "objc_setProperty_nonatomic", VoidTy,
-          IdTy, SelectorTy, IdTy, PtrDiffTy, NULL);
+          IdTy, SelectorTy, IdTy, PtrDiffTy, nullptr);
       SetPropertyNonAtomicCopy.init(&CGM, "objc_setProperty_nonatomic_copy",
-          VoidTy, IdTy, SelectorTy, IdTy, PtrDiffTy, NULL);
+          VoidTy, IdTy, SelectorTy, IdTy, PtrDiffTy, nullptr);
       // void objc_setCppObjectAtomic(void *dest, const void *src, void
       // *helper);
       CxxAtomicObjectSetFn.init(&CGM, "objc_setCppObjectAtomic", VoidTy, PtrTy,
-          PtrTy, PtrTy, NULL);
+          PtrTy, PtrTy, nullptr);
       // void objc_getCppObjectAtomic(void *dest, const void *src, void
       // *helper);
       CxxAtomicObjectGetFn.init(&CGM, "objc_getCppObjectAtomic", VoidTy, PtrTy,
-          PtrTy, PtrTy, NULL);
+          PtrTy, PtrTy, nullptr);
     }
     llvm::Constant *GetCppAtomicObjectGetFunction() override {
       // The optimised functions were added in version 1.7 of the GNUstep
@@ -834,7 +836,7 @@
     if (!ClassSymbol)
       ClassSymbol = new llvm::GlobalVariable(TheModule, LongTy, false,
                                              llvm::GlobalValue::ExternalLinkage,
-                                             0, SymbolName);
+                                             nullptr, SymbolName);
 
     return ClassSymbol;
   }
@@ -842,14 +844,14 @@
 public:
   CGObjCObjFW(CodeGenModule &Mod): CGObjCGNU(Mod, 9, 3) {
     // IMP objc_msg_lookup(id, SEL);
-    MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy, NULL);
+    MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy, nullptr);
     MsgLookupFnSRet.init(&CGM, "objc_msg_lookup_stret", IMPTy, IdTy,
-                         SelectorTy, NULL);
+                         SelectorTy, nullptr);
     // IMP objc_msg_lookup_super(struct objc_super*, SEL);
     MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy,
-                          PtrToObjCSuperTy, SelectorTy, NULL);
+                          PtrToObjCSuperTy, SelectorTy, nullptr);
     MsgLookupSuperFnSRet.init(&CGM, "objc_msg_lookup_super_stret", IMPTy,
-                              PtrToObjCSuperTy, SelectorTy, NULL);
+                              PtrToObjCSuperTy, SelectorTy, nullptr);
   }
 };
 } // end anonymous namespace
@@ -867,7 +869,8 @@
   llvm::GlobalVariable *ClassSymbol = TheModule.getGlobalVariable(symbolName);
   if (!ClassSymbol) {
     ClassSymbol = new llvm::GlobalVariable(TheModule, LongTy, false,
-        llvm::GlobalValue::ExternalLinkage, 0, symbolName);
+                                           llvm::GlobalValue::ExternalLinkage,
+                                           nullptr, symbolName);
   }
   new llvm::GlobalVariable(TheModule, ClassSymbol->getType(), true,
     llvm::GlobalValue::WeakAnyLinkage, ClassSymbol, symbolRef);
@@ -884,10 +887,11 @@
 }
 
 CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion,
-    unsigned protocolClassVersion)
+                     unsigned protocolClassVersion)
   : CGObjCRuntime(cgm), TheModule(CGM.getModule()),
-    VMContext(cgm.getLLVMContext()), ClassPtrAlias(0), MetaClassPtrAlias(0),
-    RuntimeVersion(runtimeABIVersion), ProtocolVersion(protocolClassVersion) {
+    VMContext(cgm.getLLVMContext()), ClassPtrAlias(nullptr),
+    MetaClassPtrAlias(nullptr), RuntimeVersion(runtimeABIVersion),
+    ProtocolVersion(protocolClassVersion) {
 
   msgSendMDKind = VMContext.getMDKindID("GNUObjCMessageSend");
 
@@ -937,35 +941,35 @@
   }
   PtrToIdTy = llvm::PointerType::getUnqual(IdTy);
 
-  ObjCSuperTy = llvm::StructType::get(IdTy, IdTy, NULL);
+  ObjCSuperTy = llvm::StructType::get(IdTy, IdTy, nullptr);
   PtrToObjCSuperTy = llvm::PointerType::getUnqual(ObjCSuperTy);
 
   llvm::Type *VoidTy = llvm::Type::getVoidTy(VMContext);
 
   // void objc_exception_throw(id);
-  ExceptionThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy, NULL);
-  ExceptionReThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy, NULL);
+  ExceptionThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy, nullptr);
+  ExceptionReThrowFn.init(&CGM, "objc_exception_throw", VoidTy, IdTy, nullptr);
   // int objc_sync_enter(id);
-  SyncEnterFn.init(&CGM, "objc_sync_enter", IntTy, IdTy, NULL);
+  SyncEnterFn.init(&CGM, "objc_sync_enter", IntTy, IdTy, nullptr);
   // int objc_sync_exit(id);
-  SyncExitFn.init(&CGM, "objc_sync_exit", IntTy, IdTy, NULL);
+  SyncExitFn.init(&CGM, "objc_sync_exit", IntTy, IdTy, nullptr);
 
   // void objc_enumerationMutation (id)
   EnumerationMutationFn.init(&CGM, "objc_enumerationMutation", VoidTy,
-      IdTy, NULL);
+      IdTy, nullptr);
 
   // id objc_getProperty(id, SEL, ptrdiff_t, BOOL)
   GetPropertyFn.init(&CGM, "objc_getProperty", IdTy, IdTy, SelectorTy,
-      PtrDiffTy, BoolTy, NULL);
+      PtrDiffTy, BoolTy, nullptr);
   // void objc_setProperty(id, SEL, ptrdiff_t, id, BOOL, BOOL)
   SetPropertyFn.init(&CGM, "objc_setProperty", VoidTy, IdTy, SelectorTy,
-      PtrDiffTy, IdTy, BoolTy, BoolTy, NULL);
+      PtrDiffTy, IdTy, BoolTy, BoolTy, nullptr);
   // void objc_getPropertyStruct(void*, void*, ptrdiff_t, BOOL, BOOL)
   GetStructPropertyFn.init(&CGM, "objc_getPropertyStruct", VoidTy, PtrTy, PtrTy, 
-      PtrDiffTy, BoolTy, BoolTy, NULL);
+      PtrDiffTy, BoolTy, BoolTy, nullptr);
   // void objc_setPropertyStruct(void*, void*, ptrdiff_t, BOOL, BOOL)
   SetStructPropertyFn.init(&CGM, "objc_setPropertyStruct", VoidTy, PtrTy, PtrTy, 
-      PtrDiffTy, BoolTy, BoolTy, NULL);
+      PtrDiffTy, BoolTy, BoolTy, nullptr);
 
   // IMP type
   llvm::Type *IMPArgs[] = { IdTy, SelectorTy };
@@ -990,20 +994,20 @@
 
     // id objc_assign_ivar(id, id, ptrdiff_t);
     IvarAssignFn.init(&CGM, "objc_assign_ivar", IdTy, IdTy, IdTy, PtrDiffTy,
-        NULL);
+        nullptr);
     // id objc_assign_strongCast (id, id*)
     StrongCastAssignFn.init(&CGM, "objc_assign_strongCast", IdTy, IdTy,
-        PtrToIdTy, NULL);
+        PtrToIdTy, nullptr);
     // id objc_assign_global(id, id*);
     GlobalAssignFn.init(&CGM, "objc_assign_global", IdTy, IdTy, PtrToIdTy,
-        NULL);
+        nullptr);
     // id objc_assign_weak(id, id*);
-    WeakAssignFn.init(&CGM, "objc_assign_weak", IdTy, IdTy, PtrToIdTy, NULL);
+    WeakAssignFn.init(&CGM, "objc_assign_weak", IdTy, IdTy, PtrToIdTy, nullptr);
     // id objc_read_weak(id*);
-    WeakReadFn.init(&CGM, "objc_read_weak", IdTy, PtrToIdTy, NULL);
+    WeakReadFn.init(&CGM, "objc_read_weak", IdTy, PtrToIdTy, nullptr);
     // void *objc_memmove_collectable(void*, void *, size_t);
     MemMoveFn.init(&CGM, "objc_memmove_collectable", PtrTy, PtrTy, PtrTy,
-        SizeTy, NULL);
+        SizeTy, nullptr);
   }
 }
 
@@ -1042,8 +1046,7 @@
     const std::string &TypeEncoding, bool lval) {
 
   SmallVectorImpl<TypedSelector> &Types = SelectorTable[Sel];
-  llvm::GlobalAlias *SelValue = 0;
-
+  llvm::GlobalAlias *SelValue = nullptr;
 
   for (SmallVectorImpl<TypedSelector>::iterator i = Types.begin(),
       e = Types.end() ; i!=e ; i++) {
@@ -1052,11 +1055,10 @@
       break;
     }
   }
-  if (0 == SelValue) {
-    SelValue = new llvm::GlobalAlias(SelectorTy,
-                                     llvm::GlobalValue::PrivateLinkage,
-                                     ".objc_selector_"+Sel.getAsString(), NULL,
-                                     &TheModule);
+  if (!SelValue) {
+    SelValue = llvm::GlobalAlias::create(
+        SelectorTy->getElementType(), 0, llvm::GlobalValue::PrivateLinkage,
+        ".objc_selector_" + Sel.getAsString(), &TheModule);
     Types.push_back(TypedSelector(TypeEncoding, SelValue));
   }
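
The alias rewrites in this file follow the new GlobalAlias interface: instead of constructing with the alias's pointer type and an aliasee slot, GlobalAlias::create takes the pointee type and an address space. A hedged before/after sketch using this function's locals:

    // 3.4-era form, for comparison:
    //   new llvm::GlobalAlias(SelectorTy, llvm::GlobalValue::PrivateLinkage,
    //                         Name, /*Aliasee=*/nullptr, &TheModule);
    // Rebased form: pointee type plus address space.
    llvm::GlobalAlias *GA = llvm::GlobalAlias::create(
        SelectorTy->getElementType(), /*AddressSpace=*/0,
        llvm::GlobalValue::PrivateLinkage, ".objc_selector_example",
        &TheModule);
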
 
@@ -1089,7 +1091,7 @@
     if (CGM.getLangOpts().ObjCRuntime.isNonFragile()) {
       return MakeConstantString("@id");
     } else {
-      return 0;
+      return nullptr;
     }
   }
 
@@ -1118,7 +1120,7 @@
         new llvm::GlobalVariable(CGM.getModule(), PtrToInt8Ty,
                                  false,
                                  llvm::GlobalValue::ExternalLinkage,
-                                 0, "__objc_id_type_info");
+                                 nullptr, "__objc_id_type_info");
     return llvm::ConstantExpr::getBitCast(IDEHType, PtrToInt8Ty);
   }
 
@@ -1145,7 +1147,8 @@
   llvm::Constant *Vtable = TheModule.getGlobalVariable(vtableName);
   if (!Vtable) {
     Vtable = new llvm::GlobalVariable(TheModule, PtrToInt8Ty, true,
-            llvm::GlobalValue::ExternalLinkage, 0, vtableName);
+                                      llvm::GlobalValue::ExternalLinkage,
+                                      nullptr, vtableName);
   }
   llvm::Constant *Two = llvm::ConstantInt::get(IntTy, 2);
   Vtable = llvm::ConstantExpr::getGetElementPtr(Vtable, Two);
@@ -1159,7 +1162,7 @@
   fields.push_back(typeName);
   llvm::Constant *TI = 
       MakeGlobal(llvm::StructType::get(PtrToInt8Ty, PtrToInt8Ty,
-              NULL), fields, "__objc_eh_typeinfo_" + className,
+              nullptr), fields, "__objc_eh_typeinfo_" + className,
           llvm::GlobalValue::LinkOnceODRLinkage);
   return llvm::ConstantExpr::getBitCast(TI, PtrToInt8Ty);
 }
@@ -1185,7 +1188,7 @@
 
   if (!isa)
     isa = new llvm::GlobalVariable(TheModule, IdTy, /* isConstant */false,
-            llvm::GlobalValue::ExternalWeakLinkage, 0, Sym);
+            llvm::GlobalValue::ExternalWeakLinkage, nullptr, Sym);
   else if (isa->getType() != PtrToIdTy)
     isa = llvm::ConstantExpr::getBitCast(isa, PtrToIdTy);
 
@@ -1194,7 +1197,7 @@
   Ivars.push_back(MakeConstantString(Str));
   Ivars.push_back(llvm::ConstantInt::get(IntTy, Str.size()));
   llvm::Constant *ObjCStr = MakeGlobal(
-    llvm::StructType::get(PtrToIdTy, PtrToInt8Ty, IntTy, NULL),
+    llvm::StructType::get(PtrToIdTy, PtrToInt8Ty, IntTy, nullptr),
     Ivars, ".objc_str");
   ObjCStr = llvm::ConstantExpr::getBitCast(ObjCStr, PtrToInt8Ty);
   ObjCStrings[Str] = ObjCStr;
@@ -1223,7 +1226,7 @@
                   CGM.getTypes().ConvertType(ResultType)));
     }
     if (Sel == ReleaseSel) {
-      return RValue::get(0);
+      return RValue::get(nullptr);
     }
   }
 
@@ -1238,9 +1241,9 @@
 
   MessageSendInfo MSI = getMessageSendInfo(Method, ResultType, ActualArgs);
 
-  llvm::Value *ReceiverClass = 0;
+  llvm::Value *ReceiverClass = nullptr;
   if (isCategoryImpl) {
-    llvm::Constant *classLookupFunction = 0;
+    llvm::Constant *classLookupFunction = nullptr;
     if (IsClassMessage)  {
       classLookupFunction = CGM.CreateRuntimeFunction(llvm::FunctionType::get(
             IdTy, PtrTy, true), "objc_get_meta_class");
@@ -1258,16 +1261,16 @@
     // super_class pointer from either the class or metaclass structure.
     if (IsClassMessage)  {
       if (!MetaClassPtrAlias) {
-        MetaClassPtrAlias = new llvm::GlobalAlias(IdTy,
-            llvm::GlobalValue::InternalLinkage, ".objc_metaclass_ref" +
-            Class->getNameAsString(), NULL, &TheModule);
+        MetaClassPtrAlias = llvm::GlobalAlias::create(
+            IdTy->getElementType(), 0, llvm::GlobalValue::InternalLinkage,
+            ".objc_metaclass_ref" + Class->getNameAsString(), &TheModule);
       }
       ReceiverClass = MetaClassPtrAlias;
     } else {
       if (!ClassPtrAlias) {
-        ClassPtrAlias = new llvm::GlobalAlias(IdTy,
-            llvm::GlobalValue::InternalLinkage, ".objc_class_ref" +
-            Class->getNameAsString(), NULL, &TheModule);
+        ClassPtrAlias = llvm::GlobalAlias::create(
+            IdTy->getElementType(), 0, llvm::GlobalValue::InternalLinkage,
+            ".objc_class_ref" + Class->getNameAsString(), &TheModule);
       }
       ReceiverClass = ClassPtrAlias;
     }
@@ -1275,14 +1278,14 @@
   // Cast the pointer to a simplified version of the class structure
   ReceiverClass = Builder.CreateBitCast(ReceiverClass,
       llvm::PointerType::getUnqual(
-        llvm::StructType::get(IdTy, IdTy, NULL)));
+        llvm::StructType::get(IdTy, IdTy, nullptr)));
   // Get the superclass pointer
   ReceiverClass = Builder.CreateStructGEP(ReceiverClass, 1);
   // Load the superclass pointer
   ReceiverClass = Builder.CreateLoad(ReceiverClass);
   // Construct the structure used to look up the IMP
   llvm::StructType *ObjCSuperTy = llvm::StructType::get(
-      Receiver->getType(), IdTy, NULL);
+      Receiver->getType(), IdTy, nullptr);
   llvm::Value *ObjCSuper = Builder.CreateAlloca(ObjCSuperTy);
 
   Builder.CreateStore(Receiver, Builder.CreateStructGEP(ObjCSuper, 0));
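
In C-like terms, the bitcast/GEP/load/alloca/store sequence above amounts to the
following sketch (the structs mirror the two-field types built in this function;
the names are illustrative):

    struct class_sketch { void *isa; void *super_class; };
    struct objc_super_sketch { void *receiver; void *cls; };
    // class_sketch *c = (class_sketch *)ReceiverClass;  // bitcast
    // void *super = c->super_class;                     // CreateStructGEP(1) + load
    // objc_super_sketch s = { Receiver, super };        // alloca + first store
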
@@ -1302,7 +1305,8 @@
   llvm::MDNode *node = llvm::MDNode::get(VMContext, impMD);
 
   llvm::Instruction *call;
-  RValue msgRet = CGF.EmitCall(MSI.CallInfo, imp, Return, ActualArgs, 0, &call);
+  RValue msgRet = CGF.EmitCall(MSI.CallInfo, imp, Return, ActualArgs, nullptr,
+                               &call);
   call->setMetadata(msgSendMDKind, node);
   return msgRet;
 }
@@ -1326,7 +1330,7 @@
                   CGM.getTypes().ConvertType(ResultType)));
     }
     if (Sel == ReleaseSel) {
-      return RValue::get(0);
+      return RValue::get(nullptr);
     }
   }
 
@@ -1344,9 +1348,9 @@
   bool isPointerSizedReturn = (ResultType->isAnyPointerType() ||
       ResultType->isIntegralOrEnumerationType() || ResultType->isVoidType());
 
-  llvm::BasicBlock *startBB = 0;
-  llvm::BasicBlock *messageBB = 0;
-  llvm::BasicBlock *continueBB = 0;
+  llvm::BasicBlock *startBB = nullptr;
+  llvm::BasicBlock *messageBB = nullptr;
+  llvm::BasicBlock *continueBB = nullptr;
 
   if (!isPointerSizedReturn) {
     startBB = Builder.GetInsertBlock();
@@ -1371,7 +1375,8 @@
   llvm::Value *impMD[] = {
         llvm::MDString::get(VMContext, Sel.getAsString()),
         llvm::MDString::get(VMContext, Class ? Class->getNameAsString() :""),
-        llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext), Class!=0)
+        llvm::ConstantInt::get(llvm::Type::getInt1Ty(VMContext),
+                               Class!=nullptr)
    };
   llvm::MDNode *node = llvm::MDNode::get(VMContext, impMD);
 
@@ -1414,7 +1419,8 @@
   imp = EnforceType(Builder, imp, MSI.MessengerType);
 
   llvm::Instruction *call;
-  RValue msgRet = CGF.EmitCall(MSI.CallInfo, imp, Return, ActualArgs, 0, &call);
+  RValue msgRet = CGF.EmitCall(MSI.CallInfo, imp, Return, ActualArgs, nullptr,
+                               &call);
   call->setMetadata(msgSendMDKind, node);
 
 
@@ -1470,7 +1476,7 @@
   PtrToInt8Ty, // Really a selector, but the runtime creates it for us.
     PtrToInt8Ty, // Method types
     IMPTy, //Method pointer
-    NULL);
+    nullptr);
   std::vector<llvm::Constant*> Methods;
   std::vector<llvm::Constant*> Elements;
   for (unsigned int i = 0, e = MethodTypes.size(); i < e; ++i) {
@@ -1502,7 +1508,7 @@
       NextPtrTy,
       IntTy,
       ObjCMethodArrayTy,
-      NULL);
+      nullptr);
 
   Methods.clear();
   Methods.push_back(llvm::ConstantPointerNull::get(
@@ -1526,7 +1532,7 @@
     PtrToInt8Ty,
     PtrToInt8Ty,
     IntTy,
-    NULL);
+    nullptr);
   std::vector<llvm::Constant*> Ivars;
   std::vector<llvm::Constant*> Elements;
   for (unsigned int i = 0, e = IvarNames.size() ; i < e ; i++) {
@@ -1548,7 +1554,7 @@
   // Structure containing array and array count
   llvm::StructType *ObjCIvarListTy = llvm::StructType::get(IntTy,
     ObjCIvarArrayTy,
-    NULL);
+    nullptr);
 
   // Create an instance of the structure
   return MakeGlobal(ObjCIvarListTy, Elements, ".objc_ivar_list");
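
Putting the two structures together, the emitted `.objc_ivar_list` global is,
as a C++ sketch (names illustrative, not from the runtime headers):

    struct ivar_sketch { const char *name; const char *type; int offset; };
    struct ivar_list_sketch {
      int count;             // IntTy: number of ivars
      ivar_sketch ivars[1];  // ObjCIvarArrayTy: count entries in practice
    };
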
@@ -1598,7 +1604,7 @@
       Properties->getType(),  // properties
       IntPtrTy,               // strong_pointers
       IntPtrTy,               // weak_pointers
-      NULL);
+      nullptr);
   llvm::Constant *Zero = llvm::ConstantInt::get(LongTy, 0);
   // Fill in the structure
   std::vector<llvm::Constant*> Elements;
@@ -1652,7 +1658,7 @@
   llvm::StructType *ObjCMethodDescTy = llvm::StructType::get(
     PtrToInt8Ty, // Really a selector, but the runtime does the casting for us.
     PtrToInt8Ty,
-    NULL);
+    nullptr);
   std::vector<llvm::Constant*> Methods;
   std::vector<llvm::Constant*> Elements;
   for (unsigned int i = 0, e = MethodTypes.size() ; i < e ; i++) {
@@ -1666,7 +1672,7 @@
   llvm::Constant *Array = llvm::ConstantArray::get(ObjCMethodArrayTy,
                                                    Methods);
   llvm::StructType *ObjCMethodDescListTy = llvm::StructType::get(
-      IntTy, ObjCMethodArrayTy, NULL);
+      IntTy, ObjCMethodArrayTy, nullptr);
   Methods.clear();
   Methods.push_back(llvm::ConstantInt::get(IntTy, MethodNames.size()));
   Methods.push_back(Array);
@@ -1681,11 +1687,11 @@
      PtrTy, // Should be a recursive pointer, but it's always NULL here.
       SizeTy,
       ProtocolArrayTy,
-      NULL);
+      nullptr);
   std::vector<llvm::Constant*> Elements;
   for (const std::string *iter = Protocols.begin(), *endIter = Protocols.end();
       iter != endIter ; iter++) {
-    llvm::Constant *protocol = 0;
+    llvm::Constant *protocol = nullptr;
     llvm::StringMap<llvm::Constant*>::iterator value =
       ExistingProtocols.find(*iter);
     if (value == ExistingProtocols.end()) {
@@ -1731,7 +1737,7 @@
       MethodList->getType(),
       MethodList->getType(),
       MethodList->getType(),
-      NULL);
+      nullptr);
   std::vector<llvm::Constant*> Elements;
   // The isa pointer must be set to a magic number so the runtime knows it's
   // the correct layout.
@@ -1812,7 +1818,7 @@
   // structures for protocol metadata everywhere.
   llvm::StructType *PropertyMetadataTy = llvm::StructType::get(
           PtrToInt8Ty, Int8Ty, Int8Ty, Int8Ty, Int8Ty, PtrToInt8Ty,
-          PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty, NULL);
+          PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty, nullptr);
   std::vector<llvm::Constant*> Properties;
   std::vector<llvm::Constant*> OptionalProperties;
 
@@ -1821,7 +1827,7 @@
   for (auto *property : PD->properties()) {
     std::vector<llvm::Constant*> Fields;
 
-    Fields.push_back(MakePropertyEncodingString(property, 0));
+    Fields.push_back(MakePropertyEncodingString(property, nullptr));
     PushPropertyAttributes(Fields, property);
 
     if (ObjCMethodDecl *getter = property->getGetterMethodDecl()) {
@@ -1888,7 +1894,7 @@
       OptionalClassMethodList->getType(),
       PropertyList->getType(),
       OptionalPropertyList->getType(),
-      NULL);
+      nullptr);
   std::vector<llvm::Constant*> Elements;
   // The isa pointer must be set to a magic number so the runtime knows it's
   // the correct layout.
@@ -1929,7 +1935,7 @@
      PtrTy, // Should be a recursive pointer, but it's always NULL here.
       SizeTy,
       ProtocolArrayTy,
-      NULL);
+      nullptr);
   std::vector<llvm::Constant*> ProtocolElements;
   for (llvm::StringMapIterator<llvm::Constant*> iter =
        ExistingProtocols.begin(), endIter = ExistingProtocols.end();
@@ -1949,7 +1955,7 @@
                   ProtocolElements, ".objc_protocol_list"), PtrTy));
   Categories.push_back(llvm::ConstantExpr::getBitCast(
         MakeGlobal(llvm::StructType::get(PtrToInt8Ty, PtrToInt8Ty,
-            PtrTy, PtrTy, PtrTy, NULL), Elements), PtrTy));
+            PtrTy, PtrTy, PtrTy, nullptr), Elements), PtrTy));
 }
 
 /// Libobjc2 uses a bitfield representation where small(ish) bitfields are
@@ -1989,7 +1995,7 @@
       llvm::ConstantInt::get(Int32Ty, values.size()),
       array };
   llvm::Constant *GS = MakeGlobal(llvm::StructType::get(Int32Ty, arrayTy,
-        NULL), fields);
+        nullptr), fields);
   llvm::Constant *ptr = llvm::ConstantExpr::getPtrToInt(GS, IntPtrTy);
   return ptr;
 }
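
The path shown above is the out-of-line case: it emits a `{ i32 count, [N x i32] }`
global and hands its address back as an integer. A hedged sketch of the blob's
layout (the inline encoding for small bitfields mentioned in the truncated
comment above is not visible in this hunk):

    #include <cstdint>
    struct bitfield_blob_sketch {
      int32_t count;    // values.size()
      int32_t bits[1];  // the packed words; N entries in practice
    };
    // Returned as ptrtoint(&blob), i.e. an IntPtrTy-sized integer value.
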
@@ -2041,7 +2047,7 @@
         GenerateProtocolList(Protocols), PtrTy));
   Categories.push_back(llvm::ConstantExpr::getBitCast(
         MakeGlobal(llvm::StructType::get(PtrToInt8Ty, PtrToInt8Ty,
-            PtrTy, PtrTy, PtrTy, NULL), Elements), PtrTy));
+            PtrTy, PtrTy, PtrTy, nullptr), Elements), PtrTy));
 }
 
 llvm::Constant *CGObjCGNU::GeneratePropertyList(const ObjCImplementationDecl *OID,
@@ -2052,7 +2058,7 @@
   // setter name, setter types, getter name, getter types.
   llvm::StructType *PropertyMetadataTy = llvm::StructType::get(
           PtrToInt8Ty, Int8Ty, Int8Ty, Int8Ty, Int8Ty, PtrToInt8Ty,
-          PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty, NULL);
+          PtrToInt8Ty, PtrToInt8Ty, PtrToInt8Ty, nullptr);
   std::vector<llvm::Constant*> Properties;
 
  // Add all of the property methods that need adding to the method list and to the
@@ -2279,7 +2285,7 @@
   // the offset (third field in ivar structure)
   llvm::Type *IndexTy = Int32Ty;
   llvm::Constant *offsetPointerIndexes[] = {Zeros[0],
-      llvm::ConstantInt::get(IndexTy, 1), 0,
+      llvm::ConstantInt::get(IndexTy, 1), nullptr,
       llvm::ConstantInt::get(IndexTy, 2) };
 
   unsigned ivarIndex = 0;
@@ -2310,14 +2316,14 @@
   llvm::Constant *ZeroPtr = llvm::ConstantInt::get(IntPtrTy, 0);
  // Generate metaclass for class methods
   llvm::Constant *MetaClassStruct = GenerateClassStructure(NULLPtr,
-      NULLPtr, 0x12L, ClassName.c_str(), 0, Zeros[0], GenerateIvarList(
+      NULLPtr, 0x12L, ClassName.c_str(), nullptr, Zeros[0], GenerateIvarList(
         empty, empty, empty), ClassMethodList, NULLPtr,
       NULLPtr, NULLPtr, ZeroPtr, ZeroPtr, true);
 
   // Generate the class structure
   llvm::Constant *ClassStruct =
     GenerateClassStructure(MetaClassStruct, SuperClass, 0x11L,
-                           ClassName.c_str(), 0,
+                           ClassName.c_str(), nullptr,
       llvm::ConstantInt::get(LongTy, instanceSize), IvarList,
       MethodList, GenerateProtocolList(Protocols), IvarOffsetArray,
       Properties, StrongIvarBitmap, WeakIvarBitmap);
@@ -2327,13 +2333,13 @@
     ClassPtrAlias->replaceAllUsesWith(
         llvm::ConstantExpr::getBitCast(ClassStruct, IdTy));
     ClassPtrAlias->eraseFromParent();
-    ClassPtrAlias = 0;
+    ClassPtrAlias = nullptr;
   }
   if (MetaClassPtrAlias) {
     MetaClassPtrAlias->replaceAllUsesWith(
         llvm::ConstantExpr::getBitCast(MetaClassStruct, IdTy));
     MetaClassPtrAlias->eraseFromParent();
-    MetaClassPtrAlias = 0;
+    MetaClassPtrAlias = nullptr;
   }
 
   // Add class structure to list to be added to the symtab later
@@ -2346,7 +2352,7 @@
   // Only emit an ObjC load function if no Objective-C stuff has been called
   if (Classes.empty() && Categories.empty() && ConstantStrings.empty() &&
       ExistingProtocols.empty() && SelectorTable.empty())
-    return NULL;
+    return nullptr;
 
   // Add all referenced protocols to a category.
   GenerateProtocolHolderCategory();
@@ -2354,8 +2360,8 @@
   llvm::StructType *SelStructTy = dyn_cast<llvm::StructType>(
           SelectorTy->getElementType());
   llvm::Type *SelStructPtrTy = SelectorTy;
-  if (SelStructTy == 0) {
-    SelStructTy = llvm::StructType::get(PtrToInt8Ty, PtrToInt8Ty, NULL);
+  if (!SelStructTy) {
+    SelStructTy = llvm::StructType::get(PtrToInt8Ty, PtrToInt8Ty, nullptr);
     SelStructPtrTy = llvm::PointerType::getUnqual(SelStructTy);
   }
 
@@ -2376,7 +2382,7 @@
     Elements.push_back(llvm::ConstantArray::get(StaticsArrayTy,
        ConstantStrings));
     llvm::StructType *StaticsListTy =
-      llvm::StructType::get(PtrToInt8Ty, StaticsArrayTy, NULL);
+      llvm::StructType::get(PtrToInt8Ty, StaticsArrayTy, nullptr);
     llvm::Type *StaticsListPtrTy =
       llvm::PointerType::getUnqual(StaticsListTy);
     Statics = MakeGlobal(StaticsListTy, Elements, ".objc_statics");
@@ -2394,7 +2400,7 @@
   llvm::StructType *SymTabTy = llvm::StructType::get(LongTy, SelStructPtrTy,
                                                      llvm::Type::getInt16Ty(VMContext),
                                                      llvm::Type::getInt16Ty(VMContext),
-                                                     ClassListTy, NULL);
+                                                     ClassListTy, nullptr);
 
   Elements.clear();
   // Pointer to an array of selectors used in this module.
@@ -2475,7 +2481,7 @@
   // constants
   llvm::StructType * ModuleTy = llvm::StructType::get(LongTy, LongTy,
       PtrToInt8Ty, llvm::PointerType::getUnqual(SymTabTy), 
-      (RuntimeVersion >= 10) ? IntTy : NULL, NULL);
+      (RuntimeVersion >= 10) ? IntTy : nullptr, nullptr);
   Elements.clear();
   // Runtime version, used for ABI compatibility checking.
   Elements.push_back(llvm::ConstantInt::get(LongTy, RuntimeVersion));
@@ -2556,7 +2562,7 @@
        llvm::Constant *TheClass =
          TheModule.getGlobalVariable(("_OBJC_CLASS_" + iter->first).c_str(),
             true);
-       if (0 != TheClass) {
+       if (TheClass) {
          TheClass = llvm::ConstantExpr::getBitCast(TheClass, PtrTy);
          Builder.CreateCall2(RegisterAlias, TheClass,
             MakeConstantString(iter->second));
@@ -2606,7 +2612,7 @@
 
 llvm::Constant *CGObjCGNU::GetOptimizedPropertySetFunction(bool atomic,
                                                            bool copy) {
-  return 0;
+  return nullptr;
 }
 
 llvm::Constant *CGObjCGNU::GetGetStructFunction() {
@@ -2616,10 +2622,10 @@
   return SetStructPropertyFn;
 }
 llvm::Constant *CGObjCGNU::GetCppAtomicObjectGetFunction() {
-  return 0;
+  return nullptr;
 }
 llvm::Constant *CGObjCGNU::GetCppAtomicObjectSetFunction() {
-  return 0;
+  return nullptr;
 }
 
 llvm::Constant *CGObjCGNU::EnumerationMutationFunction() {
@@ -2767,7 +2773,7 @@
     } else {
       IvarOffsetPointer = new llvm::GlobalVariable(TheModule,
               llvm::Type::getInt32PtrTy(VMContext), false,
-              llvm::GlobalValue::ExternalLinkage, 0, Name);
+              llvm::GlobalValue::ExternalLinkage, nullptr, Name);
     }
   }
   return IvarOffsetPointer;
@@ -2797,7 +2803,7 @@
   if (const ObjCInterfaceDecl *Super = OID->getSuperClass())
     return FindIvarInterface(Context, Super, OIVD);
 
-  return 0;
+  return nullptr;
 }
 
 llvm::Value *CGObjCGNU::EmitIvarOffset(CodeGenFunction &CGF,
diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp
index 8f5969c..95503cc 100644
--- a/lib/CodeGen/CGObjCMac.cpp
+++ b/lib/CodeGen/CGObjCMac.cpp
@@ -1050,7 +1050,7 @@
   llvm::Constant *GenerateConstantString(const StringLiteral *SL) override;
 
   llvm::Function *GenerateMethod(const ObjCMethodDecl *OMD,
-                                 const ObjCContainerDecl *CD=0) override;
+                                 const ObjCContainerDecl *CD=nullptr) override;
 
   void GenerateProtocol(const ObjCProtocolDecl *PD) override;
 
@@ -1564,7 +1564,7 @@
 /// value.
 struct NullReturnState {
   llvm::BasicBlock *NullBB;
-  NullReturnState() : NullBB(0) {}
+  NullReturnState() : NullBB(nullptr) {}
 
   /// Perform a null-check of the given receiver.
   void init(CodeGenFunction &CGF, llvm::Value *receiver) {
@@ -1593,8 +1593,8 @@
 
     // The continuation block.  This will be left null if we don't have an
     // IP, which can happen if the method we're calling is marked noreturn.
-    llvm::BasicBlock *contBB = 0;
-    
+    llvm::BasicBlock *contBB = nullptr;
+
     // Finish the call path.
     llvm::BasicBlock *callBB = CGF.Builder.GetInsertBlock();
     if (callBB) {
@@ -1881,7 +1881,7 @@
 
   NullReturnState nullReturn;
 
-  llvm::Constant *Fn = NULL;
+  llvm::Constant *Fn = nullptr;
   if (CGM.ReturnSlotInterferesWithArgs(MSI.CallInfo)) {
     if (!IsSuper) nullReturn.init(CGF, Arg0);
     Fn = (ObjCABI == 2) ?  ObjCTypes.getSendStretFn2(IsSuper)
@@ -1915,7 +1915,7 @@
   Fn = llvm::ConstantExpr::getBitCast(Fn, MSI.MessengerType);
   RValue rvalue = CGF.EmitCall(MSI.CallInfo, Fn, Return, ActualArgs);
   return nullReturn.complete(CGF, rvalue, ResultType, CallArgs,
-                             requiresnullCheck ? Method : 0);
+                             requiresnullCheck ? Method : nullptr);
 }
 
 static Qualifiers::GC GetGCAttrTypeForType(ASTContext &Ctx, QualType FQT) {
@@ -2069,8 +2069,8 @@
                                           bool ByrefLayout) {
   bool IsUnion = (RD && RD->isUnion());
   CharUnits MaxUnionSize = CharUnits::Zero();
-  const FieldDecl *MaxField = 0;
-  const FieldDecl *LastFieldBitfieldOrUnnamed = 0;
+  const FieldDecl *MaxField = nullptr;
+  const FieldDecl *LastFieldBitfieldOrUnnamed = nullptr;
   CharUnits MaxFieldOffset = CharUnits::Zero();
   CharUnits LastBitfieldOrUnnamedOffset = CharUnits::Zero();
   
@@ -2092,8 +2092,8 @@
       LastBitfieldOrUnnamedOffset = FieldOffset;
       continue;
     }
-    
-    LastFieldBitfieldOrUnnamed = 0;
+
+    LastFieldBitfieldOrUnnamed = nullptr;
     QualType FQT = Field->getType();
     if (FQT->isRecordType() || FQT->isUnionType()) {
       if (FQT->isUnionType())
@@ -2558,7 +2558,7 @@
   (void)NameRef;
   assert(NameRef[0] == '\01' && (NameRef[1] == 'L' || NameRef[1] == 'l'));
   assert(GV->getVisibility() == llvm::GlobalValue::DefaultVisibility);
-  assert(GV->getLinkage() == llvm::GlobalValue::PrivateLinkage);
+  assert(GV->hasPrivateLinkage());
 }
 
 /*
@@ -2642,7 +2642,7 @@
 
   if (Entry) {
     // Already created, update the initializer.
-    assert(Entry->getLinkage() == llvm::GlobalValue::PrivateLinkage);
+    assert(Entry->hasPrivateLinkage());
     Entry->setInitializer(Init);
   } else {
     Entry =
@@ -2672,7 +2672,7 @@
     Entry =
       new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ProtocolTy, false,
                                llvm::GlobalValue::PrivateLinkage,
-                               0,
+                               nullptr,
                                "\01L_OBJC_PROTOCOL_" + PD->getName());
     Entry->setSection("__OBJC,__protocol,regular,no_dead_strip");
     // FIXME: Is this necessary? Why only for protocol?
@@ -2708,8 +2708,8 @@
     EmitMethodDescList("\01L_OBJC_PROTOCOL_CLASS_METHODS_OPT_" + PD->getName(),
                        "__OBJC,__cat_cls_meth,regular,no_dead_strip",
                        OptClassMethods),
-    EmitPropertyList("\01L_OBJC_$_PROP_PROTO_LIST_" + PD->getName(), 0, PD,
-                     ObjCTypes),
+    EmitPropertyList("\01L_OBJC_$_PROP_PROTO_LIST_" + PD->getName(), nullptr,
+                     PD, ObjCTypes),
     EmitProtocolMethodTypes("\01L_OBJC_PROTOCOL_METHOD_TYPES_" + PD->getName(),
                             MethodTypesExt, ObjCTypes)
   };
@@ -2725,7 +2725,7 @@
   // No special section, but goes in llvm.used
   return CreateMetadataVar("\01l_OBJC_PROTOCOLEXT_" + PD->getName(),
                            Init,
-                           0, 0, true);
+                           nullptr, 0, true);
 }
 
 /*
@@ -2860,7 +2860,7 @@
 
   llvm::GlobalVariable *GV = 
     CreateMetadataVar(Name, Init,
-                      (ObjCABI == 2) ? "__DATA, __objc_const" : 0,
+                      (ObjCABI == 2) ? "__DATA, __objc_const" : nullptr,
                       (ObjCABI == 2) ? 8 : 4,
                       true);
   return llvm::ConstantExpr::getBitCast(GV, ObjCTypes.Int8PtrPtrTy);
@@ -2880,8 +2880,8 @@
     GetMethodVarType(MD)
   };
   if (!Desc[1])
-    return 0;
-  
+    return nullptr;
+
   return llvm::ConstantStruct::get(ObjCTypes.MethodDescriptionTy,
                                    Desc);
 }
@@ -3211,7 +3211,8 @@
   llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name, true);
   if (!GV)
     GV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassTy, false,
-                                  llvm::GlobalValue::PrivateLinkage, 0, Name);
+                                  llvm::GlobalValue::PrivateLinkage, nullptr,
+                                  Name);
 
   assert(GV->getType()->getElementType() == ObjCTypes.ClassTy &&
          "Forward metaclass reference has incorrect type.");
@@ -3225,7 +3226,8 @@
 
   if (!GV)
     GV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassTy, false,
-                                  llvm::GlobalValue::PrivateLinkage, 0, Name);
+                                  llvm::GlobalValue::PrivateLinkage, nullptr,
+                                  Name);
 
   assert(GV->getType()->getElementType() == ObjCTypes.ClassTy &&
          "Forward class metadata reference has incorrect type.");
@@ -3345,7 +3347,7 @@
 llvm::Constant *CGObjCMac::GetMethodConstant(const ObjCMethodDecl *MD) {
   llvm::Function *Fn = GetMethodDefinition(MD);
   if (!Fn)
-    return 0;
+    return nullptr;
 
   llvm::Constant *Method[] = {
     llvm::ConstantExpr::getBitCast(GetMethodVarName(MD->getSelector()),
@@ -3416,7 +3418,7 @@
 llvm::Function *CGObjCMac::ModuleInitFunction() {
   // Abuse this interface function as a place to finalize.
   FinishModule();
-  return NULL;
+  return nullptr;
 }
 
 llvm::Constant *CGObjCMac::GetPropertyGetFunction() {
@@ -3799,7 +3801,7 @@
   // @synchronized.  We can't avoid a temp here because we need the
   // value to be preserved.  If the backend ever does liveness
   // correctly after setjmp, this will be unnecessary.
-  llvm::Value *SyncArgSlot = 0;
+  llvm::Value *SyncArgSlot = nullptr;
   if (!isTry) {
     llvm::Value *SyncArg =
       CGF.EmitScalarExpr(cast<ObjCAtSynchronizedStmt>(S).getSynchExpr());
@@ -3834,7 +3836,7 @@
 
   // A slot containing the exception to rethrow.  Only needed when we
   // have both a @catch and a @finally.
-  llvm::Value *PropagatingExnVar = 0;
+  llvm::Value *PropagatingExnVar = nullptr;
 
   // Push a normal cleanup to leave the try scope.
   CGF.EHStack.pushCleanup<PerformFragileFinally>(NormalAndEHCleanup, &S,
@@ -3900,10 +3902,10 @@
 
     const ObjCAtTryStmt* AtTryStmt = cast<ObjCAtTryStmt>(&S);
 
-    bool HasFinally = (AtTryStmt->getFinallyStmt() != 0);
+    bool HasFinally = (AtTryStmt->getFinallyStmt() != nullptr);
 
-    llvm::BasicBlock *CatchBlock = 0;
-    llvm::BasicBlock *CatchHandler = 0;
+    llvm::BasicBlock *CatchBlock = nullptr;
+    llvm::BasicBlock *CatchHandler = nullptr;
     if (HasFinally) {
       // Save the currently-propagating exception before
       // objc_exception_try_enter clears the exception slot.
@@ -3941,7 +3943,7 @@
       const ObjCAtCatchStmt *CatchStmt = AtTryStmt->getCatchStmt(I);
 
       const VarDecl *CatchParam = CatchStmt->getCatchParamDecl();
-      const ObjCObjectPointerType *OPT = 0;
+      const ObjCObjectPointerType *OPT = nullptr;
 
       // catch(...) always matches.
       if (!CatchParam) {
@@ -4472,7 +4474,7 @@
   if (I != MethodDefinitions.end())
     return I->second;
 
-  return NULL;
+  return nullptr;
 }
 
 /// GetIvarLayoutName - Returns a unique constant for the given
@@ -4493,8 +4495,8 @@
   const llvm::StructLayout *RecLayout =
     CGM.getDataLayout().getStructLayout(cast<llvm::StructType>(Ty));
 
-  BuildAggrIvarLayout(0, RecLayout, RD, Fields, BytePos,
-                      ForStrongLayout, HasUnion);
+  BuildAggrIvarLayout(nullptr, RecLayout, RD, Fields, BytePos, ForStrongLayout,
+                      HasUnion);
 }
 
 void CGObjCCommonMac::BuildAggrIvarLayout(const ObjCImplementationDecl *OI,
@@ -4506,9 +4508,9 @@
   bool IsUnion = (RD && RD->isUnion());
   uint64_t MaxUnionIvarSize = 0;
   uint64_t MaxSkippedUnionIvarSize = 0;
-  const FieldDecl *MaxField = 0;
-  const FieldDecl *MaxSkippedField = 0;
-  const FieldDecl *LastFieldBitfieldOrUnnamed = 0;
+  const FieldDecl *MaxField = nullptr;
+  const FieldDecl *MaxSkippedField = nullptr;
+  const FieldDecl *LastFieldBitfieldOrUnnamed = nullptr;
   uint64_t MaxFieldOffset = 0;
   uint64_t MaxSkippedFieldOffset = 0;
   uint64_t LastBitfieldOrUnnamedOffset = 0;
@@ -4543,7 +4545,7 @@
       continue;
     }
 
-    LastFieldBitfieldOrUnnamed = 0;
+    LastFieldBitfieldOrUnnamed = nullptr;
     QualType FQT = Field->getType();
     if (FQT->isRecordType() || FQT->isUnionType()) {
       if (FQT->isUnionType())
@@ -4834,7 +4836,8 @@
   SkipIvars.clear();
   IvarsInfo.clear();
 
-  BuildAggrIvarLayout(OMD, 0, 0, RecFields, 0, ForStrongLayout, hasUnion);
+  BuildAggrIvarLayout(OMD, nullptr, nullptr, RecFields, 0, ForStrongLayout,
+                      hasUnion);
   if (IvarsInfo.empty())
     return llvm::Constant::getNullValue(PtrTy);
  // Sort on byte position in case we encountered a union nested in
@@ -4903,7 +4906,7 @@
                                                   bool Extended) {
   std::string TypeStr;
   if (CGM.getContext().getObjCEncodingForMethodDecl(D, TypeStr, Extended))
-    return 0;
+    return nullptr;
 
   llvm::GlobalVariable *&Entry = MethodVarTypes[TypeStr];
 
@@ -5010,14 +5013,14 @@
 CGObjCNonFragileABIMac::CGObjCNonFragileABIMac(CodeGen::CodeGenModule &cgm)
   : CGObjCCommonMac(cgm),
     ObjCTypes(cgm) {
-  ObjCEmptyCacheVar = ObjCEmptyVtableVar = NULL;
+  ObjCEmptyCacheVar = ObjCEmptyVtableVar = nullptr;
   ObjCABI = 2;
 }
 
 /* *** */
 
 ObjCCommonTypesHelper::ObjCCommonTypesHelper(CodeGen::CodeGenModule &cgm)
-  : VMContext(cgm.getLLVMContext()), CGM(cgm), ExternalProtocolPtrTy(0) 
+  : VMContext(cgm.getLLVMContext()), CGM(cgm), ExternalProtocolPtrTy(nullptr)
 {
   CodeGen::CodeGenTypes &Types = CGM.getTypes();
   ASTContext &Ctx = CGM.getContext();
@@ -5031,7 +5034,8 @@
 
   // arm64 targets use "int" ivar offset variables. All others,
   // including OS X x86_64 and Windows x86_64, use "long" ivar offsets.
-  if (CGM.getTarget().getTriple().getArch() == llvm::Triple::arm64)
+  if (CGM.getTarget().getTriple().getArch() == llvm::Triple::arm64 ||
+      CGM.getTarget().getTriple().getArch() == llvm::Triple::aarch64)
     IvarOffsetVarTy = IntTy;
   else
     IvarOffsetVarTy = LongTy;
@@ -5056,11 +5060,12 @@
                                       Ctx.getTranslationUnitDecl(),
                                       SourceLocation(), SourceLocation(),
                                       &Ctx.Idents.get("_objc_super"));
-  RD->addDecl(FieldDecl::Create(Ctx, RD, SourceLocation(), SourceLocation(), 0,
-                                Ctx.getObjCIdType(), 0, 0, false, ICIS_NoInit));
-  RD->addDecl(FieldDecl::Create(Ctx, RD, SourceLocation(), SourceLocation(), 0,
-                                Ctx.getObjCClassType(), 0, 0, false,
-                                ICIS_NoInit));
+  RD->addDecl(FieldDecl::Create(Ctx, RD, SourceLocation(), SourceLocation(),
+                                nullptr, Ctx.getObjCIdType(), nullptr, nullptr,
+                                false, ICIS_NoInit));
+  RD->addDecl(FieldDecl::Create(Ctx, RD, SourceLocation(), SourceLocation(),
+                                nullptr, Ctx.getObjCClassType(), nullptr,
+                                nullptr, false, ICIS_NoInit));
   RD->completeDefinition();
 
   SuperCTy = Ctx.getTagDeclType(RD);
@@ -5437,11 +5442,12 @@
                                       Ctx.getTranslationUnitDecl(),
                                       SourceLocation(), SourceLocation(),
                                       &Ctx.Idents.get("_message_ref_t"));
-  RD->addDecl(FieldDecl::Create(Ctx, RD, SourceLocation(), SourceLocation(), 0,
-                                Ctx.VoidPtrTy, 0, 0, false, ICIS_NoInit));
-  RD->addDecl(FieldDecl::Create(Ctx, RD, SourceLocation(), SourceLocation(), 0,
-                                Ctx.getObjCSelType(), 0, 0, false,
+  RD->addDecl(FieldDecl::Create(Ctx, RD, SourceLocation(), SourceLocation(),
+                                nullptr, Ctx.VoidPtrTy, nullptr, nullptr, false,
                                 ICIS_NoInit));
+  RD->addDecl(FieldDecl::Create(Ctx, RD, SourceLocation(), SourceLocation(),
+                                nullptr, Ctx.getObjCSelType(), nullptr, nullptr,
+                                false, ICIS_NoInit));
   RD->completeDefinition();
 
   MessageRefCTy = Ctx.getTagDeclType(RD);
@@ -5479,7 +5485,7 @@
 llvm::Function *CGObjCNonFragileABIMac::ModuleInitFunction() {
   FinishNonFragileABIModule();
 
-  return NULL;
+  return nullptr;
 }
 
 void CGObjCNonFragileABIMac::
@@ -5639,7 +5645,7 @@
   Values[ 2] = llvm::ConstantInt::get(ObjCTypes.IntTy, InstanceSize);
   // FIXME. For 64bit targets add 0 here.
   Values[ 3] = (flags & NonFragileABI_Class_Meta)
-    ? GetIvarLayoutName(0, ObjCTypes)
+    ? GetIvarLayoutName(nullptr, ObjCTypes)
     : BuildIvarLayout(ID, true);
   Values[ 4] = GetClassName(ID->getIdentifier());
   // const struct _method_list_t * const baseMethods;
@@ -5681,7 +5687,7 @@
 
   if (flags & NonFragileABI_Class_Meta) {
     Values[ 7] = llvm::Constant::getNullValue(ObjCTypes.IvarListnfABIPtrTy);
-    Values[ 8] = GetIvarLayoutName(0, ObjCTypes);
+    Values[ 8] = GetIvarLayoutName(nullptr, ObjCTypes);
     Values[ 9] = llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy);
   } else {
     Values[ 7] = EmitIvarList(ID);
@@ -5745,7 +5751,7 @@
 
 bool
 CGObjCNonFragileABIMac::ImplementationIsNonLazy(const ObjCImplDecl *OD) const {
-  return OD->getClassMethod(GetNullarySelector("load")) != 0;
+  return OD->getClassMethod(GetNullarySelector("load")) != nullptr;
 }
 
 void CGObjCNonFragileABIMac::GetClassSizeInfo(const ObjCImplementationDecl *OID,
@@ -5772,22 +5778,22 @@
       ObjCTypes.CacheTy,
       false,
       llvm::GlobalValue::ExternalLinkage,
-      0,
+      nullptr,
       "_objc_empty_cache");
-    
+
     // Make this entry NULL for any iOS device target, any iOS simulator
     // target, or OS X with a deployment target of 10.9 or later.
     const llvm::Triple &Triple = CGM.getTarget().getTriple();
     if (Triple.isiOS() || (Triple.isMacOSX() && !Triple.isMacOSXVersionLT(10, 9)))
       // This entry will be null.
-      ObjCEmptyVtableVar = 0;
+      ObjCEmptyVtableVar = nullptr;
     else
       ObjCEmptyVtableVar = new llvm::GlobalVariable(
                                                     CGM.getModule(),
                                                     ObjCTypes.ImpnfABITy,
                                                     false,
                                                     llvm::GlobalValue::ExternalLinkage,
-                                                    0,
+                                                    nullptr,
                                                     "_objc_empty_vtable");
   }
   assert(ID->getClassInterface() &&
@@ -5875,7 +5881,7 @@
 
   if (!ID->getClassInterface()->getSuperClass()) {
     flags |= NonFragileABI_Class_Root;
-    SuperClassGV = 0;
+    SuperClassGV = nullptr;
   } else {
     // Has a root. Current class is not a root.
     std::string RootClassName =
@@ -6042,7 +6048,7 @@
   const ObjCMethodDecl *MD) {
   llvm::Function *Fn = GetMethodDefinition(MD);
   if (!Fn)
-    return 0;
+    return nullptr;
 
   llvm::Constant *Method[] = {
     llvm::ConstantExpr::getBitCast(GetMethodVarName(MD->getSelector()),
@@ -6102,7 +6108,7 @@
   if (!IvarOffsetGV)
     IvarOffsetGV = new llvm::GlobalVariable(
         CGM.getModule(), ObjCTypes.IvarOffsetVarTy, false,
-        llvm::GlobalValue::ExternalLinkage, 0, Name);
+        llvm::GlobalValue::ExternalLinkage, nullptr, Name);
   return IvarOffsetGV;
 }
 
@@ -6217,7 +6223,8 @@
     // contents for protocols which were referenced but never defined.
     Entry =
         new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ProtocolnfABITy,
-                                 false, llvm::GlobalValue::WeakAnyLinkage, 0,
+                                 false, llvm::GlobalValue::WeakAnyLinkage,
+                                 nullptr,
                                  "\01l_OBJC_PROTOCOL_$_" + PD->getName());
     Entry->setSection("__DATA,__datacoal_nt,coalesced");
   }
@@ -6315,7 +6322,7 @@
                              "__DATA, __objc_const",
                              OptClassMethods);
   Values[7] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + PD->getName(),
-                               0, PD, ObjCTypes);
+                               nullptr, PD, ObjCTypes);
   uint32_t Size =
     CGM.getDataLayout().getTypeAllocSize(ObjCTypes.ProtocolnfABITy);
   Values[8] = llvm::ConstantInt::get(ObjCTypes.IntTy, Size);
@@ -6328,7 +6335,7 @@
 
   if (Entry) {
     // Already created, update the initializer.
-    assert(Entry->getLinkage() == llvm::GlobalValue::WeakAnyLinkage);
+    assert(Entry->hasWeakAnyLinkage());
     Entry->setInitializer(Init);
   } else {
     Entry =
@@ -6427,8 +6434,8 @@
                                    ObjCTypes.SelectorPtrTy);
   Desc[1] = GetMethodVarType(MD);
   if (!Desc[1])
-    return 0;
-  
+    return nullptr;
+
   // Protocol methods have no implementation. So, this entry is always NULL.
   Desc[2] = llvm::Constant::getNullValue(ObjCTypes.Int8PtrTy);
   return llvm::ConstantStruct::get(ObjCTypes.MethodTy, Desc);
@@ -6515,7 +6522,7 @@
 
   // Second argument: a pointer to the message ref structure.  Leave
   // the actual argument value blank for now.
-  args.add(RValue::get(0), ObjCTypes.MessageRefCPtrTy);
+  args.add(RValue::get(nullptr), ObjCTypes.MessageRefCPtrTy);
 
   args.insert(args.end(), formalArgs.begin(), formalArgs.end());
 
@@ -6530,7 +6537,7 @@
   // The runtime currently never uses vtable dispatch for anything
   // except normal, non-super message-sends.
   // FIXME: don't use this for that.
-  llvm::Constant *fn = 0;
+  llvm::Constant *fn = nullptr;
   std::string messageRefName("\01l_");
   if (CGM.ReturnSlotInterferesWithArgs(MSI.CallInfo)) {
     if (isSuper) {
@@ -6601,8 +6608,8 @@
   callee = CGF.Builder.CreateBitCast(callee, MSI.MessengerType);
 
   RValue result = CGF.EmitCall(MSI.CallInfo, callee, returnSlot, args);
-  return nullReturn.complete(CGF, result, resultType, formalArgs, 
-                             requiresnullCheck ? method : 0);
+  return nullReturn.complete(CGF, result, resultType, formalArgs,
+                             requiresnullCheck ? method : nullptr);
 }
 
 /// Generate code for a message send expression in the nonfragile abi.
@@ -6635,7 +6642,7 @@
 
   if (!GV)
     GV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABITy,
-                                  false, L, 0, Name);
+                                  false, L, nullptr, Name);
 
   assert(GV->getLinkage() == L);
   return GV;
@@ -6733,7 +6740,7 @@
     std::string ClassName(getClassSymbolPrefix() + ID->getNameAsString());
     llvm::GlobalVariable *ClassGV = GetClassGlobal(ClassName, true);
     (void)ClassGV;
-    assert(ClassGV->getLinkage() == llvm::GlobalValue::ExternalWeakLinkage);
+    assert(ClassGV->hasExternalWeakLinkage());
   }
   
   return EmitClassRef(CGF, ID);
@@ -6950,7 +6957,7 @@
         new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy,
                                  false,
                                  llvm::GlobalValue::ExternalLinkage,
-                                 0, "OBJC_EHTYPE_id");
+                                 nullptr, "OBJC_EHTYPE_id");
     return IDEHType;
   }
 
@@ -7007,7 +7014,7 @@
       return Entry =
         new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, false,
                                  llvm::GlobalValue::ExternalLinkage,
-                                 0,
+                                 nullptr,
                                  ("OBJC_EHTYPE_$_" +
                                   ID->getIdentifier()->getName()));
   }
@@ -7023,7 +7030,7 @@
     VTableGV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.Int8PtrTy,
                                         false,
                                         llvm::GlobalValue::ExternalLinkage,
-                                        0, VTableName);
+                                        nullptr, VTableName);
 
   llvm::Value *VTableIdx = llvm::ConstantInt::get(CGM.Int32Ty, 2);
 
diff --git a/lib/CodeGen/CGObjCRuntime.cpp b/lib/CodeGen/CGObjCRuntime.cpp
index 8d6c653..1a5db9b 100644
--- a/lib/CodeGen/CGObjCRuntime.cpp
+++ b/lib/CodeGen/CGObjCRuntime.cpp
@@ -65,7 +65,7 @@
 uint64_t CGObjCRuntime::ComputeIvarBaseOffset(CodeGen::CodeGenModule &CGM,
                                               const ObjCInterfaceDecl *OID,
                                               const ObjCIvarDecl *Ivar) {
-  return LookupFieldBitOffset(CGM, OID, 0, Ivar) / 
+  return LookupFieldBitOffset(CGM, OID, nullptr, Ivar) /
     CGM.getContext().getCharWidth();
 }
 
@@ -116,7 +116,7 @@
   // Note, there is a subtle invariant here: we can only call this routine on
   // non-synthesized ivars but we may be called for synthesized ivars.  However,
   // a synthesized ivar can never be a bit-field, so this is safe.
-  uint64_t FieldBitOffset = LookupFieldBitOffset(CGF.CGM, OID, 0, Ivar);
+  uint64_t FieldBitOffset = LookupFieldBitOffset(CGF.CGM, OID, nullptr, Ivar);
   uint64_t BitOffset = FieldBitOffset % CGF.CGM.getContext().getCharWidth();
   uint64_t AlignmentBits = CGF.CGM.getTarget().getCharAlign();
   uint64_t BitFieldSize = Ivar->getBitWidthValue(CGF.getContext());
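
As a worked example of the arithmetic above (the numbers are hypothetical):
with 8-bit chars, an ivar at FieldBitOffset 35 gives BitOffset = 35 % 8 = 3, so
a BitFieldSize of 5 means the field occupies bits 3..7 of the byte at offset
35 / 8 = 4.
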
@@ -201,7 +201,7 @@
 
       // @catch(...) always matches.
       if (!CatchDecl) {
-        Handler.TypeInfo = 0; // catch-all
+        Handler.TypeInfo = nullptr; // catch-all
         // Don't consider any other catches.
         break;
       }
@@ -242,7 +242,7 @@
 
     if (endCatchFn) {
       // Add a cleanup to leave the catch.
-      bool EndCatchMightThrow = (Handler.Variable == 0);
+      bool EndCatchMightThrow = (Handler.Variable == nullptr);
 
       CGF.EHStack.pushCleanup<CallObjCEndCatch>(NormalAndEHCleanup,
                                                 EndCatchMightThrow,
diff --git a/lib/CodeGen/CGObjCRuntime.h b/lib/CodeGen/CGObjCRuntime.h
index 2a8ae90..fc6bee3 100644
--- a/lib/CodeGen/CGObjCRuntime.h
+++ b/lib/CodeGen/CGObjCRuntime.h
@@ -156,8 +156,8 @@
                       Selector Sel,
                       llvm::Value *Receiver,
                       const CallArgList &CallArgs,
-                      const ObjCInterfaceDecl *Class = 0,
-                      const ObjCMethodDecl *Method = 0) = 0;
+                      const ObjCInterfaceDecl *Class = nullptr,
+                      const ObjCMethodDecl *Method = nullptr) = 0;
 
   /// Generate an Objective-C message send operation to the super
   /// class initiated in a method for Class and with the given Self
@@ -175,7 +175,7 @@
                            llvm::Value *Self,
                            bool IsClassMessage,
                            const CallArgList &CallArgs,
-                           const ObjCMethodDecl *Method = 0) = 0;
+                           const ObjCMethodDecl *Method = nullptr) = 0;
 
   /// Emit the code to return the named protocol as an object, as in a
   /// \@protocol expression.
diff --git a/lib/CodeGen/CGOpenCLRuntime.cpp b/lib/CodeGen/CGOpenCLRuntime.cpp
index 6e1a3c9..079ef72 100644
--- a/lib/CodeGen/CGOpenCLRuntime.cpp
+++ b/lib/CodeGen/CGOpenCLRuntime.cpp
@@ -39,7 +39,7 @@
   switch (cast<BuiltinType>(T)->getKind()) {
   default: 
     llvm_unreachable("Unexpected opencl builtin type!");
-    return 0;
+    return nullptr;
   case BuiltinType::OCLImage1d:
     return llvm::PointerType::get(llvm::StructType::create(
                            Ctx, "opencl.image1d_t"), ImgAddrSpc);
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
new file mode 100644
index 0000000..da86e2b
--- /dev/null
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -0,0 +1,177 @@
+//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This provides a class for OpenMP runtime code generation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CGOpenMPRuntime.h"
+#include "CodeGenFunction.h"
+#include "clang/AST/Decl.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/raw_ostream.h"
+#include <assert.h>
+
+using namespace clang;
+using namespace CodeGen;
+
+CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
+    : CGM(CGM), DefaultOpenMPPSource(nullptr) {
+  IdentTy = llvm::StructType::create(
+      "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
+      CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
+      CGM.Int8PtrTy /* psource */, nullptr);
+  // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
+  llvm::Type *MicroParams[] = { llvm::PointerType::getUnqual(CGM.Int32Ty),
+                                llvm::PointerType::getUnqual(CGM.Int32Ty) };
+  Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
+}
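+
+// For reference, the C-level shapes these two types model, per the kmp.h
+// excerpts quoted in CGOpenMPRuntime.h (a sketch; the names below are
+// illustrative, not runtime identifiers):
+//   struct ident_sketch {
+//     kmp_int32 reserved_1, flags, reserved_2, reserved_3;
+//     const char *psource;
+//   };
+//   typedef void (*kmpc_micro_sketch)(kmp_int32 *global_tid,
+//                                     kmp_int32 *bound_tid, ...);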
+
+llvm::Value *
+CGOpenMPRuntime::GetOrCreateDefaultOpenMPLocation(OpenMPLocationFlags Flags) {
+  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
+  if (!Entry) {
+    if (!DefaultOpenMPPSource) {
+      // Initialize the default value for the psource field shared by all
+      // ident_t objects. The format is ";file;function;line;column;;".
+      // Taken from
+      // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
+      DefaultOpenMPPSource =
+          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;");
+      DefaultOpenMPPSource =
+          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
+    }
+    llvm::GlobalVariable *DefaultOpenMPLocation = cast<llvm::GlobalVariable>(
+        CGM.CreateRuntimeVariable(IdentTy, ".kmpc_default_loc.addr"));
+    DefaultOpenMPLocation->setUnnamedAddr(true);
+    DefaultOpenMPLocation->setConstant(true);
+    DefaultOpenMPLocation->setLinkage(llvm::GlobalValue::PrivateLinkage);
+
+    llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
+    llvm::Constant *Values[] = { Zero,
+                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
+                                 Zero, Zero, DefaultOpenMPPSource };
+    llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
+    DefaultOpenMPLocation->setInitializer(Init);
+    return OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation;
+  }
+  return Entry;
+}
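+
+// For reference: with the default OMP_IDENT_KMPC (0x02) flags, the initializer
+// built above corresponds to this C-level constant (a sketch, using the
+// ident_t layout documented in CGOpenMPRuntime.h):
+//   const ident_t kmpc_default_loc = { 0, 0x02, 0, 0,
+//                                      ";unknown;unknown;0;0;;" };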
+
+llvm::Value *CGOpenMPRuntime::EmitOpenMPUpdateLocation(
+    CodeGenFunction &CGF, SourceLocation Loc, OpenMPLocationFlags Flags) {
+  // If no debug info is generated, return the global default location.
+  if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
+      Loc.isInvalid())
+    return GetOrCreateDefaultOpenMPLocation(Flags);
+
+  assert(CGF.CurFn && "No function in current CodeGenFunction.");
+
+  llvm::Value *LocValue = nullptr;
+  OpenMPLocMapTy::iterator I = OpenMPLocMap.find(CGF.CurFn);
+  if (I != OpenMPLocMap.end()) {
+    LocValue = I->second;
+  } else {
+    // Generate "ident_t .kmpc_loc.addr;"
+    llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr");
+    AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
+    OpenMPLocMap[CGF.CurFn] = AI;
+    LocValue = AI;
+
+    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
+    CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
+    CGF.Builder.CreateMemCpy(LocValue, GetOrCreateDefaultOpenMPLocation(Flags),
+                             llvm::ConstantExpr::getSizeOf(IdentTy),
+                             CGM.PointerAlignInBytes);
+  }
+
+  // char **psource = &.kmpc_loc.addr.psource;
+  llvm::Value *PSource =
+      CGF.Builder.CreateConstInBoundsGEP2_32(LocValue, 0, IdentField_PSource);
+
+  SmallString<128> Buffer2;
+  llvm::raw_svector_ostream OS2(Buffer2);
+  // Build debug location
+  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
+  OS2 << ";" << PLoc.getFilename() << ";";
+  if (const FunctionDecl *FD =
+          dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
+    OS2 << FD->getQualifiedNameAsString();
+  }
+  OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
+  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
+  CGF.Builder.CreateStore(CGF.Builder.CreateGlobalStringPtr(OS2.str()),
+                          PSource);
+  return LocValue;
+}
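+
+// Example (hypothetical source position): for file "test.c", function "foo",
+// line 3, column 5, the store above leaves
+//   *psource == ";test.c;foo;3;5;;"
+// matching the ";file;function;line;column;;" format noted earlier.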
+
+llvm::Value *CGOpenMPRuntime::GetOpenMPGlobalThreadNum(CodeGenFunction &CGF,
+                                                       SourceLocation Loc) {
+  assert(CGF.CurFn && "No function in current CodeGenFunction.");
+
+  llvm::Value *GTid = nullptr;
+  OpenMPGtidMapTy::iterator I = OpenMPGtidMap.find(CGF.CurFn);
+  if (I != OpenMPGtidMap.end()) {
+    GTid = I->second;
+  } else {
+    // Generate "int32 .kmpc_global_thread_num.addr;"
+    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
+    CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
+    llvm::Value *Args[] = { EmitOpenMPUpdateLocation(CGF, Loc) };
+    GTid = CGF.EmitRuntimeCall(
+        CreateRuntimeFunction(OMPRTL__kmpc_global_thread_num), Args);
+    OpenMPGtidMap[CGF.CurFn] = GTid;
+  }
+  return GTid;
+}
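+
+// The map lookup above caches the result, so in C terms each function executes
+//   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
+// at most once, at its alloca insertion point; later queries reuse the value.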
+
+void CGOpenMPRuntime::FunctionFinished(CodeGenFunction &CGF) {
+  assert(CGF.CurFn && "No function in current CodeGenFunction.");
+  if (OpenMPGtidMap.count(CGF.CurFn))
+    OpenMPGtidMap.erase(CGF.CurFn);
+  if (OpenMPLocMap.count(CGF.CurFn))
+    OpenMPLocMap.erase(CGF.CurFn);
+}
+
+llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
+  return llvm::PointerType::getUnqual(IdentTy);
+}
+
+llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
+  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
+}
+
+llvm::Constant *
+CGOpenMPRuntime::CreateRuntimeFunction(OpenMPRTLFunction Function) {
+  llvm::Constant *RTLFn = nullptr;
+  switch (Function) {
+  case OMPRTL__kmpc_fork_call: {
+    // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
+    // microtask, ...);
+    llvm::Type *TypeParams[] = { getIdentTyPointerTy(), CGM.Int32Ty,
+                                 getKmpc_MicroPointerTy() };
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, true);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
+    break;
+  }
+  case OMPRTL__kmpc_global_thread_num: {
+    // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
+    llvm::Type *TypeParams[] = { getIdentTyPointerTy() };
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
+    break;
+  }
+  }
+  return RTLFn;
+}
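
For quick reference, the two entry points declared by CreateRuntimeFunction
correspond to these C prototypes (a sketch assembled from the comments above,
with ident_t, kmp_int32, and kmpc_micro declared as in the earlier sketches):

    extern "C" void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
                                     kmpc_micro microtask, ...);
    extern "C" kmp_int32 __kmpc_global_thread_num(ident_t *loc);
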
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h
new file mode 100644
index 0000000..06103cf
--- /dev/null
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -0,0 +1,174 @@
+//===----- CGOpenMPRuntime.h - Interface to OpenMP Runtimes -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This provides a class for OpenMP runtime code generation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_CODEGEN_OPENMPRUNTIME_H
+#define CLANG_CODEGEN_OPENMPRUNTIME_H
+
+#include "clang/AST/Type.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+
+namespace llvm {
+class AllocaInst;
+class CallInst;
+class GlobalVariable;
+class Constant;
+class Function;
+class Module;
+class StructLayout;
+class FunctionType;
+class StructType;
+class Type;
+class Value;
+}
+
+namespace clang {
+
+namespace CodeGen {
+
+class CodeGenFunction;
+class CodeGenModule;
+
+class CGOpenMPRuntime {
+public:
+  /// \brief Values for bit flags used in the ident_t structure's flags field.
+  /// All enumerators are named and described in accordance with the code
+  /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
+  enum OpenMPLocationFlags {
+    /// \brief Use trampoline for internal microtask.
+    OMP_IDENT_IMD = 0x01,
+    /// \brief Use c-style ident structure.
+    OMP_IDENT_KMPC = 0x02,
+    /// \brief Atomic reduction option for kmpc_reduce.
+    OMP_ATOMIC_REDUCE = 0x10,
+    /// \brief Explicit 'barrier' directive.
+    OMP_IDENT_BARRIER_EXPL = 0x20,
+    /// \brief Implicit barrier in code.
+    OMP_IDENT_BARRIER_IMPL = 0x40,
+    /// \brief Implicit barrier in 'for' directive.
+    OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
+    /// \brief Implicit barrier in 'sections' directive.
+    OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
+    /// \brief Implicit barrier in 'single' directive.
+    OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140
+  };
+  enum OpenMPRTLFunction {
+    // Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
+    // microtask, ...);
+    OMPRTL__kmpc_fork_call,
+    // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
+    OMPRTL__kmpc_global_thread_num
+  };
+
+private:
+  CodeGenModule &CGM;
+  /// \brief Default const ident_t object used for initialization of all other
+  /// ident_t objects.
+  llvm::Constant *DefaultOpenMPPSource;
+  /// \brief Map of flags and corresponding default locations.
+  typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDefaultLocMapTy;
+  OpenMPDefaultLocMapTy OpenMPDefaultLocMap;
+  llvm::Value *GetOrCreateDefaultOpenMPLocation(OpenMPLocationFlags Flags);
+  /// \brief Describes the ident_t structure that records a source location.
+  /// All descriptions are taken from
+  /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
+  /// Original structure:
+  /// typedef struct ident {
+  ///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
+  ///                                  see above  */
+  ///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
+  ///                                  KMP_IDENT_KMPC identifies this union
+  ///                                  member  */
+  ///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
+  ///                                  see above */
+  ///#if USE_ITT_BUILD
+  ///                            /*  but currently used for storing
+  ///                                region-specific ITT */
+  ///                            /*  contextual information. */
+  ///#endif /* USE_ITT_BUILD */
+  ///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
+  ///                                 C++  */
+  ///    char const *psource;    /**< String describing the source location.
+  ///                            The string is composed of semi-colon separated
+  ///                            fields which describe the source file,
+  ///                            the function and a pair of line numbers that
+  ///                            delimit the construct.
+  ///                             */
+  /// } ident_t;
+  enum IdentFieldIndex {
+    /// \brief might be used in Fortran
+    IdentField_Reserved_1,
+    /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
+    IdentField_Flags,
+    /// \brief Not really used in Fortran any more
+    IdentField_Reserved_2,
+    /// \brief Source[4] in Fortran, do not use for C++
+    IdentField_Reserved_3,
+    /// \brief String describing the source location. The string is composed of
+    /// semi-colon separated fields which describe the source file, the function
+    /// and a pair of line numbers that delimit the construct.
+    IdentField_PSource
+  };
+  llvm::StructType *IdentTy;
+  /// \brief The type for a microtask which gets passed to __kmpc_fork_call().
+  /// Original representation is:
+  /// typedef void (kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...);
+  llvm::FunctionType *Kmpc_MicroTy;
+  /// \brief Map of functions to their local debug location allocas.
+  typedef llvm::DenseMap<llvm::Function *, llvm::Value *> OpenMPLocMapTy;
+  OpenMPLocMapTy OpenMPLocMap;
+  /// \brief Map of functions to their cached global thread id (gtid) values.
+  typedef llvm::DenseMap<llvm::Function *, llvm::Value *> OpenMPGtidMapTy;
+  OpenMPGtidMapTy OpenMPGtidMap;
+
+public:
+  CGOpenMPRuntime(CodeGenModule &CGM);
+  ~CGOpenMPRuntime() {}
+
+  /// \brief Cleans up the cached references for the finished function.
+  /// \param CGF Reference to finished CodeGenFunction.
+  ///
+  void FunctionFinished(CodeGenFunction &CGF);
+
+  /// \brief Emits an object of ident_t type with info for the source location.
+  /// \param CGF Reference to current CodeGenFunction.
+  /// \param Loc Clang source location.
+  /// \param Flags Flags for OpenMP location.
+  ///
+  llvm::Value *
+  EmitOpenMPUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc,
+                           OpenMPLocationFlags Flags = OMP_IDENT_KMPC);
+
+  /// \brief Generates global thread number value.
+  /// \param CGF Reference to current CodeGenFunction.
+  /// \param Loc Clang source location.
+  ///
+  llvm::Value *GetOpenMPGlobalThreadNum(CodeGenFunction &CGF,
+                                        SourceLocation Loc);
+
+  /// \brief Returns a pointer to the ident_t type.
+  llvm::Type *getIdentTyPointerTy();
+
+  /// \brief Returns a pointer to the kmpc_micro type.
+  llvm::Type *getKmpc_MicroPointerTy();
+
+  /// \brief Returns specified OpenMP runtime function.
+  /// \param Function OpenMP runtime function.
+  /// \return Specified function.
+  llvm::Constant *CreateRuntimeFunction(OpenMPRTLFunction Function);
+};
+}
+}
+
+#endif
diff --git a/lib/CodeGen/CGRTTI.cpp b/lib/CodeGen/CGRTTI.cpp
index 7049df7..4ca315c 100644
--- a/lib/CodeGen/CGRTTI.cpp
+++ b/lib/CodeGen/CGRTTI.cpp
@@ -22,7 +22,7 @@
 using namespace CodeGen;
 
 namespace {
-class RTTIBuilder {
+class ItaniumRTTIBuilder {
   CodeGenModule &CGM;  // Per-module state.
   llvm::LLVMContext &VMContext;
   
@@ -62,7 +62,7 @@
   void BuildPointerToMemberTypeInfo(const MemberPointerType *Ty);
   
 public:
-  RTTIBuilder(CodeGenModule &CGM) : CGM(CGM), 
+  ItaniumRTTIBuilder(CodeGenModule &CGM) : CGM(CGM), 
     VMContext(CGM.getModule().getContext()) { }
 
   // Pointer type info flags.
@@ -110,7 +110,7 @@
 }
 
 llvm::GlobalVariable *
-RTTIBuilder::GetAddrOfTypeName(QualType Ty, 
+ItaniumRTTIBuilder::GetAddrOfTypeName(QualType Ty,
                                llvm::GlobalVariable::LinkageTypes Linkage) {
   SmallString<256> OutName;
   llvm::raw_svector_ostream Out(OutName);
@@ -132,7 +132,8 @@
   return GV;
 }
 
-llvm::Constant *RTTIBuilder::GetAddrOfExternalRTTIDescriptor(QualType Ty) {
+llvm::Constant *
+ItaniumRTTIBuilder::GetAddrOfExternalRTTIDescriptor(QualType Ty) {
   // Mangle the RTTI name.
   SmallString<256> OutName;
   llvm::raw_svector_ostream Out(OutName);
@@ -147,7 +148,8 @@
     // Create a new global variable.
     GV = new llvm::GlobalVariable(CGM.getModule(), CGM.Int8PtrTy,
                                   /*Constant=*/true,
-                                  llvm::GlobalValue::ExternalLinkage, 0, Name);
+                                  llvm::GlobalValue::ExternalLinkage, nullptr,
+                                  Name);
   }
   
   return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy);
@@ -316,8 +318,8 @@
 
 /// getTypeInfoLinkage - Return the linkage that the type info and type info
 /// name constants should have for the given type.
-static llvm::GlobalVariable::LinkageTypes 
-getTypeInfoLinkage(CodeGenModule &CGM, QualType Ty) {
+llvm::GlobalVariable::LinkageTypes 
+CodeGenModule::getTypeInfoLinkage(QualType Ty) {
   // Itanium C++ ABI 2.9.5p7:
   //   In addition, it and all of the intermediate abi::__pointer_type_info 
   //   structs in the chain down to the abi::__class_type_info for the
@@ -338,7 +340,7 @@
 
   case VisibleNoLinkage:
   case ExternalLinkage:
-    if (!CGM.getLangOpts().RTTI) {
+    if (!getLangOpts().RTTI) {
       // RTTI is not enabled, which means that this type info struct is going
       // to be used for exception handling. Give it linkonce_odr linkage.
       return llvm::GlobalValue::LinkOnceODRLinkage;
@@ -349,7 +351,7 @@
       if (RD->hasAttr<WeakAttr>())
         return llvm::GlobalValue::WeakODRLinkage;
       if (RD->isDynamicClass())
-        return CGM.getVTableLinkage(RD);
+        return getVTableLinkage(RD);
     }
 
     return llvm::GlobalValue::LinkOnceODRLinkage;
@@ -387,7 +389,7 @@
   return true;
 }
 
-void RTTIBuilder::BuildVTablePointer(const Type *Ty) {
+void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) {
   // abi::__class_type_info.
   static const char * const ClassTypeInfo =
     "_ZTVN10__cxxabiv117__class_type_infoE";
@@ -398,7 +400,7 @@
   static const char * const VMIClassTypeInfo =
     "_ZTVN10__cxxabiv121__vmi_class_type_infoE";
 
-  const char *VTableName = 0;
+  const char *VTableName = nullptr;
 
   switch (Ty->getTypeClass()) {
 #define TYPE(Class, Base)
@@ -508,7 +510,7 @@
   Fields.push_back(VTable);
 }
 
-llvm::Constant *RTTIBuilder::BuildTypeInfo(QualType Ty, bool Force) {
+llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force) {
   // We want to operate on the canonical type.
   Ty = CGM.getContext().getCanonicalType(Ty);
 
@@ -537,7 +539,7 @@
   if (IsStdLib)
     Linkage = llvm::GlobalValue::ExternalLinkage;
   else
-    Linkage = getTypeInfoLinkage(CGM, Ty);
+    Linkage = CGM.getTypeInfoLinkage(Ty);
 
   // Add the vtable pointer.
   BuildVTablePointer(cast<Type>(Ty));
@@ -678,18 +680,17 @@
 
   // Give the type_info object and name the formal visibility of the
   // type itself.
-  Visibility formalVisibility = Ty->getVisibility();
-  llvm::GlobalValue::VisibilityTypes llvmVisibility =
-    CodeGenModule::GetLLVMVisibility(formalVisibility);
+  llvm::GlobalValue::VisibilityTypes llvmVisibility;
+  if (llvm::GlobalValue::isLocalLinkage(Linkage))
+    // If the linkage is local, only default visibility makes sense.
+    llvmVisibility = llvm::GlobalValue::DefaultVisibility;
+  else if (RTTIUniqueness == CGCXXABI::RUK_NonUniqueHidden)
+    llvmVisibility = llvm::GlobalValue::HiddenVisibility;
+  else
+    llvmVisibility = CodeGenModule::GetLLVMVisibility(Ty->getVisibility());
   TypeName->setVisibility(llvmVisibility);
   GV->setVisibility(llvmVisibility);
 
-  // FIXME: integrate this better into the above when we move to trunk
-  if (RTTIUniqueness == CGCXXABI::RUK_NonUniqueHidden) {
-    TypeName->setVisibility(llvm::GlobalValue::HiddenVisibility);
-    GV->setVisibility(llvm::GlobalValue::HiddenVisibility);
-  }
-
   return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy);
 }
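The visibility fixup that used to be patched in after the fact (the FIXME block removed above) is now one decision: local linkage forces default visibility, the RUK_NonUniqueHidden uniqueness kind forces hidden, and everything else takes the type's own visibility. Illustrative source for the first and last cases (which uniqueness kind applies is the target C++ ABI's call, so that part is an assumption here):

    namespace { struct L { virtual ~L(); }; }   // internal linkage: default visibility
    struct __attribute__((visibility("hidden")))
    H { virtual ~H(); };                         // RTTI inherits the type's hidden visibility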
 
@@ -699,18 +700,18 @@
   unsigned Flags = 0;
 
   if (Quals.hasConst())
-    Flags |= RTTIBuilder::PTI_Const;
+    Flags |= ItaniumRTTIBuilder::PTI_Const;
   if (Quals.hasVolatile())
-    Flags |= RTTIBuilder::PTI_Volatile;
+    Flags |= ItaniumRTTIBuilder::PTI_Volatile;
   if (Quals.hasRestrict())
-    Flags |= RTTIBuilder::PTI_Restrict;
+    Flags |= ItaniumRTTIBuilder::PTI_Restrict;
 
   return Flags;
 }
 
 /// BuildObjCObjectTypeInfo - Build the appropriate kind of type_info
 /// for the given Objective-C object type.
-void RTTIBuilder::BuildObjCObjectTypeInfo(const ObjCObjectType *OT) {
+void ItaniumRTTIBuilder::BuildObjCObjectTypeInfo(const ObjCObjectType *OT) {
   // Drop qualifiers.
   const Type *T = OT->getBaseType().getTypePtr();
   assert(isa<BuiltinType>(T) || isa<ObjCInterfaceType>(T));
@@ -728,18 +729,18 @@
   QualType SuperTy = CGM.getContext().getObjCInterfaceType(Super);
 
   // Everything else is single inheritance.
-  llvm::Constant *BaseTypeInfo = RTTIBuilder(CGM).BuildTypeInfo(SuperTy);
+  llvm::Constant *BaseTypeInfo = ItaniumRTTIBuilder(CGM).BuildTypeInfo(SuperTy);
   Fields.push_back(BaseTypeInfo);
 }
 
 /// BuildSIClassTypeInfo - Build an abi::__si_class_type_info, used for single
 /// inheritance, according to the Itanium C++ ABI, 2.9.5p6b.
-void RTTIBuilder::BuildSIClassTypeInfo(const CXXRecordDecl *RD) {
+void ItaniumRTTIBuilder::BuildSIClassTypeInfo(const CXXRecordDecl *RD) {
   // Itanium C++ ABI 2.9.5p6b:
   // It adds to abi::__class_type_info a single member pointing to the 
   // type_info structure for the base type,
   llvm::Constant *BaseTypeInfo = 
-    RTTIBuilder(CGM).BuildTypeInfo(RD->bases_begin()->getType());
+    ItaniumRTTIBuilder(CGM).BuildTypeInfo(RD->bases_begin()->getType());
   Fields.push_back(BaseTypeInfo);
 }
 
@@ -768,20 +769,20 @@
     if (!Bases.VirtualBases.insert(BaseDecl)) {
       // If this virtual base has been seen before, then the class is diamond
       // shaped.
-      Flags |= RTTIBuilder::VMI_DiamondShaped;
+      Flags |= ItaniumRTTIBuilder::VMI_DiamondShaped;
     } else {
       if (Bases.NonVirtualBases.count(BaseDecl))
-        Flags |= RTTIBuilder::VMI_NonDiamondRepeat;
+        Flags |= ItaniumRTTIBuilder::VMI_NonDiamondRepeat;
     }
   } else {
     // Mark the non-virtual base as seen.
     if (!Bases.NonVirtualBases.insert(BaseDecl)) {
       // If this non-virtual base has been seen before, then the class has non-
       // diamond shaped repeated inheritance.
-      Flags |= RTTIBuilder::VMI_NonDiamondRepeat;
+      Flags |= ItaniumRTTIBuilder::VMI_NonDiamondRepeat;
     } else {
       if (Bases.VirtualBases.count(BaseDecl))
-        Flags |= RTTIBuilder::VMI_NonDiamondRepeat;
+        Flags |= ItaniumRTTIBuilder::VMI_NonDiamondRepeat;
     }
   }
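Since the walk visits each base edge once, diamond detection reduces to "this virtual base was already in the virtual set", and repeated-inheritance detection to "already seen in the other set". A minimal C++ diamond that sets VMI_DiamondShaped:

    struct A { virtual ~A(); };
    struct B : virtual A { };
    struct C : virtual A { };
    struct D : B, C { };   // A is reached twice as a virtual base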
 
@@ -806,7 +807,7 @@
 /// BuildVMIClassTypeInfo - Build an abi::__vmi_class_type_info, used for
 /// classes with bases that do not satisfy the abi::__si_class_type_info 
 /// constraints, according to the Itanium C++ ABI, 2.9.5p5c.
-void RTTIBuilder::BuildVMIClassTypeInfo(const CXXRecordDecl *RD) {
+void ItaniumRTTIBuilder::BuildVMIClassTypeInfo(const CXXRecordDecl *RD) {
   llvm::Type *UnsignedIntLTy = 
     CGM.getTypes().ConvertType(CGM.getContext().UnsignedIntTy);
   
@@ -847,7 +848,7 @@
   //   };
   for (const auto &Base : RD->bases()) {
     // The __base_type member points to the RTTI for the base type.
-    Fields.push_back(RTTIBuilder(CGM).BuildTypeInfo(Base.getType()));
+    Fields.push_back(ItaniumRTTIBuilder(CGM).BuildTypeInfo(Base.getType()));
 
     const CXXRecordDecl *BaseDecl = 
       cast<CXXRecordDecl>(Base.getType()->getAs<RecordType>()->getDecl());
@@ -882,7 +883,7 @@
 
 /// BuildPointerTypeInfo - Build an abi::__pointer_type_info struct,
 /// used for pointer types.
-void RTTIBuilder::BuildPointerTypeInfo(QualType PointeeTy) {  
+void ItaniumRTTIBuilder::BuildPointerTypeInfo(QualType PointeeTy) {
   Qualifiers Quals;
   QualType UnqualifiedPointeeTy = 
     CGM.getContext().getUnqualifiedArrayType(PointeeTy, Quals);
@@ -906,13 +907,14 @@
   //  __pointee is a pointer to the std::type_info derivation for the 
   //  unqualified type being pointed to.
   llvm::Constant *PointeeTypeInfo = 
-    RTTIBuilder(CGM).BuildTypeInfo(UnqualifiedPointeeTy);
+    ItaniumRTTIBuilder(CGM).BuildTypeInfo(UnqualifiedPointeeTy);
   Fields.push_back(PointeeTypeInfo);
 }
 
 /// BuildPointerToMemberTypeInfo - Build an abi::__pointer_to_member_type_info 
 /// struct, used for member pointer types.
-void RTTIBuilder::BuildPointerToMemberTypeInfo(const MemberPointerType *Ty) {
+void
+ItaniumRTTIBuilder::BuildPointerToMemberTypeInfo(const MemberPointerType *Ty) {
   QualType PointeeTy = Ty->getPointeeType();
   
   Qualifiers Quals;
@@ -943,14 +945,15 @@
   //   __pointee is a pointer to the std::type_info derivation for the 
   //   unqualified type being pointed to.
   llvm::Constant *PointeeTypeInfo = 
-    RTTIBuilder(CGM).BuildTypeInfo(UnqualifiedPointeeTy);
+    ItaniumRTTIBuilder(CGM).BuildTypeInfo(UnqualifiedPointeeTy);
   Fields.push_back(PointeeTypeInfo);
 
   // Itanium C++ ABI 2.9.5p9:
   //   __context is a pointer to an abi::__class_type_info corresponding to the
   //   class type containing the member pointed to 
   //   (e.g., the "A" in "int A::*").
-  Fields.push_back(RTTIBuilder(CGM).BuildTypeInfo(QualType(ClassType, 0)));
+  Fields.push_back(
+      ItaniumRTTIBuilder(CGM).BuildTypeInfo(QualType(ClassType, 0)));
 }
 
 llvm::Constant *CodeGenModule::GetAddrOfRTTIDescriptor(QualType Ty,
@@ -965,15 +968,17 @@
       LangOpts.ObjCRuntime.isGNUFamily())
     return ObjCRuntime->GetEHType(Ty);
 
-  return RTTIBuilder(*this).BuildTypeInfo(Ty);
+  if (getTarget().getCXXABI().isMicrosoft())
+    return getMSTypeDescriptor(Ty);
+  return ItaniumRTTIBuilder(*this).BuildTypeInfo(Ty);
 }
 
 void CodeGenModule::EmitFundamentalRTTIDescriptor(QualType Type) {
   QualType PointerType = Context.getPointerType(Type);
   QualType PointerTypeConst = Context.getPointerType(Type.withConst());
-  RTTIBuilder(*this).BuildTypeInfo(Type, true);
-  RTTIBuilder(*this).BuildTypeInfo(PointerType, true);
-  RTTIBuilder(*this).BuildTypeInfo(PointerTypeConst, true);
+  ItaniumRTTIBuilder(*this).BuildTypeInfo(Type, true);
+  ItaniumRTTIBuilder(*this).BuildTypeInfo(PointerType, true);
+  ItaniumRTTIBuilder(*this).BuildTypeInfo(PointerTypeConst, true);
 }
 
 void CodeGenModule::EmitFundamentalRTTIDescriptors() {
@@ -984,7 +989,8 @@
                                   Context.UnsignedShortTy, Context.IntTy,
                                   Context.UnsignedIntTy, Context.LongTy, 
                                   Context.UnsignedLongTy, Context.LongLongTy, 
-                                  Context.UnsignedLongLongTy, Context.FloatTy,
+                                  Context.UnsignedLongLongTy,
+                                  Context.HalfTy, Context.FloatTy,
                                   Context.DoubleTy, Context.LongDoubleTy,
                                   Context.Char16Ty, Context.Char32Ty };
   for (unsigned i = 0; i < llvm::array_lengthof(FundamentalTypes); ++i)
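With Context.HalfTy in the list, __fp16 joins the fundamental types whose descriptors are force-emitted. For each entry, EmitFundamentalRTTIDescriptor above pins down type_info for T, T*, and const T*; for float that is, in Itanium mangled names:

    // _ZTIf    typeid(float)
    // _ZTIPf   typeid(float *)
    // _ZTIPKf  typeid(const float *)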
diff --git a/lib/CodeGen/CGRecordLayout.h b/lib/CodeGen/CGRecordLayout.h
index 0fc7b8a..b45fee5 100644
--- a/lib/CodeGen/CGRecordLayout.h
+++ b/lib/CodeGen/CGRecordLayout.h
@@ -183,6 +183,7 @@
   /// \brief Return llvm::StructType element number that corresponds to the
   /// field FD.
   unsigned getLLVMFieldNo(const FieldDecl *FD) const {
+    FD = FD->getCanonicalDecl();
     assert(FieldInfo.count(FD) && "Invalid field for record!");
     return FieldInfo.lookup(FD);
   }
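The canonicalization here has to agree with the writer side in CGRecordLayoutBuilder.cpp below, which now also keys its maps on getCanonicalDecl(); presumably so a lookup succeeds no matter which redeclaration of the field the caller holds. Sketch of the invariant (names illustrative):

    Fields[FD->getCanonicalDecl()] = Index;                  // writer
    assert(Layout.getLLVMFieldNo(SomeRedeclOfFD) == Index);  // reader canonicalizes first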
@@ -201,6 +202,7 @@
 
   /// \brief Return the BitFieldInfo that corresponds to the field FD.
   const CGBitFieldInfo &getBitFieldInfo(const FieldDecl *FD) const {
+    FD = FD->getCanonicalDecl();
     assert(FD->isBitField() && "Invalid call for non-bit-field decl!");
     llvm::DenseMap<const FieldDecl *, CGBitFieldInfo>::const_iterator
       it = BitFields.find(FD);
diff --git a/lib/CodeGen/CGRecordLayoutBuilder.cpp b/lib/CodeGen/CGRecordLayoutBuilder.cpp
index 75b4504..a10d8e7 100644
--- a/lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ b/lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -84,7 +84,7 @@
       const CXXRecordDecl *RD;
     };
     MemberInfo(CharUnits Offset, InfoKind Kind, llvm::Type *Data,
-               const FieldDecl *FD = 0)
+               const FieldDecl *FD = nullptr)
       : Offset(Offset), Kind(Kind), Data(Data), FD(FD) {}
     MemberInfo(CharUnits Offset, InfoKind Kind, llvm::Type *Data,
                const CXXRecordDecl *RD)
@@ -212,7 +212,7 @@
 
 void CGRecordLowering::setBitFieldInfo(
     const FieldDecl *FD, CharUnits StartOffset, llvm::Type *StorageType) {
-  CGBitFieldInfo &Info = BitFields[FD];
+  CGBitFieldInfo &Info = BitFields[FD->getCanonicalDecl()];
   Info.IsSigned = FD->getType()->isSignedIntegerOrEnumerationType();
   Info.Offset = (unsigned)(getFieldBitOffset(FD) - Context.toBits(StartOffset));
   Info.Size = FD->getBitWidthValue(Context);
@@ -278,7 +278,7 @@
 
 void CGRecordLowering::lowerUnion() {
   CharUnits LayoutSize = Layout.getSize();
-  llvm::Type *StorageType = 0;
+  llvm::Type *StorageType = nullptr;
   // Compute zero-initializable status.
   if (!D->field_empty() && !isZeroInitializable(*D->field_begin()))
     IsZeroInitializable = IsZeroInitializableAsBase = false;
@@ -297,7 +297,7 @@
         FieldType = getByteArrayType(LayoutSize);
       setBitFieldInfo(Field, CharUnits::Zero(), FieldType);
     }
-    Fields[Field] = 0;
+    Fields[Field->getCanonicalDecl()] = 0;
     llvm::Type *FieldType = getStorageType(Field);
     // Conditionally update our storage type if we've got a new "better" one.
     if (!StorageType ||
@@ -373,7 +373,7 @@
       // Bitfields get the offset of their storage but come afterward and remain
       // there after a stable sort.
       Members.push_back(MemberInfo(bitsToCharUnits(StartBitOffset),
-                                   MemberInfo::Field, 0, *Field));
+                                   MemberInfo::Field, nullptr, *Field));
     }
     return;
   }
@@ -407,17 +407,18 @@
     Members.push_back(StorageInfo(bitsToCharUnits(StartBitOffset), Type));
     for (; Run != Field; ++Run)
       Members.push_back(MemberInfo(bitsToCharUnits(StartBitOffset),
-                                   MemberInfo::Field, 0, *Run));
+                                   MemberInfo::Field, nullptr, *Run));
     Run = FieldEnd;
   }
 }
 
 void CGRecordLowering::accumulateBases() {
   // If we've got a primary virtual base, we need to add it with the bases.
-  if (Layout.isPrimaryBaseVirtual())
-    Members.push_back(StorageInfo(
-      CharUnits::Zero(),
-      getStorageType(Layout.getPrimaryBase())));
+  if (Layout.isPrimaryBaseVirtual()) {
+    const CXXRecordDecl *BaseDecl = Layout.getPrimaryBase();
+    Members.push_back(MemberInfo(CharUnits::Zero(), MemberInfo::Base,
+                                 getStorageType(BaseDecl), BaseDecl));
+  }
   // Accumulate the non-virtual bases.
   for (const auto &Base : RD->bases()) {
     if (Base.isVirtual())
@@ -440,8 +441,25 @@
 }
 
 void CGRecordLowering::accumulateVBases() {
-  Members.push_back(MemberInfo(Layout.getNonVirtualSize(),
-                               MemberInfo::Scissor, 0, RD));
+  CharUnits ScissorOffset = Layout.getNonVirtualSize();
+  // In the Itanium ABI, it's possible to place a vbase at a dsize that is
+  // smaller than the nvsize.  Here we check to see if such a base is placed
+  // before the nvsize and set the scissor offset to that, instead of the
+  // nvsize.
+  if (!useMSABI())
+    for (const auto &Base : RD->vbases()) {
+      const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl();
+      if (BaseDecl->isEmpty())
+        continue;
+      // If the vbase is a primary virtual base of some base, then it doesn't
+      // get its own storage location but instead lives inside of that base.
+      if (Context.isNearlyEmpty(BaseDecl) && !hasOwnStorage(RD, BaseDecl))
+        continue;
+      ScissorOffset = std::min(ScissorOffset,
+                               Layout.getVBaseClassOffset(BaseDecl));
+    }
+  Members.push_back(MemberInfo(ScissorOffset, MemberInfo::Scissor, nullptr,
+                               RD));
   for (const auto &Base : RD->vbases()) {
     const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl();
     if (BaseDecl->isEmpty())
@@ -451,7 +469,8 @@
     // get its own storage location but instead lives inside of that base.
     if (!useMSABI() && Context.isNearlyEmpty(BaseDecl) &&
         !hasOwnStorage(RD, BaseDecl)) {
-      Members.push_back(MemberInfo(Offset, MemberInfo::VBase, 0, BaseDecl));
+      Members.push_back(MemberInfo(Offset, MemberInfo::VBase, nullptr,
+                                   BaseDecl));
       continue;
     }
     // If we've got a vtordisp, add it as a storage type.
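A concrete case of a virtual base landing below nvsize, which the new scissor computation has to account for (offsets assume a typical 64-bit Itanium target; a sketch, not a layout guarantee):

    struct A { virtual ~A(); char c; };  // dsize 9, nvsize 16
    struct B { char b; };                // size 1, align 1
    struct D : A, virtual B { };         // B may be placed at offset 9, below
                                         // nvsize(D) == 16, so the scissor moves to 9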
@@ -570,7 +589,7 @@
       FieldTypes.push_back(Member->Data);
     if (Member->Kind == MemberInfo::Field) {
       if (Member->FD)
-        Fields[Member->FD] = FieldTypes.size() - 1;
+        Fields[Member->FD->getCanonicalDecl()] = FieldTypes.size() - 1;
       // A field without storage must be a bitfield.
       if (!Member->Data)
         setBitFieldInfo(Member->FD, Member->Offset, FieldTypes.back());
@@ -627,7 +646,7 @@
   Builder.lower(false);
 
   // If we're in C++, compute the base subobject type.
-  llvm::StructType *BaseTy = 0;
+  llvm::StructType *BaseTy = nullptr;
   if (isa<CXXRecordDecl>(D) && !D->isUnion() && !D->hasAttr<FinalAttr>()) {
     BaseTy = Ty;
     if (Builder.Layout.getNonVirtualSize() != Builder.Layout.getSize()) {
diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp
index cba9e6c..573973a 100644
--- a/lib/CodeGen/CGStmt.cpp
+++ b/lib/CodeGen/CGStmt.cpp
@@ -76,8 +76,6 @@
   case Stmt::SEHExceptStmtClass:
   case Stmt::SEHFinallyStmtClass:
   case Stmt::MSDependentExistsStmtClass:
-  case Stmt::OMPParallelDirectiveClass:
-  case Stmt::OMPSimdDirectiveClass:
     llvm_unreachable("invalid statement class to emit generically");
   case Stmt::NullStmtClass:
   case Stmt::CompoundStmtClass:
@@ -174,6 +172,12 @@
   case Stmt::SEHTryStmtClass:
     EmitSEHTryStmt(cast<SEHTryStmt>(*S));
     break;
+  case Stmt::OMPParallelDirectiveClass:
+    EmitOMPParallelDirective(cast<OMPParallelDirective>(*S));
+    break;
+  case Stmt::OMPSimdDirectiveClass:
+    EmitOMPSimdDirective(cast<OMPSimdDirective>(*S));
+    break;
   }
 }
 
@@ -219,7 +223,7 @@
        E = S.body_end()-GetLast; I != E; ++I)
     EmitStmt(*I);
 
-  llvm::Value *RetAlloca = 0;
+  llvm::Value *RetAlloca = nullptr;
   if (GetLast) {
     // We have to special case labels here.  They are statements, but when put
     // at the end of a statement expression, they yield the value of their
@@ -508,6 +512,8 @@
   JumpDest LoopHeader = getJumpDestInCurrentScope("while.cond");
   EmitBlock(LoopHeader.getBlock());
 
+  LoopStack.push(LoopHeader.getBlock());
+
   // Create an exit block for when the condition fails, which will
   // also become the break target.
   JumpDest LoopExit = getJumpDestInCurrentScope("while.end");
@@ -571,6 +577,8 @@
   // Branch to the loop header again.
   EmitBranch(LoopHeader.getBlock());
 
+  LoopStack.pop();
+
   // Emit the exit block.
   EmitBlock(LoopExit.getBlock(), true);
 
@@ -591,6 +599,9 @@
 
   // Emit the body of the loop.
   llvm::BasicBlock *LoopBody = createBasicBlock("do.body");
+
+  LoopStack.push(LoopBody);
+
   EmitBlockWithFallThrough(LoopBody, Cnt);
   {
     RunCleanupsScope BodyScope(*this);
@@ -621,6 +632,8 @@
     Builder.CreateCondBr(BoolCondVal, LoopBody, LoopExit.getBlock(),
                          PGO.createLoopWeights(S.getCond(), Cnt));
 
+  LoopStack.pop();
+
   // Emit the exit block.
   EmitBlock(LoopExit.getBlock());
 
@@ -652,6 +665,8 @@
   llvm::BasicBlock *CondBlock = Continue.getBlock();
   EmitBlock(CondBlock);
 
+  LoopStack.push(CondBlock);
+
   // If the for loop doesn't have an increment we can just use the
   // condition as the continue block.  Otherwise we'll need to create
   // a block for it (in the current scope, i.e. in the scope of the
@@ -722,6 +737,8 @@
   if (DI)
     DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
 
+  LoopStack.pop();
+
   // Emit the fall-through block.
   EmitBlock(LoopExit.getBlock(), true);
 }
@@ -747,6 +764,8 @@
   llvm::BasicBlock *CondBlock = createBasicBlock("for.cond");
   EmitBlock(CondBlock);
 
+  LoopStack.push(CondBlock);
+
   // If there are any cleanups between here and the loop-exit scope,
   // create a block to stage a loop exit along.
   llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
@@ -796,6 +815,8 @@
   if (DI)
     DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
 
+  LoopStack.pop();
+
   // Emit the fall-through block.
   EmitBlock(LoopExit.getBlock(), true);
 }
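All four loop emitters (while, do, for, range-for) now bracket their emission with LoopStack.push()/LoopStack.pop(); through the CGBuilder insert hook added in CodeGenFunction.cpp further down, every branch created in between can pick up the active llvm.loop metadata. The pattern, sketched:

    LoopStack.push(CondBlock);   // this loop's metadata becomes active
    // ... emit condition, body, increment; new terminators get annotated
    LoopStack.pop();             // restore the enclosing loop's metadata, if any
    EmitBlock(LoopExit.getBlock(), true);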
@@ -848,7 +869,7 @@
     // for side effects.
     if (RV)
       EmitAnyExpr(RV);
-  } else if (RV == 0) {
+  } else if (!RV) {
     // Do nothing (return value is left uninitialized)
   } else if (FnRetTy->isReferenceType()) {
     // If this function returns a reference, take the address of the expression
@@ -878,7 +899,7 @@
   }
 
   ++NumReturnExprs;
-  if (RV == 0 || RV->isEvaluatable(getContext()))
+  if (!RV || RV->isEvaluatable(getContext()))
     ++NumSimpleReturnExprs;
 
   cleanupScope.ForceCleanup();
@@ -982,7 +1003,7 @@
   llvm::Value *Cond =
     Builder.CreateICmpULE(Diff, Builder.getInt(Range), "inbounds");
 
-  llvm::MDNode *Weights = 0;
+  llvm::MDNode *Weights = nullptr;
   if (SwitchWeights) {
     uint64_t ThisCount = CaseCnt.getCount();
     uint64_t DefaultCount = (*SwitchWeights)[0];
@@ -1066,7 +1087,7 @@
   const CaseStmt *NextCase = dyn_cast<CaseStmt>(S.getSubStmt());
 
   // Otherwise, iteratively add consecutive cases to this switch stmt.
-  while (NextCase && NextCase->getRHS() == 0) {
+  while (NextCase && NextCase->getRHS() == nullptr) {
     CurCase = NextCase;
     llvm::ConstantInt *CaseVal =
       Builder.getInt(CurCase->getLHS()->EvaluateKnownConstInt(getContext()));
@@ -1127,7 +1148,7 @@
                                             bool &FoundCase,
                               SmallVectorImpl<const Stmt*> &ResultStmts) {
   // If this is a null statement, just succeed.
-  if (S == 0)
+  if (!S)
     return Case ? CSFC_Success : CSFC_FallThrough;
 
   // If this is the switchcase (case 4: or default) that we're looking for, then
@@ -1135,7 +1156,7 @@
   if (const SwitchCase *SC = dyn_cast<SwitchCase>(S)) {
     if (S == Case) {
       FoundCase = true;
-      return CollectStatementsForCase(SC->getSubStmt(), 0, FoundCase,
+      return CollectStatementsForCase(SC->getSubStmt(), nullptr, FoundCase,
                                       ResultStmts);
     }
 
@@ -1146,7 +1167,7 @@
 
   // If we are in the live part of the code and we found our break statement,
   // return a success!
-  if (Case == 0 && isa<BreakStmt>(S))
+  if (!Case && isa<BreakStmt>(S))
     return CSFC_Success;
 
   // If this is a switch statement, then it might contain the SwitchCase, the
@@ -1191,7 +1212,7 @@
           // statements in the compound statement as candidates for inclusion.
           assert(FoundCase && "Didn't find case but returned fallthrough?");
           // We recursively found Case, so we're not looking for it anymore.
-          Case = 0;
+          Case = nullptr;
 
           // If we found the case and skipped declarations, we can't do the
           // optimization.
@@ -1205,7 +1226,7 @@
     // If we have statements in our range, then we know that the statements are
     // live and need to be added to the set of statements we're tracking.
     for (; I != E; ++I) {
-      switch (CollectStatementsForCase(*I, 0, FoundCase, ResultStmts)) {
+      switch (CollectStatementsForCase(*I, nullptr, FoundCase, ResultStmts)) {
       case CSFC_Failure: return CSFC_Failure;
       case CSFC_FallThrough:
         // A fallthrough result means that the statement was simple and just
@@ -1256,7 +1277,7 @@
   // First step, find the switch case that is being branched to.  We can do this
   // efficiently by scanning the SwitchCase list.
   const SwitchCase *Case = S.getSwitchCaseList();
-  const DefaultStmt *DefaultCase = 0;
+  const DefaultStmt *DefaultCase = nullptr;
 
   for (; Case; Case = Case->getNextSwitchCase()) {
     // It's either a default or case.  Just remember the default statement in
@@ -1278,10 +1299,10 @@
 
   // If we didn't find a matching case, we use a default if it exists, or we
   // elide the whole switch body!
-  if (Case == 0) {
+  if (!Case) {
     // It is safe to elide the body of the switch if it doesn't contain labels
     // etc.  If it is safe, return successfully with an empty ResultStmts list.
-    if (DefaultCase == 0)
+    if (!DefaultCase)
       return !CodeGenFunction::ContainsLabel(&S);
     Case = DefaultCase;
   }
@@ -1302,13 +1323,6 @@
 }
 
 void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) {
-  JumpDest SwitchExit = getJumpDestInCurrentScope("sw.epilog");
-
-  RunCleanupsScope ConditionScope(*this);
-
-  if (S.getConditionVariable())
-    EmitAutoVarDecl(*S.getConditionVariable());
-
   // Handle nested switch statements.
   llvm::SwitchInst *SavedSwitchInsn = SwitchInsn;
   SmallVector<uint64_t, 16> *SavedSwitchWeights = SwitchWeights;
@@ -1319,7 +1333,7 @@
   llvm::APSInt ConstantCondValue;
   if (ConstantFoldsToSimpleInteger(S.getCond(), ConstantCondValue)) {
     SmallVector<const Stmt*, 4> CaseStmts;
-    const SwitchCase *Case = 0;
+    const SwitchCase *Case = nullptr;
     if (FindCaseStatementsForValue(S, ConstantCondValue, CaseStmts,
                                    getContext(), Case)) {
       if (Case) {
@@ -1328,10 +1342,15 @@
       }
       RunCleanupsScope ExecutedScope(*this);
 
+      // Emit the condition variable, if needed, inside the entire cleanup
+      // scope used by this special case for constant-folded switches.
+      if (S.getConditionVariable())
+        EmitAutoVarDecl(*S.getConditionVariable());
+
       // At this point, we are no longer "within" a switch instance, so
       // we can temporarily enforce this to ensure that any embedded case
       // statements are not emitted.
-      SwitchInsn = 0;
+      SwitchInsn = nullptr;
 
       // Okay, we can dead code eliminate everything except this case.  Emit the
       // specified series of statements and we're good.
@@ -1348,6 +1367,11 @@
     }
   }
 
+  JumpDest SwitchExit = getJumpDestInCurrentScope("sw.epilog");
+
+  RunCleanupsScope ConditionScope(*this);
+  if (S.getConditionVariable())
+    EmitAutoVarDecl(*S.getConditionVariable());
   llvm::Value *CondV = EmitScalarExpr(S.getCond());
 
   // Create basic block to hold stuff that comes after switch
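Emission of the switch's condition variable is thus deferred until we know which path is taken: in the constant-folded fast path it is emitted inside ExecutedScope, and only when a real switch instruction is built do we open ConditionScope and emit it there. The folded path, illustrated:

    switch (int x = 4) {      // folds to a constant: only 'case 4' survives,
    case 4: use(x); break;    // and x is emitted within the executed case's scope
    default: other(); break;
    }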
@@ -1432,7 +1456,7 @@
 
 static std::string
 SimplifyConstraint(const char *Constraint, const TargetInfo &Target,
-                 SmallVectorImpl<TargetInfo::ConstraintInfo> *OutCons=0) {
+                 SmallVectorImpl<TargetInfo::ConstraintInfo> *OutCons=nullptr) {
   std::string Result;
 
   while (*Constraint) {
@@ -1918,6 +1942,12 @@
   return F;
 }
 
+llvm::Value *
+CodeGenFunction::GenerateCapturedStmtArgument(const CapturedStmt &S) {
+  LValue CapStruct = InitCapturedStruct(*this, S);
+  return CapStruct.getAddress();
+}
+
 /// Creates the outlined function for a CapturedStmt.
 llvm::Function *
 CodeGenFunction::GenerateCapturedStmtFunction(const CapturedDecl *CD,
@@ -1944,7 +1974,9 @@
   CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
 
   // Generate the function.
-  StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getBody()->getLocStart());
+  StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args,
+                CD->getLocation(),
+                CD->getBody()->getLocStart());
 
   // Set the context parameter in CapturedStmtInfo.
   llvm::Value *DeclPtr = LocalDeclMap[CD->getContextParam()];
@@ -1960,8 +1992,11 @@
     CXXThisValue = EmitLoadOfLValue(ThisLValue, Loc).getScalarVal();
   }
 
+  PGO.assignRegionCounters(CD, F);
   CapturedStmtInfo->EmitBody(*this, CD->getBody());
   FinishFunction(CD->getBodyRBrace());
+  PGO.emitInstrumentationData();
+  PGO.destroyRegionCounters();
 
   return F;
 }
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
new file mode 100644
index 0000000..17d8dd1
--- /dev/null
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -0,0 +1,78 @@
+//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This contains code to emit OpenMP nodes as LLVM code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CGOpenMPRuntime.h"
+#include "CodeGenFunction.h"
+#include "CodeGenModule.h"
+#include "clang/AST/Stmt.h"
+#include "clang/AST/StmtOpenMP.h"
+using namespace clang;
+using namespace CodeGen;
+
+//===----------------------------------------------------------------------===//
+//                              OpenMP Directive Emission
+//===----------------------------------------------------------------------===//
+
+void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
+  const CapturedStmt *CS = cast<CapturedStmt>(S.getAssociatedStmt());
+  llvm::Value *CapturedStruct = GenerateCapturedStmtArgument(*CS);
+
+  llvm::Value *OutlinedFn;
+  {
+    CodeGenFunction CGF(CGM, true);
+    CGCapturedStmtInfo CGInfo(*CS, CS->getCapturedRegionKind());
+    CGF.CapturedStmtInfo = &CGInfo;
+    OutlinedFn = CGF.GenerateCapturedStmtFunction(
+        CS->getCapturedDecl(), CS->getCapturedRecordDecl(), CS->getLocStart());
+  }
+
+  // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/)
+  llvm::Value *Args[] = {
+    CGM.getOpenMPRuntime().EmitOpenMPUpdateLocation(*this, S.getLocStart()),
+    Builder.getInt32(1), // Number of arguments after 'microtask' argument
+                         // (there is only one additional argument - 'context')
+    Builder.CreateBitCast(OutlinedFn,
+                          CGM.getOpenMPRuntime().getKmpc_MicroPointerTy()),
+    EmitCastToVoidPtr(CapturedStruct)
+  };
+  llvm::Constant *RTLFn = CGM.getOpenMPRuntime().CreateRuntimeFunction(
+      CGOpenMPRuntime::OMPRTL__kmpc_fork_call);
+  EmitRuntimeCall(RTLFn, Args);
+}
+
+void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
+  const CapturedStmt *CS = cast<CapturedStmt>(S.getAssociatedStmt());
+  const Stmt *Body = CS->getCapturedStmt();
+  LoopStack.setParallel();
+  LoopStack.setVectorizerEnable(true);
+  for (auto C : S.clauses()) {
+    switch (C->getClauseKind()) {
+    case OMPC_safelen: {
+      RValue Len = EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(),
+                               AggValueSlot::ignored(), true);
+      llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
+      LoopStack.setVectorizerWidth(Val->getZExtValue());
+      // In presence of finite 'safelen', it may be unsafe to mark all
+      // the memory instructions parallel, because loop-carried
+      // dependences of 'safelen' iterations are possible.
+      LoopStack.setParallel(false);
+      break;
+    }
+    default:
+      // Not handled yet.
+      break;
+    }
+  }
+  EmitStmt(Body);
+}
+
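The new file gives the first two OpenMP directives a CodeGen path: '#pragma omp parallel' outlines its CapturedStmt into a microtask and hands it to the runtime, while '#pragma omp simd' emits the loop with llvm.loop vectorizer hints and no runtime call. A hedged sketch of the source-to-IR shape (the outlined function's exact signature comes from CGOpenMPRuntime and is not spelled out here):

    void f(int *a, int n) {
      #pragma omp parallel
      { a[0] += n; }                 // body outlined into @.omp_outlined.
      // ~> call @__kmpc_fork_call(%ident_t* %loc, i32 1,
      //                           %kmpc_micro @.omp_outlined., i8* %ctx)

      #pragma omp simd safelen(8)
      for (int i = 0; i < n; ++i)    // branches tagged llvm.loop: vectorize.enable,
        a[i] += 1;                   // width 8; not marked parallel, since a finite
    }                                // safelen permits loop-carried dependences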
diff --git a/lib/CodeGen/CGVTables.cpp b/lib/CodeGen/CGVTables.cpp
index 0ad765c..d10db5a 100644
--- a/lib/CodeGen/CGVTables.cpp
+++ b/lib/CodeGen/CGVTables.cpp
@@ -71,11 +71,11 @@
                                       const ThunkInfo &Thunk) {
   // Emit the return adjustment.
   bool NullCheckValue = !ResultType->isReferenceType();
-  
-  llvm::BasicBlock *AdjustNull = 0;
-  llvm::BasicBlock *AdjustNotNull = 0;
-  llvm::BasicBlock *AdjustEnd = 0;
-  
+
+  llvm::BasicBlock *AdjustNull = nullptr;
+  llvm::BasicBlock *AdjustNotNull = nullptr;
+  llvm::BasicBlock *AdjustEnd = nullptr;
+
   llvm::Value *ReturnValue = RV.getScalarVal();
 
   if (NullCheckValue) {
@@ -159,7 +159,7 @@
   // with "this".
   llvm::Value *ThisPtr = &*AI;
   llvm::BasicBlock *EntryBB = Fn->begin();
-  llvm::Instruction *ThisStore = 0;
+  llvm::Instruction *ThisStore = nullptr;
   for (llvm::BasicBlock::iterator I = EntryBB->begin(), E = EntryBB->end();
        I != E; I++) {
     if (isa<llvm::StoreInst>(I) && I->getOperand(0) == ThisPtr) {
@@ -217,7 +217,7 @@
 
   // Start defining the function.
   StartFunction(GlobalDecl(), ResultType, Fn, FnInfo, FunctionArgs,
-                SourceLocation());
+                MD->getLocation(), SourceLocation());
 
   // Since we didn't pass a GlobalDecl to StartFunction, do this ourselves.
   CGM.getCXXABI().EmitInstanceFunctionProlog(*this);
@@ -321,27 +321,30 @@
   const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeGlobalDeclaration(GD);
 
   // FIXME: re-use FnInfo in this computation.
-  llvm::Constant *Entry = CGM.GetAddrOfThunk(GD, Thunk);
-  
+  llvm::Constant *C = CGM.GetAddrOfThunk(GD, Thunk);
+  llvm::GlobalValue *Entry;
+
   // Strip off a bitcast if we got one back.
-  if (llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(Entry)) {
+  if (llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(C)) {
     assert(CE->getOpcode() == llvm::Instruction::BitCast);
-    Entry = CE->getOperand(0);
+    Entry = cast<llvm::GlobalValue>(CE->getOperand(0));
+  } else {
+    Entry = cast<llvm::GlobalValue>(C);
   }
-  
+
   // There's already a declaration with the same name, check if it has the same
   // type or if we need to replace it.
-  if (cast<llvm::GlobalValue>(Entry)->getType()->getElementType() != 
+  if (Entry->getType()->getElementType() !=
       CGM.getTypes().GetFunctionTypeForVTable(GD)) {
-    llvm::GlobalValue *OldThunkFn = cast<llvm::GlobalValue>(Entry);
-    
+    llvm::GlobalValue *OldThunkFn = Entry;
+
     // If the types mismatch then we have to rewrite the definition.
     assert(OldThunkFn->isDeclaration() &&
            "Shouldn't replace non-declaration");
 
     // Remove the name from the old thunk function and get a new thunk.
     OldThunkFn->setName(StringRef());
-    Entry = CGM.GetAddrOfThunk(GD, Thunk);
+    Entry = cast<llvm::GlobalValue>(CGM.GetAddrOfThunk(GD, Thunk));
     
     // If needed, replace the old thunk with a bitcast.
     if (!OldThunkFn->use_empty()) {
@@ -443,13 +446,13 @@
   llvm::Constant *RTTI = CGM.GetAddrOfRTTIDescriptor(ClassType);
   
   unsigned NextVTableThunkIndex = 0;
-  
-  llvm::Constant *PureVirtualFn = 0, *DeletedVirtualFn = 0;
+
+  llvm::Constant *PureVirtualFn = nullptr, *DeletedVirtualFn = nullptr;
 
   for (unsigned I = 0; I != NumComponents; ++I) {
     VTableComponent Component = Components[I];
 
-    llvm::Constant *Init = 0;
+    llvm::Constant *Init = nullptr;
 
     switch (Component.getKind()) {
     case VTableComponent::CK_VCallOffset:
@@ -614,7 +617,7 @@
   if (const CXXMethodDecl *keyFunction = Context.getCurrentKeyFunction(RD)) {
     // If this class has a key function, use that to determine the
     // linkage of the vtable.
-    const FunctionDecl *def = 0;
+    const FunctionDecl *def = nullptr;
     if (keyFunction->hasBody(def))
       keyFunction = cast<CXXMethodDecl>(def);
     
diff --git a/lib/CodeGen/CGValue.h b/lib/CodeGen/CGValue.h
index da2a034..956f324 100644
--- a/lib/CodeGen/CGValue.h
+++ b/lib/CodeGen/CGValue.h
@@ -110,7 +110,8 @@
     Simple,       // This is a normal l-value, use getAddress().
     VectorElt,    // This is a vector element l-value (V[i]), use getVector*
     BitField,     // This is a bitfield l-value, use getBitfield*.
-    ExtVectorElt  // This is an extended vector subset, use getExtVectorComp
+    ExtVectorElt, // This is an extended vector subset, use getExtVectorComp
+    GlobalReg     // This is a register l-value, use getGlobalReg()
   } LVType;
 
   llvm::Value *V;
@@ -168,7 +169,7 @@
 private:
   void Initialize(QualType Type, Qualifiers Quals,
                   CharUnits Alignment,
-                  llvm::MDNode *TBAAInfo = 0) {
+                  llvm::MDNode *TBAAInfo = nullptr) {
     this->Type = Type;
     this->Quals = Quals;
     this->Alignment = Alignment.getQuantity();
@@ -179,7 +180,7 @@
     this->Ivar = this->ObjIsArray = this->NonGC = this->GlobalObjCRef = false;
     this->ImpreciseLifetime = false;
     this->ThreadLocalRef = false;
-    this->BaseIvarExp = 0;
+    this->BaseIvarExp = nullptr;
 
     // Initialize fields for TBAA.
     this->TBAABaseType = Type;
@@ -192,6 +193,7 @@
   bool isVectorElt() const { return LVType == VectorElt; }
   bool isBitField() const { return LVType == BitField; }
   bool isExtVectorElt() const { return LVType == ExtVectorElt; }
+  bool isGlobalReg() const { return LVType == GlobalReg; }
 
   bool isVolatileQualified() const { return Quals.hasVolatile(); }
   bool isRestrictQualified() const { return Quals.hasRestrict(); }
@@ -286,9 +288,12 @@
     return *BitFieldInfo;
   }
 
+  // global register lvalue
+  llvm::Value *getGlobalReg() const { assert(isGlobalReg()); return V; }
+
   static LValue MakeAddr(llvm::Value *address, QualType type,
                          CharUnits alignment, ASTContext &Context,
-                         llvm::MDNode *TBAAInfo = 0) {
+                         llvm::MDNode *TBAAInfo = nullptr) {
     Qualifiers qs = type.getQualifiers();
     qs.setObjCGCAttr(Context.getObjCGCAttrKind(type));
 
@@ -336,6 +341,16 @@
     return R;
   }
 
+  static LValue MakeGlobalReg(llvm::Value *Reg,
+                              QualType type,
+                              CharUnits Alignment) {
+    LValue R;
+    R.LVType = GlobalReg;
+    R.V = Reg;
+    R.Initialize(type, type.getQualifiers(), Alignment);
+    return R;
+  }
+
   RValue asAggregateRValue() const {
     // FIXME: Alignment
     return RValue::getAggregate(getAddress(), isVolatileQualified());
@@ -390,7 +405,7 @@
   /// ignored - Returns an aggregate value slot indicating that the
   /// aggregate value is being ignored.
   static AggValueSlot ignored() {
-    return forAddr(0, CharUnits(), Qualifiers(), IsNotDestructed,
+    return forAddr(nullptr, CharUnits(), Qualifiers(), IsNotDestructed,
                    DoesNotNeedGCBarriers, IsNotAliased);
   }
 
@@ -460,7 +475,7 @@
   }
 
   bool isIgnored() const {
-    return Addr == 0;
+    return Addr == nullptr;
   }
 
   CharUnits getAlignment() const {
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 12cb1ab..4b7d51b 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -9,6 +9,7 @@
   Linker
   MC
   ObjCARCOpts
+  ProfileData
   ScalarOpts
   Support
   Target
@@ -37,14 +38,17 @@
   CGExprComplex.cpp
   CGExprConstant.cpp
   CGExprScalar.cpp
+  CGLoopInfo.cpp
   CGObjC.cpp
   CGObjCGNU.cpp
   CGObjCMac.cpp
   CGObjCRuntime.cpp
   CGOpenCLRuntime.cpp
+  CGOpenMPRuntime.cpp
   CGRTTI.cpp
   CGRecordLayoutBuilder.cpp
   CGStmt.cpp
+  CGStmtOpenMP.cpp
   CGVTT.cpp
   CGVTables.cpp
   CodeGenABITypes.cpp
@@ -56,6 +60,7 @@
   CodeGenTypes.cpp
   ItaniumCXXABI.cpp
   MicrosoftCXXABI.cpp
+  MicrosoftRTTI.cpp
   ModuleBuilder.cpp
   TargetInfo.cpp
 
diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp
index 573f8e9..2fe9842 100644
--- a/lib/CodeGen/CodeGenAction.cpp
+++ b/lib/CodeGen/CodeGenAction.cpp
@@ -11,6 +11,7 @@
 #include "clang/AST/ASTConsumer.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/DeclGroup.h"
+#include "clang/AST/DeclCXX.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TargetInfo.h"
@@ -20,6 +21,7 @@
 #include "clang/Frontend/FrontendDiagnostic.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/LLVMContext.h"
@@ -103,6 +105,19 @@
       return true;
     }
 
+    void HandleInlineMethodDefinition(CXXMethodDecl *D) override {
+      PrettyStackTraceDecl CrashInfo(D, SourceLocation(),
+                                     Context->getSourceManager(),
+                                     "LLVM IR generation of inline method");
+      if (llvm::TimePassesIsEnabled)
+        LLVMIRGeneration.startTimer();
+
+      Gen->HandleInlineMethodDefinition(D);
+
+      if (llvm::TimePassesIsEnabled)
+        LLVMIRGeneration.stopTimer();
+    }
+
     void HandleTranslationUnit(ASTContext &C) override {
       {
         PrettyStackTraceString CrashInfo("Per-file LLVM IR generation");
@@ -220,6 +235,11 @@
     /// \return True if the diagnostic has been successfully reported, false
     /// otherwise.
     bool StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D);
+    /// \brief Specialized handler for the optimization diagnostic.
+    /// Note that this handler only accepts remarks and it always handles
+    /// them.
+    void
+    OptimizationRemarkHandler(const llvm::DiagnosticInfoOptimizationRemark &D);
   };
   
   void BackendConsumer::anchor() {}
@@ -243,7 +263,7 @@
   llvm::MemoryBuffer *CBuf =
   llvm::MemoryBuffer::getMemBufferCopy(LBuf->getBuffer(),
                                        LBuf->getBufferIdentifier());
-  FileID FID = CSM.createFileIDForMemBuffer(CBuf);
+  FileID FID = CSM.createFileID(CBuf);
 
   // Translate the offset into the file.
   unsigned Offset = D.getLoc().getPointer()  - LBuf->getBufferStart();
@@ -374,6 +394,51 @@
   return true;
 }
 
+void BackendConsumer::OptimizationRemarkHandler(
+    const llvm::DiagnosticInfoOptimizationRemark &D) {
+  // We only support remarks.
+  assert(D.getSeverity() == llvm::DS_Remark);
+
+  // Optimization remarks are active only if -Rpass=regexp is given and the
+  // regular expression pattern in 'regexp' matches the name of the pass
+  // given in \p D.
+  if (CodeGenOpts.OptimizationRemarkPattern &&
+      CodeGenOpts.OptimizationRemarkPattern->match(D.getPassName())) {
+    SourceManager &SourceMgr = Context->getSourceManager();
+    FileManager &FileMgr = SourceMgr.getFileManager();
+    StringRef Filename;
+    unsigned Line, Column;
+    D.getLocation(&Filename, &Line, &Column);
+    SourceLocation Loc;
+    const FileEntry *FE = FileMgr.getFile(Filename);
+    if (FE && Line > 0) {
+      // If -gcolumn-info was not used, Column will be 0. This upsets the
+      // source manager, so if Column is not set, set it to 1.
+      if (Column == 0)
+        Column = 1;
+      Loc = SourceMgr.translateFileLineCol(FE, Line, Column);
+    }
+    Diags.Report(Loc, diag::remark_fe_backend_optimization_remark)
+        << AddFlagValue(D.getPassName()) << D.getMsg().str();
+
+    if (Line == 0)
+      // If we could not extract a source location for the diagnostic,
+      // inform the user how they can get source locations back.
+      //
+      // FIXME: We should really be generating !srcloc annotations when
+      // -Rpass is used. !srcloc annotations need to be emitted in
+      // approximately the same spots as !dbg nodes.
+      Diags.Report(diag::note_fe_backend_optimization_remark_missing_loc);
+    else if (Loc.isInvalid())
+      // If we were not able to translate the file:line:col information
+      // back to a SourceLocation, at least emit a note stating that
+      // we could not translate this location. This can happen in the
+      // case of #line directives.
+      Diags.Report(diag::note_fe_backend_optimization_remark_invalid_loc)
+        << Filename << Line << Column;
+  }
+}
+
 /// \brief This function is invoked when the backend needs
 /// to report something to the user.
 void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) {
@@ -391,6 +456,11 @@
       return;
     ComputeDiagID(Severity, backend_frame_larger_than, DiagID);
     break;
+  case llvm::DK_OptimizationRemark:
+    // Optimization remarks are always handled completely by this
+    // handler. There is no generic way of emitting them.
+    OptimizationRemarkHandler(cast<DiagnosticInfoOptimizationRemark>(DI));
+    return;
   default:
     // Plugin IDs are not bound to any value as they are set dynamically.
     ComputeDiagRemarkID(Severity, backend_plugin, DiagID);
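For reference, remarks only fire when -Rpass=<regexp> matches a pass name, and precise locations additionally need column info, as the handler above notes. A hedged example of the intended use:

    // foo.cc, built with: clang++ -O2 -Rpass=inline -gcolumn-info -c foo.cc
    static int square(int x) { return x * x; }
    int quad(int x) { return square(square(x)); }
    // expected shape: foo.cc:3:27: remark: square inlined into quad [-Rpass=inline]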
@@ -410,7 +480,7 @@
 #undef ComputeDiagID
 
 CodeGenAction::CodeGenAction(unsigned _Act, LLVMContext *_VMContext)
-  : Act(_Act), LinkModule(0),
+  : Act(_Act), LinkModule(nullptr),
     VMContext(_VMContext ? _VMContext : new LLVMContext),
     OwnsVMContext(!_VMContext) {}
 
@@ -453,7 +523,7 @@
   case Backend_EmitBC:
     return CI.createDefaultOutputFile(true, InFile, "bc");
   case Backend_EmitNothing:
-    return 0;
+    return nullptr;
   case Backend_EmitMCNull:
   case Backend_EmitObj:
     return CI.createDefaultOutputFile(true, InFile, "o");
@@ -467,7 +537,7 @@
   BackendAction BA = static_cast<BackendAction>(Act);
   std::unique_ptr<raw_ostream> OS(GetOutputStream(CI, InFile, BA));
   if (BA != Backend_EmitNothing && !OS)
-    return 0;
+    return nullptr;
 
   llvm::Module *LinkModuleToUse = LinkModule;
 
@@ -482,7 +552,7 @@
     if (!BCBuf) {
       CI.getDiagnostics().Report(diag::err_cannot_open_file)
         << LinkBCFile << ErrorStr;
-      return 0;
+      return nullptr;
     }
 
     ErrorOr<llvm::Module *> ModuleOrErr =
@@ -490,14 +560,17 @@
     if (error_code EC = ModuleOrErr.getError()) {
       CI.getDiagnostics().Report(diag::err_cannot_open_file)
         << LinkBCFile << EC.message();
-      return 0;
+      return nullptr;
     }
     LinkModuleToUse = ModuleOrErr.get();
   }
 
+  StringRef MainFileName = getCompilerInstance().getCodeGenOpts().MainFileName;
+  if (MainFileName.empty())
+    MainFileName = InFile;
   BEConsumer = new BackendConsumer(BA, CI.getDiagnostics(), CI.getCodeGenOpts(),
                                    CI.getTargetOpts(), CI.getLangOpts(),
-                                   CI.getFrontendOpts().ShowTimers, InFile,
+                                   CI.getFrontendOpts().ShowTimers, MainFileName,
                                    LinkModuleToUse, OS.release(), *VMContext);
   return BEConsumer;
 }
diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp
index 806540a..7de619e 100644
--- a/lib/CodeGen/CodeGenFunction.cpp
+++ b/lib/CodeGen/CodeGenFunction.cpp
@@ -15,6 +15,7 @@
 #include "CGCUDARuntime.h"
 #include "CGCXXABI.h"
 #include "CGDebugInfo.h"
+#include "CGOpenMPRuntime.h"
 #include "CodeGenModule.h"
 #include "CodeGenPGO.h"
 #include "TargetInfo.h"
@@ -34,23 +35,26 @@
 
 CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext)
     : CodeGenTypeCache(cgm), CGM(cgm), Target(cgm.getTarget()),
-      Builder(cgm.getModule().getContext()), CapturedStmtInfo(0),
+      Builder(cgm.getModule().getContext(), llvm::ConstantFolder(),
+              CGBuilderInserterTy(this)), CapturedStmtInfo(nullptr),
       SanitizePerformTypeCheck(CGM.getSanOpts().Null |
                                CGM.getSanOpts().Alignment |
                                CGM.getSanOpts().ObjectSize |
                                CGM.getSanOpts().Vptr),
-      SanOpts(&CGM.getSanOpts()), AutoreleaseResult(false), BlockInfo(0),
-      BlockPointer(0), LambdaThisCaptureField(0), NormalCleanupDest(0),
-      NextCleanupDestIndex(1), FirstBlockInfo(0), EHResumeBlock(0),
-      ExceptionSlot(0), EHSelectorSlot(0), DebugInfo(CGM.getModuleDebugInfo()),
-      DisableDebugInfo(false), DidCallStackSave(false), IndirectBranch(0),
-      PGO(cgm), SwitchInsn(0), SwitchWeights(0),
-      CaseRangeBlock(0), UnreachableBlock(0), NumReturnExprs(0),
-      NumSimpleReturnExprs(0), CXXABIThisDecl(0), CXXABIThisValue(0),
-      CXXThisValue(0), CXXDefaultInitExprThis(0),
-      CXXStructorImplicitParamDecl(0), CXXStructorImplicitParamValue(0),
-      OutermostConditional(0), CurLexicalScope(0), TerminateLandingPad(0),
-      TerminateHandler(0), TrapBB(0) {
+      SanOpts(&CGM.getSanOpts()), AutoreleaseResult(false), BlockInfo(nullptr),
+      BlockPointer(nullptr), LambdaThisCaptureField(nullptr),
+      NormalCleanupDest(nullptr), NextCleanupDestIndex(1),
+      FirstBlockInfo(nullptr), EHResumeBlock(nullptr), ExceptionSlot(nullptr),
+      EHSelectorSlot(nullptr), DebugInfo(CGM.getModuleDebugInfo()),
+      DisableDebugInfo(false), DidCallStackSave(false), IndirectBranch(nullptr),
+      PGO(cgm), SwitchInsn(nullptr), SwitchWeights(nullptr),
+      CaseRangeBlock(nullptr), UnreachableBlock(nullptr), NumReturnExprs(0),
+      NumSimpleReturnExprs(0), CXXABIThisDecl(nullptr),
+      CXXABIThisValue(nullptr), CXXThisValue(nullptr),
+      CXXDefaultInitExprThis(nullptr), CXXStructorImplicitParamDecl(nullptr),
+      CXXStructorImplicitParamValue(nullptr), OutermostConditional(nullptr),
+      CurLexicalScope(nullptr), TerminateLandingPad(nullptr),
+      TerminateHandler(nullptr), TrapBB(nullptr) {
   if (!suppressNewContext)
     CGM.getCXXABI().getMangleContext().startNewFunction();
 
@@ -72,6 +76,10 @@
   // something.
   if (FirstBlockInfo)
     destroyBlockInfos(FirstBlockInfo);
+
+  if (getLangOpts().OpenMP) {
+    CGM.getOpenMPRuntime().FunctionFinished(*this);
+  }
 }
 
 
@@ -256,7 +264,7 @@
 
   // Remove the AllocaInsertPt instruction, which is just a convenience for us.
   llvm::Instruction *Ptr = AllocaInsertPt;
-  AllocaInsertPt = 0;
+  AllocaInsertPt = nullptr;
   Ptr->eraseFromParent();
 
   // If someone took the address of a label but never did an indirect goto, we
@@ -339,6 +347,8 @@
   // Each MDNode is a list in the form of "key", followed by N values, where
   // N is the number of kernel arguments.
 
+  const PrintingPolicy &Policy = ASTCtx.getPrintingPolicy();
+
   // MDNode for the kernel argument address space qualifiers.
   SmallVector<llvm::Value*, 8> addressQuals;
   addressQuals.push_back(llvm::MDString::get(Context, "kernel_arg_addr_space"));
@@ -372,7 +382,8 @@
         pointeeTy.getAddressSpace())));
 
       // Get argument type name.
-      std::string typeName = pointeeTy.getUnqualifiedType().getAsString() + "*";
+      std::string typeName =
+          pointeeTy.getUnqualifiedType().getAsString(Policy) + "*";
 
       // Turn "unsigned type" to "utype"
       std::string::size_type pos = typeName.find("unsigned");
@@ -398,7 +409,7 @@
       addressQuals.push_back(Builder.getInt32(AddrSpc));
 
       // Get argument type name.
-      std::string typeName = ty.getUnqualifiedType().getAsString();
+      std::string typeName = ty.getUnqualifiedType().getAsString(Policy);
 
       // Turn "unsigned type" to "utype"
       std::string::size_type pos = typeName.find("unsigned");
@@ -495,17 +506,34 @@
   OpenCLKernelMetadata->addOperand(kernelMDNode);
 }
 
+/// Determine whether the function F ends with a return stmt.
+static bool endsWithReturn(const Decl *F) {
+  const Stmt *Body = nullptr;
+  if (auto *FD = dyn_cast_or_null<FunctionDecl>(F))
+    Body = FD->getBody();
+  else if (auto *OMD = dyn_cast_or_null<ObjCMethodDecl>(F))
+    Body = OMD->getBody();
+
+  if (auto *CS = dyn_cast_or_null<CompoundStmt>(Body)) {
+    auto LastStmt = CS->body_rbegin();
+    if (LastStmt != CS->body_rend())
+      return isa<ReturnStmt>(*LastStmt);
+  }
+  return false;
+}
+
 void CodeGenFunction::StartFunction(GlobalDecl GD,
                                     QualType RetTy,
                                     llvm::Function *Fn,
                                     const CGFunctionInfo &FnInfo,
                                     const FunctionArgList &Args,
+                                    SourceLocation Loc,
                                     SourceLocation StartLoc) {
   const Decl *D = GD.getDecl();
 
   DidCallStackSave = false;
   CurCodeDecl = D;
-  CurFuncDecl = (D ? D->getNonClosureContext() : 0);
+  CurFuncDecl = (D ? D->getNonClosureContext() : nullptr);
   FnRetTy = RetTy;
   CurFn = Fn;
   CurFnInfo = &FnInfo;
@@ -577,9 +605,7 @@
     QualType FnType =
       getContext().getFunctionType(RetTy, ArgTypes,
                                    FunctionProtoType::ExtProtoInfo());
-
-    DI->setLocation(StartLoc);
-    DI->EmitFunctionStart(GD, FnType, CurFn, Builder);
+    DI->EmitFunctionStart(GD, Loc, StartLoc, FnType, CurFn, Builder);
   }
 
   if (ShouldInstrumentFunction())
@@ -590,12 +616,19 @@
 
   if (RetTy->isVoidType()) {
     // Void type; nothing to return.
-    ReturnValue = 0;
+    ReturnValue = nullptr;
+
+    // Count the implicit return.
+    if (!endsWithReturn(D))
+      ++NumReturnExprs;
   } else if (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect &&
              !hasScalarEvaluationKind(CurFnInfo->getReturnType())) {
     // Indirect aggregate return; emit returned value directly into sret slot.
     // This reduces code size, and affects correctness in C++.
-    ReturnValue = CurFn->arg_begin();
+    auto AI = CurFn->arg_begin();
+    if (CurFnInfo->getReturnInfo().isSRetAfterThis())
+      ++AI;
+    ReturnValue = AI;
   } else if (CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::InAlloca &&
              !hasScalarEvaluationKind(CurFnInfo->getReturnType())) {
     // Load the sret pointer from the argument struct and return into that.
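endsWithReturn above keeps the NumReturnExprs bookkeeping honest for void functions: one that falls off the end now counts an implicit return here, while one whose body already ends in a ReturnStmt is counted when that statement is emitted. Illustrative pair:

    void f() { g(); }          // no trailing return: implicit return counted in StartFunction
    void h() { g(); return; }  // trailing return: counted at EmitReturnStmt instead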
@@ -682,7 +715,7 @@
 /// this just calls EmitBlock().
 void CodeGenFunction::EmitBlockWithFallThrough(llvm::BasicBlock *BB,
                                                RegionCounter &Cnt) {
-  llvm::BasicBlock *SkipCountBB = 0;
+  llvm::BasicBlock *SkipCountBB = nullptr;
   if (HaveInsertPoint() && CGM.getCodeGenOpts().ProfileInstrGenerate) {
     // When instrumenting for profiling, the fallthrough to certain
     // statements needs to skip over the instrumentation code so that we
@@ -733,7 +766,7 @@
 
   // Check if we should generate debug info for this function.
   if (FD->hasAttr<NoDebugAttr>())
-    DebugInfo = NULL; // disable debug info indefinitely for this function
+    DebugInfo = nullptr; // disable debug info indefinitely for this function
 
   FunctionArgList Args;
   QualType ResTy = FD->getReturnType();
@@ -756,8 +789,24 @@
   if (Stmt *Body = FD->getBody()) BodyRange = Body->getSourceRange();
   CurEHLocation = BodyRange.getEnd();
 
+  // Use the location of the start of the function to determine where
+  // the function definition is located. By default use the location
+  // of the declaration as the location for the subprogram. A function
+  // may lack a declaration in the source code if it is created by codegen
+  // (examples: _GLOBAL__I_a, __cxx_global_array_dtor, thunk).
+  SourceLocation Loc;
+  if (FD) {
+    Loc = FD->getLocation();
+
+    // If this is a function specialization then use the pattern body
+    // as the location for the function.
+    if (const FunctionDecl *SpecDecl = FD->getTemplateInstantiationPattern())
+      if (SpecDecl->hasBody(SpecDecl))
+        Loc = SpecDecl->getLocation();
+  }
+
   // Emit the standard function prologue.
-  StartFunction(GD, ResTy, Fn, FnInfo, Args, BodyRange.getBegin());
+  StartFunction(GD, ResTy, Fn, FnInfo, Args, Loc, BodyRange.getBegin());
 
   // Generate the body of the function.
   PGO.assignRegionCounters(GD.getDecl(), CurFn);
@@ -830,7 +879,7 @@
 /// that we can just remove the code.
 bool CodeGenFunction::ContainsLabel(const Stmt *S, bool IgnoreCaseStmts) {
   // Null statement, not a label!
-  if (S == 0) return false;
+  if (!S) return false;
 
   // If this is a label, we have to emit the code, consider something like:
   // if (0) {  ...  foo:  bar(); }  goto foo;
@@ -862,7 +911,7 @@
 /// inside of it, this is fine.
 bool CodeGenFunction::containsBreak(const Stmt *S) {
   // Null statement, not a label!
-  if (S == 0) return false;
+  if (!S) return false;
 
   // If this is a switch or loop that defines its own break scope, then we can
   // include it and anything inside of it.
@@ -1188,7 +1237,7 @@
     }
   } else {
     SizeVal = CGM.getSize(Size);
-    vla = 0;
+    vla = nullptr;
   }
 
   // If the type contains a pointer to data member we can't memset it to zero.
@@ -1225,7 +1274,7 @@
 
 llvm::BlockAddress *CodeGenFunction::GetAddrOfLabel(const LabelDecl *L) {
   // Make sure that there is a block for the indirect goto.
-  if (IndirectBranch == 0)
+  if (!IndirectBranch)
     GetIndirectGotoBlock();
 
   llvm::BasicBlock *BB = getJumpDestForLabel(L).getBlock();
@@ -1259,7 +1308,7 @@
 
   // If it's a VLA, we have to load the stored size.  Note that
   // this is the size of the VLA in bytes, not its size in elements.
-  llvm::Value *numVLAElements = 0;
+  llvm::Value *numVLAElements = nullptr;
   if (isa<VariableArrayType>(arrayType)) {
     numVLAElements = getVLASize(cast<VariableArrayType>(arrayType)).first;
 
@@ -1352,7 +1401,7 @@
 std::pair<llvm::Value*, QualType>
 CodeGenFunction::getVLASize(const VariableArrayType *type) {
   // The number of elements so far; always size_t.
-  llvm::Value *numElements = 0;
+  llvm::Value *numElements = nullptr;
 
   QualType elementType;
   do {
@@ -1596,3 +1645,30 @@
 }
 
 CodeGenFunction::CGCapturedStmtInfo::~CGCapturedStmtInfo() { }
+
+void CodeGenFunction::InsertHelper(llvm::Instruction *I,
+                                   const llvm::Twine &Name,
+                                   llvm::BasicBlock *BB,
+                                   llvm::BasicBlock::iterator InsertPt) const {
+  LoopStack.InsertHelper(I);
+}
+
+template <bool PreserveNames>
+void CGBuilderInserter<PreserveNames>::InsertHelper(
+    llvm::Instruction *I, const llvm::Twine &Name, llvm::BasicBlock *BB,
+    llvm::BasicBlock::iterator InsertPt) const {
+  llvm::IRBuilderDefaultInserter<PreserveNames>::InsertHelper(I, Name, BB,
+                                                              InsertPt);
+  if (CGF)
+    CGF->InsertHelper(I, Name, BB, InsertPt);
+}
+
+#ifdef NDEBUG
+#define PreserveNames false
+#else
+#define PreserveNames true
+#endif
+template void CGBuilderInserter<PreserveNames>::InsertHelper(
+    llvm::Instruction *I, const llvm::Twine &Name, llvm::BasicBlock *BB,
+    llvm::BasicBlock::iterator InsertPt) const;
+#undef PreserveNames
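The instantiation dance at the end mirrors llvm::IRBuilderDefaultInserter, which keeps value names only in asserts builds, so PreserveNames must track NDEBUG for the explicit instantiation to match the CGBuilderTy typedef. Net effect, sketched:

    // Every instruction created through Builder now takes this detour:
    //   CGBuilderInserter::InsertHelper      inserts as usual, then
    //   -> CodeGenFunction::InsertHelper     if a CGF is attached
    //   -> LoopStack.InsertHelper(I)         tags I with the active llvm.loop metadata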
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 5b0653a..750bec8 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -16,6 +16,7 @@
 
 #include "CGBuilder.h"
 #include "CGDebugInfo.h"
+#include "CGLoopInfo.h"
 #include "CGValue.h"
 #include "CodeGenModule.h"
 #include "CodeGenPGO.h"
@@ -35,51 +36,51 @@
 #include "llvm/Support/Debug.h"
 
 namespace llvm {
-  class BasicBlock;
-  class LLVMContext;
-  class MDNode;
-  class Module;
-  class SwitchInst;
-  class Twine;
-  class Value;
-  class CallSite;
+class BasicBlock;
+class LLVMContext;
+class MDNode;
+class Module;
+class SwitchInst;
+class Twine;
+class Value;
+class CallSite;
 }
 
 namespace clang {
-  class ASTContext;
-  class BlockDecl;
-  class CXXDestructorDecl;
-  class CXXForRangeStmt;
-  class CXXTryStmt;
-  class Decl;
-  class LabelDecl;
-  class EnumConstantDecl;
-  class FunctionDecl;
-  class FunctionProtoType;
-  class LabelStmt;
-  class ObjCContainerDecl;
-  class ObjCInterfaceDecl;
-  class ObjCIvarDecl;
-  class ObjCMethodDecl;
-  class ObjCImplementationDecl;
-  class ObjCPropertyImplDecl;
-  class TargetInfo;
-  class TargetCodeGenInfo;
-  class VarDecl;
-  class ObjCForCollectionStmt;
-  class ObjCAtTryStmt;
-  class ObjCAtThrowStmt;
-  class ObjCAtSynchronizedStmt;
-  class ObjCAutoreleasePoolStmt;
+class ASTContext;
+class BlockDecl;
+class CXXDestructorDecl;
+class CXXForRangeStmt;
+class CXXTryStmt;
+class Decl;
+class LabelDecl;
+class EnumConstantDecl;
+class FunctionDecl;
+class FunctionProtoType;
+class LabelStmt;
+class ObjCContainerDecl;
+class ObjCInterfaceDecl;
+class ObjCIvarDecl;
+class ObjCMethodDecl;
+class ObjCImplementationDecl;
+class ObjCPropertyImplDecl;
+class TargetInfo;
+class TargetCodeGenInfo;
+class VarDecl;
+class ObjCForCollectionStmt;
+class ObjCAtTryStmt;
+class ObjCAtThrowStmt;
+class ObjCAtSynchronizedStmt;
+class ObjCAutoreleasePoolStmt;
 
 namespace CodeGen {
-  class CodeGenTypes;
-  class CGFunctionInfo;
-  class CGRecordLayout;
-  class CGBlockInfo;
-  class CGCXXABI;
-  class BlockFlags;
-  class BlockFieldFlags;
+class CodeGenTypes;
+class CGFunctionInfo;
+class CGRecordLayout;
+class CGBlockInfo;
+class CGCXXABI;
+class BlockFlags;
+class BlockFieldFlags;
 
 /// The kind of evaluation to perform on values of a particular
 /// type.  Basically, is the code in CGExprScalar, CGExprComplex, or
@@ -103,13 +104,13 @@
   /// A jump destination is an abstract label, branching to which may
   /// require a jump out through normal cleanups.
   struct JumpDest {
-    JumpDest() : Block(0), ScopeDepth(), Index(0) {}
+    JumpDest() : Block(nullptr), ScopeDepth(), Index(0) {}
     JumpDest(llvm::BasicBlock *Block,
              EHScopeStack::stable_iterator Depth,
              unsigned Index)
       : Block(Block), ScopeDepth(Depth), Index(Index) {}
 
-    bool isValid() const { return Block != 0; }
+    bool isValid() const { return Block != nullptr; }
     llvm::BasicBlock *getBlock() const { return Block; }
     EHScopeStack::stable_iterator getScopeDepth() const { return ScopeDepth; }
     unsigned getDestIndex() const { return Index; }
@@ -129,8 +130,15 @@
   const TargetInfo &Target;
 
   typedef std::pair<llvm::Value *, llvm::Value *> ComplexPairTy;
+  LoopInfoStack LoopStack;
   CGBuilderTy Builder;
 
+  /// \brief CGBuilder insert helper. This function is called after an
+  /// instruction is created using Builder.
+  void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name,
+                    llvm::BasicBlock *BB,
+                    llvm::BasicBlock::iterator InsertPt) const;
+
   /// CurFuncDecl - Holds the Decl for the current outermost
   /// non-closure context.
   const Decl *CurFuncDecl;
@@ -163,7 +171,7 @@
   public:
     explicit CGCapturedStmtInfo(const CapturedStmt &S,
                                 CapturedRegionKind K = CR_Default)
-      : Kind(K), ThisValue(0), CXXThisFieldDecl(0) {
+      : Kind(K), ThisValue(nullptr), CXXThisFieldDecl(nullptr) {
 
       RecordDecl::field_iterator Field =
         S.getCapturedRecordDecl()->field_begin();
@@ -190,11 +198,13 @@
       return CaptureFields.lookup(VD);
     }
 
-    bool isCXXThisExprCaptured() const { return CXXThisFieldDecl != 0; }
+    bool isCXXThisExprCaptured() const { return CXXThisFieldDecl != nullptr; }
     FieldDecl *getThisFieldDecl() const { return CXXThisFieldDecl; }
 
     /// \brief Emit the captured statement body.
     virtual void EmitBody(CodeGenFunction &CGF, Stmt *S) {
+      RegionCounter Cnt = CGF.getPGORegionCounter(S);
+      Cnt.beginRegion(CGF.Builder);
       CGF.EmitStmt(S);
     }
 
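
The two added lines above start a PGO region at the top of the captured statement body. Conceptually, beginRegion bumps one slot of the function's private counter array; a hedged sketch with the 3.5-era IRBuilder ("Counters" and "Index" are illustrative stand-ins for the state CodeGenPGO keeps):

    static void emitCounterBump(llvm::IRBuilder<> &B,
                                llvm::GlobalVariable *Counters, unsigned Index) {
      // &Counters[0][Index]: load the counter, add one, store it back.
      llvm::Value *Slot = B.CreateConstInBoundsGEP2_64(Counters, 0, Index);
      llvm::Value *Count = B.CreateLoad(Slot, "pgocount");
      B.CreateStore(B.CreateAdd(Count, B.getInt64(1)), Slot);
    }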
@@ -609,9 +619,9 @@
     }
 
     void end(CodeGenFunction &CGF) {
-      assert(CGF.OutermostConditional != 0);
+      assert(CGF.OutermostConditional != nullptr);
       if (CGF.OutermostConditional == this)
-        CGF.OutermostConditional = 0;
+        CGF.OutermostConditional = nullptr;
     }
 
     /// Returns a block which will be executed prior to each
@@ -623,7 +633,7 @@
 
   /// isInConditionalBranch - Return true if we're currently emitting
   /// one branch or the other of a conditional expression.
-  bool isInConditionalBranch() const { return OutermostConditional != 0; }
+  bool isInConditionalBranch() const { return OutermostConditional != nullptr; }
 
   void setBeforeOutermostConditional(llvm::Value *value, llvm::Value *addr) {
     assert(isInConditionalBranch());
@@ -644,7 +654,7 @@
   public:
     StmtExprEvaluation(CodeGenFunction &CGF)
       : CGF(CGF), SavedOutermostConditional(CGF.OutermostConditional) {
-      CGF.OutermostConditional = 0;
+      CGF.OutermostConditional = nullptr;
     }
 
     ~StmtExprEvaluation() {
@@ -661,7 +671,7 @@
     friend class CodeGenFunction;
 
   public:
-    PeepholeProtection() : Inst(0) {}
+    PeepholeProtection() : Inst(nullptr) {}
   };
 
   /// A non-RAII class containing all the information about a bound
@@ -679,7 +689,7 @@
                            bool boundLValue)
       : OpaqueValue(ov), BoundLValue(boundLValue) {}
   public:
-    OpaqueValueMappingData() : OpaqueValue(0) {}
+    OpaqueValueMappingData() : OpaqueValue(nullptr) {}
 
     static bool shouldBindAsLValue(const Expr *expr) {
       // gl-values should be bound as l-values for obvious reasons.
@@ -724,8 +734,8 @@
       return data;
     }
 
-    bool isValid() const { return OpaqueValue != 0; }
-    void clear() { OpaqueValue = 0; }
+    bool isValid() const { return OpaqueValue != nullptr; }
+    void clear() { OpaqueValue = nullptr; }
 
     void unbind(CodeGenFunction &CGF) {
       assert(OpaqueValue && "no data to unbind!");
@@ -968,7 +978,7 @@
   ASTContext &getContext() const { return CGM.getContext(); }
   CGDebugInfo *getDebugInfo() { 
     if (DisableDebugInfo) 
-      return NULL;
+      return nullptr;
     return DebugInfo; 
   }
   void disableDebugInfo() { DisableDebugInfo = true; }
@@ -1001,7 +1011,7 @@
   }
 
   llvm::BasicBlock *getInvokeDest() {
-    if (!EHStack.requiresLandingPad()) return 0;
+    if (!EHStack.requiresLandingPad()) return nullptr;
     return getInvokeDestImpl();
   }
 
@@ -1140,12 +1150,16 @@
 
   void GenerateCode(GlobalDecl GD, llvm::Function *Fn,
                     const CGFunctionInfo &FnInfo);
+  /// \brief Emit code for the start of a function.
+  /// \param Loc       The location to be associated with the function.
+  /// \param StartLoc  The location of the function body.
   void StartFunction(GlobalDecl GD,
                      QualType RetTy,
                      llvm::Function *Fn,
                      const CGFunctionInfo &FnInfo,
                      const FunctionArgList &Args,
-                     SourceLocation StartLoc);
+                     SourceLocation Loc = SourceLocation(),
+                     SourceLocation StartLoc = SourceLocation());
 
   void EmitConstructorBody(FunctionArgList &Args);
   void EmitDestructorBody(FunctionArgList &Args);
@@ -1285,8 +1299,8 @@
 
   /// createBasicBlock - Create an LLVM basic block.
   llvm::BasicBlock *createBasicBlock(const Twine &name = "",
-                                     llvm::Function *parent = 0,
-                                     llvm::BasicBlock *before = 0) {
+                                     llvm::Function *parent = nullptr,
+                                     llvm::BasicBlock *before = nullptr) {
 #ifdef NDEBUG
     return llvm::BasicBlock::Create(getLLVMContext(), "", parent, before);
 #else
@@ -1330,7 +1344,7 @@
   /// HaveInsertPoint - True if an insertion point is defined. If not, this
   /// indicates that the current code being emitted is unreachable.
   bool HaveInsertPoint() const {
-    return Builder.GetInsertBlock() != 0;
+    return Builder.GetInsertBlock() != nullptr;
   }
 
   /// EnsureInsertPoint - Ensure that an insertion point is defined so that
@@ -1741,19 +1755,21 @@
     llvm::Value *SizeForLifetimeMarkers;
 
     struct Invalid {};
-    AutoVarEmission(Invalid) : Variable(0) {}
+    AutoVarEmission(Invalid) : Variable(nullptr) {}
 
     AutoVarEmission(const VarDecl &variable)
-      : Variable(&variable), Address(0), NRVOFlag(0),
+      : Variable(&variable), Address(nullptr), NRVOFlag(nullptr),
         IsByRef(false), IsConstantAggregate(false),
-        SizeForLifetimeMarkers(0) {}
+        SizeForLifetimeMarkers(nullptr) {}
 
-    bool wasEmittedAsGlobal() const { return Address == 0; }
+    bool wasEmittedAsGlobal() const { return Address == nullptr; }
 
   public:
     static AutoVarEmission invalid() { return AutoVarEmission(Invalid()); }
 
-    bool useLifetimeMarkers() const { return SizeForLifetimeMarkers != 0; }
+    bool useLifetimeMarkers() const {
+      return SizeForLifetimeMarkers != nullptr;
+    }
     llvm::Value *getSizeForLifetimeMarkers() const {
       assert(useLifetimeMarkers());
       return SizeForLifetimeMarkers;
@@ -1872,6 +1888,10 @@
   llvm::Function *GenerateCapturedStmtFunction(const CapturedDecl *CD,
                                                const RecordDecl *RD,
                                                SourceLocation Loc);
+  llvm::Value *GenerateCapturedStmtArgument(const CapturedStmt &S);
+
+  void EmitOMPParallelDirective(const OMPParallelDirective &S);
+  void EmitOMPSimdDirective(const OMPSimdDirective &S);
 
   //===--------------------------------------------------------------------===//
   //                         LValue Expression Emission
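
The two EmitOMP* entry points are the CodeGen side of the directives below; at this revision they lower the body through the captured-statement machinery declared just above (source-level example only):

    void saxpy(int n, float a, const float *x, float *y) {
    #pragma omp parallel            // handled by EmitOMPParallelDirective
      { /* body is outlined via a CapturedStmt */ }
    #pragma omp simd                // handled by EmitOMPSimdDirective
      for (int i = 0; i < n; ++i)
        y[i] = a * x[i] + y[i];
    }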
@@ -1938,7 +1958,7 @@
   llvm::Value *EmitLoadOfScalar(llvm::Value *Addr, bool Volatile,
                                 unsigned Alignment, QualType Ty,
                                 SourceLocation Loc,
-                                llvm::MDNode *TBAAInfo = 0,
+                                llvm::MDNode *TBAAInfo = nullptr,
                                 QualType TBAABaseTy = QualType(),
                                 uint64_t TBAAOffset = 0);
 
@@ -1953,7 +1973,7 @@
   /// the LLVM value representation.
   void EmitStoreOfScalar(llvm::Value *Value, llvm::Value *Addr,
                          bool Volatile, unsigned Alignment, QualType Ty,
-                         llvm::MDNode *TBAAInfo = 0, bool isInit = false,
+                         llvm::MDNode *TBAAInfo = nullptr, bool isInit = false,
                          QualType TBAABaseTy = QualType(),
                          uint64_t TBAAOffset = 0);
 
@@ -1970,12 +1990,14 @@
   RValue EmitLoadOfLValue(LValue V, SourceLocation Loc);
   RValue EmitLoadOfExtVectorElementLValue(LValue V);
   RValue EmitLoadOfBitfieldLValue(LValue LV);
+  RValue EmitLoadOfGlobalRegLValue(LValue LV);
 
   /// EmitStoreThroughLValue - Store the specified rvalue into the specified
   /// lvalue, where both are guaranteed to have the same type, and that type
   /// is 'Ty'.
   void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false);
   void EmitStoreThroughExtVectorComponentLValue(RValue Src, LValue Dst);
+  void EmitStoreThroughGlobalRegLValue(RValue Src, LValue Dst);
 
   /// EmitStoreThroughBitfieldLValue - Store Src into Dst with same constraints
   /// as EmitStoreThroughLValue.
@@ -1984,7 +2006,7 @@
   /// bit-field contents after the store, appropriate for use as the result of
   /// an assignment to the bit-field.
   void EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst,
-                                      llvm::Value **Result=0);
+                                      llvm::Value **Result=nullptr);
 
   /// Emit an l-value for an assignment (simple or compound) of complex type.
   LValue EmitComplexAssignmentLValue(const BinaryOperator *E);
@@ -2000,6 +2022,7 @@
   // Note: only available for agg return types
   LValue EmitVAArgExprLValue(const VAArgExpr *E);
   LValue EmitDeclRefLValue(const DeclRefExpr *E);
+  LValue EmitReadRegister(const VarDecl *VD);
   LValue EmitStringLiteralLValue(const StringLiteral *E);
   LValue EmitObjCEncodeExprLValue(const ObjCEncodeExpr *E);
   LValue EmitPredefinedLValue(const PredefinedExpr *E);
@@ -2031,7 +2054,9 @@
       return ConstantEmission(C, false);
     }
 
-    LLVM_EXPLICIT operator bool() const { return ValueAndIsReference.getOpaqueValue() != 0; }
+    LLVM_EXPLICIT operator bool() const {
+      return ValueAndIsReference.getOpaqueValue() != nullptr;
+    }
 
     bool isReference() const { return ValueAndIsReference.getInt(); }
     LValue getReferenceLValue(CodeGenFunction &CGF, Expr *refExpr) const {
@@ -2094,15 +2119,15 @@
                   llvm::Value *Callee,
                   ReturnValueSlot ReturnValue,
                   const CallArgList &Args,
-                  const Decl *TargetDecl = 0,
-                  llvm::Instruction **callOrInvoke = 0);
+                  const Decl *TargetDecl = nullptr,
+                  llvm::Instruction **callOrInvoke = nullptr);
 
   RValue EmitCall(QualType FnType, llvm::Value *Callee,
                   SourceLocation CallLoc,
                   ReturnValueSlot ReturnValue,
                   CallExpr::const_arg_iterator ArgBeg,
                   CallExpr::const_arg_iterator ArgEnd,
-                  const Decl *TargetDecl = 0);
+                  const Decl *TargetDecl = nullptr);
   RValue EmitCallExpr(const CallExpr *E,
                       ReturnValueSlot ReturnValue = ReturnValueSlot());
 
@@ -2176,8 +2201,6 @@
                                              const llvm::CmpInst::Predicate Fp,
                                              const llvm::CmpInst::Predicate Ip,
                                              const llvm::Twine &Name = "");
-  llvm::Value *EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty);
-  llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
 
   llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
@@ -2187,7 +2210,7 @@
                                          unsigned Modifier,
                                          const CallExpr *E,
                                          SmallVectorImpl<llvm::Value *> &Ops,
-                                         llvm::Value *Align = 0);
+                                         llvm::Value *Align = nullptr);
   llvm::Function *LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
                                           unsigned Modifier, llvm::Type *ArgTy,
                                           const CallExpr *E);
@@ -2203,14 +2226,14 @@
   llvm::Value *EmitConcatVectors(llvm::Value *Lo, llvm::Value *Hi,
                                  llvm::Type *ArgTy);
   llvm::Value *EmitExtractHigh(llvm::Value *In, llvm::Type *ResTy);
-  // Helper functions for EmitARM64BuiltinExpr.
+  // Helper functions for EmitAArch64BuiltinExpr.
   llvm::Value *vectorWrapScalar8(llvm::Value *Op);
   llvm::Value *vectorWrapScalar16(llvm::Value *Op);
   llvm::Value *emitVectorWrappedScalar8Intrinsic(
       unsigned Int, SmallVectorImpl<llvm::Value *> &Ops, const char *Name);
   llvm::Value *emitVectorWrappedScalar16Intrinsic(
       unsigned Int, SmallVectorImpl<llvm::Value *> &Ops, const char *Name);
-  llvm::Value *EmitARM64BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+  llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitNeon64Call(llvm::Function *F,
                               llvm::SmallVectorImpl<llvm::Value *> &O,
                               const char *name);
@@ -2385,7 +2408,7 @@
   /// variables.
   void GenerateCXXGlobalInitFunc(llvm::Function *Fn,
                                  ArrayRef<llvm::Constant *> Decls,
-                                 llvm::GlobalVariable *Guard = 0);
+                                 llvm::GlobalVariable *Guard = nullptr);
 
   /// GenerateCXXGlobalDtorsFunc - Generates code for destroying global
   /// variables.
@@ -2413,7 +2436,7 @@
 
   void EmitLambdaExpr(const LambdaExpr *E, AggValueSlot Dest);
 
-  RValue EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest = 0);
+  RValue EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest = nullptr);
 
   //===--------------------------------------------------------------------===//
   //                         Annotations Emission
@@ -2559,7 +2582,7 @@
                    ForceColumnInfo);
     } else {
       // T::param_type_iterator might not have a default ctor.
-      const QualType *NoIter = 0;
+      const QualType *NoIter = nullptr;
       EmitCallArgs(Args, /*AllowExtraArguments=*/true, NoIter, NoIter, ArgBeg,
                    ArgEnd, ForceColumnInfo);
     }
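
For context on the GlobalReg lvalue entry points added above (EmitReadRegister, EmitLoadOfGlobalRegLValue, EmitStoreThroughGlobalRegLValue): they handle GNU global register variables, which read and write a named machine register instead of memory. A minimal example of the construct (the register name is target-specific; "sp" is shown for AArch64):

    register unsigned long current_sp asm("sp");

    unsigned long read_sp(void) {
      // Lowered through a read_register-style intrinsic, not a load.
      return current_sp;
    }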
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index c26f769..c55e231 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -18,6 +18,7 @@
 #include "CGDebugInfo.h"
 #include "CGObjCRuntime.h"
 #include "CGOpenCLRuntime.h"
+#include "CGOpenMPRuntime.h"
 #include "CodeGenFunction.h"
 #include "CodeGenPGO.h"
 #include "CodeGenTBAA.h"
@@ -47,6 +48,7 @@
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
+#include "llvm/ProfileData/InstrProfReader.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/ErrorHandling.h"
 
@@ -75,15 +77,17 @@
                              DiagnosticsEngine &diags)
     : Context(C), LangOpts(C.getLangOpts()), CodeGenOpts(CGO), TheModule(M),
       Diags(diags), TheDataLayout(TD), Target(C.getTargetInfo()),
-      ABI(createCXXABI(*this)), VMContext(M.getContext()), TBAA(0),
-      TheTargetCodeGenInfo(0), Types(*this), VTables(*this), ObjCRuntime(0),
-      OpenCLRuntime(0), CUDARuntime(0), DebugInfo(0), ARCData(0),
-      NoObjCARCExceptionsMetadata(0), RRData(0), PGOData(0),
-      CFConstantStringClassRef(0),
-      ConstantStringClassRef(0), NSConstantStringType(0),
-      NSConcreteGlobalBlock(0), NSConcreteStackBlock(0), BlockObjectAssign(0),
-      BlockObjectDispose(0), BlockDescriptorType(0), GenericBlockLiteralType(0),
-      LifetimeStartFn(0), LifetimeEndFn(0),
+      ABI(createCXXABI(*this)), VMContext(M.getContext()), TBAA(nullptr),
+      TheTargetCodeGenInfo(nullptr), Types(*this), VTables(*this),
+      ObjCRuntime(nullptr), OpenCLRuntime(nullptr), OpenMPRuntime(nullptr),
+      CUDARuntime(nullptr), DebugInfo(nullptr), ARCData(nullptr),
+      NoObjCARCExceptionsMetadata(nullptr), RRData(nullptr), PGOReader(nullptr),
+      CFConstantStringClassRef(nullptr), ConstantStringClassRef(nullptr),
+      NSConstantStringType(nullptr), NSConcreteGlobalBlock(nullptr),
+      NSConcreteStackBlock(nullptr), BlockObjectAssign(nullptr),
+      BlockObjectDispose(nullptr), BlockDescriptorType(nullptr),
+      GenericBlockLiteralType(nullptr), LifetimeStartFn(nullptr),
+      LifetimeEndFn(nullptr),
       SanitizerBlacklist(
           llvm::SpecialCaseList::createOrDie(CGO.SanitizerBlacklistFile)),
       SanOpts(SanitizerBlacklist->isIn(M) ? SanitizerOptions::Disabled
@@ -112,6 +116,8 @@
     createObjCRuntime();
   if (LangOpts.OpenCL)
     createOpenCLRuntime();
+  if (LangOpts.OpenMP)
+    createOpenMPRuntime();
   if (LangOpts.CUDA)
     createCUDARuntime();
 
@@ -134,13 +140,20 @@
     ARCData = new ARCEntrypoints();
   RRData = new RREntrypoints();
 
-  if (!CodeGenOpts.InstrProfileInput.empty())
-    PGOData = new PGOProfileData(*this, CodeGenOpts.InstrProfileInput);
+  if (!CodeGenOpts.InstrProfileInput.empty()) {
+    if (llvm::error_code EC = llvm::IndexedInstrProfReader::create(
+            CodeGenOpts.InstrProfileInput, PGOReader)) {
+      unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
+                                              "Could not read profile: %0");
+      getDiags().Report(DiagID) << EC.message();
+    }
+  }
 }
 
 CodeGenModule::~CodeGenModule() {
   delete ObjCRuntime;
   delete OpenCLRuntime;
+  delete OpenMPRuntime;
   delete CUDARuntime;
   delete TheTargetCodeGenInfo;
   delete TBAA;
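
A hedged usage sketch of the same reader-creation pattern in isolation (the profile path is hypothetical; llvm::error_code is the 3.5-era error type the hunk above uses):

    #include "llvm/ProfileData/InstrProfReader.h"
    #include "llvm/Support/raw_ostream.h"

    static void loadProfile() {
      std::unique_ptr<llvm::IndexedInstrProfReader> Reader;
      if (llvm::error_code EC =
              llvm::IndexedInstrProfReader::create("default.profdata", Reader)) {
        llvm::errs() << "could not read profile: " << EC.message() << "\n";
        return;
      }
      // On success, Reader owns the indexed profile data.
    }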
@@ -172,6 +185,10 @@
   OpenCLRuntime = new CGOpenCLRuntime(*this);
 }
 
+void CodeGenModule::createOpenMPRuntime() {
+  OpenMPRuntime = new CGOpenMPRuntime(*this);
+}
+
 void CodeGenModule::createCUDARuntime() {
   CUDARuntime = CreateNVCUDARuntime(*this);
 }
@@ -185,13 +202,13 @@
     llvm::GlobalValue *Entry = GetGlobalValue(MangledName);
     if (!Entry)
       continue;
-    llvm::Function *OldF = cast<llvm::Function>(Entry);
-    llvm::Function *NewF = dyn_cast<llvm::Function>(Replacement);
+    auto *OldF = cast<llvm::Function>(Entry);
+    auto *NewF = dyn_cast<llvm::Function>(Replacement);
     if (!NewF) {
-      if (llvm::GlobalAlias *Alias = dyn_cast<llvm::GlobalAlias>(Replacement)) {
-        NewF = dyn_cast<llvm::Function>(Alias->getAliasedGlobal());
+      if (auto *Alias = dyn_cast<llvm::GlobalAlias>(Replacement)) {
+        NewF = dyn_cast<llvm::Function>(Alias->getAliasee());
       } else {
-        llvm::ConstantExpr *CE = cast<llvm::ConstantExpr>(Replacement);
+        auto *CE = cast<llvm::ConstantExpr>(Replacement);
         assert(CE->getOpcode() == llvm::Instruction::BitCast ||
                CE->getOpcode() == llvm::Instruction::GetElementPtr);
         NewF = dyn_cast<llvm::Function>(CE->getOperand(0));
@@ -213,46 +230,27 @@
   // that we have to do this in CodeGen, but we only construct mangled names
   // and aliases during codegen.
   bool Error = false;
+  DiagnosticsEngine &Diags = getDiags();
   for (std::vector<GlobalDecl>::iterator I = Aliases.begin(),
          E = Aliases.end(); I != E; ++I) {
     const GlobalDecl &GD = *I;
-    const ValueDecl *D = cast<ValueDecl>(GD.getDecl());
+    const auto *D = cast<ValueDecl>(GD.getDecl());
     const AliasAttr *AA = D->getAttr<AliasAttr>();
     StringRef MangledName = getMangledName(GD);
     llvm::GlobalValue *Entry = GetGlobalValue(MangledName);
-    llvm::GlobalAlias *Alias = cast<llvm::GlobalAlias>(Entry);
-    llvm::GlobalValue *GV = Alias->getAliasedGlobal();
-    if (!GV) {
+    auto *Alias = cast<llvm::GlobalAlias>(Entry);
+    llvm::GlobalValue *GV = Alias->getAliasee();
+    if (GV->isDeclaration()) {
       Error = true;
-      getDiags().Report(AA->getLocation(), diag::err_cyclic_alias);
-    } else if (GV->isDeclaration()) {
-      Error = true;
-      getDiags().Report(AA->getLocation(), diag::err_alias_to_undefined);
+      Diags.Report(AA->getLocation(), diag::err_alias_to_undefined);
     }
 
-    // We have to handle alias to weak aliases in here. LLVM itself disallows
-    // this since the object semantics would not match the IL one. For
-    // compatibility with gcc we implement it by just pointing the alias
-    // to its aliasee's aliasee. We also warn, since the user is probably
-    // expecting the link to be weak.
-    llvm::Constant *Aliasee = Alias->getAliasee();
-    llvm::GlobalValue *AliaseeGV;
-    if (auto CE = dyn_cast<llvm::ConstantExpr>(Aliasee)) {
-      assert((CE->getOpcode() == llvm::Instruction::BitCast ||
-              CE->getOpcode() == llvm::Instruction::AddrSpaceCast) &&
-             "Unsupported aliasee");
-      AliaseeGV = cast<llvm::GlobalValue>(CE->getOperand(0));
-    } else {
-      AliaseeGV = cast<llvm::GlobalValue>(Aliasee);
-    }
-    if (auto GA = dyn_cast<llvm::GlobalAlias>(AliaseeGV)) {
-      if (GA->mayBeOverridden()) {
-        getDiags().Report(AA->getLocation(), diag::warn_alias_to_weak_alias)
-          << GA->getAliasedGlobal()->getName() << GA->getName();
-        Aliasee = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
-            GA->getAliasee(), Alias->getType());
-        Alias->setAliasee(Aliasee);
-      }
+    llvm::GlobalObject *Aliasee = Alias->getAliasee();
+    if (const SectionAttr *SA = D->getAttr<SectionAttr>()) {
+      StringRef AliasSection = SA->getName();
+      if (AliasSection != Aliasee->getSection())
+        Diags.Report(SA->getLocation(), diag::warn_alias_with_section)
+            << AliasSection;
     }
   }
   if (!Error)
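
The rewritten loop above drops the old alias-to-weak-alias fixup and instead warns when an alias carries a section attribute that differs from its aliasee's section. The case it diagnoses looks roughly like this (illustrative):

    int target __attribute__((section(".mydata"))) = 0;
    extern int alias_var
        __attribute__((alias("target"), section(".otherdata")));
    // -> warn_alias_with_section: the alias stays in the aliasee's section.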
@@ -263,7 +261,7 @@
     const GlobalDecl &GD = *I;
     StringRef MangledName = getMangledName(GD);
     llvm::GlobalValue *Entry = GetGlobalValue(MangledName);
-    llvm::GlobalAlias *Alias = cast<llvm::GlobalAlias>(Entry);
+    auto *Alias = cast<llvm::GlobalAlias>(Entry);
     Alias->replaceAllUsesWith(llvm::UndefValue::get(Alias->getType()));
     Alias->eraseFromParent();
   }
@@ -283,6 +281,12 @@
   if (ObjCRuntime)
     if (llvm::Function *ObjCInitFunction = ObjCRuntime->ModuleInitFunction())
       AddGlobalCtor(ObjCInitFunction);
+  if (getCodeGenOpts().ProfileInstrGenerate)
+    if (llvm::Function *PGOInit = CodeGenPGO::emitInitialization(*this))
+      AddGlobalCtor(PGOInit, 0);
+  if (PGOReader && PGOStats.isOutOfDate())
+    getDiags().Report(diag::warn_profile_data_out_of_date)
+        << PGOStats.Visited << PGOStats.Missing << PGOStats.Mismatched;
   EmitCtorList(GlobalCtors, "llvm.global_ctors");
   EmitCtorList(GlobalDtors, "llvm.global_dtors");
   EmitGlobalAnnotations();
@@ -299,12 +303,10 @@
     getModule().addModuleFlag(llvm::Module::Warning, "Dwarf Version",
                               CodeGenOpts.DwarfVersion);
   if (DebugInfo)
-    // We support a single version in the linked module: error out when
-    // modules do not have the same version. We are going to implement dropping
-    // debug info when the version number is not up-to-date. Once that is
-    // done, the bitcode linker is not going to see modules with different
-    // version numbers.
-    getModule().addModuleFlag(llvm::Module::Error, "Debug Info Version",
+    // We support a single version in the linked module. The LLVM
+    // parser will drop debug info with a different version number
+    // (and warn about it, too).
+    getModule().addModuleFlag(llvm::Module::Warning, "Debug Info Version",
                               llvm::DEBUG_METADATA_VERSION);
 
   SimplifyPersonality();
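
With the flag downgraded from Error to Warning, linking modules with mismatched debug info versions now warns and drops the stale debug info rather than failing. A sketch of the resulting module flag, in the metadata syntax of this era (behavior 2 is Warning; the trailing value stands in for DEBUG_METADATA_VERSION at this revision):

    // !llvm.module.flags = !{!0}
    // !0 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}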
@@ -328,25 +330,25 @@
 
 llvm::MDNode *CodeGenModule::getTBAAInfo(QualType QTy) {
   if (!TBAA)
-    return 0;
+    return nullptr;
   return TBAA->getTBAAInfo(QTy);
 }
 
 llvm::MDNode *CodeGenModule::getTBAAInfoForVTablePtr() {
   if (!TBAA)
-    return 0;
+    return nullptr;
   return TBAA->getTBAAInfoForVTablePtr();
 }
 
 llvm::MDNode *CodeGenModule::getTBAAStructInfo(QualType QTy) {
   if (!TBAA)
-    return 0;
+    return nullptr;
   return TBAA->getTBAAStructInfo(QTy);
 }
 
 llvm::MDNode *CodeGenModule::getTBAAStructTypeInfo(QualType QTy) {
   if (!TBAA)
-    return 0;
+    return nullptr;
   return TBAA->getTBAAStructTypeInfo(QTy);
 }
 
@@ -354,7 +356,7 @@
                                                   llvm::MDNode *AccessN,
                                                   uint64_t O) {
   if (!TBAA)
-    return 0;
+    return nullptr;
   return TBAA->getTBAAStructTagInfo(BaseTy, AccessN, O);
 }
 
@@ -453,7 +455,7 @@
 }
 
 StringRef CodeGenModule::getMangledName(GlobalDecl GD) {
-  const NamedDecl *ND = cast<NamedDecl>(GD.getDecl());
+  const auto *ND = cast<NamedDecl>(GD.getDecl());
 
   StringRef &Str = MangledDeclNames[GD.getCanonicalDecl()];
   if (!Str.empty())
@@ -469,9 +471,9 @@
   
   SmallString<256> Buffer;
   llvm::raw_svector_ostream Out(Buffer);
-  if (const CXXConstructorDecl *D = dyn_cast<CXXConstructorDecl>(ND))
+  if (const auto *D = dyn_cast<CXXConstructorDecl>(ND))
     getCXXABI().getMangleContext().mangleCXXCtor(D, GD.getCtorType(), Out);
-  else if (const CXXDestructorDecl *D = dyn_cast<CXXDestructorDecl>(ND))
+  else if (const auto *D = dyn_cast<CXXDestructorDecl>(ND))
     getCXXABI().getMangleContext().mangleCXXDtor(D, GD.getDtorType(), Out);
   else
     getCXXABI().getMangleContext().mangleName(ND, Out);
@@ -492,12 +494,12 @@
   MangleContext &MangleCtx = getCXXABI().getMangleContext();
   const Decl *D = GD.getDecl();
   llvm::raw_svector_ostream Out(Buffer.getBuffer());
-  if (D == 0)
+  if (!D)
     MangleCtx.mangleGlobalBlock(BD, 
       dyn_cast_or_null<VarDecl>(initializedGlobalDecl.getDecl()), Out);
-  else if (const CXXConstructorDecl *CD = dyn_cast<CXXConstructorDecl>(D))
+  else if (const auto *CD = dyn_cast<CXXConstructorDecl>(D))
     MangleCtx.mangleCtorBlock(CD, GD.getCtorType(), BD, Out);
-  else if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(D))
+  else if (const auto *DD = dyn_cast<CXXDestructorDecl>(D))
     MangleCtx.mangleDtorBlock(DD, GD.getDtorType(), BD, Out);
   else
     MangleCtx.mangleBlock(cast<DeclContext>(D), BD, Out);
@@ -509,16 +511,17 @@
 
 /// AddGlobalCtor - Add a function to the list that will be called before
 /// main() runs.
-void CodeGenModule::AddGlobalCtor(llvm::Function * Ctor, int Priority) {
+void CodeGenModule::AddGlobalCtor(llvm::Function *Ctor, int Priority,
+                                  llvm::Constant *AssociatedData) {
   // FIXME: Type coercion of void()* types.
-  GlobalCtors.push_back(std::make_pair(Ctor, Priority));
+  GlobalCtors.push_back(Structor(Priority, Ctor, AssociatedData));
 }
 
 /// AddGlobalDtor - Add a function to the list that will be called
 /// when the module is unloaded.
-void CodeGenModule::AddGlobalDtor(llvm::Function * Dtor, int Priority) {
+void CodeGenModule::AddGlobalDtor(llvm::Function *Dtor, int Priority) {
   // FIXME: Type coercion of void()* types.
-  GlobalDtors.push_back(std::make_pair(Dtor, Priority));
+  GlobalDtors.push_back(Structor(Priority, Dtor, nullptr));
 }
 
 void CodeGenModule::EmitCtorList(const CtorList &Fns, const char *GlobalName) {
@@ -526,16 +529,19 @@
   llvm::FunctionType* CtorFTy = llvm::FunctionType::get(VoidTy, false);
   llvm::Type *CtorPFTy = llvm::PointerType::getUnqual(CtorFTy);
 
-  // Get the type of a ctor entry, { i32, void ()* }.
-  llvm::StructType *CtorStructTy =
-    llvm::StructType::get(Int32Ty, llvm::PointerType::getUnqual(CtorFTy), NULL);
+  // Get the type of a ctor entry, { i32, void ()*, i8* }.
+  llvm::StructType *CtorStructTy = llvm::StructType::get(
+      Int32Ty, llvm::PointerType::getUnqual(CtorFTy), VoidPtrTy, NULL);
 
   // Construct the constructor and destructor arrays.
   SmallVector<llvm::Constant*, 8> Ctors;
   for (CtorList::const_iterator I = Fns.begin(), E = Fns.end(); I != E; ++I) {
     llvm::Constant *S[] = {
-      llvm::ConstantInt::get(Int32Ty, I->second, false),
-      llvm::ConstantExpr::getBitCast(I->first, CtorPFTy)
+      llvm::ConstantInt::get(Int32Ty, I->Priority, false),
+      llvm::ConstantExpr::getBitCast(I->Initializer, CtorPFTy),
+      (I->AssociatedData
+           ? llvm::ConstantExpr::getBitCast(I->AssociatedData, VoidPtrTy)
+           : llvm::Constant::getNullValue(VoidPtrTy))
     };
     Ctors.push_back(llvm::ConstantStruct::get(CtorStructTy, S));
   }
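
Each llvm.global_ctors entry now carries a third, i8* associated-data slot (null when AddGlobalCtor is called without one). A sketch of the resulting array for a single default-priority constructor (illustrative names):

    // @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }]
    //     [{ i32, void ()*, i8* } { i32 65535, void ()* @init, i8* null }]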
@@ -551,69 +557,25 @@
 
 llvm::GlobalValue::LinkageTypes
 CodeGenModule::getFunctionLinkage(GlobalDecl GD) {
-  const FunctionDecl *D = cast<FunctionDecl>(GD.getDecl());
+  const auto *D = cast<FunctionDecl>(GD.getDecl());
 
   GVALinkage Linkage = getContext().GetGVALinkageForFunction(D);
 
-  if (Linkage == GVA_Internal)
-    return llvm::Function::InternalLinkage;
-  
-  if (D->hasAttr<DLLExportAttr>())
-    return llvm::Function::ExternalLinkage;
-  
-  if (D->hasAttr<WeakAttr>())
-    return llvm::Function::WeakAnyLinkage;
-  
-  // In C99 mode, 'inline' functions are guaranteed to have a strong
-  // definition somewhere else, so we can use available_externally linkage.
-  if (Linkage == GVA_C99Inline)
-    return llvm::Function::AvailableExternallyLinkage;
-
-  // Note that Apple's kernel linker doesn't support symbol
-  // coalescing, so we need to avoid linkonce and weak linkages there.
-  // Normally, this means we just map to internal, but for explicit
-  // instantiations we'll map to external.
-
-  // In C++, the compiler has to emit a definition in every translation unit
-  // that references the function.  We should use linkonce_odr because
-  // a) if all references in this translation unit are optimized away, we
-  // don't need to codegen it.  b) if the function persists, it needs to be
-  // merged with other definitions. c) C++ has the ODR, so we know the
-  // definition is dependable.
-  if (Linkage == GVA_CXXInline || Linkage == GVA_TemplateInstantiation)
-    return !Context.getLangOpts().AppleKext 
-             ? llvm::Function::LinkOnceODRLinkage 
-             : llvm::Function::InternalLinkage;
-  
-  // An explicit instantiation of a template has weak linkage, since
-  // explicit instantiations can occur in multiple translation units
-  // and must all be equivalent. However, we are not allowed to
-  // throw away these explicit instantiations.
-  if (Linkage == GVA_StrongODR)
-    return !Context.getLangOpts().AppleKext
-             ? llvm::Function::WeakODRLinkage
-             : llvm::Function::ExternalLinkage;
-
-  // Destructor variants in the Microsoft C++ ABI are always linkonce_odr thunks
-  // emitted on an as-needed basis.
   if (isa<CXXDestructorDecl>(D) &&
       getCXXABI().useThunkForDtorVariant(cast<CXXDestructorDecl>(D),
-                                         GD.getDtorType()))
-    return llvm::Function::LinkOnceODRLinkage;
+                                         GD.getDtorType())) {
+    // Destructor variants in the Microsoft C++ ABI are always internal or
+    // linkonce_odr thunks emitted on an as-needed basis.
+    return Linkage == GVA_Internal ? llvm::GlobalValue::InternalLinkage
+                                   : llvm::GlobalValue::LinkOnceODRLinkage;
+  }
 
-  // Otherwise, we have strong external linkage.
-  assert(Linkage == GVA_StrongExternal);
-  return llvm::Function::ExternalLinkage;
+  return getLLVMLinkageForDeclarator(D, Linkage, /*isConstantVariable=*/false);
 }
 
-
-/// SetFunctionDefinitionAttributes - Set attributes for a global.
-///
-/// FIXME: This is currently only done for aliases and functions, but not for
-/// variables (these details are set in EmitGlobalVarDefinition for variables).
-void CodeGenModule::SetFunctionDefinitionAttributes(const FunctionDecl *D,
-                                                    llvm::GlobalValue *GV) {
-  SetCommonAttributes(D, GV);
+void CodeGenModule::setFunctionDefinitionAttributes(const FunctionDecl *D,
+                                                    llvm::Function *F) {
+  setNonAliasAttributes(D, F);
 }
 
 void CodeGenModule::SetLLVMFunctionAttributes(const Decl *D,
@@ -717,7 +679,7 @@
 
   if (isa<CXXConstructorDecl>(D) || isa<CXXDestructorDecl>(D))
     F->setUnnamedAddr(true);
-  else if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(D))
+  else if (const auto *MD = dyn_cast<CXXMethodDecl>(D))
     if (MD->isVirtual())
       F->setUnnamedAddr(true);
 
@@ -732,20 +694,23 @@
 
 void CodeGenModule::SetCommonAttributes(const Decl *D,
                                         llvm::GlobalValue *GV) {
-  if (const NamedDecl *ND = dyn_cast<NamedDecl>(D))
+  if (const auto *ND = dyn_cast<NamedDecl>(D))
     setGlobalVisibility(GV, ND);
   else
     GV->setVisibility(llvm::GlobalValue::DefaultVisibility);
 
   if (D->hasAttr<UsedAttr>())
     addUsedGlobal(GV);
+}
+
+void CodeGenModule::setNonAliasAttributes(const Decl *D,
+                                          llvm::GlobalObject *GO) {
+  SetCommonAttributes(D, GO);
 
   if (const SectionAttr *SA = D->getAttr<SectionAttr>())
-    GV->setSection(SA->getName());
+    GO->setSection(SA->getName());
 
-  // Alias cannot have attributes. Filter them here.
-  if (!isa<llvm::GlobalAlias>(GV))
-    getTargetCodeGenInfo().SetTargetAttributes(D, GV, *this);
+  getTargetCodeGenInfo().SetTargetAttributes(D, GO, *this);
 }
 
 void CodeGenModule::SetInternalFunctionAttributes(const Decl *D,
@@ -756,7 +721,32 @@
 
   F->setLinkage(llvm::Function::InternalLinkage);
 
-  SetCommonAttributes(D, F);
+  setNonAliasAttributes(D, F);
+}
+
+static void setLinkageAndVisibilityForGV(llvm::GlobalValue *GV,
+                                         const NamedDecl *ND) {
+  // Set linkage and visibility in case we never see a definition.
+  LinkageInfo LV = ND->getLinkageAndVisibility();
+  if (LV.getLinkage() != ExternalLinkage) {
+    // Don't set internal linkage on declarations.
+  } else {
+    if (ND->hasAttr<DLLImportAttr>()) {
+      GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
+      GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
+    } else if (ND->hasAttr<DLLExportAttr>()) {
+      GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
+      GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
+    } else if (ND->hasAttr<WeakAttr>() || ND->isWeakImported()) {
+      // "extern_weak" is overloaded in LLVM; we probably should have
+      // separate linkage types for this.
+      GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage);
+    }
+
+    // Set visibility on a declaration only if it's explicit.
+    if (LV.isVisibilityExplicit())
+      GV->setVisibility(CodeGenModule::GetLLVMVisibility(LV.getVisibility()));
+  }
 }
 
 void CodeGenModule::SetFunctionAttributes(GlobalDecl GD,
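
The new setLinkageAndVisibilityForGV helper centralizes the declaration-side cases that were previously open-coded for functions; in source form they look like this (illustrative declarations):

    __declspec(dllimport) void imported();          // external + dllimport
    __declspec(dllexport) void exported();          // external + dllexport
    __attribute__((weak)) extern int maybe_there;   // extern_weak
    __attribute__((visibility("hidden"))) void h(); // explicit visibility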
@@ -770,7 +760,7 @@
     return;
   }
 
-  const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
+  const auto *FD = cast<FunctionDecl>(GD.getDecl());
 
   if (!IsIncompleteFunction)
     SetLLVMFunctionAttributes(FD, getTypes().arrangeGlobalDeclaration(GD), F);
@@ -791,22 +781,12 @@
   // Only a few attributes are set on declarations; these may later be
   // overridden by a definition.
 
-  if (FD->hasAttr<DLLImportAttr>()) {
-    F->setLinkage(llvm::Function::ExternalLinkage);
-    F->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
-  } else if (FD->hasAttr<WeakAttr>() ||
-             FD->isWeakImported()) {
-    // "extern_weak" is overloaded in LLVM; we probably should have
-    // separate linkage types for this.
-    F->setLinkage(llvm::Function::ExternalWeakLinkage);
-  } else {
-    F->setLinkage(llvm::Function::ExternalLinkage);
-    if (FD->hasAttr<DLLExportAttr>())
-      F->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
+  setLinkageAndVisibilityForGV(F, FD);
 
-    LinkageInfo LV = FD->getLinkageAndVisibility();
-    if (LV.getLinkage() == ExternalLinkage && LV.isVisibilityExplicit()) {
-      F->setVisibility(GetLLVMVisibility(LV.getVisibility()));
+  if (const auto *Dtor = dyn_cast_or_null<CXXDestructorDecl>(FD)) {
+    if (getCXXABI().useThunkForDtorVariant(Dtor, GD.getDtorType())) {
+      // Don't dllexport/import destructor thunks.
+      F->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
     }
   }
 
@@ -851,11 +831,9 @@
     return;
   llvm::ArrayType *ATy = llvm::ArrayType::get(CGM.Int8PtrTy, UsedArray.size());
 
-  llvm::GlobalVariable *GV =
-    new llvm::GlobalVariable(CGM.getModule(), ATy, false,
-                             llvm::GlobalValue::AppendingLinkage,
-                             llvm::ConstantArray::get(ATy, UsedArray),
-                             Name);
+  auto *GV = new llvm::GlobalVariable(
+      CGM.getModule(), ATy, false, llvm::GlobalValue::AppendingLinkage,
+      llvm::ConstantArray::get(ATy, UsedArray), Name);
 
   GV->setSection("llvm.metadata");
 }
@@ -1036,9 +1014,9 @@
   // Create a new global variable for the ConstantStruct in the Module.
   llvm::Constant *Array = llvm::ConstantArray::get(llvm::ArrayType::get(
     Annotations[0]->getType(), Annotations.size()), Annotations);
-  llvm::GlobalValue *gv = new llvm::GlobalVariable(getModule(),
-    Array->getType(), false, llvm::GlobalValue::AppendingLinkage, Array,
-    "llvm.global.annotations");
+  auto *gv = new llvm::GlobalVariable(getModule(), Array->getType(), false,
+                                      llvm::GlobalValue::AppendingLinkage,
+                                      Array, "llvm.global.annotations");
   gv->setSection(AnnotationSection);
 }
 
@@ -1049,8 +1027,9 @@
 
   // Not found yet, create a new global.
   llvm::Constant *s = llvm::ConstantDataArray::getString(getLLVMContext(), Str);
-  llvm::GlobalValue *gv = new llvm::GlobalVariable(getModule(), s->getType(),
-    true, llvm::GlobalValue::PrivateLinkage, s, ".str");
+  auto *gv =
+      new llvm::GlobalVariable(getModule(), s->getType(), true,
+                               llvm::GlobalValue::PrivateLinkage, s, ".str");
   gv->setSection(AnnotationSection);
   gv->setUnnamedAddr(true);
   AStr = gv;
@@ -1122,7 +1101,7 @@
   llvm::Constant *Init = EmitUuidofInitializer(Uuid, E->getType());
   assert(Init && "failed to initialize as constant");
 
-  llvm::GlobalVariable *GV = new llvm::GlobalVariable(
+  auto *GV = new llvm::GlobalVariable(
       getModule(), Init->getType(),
       /*isConstant=*/true, llvm::GlobalValue::LinkOnceODRLinkage, Init, Name);
   return GV;
@@ -1148,9 +1127,10 @@
                                       /*ForVTable=*/false);
   else
     Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(),
-                                    llvm::PointerType::getUnqual(DeclTy), 0);
+                                    llvm::PointerType::getUnqual(DeclTy),
+                                    nullptr);
 
-  llvm::GlobalValue* F = cast<llvm::GlobalValue>(Aliasee);
+  auto *F = cast<llvm::GlobalValue>(Aliasee);
   F->setLinkage(llvm::Function::ExternalWeakLinkage);
   WeakRefReferences.insert(F);
 
@@ -1158,7 +1138,7 @@
 }
 
 void CodeGenModule::EmitGlobal(GlobalDecl GD) {
-  const ValueDecl *Global = cast<ValueDecl>(GD.getDecl());
+  const auto *Global = cast<ValueDecl>(GD.getDecl());
 
   // Weak references don't produce any output by themselves.
   if (Global->hasAttr<WeakRefAttr>())
@@ -1187,7 +1167,7 @@
   }
 
   // Ignore declarations, they will be emitted on their first use.
-  if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(Global)) {
+  if (const auto *FD = dyn_cast<FunctionDecl>(Global)) {
     // Forward declarations are emitted lazily on first use.
     if (!FD->doesThisDeclarationHaveABody()) {
       if (!FD->doesDeclarationForceExternallyVisibleDefinition())
@@ -1204,7 +1184,7 @@
       return;
     }
   } else {
-    const VarDecl *VD = cast<VarDecl>(Global);
+    const auto *VD = cast<VarDecl>(Global);
     assert(VD->isFileVarDecl() && "Cannot emit local var decl as global.");
 
     if (VD->isThisDeclarationADefinition() != VarDecl::Definition)
@@ -1224,7 +1204,7 @@
   if (getLangOpts().CPlusPlus && isa<VarDecl>(Global) &&
       cast<VarDecl>(Global)->hasInit()) {
     DelayedCXXInitPosition[Global] = CXXGlobalInits.size();
-    CXXGlobalInits.push_back(0);
+    CXXGlobalInits.push_back(nullptr);
   }
   
   // If the value has already been used, add it directly to the
@@ -1299,7 +1279,7 @@
 CodeGenModule::shouldEmitFunction(GlobalDecl GD) {
   if (getFunctionLinkage(GD) != llvm::Function::AvailableExternallyLinkage)
     return true;
-  const FunctionDecl *F = cast<FunctionDecl>(GD.getDecl());
+  const auto *F = cast<FunctionDecl>(GD.getDecl());
   if (CodeGenOpts.OptimizationLevel == 0 && !F->hasAttr<AlwaysInlineAttr>())
     return false;
   // PR9614. Avoid cases where the source code is lying to us. An available
@@ -1321,14 +1301,13 @@
 
   if (CGDebugInfo *DI = getModuleDebugInfo())
     if (getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo) {
-      const PointerType *ThisPtr =
-        cast<PointerType>(D->getThisType(getContext()));
+      const auto *ThisPtr = cast<PointerType>(D->getThisType(getContext()));
       DI->getOrCreateRecordType(ThisPtr->getPointeeType(), D->getLocation());
     }
 }
 
 void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
-  const ValueDecl *D = cast<ValueDecl>(GD.getDecl());
+  const auto *D = cast<ValueDecl>(GD.getDecl());
 
   PrettyStackTraceDecl CrashInfo(const_cast<ValueDecl *>(D), D->getLocation(), 
                                  Context.getSourceManager(),
@@ -1340,13 +1319,13 @@
     if (!shouldEmitFunction(GD))
       return;
 
-    if (const CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D)) {
+    if (const auto *Method = dyn_cast<CXXMethodDecl>(D)) {
       CompleteDIClassType(Method);
       // Make sure to emit the definition(s) before we emit the thunks.
       // This is necessary for the generation of certain thunks.
-      if (const CXXConstructorDecl *CD = dyn_cast<CXXConstructorDecl>(Method))
+      if (const auto *CD = dyn_cast<CXXConstructorDecl>(Method))
         EmitCXXConstructor(CD, GD.getCtorType());
-      else if (const CXXDestructorDecl *DD =dyn_cast<CXXDestructorDecl>(Method))
+      else if (const auto *DD = dyn_cast<CXXDestructorDecl>(Method))
         EmitCXXDestructor(DD, GD.getDtorType());
       else
         EmitGlobalFunctionDefinition(GD, GV);
@@ -1359,8 +1338,8 @@
 
     return EmitGlobalFunctionDefinition(GD, GV);
   }
-  
-  if (const VarDecl *VD = dyn_cast<VarDecl>(D))
+
+  if (const auto *VD = dyn_cast<VarDecl>(D))
     return EmitGlobalVarDefinition(VD);
   
   llvm_unreachable("Invalid argument to EmitGlobalDefinition()");
@@ -1465,7 +1444,7 @@
       // in a vtable, unless it's already marked as used.
     } else if (getLangOpts().CPlusPlus && D) {
       // Look for a declaration that's lexically in a record.
-      const FunctionDecl *FD = cast<FunctionDecl>(D);
+      const auto *FD = cast<FunctionDecl>(D);
       FD = FD->getMostRecentDecl();
       do {
         if (isa<CXXRecordDecl>(FD->getLexicalDeclContext())) {
@@ -1484,6 +1463,8 @@
     }
   }
 
+  getTargetCodeGenInfo().emitTargetMD(D, F, *this);
+
   // Make sure the result is of the requested type.
   if (!IsIncompleteFunction) {
     assert(F->getType()->getElementType() == Ty);
@@ -1518,7 +1499,7 @@
   llvm::Constant *C =
       GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false,
                               /*DontDefer=*/false, ExtraAttrs);
-  if (llvm::Function *F = dyn_cast<llvm::Function>(C))
+  if (auto *F = dyn_cast<llvm::Function>(C))
     if (F->empty())
       F->setCallingConv(getRuntimeCC());
   return C;
@@ -1544,6 +1525,18 @@
   return true;
 }
 
+static bool isVarDeclInlineInitializedStaticDataMember(const VarDecl *VD) {
+  if (!VD->isStaticDataMember())
+    return false;
+  const VarDecl *InitDecl;
+  const Expr *InitExpr = VD->getAnyInitializer(InitDecl);
+  if (!InitExpr)
+    return false;
+  if (InitDecl->isThisDeclarationADefinition())
+    return false;
+  return true;
+}
+
 /// GetOrCreateLLVMGlobal - If the specified mangled name is not in the module,
 /// create and return an llvm GlobalVariable with the specified type.  If there
 /// is something in the module with the specified name, return it potentially
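
The pattern the new isVarDeclInlineInitializedStaticDataMember helper matches, for reference: the initializer must sit on a declaration that is not itself the definition.

    struct S {
      static const int N = 42; // initializer on the declaration -> true
    };

    struct T {
      static const int M;      // no initializer here
    };
    const int T::M = 7;        // initializer on the definition -> false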
@@ -1554,8 +1547,7 @@
 llvm::Constant *
 CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName,
                                      llvm::PointerType *Ty,
-                                     const VarDecl *D,
-                                     bool UnnamedAddr) {
+                                     const VarDecl *D) {
   // Lookup the entry, lazily creating it if necessary.
   llvm::GlobalValue *Entry = GetGlobalValue(MangledName);
   if (Entry) {
@@ -1564,9 +1556,6 @@
         Entry->setLinkage(llvm::Function::ExternalLinkage);
     }
 
-    if (UnnamedAddr)
-      Entry->setUnnamedAddr(true);
-
     if (Entry->getType() == Ty)
       return Entry;
 
@@ -1578,11 +1567,10 @@
   }
 
   unsigned AddrSpace = GetGlobalVarAddressSpace(D, Ty->getAddressSpace());
-  llvm::GlobalVariable *GV =
-    new llvm::GlobalVariable(getModule(), Ty->getElementType(), false,
-                             llvm::GlobalValue::ExternalLinkage,
-                             0, MangledName, 0,
-                             llvm::GlobalVariable::NotThreadLocal, AddrSpace);
+  auto *GV = new llvm::GlobalVariable(
+      getModule(), Ty->getElementType(), false,
+      llvm::GlobalValue::ExternalLinkage, nullptr, MangledName, nullptr,
+      llvm::GlobalVariable::NotThreadLocal, AddrSpace);
 
   // This is the first use or definition of a mangled name.  If there is a
   // deferred decl with this name, remember that we need to emit it at the end
@@ -1601,21 +1589,7 @@
     // handling.
     GV->setConstant(isTypeConstant(D->getType(), false));
 
-    // Set linkage and visibility in case we never see a definition.
-    LinkageInfo LV = D->getLinkageAndVisibility();
-    if (LV.getLinkage() != ExternalLinkage) {
-      // Don't set internal linkage on declarations.
-    } else {
-      if (D->hasAttr<DLLImportAttr>()) {
-        GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
-        GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
-      } else if (D->hasAttr<WeakAttr>() || D->isWeakImported())
-        GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage);
-
-      // Set visibility on a declaration only if it's explicit.
-      if (LV.isVisibilityExplicit())
-        GV->setVisibility(GetLLVMVisibility(LV.getVisibility()));
-    }
+    setLinkageAndVisibilityForGV(GV, D);
 
     if (D->getTLSKind()) {
       if (D->getTLSKind() == VarDecl::TLS_Dynamic)
@@ -1626,19 +1600,21 @@
     // If required by the ABI, treat declarations of static data members with
     // inline initializers as definitions.
     if (getCXXABI().isInlineInitializedStaticDataMemberLinkOnce() &&
-        D->isStaticDataMember() && D->hasInit() &&
-        !D->isThisDeclarationADefinition())
+        isVarDeclInlineInitializedStaticDataMember(D))
       EmitGlobalVarDefinition(D);
+
+    // Handle XCore specific ABI requirements.
+    if (getTarget().getTriple().getArch() == llvm::Triple::xcore &&
+        D->getLanguageLinkage() == CLanguageLinkage &&
+        D->getType().isConstant(Context) &&
+        isExternallyVisible(D->getLinkageAndVisibility().getLinkage()))
+      GV->setSection(".cp.rodata");
   }
 
   if (AddrSpace != Ty->getAddressSpace())
     return llvm::ConstantExpr::getAddrSpaceCast(GV, Ty);
 
-  if (getTarget().getTriple().getArch() == llvm::Triple::xcore &&
-      D->getLanguageLinkage() == CLanguageLinkage &&
-      D->getType().isConstant(Context) &&
-      isExternallyVisible(D->getLinkageAndVisibility().getLinkage()))
-    GV->setSection(".cp.rodata");
+  getTargetCodeGenInfo().emitTargetMD(D, GV, *this);
 
   return GV;
 }
@@ -1649,9 +1625,8 @@
                                       llvm::Type *Ty,
                                       llvm::GlobalValue::LinkageTypes Linkage) {
   llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name);
-  llvm::GlobalVariable *OldGV = 0;
+  llvm::GlobalVariable *OldGV = nullptr;
 
-  
   if (GV) {
     // Check if the variable has the right type.
     if (GV->getType()->getElementType() == Ty)
@@ -1665,8 +1640,8 @@
   
   // Create a new variable.
   GV = new llvm::GlobalVariable(getModule(), Ty, /*isConstant=*/true,
-                                Linkage, 0, Name);
-  
+                                Linkage, nullptr, Name);
+
   if (OldGV) {
     // Replace occurrences of the old variable if needed.
     GV->takeName(OldGV);
@@ -1691,7 +1666,7 @@
                                                   llvm::Type *Ty) {
   assert(D->hasGlobalStorage() && "Not a global variable");
   QualType ASTTy = D->getType();
-  if (Ty == 0)
+  if (!Ty)
     Ty = getTypes().ConvertTypeForMem(ASTTy);
 
   llvm::PointerType *PTy =
@@ -1706,8 +1681,7 @@
 llvm::Constant *
 CodeGenModule::CreateRuntimeVariable(llvm::Type *Ty,
                                      StringRef Name) {
-  return GetOrCreateLLVMGlobal(Name, llvm::PointerType::getUnqual(Ty), 0,
-                               true);
+  return GetOrCreateLLVMGlobal(Name, llvm::PointerType::getUnqual(Ty), nullptr);
 }
 
 void CodeGenModule::EmitTentativeDefinition(const VarDecl *D) {
@@ -1777,11 +1751,11 @@
   // If we have multiple internal linkage entities with the same name
   // in extern "C" regions, none of them gets that name.
   if (!R.second)
-    R.first->second = 0;
+    R.first->second = nullptr;
 }
 
 void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) {
-  llvm::Constant *Init = 0;
+  llvm::Constant *Init = nullptr;
   QualType ASTTy = D->getType();
   CXXRecordDecl *RD = ASTTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl();
   bool NeedsGlobalCtor = false;
@@ -1831,7 +1805,7 @@
   llvm::Constant *Entry = GetAddrOfGlobalVar(D, InitType);
 
   // Strip off a bitcast if we got one back.
-  if (llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(Entry)) {
+  if (auto *CE = dyn_cast<llvm::ConstantExpr>(Entry)) {
     assert(CE->getOpcode() == llvm::Instruction::BitCast ||
            CE->getOpcode() == llvm::Instruction::AddrSpaceCast ||
            // All zero index gep.
@@ -1840,7 +1814,7 @@
   }
 
   // Entry is now either a Function or GlobalVariable.
-  llvm::GlobalVariable *GV = dyn_cast<llvm::GlobalVariable>(Entry);
+  auto *GV = dyn_cast<llvm::GlobalVariable>(Entry);
 
   // We have a definition after a declaration with the wrong type.
   // We must make a new GlobalVariable* and update everything that used OldGV
@@ -1851,7 +1825,7 @@
   // "extern int x[];") and then a definition of a different type (e.g.
   // "int x[10];"). This also happens when an initializer has a different type
   // from the type of the global (this happens with unions).
-  if (GV == 0 ||
+  if (!GV ||
       GV->getType()->getElementType() != InitType ||
       GV->getType()->getAddressSpace() !=
        GetGlobalVarAddressSpace(D, getContext().getTargetAddressSpace(ASTTy))) {
@@ -1885,26 +1859,19 @@
   GV->setAlignment(getContext().getDeclAlign(D).getQuantity());
 
   // Set the llvm linkage type as appropriate.
-  llvm::GlobalValue::LinkageTypes Linkage = 
-    GetLLVMLinkageVarDefinition(D, GV->isConstant());
+  llvm::GlobalValue::LinkageTypes Linkage =
+      getLLVMLinkageVarDefinition(D, GV->isConstant());
   GV->setLinkage(Linkage);
   if (D->hasAttr<DLLImportAttr>())
     GV->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass);
   else if (D->hasAttr<DLLExportAttr>())
     GV->setDLLStorageClass(llvm::GlobalVariable::DLLExportStorageClass);
 
-  // If required by the ABI, give definitions of static data members with inline
-  // initializers linkonce_odr linkage.
-  if (getCXXABI().isInlineInitializedStaticDataMemberLinkOnce() &&
-      D->isStaticDataMember() && InitExpr &&
-      !InitDecl->isThisDeclarationADefinition())
-    GV->setLinkage(llvm::GlobalVariable::LinkOnceODRLinkage);
-
   if (Linkage == llvm::GlobalVariable::CommonLinkage)
     // common vars aren't constant even if declared const.
     GV->setConstant(false);
 
-  SetCommonAttributes(D, GV);
+  setNonAliasAttributes(D, GV);
 
   // Emit the initializer function if necessary.
   if (NeedsGlobalCtor || NeedsGlobalDtor)
@@ -1928,46 +1895,105 @@
       DI->EmitGlobalVariable(GV, D);
 }
 
-llvm::GlobalValue::LinkageTypes
-CodeGenModule::GetLLVMLinkageVarDefinition(const VarDecl *D, bool isConstant) {
-  GVALinkage Linkage = getContext().GetGVALinkageForVariable(D);
+static bool isVarDeclStrongDefinition(const VarDecl *D, bool NoCommon) {
+  // Don't give variables common linkage under -fno-common or a NoCommon
+  // attribute, unless a Common attribute overrides that.
+  if ((NoCommon || D->hasAttr<NoCommonAttr>()) && !D->hasAttr<CommonAttr>())
+    return true;
+
+  // C11 6.9.2/2:
+  //   A declaration of an identifier for an object that has file scope without
+  //   an initializer, and without a storage-class specifier or with the
+  //   storage-class specifier static, constitutes a tentative definition.
+  if (D->getInit() || D->hasExternalStorage())
+    return true;
+
+  // A variable cannot be both common and exist in a section.
+  if (D->hasAttr<SectionAttr>())
+    return true;
+
+  // Thread-local vars are never given common linkage.
+  if (D->getTLSKind())
+    return true;
+
+  // Tentative definitions marked with WeakImportAttr are true definitions.
+  if (D->hasAttr<WeakImportAttr>())
+    return true;
+
+  return false;
+}
+
+llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator(
+    const DeclaratorDecl *D, GVALinkage Linkage, bool IsConstantVariable) {
   if (Linkage == GVA_Internal)
     return llvm::Function::InternalLinkage;
-  else if (D->hasAttr<DLLImportAttr>())
-    return llvm::Function::ExternalLinkage;
-  else if (D->hasAttr<DLLExportAttr>())
-    return llvm::Function::ExternalLinkage;
-  else if (D->hasAttr<SelectAnyAttr>()) {
-    // selectany symbols are externally visible, so use weak instead of
-    // linkonce.  MSVC optimizes away references to const selectany globals, so
-    // all definitions should be the same and ODR linkage should be used.
-    // http://msdn.microsoft.com/en-us/library/5tkz6s71.aspx
-    return llvm::GlobalVariable::WeakODRLinkage;
-  } else if (D->hasAttr<WeakAttr>()) {
-    if (isConstant)
+
+  if (D->hasAttr<WeakAttr>()) {
+    if (IsConstantVariable)
       return llvm::GlobalVariable::WeakODRLinkage;
     else
       return llvm::GlobalVariable::WeakAnyLinkage;
-  } else if (Linkage == GVA_TemplateInstantiation || Linkage == GVA_StrongODR)
-    return llvm::GlobalVariable::WeakODRLinkage;
-  else if (!getLangOpts().CPlusPlus && 
-           ((!CodeGenOpts.NoCommon && !D->hasAttr<NoCommonAttr>()) ||
-             D->hasAttr<CommonAttr>()) &&
-           !D->hasExternalStorage() && !D->getInit() &&
-           !D->hasAttr<SectionAttr>() && !D->getTLSKind() &&
-           !D->hasAttr<WeakImportAttr>()) {
-    // Thread local vars aren't considered common linkage.
+  }
+
+  // We are guaranteed to have a strong definition somewhere else,
+  // so we can use available_externally linkage.
+  if (Linkage == GVA_AvailableExternally)
+    return llvm::Function::AvailableExternallyLinkage;
+
+  // Note that Apple's kernel linker doesn't support symbol
+  // coalescing, so we need to avoid linkonce and weak linkages there.
+  // Normally, this means we just map to internal, but for explicit
+  // instantiations we'll map to external.
+
+  // In C++, the compiler has to emit a definition in every translation unit
+  // that references the function.  We should use linkonce_odr because
+  // a) if all references in this translation unit are optimized away, we
+  // don't need to codegen it.  b) if the function persists, it needs to be
+  // merged with other definitions. c) C++ has the ODR, so we know the
+  // definition is dependable.
+  if (Linkage == GVA_DiscardableODR)
+    return !Context.getLangOpts().AppleKext ? llvm::Function::LinkOnceODRLinkage
+                                            : llvm::Function::InternalLinkage;
+
+  // An explicit instantiation of a template has weak linkage, since
+  // explicit instantiations can occur in multiple translation units
+  // and must all be equivalent. However, we are not allowed to
+  // throw away these explicit instantiations.
+  if (Linkage == GVA_StrongODR)
+    return !Context.getLangOpts().AppleKext ? llvm::Function::WeakODRLinkage
+                                            : llvm::Function::ExternalLinkage;
+
+  // If required by the ABI, give definitions of static data members with inline
+  // initializers at least linkonce_odr linkage.
+  if (getCXXABI().isInlineInitializedStaticDataMemberLinkOnce() &&
+      isa<VarDecl>(D) &&
+      isVarDeclInlineInitializedStaticDataMember(cast<VarDecl>(D)))
+    return llvm::GlobalValue::LinkOnceODRLinkage;
+
+  // C++ doesn't have tentative definitions and thus cannot have common
+  // linkage.
+  if (!getLangOpts().CPlusPlus && isa<VarDecl>(D) &&
+      !isVarDeclStrongDefinition(cast<VarDecl>(D), CodeGenOpts.NoCommon))
     return llvm::GlobalVariable::CommonLinkage;
-  } else if (D->getTLSKind() == VarDecl::TLS_Dynamic &&
-             getTarget().getTriple().isMacOSX())
-    // On Darwin, the backing variable for a C++11 thread_local variable always
-    // has internal linkage; all accesses should just be calls to the
-    // Itanium-specified entry point, which has the normal linkage of the
-    // variable.
-    return llvm::GlobalValue::InternalLinkage;
+
+  // selectany symbols are externally visible, so use weak instead of
+  // linkonce.  MSVC optimizes away references to const selectany globals, so
+  // all definitions should be the same and ODR linkage should be used.
+  // http://msdn.microsoft.com/en-us/library/5tkz6s71.aspx
+  if (D->hasAttr<SelectAnyAttr>())
+    return llvm::GlobalVariable::WeakODRLinkage;
+
+  // Otherwise, we have strong external linkage.
+  assert(Linkage == GVA_StrongExternal);
   return llvm::GlobalVariable::ExternalLinkage;
 }
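
As a hedged illustration (names hypothetical, non-AppleKext target assumed),
the mapping above sends common C++ constructs to these LLVM linkages:

    static int file_local;       // GVA_Internal        -> internal
    inline int square(int x) { return x * x; }
                                 // GVA_DiscardableODR  -> linkonce_odr
    template <class T> struct Box { static T value; };
    template <class T> T Box<T>::value;
    template struct Box<int>;    // GVA_StrongODR       -> weak_odr
    __attribute__((weak)) int w; // weak attribute      -> weak_any
    int strong = 42;             // GVA_StrongExternal  -> external
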
 
+llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageVarDefinition(
+    const VarDecl *VD, bool IsConstant) {
+  GVALinkage Linkage = getContext().GetGVALinkageForVariable(VD);
+  return getLLVMLinkageForDeclarator(VD, Linkage, IsConstant);
+}
+
 /// Replace the uses of a function that was declared with a non-proto type.
 /// We want to silently drop extra arguments from call sites
 static void replaceUsesOfNonProtoConstant(llvm::Constant *old,
@@ -1985,7 +2011,7 @@
 
     // Recognize and replace uses of bitcasts.  Most calls to
     // unprototyped functions will use bitcasts.
-    if (llvm::ConstantExpr *bitcast = dyn_cast<llvm::ConstantExpr>(user)) {
+    if (auto *bitcast = dyn_cast<llvm::ConstantExpr>(user)) {
       if (bitcast->getOpcode() == llvm::Instruction::BitCast)
         replaceUsesOfNonProtoConstant(bitcast, newFn);
       continue;
@@ -2049,8 +2075,7 @@
       newCall = llvm::CallInst::Create(newFn, newArgs, "",
                                        callSite.getInstruction());
     } else {
-      llvm::InvokeInst *oldInvoke =
-        cast<llvm::InvokeInst>(callSite.getInstruction());
+      auto *oldInvoke = cast<llvm::InvokeInst>(callSite.getInstruction());
       newCall = llvm::InvokeInst::Create(newFn,
                                          oldInvoke->getNormalDest(),
                                          oldInvoke->getUnwindDest(),
@@ -2105,34 +2130,34 @@
 
 void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD,
                                                  llvm::GlobalValue *GV) {
-  const FunctionDecl *D = cast<FunctionDecl>(GD.getDecl());
+  const auto *D = cast<FunctionDecl>(GD.getDecl());
 
   // Compute the function info and LLVM type.
   const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD);
   llvm::FunctionType *Ty = getTypes().GetFunctionType(FI);
 
   // Get or create the prototype for the function.
-  llvm::Constant *Entry =
-      GV ? GV
-         : GetAddrOfFunction(GD, Ty, /*ForVTable=*/false, /*DontDefer*/ true);
+  if (!GV) {
+    llvm::Constant *C =
+        GetAddrOfFunction(GD, Ty, /*ForVTable=*/false, /*DontDefer*/ true);
 
-  // Strip off a bitcast if we got one back.
-  if (llvm::ConstantExpr *CE = dyn_cast<llvm::ConstantExpr>(Entry)) {
-    assert(CE->getOpcode() == llvm::Instruction::BitCast);
-    Entry = CE->getOperand(0);
+    // Strip off a bitcast if we got one back.
+    if (auto *CE = dyn_cast<llvm::ConstantExpr>(C)) {
+      assert(CE->getOpcode() == llvm::Instruction::BitCast);
+      GV = cast<llvm::GlobalValue>(CE->getOperand(0));
+    } else {
+      GV = cast<llvm::GlobalValue>(C);
+    }
   }
 
-  if (!cast<llvm::GlobalValue>(Entry)->isDeclaration()) {
+  if (!GV->isDeclaration()) {
     getDiags().Report(D->getLocation(), diag::err_duplicate_mangled_name);
     return;
   }
 
-  if (cast<llvm::GlobalValue>(Entry)->getType()->getElementType() != Ty) {
-    llvm::GlobalValue *OldFn = cast<llvm::GlobalValue>(Entry);
-
+  if (GV->getType()->getElementType() != Ty) {
     // If the types mismatch then we have to rewrite the definition.
-    assert(OldFn->isDeclaration() &&
-           "Shouldn't replace non-declaration");
+    assert(GV->isDeclaration() && "Shouldn't replace non-declaration");
 
     // F is the Function* for the one with the wrong type, we must make a new
     // Function* and update everything that used F (a declaration) with the new
@@ -2142,8 +2167,8 @@
     // (e.g. "int f()") and then a definition of a different type
     // (e.g. "int f(int x)").  Move the old function aside so that it
     // doesn't interfere with GetAddrOfFunction.
-    OldFn->setName(StringRef());
-    llvm::Function *NewFn = cast<llvm::Function>(GetAddrOfFunction(GD, Ty));
+    GV->setName(StringRef());
+    auto *NewFn = cast<llvm::Function>(GetAddrOfFunction(GD, Ty));
 
     // This might be an implementation of a function without a
     // prototype, in which case, try to do special replacement of
@@ -2152,39 +2177,39 @@
     // so as to make a direct call, which makes the inliner happier
     // and suppresses a number of optimizer warnings (!) about
     // dropping arguments.
-    if (!OldFn->use_empty()) {
-      ReplaceUsesOfNonProtoTypeWithRealFunction(OldFn, NewFn);
-      OldFn->removeDeadConstantUsers();
+    if (!GV->use_empty()) {
+      ReplaceUsesOfNonProtoTypeWithRealFunction(GV, NewFn);
+      GV->removeDeadConstantUsers();
     }
 
     // Replace uses of F with the Function we will endow with a body.
-    if (!Entry->use_empty()) {
+    if (!GV->use_empty()) {
       llvm::Constant *NewPtrForOldDecl =
-        llvm::ConstantExpr::getBitCast(NewFn, Entry->getType());
-      Entry->replaceAllUsesWith(NewPtrForOldDecl);
+          llvm::ConstantExpr::getBitCast(NewFn, GV->getType());
+      GV->replaceAllUsesWith(NewPtrForOldDecl);
     }
 
     // Ok, delete the old function now, which is dead.
-    OldFn->eraseFromParent();
+    GV->eraseFromParent();
 
-    Entry = NewFn;
+    GV = NewFn;
   }
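
A sketch of the C scenario this path handles (hypothetical names; an empty
parameter list in C declares an unprototyped function):

    int f();                     /* unprototyped: IR type is int (...) */
    int g(void) { return f(1); } /* call is emitted through a bitcast of f */
    int f(int x) { return x; }   /* definition arrives with a real prototype */

When the definition of f is emitted, its LLVM type mismatches the earlier
declaration, so the old llvm::Function is renamed aside, a new one is
created, and ReplaceUsesOfNonProtoTypeWithRealFunction turns the call in g
into a direct call to the new function.
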
 
   // We need to set linkage and visibility on the function before
   // generating code for it because various parts of IR generation
   // want to propagate this information down (e.g. to local static
   // declarations).
-  llvm::Function *Fn = cast<llvm::Function>(Entry);
+  auto *Fn = cast<llvm::Function>(GV);
   setFunctionLinkage(GD, Fn);
 
-  // FIXME: this is redundant with part of SetFunctionDefinitionAttributes
+  // FIXME: this is redundant with part of setFunctionDefinitionAttributes
   setGlobalVisibility(Fn, D);
 
   MaybeHandleStaticInExternC(D, Fn);
 
   CodeGenFunction(*this).GenerateCode(D, Fn, FI);
 
-  SetFunctionDefinitionAttributes(D, Fn);
+  setFunctionDefinitionAttributes(D, Fn);
   SetLLVMFunctionAttributesForDefinition(D, Fn);
 
   if (const ConstructorAttr *CA = D->getAttr<ConstructorAttr>())
@@ -2193,14 +2218,33 @@
     AddGlobalDtor(Fn, DA->getPriority());
   if (D->hasAttr<AnnotateAttr>())
     AddGlobalAnnotations(D, Fn);
+}
 
-  llvm::Function *PGOInit = CodeGenPGO::emitInitialization(*this);
-  if (PGOInit)
-    AddGlobalCtor(PGOInit, 0);
+static llvm::GlobalObject &getGlobalObjectInExpr(DiagnosticsEngine &Diags,
+                                                 const AliasAttr *AA,
+                                                 llvm::Constant *C) {
+  if (auto *GO = dyn_cast<llvm::GlobalObject>(C))
+    return *GO;
+
+  auto *GA = dyn_cast<llvm::GlobalAlias>(C);
+  if (GA) {
+    if (GA->mayBeOverridden()) {
+      Diags.Report(AA->getLocation(), diag::warn_alias_to_weak_alias)
+          << GA->getAliasee()->getName() << GA->getName();
+    }
+
+    return *GA->getAliasee();
+  }
+
+  auto *CE = cast<llvm::ConstantExpr>(C);
+  assert(CE->getOpcode() == llvm::Instruction::BitCast ||
+         CE->getOpcode() == llvm::Instruction::GetElementPtr ||
+         CE->getOpcode() == llvm::Instruction::AddrSpaceCast);
+  return *cast<llvm::GlobalObject>(CE->getOperand(0));
 }
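
A hedged GNU-attribute example of what this helper resolves (symbol names
hypothetical):

    extern "C" void impl(void) {}
    extern "C" void api(void) __attribute__((alias("impl")));

The aliasee constant for api is the GlobalObject for impl, returned directly
by the first case above. Aliasing another alias that may be overridden emits
warn_alias_to_weak_alias, and an alias that resolves to itself is rejected
below with err_cyclic_alias.
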
 
 void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) {
-  const ValueDecl *D = cast<ValueDecl>(GD.getDecl());
+  const auto *D = cast<ValueDecl>(GD.getDecl());
   const AliasAttr *AA = D->getAttr<AliasAttr>();
   assert(AA && "Not an alias?");
 
@@ -2224,15 +2268,21 @@
                                       /*ForVTable=*/false);
   else
     Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(),
-                                    llvm::PointerType::getUnqual(DeclTy), 0);
+                                    llvm::PointerType::getUnqual(DeclTy),
+                                    nullptr);
 
   // Create the new alias itself, but don't set a name yet.
-  llvm::GlobalValue *GA =
-    new llvm::GlobalAlias(Aliasee->getType(),
-                          llvm::Function::ExternalLinkage,
-                          "", Aliasee, &getModule());
+  auto *GA = llvm::GlobalAlias::create(
+      cast<llvm::PointerType>(Aliasee->getType())->getElementType(), 0,
+      llvm::Function::ExternalLinkage, "",
+      &getGlobalObjectInExpr(Diags, AA, Aliasee));
 
   if (Entry) {
+    if (GA->getAliasee() == Entry) {
+      Diags.Report(AA->getLocation(), diag::err_cyclic_alias);
+      return;
+    }
+
     assert(Entry->isDeclaration());
 
     // If there is a declaration in the module, then we had an extern followed
@@ -2255,7 +2305,7 @@
   // specialization of the attributes which may be set on a global
   // variable/function.
   if (D->hasAttr<DLLExportAttr>()) {
-    if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
+    if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
       // The dllexport attribute is ignored for undefined symbols.
       if (FD->hasBody())
         GA->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass);
@@ -2353,8 +2403,7 @@
 
   QualType CFTy = getContext().getCFConstantStringType();
 
-  llvm::StructType *STy =
-    cast<llvm::StructType>(getTypes().ConvertType(CFTy));
+  auto *STy = cast<llvm::StructType>(getTypes().ConvertType(CFTy));
 
   llvm::Constant *Fields[4];
 
@@ -2367,7 +2416,7 @@
     llvm::ConstantInt::get(Ty, 0x07C8);
 
   // String pointer.
-  llvm::Constant *C = 0;
+  llvm::Constant *C = nullptr;
   if (isUTF16) {
     ArrayRef<uint16_t> Arr =
       llvm::makeArrayRef<uint16_t>(reinterpret_cast<uint16_t*>(
@@ -2380,7 +2429,7 @@
 
   // Note: -fwritable-strings doesn't make the backing store strings of
   // CFStrings writable. (See <rdar://problem/10657500>)
-  llvm::GlobalVariable *GV =
+  auto *GV =
       new llvm::GlobalVariable(getModule(), C->getType(), /*isConstant=*/true,
                                llvm::GlobalValue::PrivateLinkage, C, ".str");
   GV->setUnnamedAddr(true);
@@ -2479,9 +2528,9 @@
     for (unsigned i = 0; i < 3; ++i) {
       FieldDecl *Field = FieldDecl::Create(Context, D,
                                            SourceLocation(),
-                                           SourceLocation(), 0,
-                                           FieldTypes[i], /*TInfo=*/0,
-                                           /*BitWidth=*/0,
+                                           SourceLocation(), nullptr,
+                                           FieldTypes[i], /*TInfo=*/nullptr,
+                                           /*BitWidth=*/nullptr,
                                            /*Mutable=*/false,
                                            ICIS_NoInit);
       Field->setAccess(AS_public);
@@ -2506,10 +2555,9 @@
   bool isConstant;
   Linkage = llvm::GlobalValue::PrivateLinkage;
   isConstant = !LangOpts.WritableStrings;
-  
-  llvm::GlobalVariable *GV =
-  new llvm::GlobalVariable(getModule(), C->getType(), isConstant, Linkage, C,
-                           ".str");
+
+  auto *GV = new llvm::GlobalVariable(getModule(), C->getType(), isConstant,
+                                      Linkage, C, ".str");
   GV->setUnnamedAddr(true);
   // Don't enforce the target's minimum global alignment, since the only use
   // of the string is via this class initializer.
@@ -2555,9 +2603,9 @@
       FieldDecl *Field = FieldDecl::Create(Context,
                                            D,
                                            SourceLocation(),
-                                           SourceLocation(), 0,
-                                           FieldTypes[i], /*TInfo=*/0,
-                                           /*BitWidth=*/0,
+                                           SourceLocation(), nullptr,
+                                           FieldTypes[i], /*TInfo=*/nullptr,
+                                           /*BitWidth=*/nullptr,
                                            /*Mutable=*/false,
                                            ICIS_NoInit);
       Field->setAccess(AS_public);
@@ -2585,9 +2633,8 @@
     Str.resize(CAT->getSize().getZExtValue());
     return llvm::ConstantDataArray::getString(VMContext, Str, false);
   }
-  
-  llvm::ArrayType *AType =
-    cast<llvm::ArrayType>(getTypes().ConvertType(E->getType()));
+
+  auto *AType = cast<llvm::ArrayType>(getTypes().ConvertType(E->getType()));
   llvm::Type *ElemTy = AType->getElementType();
   unsigned NumElements = AType->getNumElements();
 
@@ -2656,7 +2703,7 @@
       LT = llvm::GlobalValue::LinkOnceODRLinkage;
       GlobalVariableName = MangledNameBuffer;
     } else {
-      LT = llvm::GlobalValue::PrivateLinkage;;
+      LT = llvm::GlobalValue::PrivateLinkage;
       GlobalVariableName = ".str";
     }
 
@@ -2708,9 +2755,9 @@
     AddrSpace = CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant);
 
   // Create a global variable for this string
-  llvm::GlobalVariable *GV = new llvm::GlobalVariable(
+  auto *GV = new llvm::GlobalVariable(
       CGM.getModule(), C->getType(), constant,
-      llvm::GlobalValue::PrivateLinkage, C, GlobalName, 0,
+      llvm::GlobalValue::PrivateLinkage, C, GlobalName, nullptr,
       llvm::GlobalVariable::NotThreadLocal, AddrSpace);
   GV->setAlignment(Alignment);
   GV->setUnnamedAddr(true);
@@ -2771,7 +2818,7 @@
     const MaterializeTemporaryExpr *E, const Expr *Init) {
   assert((E->getStorageDuration() == SD_Static ||
           E->getStorageDuration() == SD_Thread) && "not a global temporary");
-  const VarDecl *VD = cast<VarDecl>(E->getExtendingDecl());
+  const auto *VD = cast<VarDecl>(E->getExtendingDecl());
 
   // If we're not materializing a subobject of the temporary, keep the
   // cv-qualifiers from the type of the MaterializeTemporaryExpr.
@@ -2788,10 +2835,11 @@
   // we also need to make the temporaries externally-visible).
   SmallString<256> Name;
   llvm::raw_svector_ostream Out(Name);
-  getCXXABI().getMangleContext().mangleReferenceTemporary(VD, Out);
+  getCXXABI().getMangleContext().mangleReferenceTemporary(
+      VD, E->getManglingNumber(), Out);
   Out.flush();
 
-  APValue *Value = 0;
+  APValue *Value = nullptr;
   if (E->getStorageDuration() == SD_Static) {
     // We might have a cached constant initializer for this temporary. Note
     // that this might have a different value from the value computed by
@@ -2799,7 +2847,7 @@
     // modifies the temporary.
     Value = getContext().getMaterializedTemporaryValue(E, false);
     if (Value && Value->isUninit())
-      Value = 0;
+      Value = nullptr;
   }
 
   // Try evaluating it now, it might have a constant initializer.
@@ -2808,12 +2856,12 @@
       !EvalResult.hasSideEffects())
     Value = &EvalResult.Val;
 
-  llvm::Constant *InitialValue = 0;
+  llvm::Constant *InitialValue = nullptr;
   bool Constant = false;
   llvm::Type *Type;
   if (Value) {
     // The temporary has a constant initializer, use it.
-    InitialValue = EmitConstantValue(*Value, MaterializedType, 0);
+    InitialValue = EmitConstantValue(*Value, MaterializedType, nullptr);
     Constant = isTypeConstant(MaterializedType, /*ExcludeCtor*/Value);
     Type = InitialValue->getType();
   } else {
@@ -2823,10 +2871,19 @@
   }
 
   // Create a global variable for this lifetime-extended temporary.
-  llvm::GlobalVariable *GV =
-    new llvm::GlobalVariable(getModule(), Type, Constant,
-                             llvm::GlobalValue::PrivateLinkage,
-                             InitialValue, Name.c_str());
+  llvm::GlobalValue::LinkageTypes Linkage =
+      getLLVMLinkageVarDefinition(VD, Constant);
+  // There is no need for this temporary to have global linkage if the global
+  // variable has external linkage.
+  if (Linkage == llvm::GlobalVariable::ExternalLinkage)
+    Linkage = llvm::GlobalVariable::PrivateLinkage;
+  unsigned AddrSpace = GetGlobalVarAddressSpace(
+      VD, getContext().getTargetAddressSpace(MaterializedType));
+  auto *GV = new llvm::GlobalVariable(
+      getModule(), Type, Constant, Linkage, InitialValue, Name.c_str(),
+      /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal,
+      AddrSpace);
+  setGlobalVisibility(GV, VD);
   GV->setAlignment(
       getContext().getTypeAlignInChars(MaterializedType).getQuantity());
   if (VD->getTLSKind())
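
A minimal C++ sketch of the construct handled here (variable name
hypothetical):

    const int &ref = 42;  // namespace scope: SD_Static

The temporary holding 42 is lifetime-extended, so it gets its own backing
global, named via mangleReferenceTemporary (now disambiguated by the mangling
number); since ref itself has external linkage, the temporary is demoted to
private linkage, as nothing outside this TU needs to name it.
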
@@ -2879,7 +2936,7 @@
     Selector cxxSelector = getContext().Selectors.getSelector(0, &II);
     ObjCMethodDecl *DTORMethod =
       ObjCMethodDecl::Create(getContext(), D->getLocation(), D->getLocation(),
-                             cxxSelector, getContext().VoidTy, 0, D,
+                             cxxSelector, getContext().VoidTy, nullptr, D,
                              /*isInstance=*/true, /*isVariadic=*/false,
                           /*isPropertyAccessor=*/true, /*isImplicitlyDeclared=*/true,
                              /*isDefined=*/false, ObjCMethodDecl::Required);
@@ -2900,8 +2957,8 @@
                                                 D->getLocation(),
                                                 D->getLocation(),
                                                 cxxSelector,
-                                                getContext().getObjCIdType(), 0, 
-                                                D, /*isInstance=*/true,
+                                                getContext().getObjCIdType(),
+                                                nullptr, D, /*isInstance=*/true,
                                                 /*isVariadic=*/false,
                                                 /*isPropertyAccessor=*/true,
                                                 /*isImplicitlyDeclared=*/true,
@@ -3025,7 +3082,7 @@
     break;
 
   case Decl::ObjCProtocol: {
-    ObjCProtocolDecl *Proto = cast<ObjCProtocolDecl>(D);
+    auto *Proto = cast<ObjCProtocolDecl>(D);
     if (Proto->isThisDeclarationADefinition())
       ObjCRuntime->GenerateProtocol(Proto);
     break;
@@ -3038,7 +3095,7 @@
     break;
 
   case Decl::ObjCImplementation: {
-    ObjCImplementationDecl *OMD = cast<ObjCImplementationDecl>(D);
+    auto *OMD = cast<ObjCImplementationDecl>(D);
     EmitObjCPropertyImplementations(OMD);
     EmitObjCIvarInitializations(OMD);
     ObjCRuntime->GenerateClass(OMD);
@@ -3050,7 +3107,7 @@
     break;
   }
   case Decl::ObjCMethod: {
-    ObjCMethodDecl *OMD = cast<ObjCMethodDecl>(D);
+    auto *OMD = cast<ObjCMethodDecl>(D);
     // If this is not a prototype, emit the body.
     if (OMD->getBody())
       CodeGenFunction(*this).GenerateObjCMethod(OMD);
@@ -3065,7 +3122,7 @@
     break;
 
   case Decl::FileScopeAsm: {
-    FileScopeAsmDecl *AD = cast<FileScopeAsmDecl>(D);
+    auto *AD = cast<FileScopeAsmDecl>(D);
     StringRef AsmString = AD->getAsmString()->getString();
 
     const std::string &S = getModule().getModuleInlineAsm();
@@ -3079,7 +3136,7 @@
   }
 
   case Decl::Import: {
-    ImportDecl *Import = cast<ImportDecl>(D);
+    auto *Import = cast<ImportDecl>(D);
 
     // Ignore import declarations that come from imported modules.
     if (clang::Module *Owner = Import->getOwningModule()) {
@@ -3093,8 +3150,7 @@
   }
 
   case Decl::ClassTemplateSpecialization: {
-    const ClassTemplateSpecializationDecl *Spec =
-        cast<ClassTemplateSpecializationDecl>(D);
+    const auto *Spec = cast<ClassTemplateSpecializationDecl>(D);
     if (DebugInfo &&
         Spec->getSpecializationKind() == TSK_ExplicitInstantiationDefinition)
       DebugInfo->completeTemplateDefinition(*Spec);
@@ -3144,8 +3200,8 @@
     IdentifierInfo *Name = I->first;
     llvm::GlobalValue *Val = I->second;
     if (Val && !getModule().getNamedValue(Name->getName()))
-      addUsedGlobal(new llvm::GlobalAlias(Val->getType(), Val->getLinkage(),
-                                          Name->getName(), Val, &getModule()));
+      addUsedGlobal(llvm::GlobalAlias::create(Name->getName(),
+                                              cast<llvm::GlobalObject>(Val)));
   }
 }
 
@@ -3157,7 +3213,7 @@
 /// with an llvm::GlobalValue, we create a global named metadata
 /// with the name 'clang.global.decl.ptrs'.
 void CodeGenModule::EmitDeclMetadata() {
-  llvm::NamedMDNode *GlobalMetadata = 0;
+  llvm::NamedMDNode *GlobalMetadata = nullptr;
 
   // StaticLocalDeclMap
   for (llvm::DenseMap<GlobalDecl,StringRef>::iterator
@@ -3178,17 +3234,17 @@
   // Find the unique metadata ID for this name.
   unsigned DeclPtrKind = Context.getMDKindID("clang.decl.ptr");
 
-  llvm::NamedMDNode *GlobalMetadata = 0;
+  llvm::NamedMDNode *GlobalMetadata = nullptr;
 
   for (llvm::DenseMap<const Decl*, llvm::Value*>::iterator
          I = LocalDeclMap.begin(), E = LocalDeclMap.end(); I != E; ++I) {
     const Decl *D = I->first;
     llvm::Value *Addr = I->second;
 
-    if (llvm::AllocaInst *Alloca = dyn_cast<llvm::AllocaInst>(Addr)) {
+    if (auto *Alloca = dyn_cast<llvm::AllocaInst>(Addr)) {
       llvm::Value *DAddr = GetPointerConstant(getLLVMContext(), D);
       Alloca->setMetadata(DeclPtrKind, llvm::MDNode::get(Context, DAddr));
-    } else if (llvm::GlobalValue *GV = dyn_cast<llvm::GlobalValue>(Addr)) {
+    } else if (auto *GV = dyn_cast<llvm::GlobalValue>(Addr)) {
       GlobalDecl GD = GlobalDecl(cast<VarDecl>(D));
       EmitGlobalDeclMetadata(CGM, GlobalMetadata, GD, GV);
     }
diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h
index 0d13bdc..c54f6de 100644
--- a/lib/CodeGen/CodeGenModule.h
+++ b/lib/CodeGen/CodeGenModule.h
@@ -34,124 +34,122 @@
 #include "llvm/Transforms/Utils/SpecialCaseList.h"
 
 namespace llvm {
-  class Module;
-  class Constant;
-  class ConstantInt;
-  class Function;
-  class GlobalValue;
-  class DataLayout;
-  class FunctionType;
-  class LLVMContext;
+class Module;
+class Constant;
+class ConstantInt;
+class Function;
+class GlobalValue;
+class DataLayout;
+class FunctionType;
+class LLVMContext;
+class IndexedInstrProfReader;
 }
 
 namespace clang {
-  class TargetCodeGenInfo;
-  class ASTContext;
-  class AtomicType;
-  class FunctionDecl;
-  class IdentifierInfo;
-  class ObjCMethodDecl;
-  class ObjCImplementationDecl;
-  class ObjCCategoryImplDecl;
-  class ObjCProtocolDecl;
-  class ObjCEncodeExpr;
-  class BlockExpr;
-  class CharUnits;
-  class Decl;
-  class Expr;
-  class Stmt;
-  class InitListExpr;
-  class StringLiteral;
-  class NamedDecl;
-  class ValueDecl;
-  class VarDecl;
-  class LangOptions;
-  class CodeGenOptions;
-  class DiagnosticsEngine;
-  class AnnotateAttr;
-  class CXXDestructorDecl;
-  class MangleBuffer;
-  class Module;
+class TargetCodeGenInfo;
+class ASTContext;
+class AtomicType;
+class FunctionDecl;
+class IdentifierInfo;
+class ObjCMethodDecl;
+class ObjCImplementationDecl;
+class ObjCCategoryImplDecl;
+class ObjCProtocolDecl;
+class ObjCEncodeExpr;
+class BlockExpr;
+class CharUnits;
+class Decl;
+class Expr;
+class Stmt;
+class InitListExpr;
+class StringLiteral;
+class NamedDecl;
+class ValueDecl;
+class VarDecl;
+class LangOptions;
+class CodeGenOptions;
+class DiagnosticsEngine;
+class AnnotateAttr;
+class CXXDestructorDecl;
+class MangleBuffer;
+class Module;
 
 namespace CodeGen {
 
-  class CallArgList;
-  class CodeGenFunction;
-  class CodeGenTBAA;
-  class CGCXXABI;
-  class CGDebugInfo;
-  class CGObjCRuntime;
-  class CGOpenCLRuntime;
-  class CGCUDARuntime;
-  class BlockFieldFlags;
-  class FunctionArgList;
-  class PGOProfileData;
+class CallArgList;
+class CodeGenFunction;
+class CodeGenTBAA;
+class CGCXXABI;
+class CGDebugInfo;
+class CGObjCRuntime;
+class CGOpenCLRuntime;
+class CGOpenMPRuntime;
+class CGCUDARuntime;
+class BlockFieldFlags;
+class FunctionArgList;
 
-  struct OrderGlobalInits {
-    unsigned int priority;
-    unsigned int lex_order;
-    OrderGlobalInits(unsigned int p, unsigned int l) 
+struct OrderGlobalInits {
+  unsigned int priority;
+  unsigned int lex_order;
+  OrderGlobalInits(unsigned int p, unsigned int l)
       : priority(p), lex_order(l) {}
-    
-    bool operator==(const OrderGlobalInits &RHS) const {
-      return priority == RHS.priority &&
-             lex_order == RHS.lex_order;
-    }
-    
-    bool operator<(const OrderGlobalInits &RHS) const {
-      return std::tie(priority, lex_order) <
-             std::tie(RHS.priority, RHS.lex_order);
-    }
+
+  bool operator==(const OrderGlobalInits &RHS) const {
+    return priority == RHS.priority && lex_order == RHS.lex_order;
+  }
+
+  bool operator<(const OrderGlobalInits &RHS) const {
+    return std::tie(priority, lex_order) <
+           std::tie(RHS.priority, RHS.lex_order);
+  }
+};
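
A hedged example of the ordering this comparator drives (GNU init_priority
attribute; names hypothetical):

    struct Logger { Logger(); };
    __attribute__((init_priority(101))) Logger early;
    __attribute__((init_priority(300))) Logger late;

early runs its constructor before late; globals with equal priority fall back
to lex_order, i.e. their order of appearance in the translation unit, via the
std::tie comparison.
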
+
+struct CodeGenTypeCache {
+  /// void
+  llvm::Type *VoidTy;
+
+  /// i8, i16, i32, and i64
+  llvm::IntegerType *Int8Ty, *Int16Ty, *Int32Ty, *Int64Ty;
+  /// float, double
+  llvm::Type *FloatTy, *DoubleTy;
+
+  /// int
+  llvm::IntegerType *IntTy;
+
+  /// intptr_t, size_t, and ptrdiff_t, which we assume are the same size.
+  union {
+    llvm::IntegerType *IntPtrTy;
+    llvm::IntegerType *SizeTy;
+    llvm::IntegerType *PtrDiffTy;
   };
 
-  struct CodeGenTypeCache {
-    /// void
-    llvm::Type *VoidTy;
-
-    /// i8, i16, i32, and i64
-    llvm::IntegerType *Int8Ty, *Int16Ty, *Int32Ty, *Int64Ty;
-    /// float, double
-    llvm::Type *FloatTy, *DoubleTy;
-
-    /// int
-    llvm::IntegerType *IntTy;
-
-    /// intptr_t, size_t, and ptrdiff_t, which we assume are the same size.
-    union {
-      llvm::IntegerType *IntPtrTy;
-      llvm::IntegerType *SizeTy;
-      llvm::IntegerType *PtrDiffTy;
-    };
-
-    /// void* in address space 0
-    union {
-      llvm::PointerType *VoidPtrTy;
-      llvm::PointerType *Int8PtrTy;
-    };
-
-    /// void** in address space 0
-    union {
-      llvm::PointerType *VoidPtrPtrTy;
-      llvm::PointerType *Int8PtrPtrTy;
-    };
-
-    /// The width of a pointer into the generic address space.
-    unsigned char PointerWidthInBits;
-
-    /// The size and alignment of a pointer into the generic address
-    /// space.
-    union {
-      unsigned char PointerAlignInBytes;
-      unsigned char PointerSizeInBytes;
-      unsigned char SizeSizeInBytes;     // sizeof(size_t)
-    };
-
-    llvm::CallingConv::ID RuntimeCC;
-    llvm::CallingConv::ID getRuntimeCC() const {
-      return RuntimeCC;
-    }
+  /// void* in address space 0
+  union {
+    llvm::PointerType *VoidPtrTy;
+    llvm::PointerType *Int8PtrTy;
   };
 
+  /// void** in address space 0
+  union {
+    llvm::PointerType *VoidPtrPtrTy;
+    llvm::PointerType *Int8PtrPtrTy;
+  };
+
+  /// The width of a pointer into the generic address space.
+  unsigned char PointerWidthInBits;
+
+  /// The size and alignment of a pointer into the generic address
+  /// space.
+  union {
+    unsigned char PointerAlignInBytes;
+    unsigned char PointerSizeInBytes;
+    unsigned char SizeSizeInBytes; // sizeof(size_t)
+  };
+
+  llvm::CallingConv::ID RuntimeCC;
+  llvm::CallingConv::ID getRuntimeCC() const { return RuntimeCC; }
+};
+
 struct RREntrypoints {
   RREntrypoints() { memset(this, 0, sizeof(*this)); }
   /// void objc_autoreleasePoolPop(void*);
@@ -220,13 +218,33 @@
   llvm::Constant *clang_arc_use;
 };
 
-/// CodeGenModule - This class organizes the cross-function state that is used
-/// while generating LLVM code.
+/// This class records statistics on instrumentation based profiling.
+struct InstrProfStats {
+  InstrProfStats() : Visited(0), Missing(0), Mismatched(0) {}
+  bool isOutOfDate() { return Missing || Mismatched; }
+  uint32_t Visited;
+  uint32_t Missing;
+  uint32_t Mismatched;
+};
+
+/// This class organizes the cross-function state that is used while generating
+/// LLVM code.
 class CodeGenModule : public CodeGenTypeCache {
   CodeGenModule(const CodeGenModule &) LLVM_DELETED_FUNCTION;
   void operator=(const CodeGenModule &) LLVM_DELETED_FUNCTION;
 
-  typedef std::vector<std::pair<llvm::Constant*, int> > CtorList;
+  struct Structor {
+    Structor() : Priority(0), Initializer(nullptr), AssociatedData(nullptr) {}
+    Structor(int Priority, llvm::Constant *Initializer,
+             llvm::Constant *AssociatedData)
+        : Priority(Priority), Initializer(Initializer),
+          AssociatedData(AssociatedData) {}
+    int Priority;
+    llvm::Constant *Initializer;
+    llvm::Constant *AssociatedData;
+  };
+
+  typedef std::vector<Structor> CtorList;
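
A sketch of where a Structor's fields come from (GNU constructor attribute;
function name hypothetical):

    __attribute__((constructor(200))) static void on_load(void) {}

This would be recorded roughly as Structor{200, on_load, nullptr}. The new
AssociatedData member corresponds to the optional third element of an
llvm.global_ctors entry, which lets an initializer be kept or discarded
together with a specific global rather than always running unconditionally.
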
 
   ASTContext &Context;
   const LangOptions &LangOpts;
@@ -247,32 +265,33 @@
   // if TheTargetCodeGenInfo is NULL
   CodeGenTypes Types;
  
-  /// VTables - Holds information about C++ vtables.
+  /// Holds information about C++ vtables.
   CodeGenVTables VTables;
 
   CGObjCRuntime* ObjCRuntime;
   CGOpenCLRuntime* OpenCLRuntime;
+  CGOpenMPRuntime* OpenMPRuntime;
   CGCUDARuntime* CUDARuntime;
   CGDebugInfo* DebugInfo;
   ARCEntrypoints *ARCData;
   llvm::MDNode *NoObjCARCExceptionsMetadata;
   RREntrypoints *RRData;
-  PGOProfileData *PGOData;
+  std::unique_ptr<llvm::IndexedInstrProfReader> PGOReader;
+  InstrProfStats PGOStats;
 
-  // WeakRefReferences - A set of references that have only been seen via
-  // a weakref so far. This is used to remove the weak of the reference if we
-  // ever see a direct reference or a definition.
+  // A set of references that have only been seen via a weakref so far. This is
+  // used to remove the weak of the reference if we ever see a direct reference
+  // or a definition.
   llvm::SmallPtrSet<llvm::GlobalValue*, 10> WeakRefReferences;
 
-  /// DeferredDecls - This contains all the decls which have definitions but
-  /// which are deferred for emission and therefore should only be output if
-  /// they are actually used.  If a decl is in this, then it is known to have
-  /// not been referenced yet.
+  /// This contains all the decls which have definitions but which are deferred
+  /// for emission and therefore should only be output if they are actually
+  /// used. If a decl is in this, then it is known to have not been referenced
+  /// yet.
   llvm::StringMap<GlobalDecl> DeferredDecls;
 
-  /// DeferredDeclsToEmit - This is a list of deferred decls which we have seen
-  /// that *are* actually referenced.  These get code generated when the module
-  /// is done.
+  /// This is a list of deferred decls which we have seen that *are* actually
+  /// referenced. These get code generated when the module is done.
   struct DeferredGlobal {
     DeferredGlobal(llvm::GlobalValue *GV, GlobalDecl GD) : GV(GV), GD(GD) {}
     llvm::AssertingVH<llvm::GlobalValue> GV;
@@ -290,25 +309,24 @@
   typedef llvm::StringMap<llvm::TrackingVH<llvm::Constant> > ReplacementsTy;
   ReplacementsTy Replacements;
 
-  /// DeferredVTables - A queue of (optional) vtables to consider emitting.
+  /// A queue of (optional) vtables to consider emitting.
   std::vector<const CXXRecordDecl*> DeferredVTables;
 
-  /// LLVMUsed - List of global values which are required to be
-  /// present in the object file; bitcast to i8*. This is used for
-  /// forcing visibility of symbols which may otherwise be optimized
-  /// out.
+  /// List of global values which are required to be present in the object file;
+  /// bitcast to i8*. This is used for forcing visibility of symbols which may
+  /// otherwise be optimized out.
   std::vector<llvm::WeakVH> LLVMUsed;
   std::vector<llvm::WeakVH> LLVMCompilerUsed;
 
-  /// GlobalCtors - Store the list of global constructors and their respective
-  /// priorities to be emitted when the translation unit is complete.
+  /// Store the list of global constructors and their respective priorities to
+  /// be emitted when the translation unit is complete.
   CtorList GlobalCtors;
 
-  /// GlobalDtors - Store the list of global destructors and their respective
-  /// priorities to be emitted when the translation unit is complete.
+  /// Store the list of global destructors and their respective priorities to be
+  /// emitted when the translation unit is complete.
   CtorList GlobalDtors;
 
-  /// MangledDeclNames - A map of canonical GlobalDecls to their mangled names.
+  /// A map of canonical GlobalDecls to their mangled names.
   llvm::DenseMap<GlobalDecl, StringRef> MangledDeclNames;
   llvm::BumpPtrAllocator MangledNamesAllocator;
   
@@ -347,8 +365,7 @@
   /// before any thread_local variable in this TU is odr-used.
   std::vector<llvm::Constant*> CXXThreadLocalInits;
 
-  /// CXXGlobalInits - Global variables with initializers that need to run
-  /// before main.
+  /// Global variables with initializers that need to run before main.
   std::vector<llvm::Constant*> CXXGlobalInits;
 
   /// When a C++ decl with an initializer is deferred, null is
@@ -366,12 +383,11 @@
     }
   };
 
-  /// - Global variables with initializers whose order of initialization
-  /// is set by init_priority attribute.
+  /// Global variables with initializers whose order of initialization is set
+  /// by the init_priority attribute.
   SmallVector<GlobalInitData, 8> PrioritizedCXXGlobalInits;
 
-  /// CXXGlobalDtors - Global destructor functions and arguments that need to
-  /// run on termination.
+  /// Global destructor functions and arguments that need to run on termination.
   std::vector<std::pair<llvm::WeakVH,llvm::Constant*> > CXXGlobalDtors;
 
   /// \brief The complete set of modules that has been imported.
@@ -383,12 +399,12 @@
   /// @name Cache for Objective-C runtime types
   /// @{
 
-  /// CFConstantStringClassRef - Cached reference to the class for constant
-  /// strings. This value has type int * but is actually an Obj-C class pointer.
+  /// Cached reference to the class for constant strings. This value has type
+  /// int * but is actually an Obj-C class pointer.
   llvm::WeakVH CFConstantStringClassRef;
 
-  /// ConstantStringClassRef - Cached reference to the class for constant
-  /// strings. This value has type int * but is actually an Obj-C class pointer.
+  /// Cached reference to the class for constant strings. This value has type
+  /// int * but is actually an Obj-C class pointer.
   llvm::WeakVH ConstantStringClassRef;
 
   /// \brief The LLVM type corresponding to NSConstantString.
@@ -404,6 +420,7 @@
   void createObjCRuntime();
 
   void createOpenCLRuntime();
+  void createOpenMPRuntime();
   void createCUDARuntime();
 
   bool isTriviallyRecursive(const FunctionDecl *F);
@@ -447,45 +464,48 @@
 
   void clear();
 
-  /// Release - Finalize LLVM code generation.
+  /// Finalize LLVM code generation.
   void Release();
 
-  /// getObjCRuntime() - Return a reference to the configured
-  /// Objective-C runtime.
+  /// Return a reference to the configured Objective-C runtime.
   CGObjCRuntime &getObjCRuntime() {
     if (!ObjCRuntime) createObjCRuntime();
     return *ObjCRuntime;
   }
 
-  /// hasObjCRuntime() - Return true iff an Objective-C runtime has
-  /// been configured.
+  /// Return true iff an Objective-C runtime has been configured.
   bool hasObjCRuntime() { return !!ObjCRuntime; }
 
-  /// getOpenCLRuntime() - Return a reference to the configured OpenCL runtime.
+  /// Return a reference to the configured OpenCL runtime.
   CGOpenCLRuntime &getOpenCLRuntime() {
-    assert(OpenCLRuntime != 0);
+    assert(OpenCLRuntime != nullptr);
     return *OpenCLRuntime;
   }
 
-  /// getCUDARuntime() - Return a reference to the configured CUDA runtime.
+  /// Return a reference to the configured OpenMP runtime.
+  CGOpenMPRuntime &getOpenMPRuntime() {
+    assert(OpenMPRuntime != nullptr);
+    return *OpenMPRuntime;
+  }
+
+  /// Return a reference to the configured CUDA runtime.
   CGCUDARuntime &getCUDARuntime() {
-    assert(CUDARuntime != 0);
+    assert(CUDARuntime != nullptr);
     return *CUDARuntime;
   }
 
   ARCEntrypoints &getARCEntrypoints() const {
-    assert(getLangOpts().ObjCAutoRefCount && ARCData != 0);
+    assert(getLangOpts().ObjCAutoRefCount && ARCData != nullptr);
     return *ARCData;
   }
 
   RREntrypoints &getRREntrypoints() const {
-    assert(RRData != 0);
+    assert(RRData != nullptr);
     return *RRData;
   }
 
-  PGOProfileData *getPGOData() const {
-    return PGOData;
-  }
+  InstrProfStats &getPGOStats() { return PGOStats; }
+  llvm::IndexedInstrProfReader *getPGOReader() const { return PGOReader.get(); }
 
   llvm::Constant *getStaticLocalDeclAddress(const VarDecl *D) {
     return StaticLocalDeclMap[D];
@@ -519,10 +539,10 @@
     AtomicGetterHelperFnMap[Ty] = Fn;
   }
 
-  llvm::Constant *getTypeDescriptor(QualType Ty) {
+  llvm::Constant *getTypeDescriptorFromMap(QualType Ty) {
     return TypeDescriptorMap[Ty];
   }
-  void setTypeDescriptor(QualType Ty, llvm::Constant *C) {
+  void setTypeDescriptorInMap(QualType Ty, llvm::Constant *C) {
     TypeDescriptorMap[Ty] = C;
   }
 
@@ -545,8 +565,8 @@
   const TargetInfo &getTarget() const { return Target; }
   CGCXXABI &getCXXABI() const { return *ABI; }
   llvm::LLVMContext &getLLVMContext() { return VMContext; }
-  
-  bool shouldUseTBAA() const { return TBAA != 0; }
+
+  bool shouldUseTBAA() const { return TBAA != nullptr; }
 
   const TargetCodeGenInfo &getTargetCodeGenInfo(); 
   
@@ -584,15 +604,14 @@
                            llvm::MDNode *TBAAInfo,
                            bool ConvertTypeToTag = true);
 
-  /// getSize - Emit the given number of characters as a value of type size_t.
+  /// Emit the given number of characters as a value of type size_t.
   llvm::ConstantInt *getSize(CharUnits numChars);
 
-  /// setGlobalVisibility - Set the visibility for the given LLVM
-  /// GlobalValue.
+  /// Set the visibility for the given LLVM GlobalValue.
   void setGlobalVisibility(llvm::GlobalValue *GV, const NamedDecl *D) const;
 
-  /// setTLSMode - Set the TLS mode for the given LLVM GlobalVariable
-  /// for the thread-local variable declaration D.
+  /// Set the TLS mode for the given LLVM GlobalVariable for the thread-local
+  /// variable declaration D.
   void setTLSMode(llvm::GlobalVariable *GV, const VarDecl &D) const;
 
   static llvm::GlobalValue::VisibilityTypes GetLLVMVisibility(Visibility V) {
@@ -617,50 +636,47 @@
       return GetAddrOfGlobalVar(cast<VarDecl>(GD.getDecl()));
   }
 
-  /// CreateOrReplaceCXXRuntimeVariable - Will return a global variable of the
-  /// given type. If a variable with a different type already exists then a new 
-  /// variable with the right type will be created and all uses of the old
-  /// variable will be replaced with a bitcast to the new variable.
+  /// Will return a global variable of the given type. If a variable with a
+  /// different type already exists, then a new variable with the right type
+  /// will be created and all uses of the old variable will be replaced with a
+  /// bitcast to the new variable.
   llvm::GlobalVariable *
   CreateOrReplaceCXXRuntimeVariable(StringRef Name, llvm::Type *Ty,
                                     llvm::GlobalValue::LinkageTypes Linkage);
 
-  /// GetGlobalVarAddressSpace - Return the address space of the underlying
-  /// global variable for D, as determined by its declaration.  Normally this
-  /// is the same as the address space of D's type, but in CUDA, address spaces
-  /// are associated with declarations, not types.
+  /// Return the address space of the underlying global variable for D, as
+  /// determined by its declaration. Normally this is the same as the address
+  /// space of D's type, but in CUDA, address spaces are associated with
+  /// declarations, not types.
   unsigned GetGlobalVarAddressSpace(const VarDecl *D, unsigned AddrSpace);
 
-  /// GetAddrOfGlobalVar - Return the llvm::Constant for the address of the
-  /// given global variable.  If Ty is non-null and if the global doesn't exist,
-  /// then it will be greated with the specified type instead of whatever the
-  /// normal requested type would be.
+  /// Return the llvm::Constant for the address of the given global variable.
+  /// If Ty is non-null and if the global doesn't exist, then it will be created
+  /// with the specified type instead of whatever the normal requested type
+  /// would be.
   llvm::Constant *GetAddrOfGlobalVar(const VarDecl *D,
-                                     llvm::Type *Ty = 0);
+                                     llvm::Type *Ty = nullptr);
 
-
-  /// GetAddrOfFunction - Return the address of the given function.  If Ty is
-  /// non-null, then this function will use the specified type if it has to
-  /// create it.
+  /// Return the address of the given function. If Ty is non-null, then this
+  /// function will use the specified type if it has to create it.
   llvm::Constant *GetAddrOfFunction(GlobalDecl GD, llvm::Type *Ty = 0,
                                     bool ForVTable = false,
                                     bool DontDefer = false);
 
-  /// GetAddrOfRTTIDescriptor - Get the address of the RTTI descriptor 
-  /// for the given type.
+  /// Get the address of the RTTI descriptor for the given type.
   llvm::Constant *GetAddrOfRTTIDescriptor(QualType Ty, bool ForEH = false);
 
-  /// GetAddrOfUuidDescriptor - Get the address of a uuid descriptor .
+  /// Get the address of a uuid descriptor.
   llvm::Constant *GetAddrOfUuidDescriptor(const CXXUuidofExpr* E);
 
-  /// GetAddrOfThunk - Get the address of the thunk for the given global decl.
+  /// Get the address of the thunk for the given global decl.
   llvm::Constant *GetAddrOfThunk(GlobalDecl GD, const ThunkInfo &Thunk);
 
-  /// GetWeakRefReference - Get a reference to the target of VD.
+  /// Get a reference to the target of VD.
   llvm::Constant *GetWeakRefReference(const ValueDecl *VD);
 
-  /// GetNonVirtualBaseClassOffset - Returns the offset from a derived class to 
-  /// a class. Returns null if the offset is 0. 
+  /// Returns the offset from a derived class to a base class. Returns null if
+  /// the offset is 0.
   llvm::Constant *
   GetNonVirtualBaseClassOffset(const CXXRecordDecl *ClassDecl,
                                CastExpr::path_const_iterator PathBegin,
@@ -696,67 +712,66 @@
 
   llvm::FoldingSet<ByrefHelpers> ByrefHelpersCache;
 
-  /// getUniqueBlockCount - Fetches the global unique block count.
+  /// Fetches the global unique block count.
   int getUniqueBlockCount() { return ++Block.GlobalUniqueCount; }
   
-  /// getBlockDescriptorType - Fetches the type of a generic block
-  /// descriptor.
+  /// Fetches the type of a generic block descriptor.
   llvm::Type *getBlockDescriptorType();
 
-  /// getGenericBlockLiteralType - The type of a generic block literal.
+  /// The type of a generic block literal.
   llvm::Type *getGenericBlockLiteralType();
 
-  /// GetAddrOfGlobalBlock - Gets the address of a block which
-  /// requires no captures.
+  /// \brief Gets or creates a Microsoft TypeDescriptor.
+  llvm::Constant *getMSTypeDescriptor(QualType Ty);
+  /// \brief Gets or creates a Microsoft CompleteObjectLocator.
+  llvm::GlobalVariable *getMSCompleteObjectLocator(const CXXRecordDecl *RD,
+                                                   const VPtrInfo *Info);
+
+  /// Gets the address of a block which requires no captures.
   llvm::Constant *GetAddrOfGlobalBlock(const BlockExpr *BE, const char *);
   
-  /// GetAddrOfConstantCFString - Return a pointer to a constant CFString object
-  /// for the given string.
+  /// Return a pointer to a constant CFString object for the given string.
   llvm::Constant *GetAddrOfConstantCFString(const StringLiteral *Literal);
-  
-  /// GetAddrOfConstantString - Return a pointer to a constant NSString object
-  /// for the given string. Or a user defined String object as defined via
+
+  /// Return a pointer to a constant NSString object for the given string, or a
+  /// user-defined String object as defined via the
   /// -fconstant-string-class=class_name option.
   llvm::Constant *GetAddrOfConstantString(const StringLiteral *Literal);
 
-  /// GetConstantArrayFromStringLiteral - Return a constant array for the given
-  /// string.
+  /// Return a constant array for the given string.
   llvm::Constant *GetConstantArrayFromStringLiteral(const StringLiteral *E);
 
-  /// GetAddrOfConstantStringFromLiteral - Return a pointer to a constant array
-  /// for the given string literal.
+  /// Return a pointer to a constant array for the given string literal.
   llvm::Constant *GetAddrOfConstantStringFromLiteral(const StringLiteral *S);
 
-  /// GetAddrOfConstantStringFromObjCEncode - Return a pointer to a constant
-  /// array for the given ObjCEncodeExpr node.
+  /// Return a pointer to a constant array for the given ObjCEncodeExpr node.
   llvm::Constant *GetAddrOfConstantStringFromObjCEncode(const ObjCEncodeExpr *);
 
-  /// GetAddrOfConstantString - Returns a pointer to a character array
-  /// containing the literal. This contents are exactly that of the given
-  /// string, i.e. it will not be null terminated automatically; see
-  /// GetAddrOfConstantCString. Note that whether the result is actually a
-  /// pointer to an LLVM constant depends on Feature.WriteableStrings.
+  /// Returns a pointer to a character array containing the literal. The
+  /// contents are exactly those of the given string, i.e. it will not be null
+  /// terminated automatically; see GetAddrOfConstantCString. Note that whether
+  /// the result is actually a pointer to an LLVM constant depends on
+  /// LangOpts.WritableStrings.
   ///
   /// The result has pointer to array type.
   ///
   /// \param GlobalName If provided, the name to use for the global
   /// (if one is created).
   llvm::Constant *GetAddrOfConstantString(StringRef Str,
-                                          const char *GlobalName=0,
+                                          const char *GlobalName=nullptr,
                                           unsigned Alignment=0);
 
-  /// GetAddrOfConstantCString - Returns a pointer to a character array
-  /// containing the literal and a terminating '\0' character. The result has
-  /// pointer to array type.
+  /// Returns a pointer to a character array containing the literal and a
+  /// terminating '\0' character. The result has pointer to array type.
   ///
   /// \param GlobalName If provided, the name to use for the global (if one is
   /// created).
   llvm::Constant *GetAddrOfConstantCString(const std::string &str,
-                                           const char *GlobalName=0,
+                                           const char *GlobalName=nullptr,
                                            unsigned Alignment=0);
 
-  /// GetAddrOfConstantCompoundLiteral - Returns a pointer to a constant global
-  /// variable for the given file-scope compound literal expression.
+  /// Returns a pointer to a constant global variable for the given file-scope
+  /// compound literal expression.
   llvm::Constant *GetAddrOfConstantCompoundLiteral(const CompoundLiteralExpr*E);
 
   /// \brief Returns a pointer to a global variable representing a temporary
@@ -768,33 +783,31 @@
   /// Objective-C fast enumeration loop (for..in).
   QualType getObjCFastEnumerationStateType();
   
-  /// GetAddrOfCXXConstructor - Return the address of the constructor of the
-  /// given type.
-  llvm::GlobalValue *GetAddrOfCXXConstructor(const CXXConstructorDecl *ctor,
-                                             CXXCtorType ctorType,
-                                             const CGFunctionInfo *fnInfo = 0,
-                                             bool DontDefer = false);
+  /// Return the address of the constructor of the given type.
+  llvm::GlobalValue *
+  GetAddrOfCXXConstructor(const CXXConstructorDecl *ctor, CXXCtorType ctorType,
+                          const CGFunctionInfo *fnInfo = nullptr,
+                          bool DontDefer = false);
 
-  /// GetAddrOfCXXDestructor - Return the address of the constructor of the
-  /// given type.
-  llvm::GlobalValue *GetAddrOfCXXDestructor(const CXXDestructorDecl *dtor,
-                                            CXXDtorType dtorType,
-                                            const CGFunctionInfo *fnInfo = 0,
-                                            llvm::FunctionType *fnType = 0,
-                                            bool DontDefer = false);
+  /// Return the address of the destructor of the given type.
+  llvm::GlobalValue *
+  GetAddrOfCXXDestructor(const CXXDestructorDecl *dtor,
+                         CXXDtorType dtorType,
+                         const CGFunctionInfo *fnInfo = nullptr,
+                         llvm::FunctionType *fnType = nullptr,
+                         bool DontDefer = false);
 
-  /// getBuiltinLibFunction - Given a builtin id for a function like
-  /// "__builtin_fabsf", return a Function* for "fabsf".
+  /// Given a builtin id for a function like "__builtin_fabsf", return a
+  /// Function* for "fabsf".
   llvm::Value *getBuiltinLibFunction(const FunctionDecl *FD,
                                      unsigned BuiltinID);
 
   llvm::Function *getIntrinsic(unsigned IID, ArrayRef<llvm::Type*> Tys = None);
 
-  /// EmitTopLevelDecl - Emit code for a single top level declaration.
+  /// Emit code for a single top level declaration.
   void EmitTopLevelDecl(Decl *D);
 
-  /// HandleCXXStaticMemberVarInstantiation - Tell the consumer that this
-  // variable has been instantiated.
+  /// Tell the consumer that this variable has been instantiated.
   void HandleCXXStaticMemberVarInstantiation(VarDecl *VD);
 
   /// \brief If the declaration has internal linkage but is inside an
@@ -809,20 +822,17 @@
   /// Add a global to a list to be added to the llvm.compiler.used metadata.
   void addCompilerUsedGlobal(llvm::GlobalValue *GV);
 
-  /// AddCXXDtorEntry - Add a destructor and object to add to the C++ global
-  /// destructor function.
+  /// Add a destructor and object to add to the C++ global destructor function.
   void AddCXXDtorEntry(llvm::Constant *DtorFn, llvm::Constant *Object) {
     CXXGlobalDtors.push_back(std::make_pair(DtorFn, Object));
   }
 
-  /// CreateRuntimeFunction - Create a new runtime function with the specified
-  /// type and name.
+  /// Create a new runtime function with the specified type and name.
   llvm::Constant *CreateRuntimeFunction(llvm::FunctionType *Ty,
                                         StringRef Name,
                                         llvm::AttributeSet ExtraAttrs =
                                           llvm::AttributeSet());
-  /// CreateRuntimeVariable - Create a new runtime global variable with the
-  /// specified type and name.
+  /// Create a new runtime global variable with the specified type and name.
   llvm::Constant *CreateRuntimeVariable(llvm::Type *Ty,
                                         StringRef Name);
 
@@ -839,89 +849,80 @@
   llvm::Constant *getLLVMLifetimeStartFn();
   llvm::Constant *getLLVMLifetimeEndFn();
 
-  // UpdateCompleteType - Make sure that this type is translated.
+  // Make sure that this type is translated.
   void UpdateCompletedType(const TagDecl *TD);
 
   llvm::Constant *getMemberPointerConstant(const UnaryOperator *e);
 
-  /// EmitConstantInit - Try to emit the initializer for the given declaration
-  /// as a constant; returns 0 if the expression cannot be emitted as a
-  /// constant.
-  llvm::Constant *EmitConstantInit(const VarDecl &D, CodeGenFunction *CGF = 0);
+  /// Try to emit the initializer for the given declaration as a constant;
+  /// returns 0 if the expression cannot be emitted as a constant.
+  llvm::Constant *EmitConstantInit(const VarDecl &D,
+                                   CodeGenFunction *CGF = nullptr);
 
-  /// EmitConstantExpr - Try to emit the given expression as a
-  /// constant; returns 0 if the expression cannot be emitted as a
-  /// constant.
+  /// Try to emit the given expression as a constant; returns 0 if the
+  /// expression cannot be emitted as a constant.
   llvm::Constant *EmitConstantExpr(const Expr *E, QualType DestType,
-                                   CodeGenFunction *CGF = 0);
+                                   CodeGenFunction *CGF = nullptr);
 
-  /// EmitConstantValue - Emit the given constant value as a constant, in the
-  /// type's scalar representation.
+  /// Emit the given constant value as a constant, in the type's scalar
+  /// representation.
   llvm::Constant *EmitConstantValue(const APValue &Value, QualType DestType,
-                                    CodeGenFunction *CGF = 0);
+                                    CodeGenFunction *CGF = nullptr);
 
-  /// EmitConstantValueForMemory - Emit the given constant value as a constant,
-  /// in the type's memory representation.
+  /// Emit the given constant value as a constant, in the type's memory
+  /// representation.
   llvm::Constant *EmitConstantValueForMemory(const APValue &Value,
                                              QualType DestType,
-                                             CodeGenFunction *CGF = 0);
+                                             CodeGenFunction *CGF = nullptr);
 
-  /// EmitNullConstant - Return the result of value-initializing the given
-  /// type, i.e. a null expression of the given type.  This is usually,
-  /// but not always, an LLVM null constant.
+  /// Return the result of value-initializing the given type, i.e. a null
+  /// expression of the given type.  This is usually, but not always, an LLVM
+  /// null constant.
   llvm::Constant *EmitNullConstant(QualType T);
 
-  /// EmitNullConstantForBase - Return a null constant appropriate for 
-  /// zero-initializing a base class with the given type.  This is usually,
-  /// but not always, an LLVM null constant.
+  /// Return a null constant appropriate for zero-initializing a base class with
+  /// the given type. This is usually, but not always, an LLVM null constant.
   llvm::Constant *EmitNullConstantForBase(const CXXRecordDecl *Record);
 
-  /// Error - Emit a general error that something can't be done.
+  /// Emit a general error that something can't be done.
   void Error(SourceLocation loc, StringRef error);
 
-  /// ErrorUnsupported - Print out an error that codegen doesn't support the
-  /// specified stmt yet.
+  /// Print out an error that codegen doesn't support the specified stmt yet.
   void ErrorUnsupported(const Stmt *S, const char *Type);
 
-  /// ErrorUnsupported - Print out an error that codegen doesn't support the
-  /// specified decl yet.
+  /// Print out an error that codegen doesn't support the specified decl yet.
   void ErrorUnsupported(const Decl *D, const char *Type);
 
-  /// SetInternalFunctionAttributes - Set the attributes on the LLVM
-  /// function for the given decl and function info. This applies
-  /// attributes necessary for handling the ABI as well as user
-  /// specified attributes like section.
+  /// Set the attributes on the LLVM function for the given decl and function
+  /// info. This applies attributes necessary for handling the ABI as well as
+  /// user-specified attributes like section.
   void SetInternalFunctionAttributes(const Decl *D, llvm::Function *F,
                                      const CGFunctionInfo &FI);
 
-  /// SetLLVMFunctionAttributes - Set the LLVM function attributes
-  /// (sext, zext, etc).
+  /// Set the LLVM function attributes (sext, zext, etc).
   void SetLLVMFunctionAttributes(const Decl *D,
                                  const CGFunctionInfo &Info,
                                  llvm::Function *F);
 
-  /// SetLLVMFunctionAttributesForDefinition - Set the LLVM function attributes
-  /// which only apply to a function definintion.
+  /// Set the LLVM function attributes which only apply to a function
+  /// definition.
   void SetLLVMFunctionAttributesForDefinition(const Decl *D, llvm::Function *F);
 
-  /// ReturnTypeUsesSRet - Return true iff the given type uses 'sret' when used
-  /// as a return type.
+  /// Return true iff the given type uses 'sret' when used as a return type.
   bool ReturnTypeUsesSRet(const CGFunctionInfo &FI);
 
-  /// ReturnSlotInterferesWithArgs - Return true iff the given type uses an
-  /// argument slot when 'sret' is used as a return type.
+  /// Return true iff the given type uses an argument slot when 'sret' is used
+  /// as a return type.
   bool ReturnSlotInterferesWithArgs(const CGFunctionInfo &FI);
 
-  /// ReturnTypeUsesFPRet - Return true iff the given type uses 'fpret' when
-  /// used as a return type.
+  /// Return true iff the given type uses 'fpret' when used as a return type.
   bool ReturnTypeUsesFPRet(QualType ResultType);
 
-  /// ReturnTypeUsesFP2Ret - Return true iff the given type uses 'fp2ret' when
-  /// used as a return type.
+  /// Return true iff the given type uses 'fp2ret' when used as a return type.
   bool ReturnTypeUsesFP2Ret(QualType ResultType);
 
-  /// ConstructAttributeList - Get the LLVM attributes and calling convention to
-  /// use for a particular function type.
+  /// Get the LLVM attributes and calling convention to use for a particular
+  /// function type.
   ///
   /// \param Info - The function type information.
   /// \param TargetDecl - The decl these attributes are being constructed
@@ -943,8 +944,7 @@
 
   void EmitVTable(CXXRecordDecl *Class, bool DefinitionRequired);
 
-  /// EmitFundamentalRTTIDescriptors - Emit the RTTI descriptors for the
-  /// builtin types.
+  /// Emit the RTTI descriptors for the builtin types.
   void EmitFundamentalRTTIDescriptors();
 
   /// \brief Appends Opts to the "Linker Options" metadata value.
@@ -958,23 +958,29 @@
 
   llvm::GlobalVariable::LinkageTypes getFunctionLinkage(GlobalDecl GD);
 
-  void setFunctionLinkage(GlobalDecl GD, llvm::GlobalValue *V) {
-    V->setLinkage(getFunctionLinkage(GD));
+  void setFunctionLinkage(GlobalDecl GD, llvm::Function *F) {
+    F->setLinkage(getFunctionLinkage(GD));
   }
 
-  /// getVTableLinkage - Return the appropriate linkage for the vtable, VTT,
-  /// and type information of the given class.
+  /// \brief Returns the appropriate linkage for the TypeInfo struct for a type.
+  llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(QualType Ty);
+
+  /// Return the appropriate linkage for the vtable, VTT, and type information
+  /// of the given class.
   llvm::GlobalVariable::LinkageTypes getVTableLinkage(const CXXRecordDecl *RD);
 
-  /// GetTargetTypeStoreSize - Return the store size, in character units, of
-  /// the given LLVM type.
+  /// Return the store size, in character units, of the given LLVM type.
   CharUnits GetTargetTypeStoreSize(llvm::Type *Ty) const;
   
-  /// GetLLVMLinkageVarDefinition - Returns LLVM linkage for a global 
-  /// variable.
-  llvm::GlobalValue::LinkageTypes 
-  GetLLVMLinkageVarDefinition(const VarDecl *D, bool isConstant);
-  
+  /// Returns LLVM linkage for a declarator.
+  llvm::GlobalValue::LinkageTypes
+  getLLVMLinkageForDeclarator(const DeclaratorDecl *D, GVALinkage Linkage,
+                              bool IsConstantVariable);
+
+  /// Returns LLVM linkage for a global variable definition.
+  llvm::GlobalValue::LinkageTypes
+  getLLVMLinkageVarDefinition(const VarDecl *VD, bool IsConstant);
+
   /// Emit all the global annotations.
   void EmitGlobalAnnotations();
 
@@ -987,8 +993,8 @@
   /// Emit the annotation line number.
   llvm::Constant *EmitAnnotationLineNo(SourceLocation L);
 
-  /// EmitAnnotateAttr - Generate the llvm::ConstantStruct which contains the
-  /// annotation information for a given GlobalValue. The annotation struct is
+  /// Generate the llvm::ConstantStruct which contains the annotation
+  /// information for a given GlobalValue. The annotation struct is
   /// {i8 *, i8 *, i8 *, i32}. The first field is a constant expression, the
   /// GlobalValue being annotated. The second field is the constant string
   /// created from the AnnotateAttr's annotation. The third field is a constant
@@ -1012,8 +1018,8 @@
     DeferredVTables.push_back(RD);
   }
 
-  /// EmitGlobal - Emit code for a singal global function or var decl. Forward
-  /// declarations are emitted lazily.
+  /// Emit code for a single global function or var decl. Forward declarations
+  /// are emitted lazily.
   void EmitGlobal(GlobalDecl D);
 
 private:
@@ -1026,27 +1032,26 @@
 
   llvm::Constant *GetOrCreateLLVMGlobal(StringRef MangledName,
                                         llvm::PointerType *PTy,
-                                        const VarDecl *D,
-                                        bool UnnamedAddr = false);
+                                        const VarDecl *D);
 
-  /// SetCommonAttributes - Set attributes which are common to any
-  /// form of a global definition (alias, Objective-C method,
-  /// function, global variable).
+  /// Set attributes which are common to any form of a global definition (alias,
+  /// Objective-C method, function, global variable).
   ///
   /// NOTE: This should only be called for definitions.
   void SetCommonAttributes(const Decl *D, llvm::GlobalValue *GV);
 
-  /// SetFunctionDefinitionAttributes - Set attributes for a global definition.
-  void SetFunctionDefinitionAttributes(const FunctionDecl *D,
-                                       llvm::GlobalValue *GV);
+  void setNonAliasAttributes(const Decl *D, llvm::GlobalObject *GO);
 
-  /// SetFunctionAttributes - Set function attributes for a function
-  /// declaration.
+  /// Set attributes for a global definition.
+  void setFunctionDefinitionAttributes(const FunctionDecl *D,
+                                       llvm::Function *F);
+
+  /// Set function attributes for a function declaration.
   void SetFunctionAttributes(GlobalDecl GD,
                              llvm::Function *F,
                              bool IsIncompleteFunction);
 
-  void EmitGlobalDefinition(GlobalDecl D, llvm::GlobalValue *GV = 0);
+  void EmitGlobalDefinition(GlobalDecl D, llvm::GlobalValue *GV = nullptr);
 
   void EmitGlobalFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV);
   void EmitGlobalVarDefinition(const VarDecl *D);
@@ -1064,44 +1069,41 @@
   void EmitLinkageSpec(const LinkageSpecDecl *D);
   void CompleteDIClassType(const CXXMethodDecl* D);
 
-  /// EmitCXXConstructor - Emit a single constructor with the given type from
-  /// a C++ constructor Decl.
+  /// Emit a single constructor with the given type from a C++ constructor Decl.
   void EmitCXXConstructor(const CXXConstructorDecl *D, CXXCtorType Type);
 
-  /// EmitCXXDestructor - Emit a single destructor with the given type from
-  /// a C++ destructor Decl.
+  /// Emit a single destructor with the given type from a C++ destructor Decl.
   void EmitCXXDestructor(const CXXDestructorDecl *D, CXXDtorType Type);
 
   /// \brief Emit the function that initializes C++ thread_local variables.
   void EmitCXXThreadLocalInitFunc();
 
-  /// EmitCXXGlobalInitFunc - Emit the function that initializes C++ globals.
+  /// Emit the function that initializes C++ globals.
   void EmitCXXGlobalInitFunc();
 
-  /// EmitCXXGlobalDtorFunc - Emit the function that destroys C++ globals.
+  /// Emit the function that destroys C++ globals.
   void EmitCXXGlobalDtorFunc();
 
-  /// EmitCXXGlobalVarDeclInitFunc - Emit the function that initializes the
-  /// specified global (if PerformInit is true) and registers its destructor.
+  /// Emit the function that initializes the specified global (if PerformInit is
+  /// true) and registers its destructor.
   void EmitCXXGlobalVarDeclInitFunc(const VarDecl *D,
                                     llvm::GlobalVariable *Addr,
                                     bool PerformInit);
 
   // FIXME: Hardcoding priority here is gross.
-  void AddGlobalCtor(llvm::Function *Ctor, int Priority=65535);
-  void AddGlobalDtor(llvm::Function *Dtor, int Priority=65535);
+  void AddGlobalCtor(llvm::Function *Ctor, int Priority = 65535,
+                     llvm::Constant *AssociatedData = nullptr);
+  void AddGlobalDtor(llvm::Function *Dtor, int Priority = 65535);
 
-  /// EmitCtorList - Generates a global array of functions and priorities using
-  /// the given list and name. This array will have appending linkage and is
-  /// suitable for use as a LLVM constructor or destructor array.
+  /// Generates a global array of functions and priorities using the given list
+  /// and name. This array will have appending linkage and is suitable for use
+  /// as a LLVM constructor or destructor array.
   void EmitCtorList(const CtorList &Fns, const char *GlobalName);
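
For orientation, the array built by EmitCtorList now carries a third field per
entry, matching the new AssociatedData parameter of AddGlobalCtor above. A
sketch of the entry layout in C++ terms (the struct and field names here are
illustrative, not from the source):

    struct CtorEntry {
      int32_t Priority;      // 65535 is the default; lower values run first
      void (*Func)();        // the global constructor/destructor function
      void *AssociatedData;  // new third slot; null when nothing is attached
    };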
 
-  /// EmitFundamentalRTTIDescriptor - Emit the RTTI descriptors for the
-  /// given type.
+  /// Emit the RTTI descriptors for the given type.
   void EmitFundamentalRTTIDescriptor(QualType Type);
 
-  /// EmitDeferred - Emit any needed decls for which code generation
-  /// was deferred.
+  /// Emit any needed decls for which code generation was deferred.
   void EmitDeferred();
 
   /// Call replaceAllUsesWith on all pairs in Replacements.
@@ -1109,8 +1111,7 @@
 
   void checkAliases();
 
-  /// EmitDeferredVTables - Emit any vtables which we deferred and
-  /// still have a use for.
+  /// Emit any vtables which we deferred and still have a use for.
   void EmitDeferredVTables();
 
   /// Emit the llvm.used and llvm.compiler.used metadata.
@@ -1128,20 +1129,19 @@
   /// \brief Emit the Clang version as llvm.ident metadata.
   void EmitVersionIdentMetadata();
 
-  /// EmitCoverageFile - Emit the llvm.gcov metadata used to tell LLVM where
-  /// to emit the .gcno and .gcda files in a way that persists in .bc files.
+  /// Emit the llvm.gcov metadata used to tell LLVM where to emit the .gcno and
+  /// .gcda files in a way that persists in .bc files.
   void EmitCoverageFile();
 
   /// Emits the initializer for a uuidof string.
   llvm::Constant *EmitUuidofInitializer(StringRef uuidstr, QualType IIDType);
 
-  /// MayDeferGeneration - Determine if the given decl can be emitted
-  /// lazily; this is only relevant for definitions. The given decl
-  /// must be either a function or var decl.
+  /// Determine if the given decl can be emitted lazily; this is only relevant
+  /// for definitions. The given decl must be either a function or var decl.
   bool MayDeferGeneration(const ValueDecl *D);
 
-  /// SimplifyPersonality - Check whether we can use a "simpler", more
-  /// core exceptions personality function.
+  /// Check whether we can use a "simpler", more core exceptions personality
+  /// function.
   void SimplifyPersonality();
 };
 }  // end namespace CodeGen
diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp
index ac6b7bc..22534b8 100644
--- a/lib/CodeGen/CodeGenPGO.cpp
+++ b/lib/CodeGen/CodeGenPGO.cpp
@@ -15,143 +15,15 @@
 #include "CodeGenFunction.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/StmtVisitor.h"
-#include "llvm/Config/config.h" // for strtoull()/strtoll() define
 #include "llvm/IR/MDBuilder.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/Support/Endian.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MD5.h"
 
 using namespace clang;
 using namespace CodeGen;
 
-static void ReportBadPGOData(CodeGenModule &CGM, const char *Message) {
-  DiagnosticsEngine &Diags = CGM.getDiags();
-  unsigned diagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, "%0");
-  Diags.Report(diagID) << Message;
-}
-
-PGOProfileData::PGOProfileData(CodeGenModule &CGM, std::string Path)
-  : CGM(CGM) {
-  if (llvm::MemoryBuffer::getFile(Path, DataBuffer)) {
-    ReportBadPGOData(CGM, "failed to open pgo data file");
-    return;
-  }
-
-  if (DataBuffer->getBufferSize() > std::numeric_limits<unsigned>::max()) {
-    ReportBadPGOData(CGM, "pgo data file too big");
-    return;
-  }
-
-  // Scan through the data file and map each function to the corresponding
-  // file offset where its counts are stored.
-  const char *BufferStart = DataBuffer->getBufferStart();
-  const char *BufferEnd = DataBuffer->getBufferEnd();
-  const char *CurPtr = BufferStart;
-  uint64_t MaxCount = 0;
-  while (CurPtr < BufferEnd) {
-    // Read the function name.
-    const char *FuncStart = CurPtr;
-    // For Objective-C methods, the name may include whitespace, so search
-    // backward from the end of the line to find the space that separates the
-    // name from the number of counters. (This is a temporary hack since we are
-    // going to completely replace this file format in the near future.)
-    CurPtr = strchr(CurPtr, '\n');
-    if (!CurPtr) {
-      ReportBadPGOData(CGM, "pgo data file has malformed function entry");
-      return;
-    }
-    StringRef FuncName(FuncStart, CurPtr - FuncStart);
-
-    // Skip over the function hash.
-    CurPtr = strchr(++CurPtr, '\n');
-    if (!CurPtr) {
-      ReportBadPGOData(CGM, "pgo data file is missing the function hash");
-      return;
-    }
-
-    // Read the number of counters.
-    char *EndPtr;
-    unsigned NumCounters = strtol(++CurPtr, &EndPtr, 10);
-    if (EndPtr == CurPtr || *EndPtr != '\n' || NumCounters <= 0) {
-      ReportBadPGOData(CGM, "pgo data file has unexpected number of counters");
-      return;
-    }
-    CurPtr = EndPtr;
-
-    // Read function count.
-    uint64_t Count = strtoll(CurPtr, &EndPtr, 10);
-    if (EndPtr == CurPtr || *EndPtr != '\n') {
-      ReportBadPGOData(CGM, "pgo-data file has bad count value");
-      return;
-    }
-    CurPtr = EndPtr; // Point to '\n'.
-    FunctionCounts[FuncName] = Count;
-    MaxCount = Count > MaxCount ? Count : MaxCount;
-
-    // There is one line for each counter; skip over those lines.
-    // Since function count is already read, we start the loop from 1.
-    for (unsigned N = 1; N < NumCounters; ++N) {
-      CurPtr = strchr(++CurPtr, '\n');
-      if (!CurPtr) {
-        ReportBadPGOData(CGM, "pgo data file is missing some counter info");
-        return;
-      }
-    }
-
-    // Skip over the blank line separating functions.
-    CurPtr += 2;
-
-    DataOffsets[FuncName] = FuncStart - BufferStart;
-  }
-  MaxFunctionCount = MaxCount;
-}
-
-bool PGOProfileData::getFunctionCounts(StringRef FuncName, uint64_t &FuncHash,
-                                       std::vector<uint64_t> &Counts) {
-  // Find the relevant section of the pgo-data file.
-  llvm::StringMap<unsigned>::const_iterator OffsetIter =
-    DataOffsets.find(FuncName);
-  if (OffsetIter == DataOffsets.end())
-    return true;
-  const char *CurPtr = DataBuffer->getBufferStart() + OffsetIter->getValue();
-
-  // Skip over the function name.
-  CurPtr = strchr(CurPtr, '\n');
-  assert(CurPtr && "pgo-data has corrupted function entry");
-
-  char *EndPtr;
-  // Read the function hash.
-  FuncHash = strtoll(++CurPtr, &EndPtr, 10);
-  assert(EndPtr != CurPtr && *EndPtr == '\n' &&
-         "pgo-data file has corrupted function hash");
-  CurPtr = EndPtr;
-
-  // Read the number of counters.
-  unsigned NumCounters = strtol(++CurPtr, &EndPtr, 10);
-  assert(EndPtr != CurPtr && *EndPtr == '\n' && NumCounters > 0 &&
-         "pgo-data file has corrupted number of counters");
-  CurPtr = EndPtr;
-
-  Counts.reserve(NumCounters);
-
-  for (unsigned N = 0; N < NumCounters; ++N) {
-    // Read the count value.
-    uint64_t Count = strtoll(CurPtr, &EndPtr, 10);
-    if (EndPtr == CurPtr || *EndPtr != '\n') {
-      ReportBadPGOData(CGM, "pgo-data file has bad count value");
-      return true;
-    }
-    Counts.push_back(Count);
-    CurPtr = EndPtr + 1;
-  }
-
-  // Make sure the number of counters matches up.
-  if (Counts.size() != NumCounters) {
-    ReportBadPGOData(CGM, "pgo-data file has inconsistent counters");
-    return true;
-  }
-
-  return false;
-}
-
 void CodeGenPGO::setFuncName(llvm::Function *Fn) {
   RawFuncName = Fn->getName();
 
@@ -268,13 +140,22 @@
   Data->setSection(getDataSection(CGM));
   Data->setAlignment(8);
 
+  // Hide all these symbols so that we correctly get a copy for each
+  // executable.  The profile format expects names and counters to be
+  // contiguous, so references into shared objects would be invalid.
+  if (!llvm::GlobalValue::isLocalLinkage(VarLinkage)) {
+    Name->setVisibility(llvm::GlobalValue::HiddenVisibility);
+    Data->setVisibility(llvm::GlobalValue::HiddenVisibility);
+    RegionCounters->setVisibility(llvm::GlobalValue::HiddenVisibility);
+  }
+
   // Make sure the data doesn't get deleted.
   CGM.addUsedGlobal(Data);
   return Data;
 }
 
 void CodeGenPGO::emitInstrumentationData() {
-  if (!CGM.getCodeGenOpts().ProfileInstrGenerate)
+  if (!RegionCounters)
     return;
 
   // Build the data.
@@ -294,9 +175,8 @@
   if (!CGM.getCodeGenOpts().ProfileInstrGenerate)
     return nullptr;
 
-  // Only need to create this once per module.
-  if (CGM.getModule().getFunction("__llvm_profile_init"))
-    return nullptr;
+  assert(CGM.getModule().getFunction("__llvm_profile_init") == nullptr &&
+         "profile initialization already emitted");
 
   // Get the function to call at initialization.
   llvm::Constant *RegisterF = getRegisterFunc(CGM);
@@ -322,144 +202,154 @@
 }
 
 namespace {
-  /// A StmtVisitor that fills a map of statements to PGO counters.
-  struct MapRegionCounters : public ConstStmtVisitor<MapRegionCounters> {
+/// \brief Stable hasher for PGO region counters.
+///
+/// PGOHash produces a stable hash of a given function's control flow.
+///
+/// Changing the output of this hash will invalidate all previously generated
+/// profiles -- i.e., don't do it.
+///
+/// \note  When this hash does eventually change (years?), we still need to
+/// support old hashes.  We'll need to pull in the version number from the
+/// profile data format and use the matching hash function.
+class PGOHash {
+  uint64_t Working;
+  unsigned Count;
+  llvm::MD5 MD5;
+
+  static const int NumBitsPerType = 6;
+  static const unsigned NumTypesPerWord = sizeof(uint64_t) * 8 / NumBitsPerType;
+  static const unsigned TooBig = 1u << NumBitsPerType;
+
+public:
+  /// \brief Hash values for AST nodes.
+  ///
+  /// Distinct values for AST nodes that have region counters attached.
+  ///
+  /// These values must be stable.  All new members must be added at the end,
+  /// and no members should be removed.  Changing the enumeration value for an
+  /// AST node will affect the hash of every function that contains that node.
+  enum HashType : unsigned char {
+    None = 0,
+    LabelStmt = 1,
+    WhileStmt,
+    DoStmt,
+    ForStmt,
+    CXXForRangeStmt,
+    ObjCForCollectionStmt,
+    SwitchStmt,
+    CaseStmt,
+    DefaultStmt,
+    IfStmt,
+    CXXTryStmt,
+    CXXCatchStmt,
+    ConditionalOperator,
+    BinaryOperatorLAnd,
+    BinaryOperatorLOr,
+    BinaryConditionalOperator,
+
+    // Keep this last.  It's for the static assert that follows.
+    LastHashType
+  };
+  static_assert(LastHashType <= TooBig, "Too many types in HashType");
+
+  // TODO: When this format changes, take in a version number here, and use the
+  // old hash calculation for file formats that used the old hash.
+  PGOHash() : Working(0), Count(0) {}
+  void combine(HashType Type);
+  uint64_t finalize();
+};
+const int PGOHash::NumBitsPerType;
+const unsigned PGOHash::NumTypesPerWord;
+const unsigned PGOHash::TooBig;
+
+  /// A RecursiveASTVisitor that fills a map of statements to PGO counters.
+  struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
     /// The next counter value to assign.
     unsigned NextCounter;
+    /// The function hash.
+    PGOHash Hash;
     /// The map of statements to counters.
     llvm::DenseMap<const Stmt *, unsigned> &CounterMap;
 
     MapRegionCounters(llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
         : NextCounter(0), CounterMap(CounterMap) {}
 
-    void VisitChildren(const Stmt *S) {
-      for (Stmt::const_child_range I = S->children(); I; ++I)
-        if (*I)
-         this->Visit(*I);
-    }
-    void VisitStmt(const Stmt *S) { VisitChildren(S); }
+    // Blocks and lambdas are handled as separate functions, so we need not
+    // traverse them in the parent context.
+    bool TraverseBlockExpr(BlockExpr *BE) { return true; }
+    bool TraverseLambdaBody(LambdaExpr *LE) { return true; }
+    bool TraverseCapturedStmt(CapturedStmt *CS) { return true; }
 
-    /// Assign a counter to track entry to the function body.
-    void VisitFunctionDecl(const FunctionDecl *S) {
-      CounterMap[S->getBody()] = NextCounter++;
-      Visit(S->getBody());
+    bool VisitDecl(const Decl *D) {
+      switch (D->getKind()) {
+      default:
+        break;
+      case Decl::Function:
+      case Decl::CXXMethod:
+      case Decl::CXXConstructor:
+      case Decl::CXXDestructor:
+      case Decl::CXXConversion:
+      case Decl::ObjCMethod:
+      case Decl::Block:
+      case Decl::Captured:
+        CounterMap[D->getBody()] = NextCounter++;
+        break;
+      }
+      return true;
     }
-    void VisitObjCMethodDecl(const ObjCMethodDecl *S) {
-      CounterMap[S->getBody()] = NextCounter++;
-      Visit(S->getBody());
-    }
-    void VisitBlockDecl(const BlockDecl *S) {
-      CounterMap[S->getBody()] = NextCounter++;
-      Visit(S->getBody());
-    }
-    /// Assign a counter to track the block following a label.
-    void VisitLabelStmt(const LabelStmt *S) {
+
+    bool VisitStmt(const Stmt *S) {
+      auto Type = getHashType(S);
+      if (Type == PGOHash::None)
+        return true;
+
       CounterMap[S] = NextCounter++;
-      Visit(S->getSubStmt());
+      Hash.combine(Type);
+      return true;
     }
-    /// Assign a counter for the body of a while loop.
-    void VisitWhileStmt(const WhileStmt *S) {
-      CounterMap[S] = NextCounter++;
-      Visit(S->getCond());
-      Visit(S->getBody());
-    }
-    /// Assign a counter for the body of a do-while loop.
-    void VisitDoStmt(const DoStmt *S) {
-      CounterMap[S] = NextCounter++;
-      Visit(S->getBody());
-      Visit(S->getCond());
-    }
-    /// Assign a counter for the body of a for loop.
-    void VisitForStmt(const ForStmt *S) {
-      CounterMap[S] = NextCounter++;
-      if (S->getInit())
-        Visit(S->getInit());
-      const Expr *E;
-      if ((E = S->getCond()))
-        Visit(E);
-      if ((E = S->getInc()))
-        Visit(E);
-      Visit(S->getBody());
-    }
-    /// Assign a counter for the body of a for-range loop.
-    void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
-      CounterMap[S] = NextCounter++;
-      Visit(S->getRangeStmt());
-      Visit(S->getBeginEndStmt());
-      Visit(S->getCond());
-      Visit(S->getLoopVarStmt());
-      Visit(S->getBody());
-      Visit(S->getInc());
-    }
-    /// Assign a counter for the body of a for-collection loop.
-    void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
-      CounterMap[S] = NextCounter++;
-      Visit(S->getElement());
-      Visit(S->getBody());
-    }
-    /// Assign a counter for the exit block of the switch statement.
-    void VisitSwitchStmt(const SwitchStmt *S) {
-      CounterMap[S] = NextCounter++;
-      Visit(S->getCond());
-      Visit(S->getBody());
-    }
-    /// Assign a counter for a particular case in a switch. This counts jumps
-    /// from the switch header as well as fallthrough from the case before this
-    /// one.
-    void VisitCaseStmt(const CaseStmt *S) {
-      CounterMap[S] = NextCounter++;
-      Visit(S->getSubStmt());
-    }
-    /// Assign a counter for the default case of a switch statement. The count
-    /// is the number of branches from the loop header to the default, and does
-    /// not include fallthrough from previous cases. If we have multiple
-    /// conditional branch blocks from the switch instruction to the default
-    /// block, as with large GNU case ranges, this is the counter for the last
-    /// edge in that series, rather than the first.
-    void VisitDefaultStmt(const DefaultStmt *S) {
-      CounterMap[S] = NextCounter++;
-      Visit(S->getSubStmt());
-    }
-    /// Assign a counter for the "then" part of an if statement. The count for
-    /// the "else" part, if it exists, will be calculated from this counter.
-    void VisitIfStmt(const IfStmt *S) {
-      CounterMap[S] = NextCounter++;
-      Visit(S->getCond());
-      Visit(S->getThen());
-      if (S->getElse())
-        Visit(S->getElse());
-    }
-    /// Assign a counter for the continuation block of a C++ try statement.
-    void VisitCXXTryStmt(const CXXTryStmt *S) {
-      CounterMap[S] = NextCounter++;
-      Visit(S->getTryBlock());
-      for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
-        Visit(S->getHandler(I));
-    }
-    /// Assign a counter for a catch statement's handler block.
-    void VisitCXXCatchStmt(const CXXCatchStmt *S) {
-      CounterMap[S] = NextCounter++;
-      Visit(S->getHandlerBlock());
-    }
-    /// Assign a counter for the "true" part of a conditional operator. The
-    /// count in the "false" part will be calculated from this counter.
-    void VisitConditionalOperator(const ConditionalOperator *E) {
-      CounterMap[E] = NextCounter++;
-      Visit(E->getCond());
-      Visit(E->getTrueExpr());
-      Visit(E->getFalseExpr());
-    }
-    /// Assign a counter for the right hand side of a logical and operator.
-    void VisitBinLAnd(const BinaryOperator *E) {
-      CounterMap[E] = NextCounter++;
-      Visit(E->getLHS());
-      Visit(E->getRHS());
-    }
-    /// Assign a counter for the right hand side of a logical or operator.
-    void VisitBinLOr(const BinaryOperator *E) {
-      CounterMap[E] = NextCounter++;
-      Visit(E->getLHS());
-      Visit(E->getRHS());
+    PGOHash::HashType getHashType(const Stmt *S) {
+      switch (S->getStmtClass()) {
+      default:
+        break;
+      case Stmt::LabelStmtClass:
+        return PGOHash::LabelStmt;
+      case Stmt::WhileStmtClass:
+        return PGOHash::WhileStmt;
+      case Stmt::DoStmtClass:
+        return PGOHash::DoStmt;
+      case Stmt::ForStmtClass:
+        return PGOHash::ForStmt;
+      case Stmt::CXXForRangeStmtClass:
+        return PGOHash::CXXForRangeStmt;
+      case Stmt::ObjCForCollectionStmtClass:
+        return PGOHash::ObjCForCollectionStmt;
+      case Stmt::SwitchStmtClass:
+        return PGOHash::SwitchStmt;
+      case Stmt::CaseStmtClass:
+        return PGOHash::CaseStmt;
+      case Stmt::DefaultStmtClass:
+        return PGOHash::DefaultStmt;
+      case Stmt::IfStmtClass:
+        return PGOHash::IfStmt;
+      case Stmt::CXXTryStmtClass:
+        return PGOHash::CXXTryStmt;
+      case Stmt::CXXCatchStmtClass:
+        return PGOHash::CXXCatchStmt;
+      case Stmt::ConditionalOperatorClass:
+        return PGOHash::ConditionalOperator;
+      case Stmt::BinaryConditionalOperatorClass:
+        return PGOHash::BinaryConditionalOperator;
+      case Stmt::BinaryOperatorClass: {
+        const BinaryOperator *BO = cast<BinaryOperator>(S);
+        if (BO->getOpcode() == BO_LAnd)
+          return PGOHash::BinaryOperatorLAnd;
+        if (BO->getOpcode() == BO_LOr)
+          return PGOHash::BinaryOperatorLOr;
+        break;
+      }
+      }
+      return PGOHash::None;
     }
   };
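
To make the new traversal concrete, here is a sketch of how counters are
assigned for a small function (the exact numbering depends on traversal order,
but the body always receives the entry counter first):

    int f(int x) {   // counter 0: function body entry, via VisitDecl
      if (x > 0)     // counter 1: IfStmt, also combined into the hash
        return 1;
      while (x < 0)  // counter 2: WhileStmt, also combined into the hash
        ++x;
      return 0;      // no counter: getHashType returns PGOHash::None
    }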
 
@@ -476,7 +366,7 @@
     /// The map of statements to count values.
     llvm::DenseMap<const Stmt *, uint64_t> &CountMap;
 
-    /// BreakContinueStack - Keep counts of breaks and continues inside loops. 
+    /// BreakContinueStack - Keep counts of breaks and continues inside loops.
     struct BreakContinue {
       uint64_t BreakCount;
       uint64_t ContinueCount;
@@ -503,25 +393,41 @@
       }
     }
 
-    void VisitFunctionDecl(const FunctionDecl *S) {
-      RegionCounter Cnt(PGO, S->getBody());
+    void VisitFunctionDecl(const FunctionDecl *D) {
+      // Counter tracks entry to the function body.
+      RegionCounter Cnt(PGO, D->getBody());
       Cnt.beginRegion();
-      CountMap[S->getBody()] = PGO.getCurrentRegionCount();
-      Visit(S->getBody());
+      CountMap[D->getBody()] = PGO.getCurrentRegionCount();
+      Visit(D->getBody());
     }
 
-    void VisitObjCMethodDecl(const ObjCMethodDecl *S) {
-      RegionCounter Cnt(PGO, S->getBody());
+    // Skip lambda expressions. We visit these as FunctionDecls when we're
+    // generating them and aren't interested in the body when generating a
+    // parent context.
+    void VisitLambdaExpr(const LambdaExpr *LE) {}
+
+    void VisitCapturedDecl(const CapturedDecl *D) {
+      // Counter tracks entry to the capture body.
+      RegionCounter Cnt(PGO, D->getBody());
       Cnt.beginRegion();
-      CountMap[S->getBody()] = PGO.getCurrentRegionCount();
-      Visit(S->getBody());
+      CountMap[D->getBody()] = PGO.getCurrentRegionCount();
+      Visit(D->getBody());
     }
 
-    void VisitBlockDecl(const BlockDecl *S) {
-      RegionCounter Cnt(PGO, S->getBody());
+    void VisitObjCMethodDecl(const ObjCMethodDecl *D) {
+      // Counter tracks entry to the method body.
+      RegionCounter Cnt(PGO, D->getBody());
       Cnt.beginRegion();
-      CountMap[S->getBody()] = PGO.getCurrentRegionCount();
-      Visit(S->getBody());
+      CountMap[D->getBody()] = PGO.getCurrentRegionCount();
+      Visit(D->getBody());
+    }
+
+    void VisitBlockDecl(const BlockDecl *D) {
+      // Counter tracks entry to the block body.
+      RegionCounter Cnt(PGO, D->getBody());
+      Cnt.beginRegion();
+      CountMap[D->getBody()] = PGO.getCurrentRegionCount();
+      Visit(D->getBody());
     }
 
     void VisitReturnStmt(const ReturnStmt *S) {
@@ -540,6 +446,7 @@
 
     void VisitLabelStmt(const LabelStmt *S) {
       RecordNextStmtCount = false;
+      // Counter tracks the block following the label.
       RegionCounter Cnt(PGO, S);
       Cnt.beginRegion();
       CountMap[S] = PGO.getCurrentRegionCount();
@@ -564,6 +471,7 @@
 
     void VisitWhileStmt(const WhileStmt *S) {
       RecordStmtCount(S);
+      // Counter tracks the body of the loop.
       RegionCounter Cnt(PGO, S);
       BreakContinueStack.push_back(BreakContinue());
       // Visit the body region first so the break/continue adjustments can be
@@ -589,6 +497,7 @@
 
     void VisitDoStmt(const DoStmt *S) {
       RecordStmtCount(S);
+      // Counter tracks the body of the loop.
       RegionCounter Cnt(PGO, S);
       BreakContinueStack.push_back(BreakContinue());
       Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
@@ -615,6 +524,7 @@
       RecordStmtCount(S);
       if (S->getInit())
         Visit(S->getInit());
+      // Counter tracks the body of the loop.
       RegionCounter Cnt(PGO, S);
       BreakContinueStack.push_back(BreakContinue());
       // Visit the body region first. (This is basically the same as a while
@@ -653,6 +563,7 @@
       RecordStmtCount(S);
       Visit(S->getRangeStmt());
       Visit(S->getBeginEndStmt());
+      // Counter tracks the body of the loop.
       RegionCounter Cnt(PGO, S);
       BreakContinueStack.push_back(BreakContinue());
       // Visit the body region first. (This is basically the same as a while
@@ -687,6 +598,7 @@
     void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
       RecordStmtCount(S);
       Visit(S->getElement());
+      // Counter tracks the body of the loop.
       RegionCounter Cnt(PGO, S);
       BreakContinueStack.push_back(BreakContinue());
       Cnt.beginRegion();
@@ -708,6 +620,7 @@
       BreakContinue BC = BreakContinueStack.pop_back_val();
       if (!BreakContinueStack.empty())
         BreakContinueStack.back().ContinueCount += BC.ContinueCount;
+      // Counter tracks the exit block of the switch.
       RegionCounter ExitCnt(PGO, S);
       ExitCnt.beginRegion();
       RecordNextStmtCount = true;
@@ -715,6 +628,9 @@
 
     void VisitCaseStmt(const CaseStmt *S) {
       RecordNextStmtCount = false;
+      // Counter for this particular case. This counts only jumps from the
+      // switch header and does not include fallthrough from the case before
+      // this one.
       RegionCounter Cnt(PGO, S);
       Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
       CountMap[S] = Cnt.getCount();
@@ -724,6 +640,8 @@
 
     void VisitDefaultStmt(const DefaultStmt *S) {
       RecordNextStmtCount = false;
+      // Counter for this default case. This does not include fallthrough from
+      // the previous case.
       RegionCounter Cnt(PGO, S);
       Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
       CountMap[S] = Cnt.getCount();
@@ -733,6 +651,8 @@
 
     void VisitIfStmt(const IfStmt *S) {
       RecordStmtCount(S);
+      // Counter tracks the "then" part of an if statement. The count for
+      // the "else" part, if it exists, will be calculated from this counter.
       RegionCounter Cnt(PGO, S);
       Visit(S->getCond());
 
@@ -756,6 +676,7 @@
       Visit(S->getTryBlock());
       for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
         Visit(S->getHandler(I));
+      // Counter tracks the continuation block of the try statement.
       RegionCounter Cnt(PGO, S);
       Cnt.beginRegion();
       RecordNextStmtCount = true;
@@ -763,14 +684,18 @@
 
     void VisitCXXCatchStmt(const CXXCatchStmt *S) {
       RecordNextStmtCount = false;
+      // Counter tracks the catch statement's handler block.
       RegionCounter Cnt(PGO, S);
       Cnt.beginRegion();
       CountMap[S] = PGO.getCurrentRegionCount();
       Visit(S->getHandlerBlock());
     }
 
-    void VisitConditionalOperator(const ConditionalOperator *E) {
+    void VisitAbstractConditionalOperator(
+        const AbstractConditionalOperator *E) {
       RecordStmtCount(E);
+      // Counter tracks the "true" part of a conditional operator. The
+      // count in the "false" part will be calculated from this counter.
       RegionCounter Cnt(PGO, E);
       Visit(E->getCond());
 
@@ -790,6 +715,7 @@
 
     void VisitBinLAnd(const BinaryOperator *E) {
       RecordStmtCount(E);
+      // Counter tracks the right hand side of a logical and operator.
       RegionCounter Cnt(PGO, E);
       Visit(E->getLHS());
       Cnt.beginRegion();
@@ -802,6 +728,7 @@
 
     void VisitBinLOr(const BinaryOperator *E) {
       RecordStmtCount(E);
+      // Counter tracks the right hand side of a logical or operator.
       RegionCounter Cnt(PGO, E);
       Visit(E->getLHS());
       Cnt.beginRegion();
@@ -814,9 +741,46 @@
   };
 }
 
+void PGOHash::combine(HashType Type) {
+  // Check that we never combine 0 and only have six bits.
+  assert(Type && "Hash is invalid: unexpected type 0");
+  assert(unsigned(Type) < TooBig && "Hash is invalid: too many types");
+
+  // Pass through MD5 if enough work has built up.
+  if (Count && Count % NumTypesPerWord == 0) {
+    using namespace llvm::support;
+    uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
+    MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
+    Working = 0;
+  }
+
+  // Accumulate the current type.
+  ++Count;
+  Working = Working << NumBitsPerType | Type;
+}
+
+uint64_t PGOHash::finalize() {
+  // Use Working as the hash directly if we never used MD5.
+  if (Count <= NumTypesPerWord)
+    // No need to byte swap here, since none of the math was endian-dependent.
+    // This number will be byte-swapped as required on endianness transitions,
+    // so we will see the same value on the other side.
+    return Working;
+
+  // Check for remaining work in Working.
+  if (Working)
+    MD5.update(Working);
+
+  // Finalize the MD5 and return the hash.
+  llvm::MD5::MD5Result Result;
+  MD5.final(Result);
+  using namespace llvm::support;
+  return endian::read<uint64_t, little, unaligned>(Result);
+}
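
A worked check of the arithmetic above, using the enumeration values defined
earlier (IfStmt is 10, WhileStmt is 2, and NumTypesPerWord is 64 / 6 = 10):

    PGOHash H;
    H.combine(PGOHash::IfStmt);    // Working = 0  << 6 | 10 = 0xA,   Count = 1
    H.combine(PGOHash::WhileStmt); // Working = 10 << 6 | 2  = 0x282, Count = 2
    uint64_t Hash = H.finalize();  // Count <= 10, so Hash == 0x282; no MD5

Any function with ten or fewer hashed constructs thus gets a cheap direct
hash; only larger functions pay for the MD5 pass-through.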
+
 static void emitRuntimeHook(CodeGenModule &CGM) {
-  LLVM_CONSTEXPR const char *RuntimeVarName = "__llvm_profile_runtime";
-  LLVM_CONSTEXPR const char *RuntimeUserName = "__llvm_profile_runtime_user";
+  const char *const RuntimeVarName = "__llvm_profile_runtime";
+  const char *const RuntimeUserName = "__llvm_profile_runtime_user";
   if (CGM.getModule().getGlobalVariable(RuntimeVarName))
     return;
 
@@ -845,10 +809,10 @@
 
 void CodeGenPGO::assignRegionCounters(const Decl *D, llvm::Function *Fn) {
   bool InstrumentRegions = CGM.getCodeGenOpts().ProfileInstrGenerate;
-  PGOProfileData *PGOData = CGM.getPGOData();
-  if (!InstrumentRegions && !PGOData)
+  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
+  if (!InstrumentRegions && !PGOReader)
     return;
-  if (!D)
+  if (D->isImplicit())
     return;
   setFuncName(Fn);
 
@@ -872,10 +836,10 @@
     emitRuntimeHook(CGM);
     emitCounterVariables();
   }
-  if (PGOData) {
-    loadRegionCounts(PGOData);
+  if (PGOReader) {
+    loadRegionCounts(PGOReader);
     computeRegionCounts(D);
-    applyFunctionAttributes(PGOData, Fn);
+    applyFunctionAttributes(PGOReader, Fn);
   }
 }
 
@@ -883,14 +847,16 @@
   RegionCounterMap.reset(new llvm::DenseMap<const Stmt *, unsigned>);
   MapRegionCounters Walker(*RegionCounterMap);
   if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
-    Walker.VisitFunctionDecl(FD);
+    Walker.TraverseDecl(const_cast<FunctionDecl *>(FD));
   else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
-    Walker.VisitObjCMethodDecl(MD);
+    Walker.TraverseDecl(const_cast<ObjCMethodDecl *>(MD));
   else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
-    Walker.VisitBlockDecl(BD);
+    Walker.TraverseDecl(const_cast<BlockDecl *>(BD));
+  else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
+    Walker.TraverseDecl(const_cast<CapturedDecl *>(CD));
+  assert(Walker.NextCounter > 0 && "no entry counter mapped for decl");
   NumRegionCounters = Walker.NextCounter;
-  // FIXME: The number of counters isn't sufficient for the hash
-  FunctionHash = NumRegionCounters;
+  FunctionHash = Walker.Hash.finalize();
 }
 
 void CodeGenPGO::computeRegionCounts(const Decl *D) {
@@ -902,14 +868,17 @@
     Walker.VisitObjCMethodDecl(MD);
   else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
     Walker.VisitBlockDecl(BD);
+  else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
+    Walker.VisitCapturedDecl(const_cast<CapturedDecl *>(CD));
 }
 
-void CodeGenPGO::applyFunctionAttributes(PGOProfileData *PGOData,
-                                         llvm::Function *Fn) {
+void
+CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
+                                    llvm::Function *Fn) {
   if (!haveRegionCounts())
     return;
 
-  uint64_t MaxFunctionCount = PGOData->getMaximumFunctionCount();
+  uint64_t MaxFunctionCount = PGOReader->getMaximumFunctionCount();
   uint64_t FunctionCount = getRegionCount(0);
   if (FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount))
     // Turn on InlineHint attribute for hot functions.
@@ -943,22 +912,25 @@
   Builder.CreateStore(Count, Addr);
 }
 
-void CodeGenPGO::loadRegionCounts(PGOProfileData *PGOData) {
-  // For now, ignore the counts from the PGO data file only if the number of
-  // counters does not match. This could be tightened down in the future to
-  // ignore counts when the input changes in various ways, e.g., by comparing a
-  // hash value based on some characteristics of the input.
+void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader) {
+  CGM.getPGOStats().Visited++;
   RegionCounts.reset(new std::vector<uint64_t>);
   uint64_t Hash;
-  if (PGOData->getFunctionCounts(getFuncName(), Hash, *RegionCounts) ||
-      Hash != FunctionHash || RegionCounts->size() != NumRegionCounters)
+  if (PGOReader->getFunctionCounts(getFuncName(), Hash, *RegionCounts)) {
+    CGM.getPGOStats().Missing++;
     RegionCounts.reset();
+  } else if (Hash != FunctionHash ||
+             RegionCounts->size() != NumRegionCounters) {
+    CGM.getPGOStats().Mismatched++;
+    RegionCounts.reset();
+  }
 }
 
 void CodeGenPGO::destroyRegionCounters() {
   RegionCounterMap.reset();
   StmtCountMap.reset();
   RegionCounts.reset();
+  RegionCounters = nullptr;
 }
 
 /// \brief Calculate what to divide by to scale weights.
@@ -1004,9 +976,13 @@
   if (Weights.size() < 2)
     return nullptr;
 
+  // Check for empty weights.
+  uint64_t MaxWeight = *std::max_element(Weights.begin(), Weights.end());
+  if (MaxWeight == 0)
+    return nullptr;
+
   // Calculate how to scale down to 32-bits.
-  uint64_t Scale = calculateWeightScale(*std::max_element(Weights.begin(),
-                                                          Weights.end()));
+  uint64_t Scale = calculateWeightScale(MaxWeight);
 
   SmallVector<uint32_t, 16> ScaledWeights;
   ScaledWeights.reserve(Weights.size());
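
The new MaxWeight check deserves a concrete case: an all-zero weight set (say,
from a branch in code never reached during the training run) carries no
information, so no weights metadata is emitted at all. A sketch:

    uint64_t Never[] = {0, 0};    // MaxWeight == 0  -> returns nullptr
    uint64_t Taken[] = {60, 40};  // MaxWeight == 60 -> scaled to 32 bits
                                  //                    and emitted as usual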
diff --git a/lib/CodeGen/CodeGenPGO.h b/lib/CodeGen/CodeGenPGO.h
index c59a58e..c434808 100644
--- a/lib/CodeGen/CodeGenPGO.h
+++ b/lib/CodeGen/CodeGenPGO.h
@@ -26,28 +26,6 @@
 namespace CodeGen {
 class RegionCounter;
 
-/// The raw counter data from an instrumented PGO binary
-class PGOProfileData {
-private:
-  /// The PGO data
-  std::unique_ptr<llvm::MemoryBuffer> DataBuffer;
-  /// Offsets into DataBuffer for each function's counters
-  llvm::StringMap<unsigned> DataOffsets;
-  /// Execution counts for each function.
-  llvm::StringMap<uint64_t> FunctionCounts;
-  /// The maximal execution count among all functions.
-  uint64_t MaxFunctionCount;
-  CodeGenModule &CGM;
-public:
-  PGOProfileData(CodeGenModule &CGM, std::string Path);
-  /// Fill Counts with the profile data for the given function name. Returns
-  /// false on success.
-  bool getFunctionCounts(StringRef FuncName, uint64_t &FuncHash,
-                         std::vector<uint64_t> &Counts);
-  /// Return the maximum of all known function counts.
-  uint64_t getMaximumFunctionCount() { return MaxFunctionCount; }
-};
-
 /// Per-function PGO state. This class should generally not be used directly,
 /// but instead through the CodeGenFunction and RegionCounter types.
 class CodeGenPGO {
@@ -67,13 +45,13 @@
 
 public:
   CodeGenPGO(CodeGenModule &CGM)
-      : CGM(CGM), NumRegionCounters(0), FunctionHash(0), RegionCounters(0),
-        CurrentRegionCount(0) {}
+      : CGM(CGM), NumRegionCounters(0), FunctionHash(0),
+        RegionCounters(nullptr), CurrentRegionCount(0) {}
 
   /// Whether or not we have PGO region data for the current function. This is
   /// false both when we have no data at all and when our data has been
   /// discarded.
-  bool haveRegionCounts() const { return RegionCounts != 0; }
+  bool haveRegionCounts() const { return RegionCounts != nullptr; }
 
   /// Get the string used to identify this function in the profile data.
   /// For functions with local linkage, this includes the main file name.
@@ -138,8 +116,9 @@
   void setFuncName(llvm::Function *Fn);
   void mapRegionCounters(const Decl *D);
   void computeRegionCounts(const Decl *D);
-  void applyFunctionAttributes(PGOProfileData *PGOData, llvm::Function *Fn);
-  void loadRegionCounts(PGOProfileData *PGOData);
+  void applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
+                               llvm::Function *Fn);
+  void loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader);
   void emitCounterVariables();
   llvm::GlobalVariable *buildDataVar();
 
@@ -149,7 +128,7 @@
   /// Return the region counter for the given statement. This should only be
   /// called on statements that have a dedicated counter.
   unsigned getRegionCounter(const Stmt *S) {
-    if (RegionCounterMap == 0)
+    if (!RegionCounterMap)
       return 0;
     return (*RegionCounterMap)[S];
   }
diff --git a/lib/CodeGen/CodeGenTBAA.cpp b/lib/CodeGen/CodeGenTBAA.cpp
index 699cc2e..53ba02a 100644
--- a/lib/CodeGen/CodeGenTBAA.cpp
+++ b/lib/CodeGen/CodeGenTBAA.cpp
@@ -33,7 +33,7 @@
                          const CodeGenOptions &CGO,
                          const LangOptions &Features, MangleContext &MContext)
   : Context(Ctx), CodeGenOpts(CGO), Features(Features), MContext(MContext),
-    MDHelper(VMContext), Root(0), Char(0) {
+    MDHelper(VMContext), Root(nullptr), Char(nullptr) {
 }
 
 CodeGenTBAA::~CodeGenTBAA() {
@@ -88,7 +88,7 @@
 CodeGenTBAA::getTBAAInfo(QualType QTy) {
   // At -O0 or relaxed aliasing, TBAA is not emitted for regular types.
   if (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing)
-    return NULL;
+    return nullptr;
 
   // If the type has the may_alias attribute (even on a typedef), it is
   // effectively in the general char alias class.
@@ -221,7 +221,7 @@
     return MDHelper.createTBAAStructNode(Fields);
 
   // For now, handle any other kind of type conservatively.
-  return StructMetadataCache[Ty] = NULL;
+  return StructMetadataCache[Ty] = nullptr;
 }
 
 /// Check if the given type can be handled by path-aware TBAA.
@@ -261,7 +261,7 @@
       else
         FieldNode = getTBAAInfo(FieldQTy);
       if (!FieldNode)
-        return StructTypeMetadataCache[Ty] = NULL;
+        return StructTypeMetadataCache[Ty] = nullptr;
       Fields.push_back(std::make_pair(
           FieldNode, Layout.getFieldOffset(idx) / Context.getCharWidth()));
     }
@@ -280,7 +280,7 @@
       MDHelper.createTBAAStructTypeNode(OutName, Fields);
   }
 
-  return StructMetadataCache[Ty] = NULL;
+  return StructMetadataCache[Ty] = nullptr;
 }
 
 /// Return a TBAA tag node for both scalar TBAA and struct-path aware TBAA.
@@ -288,7 +288,7 @@
 CodeGenTBAA::getTBAAStructTagInfo(QualType BaseQTy, llvm::MDNode *AccessNode,
                                   uint64_t Offset) {
   if (!AccessNode)
-    return NULL;
+    return nullptr;
 
   if (!CodeGenOpts.StructPathTBAA)
     return getTBAAScalarTagInfo(AccessNode);
@@ -298,7 +298,7 @@
   if (llvm::MDNode *N = StructTagMetadataCache[PathTag])
     return N;
 
-  llvm::MDNode *BNode = 0;
+  llvm::MDNode *BNode = nullptr;
   if (isTBAAPathStruct(BaseQTy))
     BNode  = getTBAAStructTypeInfo(BaseQTy);
   if (!BNode)
@@ -312,7 +312,7 @@
 llvm::MDNode *
 CodeGenTBAA::getTBAAScalarTagInfo(llvm::MDNode *AccessNode) {
   if (!AccessNode)
-    return NULL;
+    return nullptr;
   if (llvm::MDNode *N = ScalarTagMetadataCache[AccessNode])
     return N;
 
diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp
index b7f3746..d4e2262 100644
--- a/lib/CodeGen/CodeGenTypes.cpp
+++ b/lib/CodeGen/CodeGenTypes.cpp
@@ -189,15 +189,15 @@
 bool CodeGenTypes::isFuncParamTypeConvertible(QualType Ty) {
   // If this isn't a tagged type, we can convert it!
   const TagType *TT = Ty->getAs<TagType>();
-  if (TT == 0) return true;
-    
+  if (!TT) return true;
+
   // Incomplete types cannot be converted.
   if (TT->isIncompleteType())
     return false;
   
   // If this is an enum, then it is always safe to convert.
   const RecordType *RT = dyn_cast<RecordType>(TT);
-  if (RT == 0) return true;
+  if (!RT) return true;
 
   // Otherwise, we have to be careful.  If it is a struct that we're in the
   // process of expanding, then we can't convert the function type.  That's ok
@@ -242,6 +242,10 @@
       if (!ConvertType(ED->getIntegerType())->isIntegerTy(32))
         TypeCache.clear();
     }
+    // If necessary, provide the full definition of a type only used with a
+    // declaration so far.
+    if (CGDebugInfo *DI = CGM.getModuleDebugInfo())
+      DI->completeType(ED);
     return;
   }
   
@@ -300,7 +304,7 @@
     return TCI->second;
 
   // If we don't have it in the cache, convert it now.
-  llvm::Type *ResultType = 0;
+  llvm::Type *ResultType = nullptr;
   switch (Ty->getTypeClass()) {
   case Type::Record: // Handled above.
 #define TYPE(Class, Base)
@@ -625,7 +629,7 @@
   llvm::StructType *&Entry = RecordDeclTypes[Key];
 
   // If we don't have a StructType at all yet, create the forward declaration.
-  if (Entry == 0) {
+  if (!Entry) {
     Entry = llvm::StructType::create(getLLVMContext());
     addRecordTypeName(RD, Entry, "");
   }
@@ -634,7 +638,7 @@
   // If this is still a forward declaration, or the LLVM type is already
   // complete, there's nothing more to do.
   RD = RD->getDefinition();
-  if (RD == 0 || !RD->isCompleteDefinition() || !Ty->isOpaque())
+  if (!RD || !RD->isCompleteDefinition() || !Ty->isOpaque())
     return Ty;
   
   // If converting this type would cause us to infinitely loop, don't do it!
diff --git a/lib/CodeGen/EHScopeStack.h b/lib/CodeGen/EHScopeStack.h
index 166d420..b9ccfb6 100644
--- a/lib/CodeGen/EHScopeStack.h
+++ b/lib/CodeGen/EHScopeStack.h
@@ -301,8 +301,8 @@
   void *pushCleanup(CleanupKind K, size_t DataSize);
 
 public:
-  EHScopeStack() : StartOfBuffer(0), EndOfBuffer(0), StartOfData(0),
-                   InnermostNormalCleanup(stable_end()),
+  EHScopeStack() : StartOfBuffer(nullptr), EndOfBuffer(nullptr),
+                   StartOfData(nullptr), InnermostNormalCleanup(stable_end()),
                    InnermostEHScope(stable_end()) {}
   ~EHScopeStack() { delete[] StartOfBuffer; }
 
diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp
index 27825ab..f5d2372 100644
--- a/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/lib/CodeGen/ItaniumCXXABI.cpp
@@ -52,16 +52,14 @@
     CGCXXABI(CGM), UseARMMethodPtrABI(UseARMMethodPtrABI),
     UseARMGuardVarABI(UseARMGuardVarABI) { }
 
-  bool isReturnTypeIndirect(const CXXRecordDecl *RD) const override {
-    // Structures with either a non-trivial destructor or a non-trivial
-    // copy constructor are always indirect.
-    return !RD->hasTrivialDestructor() || RD->hasNonTrivialCopyConstructor();
-  }
+  bool classifyReturnType(CGFunctionInfo &FI) const override;
 
   RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const override {
     // Structures with either a non-trivial destructor or a non-trivial
     // copy constructor are always indirect.
-    if (!RD->hasTrivialDestructor() || RD->hasNonTrivialCopyConstructor())
+    // FIXME: Use canCopyArgument() when it is fixed to handle lazily declared
+    // special members.
+    if (RD->hasNonTrivialDestructor() || RD->hasNonTrivialCopyConstructor())
       return RAA_Indirect;
     return RAA_Default;
   }
@@ -761,6 +759,21 @@
   return Result;
 }
 
+bool ItaniumCXXABI::classifyReturnType(CGFunctionInfo &FI) const {
+  const CXXRecordDecl *RD = FI.getReturnType()->getAsCXXRecordDecl();
+  if (!RD)
+    return false;
+
+  // Return indirectly if we have a non-trivial copy ctor or non-trivial dtor.
+  // FIXME: Use canCopyArgument() when it is fixed to handle lazily declared
+  // special members.
+  if (RD->hasNonTrivialDestructor() || RD->hasNonTrivialCopyConstructor()) {
+    FI.getReturnInfo() = ABIArgInfo::getIndirect(0, /*ByVal=*/false);
+    return true;
+  }
+  return false;
+}
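
The source-level effect of classifyReturnType, sketched:

    struct Trivial    { int x; };         // FI is left untouched, so the
                                          // target's default classification
                                          // applies (often in registers)
    struct NonTrivial { ~NonTrivial(); }; // returned indirectly: the caller
                                          // passes a hidden result pointer
                                          // (getIndirect with ByVal=false)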
+
 /// The Itanium ABI requires non-zero initialization only for data
 /// member pointers, for which '0' is a valid offset.
 bool ItaniumCXXABI::isZeroInitializable(const MemberPointerType *MPT) {
@@ -886,7 +899,7 @@
     // FIXME: avoid the fake decl
     QualType T = Context.getPointerType(Context.VoidPtrTy);
     ImplicitParamDecl *VTTDecl
-      = ImplicitParamDecl::Create(Context, 0, MD->getLocation(),
+      = ImplicitParamDecl::Create(Context, nullptr, MD->getLocation(),
                                   &Context.Idents.get("vtt"), T);
     Params.insert(Params.begin() + 1, VTTDecl);
     getStructorImplicitParamDecl(CGF) = VTTDecl;
@@ -938,7 +951,7 @@
   llvm::Value *VTT = CGF.GetVTTParameter(GD, ForVirtualBase, Delegating);
   QualType VTTTy = getContext().getPointerType(getContext().VoidPtrTy);
 
-  llvm::Value *Callee = 0;
+  llvm::Value *Callee = nullptr;
   if (getContext().getLangOpts().AppleKext)
     Callee = CGF.BuildAppleKextVirtualDestructorCall(DD, Type, DD->getParent());
 
@@ -947,7 +960,7 @@
 
   // FIXME: Provide a source location here.
   CGF.EmitCXXMemberCall(DD, SourceLocation(), Callee, ReturnValueSlot(), This,
-                        VTT, VTTTy, 0, 0);
+                        VTT, VTTTy, nullptr, nullptr);
 }
 
 void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
@@ -1087,7 +1100,8 @@
       getVirtualFunctionPointer(CGF, GlobalDecl(Dtor, DtorType), This, Ty);
 
   CGF.EmitCXXMemberCall(Dtor, CallLoc, Callee, ReturnValueSlot(), This,
-                        /*ImplicitParam=*/0, QualType(), 0, 0);
+                        /*ImplicitParam=*/nullptr, QualType(), nullptr,
+                        nullptr);
 }
 
 void ItaniumCXXABI::emitVirtualInheritanceTables(const CXXRecordDecl *RD) {
@@ -1393,25 +1407,7 @@
   }
 
   // Test whether the variable has completed initialization.
-  llvm::Value *isInitialized;
-
-  // ARM C++ ABI 3.2.3.1:
-  //   To support the potential use of initialization guard variables
-  //   as semaphores that are the target of ARM SWP and LDREX/STREX
-  //   synchronizing instructions we define a static initialization
-  //   guard variable to be a 4-byte aligned, 4- byte word with the
-  //   following inline access protocol.
-  //     #define INITIALIZED 1
-  //     if ((obj_guard & INITIALIZED) != INITIALIZED) {
-  //       if (__cxa_guard_acquire(&obj_guard))
-  //         ...
-  //     }
-  if (UseARMGuardVarABI && !useInt8GuardVariable) {
-    llvm::Value *V = Builder.CreateLoad(guard);
-    llvm::Value *Test1 = llvm::ConstantInt::get(guardTy, 1);
-    V = Builder.CreateAnd(V, Test1);
-    isInitialized = Builder.CreateIsNull(V, "guard.uninitialized");
-
+  //
   // Itanium C++ ABI 3.3.2:
   //   The following is pseudo-code showing how these functions can be used:
   //     if (obj_guard.first_byte == 0) {
@@ -1427,29 +1423,45 @@
   //       }
   //     }
 
-    // ARM64 C++ ABI 3.2.2:
-    // This ABI instead only specifies the value bit 0 of the static guard
-    // variable; all other bits are platform defined. Bit 0 shall be 0 when the
-    // variable is not initialized and 1 when it is.
-    // FIXME: Reading one bit is no more efficient than reading one byte so
-    // the codegen is same as generic Itanium ABI.
-  } else {
-    // Load the first byte of the guard variable.
-    llvm::LoadInst *LI = 
+  // Load the first byte of the guard variable.
+  llvm::LoadInst *LI =
       Builder.CreateLoad(Builder.CreateBitCast(guard, CGM.Int8PtrTy));
-    LI->setAlignment(1);
+  LI->setAlignment(1);
 
-    // Itanium ABI:
-    //   An implementation supporting thread-safety on multiprocessor
-    //   systems must also guarantee that references to the initialized
-    //   object do not occur before the load of the initialization flag.
-    //
-    // In LLVM, we do this by marking the load Acquire.
-    if (threadsafe)
-      LI->setAtomic(llvm::Acquire);
+  // Itanium ABI:
+  //   An implementation supporting thread-safety on multiprocessor
+  //   systems must also guarantee that references to the initialized
+  //   object do not occur before the load of the initialization flag.
+  //
+  // In LLVM, we do this by marking the load Acquire.
+  if (threadsafe)
+    LI->setAtomic(llvm::Acquire);
 
-    isInitialized = Builder.CreateIsNull(LI, "guard.uninitialized");
-  }
+  // For ARM, we should only check the first bit, rather than the entire byte:
+  //
+  // ARM C++ ABI 3.2.3.1:
+  //   To support the potential use of initialization guard variables
+  //   as semaphores that are the target of ARM SWP and LDREX/STREX
+  //   synchronizing instructions we define a static initialization
+  //   guard variable to be a 4-byte aligned, 4-byte word with the
+  //   following inline access protocol.
+  //     #define INITIALIZED 1
+  //     if ((obj_guard & INITIALIZED) != INITIALIZED) {
+  //       if (__cxa_guard_acquire(&obj_guard))
+  //         ...
+  //     }
+  //
+  // and similarly for ARM64:
+  //
+  // ARM64 C++ ABI 3.2.2:
+  //   This ABI instead only specifies the value bit 0 of the static guard
+  //   variable; all other bits are platform defined. Bit 0 shall be 0 when the
+  //   variable is not initialized and 1 when it is.
+  llvm::Value *V =
+      (UseARMGuardVarABI && !useInt8GuardVariable)
+          ? Builder.CreateAnd(LI, llvm::ConstantInt::get(CGM.Int8Ty, 1))
+          : LI;
+  llvm::Value *isInitialized = Builder.CreateIsNull(V, "guard.uninitialized");
 
   llvm::BasicBlock *InitCheckBlock = CGF.createBasicBlock("init.check");
   llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end");
@@ -1584,10 +1596,12 @@
 
   llvm::FunctionType *FnTy = llvm::FunctionType::get(RetTy, false);
   llvm::Function *Wrapper = llvm::Function::Create(
-      FnTy, getThreadLocalWrapperLinkage(Var->getLinkage()), WrapperName.str(),
-      &CGM.getModule());
+      FnTy, getThreadLocalWrapperLinkage(
+                CGM.getLLVMLinkageVarDefinition(VD, /*isConstant=*/false)),
+      WrapperName.str(), &CGM.getModule());
   // Always resolve references to the wrapper at link time.
-  Wrapper->setVisibility(llvm::GlobalValue::HiddenVisibility);
+  if (!Wrapper->hasLocalLinkage())
+    Wrapper->setVisibility(llvm::GlobalValue::HiddenVisibility);
   return Wrapper;
 }
 
@@ -1609,14 +1623,13 @@
     // If we have a definition for the variable, emit the initialization
     // function as an alias to the global Init function (if any). Otherwise,
     // produce a declaration of the initialization function.
-    llvm::GlobalValue *Init = 0;
+    llvm::GlobalValue *Init = nullptr;
     bool InitIsInitFunc = false;
     if (VD->hasDefinition()) {
       InitIsInitFunc = true;
       if (InitFunc)
-        Init =
-            new llvm::GlobalAlias(InitFunc->getType(), Var->getLinkage(),
-                                  InitFnName.str(), InitFunc, &CGM.getModule());
+        Init = llvm::GlobalAlias::create(Var->getLinkage(), InitFnName.str(),
+                                         InitFunc);
     } else {
       // Emit a weak global function referring to the initialization function.
       // This function will not exist if the TU defining the thread_local
diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp
index 9832969..dee565a 100644
--- a/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -39,29 +39,18 @@
 
   bool HasThisReturn(GlobalDecl GD) const override;
 
-  bool isReturnTypeIndirect(const CXXRecordDecl *RD) const override {
-    // Structures that are not C++03 PODs are always indirect.
-    return !RD->isPOD();
-  }
+  bool classifyReturnType(CGFunctionInfo &FI) const override;
 
-  RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const override {
-    if (RD->hasNonTrivialCopyConstructor() || RD->hasNonTrivialDestructor()) {
-      llvm::Triple::ArchType Arch = CGM.getTarget().getTriple().getArch();
-      if (Arch == llvm::Triple::x86)
-        return RAA_DirectInMemory;
-      // On x64, pass non-trivial records indirectly.
-      // FIXME: Test other Windows architectures.
-      return RAA_Indirect;
-    }
-    return RAA_Default;
-  }
+  RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const override;
+
+  bool isSRetParameterAfterThis() const override { return true; }
 
   StringRef GetPureVirtualCallName() override { return "_purecall"; }
   // No known support for deleted functions in MSVC yet, so this choice is
   // arbitrary.
   StringRef GetDeletedVirtualCallName() override { return "_purecall"; }
 
-  bool isInlineInitializedStaticDataMemberLinkOnce()  override{ return true; }
+  bool isInlineInitializedStaticDataMemberLinkOnce() override { return true; }
 
   llvm::Value *adjustToCompleteObject(CodeGenFunction &CGF,
                                       llvm::Value *ptr,
@@ -143,7 +132,7 @@
       // FIXME: might want to have a more precise type in the non-virtual
       // multiple inheritance case.
       if (ML.VBase || !ML.VFPtrOffset.isZero())
-        return 0;
+        return nullptr;
     }
     return MD->getParent();
   }
@@ -296,13 +285,13 @@
                                        llvm::Value *Base,
                                        llvm::Value *VBPtrOffset,
                                        llvm::Value *VBTableOffset,
-                                       llvm::Value **VBPtr = 0);
+                                       llvm::Value **VBPtr = nullptr);
 
   llvm::Value *GetVBaseOffsetFromVBPtr(CodeGenFunction &CGF,
                                        llvm::Value *Base,
                                        int32_t VBPtrOffset,
                                        int32_t VBTableOffset,
-                                       llvm::Value **VBPtr = 0) {
+                                       llvm::Value **VBPtr = nullptr) {
     llvm::Value *VBPOffset = llvm::ConstantInt::get(CGM.IntTy, VBPtrOffset),
                 *VBTOffset = llvm::ConstantInt::get(CGM.IntTy, VBTableOffset);
     return GetVBaseOffsetFromVBPtr(CGF, Base, VBPOffset, VBTOffset, VBPtr);
@@ -395,7 +384,7 @@
   /// Info on the global variable used to guard initialization of static locals.
   /// The BitIndex field is only used for externally invisible declarations.
   struct GuardInfo {
-    GuardInfo() : Guard(0), BitIndex(0) {}
+    GuardInfo() : Guard(nullptr), BitIndex(0) {}
     llvm::GlobalVariable *Guard;
     unsigned BitIndex;
   };
@@ -407,6 +396,60 @@
 
 }
 
+CGCXXABI::RecordArgABI
+MicrosoftCXXABI::getRecordArgABI(const CXXRecordDecl *RD) const {
+  switch (CGM.getTarget().getTriple().getArch()) {
+  default:
+    // FIXME: Implement for other architectures.
+    return RAA_Default;
+
+  case llvm::Triple::x86:
+    // All record arguments are passed in memory on x86.  Decide whether to
+    // construct the object directly in argument memory, or to construct the
+    // argument elsewhere and copy the bytes during the call.
+
+    // If C++ prohibits us from making a copy, construct the arguments directly
+    // into argument memory.
+    if (!canCopyArgument(RD))
+      return RAA_DirectInMemory;
+
+    // Otherwise, construct the argument into a temporary and copy the bytes
+    // into the outgoing argument memory.
+    return RAA_Default;
+
+  case llvm::Triple::x86_64:
+    // Win64 passes objects with non-trivial copy ctors indirectly.
+    if (RD->hasNonTrivialCopyConstructor())
+      return RAA_Indirect;
+
+    // Win64 passes objects larger than 8 bytes indirectly.
+    if (getContext().getTypeSize(RD->getTypeForDecl()) > 64)
+      return RAA_Indirect;
+
+    // We have a trivial copy constructor or no copy constructors, but we have
+    // to make sure it isn't deleted.
+    bool CopyDeleted = false;
+    for (const CXXConstructorDecl *CD : RD->ctors()) {
+      if (CD->isCopyConstructor()) {
+        assert(CD->isTrivial());
+        // We had at least one undeleted trivial copy ctor.  Return directly.
+        if (!CD->isDeleted())
+          return RAA_Default;
+        CopyDeleted = true;
+      }
+    }
+
+    // The trivial copy constructor was deleted.  Return indirectly.
+    if (CopyDeleted)
+      return RAA_Indirect;
+
+    // There were no copy ctors.  Return in RAX.
+    return RAA_Default;
+  }
+
+  llvm_unreachable("invalid enum");
+}
+
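+// Hedged examples of the x86_64 rules above (types hypothetical):
+//
+//   struct Pod    { int a, b; };                 // 8 bytes, trivial copy
+//                                                //   -> RAA_Default
+//   struct Big    { char buf[16]; };             // > 8 bytes -> RAA_Indirect
+//   struct NoCopy { NoCopy(const NoCopy &) = delete; };
+//                                                // deleted copy -> RAA_Indirect
+//
+// On x86, a type whose copy C++ prohibits is instead constructed directly in
+// its argument slot (RAA_DirectInMemory).
+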
 llvm::Value *MicrosoftCXXABI::adjustToCompleteObject(CodeGenFunction &CGF,
                                                      llvm::Value *ptr,
                                                      QualType type) {
@@ -440,6 +483,27 @@
   return isa<CXXConstructorDecl>(GD.getDecl());
 }
 
+bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const {
+  const CXXRecordDecl *RD = FI.getReturnType()->getAsCXXRecordDecl();
+  if (!RD)
+    return false;
+
+  if (FI.isInstanceMethod()) {
+    // If it's an instance method, aggregates are always returned indirectly via
+    // the second parameter.
+    FI.getReturnInfo() = ABIArgInfo::getIndirect(0, /*ByVal=*/false);
+    FI.getReturnInfo().setSRetAfterThis(FI.isInstanceMethod());
+    return true;
+  } else if (!RD->isPOD()) {
+    // If it's a free function, non-POD types are returned indirectly.
+    FI.getReturnInfo() = ABIArgInfo::getIndirect(0, /*ByVal=*/false);
+    return true;
+  }
+
+  // Otherwise, use the C ABI rules.
+  return false;
+}
+
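+// Hedged illustration (types hypothetical): for an instance method such as
+//
+//   struct Agg { int a, b, c; };
+//   struct S   { Agg get(); };
+//
+// the return slot is passed after 'this', so the call lowers roughly to
+// S::get(S *this, Agg *sret), matching MSVC.  A free function returning a
+// non-POD type takes the sret pointer as its first argument instead.
+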
 void MicrosoftCXXABI::BuildConstructorSignature(
     const CXXConstructorDecl *Ctor, CXXCtorType Type, CanQualType &ResTy,
     SmallVectorImpl<CanQualType> &ArgTys) {
@@ -503,7 +567,7 @@
 
   unsigned AS =
       cast<llvm::PointerType>(getThisValue(CGF)->getType())->getAddressSpace();
-  llvm::Value *Int8This = 0;  // Initialize lazily.
+  llvm::Value *Int8This = nullptr;  // Initialize lazily.
 
   for (VBOffsets::const_iterator I = VBaseMap.begin(), E = VBaseMap.end();
         I != E; ++I) {
@@ -704,7 +768,7 @@
   assert(isa<CXXConstructorDecl>(MD) || isa<CXXDestructorDecl>(MD));
   if (isa<CXXConstructorDecl>(MD) && MD->getParent()->getNumVBases()) {
     ImplicitParamDecl *IsMostDerived
-      = ImplicitParamDecl::Create(Context, 0,
+      = ImplicitParamDecl::Create(Context, nullptr,
                                   CGF.CurGD.getDecl()->getLocation(),
                                   &Context.Idents.get("is_most_derived"),
                                   Context.IntTy);
@@ -718,7 +782,7 @@
     getStructorImplicitParamDecl(CGF) = IsMostDerived;
   } else if (IsDeletingDtor(CGF.CurGD)) {
     ImplicitParamDecl *ShouldDelete
-      = ImplicitParamDecl::Create(Context, 0,
+      = ImplicitParamDecl::Create(Context, nullptr,
                                   CGF.CurGD.getDecl()->getLocation(),
                                   &Context.Idents.get("should_call_delete"),
                                   Context.IntTy);
@@ -823,7 +887,8 @@
 
   // FIXME: Provide a source location here.
   CGF.EmitCXXMemberCall(DD, SourceLocation(), Callee, ReturnValueSlot(), This,
-                        /*ImplicitParam=*/0, /*ImplicitParamTy=*/QualType(), 0, 0);
+                        /*ImplicitParam=*/nullptr,
+                        /*ImplicitParamTy=*/QualType(), nullptr, nullptr);
 }
 
 void MicrosoftCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT,
@@ -832,14 +897,15 @@
   VPtrInfoVector VFPtrs = VFTContext.getVFPtrOffsets(RD);
   llvm::GlobalVariable::LinkageTypes Linkage = CGM.getVTableLinkage(RD);
 
-  for (VPtrInfoVector::iterator I = VFPtrs.begin(), E = VFPtrs.end(); I != E;
-       ++I) {
-    llvm::GlobalVariable *VTable = getAddrOfVTable(RD, (*I)->FullOffsetInMDC);
+  for (VPtrInfo *Info : VFPtrs) {
+    llvm::GlobalVariable *VTable = getAddrOfVTable(RD, Info->FullOffsetInMDC);
     if (VTable->hasInitializer())
       continue;
+    if (getContext().getLangOpts().RTTI)
+      CGM.getMSCompleteObjectLocator(RD, Info);
 
     const VTableLayout &VTLayout =
-        VFTContext.getVFTableLayout(RD, (*I)->FullOffsetInMDC);
+        VFTContext.getVFTableLayout(RD, Info->FullOffsetInMDC);
     llvm::Constant *Init = CGVT.CreateVTableInitializer(
         RD, VTLayout.vtable_component_begin(),
         VTLayout.getNumVTableComponents(), VTLayout.vtable_thunk_begin(),
@@ -854,7 +920,7 @@
 llvm::Value *MicrosoftCXXABI::getVTableAddressPointInStructor(
     CodeGenFunction &CGF, const CXXRecordDecl *VTableClass, BaseSubobject Base,
     const CXXRecordDecl *NearestVBase, bool &NeedsVirtualOffset) {
-  NeedsVirtualOffset = (NearestVBase != 0);
+  NeedsVirtualOffset = (NearestVBase != nullptr);
 
   llvm::Value *VTableAddressPoint =
       getAddrOfVTable(VTableClass, Base.getBaseOffset());
@@ -887,8 +953,7 @@
   VFTableIdTy ID(RD, VPtrOffset);
   VFTablesMapTy::iterator I;
   bool Inserted;
-  std::tie(I, Inserted) = VFTablesMap.insert(
-      std::make_pair(ID, static_cast<llvm::GlobalVariable *>(0)));
+  std::tie(I, Inserted) = VFTablesMap.insert(std::make_pair(ID, nullptr));
   if (!Inserted)
     return I->second;
 
@@ -976,7 +1041,7 @@
 
   This = adjustThisArgumentForVirtualFunctionCall(CGF, GD, This, true);
   CGF.EmitCXXMemberCall(Dtor, CallLoc, Callee, ReturnValueSlot(), This,
-                        ImplicitParam, Context.IntTy, 0, 0);
+                        ImplicitParam, Context.IntTy, nullptr, nullptr);
 }
 
 const VBTableGlobals &
@@ -1047,8 +1112,31 @@
       CGF.Builder.CreateConstInBoundsGEP1_64(VTable, ML.Index, "vfn");
   llvm::Value *Callee = CGF.Builder.CreateLoad(VFuncPtr);
 
-  // Make the call and return the result.
-  CGF.EmitCallAndReturnForThunk(MD, Callee, 0);
+  unsigned CallingConv;
+  CodeGen::AttributeListType AttributeList;
+  CGM.ConstructAttributeList(FnInfo, MD, AttributeList, CallingConv, true);
+  llvm::AttributeSet Attrs =
+      llvm::AttributeSet::get(CGF.getLLVMContext(), AttributeList);
+
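+  // Hedged illustration (type hypothetical): for a pointer to a virtual
+  // member function,
+  //
+  //   struct S { virtual int f(int); };
+  //   auto pmf = &S::f;  // points at a thunk rather than at S::f itself
+  //
+  // this thunk loads the real entry from the vftable and tail-calls it, so
+  // an inalloca argument block built by the caller is reused in place.
+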
+  // Do a musttail call with perfect argument forwarding.  Any inalloca argument
+  // will be forwarded in place without any copy.
+  SmallVector<llvm::Value *, 8> Args;
+  for (llvm::Argument &A : ThunkFn->args())
+    Args.push_back(&A);
+  llvm::CallInst *Call = CGF.Builder.CreateCall(Callee, Args);
+  Call->setTailCallKind(llvm::CallInst::TCK_MustTail);
+  Call->setAttributes(Attrs);
+  Call->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
+
+  if (Call->getType()->isVoidTy())
+    CGF.Builder.CreateRetVoid();
+  else
+    CGF.Builder.CreateRet(Call);
+
+  // Finish the function to maintain CodeGenFunction invariants.
+  // FIXME: Don't emit unreachable code.
+  CGF.EmitBlock(CGF.createBasicBlock());
+  CGF.FinishFunction();
 
   return ThunkFn;
 }
@@ -1097,7 +1185,8 @@
   const ASTRecordLayout &DerivedLayout =
     CGM.getContext().getASTRecordLayout(RD);
 
-  SmallVector<llvm::Constant *, 4> Offsets(1 + ReusingBase->getNumVBases(), 0);
+  SmallVector<llvm::Constant *, 4> Offsets(1 + ReusingBase->getNumVBases(),
+                                           nullptr);
 
   // The offset from ReusingBase's vbptr to itself always leads.
   CharUnits VBPtrOffset = BaseLayout.getVBPtrOffset();
@@ -1117,7 +1206,7 @@
     Offset -= CompleteVBPtrOffset;
 
     unsigned VBIndex = Context.getVBTableIndex(ReusingBase, VBase);
-    assert(Offsets[VBIndex] == 0 && "The same vbindex seen twice?");
+    assert(Offsets[VBIndex] == nullptr && "The same vbindex seen twice?");
     Offsets[VBIndex] = llvm::ConstantInt::get(CGM.IntTy, Offset.getQuantity());
   }
 
@@ -1260,6 +1349,15 @@
 void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D,
                                       llvm::GlobalVariable *GV,
                                       bool PerformInit) {
+  // MSVC only uses guards for static locals.
+  if (!D.isStaticLocal()) {
+    assert(GV->hasWeakLinkage() || GV->hasLinkOnceLinkage());
+    // GlobalOpt is allowed to discard the initializer, so use linkonce_odr.
+    CGF.CurFn->setLinkage(llvm::GlobalValue::LinkOnceODRLinkage);
+    CGF.EmitCXXGlobalVarDeclInit(D, GV, PerformInit);
+    return;
+  }
+
   // MSVC always uses an i32 bitfield to guard initialization, which is *not*
   // threadsafe.  Since the user may be linking in inline functions compiled by
   // cl.exe, there's no reason to provide a false sense of security by using
@@ -1273,10 +1371,10 @@
   llvm::ConstantInt *Zero = llvm::ConstantInt::get(GuardTy, 0);
 
   // Get the guard variable for this function if we have one already.
-  GuardInfo &GI = GuardVariableMap[D.getDeclContext()];
+  GuardInfo *GI = &GuardVariableMap[D.getDeclContext()];
 
   unsigned BitIndex;
-  if (D.isExternallyVisible()) {
+  if (D.isStaticLocal() && D.isExternallyVisible()) {
     // Externally visible variables have to be numbered in Sema to properly
     // handle unreachable VarDecls.
     BitIndex = getContext().getStaticLocalNumber(&D);
@@ -1284,18 +1382,18 @@
     BitIndex--;
   } else {
     // Non-externally visible variables are numbered here in CodeGen.
-    BitIndex = GI.BitIndex++;
+    BitIndex = GI->BitIndex++;
   }
 
   if (BitIndex >= 32) {
     if (D.isExternallyVisible())
       ErrorUnsupportedABI(CGF, "more than 32 guarded initializations");
     BitIndex %= 32;
-    GI.Guard = 0;
+    GI->Guard = nullptr;
   }
 
   // Lazily create the i32 bitfield for this function.
-  if (!GI.Guard) {
+  if (!GI->Guard) {
     // Mangle the name for the guard.
     SmallString<256> GuardName;
     {
@@ -1306,11 +1404,12 @@
 
     // Create the guard variable with a zero-initializer.  Just absorb linkage
     // and visibility from the guarded variable.
-    GI.Guard = new llvm::GlobalVariable(CGM.getModule(), GuardTy, false,
-                                     GV->getLinkage(), Zero, GuardName.str());
-    GI.Guard->setVisibility(GV->getVisibility());
+    GI->Guard =
+        new llvm::GlobalVariable(CGM.getModule(), GuardTy, false,
+                                 GV->getLinkage(), Zero, GuardName.str());
+    GI->Guard->setVisibility(GV->getVisibility());
   } else {
-    assert(GI.Guard->getLinkage() == GV->getLinkage() &&
+    assert(GI->Guard->getLinkage() == GV->getLinkage() &&
            "static local from the same function had different linkage");
   }
 
@@ -1322,7 +1421,7 @@
 
   // Test our bit from the guard variable.
   llvm::ConstantInt *Bit = llvm::ConstantInt::get(GuardTy, 1U << BitIndex);
-  llvm::LoadInst *LI = Builder.CreateLoad(GI.Guard);
+  llvm::LoadInst *LI = Builder.CreateLoad(GI->Guard);
   llvm::Value *IsInitialized =
       Builder.CreateICmpNE(Builder.CreateAnd(LI, Bit), Zero);
   llvm::BasicBlock *InitBlock = CGF.createBasicBlock("init");
@@ -1332,7 +1431,7 @@
   // Set our bit in the guard variable and emit the initializer and add a global
   // destructor if appropriate.
   CGF.EmitBlock(InitBlock);
-  Builder.CreateStore(Builder.CreateOr(LI, Bit), GI.Guard);
+  Builder.CreateStore(Builder.CreateOr(LI, Bit), GI->Guard);
   CGF.EmitCXXGlobalVarDeclInit(D, GV, PerformInit);
   Builder.CreateBr(EndBlock);
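+
+  // A hedged sketch of what this scheme produces for two guarded statics in
+  // one enclosing function (names hypothetical):
+  //
+  //   void f() {
+  //     static int a = g();  // tests and sets bit 0 of the shared i32 guard
+  //     static int b = g();  // tests and sets bit 1
+  //   }
+  //
+  // One zero-initialized i32 guard is emitted per function; a 33rd guarded
+  // static that is externally visible is diagnosed as unsupported.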
 
@@ -1572,7 +1671,7 @@
   llvm::Value *Cmp0 = Builder.CreateICmp(Eq, L0, R0, "memptr.cmp.first");
 
   // Compare everything other than the first field.
-  llvm::Value *Res = 0;
+  llvm::Value *Res = nullptr;
   llvm::StructType *LType = cast<llvm::StructType>(L->getType());
   for (unsigned I = 1, E = LType->getNumElements(); I != E; ++I) {
     llvm::Value *LF = Builder.CreateExtractValue(L, I);
@@ -1623,7 +1722,7 @@
   for (int I = 1, E = fields.size(); I < E; ++I) {
     llvm::Value *Field = Builder.CreateExtractValue(MemPtr, I);
     llvm::Value *Next = Builder.CreateICmpNE(Field, fields[I], "memptr.cmp");
-    Res = Builder.CreateAnd(Res, Next, "memptr.tobool");
+    Res = Builder.CreateOr(Res, Next, "memptr.tobool");
   }
   return Res;
 }
@@ -1687,9 +1786,9 @@
     llvm::Value *Base, llvm::Value *VBTableOffset, llvm::Value *VBPtrOffset) {
   CGBuilderTy &Builder = CGF.Builder;
   Base = Builder.CreateBitCast(Base, CGM.Int8PtrTy);
-  llvm::BasicBlock *OriginalBB = 0;
-  llvm::BasicBlock *SkipAdjustBB = 0;
-  llvm::BasicBlock *VBaseAdjustBB = 0;
+  llvm::BasicBlock *OriginalBB = nullptr;
+  llvm::BasicBlock *SkipAdjustBB = nullptr;
+  llvm::BasicBlock *VBaseAdjustBB = nullptr;
 
   // In the unspecified inheritance model, there might not be a vbtable at all,
   // in which case we need to skip the virtual base lookup.  If there is a
@@ -1721,7 +1820,7 @@
       offs = getContext().getASTRecordLayout(RD).getVBPtrOffset();
     VBPtrOffset = llvm::ConstantInt::get(CGM.IntTy, offs.getQuantity());
   }
-  llvm::Value *VBPtr = 0;
+  llvm::Value *VBPtr = nullptr;
   llvm::Value *VBaseOffs =
     GetVBaseOffsetFromVBPtr(CGF, Base, VBPtrOffset, VBTableOffset, &VBPtr);
   llvm::Value *AdjustedBase = Builder.CreateInBoundsGEP(VBPtr, VBaseOffs);
@@ -1752,8 +1851,8 @@
   // Extract the fields we need, regardless of model.  We'll apply them if we
   // have them.
   llvm::Value *FieldOffset = MemPtr;
-  llvm::Value *VirtualBaseAdjustmentOffset = 0;
-  llvm::Value *VBPtrOffset = 0;
+  llvm::Value *VirtualBaseAdjustmentOffset = nullptr;
+  llvm::Value *VBPtrOffset = nullptr;
   if (MemPtr->getType()->isStructTy()) {
     // We need to extract values.
     unsigned I = 0;
@@ -1839,9 +1938,9 @@
 
   // Decompose src.
   llvm::Value *FirstField = Src;
-  llvm::Value *NonVirtualBaseAdjustment = 0;
-  llvm::Value *VirtualBaseAdjustmentOffset = 0;
-  llvm::Value *VBPtrOffset = 0;
+  llvm::Value *NonVirtualBaseAdjustment = nullptr;
+  llvm::Value *VirtualBaseAdjustmentOffset = nullptr;
+  llvm::Value *VBPtrOffset = nullptr;
   MSInheritanceAttr::Spelling SrcInheritance = SrcRD->getMSInheritanceModel();
   if (!MSInheritanceAttr::hasOnlyOneField(IsFunc, SrcInheritance)) {
     // We need to extract values.
@@ -1924,9 +2023,9 @@
 
   // Decompose src.
   llvm::Constant *FirstField = Src;
-  llvm::Constant *NonVirtualBaseAdjustment = 0;
-  llvm::Constant *VirtualBaseAdjustmentOffset = 0;
-  llvm::Constant *VBPtrOffset = 0;
+  llvm::Constant *NonVirtualBaseAdjustment = nullptr;
+  llvm::Constant *VirtualBaseAdjustmentOffset = nullptr;
+  llvm::Constant *VBPtrOffset = nullptr;
   bool IsFunc = SrcTy->isMemberFunctionPointer();
   if (!MSInheritanceAttr::hasOnlyOneField(IsFunc, SrcInheritance)) {
     // We need to extract values.
@@ -1990,9 +2089,9 @@
   // Extract the fields we need, regardless of model.  We'll apply them if we
   // have them.
   llvm::Value *FunctionPointer = MemPtr;
-  llvm::Value *NonVirtualBaseAdjustment = NULL;
-  llvm::Value *VirtualBaseAdjustmentOffset = NULL;
-  llvm::Value *VBPtrOffset = NULL;
+  llvm::Value *NonVirtualBaseAdjustment = nullptr;
+  llvm::Value *VirtualBaseAdjustmentOffset = nullptr;
+  llvm::Value *VBPtrOffset = nullptr;
   if (MemPtr->getType()->isStructTy()) {
     // We need to extract values.
     unsigned I = 0;
diff --git a/lib/CodeGen/MicrosoftRTTI.cpp b/lib/CodeGen/MicrosoftRTTI.cpp
new file mode 100644
index 0000000..0eb8100
--- /dev/null
+++ b/lib/CodeGen/MicrosoftRTTI.cpp
@@ -0,0 +1,447 @@
+//===--- MicrosoftRTTI.cpp - Emit LLVM Code for MS RTTI descriptors -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This contains code dealing with generation of Microsoft C++ RTTI
+// descriptors.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeGenModule.h"
+#include "CGCXXABI.h"
+#include "CGObjCRuntime.h"
+#include "clang/AST/RecordLayout.h"
+#include "clang/AST/Type.h"
+#include "clang/Frontend/CodeGenOptions.h"
+
+using namespace clang;
+using namespace CodeGen;
+
+// MS RTTI Overview:
+// The run-time type information emitted by cl.exe contains 5 distinct types of
+// structures.  Many of them reference each other.
+//
+// TypeInfo:  Static classes that are returned by typeid.
+//
+// CompleteObjectLocator:  Referenced by vftables.  They contain information
+//   required for dynamic casting, including OffsetFromTop.  They also contain
+//   a reference to the TypeInfo for the type and a reference to the
+//   CompleteHierarchyDescriptor for the type.
+//
+// ClassHierarchyDescriptor: Contains information about a class hierarchy.
+//   Used during dynamic_cast to walk a class hierarchy.  References a base
+//   class array and the size of said array.
+//
+// BaseClassArray: Contains a list of classes in a hierarchy.  BaseClassArray is
+//   somewhat of a misnomer because the most derived class is also in the list
+//   as well as multiple copies of virtual bases (if they occur multiple times
+//   in the hierarchy).  The BaseClassArray contains one BaseClassDescriptor for
+//   every path in the hierarchy, in pre-order depth first order.  Note that
+//   we do not declare a specific LLVM type for BaseClassArray; it's merely an
+//   array of BaseClassDescriptor pointers.
+//
+// BaseClassDescriptor: Contains information about a class in a class hierarchy.
+//   BaseClassDescriptor is also somewhat of a misnomer for the same reason that
+//   BaseClassArray is.  It contains information about a class within a
+//   hierarchy, such as whether this base is ambiguous and what its offset in
+//   the vbtable is.  The names of the BaseClassDescriptors have all of their
+//   fields mangled into them so they can be aggressively deduplicated by the
+//   linker.
+
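+// For example, a hypothetical hierarchy such as
+//
+//   struct A { virtual ~A(); };
+//   struct B : A {};
+//
+// emits for B: a CompleteObjectLocator referenced from B's vftable, a
+// ClassHierarchyDescriptor, a BaseClassArray whose BaseClassDescriptors cover
+// B and then A (pre-order depth first), and TypeDescriptors for both classes.
+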
+// 5 routines for constructing the llvm types for MS RTTI structs.
+static llvm::StructType *getClassHierarchyDescriptorType(CodeGenModule &CGM);
+
+static llvm::StructType *getTypeDescriptorType(CodeGenModule &CGM,
+                                               StringRef TypeInfoString) {
+  llvm::SmallString<32> TDTypeName("MSRTTITypeDescriptor");
+  TDTypeName += llvm::utostr(TypeInfoString.size());
+  if (auto Type = CGM.getModule().getTypeByName(TDTypeName))
+    return Type;
+  llvm::Type *FieldTypes[] = {
+      CGM.Int8PtrPtrTy,
+      CGM.Int8PtrTy,
+      llvm::ArrayType::get(CGM.Int8Ty, TypeInfoString.size() + 1)};
+  return llvm::StructType::create(CGM.getLLVMContext(), FieldTypes, TDTypeName);
+}
+
+static llvm::StructType *getBaseClassDescriptorType(CodeGenModule &CGM) {
+  static const char Name[] = "MSRTTIBaseClassDescriptor";
+  if (auto Type = CGM.getModule().getTypeByName(Name))
+    return Type;
+  llvm::Type *FieldTypes[] = {
+      CGM.Int8PtrTy,
+      CGM.IntTy,
+      CGM.IntTy,
+      CGM.IntTy,
+      CGM.IntTy,
+      CGM.IntTy,
+      getClassHierarchyDescriptorType(CGM)->getPointerTo()};
+  return llvm::StructType::create(CGM.getLLVMContext(), FieldTypes, Name);
+}
+
+static llvm::StructType *getClassHierarchyDescriptorType(CodeGenModule &CGM) {
+  static const char Name[] = "MSRTTIClassHierarchyDescriptor";
+  if (auto Type = CGM.getModule().getTypeByName(Name))
+    return Type;
+  // Forward declare RTTIClassHierarchyDescriptor to break a cycle.
+  llvm::StructType *Type = llvm::StructType::create(CGM.getLLVMContext(), Name);
+  llvm::Type *FieldTypes[] = {
+    CGM.IntTy,
+    CGM.IntTy,
+    CGM.IntTy,
+    getBaseClassDescriptorType(CGM)->getPointerTo()->getPointerTo()};
+  Type->setBody(FieldTypes);
+  return Type;
+}
+
+static llvm::StructType *getCompleteObjectLocatorType(CodeGenModule &CGM) {
+  static const char Name[] = "MSRTTICompleteObjectLocator";
+  if (auto Type = CGM.getModule().getTypeByName(Name))
+    return Type;
+  llvm::Type *FieldTypes[] = {
+    CGM.IntTy,
+    CGM.IntTy,
+    CGM.IntTy,
+    CGM.Int8PtrTy,
+    getClassHierarchyDescriptorType(CGM)->getPointerTo() };
+  return llvm::StructType::create(CGM.getLLVMContext(), FieldTypes, Name);
+}
+
+static llvm::GlobalVariable *getTypeInfoVTable(CodeGenModule &CGM) {
+  StringRef MangledName("\01??_7type_info@@6B@");
+  if (auto VTable = CGM.getModule().getNamedGlobal(MangledName))
+    return VTable;
+  return new llvm::GlobalVariable(CGM.getModule(), CGM.Int8PtrTy,
+                                  /*Constant=*/true,
+                                  llvm::GlobalVariable::ExternalLinkage,
+                                  /*Initializer=*/nullptr, MangledName);
+}
+
+namespace {
+
+/// \brief A helper struct that stores information about a class in a class
+/// hierarchy.  The information stored in these structs is used during
+/// the generation of ClassHierarchyDescriptors and BaseClassDescriptors.
+// During RTTI creation, MSRTTIClasses are stored in a contiguous array with
+// implicit depth first pre-order tree connectivity.  getFirstChild and
+// getNextSibling allow us to walk the tree efficiently.
+struct MSRTTIClass {
+  enum {
+    IsPrivateOnPath = 1 | 8,
+    IsAmbiguous = 2,
+    IsPrivate = 4,
+    IsVirtual = 16,
+    HasHierarchyDescriptor = 64
+  };
+  MSRTTIClass(const CXXRecordDecl *RD) : RD(RD) {}
+  uint32_t initialize(const MSRTTIClass *Parent,
+                      const CXXBaseSpecifier *Specifier);
+
+  MSRTTIClass *getFirstChild() { return this + 1; }
+  static MSRTTIClass *getNextChild(MSRTTIClass *Child) {
+    return Child + 1 + Child->NumBases;
+  }
+
+  const CXXRecordDecl *RD, *VirtualRoot;
+  uint32_t Flags, NumBases, OffsetInVBase;
+};
+
+/// \brief Recursively initialize the base class array.
+uint32_t MSRTTIClass::initialize(const MSRTTIClass *Parent,
+                                 const CXXBaseSpecifier *Specifier) {
+  Flags = HasHierarchyDescriptor;
+  if (!Parent) {
+    VirtualRoot = nullptr;
+    OffsetInVBase = 0;
+  } else {
+    if (Specifier->getAccessSpecifier() != AS_public)
+      Flags |= IsPrivate | IsPrivateOnPath;
+    if (Specifier->isVirtual()) {
+      Flags |= IsVirtual;
+      VirtualRoot = RD;
+      OffsetInVBase = 0;
+    } else {
+      if (Parent->Flags & IsPrivateOnPath)
+        Flags |= IsPrivateOnPath;
+      VirtualRoot = Parent->VirtualRoot;
+      OffsetInVBase = Parent->OffsetInVBase + RD->getASTContext()
+          .getASTRecordLayout(Parent->RD).getBaseClassOffset(RD).getQuantity();
+    }
+  }
+  NumBases = 0;
+  MSRTTIClass *Child = getFirstChild();
+  for (const CXXBaseSpecifier &Base : RD->bases()) {
+    NumBases += Child->initialize(this, &Base) + 1;
+    Child = getNextChild(Child);
+  }
+  return NumBases;
+}
+
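+// For example, given the hypothetical hierarchy
+//
+//   struct A {}; struct B {}; struct C : A, B {}; struct D : C {};
+//
+// serializeClassHierarchy (below) produces the flat pre-order array
+// [D, C, A, B]; initialize() records NumBases = 3 for D and 2 for C, so
+// getNextChild can hop over a node's entire subtree in one step.
+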
+/// \brief An ephemeral helper class for building MS RTTI types.  It caches some
+/// calls to the module and information about the most derived class in a
+/// hierarchy.
+struct MSRTTIBuilder {
+  enum {
+    HasBranchingHierarchy = 1,
+    HasVirtualBranchingHierarchy = 2,
+    HasAmbiguousBases = 4
+  };
+
+  MSRTTIBuilder(CodeGenModule &CGM, const CXXRecordDecl *RD)
+      : CGM(CGM), Context(CGM.getContext()), VMContext(CGM.getLLVMContext()),
+        Module(CGM.getModule()), RD(RD), Linkage(CGM.getVTableLinkage(RD)),
+        Mangler(
+            cast<MicrosoftMangleContext>(CGM.getCXXABI().getMangleContext())) {}
+
+  llvm::GlobalVariable *getBaseClassDescriptor(const MSRTTIClass &Classes);
+  llvm::GlobalVariable *
+  getBaseClassArray(SmallVectorImpl<MSRTTIClass> &Classes);
+  llvm::GlobalVariable *getClassHierarchyDescriptor();
+  llvm::GlobalVariable *getCompleteObjectLocator(const VPtrInfo *Info);
+
+  CodeGenModule &CGM;
+  ASTContext &Context;
+  llvm::LLVMContext &VMContext;
+  llvm::Module &Module;
+  const CXXRecordDecl *RD;
+  llvm::GlobalVariable::LinkageTypes Linkage;
+  MicrosoftMangleContext &Mangler;
+};
+
+} // namespace
+
+/// \brief Recursively serializes a class hierarchy in pre-order depth first
+/// order.
+static void serializeClassHierarchy(SmallVectorImpl<MSRTTIClass> &Classes,
+                                    const CXXRecordDecl *RD) {
+  Classes.push_back(MSRTTIClass(RD));
+  for (const CXXBaseSpecifier &Base : RD->bases())
+    serializeClassHierarchy(Classes, Base.getType()->getAsCXXRecordDecl());
+}
+
+/// \brief Find ambiguity among base classes.
+static void
+detectAmbiguousBases(SmallVectorImpl<MSRTTIClass> &Classes) {
+  llvm::SmallPtrSet<const CXXRecordDecl *, 8> VirtualBases;
+  llvm::SmallPtrSet<const CXXRecordDecl *, 8> UniqueBases;
+  llvm::SmallPtrSet<const CXXRecordDecl *, 8> AmbiguousBases;
+  for (MSRTTIClass *Class = &Classes.front(); Class <= &Classes.back();) {
+    if ((Class->Flags & MSRTTIClass::IsVirtual) &&
+        !VirtualBases.insert(Class->RD)) {
+      Class = MSRTTIClass::getNextChild(Class);
+      continue;
+    }
+    if (!UniqueBases.insert(Class->RD))
+      AmbiguousBases.insert(Class->RD);
+    Class++;
+  }
+  if (AmbiguousBases.empty())
+    return;
+  for (MSRTTIClass &Class : Classes)
+    if (AmbiguousBases.count(Class.RD))
+      Class.Flags |= MSRTTIClass::IsAmbiguous;
+}
+
+llvm::GlobalVariable *MSRTTIBuilder::getClassHierarchyDescriptor() {
+  SmallString<256> MangledName;
+  {
+    llvm::raw_svector_ostream Out(MangledName);
+    Mangler.mangleCXXRTTIClassHierarchyDescriptor(RD, Out);
+  }
+
+  // Check to see if we've already declared this ClassHierarchyDescriptor.
+  if (auto CHD = Module.getNamedGlobal(MangledName))
+    return CHD;
+
+  // Serialize the class hierarchy and initialize the CHD fields.
+  SmallVector<MSRTTIClass, 8> Classes;
+  serializeClassHierarchy(Classes, RD);
+  Classes.front().initialize(/*Parent=*/nullptr, /*Specifier=*/nullptr);
+  detectAmbiguousBases(Classes);
+  int Flags = 0;
+  for (auto Class : Classes) {
+    if (Class.RD->getNumBases() > 1)
+      Flags |= HasBranchingHierarchy;
+    // Note: cl.exe does not calculate "HasAmbiguousBases" correctly.  We
+    // believe the field isn't actually used.
+    if (Class.Flags & MSRTTIClass::IsAmbiguous)
+      Flags |= HasAmbiguousBases;
+  }
+  if ((Flags & HasBranchingHierarchy) && RD->getNumVBases() != 0)
+    Flags |= HasVirtualBranchingHierarchy;
+  // These gep indices are used to get the address of the first element of the
+  // base class array.
+  llvm::Value *GEPIndices[] = {llvm::ConstantInt::get(CGM.IntTy, 0),
+                               llvm::ConstantInt::get(CGM.IntTy, 0)};
+
+  // Forward declare the class hierarchy descriptor.
+  auto Type = getClassHierarchyDescriptorType(CGM);
+  auto CHD = new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage,
+                                      /*Initializer=*/nullptr,
+                                      MangledName.c_str());
+
+  // Initialize the base class ClassHierarchyDescriptor.
+  llvm::Constant *Fields[] = {
+    llvm::ConstantInt::get(CGM.IntTy, 0), // Unknown
+    llvm::ConstantInt::get(CGM.IntTy, Flags),
+    llvm::ConstantInt::get(CGM.IntTy, Classes.size()),
+    llvm::ConstantExpr::getInBoundsGetElementPtr(
+        getBaseClassArray(Classes),
+        llvm::ArrayRef<llvm::Value *>(GEPIndices))};
+  CHD->setInitializer(llvm::ConstantStruct::get(Type, Fields));
+  return CHD;
+}
+
+llvm::GlobalVariable *
+MSRTTIBuilder::getBaseClassArray(SmallVectorImpl<MSRTTIClass> &Classes) {
+  SmallString<256> MangledName;
+  {
+    llvm::raw_svector_ostream Out(MangledName);
+    Mangler.mangleCXXRTTIBaseClassArray(RD, Out);
+  }
+
+  // Forward declare the base class array.
+  // cl.exe pads the base class array with 1 (in 32-bit mode) or 4 (in 64-bit
+  // mode) bytes of padding.  We provide a pointer-sized amount of padding by
+  // adding +1 to Classes.size().  The sections have pointer alignment and are
+  // marked pick-any so it shouldn't matter.
+  auto PtrType = getBaseClassDescriptorType(CGM)->getPointerTo();
+  auto ArrayType = llvm::ArrayType::get(PtrType, Classes.size() + 1);
+  auto BCA = new llvm::GlobalVariable(Module, ArrayType,
+      /*Constant=*/true, Linkage, /*Initializer=*/nullptr, MangledName.c_str());
+
+  // Initialize the BaseClassArray.
+  SmallVector<llvm::Constant *, 8> BaseClassArrayData;
+  for (MSRTTIClass &Class : Classes)
+    BaseClassArrayData.push_back(getBaseClassDescriptor(Class));
+  BaseClassArrayData.push_back(llvm::ConstantPointerNull::get(PtrType));
+  BCA->setInitializer(llvm::ConstantArray::get(ArrayType, BaseClassArrayData));
+  return BCA;
+}
+
+llvm::GlobalVariable *
+MSRTTIBuilder::getBaseClassDescriptor(const MSRTTIClass &Class) {
+  // Compute the fields for the BaseClassDescriptor.  They are computed up front
+  // because they are mangled into the name of the object.
+  uint32_t OffsetInVBTable = 0;
+  int32_t VBPtrOffset = -1;
+  if (Class.VirtualRoot) {
+    auto &VTableContext = CGM.getMicrosoftVTableContext();
+    OffsetInVBTable = VTableContext.getVBTableIndex(RD, Class.VirtualRoot) * 4;
+    VBPtrOffset = Context.getASTRecordLayout(RD).getVBPtrOffset().getQuantity();
+  }
+
+  SmallString<256> MangledName;
+  {
+    llvm::raw_svector_ostream Out(MangledName);
+    Mangler.mangleCXXRTTIBaseClassDescriptor(Class.RD, Class.OffsetInVBase,
+                                             VBPtrOffset, OffsetInVBTable,
+                                             Class.Flags, Out);
+  }
+
+  // Check to see if we've already declared this object.
+  if (auto BCD = Module.getNamedGlobal(MangledName))
+    return BCD;
+
+  // Forward declare the base class descriptor.
+  auto Type = getBaseClassDescriptorType(CGM);
+  auto BCD = new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage,
+                                      /*Initializer=*/nullptr,
+                                      MangledName.c_str());
+
+  // Initialize the BaseClassDescriptor.
+  llvm::Constant *Fields[] = {
+    CGM.getMSTypeDescriptor(Context.getTypeDeclType(Class.RD)),
+    llvm::ConstantInt::get(CGM.IntTy, Class.NumBases),
+    llvm::ConstantInt::get(CGM.IntTy, Class.OffsetInVBase),
+    llvm::ConstantInt::get(CGM.IntTy, VBPtrOffset),
+    llvm::ConstantInt::get(CGM.IntTy, OffsetInVBTable),
+    llvm::ConstantInt::get(CGM.IntTy, Class.Flags),
+    MSRTTIBuilder(CGM, Class.RD).getClassHierarchyDescriptor()};
+  BCD->setInitializer(llvm::ConstantStruct::get(Type, Fields));
+  return BCD;
+}
+
+llvm::GlobalVariable *
+MSRTTIBuilder::getCompleteObjectLocator(const VPtrInfo *Info) {
+  SmallString<256> MangledName;
+  {
+    llvm::raw_svector_ostream Out(MangledName);
+    Mangler.mangleCXXRTTICompleteObjectLocator(RD, Info->MangledPath, Out);
+  }
+
+  // Check to see if we've already computed this complete object locator.
+  if (auto COL = Module.getNamedGlobal(MangledName))
+    return COL;
+
+  // Compute the fields of the complete object locator.
+  int OffsetToTop = Info->FullOffsetInMDC.getQuantity();
+  int VFPtrOffset = 0;
+  // The offset includes the vtordisp if one exists.
+  if (const CXXRecordDecl *VBase = Info->getVBaseWithVPtr())
+    if (Context.getASTRecordLayout(RD)
+            .getVBaseOffsetsMap()
+            .find(VBase)
+            ->second.hasVtorDisp())
+      VFPtrOffset = Info->NonVirtualOffset.getQuantity() + 4;
+
+  // Forward declare the complete object locator.
+  llvm::StructType *Type = getCompleteObjectLocatorType(CGM);
+  auto COL = new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage,
+                                      /*Initializer=*/nullptr,
+                                      MangledName.c_str());
+
+  // Initialize the CompleteObjectLocator.
+  llvm::Constant *Fields[] = {
+    llvm::ConstantInt::get(CGM.IntTy, 0), // IsDeltaEncoded
+    llvm::ConstantInt::get(CGM.IntTy, OffsetToTop),
+    llvm::ConstantInt::get(CGM.IntTy, VFPtrOffset),
+    CGM.getMSTypeDescriptor(Context.getTypeDeclType(RD)),
+    getClassHierarchyDescriptor()};
+  COL->setInitializer(llvm::ConstantStruct::get(Type, Fields));
+  return COL;
+}
+
+/// \brief Gets a TypeDescriptor.  Returns an llvm::Constant * rather than an
+/// llvm::GlobalVariable * because different type descriptors have different
+/// types and need to be abstracted.  They are abstracted by casting the
+/// address to an Int8PtrTy.
+llvm::Constant *CodeGenModule::getMSTypeDescriptor(QualType Type) {
+  auto &Mangler(cast<MicrosoftMangleContext>(getCXXABI().getMangleContext()));
+  SmallString<256> MangledName, TypeInfoString;
+  {
+    llvm::raw_svector_ostream Out(MangledName);
+    Mangler.mangleCXXRTTI(Type, Out);
+  }
+
+  // Check to see if we've already declared this TypeDescriptor.
+  if (auto TypeDescriptor = getModule().getNamedGlobal(MangledName))
+    return llvm::ConstantExpr::getBitCast(TypeDescriptor, Int8PtrTy);
+
+  // Compute the fields for the TypeDescriptor.
+  {
+    llvm::raw_svector_ostream Out(TypeInfoString);
+    Mangler.mangleCXXRTTIName(Type, Out);
+  }
+
+  // Declare and initialize the TypeDescriptor.
+  llvm::Constant *Fields[] = {
+    getTypeInfoVTable(*this),                  // VFPtr
+    llvm::ConstantPointerNull::get(Int8PtrTy), // Runtime data
+    llvm::ConstantDataArray::getString(VMContext, TypeInfoString)};
+  auto TypeDescriptorType = getTypeDescriptorType(*this, TypeInfoString);
+  return llvm::ConstantExpr::getBitCast(
+      new llvm::GlobalVariable(
+          getModule(), TypeDescriptorType, /*Constant=*/false,
+          getTypeInfoLinkage(Type),
+          llvm::ConstantStruct::get(TypeDescriptorType, Fields),
+          MangledName.c_str()),
+      Int8PtrTy);
+}
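+
+// A hedged sketch of the global emitted above for a hypothetical 'class Foo':
+//
+//   struct {
+//     const void **VFPtr;  // type_info's vftable ("\01??_7type_info@@6B@")
+//     void *RuntimeData;   // null
+//     char Name[10];       // the RTTI name string, e.g. ".?AVFoo@@"
+//   };
+//
+// The address is bitcast to i8* so that descriptors whose name strings have
+// different lengths can share one return type.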
+
+llvm::GlobalVariable *
+CodeGenModule::getMSCompleteObjectLocator(const CXXRecordDecl *RD,
+                                          const VPtrInfo *Info) {
+  return MSRTTIBuilder(*this, RD).getCompleteObjectLocator(Info);
+}
diff --git a/lib/CodeGen/ModuleBuilder.cpp b/lib/CodeGen/ModuleBuilder.cpp
index 7873f44..78cb82d 100644
--- a/lib/CodeGen/ModuleBuilder.cpp
+++ b/lib/CodeGen/ModuleBuilder.cpp
@@ -81,6 +81,20 @@
       return true;
     }
 
+    void HandleInlineMethodDefinition(CXXMethodDecl *D) override {
+      if (Diags.hasErrorOccurred())
+        return;
+
+      assert(D->doesThisDeclarationHaveABody());
+
+      // We may have member functions that need to be emitted at this point.
+      if (!D->isDependentContext() &&
+          (D->hasAttr<UsedAttr>() || D->hasAttr<ConstructorAttr>() ||
+           D->hasAttr<DLLExportAttr>())) {
+        Builder->EmitTopLevelDecl(D);
+      }
+    }
+
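+    // Hedged illustration (attribute choice illustrative): with this
+    // callback, an inline method such as
+    //
+    //   struct S {
+    //     __declspec(dllexport) int f() { return 42; }
+    //   };
+    //
+    // reaches the builder as soon as its body is parsed, instead of waiting
+    // for the enclosing class to be completed.
+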
     /// HandleTagDeclDefinition - This callback is invoked each time a TagDecl
     /// to (e.g. struct, union, enum, class) is completed. This allows the
     /// client hack on the type, which can occur at any point in the file
@@ -90,17 +104,6 @@
         return;
 
       Builder->UpdateCompletedType(D);
-      
-      // In C++, we may have member functions that need to be emitted at this 
-      // point.
-      if (Ctx->getLangOpts().CPlusPlus && !D->isDependentContext()) {
-        for (auto *M : D->decls())
-          if (auto *Method = dyn_cast<CXXMethodDecl>(M))
-            if (Method->doesThisDeclarationHaveABody() &&
-                (Method->hasAttr<UsedAttr>() || 
-                 Method->hasAttr<ConstructorAttr>()))
-              Builder->EmitTopLevelDecl(Method);
-      }
     }
 
     void HandleTagDeclRequiredDefinition(const TagDecl *D) override {
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index c602b1f..ab4c388 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -23,6 +23,9 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>    // std::sort
+
 using namespace clang;
 using namespace CodeGen;
 
@@ -45,22 +48,6 @@
 
 ABIInfo::~ABIInfo() {}
 
-static bool isRecordReturnIndirect(const RecordType *RT,
-                                   CGCXXABI &CXXABI) {
-  const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl());
-  if (!RD)
-    return false;
-  return CXXABI.isReturnTypeIndirect(RD);
-}
-
-
-static bool isRecordReturnIndirect(QualType T, CGCXXABI &CXXABI) {
-  const RecordType *RT = T->getAs<RecordType>();
-  if (!RT)
-    return false;
-  return isRecordReturnIndirect(RT, CXXABI);
-}
-
 static CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT,
                                               CGCXXABI &CXXABI) {
   const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl());
@@ -230,13 +217,13 @@
 static const Type *isSingleElementStruct(QualType T, ASTContext &Context) {
   const RecordType *RT = T->getAsStructureType();
   if (!RT)
-    return 0;
+    return nullptr;
 
   const RecordDecl *RD = RT->getDecl();
   if (RD->hasFlexibleArrayMember())
-    return 0;
+    return nullptr;
 
-  const Type *Found = 0;
+  const Type *Found = nullptr;
 
   // If this is a C++ record, check the bases first.
   if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
@@ -247,13 +234,13 @@
 
       // If we already found an element then this isn't a single-element struct.
       if (Found)
-        return 0;
+        return nullptr;
 
       // If this is non-empty and not a single element struct, the composite
       // cannot be a single element struct.
       Found = isSingleElementStruct(I.getType(), Context);
       if (!Found)
-        return 0;
+        return nullptr;
     }
   }
 
@@ -268,7 +255,7 @@
     // If we already found an element then this isn't a single-element
     // struct.
     if (Found)
-      return 0;
+      return nullptr;
 
     // Treat single element arrays as the element.
     while (const ConstantArrayType *AT = Context.getAsConstantArrayType(FT)) {
@@ -282,14 +269,14 @@
     } else {
       Found = isSingleElementStruct(FT, Context);
       if (!Found)
-        return 0;
+        return nullptr;
     }
   }
 
   // We don't consider a struct a single-element struct if it has
   // padding beyond the element type.
   if (Found && Context.getTypeSize(Found) != Context.getTypeSize(T))
-    return 0;
+    return nullptr;
 
   return Found;
 }
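+
+// Hedged examples of what the helper above accepts (types hypothetical):
+//
+//   struct S1 { double d; };               // single element: double
+//   struct S2 { double d[1]; };            // one-element arrays unwrap: double
+//   struct S3 { S1 inner; };               // recurses through records: double
+//   struct S4 { double d; int tail; };     // two elements -> rejected
+//   struct alignas(16) S5 { double d; };   // tail padding -> rejected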
@@ -367,7 +354,8 @@
   ABIArgInfo classifyArgumentType(QualType RetTy) const;
 
   void computeInfo(CGFunctionInfo &FI) const override {
-    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+    if (!getCXXABI().classifyReturnType(FI))
+      FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
     for (auto &I : FI.arguments())
       I.info = classifyArgumentType(I.type);
   }
@@ -384,18 +372,12 @@
 
 llvm::Value *DefaultABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
                                        CodeGenFunction &CGF) const {
-  return 0;
+  return nullptr;
 }
 
 ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const {
-  if (isAggregateTypeForABI(Ty)) {
-    // Records with non-trivial destructors/constructors should not be passed
-    // by value.
-    if (isRecordReturnIndirect(Ty, getCXXABI()))
-      return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
-
+  if (isAggregateTypeForABI(Ty))
     return ABIArgInfo::getIndirect(0);
-  }
 
   // Treat an enum type as its underlying type.
   if (const EnumType *EnumTy = Ty->getAs<EnumType>())
@@ -446,15 +428,16 @@
 };
 
 void PNaClABIInfo::computeInfo(CGFunctionInfo &FI) const {
+  if (!getCXXABI().classifyReturnType(FI))
     FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
 
-    for (auto &I : FI.arguments())
-      I.info = classifyArgumentType(I.type);
-  }
+  for (auto &I : FI.arguments())
+    I.info = classifyArgumentType(I.type);
+}
 
 llvm::Value *PNaClABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
                                        CodeGenFunction &CGF) const {
-  return 0;
+  return nullptr;
 }
 
 /// \brief Classify argument of given type \p Ty.
@@ -505,7 +488,7 @@
   if ((Constraint == "y" || Constraint == "&y") && Ty->isVectorTy()) {
     if (cast<llvm::VectorType>(Ty)->getBitWidth() != 64) {
       // Invalid MMX constraint
-      return 0;
+      return nullptr;
     }
 
     return llvm::Type::getX86_MMXTy(CGF.getLLVMContext());
@@ -547,8 +530,7 @@
     return (Size == 8 || Size == 16 || Size == 32 || Size == 64);
   }
 
-  bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context,
-                                  bool IsInstanceMethod) const;
+  bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context) const;
 
   /// getIndirectResult - Give a source type \arg Ty, return a suitable result
   /// such that the argument will be passed in memory.
@@ -560,8 +542,7 @@
   unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const;
 
   Class classify(QualType Ty) const;
-  ABIArgInfo classifyReturnType(QualType RetTy, CCState &State,
-                                bool IsInstanceMethod) const;
+  ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
   ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const;
   bool shouldUseInReg(QualType Ty, CCState &State, bool &NeedsPadding) const;
 
@@ -627,8 +608,8 @@
 
 /// shouldReturnTypeInRegister - Determine if the given type should be
 /// passed in a register (for the Darwin ABI).
-bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty, ASTContext &Context,
-                                               bool IsInstanceMethod) const {
+bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty,
+                                               ASTContext &Context) const {
   uint64_t Size = Context.getTypeSize(Ty);
 
   // Type must be register sized.
@@ -653,8 +634,7 @@
 
   // Arrays are treated like records.
   if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty))
-    return shouldReturnTypeInRegister(AT->getElementType(), Context,
-                                      IsInstanceMethod);
+    return shouldReturnTypeInRegister(AT->getElementType(), Context);
 
   // Otherwise, it must be a record type.
   const RecordType *RT = Ty->getAs<RecordType>();
@@ -662,11 +642,6 @@
 
   // FIXME: Traverse bases here too.
 
-  // For thiscall conventions, structures will never be returned in
-  // a register.  This is for compatibility with the MSVC ABI
-  if (IsWin32StructABI && IsInstanceMethod && RT->isStructureType())
-    return false;
-
   // Structure types are passed in register if all fields would be
   // passed in a register.
   for (const auto *FD : RT->getDecl()->fields()) {
@@ -675,7 +650,7 @@
       continue;
 
     // Check fields recursively.
-    if (!shouldReturnTypeInRegister(FD->getType(), Context, IsInstanceMethod))
+    if (!shouldReturnTypeInRegister(FD->getType(), Context))
       return false;
   }
   return true;
@@ -691,8 +666,7 @@
   return ABIArgInfo::getIndirect(/*Align=*/0, /*ByVal=*/false);
 }
 
-ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, CCState &State,
-                                             bool IsInstanceMethod) const {
+ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy,
+                                             CCState &State) const {
   if (RetTy->isVoidType())
     return ABIArgInfo::getIgnore();
 
@@ -723,9 +697,6 @@
 
   if (isAggregateTypeForABI(RetTy)) {
     if (const RecordType *RT = RetTy->getAs<RecordType>()) {
-      if (isRecordReturnIndirect(RT, getCXXABI()))
-        return getIndirectReturnResult(State);
-
       // Structures with flexible arrays are always indirect.
       if (RT->getDecl()->hasFlexibleArrayMember())
         return getIndirectReturnResult(State);
@@ -737,7 +708,7 @@
 
     // Small structures which are register sized are generally returned
     // in a register.
-    if (shouldReturnTypeInRegister(RetTy, getContext(), IsInstanceMethod)) {
+    if (shouldReturnTypeInRegister(RetTy, getContext())) {
       uint64_t Size = getContext().getTypeSize(RetTy);
 
       // As a special-case, if the struct is a "single-element" struct, and
@@ -929,7 +900,7 @@
       llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
       return ABIArgInfo::getDirectInReg(Result);
     }
-    llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : 0;
+    llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr;
 
     // Expand small (<= 128-bit) record types when we know that the stack layout
     // of those arguments will match the struct. This is important because the
@@ -986,16 +957,8 @@
   else
     State.FreeRegs = DefaultNumRegisterParameters;
 
-  FI.getReturnInfo() =
-      classifyReturnType(FI.getReturnType(), State, FI.isInstanceMethod());
-
-  // On win32, use the x86_cdeclmethodcc convention for cdecl methods that use
-  // sret.  This convention swaps the order of the first two parameters behind
-  // the scenes to match MSVC.
-  if (IsWin32StructABI && FI.isInstanceMethod() &&
-      FI.getCallingConvention() == llvm::CallingConv::C &&
-      FI.getReturnInfo().isIndirect())
-    FI.setEffectiveCallingConvention(llvm::CallingConv::X86_CDeclMethod);
+  if (!getCXXABI().classifyReturnType(FI))
+    FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), State);
 
   bool UsedInAlloca = false;
   for (auto &I : FI.arguments()) {
@@ -1013,24 +976,22 @@
 X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
                                    unsigned &StackOffset,
                                    ABIArgInfo &Info, QualType Type) const {
+  assert(StackOffset % 4U == 0 && "unaligned inalloca struct");
+  Info = ABIArgInfo::getInAlloca(FrameFields.size());
+  FrameFields.push_back(CGT.ConvertTypeForMem(Type));
+  StackOffset += getContext().getTypeSizeInChars(Type).getQuantity();
+
   // Insert padding bytes to respect alignment.  For x86_32, each argument is 4
   // byte aligned.
-  unsigned Align = 4U;
-  if (Info.getKind() == ABIArgInfo::Indirect && Info.getIndirectByVal())
-    Align = std::max(Align, Info.getIndirectAlign());
-  if (StackOffset & (Align - 1)) {
+  if (StackOffset % 4U) {
     unsigned OldOffset = StackOffset;
-    StackOffset = llvm::RoundUpToAlignment(StackOffset, Align);
+    StackOffset = llvm::RoundUpToAlignment(StackOffset, 4U);
     unsigned NumBytes = StackOffset - OldOffset;
     assert(NumBytes);
     llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext());
     Ty = llvm::ArrayType::get(Ty, NumBytes);
     FrameFields.push_back(Ty);
   }
-
-  Info = ABIArgInfo::getInAlloca(FrameFields.size());
-  FrameFields.push_back(CGT.ConvertTypeForMem(Type));
-  StackOffset += getContext().getTypeSizeInChars(Type).getQuantity();
 }
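+
+// Hedged sketch (argument types hypothetical): with the reordering above,
+// each field is appended first and any padding follows it, so for a call
+// like f(char, double) the inalloca frame lays out as
+//
+//   { char /*offset 0*/, char pad[3], double /*offset 4*/ }
+//
+// keeping every argument slot on a 4-byte boundary.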
 
 void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const {
@@ -2203,7 +2164,7 @@
   assert((Hi != Memory || Lo == Memory) && "Invalid memory classification.");
   assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification.");
 
-  llvm::Type *ResType = 0;
+  llvm::Type *ResType = nullptr;
   switch (Lo) {
   case NoClass:
     if (Hi == NoClass)
@@ -2264,7 +2225,7 @@
     break;
   }
 
-  llvm::Type *HighPart = 0;
+  llvm::Type *HighPart = nullptr;
   switch (Hi) {
     // Memory was handled previously and X87 should
     // never occur as a hi class.
@@ -2336,7 +2297,7 @@
 
   neededInt = 0;
   neededSSE = 0;
-  llvm::Type *ResType = 0;
+  llvm::Type *ResType = nullptr;
   switch (Lo) {
   case NoClass:
     if (Hi == NoClass)
@@ -2397,7 +2358,7 @@
   }
   }
 
-  llvm::Type *HighPart = 0;
+  llvm::Type *HighPart = nullptr;
   switch (Hi) {
     // Memory was handled previously, ComplexX87 and X87 should
     // never occur as hi classes, and X87Up must be preceded by X87,
@@ -2450,7 +2411,8 @@
 
 void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
 
-  FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+  if (!getCXXABI().classifyReturnType(FI))
+    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
 
   // Keep track of the number of assigned registers.
   unsigned freeIntRegs = 6, freeSSERegs = 8;
@@ -2571,9 +2533,9 @@
   // NOTE: 304 is a typo, there are (6 * 8 + 8 * 16) = 176 bytes of
   // register save space).
 
-  llvm::Value *InRegs = 0;
-  llvm::Value *gp_offset_p = 0, *gp_offset = 0;
-  llvm::Value *fp_offset_p = 0, *fp_offset = 0;
+  llvm::Value *InRegs = nullptr;
+  llvm::Value *gp_offset_p = nullptr, *gp_offset = nullptr;
+  llvm::Value *fp_offset_p = nullptr, *fp_offset = nullptr;
   if (neededInt) {
     gp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "gp_offset_p");
     gp_offset = CGF.Builder.CreateLoad(gp_offset_p, "gp_offset");
@@ -2719,11 +2681,9 @@
 
   uint64_t Size = getContext().getTypeSize(Ty);
 
-  if (const RecordType *RT = Ty->getAs<RecordType>()) {
-    if (IsReturnType) {
-      if (isRecordReturnIndirect(RT, getCXXABI()))
-        return ABIArgInfo::getIndirect(0, false);
-    } else {
+  const RecordType *RT = Ty->getAs<RecordType>();
+  if (RT) {
+    if (!IsReturnType) {
       if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()))
         return ABIArgInfo::getIndirect(0, RAA == CGCXXABI::RAA_DirectInMemory);
     }
@@ -2735,15 +2695,24 @@
     if (Size == 128 && getTarget().getTriple().isWindowsGNUEnvironment())
       return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
                                                           Size));
+  }
 
+  if (Ty->isMemberPointerType()) {
+    // If the member pointer is represented by an LLVM int or ptr, pass it
+    // directly.
+    llvm::Type *LLTy = CGT.ConvertType(Ty);
+    if (LLTy->isPointerTy() || LLTy->isIntegerTy())
+      return ABIArgInfo::getDirect();
+  }
+
+  if (RT || Ty->isMemberPointerType()) {
     // MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
     // not 1, 2, 4, or 8 bytes, must be passed by reference."
-    if (Size <= 64 &&
-        (Size & (Size - 1)) == 0)
-      return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
-                                                          Size));
+    if (Size > 64 || !llvm::isPowerOf2_64(Size))
+      return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
 
-    return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
+    // Otherwise, coerce it to a small integer.
+    return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
   }
 
   if (Ty->isPromotableIntegerType())
@@ -2753,9 +2722,8 @@
 }
 
 void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
-
-  QualType RetTy = FI.getReturnType();
-  FI.getReturnInfo() = classify(RetTy, true);
+  if (!getCXXABI().classifyReturnType(FI))
+    FI.getReturnInfo() = classify(FI.getReturnType(), true);
 
   for (auto &I : FI.arguments())
     I.info = classify(I.type, false);
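+
+// Hedged examples of the Win64 rules above (types hypothetical):
+//
+//   struct P { int a, b; };    // 8 bytes, a power of two -> coerced to i64
+//   struct Q { char c[3]; };   // 24 bits, not a power of two -> indirect
+//   struct R { char c[16]; };  // larger than 8 bytes -> indirect
+//
+// A single-inheritance data member pointer lowers to an LLVM integer and is
+// likewise passed directly.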
@@ -2900,7 +2868,8 @@
   // entry.  This would require changing the logic in PPCISelLowering
   // when lowering the parameters in the caller and args in the callee.
   void computeInfo(CGFunctionInfo &FI) const override {
-    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+    if (!getCXXABI().classifyReturnType(FI))
+      FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
     for (auto &I : FI.arguments()) {
       // We rely on the default argument classification for the most part.
       // One exception:  An aggregate containing a single floating-point
@@ -3135,12 +3104,12 @@
 }
 
 //===----------------------------------------------------------------------===//
-// ARM64 ABI Implementation
+// AArch64 ABI Implementation
 //===----------------------------------------------------------------------===//
 
 namespace {
 
-class ARM64ABIInfo : public ABIInfo {
+class AArch64ABIInfo : public ABIInfo {
 public:
   enum ABIKind {
     AAPCS = 0,
@@ -3151,7 +3120,7 @@
   ABIKind Kind;
 
 public:
-  ARM64ABIInfo(CodeGenTypes &CGT, ABIKind Kind) : ABIInfo(CGT), Kind(Kind) {}
+  AArch64ABIInfo(CodeGenTypes &CGT, ABIKind Kind) : ABIInfo(CGT), Kind(Kind) {}
 
 private:
   ABIKind getABIKind() const { return Kind; }
@@ -3160,7 +3129,7 @@
   ABIArgInfo classifyReturnType(QualType RetTy) const;
   ABIArgInfo classifyArgumentType(QualType RetTy, unsigned &AllocatedVFP,
                                   bool &IsHA, unsigned &AllocatedGPR,
-                                  bool &IsSmallAggr) const;
+                                  bool &IsSmallAggr, bool IsNamedArg) const;
   bool isIllegalVectorType(QualType Ty) const;
 
   virtual void computeInfo(CGFunctionInfo &FI) const {
@@ -3171,40 +3140,54 @@
     // and Floating-point Registers (with one register per member of the HFA or
     // HVA). Otherwise, the NSRN is set to 8.
     unsigned AllocatedVFP = 0;
+
     // To correctly handle small aggregates, we need to keep track of the number
     // of GPRs allocated so far. If the small aggregate can't all fit into
     // registers, it will be on stack. We don't allow the aggregate to be
     // partially in registers.
     unsigned AllocatedGPR = 0;
-    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+
+    // Find the number of named arguments. Variadic arguments get special
+    // treatment with the Darwin ABI.
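+    // (Editorial example: for 'int printf(const char *fmt, ...)' called with
+    // three arguments, NumRequiredArgs == 1, so only 'fmt' is named and the
+    // two trailing arguments take the variadic path.)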
+    unsigned NumRequiredArgs = (FI.isVariadic() ?
+                                FI.getRequiredArgs().getNumRequiredArgs() :
+                                FI.arg_size());
+
+    if (!getCXXABI().classifyReturnType(FI))
+      FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
     for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
          it != ie; ++it) {
       unsigned PreAllocation = AllocatedVFP, PreGPR = AllocatedGPR;
       bool IsHA = false, IsSmallAggr = false;
       const unsigned NumVFPs = 8;
       const unsigned NumGPRs = 8;
+      bool IsNamedArg = ((it - FI.arg_begin()) <
+                         static_cast<signed>(NumRequiredArgs));
       it->info = classifyArgumentType(it->type, AllocatedVFP, IsHA,
-                                      AllocatedGPR, IsSmallAggr);
+                                      AllocatedGPR, IsSmallAggr, IsNamedArg);
+
+      // Under AAPCS the 64-bit stack slot alignment means we can't pass HAs
+      // as sequences of floats since they'll get "holes" inserted as
+      // padding by the back end.
+      if (IsHA && AllocatedVFP > NumVFPs && !isDarwinPCS() &&
+          getContext().getTypeAlign(it->type) < 64) {
+        uint32_t NumStackSlots = getContext().getTypeSize(it->type);
+        NumStackSlots = llvm::RoundUpToAlignment(NumStackSlots, 64) / 64;
+
+        llvm::Type *CoerceTy = llvm::ArrayType::get(
+            llvm::Type::getDoubleTy(getVMContext()), NumStackSlots);
+        it->info = ABIArgInfo::getDirect(CoerceTy);
+      }
+
       // If we do not have enough VFP registers for the HA, any VFP registers
       // that are unallocated are marked as unavailable. To achieve this, we add
       // padding of (NumVFPs - PreAllocation) floats.
       if (IsHA && AllocatedVFP > NumVFPs && PreAllocation < NumVFPs) {
         llvm::Type *PaddingTy = llvm::ArrayType::get(
             llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocation);
-        if (isDarwinPCS())
-          it->info = ABIArgInfo::getExpandWithPadding(false, PaddingTy);
-        else {
-          // Under AAPCS the 64-bit stack slot alignment means we can't pass HAs
-          // as sequences of floats since they'll get "holes" inserted as
-          // padding by the back end.
-          uint32_t NumStackSlots = getContext().getTypeSize(it->type);
-          NumStackSlots = llvm::RoundUpToAlignment(NumStackSlots, 64) / 64;
-
-          llvm::Type *CoerceTy = llvm::ArrayType::get(
-              llvm::Type::getDoubleTy(getVMContext()), NumStackSlots);
-          it->info = ABIArgInfo::getDirect(CoerceTy, 0, PaddingTy);
-        }
+        it->info.setPaddingType(PaddingTy);
       }
+
       // If we do not have enough GPRs for the small aggregate, any GPR regs
       // that are unallocated are marked as unavailable.
       if (IsSmallAggr && AllocatedGPR > NumGPRs && PreGPR < NumGPRs) {
@@ -3229,10 +3212,10 @@
   }
 };
 
-class ARM64TargetCodeGenInfo : public TargetCodeGenInfo {
+class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
 public:
-  ARM64TargetCodeGenInfo(CodeGenTypes &CGT, ARM64ABIInfo::ABIKind Kind)
-      : TargetCodeGenInfo(new ARM64ABIInfo(CGT, Kind)) {}
+  AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind Kind)
+      : TargetCodeGenInfo(new AArch64ABIInfo(CGT, Kind)) {}
 
   StringRef getARCRetainAutoreleasedReturnValueMarker() const {
     return "mov\tfp, fp\t\t; marker for objc_retainAutoreleaseReturnValue";
@@ -3246,13 +3229,14 @@
 
 static bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
                                    ASTContext &Context,
-                                   uint64_t *HAMembers = 0);
+                                   uint64_t *HAMembers = nullptr);
 
-ABIArgInfo ARM64ABIInfo::classifyArgumentType(QualType Ty,
-                                              unsigned &AllocatedVFP,
-                                              bool &IsHA,
-                                              unsigned &AllocatedGPR,
-                                              bool &IsSmallAggr) const {
+ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
+                                                unsigned &AllocatedVFP,
+                                                bool &IsHA,
+                                                unsigned &AllocatedGPR,
+                                                bool &IsSmallAggr,
+                                                bool IsNamedArg) const {
   // Handle illegal vector types here.
   if (isIllegalVectorType(Ty)) {
     uint64_t Size = getContext().getTypeSize(Ty);
@@ -3293,6 +3277,10 @@
       Ty = EnumTy->getDecl()->getIntegerType();
 
     if (!Ty->isFloatingType() && !Ty->isVectorType()) {
+      unsigned Alignment = getContext().getTypeAlign(Ty);
+      if (!isDarwinPCS() && Alignment > 64)
+        AllocatedGPR = llvm::RoundUpToAlignment(AllocatedGPR, Alignment / 64);
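+      // (Editorial example: llvm::RoundUpToAlignment(V, A) returns
+      // ((V + A - 1) / A) * A, so an i128 argument (Alignment == 128, A == 2)
+      // after one allocated GPR bumps AllocatedGPR from 1 to 2 and starts in
+      // an even-numbered register.)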
+
       int RegsNeeded = getContext().getTypeSize(Ty) > 64 ? 2 : 1;
       AllocatedGPR += RegsNeeded;
     }
@@ -3303,9 +3291,10 @@
 
   // Structures with either a non-trivial destructor or a non-trivial
   // copy constructor are always indirect.
-  if (isRecordReturnIndirect(Ty, getCXXABI())) {
+  if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
     AllocatedGPR++;
-    return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
+    return ABIArgInfo::getIndirect(0, /*ByVal=*/RAA ==
+                                          CGCXXABI::RAA_DirectInMemory);
   }
 
   // Empty records are always ignored on Darwin, but actually passed in C++ mode
@@ -3319,23 +3308,34 @@
   }
 
   // Homogeneous Floating-point Aggregates (HFAs) need to be expanded.
-  const Type *Base = 0;
+  const Type *Base = nullptr;
   uint64_t Members = 0;
   if (isHomogeneousAggregate(Ty, Base, getContext(), &Members)) {
-    AllocatedVFP += Members;
     IsHA = true;
+    if (!IsNamedArg && isDarwinPCS()) {
+      // With the Darwin ABI, variadic arguments are always passed on the stack
+      // and should not be expanded. Treat variadic HFAs as arrays of doubles.
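+      // (Editorial example: 'struct { double a, b; }' passed variadically
+      // becomes [2 x double], i.e. two 8-byte stack slots.)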
+      uint64_t Size = getContext().getTypeSize(Ty);
+      llvm::Type *BaseTy = llvm::Type::getDoubleTy(getVMContext());
+      return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64));
+    }
+    AllocatedVFP += Members;
     return ABIArgInfo::getExpand();
   }
 
   // Aggregates <= 16 bytes are passed directly in registers or on the stack.
   uint64_t Size = getContext().getTypeSize(Ty);
   if (Size <= 128) {
+    unsigned Alignment = getContext().getTypeAlign(Ty);
+    if (!isDarwinPCS() && Alignment > 64)
+      AllocatedGPR = llvm::RoundUpToAlignment(AllocatedGPR, Alignment / 64);
+
     Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes
     AllocatedGPR += Size / 64;
     IsSmallAggr = true;
     // We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
     // For aggregates with 16-byte alignment, we use i128.
-    if (getContext().getTypeAlign(Ty) < 128 && Size == 128) {
+    if (Alignment < 128 && Size == 128) {
       llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext());
       return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64));
     }
@@ -3346,7 +3346,7 @@
   return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
 }
 
-ABIArgInfo ARM64ABIInfo::classifyReturnType(QualType RetTy) const {
+ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const {
   if (RetTy->isVoidType())
     return ABIArgInfo::getIgnore();
 
@@ -3359,19 +3359,15 @@
     if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
       RetTy = EnumTy->getDecl()->getIntegerType();
 
-    return (RetTy->isPromotableIntegerType() ? ABIArgInfo::getExtend()
-                                             : ABIArgInfo::getDirect());
+    return (RetTy->isPromotableIntegerType() && isDarwinPCS()
+                ? ABIArgInfo::getExtend()
+                : ABIArgInfo::getDirect());
   }
 
-  // Structures with either a non-trivial destructor or a non-trivial
-  // copy constructor are always indirect.
-  if (isRecordReturnIndirect(RetTy, getCXXABI()))
-    return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
-
   if (isEmptyRecord(getContext(), RetTy, true))
     return ABIArgInfo::getIgnore();
 
-  const Type *Base = 0;
+  const Type *Base = nullptr;
   if (isHomogeneousAggregate(RetTy, Base, getContext()))
     // Homogeneous Floating-point Aggregates (HFAs) are returned directly.
     return ABIArgInfo::getDirect();
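
The hunk above also narrows when small integer returns are extended: AAPCS64
leaves widening to the caller, so only the Darwin variant extends in the
callee. An editorial one-liner mirroring the new predicate:

    // Hedged sketch of 'RetTy->isPromotableIntegerType() && isDarwinPCS()'.
    static bool extendSmallIntegerReturn(bool IsPromotable, bool IsDarwinPCS) {
      // e.g. a 'short' return is sign-extended only under the Darwin PCS.
      return IsPromotable && IsDarwinPCS;
    }
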
@@ -3386,8 +3382,8 @@
   return ABIArgInfo::getIndirect(0);
 }
 
-/// isIllegalVectorType - check whether the vector type is legal for ARM64.
-bool ARM64ABIInfo::isIllegalVectorType(QualType Ty) const {
+/// isIllegalVectorType - check whether the vector type is legal for AArch64.
+bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const {
   if (const VectorType *VT = Ty->getAs<VectorType>()) {
     // Check whether VT is legal.
     unsigned NumElements = VT->getNumElements();
@@ -3420,7 +3416,7 @@
   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
   auto &Ctx = CGF.getContext();
 
-  llvm::Value *reg_offs_p = 0, *reg_offs = 0;
+  llvm::Value *reg_offs_p = nullptr, *reg_offs = nullptr;
   int reg_top_index;
   int RegSize;
   if (AllocatedGPR) {
@@ -3447,14 +3443,14 @@
   // argument. We don't want to keep updating reg_offs (in case it overflows,
   // though anyone passing 2GB of arguments, each at most 16 bytes, deserves
   // whatever they get).
-  llvm::Value *UsingStack = 0;
+  llvm::Value *UsingStack = nullptr;
   UsingStack = CGF.Builder.CreateICmpSGE(
       reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, 0));
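+  // (Editorial note: in the AAPCS64 va_list, gr_offs/vr_offs are negative
+  // while saved registers remain and become non-negative once the save area
+  // is exhausted, so this single signed comparison decides register vs. stack.)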
 
   CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, MaybeRegBlock);
 
   // Otherwise, at least some kind of argument could go in these registers, the
-  // quesiton is whether this particular type is too big.
+  // question is whether this particular type is too big.
   CGF.EmitBlock(MaybeRegBlock);
 
   // Integer arguments may need to correct register alignment (for example a
@@ -3472,14 +3468,14 @@
   }
 
   // Update the gr_offs/vr_offs pointer for next call to va_arg on this va_list.
-  llvm::Value *NewOffset = 0;
+  llvm::Value *NewOffset = nullptr;
   NewOffset = CGF.Builder.CreateAdd(
       reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, RegSize), "new_reg_offs");
   CGF.Builder.CreateStore(NewOffset, reg_offs_p);
 
   // Now we're in a position to decide whether this argument really was in
   // registers or not.
-  llvm::Value *InRegs = 0;
+  llvm::Value *InRegs = nullptr;
   InRegs = CGF.Builder.CreateICmpSLE(
       NewOffset, llvm::ConstantInt::get(CGF.Int32Ty, 0), "inreg");
 
@@ -3493,12 +3489,12 @@
   // registers. First start the appropriate block:
   CGF.EmitBlock(InRegBlock);
 
-  llvm::Value *reg_top_p = 0, *reg_top = 0;
+  llvm::Value *reg_top_p = nullptr, *reg_top = nullptr;
   reg_top_p =
       CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, "reg_top_p");
   reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top");
   llvm::Value *BaseAddr = CGF.Builder.CreateGEP(reg_top, reg_offs);
-  llvm::Value *RegAddr = 0;
+  llvm::Value *RegAddr = nullptr;
   llvm::Type *MemTy = llvm::PointerType::getUnqual(CGF.ConvertTypeForMem(Ty));
 
   if (IsIndirect) {
@@ -3507,9 +3503,10 @@
     MemTy = llvm::PointerType::getUnqual(MemTy);
   }
 
-  const Type *Base = 0;
+  const Type *Base = nullptr;
   uint64_t NumMembers;
-  if (isHomogeneousAggregate(Ty, Base, Ctx, &NumMembers) && NumMembers > 1) {
+  bool IsHFA = isHomogeneousAggregate(Ty, Base, Ctx, &NumMembers);
+  if (IsHFA && NumMembers > 1) {
     // Homogeneous aggregates passed in registers will have their elements split
     // and stored 16-bytes apart regardless of size (they're notionally in qN,
     // qN+1, ...). We reload and store into a temporary local variable
@@ -3538,7 +3535,8 @@
   } else {
     // Otherwise the object is contiguous in memory
     unsigned BeAlign = reg_top_index == 2 ? 16 : 8;
-    if (CGF.CGM.getDataLayout().isBigEndian() && !isAggregateTypeForABI(Ty) &&
+    if (CGF.CGM.getDataLayout().isBigEndian() &&
+        (IsHFA || !isAggregateTypeForABI(Ty)) &&
         Ctx.getTypeSize(Ty) < (BeAlign * 8)) {
       int Offset = BeAlign - Ctx.getTypeSize(Ty) / 8;
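+      // (Editorial example: a 32-bit 'int' read from an 8-byte big-endian GPR
+      // slot gets Offset == 4, placing it in the high-address half of the slot.)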
       BaseAddr = CGF.Builder.CreatePtrToInt(BaseAddr, CGF.Int64Ty);
@@ -3559,7 +3557,7 @@
   //=======================================
   CGF.EmitBlock(OnStackBlock);
 
-  llvm::Value *stack_p = 0, *OnStackAddr = 0;
+  llvm::Value *stack_p = nullptr, *OnStackAddr = nullptr;
   stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "stack_p");
   OnStackAddr = CGF.Builder.CreateLoad(stack_p, "stack");
 
@@ -3626,30 +3624,30 @@
   return ResAddr;
 }
 
-llvm::Value *ARM64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty,
+llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty,
                                           CodeGenFunction &CGF) const {
 
   unsigned AllocatedGPR = 0, AllocatedVFP = 0;
   bool IsHA = false, IsSmallAggr = false;
-  ABIArgInfo AI =
-      classifyArgumentType(Ty, AllocatedVFP, IsHA, AllocatedGPR, IsSmallAggr);
+  ABIArgInfo AI = classifyArgumentType(Ty, AllocatedVFP, IsHA, AllocatedGPR,
+                                       IsSmallAggr, false /*IsNamedArg*/);
 
   return EmitAArch64VAArg(VAListAddr, Ty, AllocatedGPR, AllocatedVFP,
                           AI.isIndirect(), CGF);
 }
 
-llvm::Value *ARM64ABIInfo::EmitDarwinVAArg(llvm::Value *VAListAddr, QualType Ty,
+llvm::Value *AArch64ABIInfo::EmitDarwinVAArg(llvm::Value *VAListAddr, QualType Ty,
                                            CodeGenFunction &CGF) const {
   // We do not support va_arg for aggregates or illegal vector types.
   // Lower VAArg here for these cases and use the LLVM va_arg instruction for
   // other cases.
   if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty))
-    return 0;
+    return nullptr;
 
   uint64_t Size = CGF.getContext().getTypeSize(Ty) / 8;
   uint64_t Align = CGF.getContext().getTypeAlign(Ty) / 8;
 
-  const Type *Base = 0;
+  const Type *Base = nullptr;
   bool isHA = isHomogeneousAggregate(Ty, Base, getContext());
 
   bool isIndirect = false;
@@ -3753,7 +3751,7 @@
 
 private:
   ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic) const;
-  ABIArgInfo classifyArgumentType(QualType RetTy, bool &IsHA, bool isVariadic,
+  ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic,
                                   bool &IsCPRC) const;
   bool isIllegalVectorType(QualType Ty) const;
 
@@ -3854,38 +3852,37 @@
   // unallocated are marked as unavailable. 
   resetAllocatedRegs();
 
-  FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic());
+  if (getCXXABI().classifyReturnType(FI)) {
+    if (FI.getReturnInfo().isIndirect())
+      markAllocatedGPRs(1, 1);
+  } else {
+    FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic());
+  }
   for (auto &I : FI.arguments()) {
     unsigned PreAllocationVFPs = AllocatedVFPs;
     unsigned PreAllocationGPRs = AllocatedGPRs;
-    bool IsHA = false;
     bool IsCPRC = false;
     // 6.1.2.3 There is one VFP co-processor register class using registers
     // s0-s15 (d0-d7) for passing arguments.
-    I.info = classifyArgumentType(I.type, IsHA, FI.isVariadic(), IsCPRC);
-    assert((IsCPRC || !IsHA) && "Homogeneous aggregates must be CPRCs");
-    // If we do not have enough VFP registers for the HA, any VFP registers
-    // that are unallocated are marked as unavailable. To achieve this, we add
-    // padding of (NumVFPs - PreAllocationVFP) floats.
-    // Note that IsHA will only be set when using the AAPCS-VFP calling convention,
-    // and the callee is not variadic.
-    if (IsHA && AllocatedVFPs > NumVFPs && PreAllocationVFPs < NumVFPs) {
-      llvm::Type *PaddingTy = llvm::ArrayType::get(
-          llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocationVFPs);
-      I.info = ABIArgInfo::getExpandWithPadding(false, PaddingTy);
-    }
+    I.info = classifyArgumentType(I.type, FI.isVariadic(), IsCPRC);
 
     // If we have allocated some arguments onto the stack (due to running
     // out of VFP registers), we cannot split an argument between GPRs and
     // the stack. If this situation occurs, we add padding to prevent the
-    // GPRs from being used. In this situiation, the current argument could
+    // GPRs from being used. In this situation, the current argument could
     // only be allocated by rule C.8, so rule C.6 would mark these GPRs as
     // unusable anyway.
     const bool StackUsed = PreAllocationGPRs > NumGPRs || PreAllocationVFPs > NumVFPs;
     if (!IsCPRC && PreAllocationGPRs < NumGPRs && AllocatedGPRs > NumGPRs && StackUsed) {
       llvm::Type *PaddingTy = llvm::ArrayType::get(
           llvm::Type::getInt32Ty(getVMContext()), NumGPRs - PreAllocationGPRs);
-      I.info = ABIArgInfo::getExpandWithPadding(false, PaddingTy);
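+      // (Editorial note: getExpandWithPadding() is gone in this rebase; the
+      // same effect is achieved below by attaching PaddingTy to a getDirect()
+      // result, preserving any existing coercion type.)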
+      if (I.info.canHaveCoerceToType()) {
+        I.info = ABIArgInfo::getDirect(I.info.getCoerceToType() /* type */, 0 /* offset */,
+                                       PaddingTy);
+      } else {
+        I.info = ABIArgInfo::getDirect(nullptr /* type */, 0 /* offset */,
+                                       PaddingTy);
+      }
     }
   }
 
@@ -4069,8 +4066,7 @@
     VFPRegs[i] = 0;
 }
 
-ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool &IsHA,
-                                            bool isVariadic,
+ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
                                             bool &IsCPRC) const {
   // We update number of allocated VFPs according to
   // 6.1.2.1 The following argument types are VFP CPRCs:
@@ -4165,7 +4161,7 @@
   if (getABIKind() == ARMABIInfo::AAPCS_VFP && !isVariadic) {
     // Homogeneous Aggregates need to be expanded when we can fit the aggregate
     // into VFP registers.
-    const Type *Base = 0;
+    const Type *Base = nullptr;
     uint64_t Members = 0;
     if (isHomogeneousAggregate(Ty, Base, getContext(), &Members)) {
       assert(Base && "Base class should be set for homogeneous aggregate");
@@ -4182,9 +4178,8 @@
                Base->isSpecificBuiltinType(BuiltinType::LongDouble));
         markAllocatedVFPs(2, Members * 2);
       }
-      IsHA = true;
       IsCPRC = true;
-      return ABIArgInfo::getExpand();
+      return ABIArgInfo::getDirect();
     }
   }
 
@@ -4198,7 +4193,7 @@
       getABIKind() == ARMABIInfo::AAPCS)
     ABIAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8);
   if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) {
-      // Update Allocated GPRs
+    // Update Allocated GPRs
     markAllocatedGPRs(1, 1);
     return ABIArgInfo::getIndirect(TyAlign, /*ByVal=*/true,
            /*Realign=*/TyAlign > ABIAlign);
@@ -4329,13 +4324,6 @@
             ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
   }
 
-  // Structures with either a non-trivial destructor or a non-trivial
-  // copy constructor are always indirect.
-  if (isRecordReturnIndirect(RetTy, getCXXABI())) {
-    markAllocatedGPRs(1, 1);
-    return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
-  }
-
   // Are we following APCS?
   if (getABIKind() == APCS) {
     if (isEmptyRecord(getContext(), RetTy, false))
@@ -4372,7 +4360,7 @@
 
   // Check for homogeneous aggregates with AAPCS-VFP.
   if (getABIKind() == AAPCS_VFP && !isVariadic) {
-    const Type *Base = 0;
+    const Type *Base = nullptr;
     if (isHomogeneousAggregate(RetTy, Base, getContext())) {
       assert(Base && "Base class should be set for homogeneous aggregate");
       // Homogeneous Aggregates are returned directly.
@@ -4521,221 +4509,6 @@
 }
 
 //===----------------------------------------------------------------------===//
-// AArch64 ABI Implementation
-//===----------------------------------------------------------------------===//
-
-namespace {
-
-class AArch64ABIInfo : public ABIInfo {
-public:
-  AArch64ABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {}
-
-private:
-  // The AArch64 PCS is explicit about return types and argument types being
-  // handled identically, so we don't need to draw a distinction between
-  // Argument and Return classification.
-  ABIArgInfo classifyGenericType(QualType Ty, int &FreeIntRegs,
-                                 int &FreeVFPRegs) const;
-
-  ABIArgInfo tryUseRegs(QualType Ty, int &FreeRegs, int RegsNeeded, bool IsInt,
-                        llvm::Type *DirectTy = 0) const;
-
-  void computeInfo(CGFunctionInfo &FI) const override;
-
-  llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
-                         CodeGenFunction &CGF) const override;
-};
-
-class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
-public:
-  AArch64TargetCodeGenInfo(CodeGenTypes &CGT)
-    :TargetCodeGenInfo(new AArch64ABIInfo(CGT)) {}
-
-  const AArch64ABIInfo &getABIInfo() const {
-    return static_cast<const AArch64ABIInfo&>(TargetCodeGenInfo::getABIInfo());
-  }
-
-  int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
-    return 31;
-  }
-
-  bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
-                               llvm::Value *Address) const override {
-    // 0-31 are x0-x30 and sp: 8 bytes each
-    llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);
-    AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 31);
-
-    // 64-95 are v0-v31: 16 bytes each
-    llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16);
-    AssignToArrayRange(CGF.Builder, Address, Sixteen8, 64, 95);
-
-    return false;
-  }
-
-};
-
-}
-
-void AArch64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
-  int FreeIntRegs = 8, FreeVFPRegs = 8;
-
-  FI.getReturnInfo() = classifyGenericType(FI.getReturnType(),
-                                           FreeIntRegs, FreeVFPRegs);
-
-  FreeIntRegs = FreeVFPRegs = 8;
-  for (auto &I : FI.arguments()) {
-    I.info = classifyGenericType(I.type, FreeIntRegs, FreeVFPRegs);
-
-  }
-}
-
-ABIArgInfo
-AArch64ABIInfo::tryUseRegs(QualType Ty, int &FreeRegs, int RegsNeeded,
-                           bool IsInt, llvm::Type *DirectTy) const {
-  if (FreeRegs >= RegsNeeded) {
-    FreeRegs -= RegsNeeded;
-    return ABIArgInfo::getDirect(DirectTy);
-  }
-
-  llvm::Type *Padding = 0;
-
-  // We need padding so that later arguments don't get filled in anyway. That
-  // wouldn't happen if only ByVal arguments followed in the same category, but
-  // a large structure will simply seem to be a pointer as far as LLVM is
-  // concerned.
-  if (FreeRegs > 0) {
-    if (IsInt)
-      Padding = llvm::Type::getInt64Ty(getVMContext());
-    else
-      Padding = llvm::Type::getFloatTy(getVMContext());
-
-    // Either [N x i64] or [N x float].
-    Padding = llvm::ArrayType::get(Padding, FreeRegs);
-    FreeRegs = 0;
-  }
-
-  return ABIArgInfo::getIndirect(getContext().getTypeAlign(Ty) / 8,
-                                 /*IsByVal=*/ true, /*Realign=*/ false,
-                                 Padding);
-}
-
-
-ABIArgInfo AArch64ABIInfo::classifyGenericType(QualType Ty,
-                                               int &FreeIntRegs,
-                                               int &FreeVFPRegs) const {
-  // Can only occurs for return, but harmless otherwise.
-  if (Ty->isVoidType())
-    return ABIArgInfo::getIgnore();
-
-  // Large vector types should be returned via memory. There's no such concept
-  // in the ABI, but they'd be over 16 bytes anyway so no matter how they're
-  // classified they'd go into memory (see B.3).
-  if (Ty->isVectorType() && getContext().getTypeSize(Ty) > 128) {
-    if (FreeIntRegs > 0)
-      --FreeIntRegs;
-    return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
-  }
-
-  // All non-aggregate LLVM types have a concrete ABI representation so they can
-  // be passed directly. After this block we're guaranteed to be in a
-  // complicated case.
-  if (!isAggregateTypeForABI(Ty)) {
-    // Treat an enum type as its underlying type.
-    if (const EnumType *EnumTy = Ty->getAs<EnumType>())
-      Ty = EnumTy->getDecl()->getIntegerType();
-
-    if (Ty->isFloatingType() || Ty->isVectorType())
-      return tryUseRegs(Ty, FreeVFPRegs, /*RegsNeeded=*/ 1, /*IsInt=*/ false);
-
-    assert(getContext().getTypeSize(Ty) <= 128 &&
-           "unexpectedly large scalar type");
-
-    int RegsNeeded = getContext().getTypeSize(Ty) > 64 ? 2 : 1;
-
-    // If the type may need padding registers to ensure "alignment", we must be
-    // careful when this is accounted for. Increasing the effective size covers
-    // all cases.
-    if (getContext().getTypeAlign(Ty) == 128)
-      RegsNeeded += FreeIntRegs % 2 != 0;
-
-    return tryUseRegs(Ty, FreeIntRegs, RegsNeeded, /*IsInt=*/ true);
-  }
-
-  if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
-    if (FreeIntRegs > 0 && RAA == CGCXXABI::RAA_Indirect)
-      --FreeIntRegs;
-    return ABIArgInfo::getIndirect(0, RAA == CGCXXABI::RAA_DirectInMemory);
-  }
-
-  if (isEmptyRecord(getContext(), Ty, true)) {
-    if (!getContext().getLangOpts().CPlusPlus) {
-      // Empty structs outside C++ mode are a GNU extension, so no ABI can
-      // possibly tell us what to do. It turns out (I believe) that GCC ignores
-      // the object for parameter-passsing purposes.
-      return ABIArgInfo::getIgnore();
-    }
-
-    // The combination of C++98 9p5 (sizeof(struct) != 0) and the pseudocode
-    // description of va_arg in the PCS require that an empty struct does
-    // actually occupy space for parameter-passing. I'm hoping for a
-    // clarification giving an explicit paragraph to point to in future.
-    return tryUseRegs(Ty, FreeIntRegs, /*RegsNeeded=*/ 1, /*IsInt=*/ true,
-                      llvm::Type::getInt8Ty(getVMContext()));
-  }
-
-  // Homogeneous vector aggregates get passed in registers or on the stack.
-  const Type *Base = 0;
-  uint64_t NumMembers = 0;
-  if (isHomogeneousAggregate(Ty, Base, getContext(), &NumMembers)) {
-    assert(Base && "Base class should be set for homogeneous aggregate");
-    // Homogeneous aggregates are passed and returned directly.
-    return tryUseRegs(Ty, FreeVFPRegs, /*RegsNeeded=*/ NumMembers,
-                      /*IsInt=*/ false);
-  }
-
-  uint64_t Size = getContext().getTypeSize(Ty);
-  if (Size <= 128) {
-    // Small structs can use the same direct type whether they're in registers
-    // or on the stack.
-    llvm::Type *BaseTy;
-    unsigned NumBases;
-    int SizeInRegs = (Size + 63) / 64;
-
-    if (getContext().getTypeAlign(Ty) == 128) {
-      BaseTy = llvm::Type::getIntNTy(getVMContext(), 128);
-      NumBases = 1;
-
-      // If the type may need padding registers to ensure "alignment", we must
-      // be careful when this is accounted for. Increasing the effective size
-      // covers all cases.
-      SizeInRegs += FreeIntRegs % 2 != 0;
-    } else {
-      BaseTy = llvm::Type::getInt64Ty(getVMContext());
-      NumBases = SizeInRegs;
-    }
-    llvm::Type *DirectTy = llvm::ArrayType::get(BaseTy, NumBases);
-
-    return tryUseRegs(Ty, FreeIntRegs, /*RegsNeeded=*/ SizeInRegs,
-                      /*IsInt=*/ true, DirectTy);
-  }
-
-  // If the aggregate is > 16 bytes, it's passed and returned indirectly. In
-  // LLVM terms the return uses an "sret" pointer, but that's handled elsewhere.
-  --FreeIntRegs;
-  return ABIArgInfo::getIndirect(0, /* byVal = */ false);
-}
-
-llvm::Value *AArch64ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
-                                       CodeGenFunction &CGF) const {
-  int FreeIntRegs = 8, FreeVFPRegs = 8;
-  Ty = CGF.getContext().getCanonicalType(Ty);
-  ABIArgInfo AI = classifyGenericType(Ty, FreeIntRegs, FreeVFPRegs);
-
-  return EmitAArch64VAArg(VAListAddr, Ty, 8 - FreeIntRegs, 8 - FreeVFPRegs,
-                          AI.isIndirect(), CGF);
-}
-
-//===----------------------------------------------------------------------===//
 // NVPTX ABI Implementation
 //===----------------------------------------------------------------------===//
 
@@ -4761,7 +4534,9 @@
   void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &M) const override;
 private:
-  static void addKernelMetadata(llvm::Function *F);
+  // Adds a NamedMDNode with F, Name, and Operand as operands, and adds the
+  // resulting MDNode to the nvvm.annotations MDNode.
+  static void addNVVMMetadata(llvm::Function *F, StringRef Name, int Operand);
 };
 
 ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
@@ -4790,7 +4565,8 @@
 }
 
 void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
-  FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+  if (!getCXXABI().classifyReturnType(FI))
+    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
   for (auto &I : FI.arguments())
     I.info = classifyArgumentType(I.type);
 
@@ -4820,7 +4596,8 @@
     // By default, all functions are device functions
     if (FD->hasAttr<OpenCLKernelAttr>()) {
       // OpenCL __kernel functions get kernel metadata
-      addKernelMetadata(F);
+      // Create !{<func-ref>, metadata !"kernel", i32 1} node
+      addNVVMMetadata(F, "kernel", 1);
       // And kernel functions are not subject to inlining
       F->addFnAttr(llvm::Attribute::NoInline);
     }
@@ -4831,28 +4608,41 @@
     // CUDA __global__ functions get a kernel metadata entry.  Since
     // __global__ functions cannot be called from the device, we do not
     // need to set the noinline attribute.
-    if (FD->hasAttr<CUDAGlobalAttr>())
-      addKernelMetadata(F);
+    if (FD->hasAttr<CUDAGlobalAttr>()) {
+      // Create !{<func-ref>, metadata !"kernel", i32 1} node
+      addNVVMMetadata(F, "kernel", 1);
+    }
+    if (FD->hasAttr<CUDALaunchBoundsAttr>()) {
+      // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
+      addNVVMMetadata(F, "maxntidx",
+                      FD->getAttr<CUDALaunchBoundsAttr>()->getMaxThreads());
+      // min blocks is a default argument for CUDALaunchBoundsAttr, so getting a
+      // zero value from getMinBlocks either means it was not specified in
+      // __launch_bounds__ or the user specified a 0 value. In both cases, we
+      // don't have to add a PTX directive.
+      int MinCTASM = FD->getAttr<CUDALaunchBoundsAttr>()->getMinBlocks();
+      if (MinCTASM > 0) {
+        // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
+        addNVVMMetadata(F, "minctasm", MinCTASM);
+      }
+    }
   }
 }
 
-void NVPTXTargetCodeGenInfo::addKernelMetadata(llvm::Function *F) {
+void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::Function *F, StringRef Name,
+                                             int Operand) {
   llvm::Module *M = F->getParent();
   llvm::LLVMContext &Ctx = M->getContext();
 
   // Get "nvvm.annotations" metadata node
   llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
 
-  // Create !{<func-ref>, metadata !"kernel", i32 1} node
-  llvm::SmallVector<llvm::Value *, 3> MDVals;
-  MDVals.push_back(F);
-  MDVals.push_back(llvm::MDString::get(Ctx, "kernel"));
-  MDVals.push_back(llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1));
-
+  llvm::Value *MDVals[] = {
+      F, llvm::MDString::get(Ctx, Name),
+      llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand)};
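+  // (Editorial example: addNVVMMetadata(F, "maxntidx", 256) appends
+  // !{<func-ref>, metadata !"maxntidx", i32 256} to !nvvm.annotations.)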
   // Append metadata to nvvm.annotations
   MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
 }
-
 }
 
 //===----------------------------------------------------------------------===//
@@ -4873,7 +4663,8 @@
   ABIArgInfo classifyArgumentType(QualType ArgTy) const;
 
   void computeInfo(CGFunctionInfo &FI) const override {
-    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+    if (!getCXXABI().classifyReturnType(FI))
+      FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
     for (auto &I : FI.arguments())
       I.info = classifyArgumentType(I.type);
   }
@@ -5171,7 +4962,7 @@
   if (isCompoundType(Ty))
     return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
 
-  return ABIArgInfo::getDirect(0);
+  return ABIArgInfo::getDirect(nullptr);
 }
 
 //===----------------------------------------------------------------------===//
@@ -5206,9 +4997,8 @@
 
       // Step 3: Emit ISR vector alias.
       unsigned Num = attr->getNumber() / 2;
-      new llvm::GlobalAlias(GV->getType(), llvm::Function::ExternalLinkage,
-                            "__isr_" + Twine(Num),
-                            GV, &M.getModule());
+      llvm::GlobalAlias::create(llvm::Function::ExternalLinkage,
+                                "__isr_" + Twine(Num), F);
     }
   }
 }
@@ -5349,7 +5139,7 @@
 llvm::Type *MipsABIInfo::getPaddingType(uint64_t OrigOffset,
                                         uint64_t Offset) const {
   if (OrigOffset + MinABIStackAlignInBytes > Offset)
-    return 0;
+    return nullptr;
 
   return llvm::IntegerType::get(getVMContext(), (Offset - OrigOffset) * 8);
 }
@@ -5390,7 +5180,7 @@
     return ABIArgInfo::getExtend();
 
   return ABIArgInfo::getDirect(
-      0, 0, IsO32 ? 0 : getPaddingType(OrigOffset, CurrOffset));
+      nullptr, 0, IsO32 ? nullptr : getPaddingType(OrigOffset, CurrOffset));
 }
 
 llvm::Type*
@@ -5442,9 +5232,6 @@
     return ABIArgInfo::getIgnore();
 
   if (isAggregateTypeForABI(RetTy) || RetTy->isVectorType()) {
-    if (isRecordReturnIndirect(RetTy, getCXXABI()))
-      return ABIArgInfo::getIndirect(0);
-
     if (Size <= 128) {
       if (RetTy->isAnyComplexType())
         return ABIArgInfo::getDirect();
@@ -5470,7 +5257,8 @@
 
 void MipsABIInfo::computeInfo(CGFunctionInfo &FI) const {
   ABIArgInfo &RetInfo = FI.getReturnInfo();
-  RetInfo = classifyReturnType(FI.getReturnType());
+  if (!getCXXABI().classifyReturnType(FI))
+    RetInfo = classifyReturnType(FI.getReturnType());
 
   // Check if a pointer to an aggregate is passed as a hidden argument.  
   uint64_t Offset = RetInfo.isIndirect() ? MinABIStackAlignInBytes : 0;
@@ -5638,7 +5426,8 @@
 }
 
 void HexagonABIInfo::computeInfo(CGFunctionInfo &FI) const {
-  FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+  if (!getCXXABI().classifyReturnType(FI))
+    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
   for (auto &I : FI.arguments())
     I.info = classifyArgumentType(I.type);
 }
@@ -5691,11 +5480,6 @@
             ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
   }
 
-  // Structures with either a non-trivial destructor or a non-trivial
-  // copy constructor are always indirect.
-  if (isRecordReturnIndirect(RetTy, getCXXABI()))
-    return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
-
   if (isEmptyRecord(getContext(), RetTy, true))
     return ABIArgInfo::getIgnore();
 
@@ -6048,7 +5832,101 @@
 //===----------------------------------------------------------------------===//
 // XCore ABI Implementation
 //===----------------------------------------------------------------------===//
+
 namespace {
+
+/// A SmallStringEnc instance is used to build up the TypeString by passing
+/// it by reference between functions that append to it.
+typedef llvm::SmallString<128> SmallStringEnc;
+
+/// TypeStringCache caches the meta encodings of Types.
+///
+/// The reason for caching TypeStrings is twofold:
+///   1. To cache a type's encoding for later uses;
+///   2. As a means to break recursive member type inclusion.
+///
+/// A cache Entry can have a Status of:
+///   NonRecursive:   The type encoding is not recursive;
+///   Recursive:      The type encoding is recursive;
+///   Incomplete:     An incomplete TypeString;
+///   IncompleteUsed: An incomplete TypeString that has been used in a
+///                   Recursive type encoding.
+///
+/// A NonRecursive entry will have all of its sub-members expanded as fully
+/// as possible. Whilst it may contain types which are recursive, the type
+/// itself is not recursive and thus its encoding may be safely used whenever
+/// the type is encountered.
+///
+/// A Recursive entry will have all of its sub-members expanded as fully as
+/// possible. The type itself is recursive and it may contain other types which
+/// are recursive. The Recursive encoding must not be used during the expansion
+/// of a recursive type's recursive branch. For simplicity the code uses
+/// IncompleteCount to reject all usage of Recursive encodings for member types.
+///
+/// An Incomplete entry is always a RecordType and only encodes its
+/// identifier, e.g. "s(S){}". Incomplete 'StubEnc' entries are ephemeral and
+/// are placed into the cache during type expansion as a means to identify and
+/// handle recursive inclusion of types as sub-members. If there is recursion
+/// the entry becomes IncompleteUsed.
+///
+/// During the expansion of a RecordType's members:
+///
+///   If the cache contains a NonRecursive encoding for the member type, the
+///   cached encoding is used;
+///
+///   If the cache contains a Recursive encoding for the member type, the
+///   cached encoding is 'Swapped' out, as it may be incorrect, and...
+///
+///   If the member is a RecordType, an Incomplete encoding is placed into the
+///   cache to break potential recursive inclusion of itself as a sub-member;
+///
+///   Once a member RecordType has been expanded, its temporary incomplete
+///   entry is removed from the cache. If a Recursive encoding was swapped out
+///   it is swapped back in;
+///
+///   If an incomplete entry is used to expand a sub-member, the incomplete
+///   entry is marked as IncompleteUsed. The cache keeps count of how many
+///   IncompleteUsed entries it currently contains in IncompleteUsedCount;
+///
+///   If a member's encoding is found to be NonRecursive or Recursive (viz:
+///   IncompleteUsedCount == 0), the member's encoding is added to the cache.
+///   Otherwise the member is part of a recursive type and thus the recursion
+///   has been exited too soon for the encoding to be correct for the member.
+///
+class TypeStringCache {
+  enum Status {NonRecursive, Recursive, Incomplete, IncompleteUsed};
+  struct Entry {
+    std::string Str;     // The encoded TypeString for the type.
+    enum Status State;   // Information about the encoding in 'Str'.
+    std::string Swapped; // A temporary place holder for a Recursive encoding
+                         // during the expansion of RecordType's members.
+  };
+  std::map<const IdentifierInfo *, struct Entry> Map;
+  unsigned IncompleteCount;     // Number of Incomplete entries in the Map.
+  unsigned IncompleteUsedCount; // Number of IncompleteUsed entries in the Map.
+public:
+  TypeStringCache() : IncompleteCount(0), IncompleteUsedCount(0) {}
+  void addIncomplete(const IdentifierInfo *ID, std::string StubEnc);
+  bool removeIncomplete(const IdentifierInfo *ID);
+  void addIfComplete(const IdentifierInfo *ID, StringRef Str,
+                     bool IsRecursive);
+  StringRef lookupStr(const IdentifierInfo *ID);
+};
+
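+// Worked example (editorial, derived from the routines below): encoding
+//   struct S { struct S *next; };
+// first caches the incomplete stub "s(S){}" for S; expanding the member
+// 'next' then looks S up, finds the stub and marks it IncompleteUsed, so the
+// final encoding is "s(S){m(next){p(s(S){})}}" and is cached for S with
+// State == Recursive.
+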
+/// TypeString encodings for enum & union fields must be ordered.
+/// FieldEncoding is a helper for this ordering process.
+class FieldEncoding {
+  bool HasName;
+  std::string Enc;
+public:
+  FieldEncoding(bool b, SmallStringEnc &e) : HasName(b), Enc(e.c_str()) {}
+  StringRef str() { return Enc.c_str(); }
+  bool operator<(const FieldEncoding &rhs) const {
+    if (HasName != rhs.HasName) return HasName;
+    return Enc < rhs.Enc;
+  }
+};
+
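+// Illustrative encodings produced by the append* routines below (editorial
+// examples, derived from the code rather than quoted from the ABI document):
+//   int x;             ->  "si"
+//   const int *p;      ->  "p(c:si)"
+//   int a[4];          ->  "a(4:si)"
+//   int f(int, ...);   ->  "f{si}(si,va)"
+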
 class XCoreABIInfo : public DefaultABIInfo {
 public:
   XCoreABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
@@ -6057,10 +5935,14 @@
 };
 
 class XCoreTargetCodeGenInfo : public TargetCodeGenInfo {
+  mutable TypeStringCache TSC;
 public:
   XCoreTargetCodeGenInfo(CodeGenTypes &CGT)
     :TargetCodeGenInfo(new XCoreABIInfo(CGT)) {}
+  void emitTargetMD(const Decl *D, llvm::GlobalValue *GV,
+                    CodeGen::CodeGenModule &M) const override;
 };
+
 } // End anonymous namespace.
 
 llvm::Value *XCoreABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
@@ -6112,6 +5994,455 @@
   return Val;
 }
 
+/// During the expansion of a RecordType, an incomplete TypeString is placed
+/// into the cache as a means to identify and break recursion.
+/// If there is a Recursive encoding in the cache, it is swapped out and will
+/// be reinserted by removeIncomplete().
+/// All other types of encoding should have been used rather than arriving here.
+void TypeStringCache::addIncomplete(const IdentifierInfo *ID,
+                                    std::string StubEnc) {
+  if (!ID)
+    return;
+  Entry &E = Map[ID];
+  assert((E.Str.empty() || E.State == Recursive) &&
+         "Incorrect use of addIncomplete");
+  assert(!StubEnc.empty() && "Passing an empty string to addIncomplete()");
+  E.Swapped.swap(E.Str); // swap out the Recursive
+  E.Str.swap(StubEnc);
+  E.State = Incomplete;
+  ++IncompleteCount;
+}
+
+/// Once the RecordType has been expanded, the temporary incomplete TypeString
+/// must be removed from the cache.
+/// If a Recursive was swapped out by addIncomplete(), it will be replaced.
+/// Returns true if the RecordType was defined recursively.
+bool TypeStringCache::removeIncomplete(const IdentifierInfo *ID) {
+  if (!ID)
+    return false;
+  auto I = Map.find(ID);
+  assert(I != Map.end() && "Entry not present");
+  Entry &E = I->second;
+  assert((E.State == Incomplete ||
+          E.State == IncompleteUsed) &&
+         "Entry must be an incomplete type");
+  bool IsRecursive = false;
+  if (E.State == IncompleteUsed) {
+    // We made use of our Incomplete encoding, thus we are recursive.
+    IsRecursive = true;
+    --IncompleteUsedCount;
+  }
+  if (E.Swapped.empty())
+    Map.erase(I);
+  else {
+    // Swap the Recursive back.
+    E.Swapped.swap(E.Str);
+    E.Swapped.clear();
+    E.State = Recursive;
+  }
+  --IncompleteCount;
+  return IsRecursive;
+}
+
+/// Add the encoded TypeString to the cache only if it is NonRecursive or
+/// Recursive (viz: all sub-members were expanded as fully as possible).
+void TypeStringCache::addIfComplete(const IdentifierInfo *ID, StringRef Str,
+                                    bool IsRecursive) {
+  if (!ID || IncompleteUsedCount)
+    return; // No key, or it is an incomplete sub-type, so don't add.
+  Entry &E = Map[ID];
+  if (IsRecursive && !E.Str.empty()) {
+    assert(E.State==Recursive && E.Str.size() == Str.size() &&
+           "This is not the same Recursive entry");
+    // The parent container was not recursive after all, so we could have used
+    // this Recursive sub-member entry, but we assumed the worst when we
+    // started (viz: IncompleteCount != 0).
+    return;
+  }
+  assert(E.Str.empty() && "Entry already present");
+  E.Str = Str.str();
+  E.State = IsRecursive ? Recursive : NonRecursive;
+}
+
+/// Return a cached TypeString encoding for the ID. If there isn't one, or we
+/// are recursively expanding a type (IncompleteCount != 0) and the cached
+/// encoding is Recursive, return an empty StringRef.
+StringRef TypeStringCache::lookupStr(const IdentifierInfo *ID) {
+  if (!ID)
+    return StringRef();   // We have no key.
+  auto I = Map.find(ID);
+  if (I == Map.end())
+    return StringRef();   // We have no encoding.
+  Entry &E = I->second;
+  if (E.State == Recursive && IncompleteCount)
+    return StringRef();   // We don't use Recursive encodings for member types.
+
+  if (E.State == Incomplete) {
+    // The incomplete type is being used to break out of recursion.
+    E.State = IncompleteUsed;
+    ++IncompleteUsedCount;
+  }
+  return E.Str.c_str();
+}
+
+/// The XCore ABI includes a type information section that communicates symbol
+/// type information to the linker. The linker uses this information to verify
+/// safety/correctness of things such as array bounds and pointer accesses.
+/// The ABI only requires C (and XC) language modules to emit TypeStrings.
+/// This type information (TypeString) is emitted into meta data for all global
+/// symbols: definitions, declarations, functions & variables.
+///
+/// The TypeString carries type, qualifier, name, size & value details.
+/// Please see 'Tools Development Guide' section 2.16.2 for format details:
+/// <https://www.xmos.com/download/public/Tools-Development-Guide%28X9114A%29.pdf>
+/// The output is tested by test/CodeGen/xcore-stringtype.c.
+///
+static bool getTypeString(SmallStringEnc &Enc, const Decl *D,
+                          CodeGen::CodeGenModule &CGM, TypeStringCache &TSC);
+
+/// XCore uses emitTargetMD to emit TypeString metadata for global symbols.
+void XCoreTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV,
+                                          CodeGen::CodeGenModule &CGM) const {
+  SmallStringEnc Enc;
+  if (getTypeString(Enc, D, CGM, TSC)) {
+    llvm::LLVMContext &Ctx = CGM.getModule().getContext();
+    llvm::SmallVector<llvm::Value *, 2> MDVals;
+    MDVals.push_back(GV);
+    MDVals.push_back(llvm::MDString::get(Ctx, Enc.str()));
+    llvm::NamedMDNode *MD =
+      CGM.getModule().getOrInsertNamedMetadata("xcore.typestrings");
+    MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
+  }
+}
+
+static bool appendType(SmallStringEnc &Enc, QualType QType,
+                       const CodeGen::CodeGenModule &CGM,
+                       TypeStringCache &TSC);
+
+/// Helper function for appendRecordType().
+/// Builds a SmallVector containing the encoded field types in declaration order.
+static bool extractFieldType(SmallVectorImpl<FieldEncoding> &FE,
+                             const RecordDecl *RD,
+                             const CodeGen::CodeGenModule &CGM,
+                             TypeStringCache &TSC) {
+  for (RecordDecl::field_iterator I = RD->field_begin(), E = RD->field_end();
+       I != E; ++I) {
+    SmallStringEnc Enc;
+    Enc += "m(";
+    Enc += I->getName();
+    Enc += "){";
+    if (I->isBitField()) {
+      Enc += "b(";
+      llvm::raw_svector_ostream OS(Enc);
+      OS.resync();
+      OS << I->getBitWidthValue(CGM.getContext());
+      OS.flush();
+      Enc += ':';
+    }
+    if (!appendType(Enc, I->getType(), CGM, TSC))
+      return false;
+    if (I->isBitField())
+      Enc += ')';
+    Enc += '}';
+    FE.push_back(FieldEncoding(!I->getName().empty(), Enc));
+  }
+  return true;
+}
+
+/// Appends structure and union types to Enc and adds encoding to cache.
+/// Recursively calls appendType (via extractFieldType) for each field.
+/// Union types have their fields ordered according to the ABI.
+static bool appendRecordType(SmallStringEnc &Enc, const RecordType *RT,
+                             const CodeGen::CodeGenModule &CGM,
+                             TypeStringCache &TSC, const IdentifierInfo *ID) {
+  // Append the cached TypeString if we have one.
+  StringRef TypeString = TSC.lookupStr(ID);
+  if (!TypeString.empty()) {
+    Enc += TypeString;
+    return true;
+  }
+
+  // Start to emit an incomplete TypeString.
+  size_t Start = Enc.size();
+  Enc += (RT->isUnionType() ? 'u' : 's');
+  Enc += '(';
+  if (ID)
+    Enc += ID->getName();
+  Enc += "){";
+
+  // We collect all encoded fields and order as necessary.
+  bool IsRecursive = false;
+  const RecordDecl *RD = RT->getDecl()->getDefinition();
+  if (RD && !RD->field_empty()) {
+    // An incomplete TypeString stub is placed in the cache for this RecordType
+    // so that recursive calls to this RecordType will use it whilst building a
+    // complete TypeString for this RecordType.
+    SmallVector<FieldEncoding, 16> FE;
+    std::string StubEnc(Enc.substr(Start).str());
+    StubEnc += '}';  // StubEnc now holds a valid incomplete TypeString.
+    TSC.addIncomplete(ID, std::move(StubEnc));
+    if (!extractFieldType(FE, RD, CGM, TSC)) {
+      (void) TSC.removeIncomplete(ID);
+      return false;
+    }
+    IsRecursive = TSC.removeIncomplete(ID);
+    // The ABI requires unions to be sorted but not structures.
+    // See FieldEncoding::operator< for sort algorithm.
+    if (RT->isUnionType())
+      std::sort(FE.begin(), FE.end());
+    // We can now complete the TypeString.
+    unsigned E = FE.size();
+    for (unsigned I = 0; I != E; ++I) {
+      if (I)
+        Enc += ',';
+      Enc += FE[I].str();
+    }
+  }
+  Enc += '}';
+  TSC.addIfComplete(ID, Enc.substr(Start), IsRecursive);
+  return true;
+}
+
+/// Appends enum types to Enc and adds the encoding to the cache.
+static bool appendEnumType(SmallStringEnc &Enc, const EnumType *ET,
+                           TypeStringCache &TSC,
+                           const IdentifierInfo *ID) {
+  // Append the cached TypeString if we have one.
+  StringRef TypeString = TSC.lookupStr(ID);
+  if (!TypeString.empty()) {
+    Enc += TypeString;
+    return true;
+  }
+
+  size_t Start = Enc.size();
+  Enc += "e(";
+  if (ID)
+    Enc += ID->getName();
+  Enc += "){";
+
+  // We collect all encoded enumerations and order them alphanumerically.
+  if (const EnumDecl *ED = ET->getDecl()->getDefinition()) {
+    SmallVector<FieldEncoding, 16> FE;
+    for (auto I = ED->enumerator_begin(), E = ED->enumerator_end(); I != E;
+         ++I) {
+      SmallStringEnc EnumEnc;
+      EnumEnc += "m(";
+      EnumEnc += I->getName();
+      EnumEnc += "){";
+      I->getInitVal().toString(EnumEnc);
+      EnumEnc += '}';
+      FE.push_back(FieldEncoding(!I->getName().empty(), EnumEnc));
+    }
+    std::sort(FE.begin(), FE.end());
+    unsigned E = FE.size();
+    for (unsigned I = 0; I != E; ++I) {
+      if (I)
+        Enc += ',';
+      Enc += FE[I].str();
+    }
+  }
+  Enc += '}';
+  TSC.addIfComplete(ID, Enc.substr(Start), false);
+  return true;
+}
+
+/// Appends type's qualifier to Enc.
+/// This is done prior to appending the type's encoding.
+static void appendQualifier(SmallStringEnc &Enc, QualType QT) {
+  // Qualifiers are emitted in alphabetical order.
+  static const char *Table[] = {"","c:","r:","cr:","v:","cv:","rv:","crv:"};
+  int Lookup = 0;
+  if (QT.isConstQualified())
+    Lookup += 1<<0;
+  if (QT.isRestrictQualified())
+    Lookup += 1<<1;
+  if (QT.isVolatileQualified())
+    Lookup += 1<<2;
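+  // (Editorial example: 'const volatile' sets bits 0 and 2, so Lookup == 5
+  // and "cv:" is emitted.)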
+  Enc += Table[Lookup];
+}
+
+/// Appends built-in types to Enc.
+static bool appendBuiltinType(SmallStringEnc &Enc, const BuiltinType *BT) {
+  const char *EncType;
+  switch (BT->getKind()) {
+    case BuiltinType::Void:
+      EncType = "0";
+      break;
+    case BuiltinType::Bool:
+      EncType = "b";
+      break;
+    case BuiltinType::Char_U:
+      EncType = "uc";
+      break;
+    case BuiltinType::UChar:
+      EncType = "uc";
+      break;
+    case BuiltinType::SChar:
+      EncType = "sc";
+      break;
+    case BuiltinType::UShort:
+      EncType = "us";
+      break;
+    case BuiltinType::Short:
+      EncType = "ss";
+      break;
+    case BuiltinType::UInt:
+      EncType = "ui";
+      break;
+    case BuiltinType::Int:
+      EncType = "si";
+      break;
+    case BuiltinType::ULong:
+      EncType = "ul";
+      break;
+    case BuiltinType::Long:
+      EncType = "sl";
+      break;
+    case BuiltinType::ULongLong:
+      EncType = "ull";
+      break;
+    case BuiltinType::LongLong:
+      EncType = "sll";
+      break;
+    case BuiltinType::Float:
+      EncType = "ft";
+      break;
+    case BuiltinType::Double:
+      EncType = "d";
+      break;
+    case BuiltinType::LongDouble:
+      EncType = "ld";
+      break;
+    default:
+      return false;
+  }
+  Enc += EncType;
+  return true;
+}
+
+/// Appends a pointer encoding to Enc before calling appendType for the pointee.
+static bool appendPointerType(SmallStringEnc &Enc, const PointerType *PT,
+                              const CodeGen::CodeGenModule &CGM,
+                              TypeStringCache &TSC) {
+  Enc += "p(";
+  if (!appendType(Enc, PT->getPointeeType(), CGM, TSC))
+    return false;
+  Enc += ')';
+  return true;
+}
+
+/// Appends array encoding to Enc before calling appendType for the element.
+static bool appendArrayType(SmallStringEnc &Enc, const ArrayType *AT,
+                            const CodeGen::CodeGenModule &CGM,
+                            TypeStringCache &TSC, StringRef NoSizeEnc) {
+  if (AT->getSizeModifier() != ArrayType::Normal)
+    return false;
+  Enc += "a(";
+  if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT))
+    CAT->getSize().toStringUnsigned(Enc);
+  else
+    Enc += NoSizeEnc; // Global arrays use "*", otherwise it is "".
+  Enc += ':';
+  if (!appendType(Enc, AT->getElementType(), CGM, TSC))
+    return false;
+  Enc += ')';
+  return true;
+}
+
+/// Appends a function encoding to Enc, calling appendType for the return type
+/// and the arguments.
+static bool appendFunctionType(SmallStringEnc &Enc, const FunctionType *FT,
+                             const CodeGen::CodeGenModule &CGM,
+                             TypeStringCache &TSC) {
+  Enc += "f{";
+  if (!appendType(Enc, FT->getReturnType(), CGM, TSC))
+    return false;
+  Enc += "}(";
+  if (const FunctionProtoType *FPT = FT->getAs<FunctionProtoType>()) {
+    // N.B. we are only interested in the adjusted param types.
+    auto I = FPT->param_type_begin();
+    auto E = FPT->param_type_end();
+    if (I != E) {
+      do {
+        if (!appendType(Enc, *I, CGM, TSC))
+          return false;
+        ++I;
+        if (I != E)
+          Enc += ',';
+      } while (I != E);
+      if (FPT->isVariadic())
+        Enc += ",va";
+    } else {
+      if (FPT->isVariadic())
+        Enc += "va";
+      else
+        Enc += '0';
+    }
+  }
+  Enc += ')';
+  return true;
+}
+
+/// Handles the type's qualifier before dispatching a call to handle specific
+/// type encodings.
+static bool appendType(SmallStringEnc &Enc, QualType QType,
+                       const CodeGen::CodeGenModule &CGM,
+                       TypeStringCache &TSC) {
+
+  QualType QT = QType.getCanonicalType();
+
+  appendQualifier(Enc, QT);
+
+  if (const BuiltinType *BT = QT->getAs<BuiltinType>())
+    return appendBuiltinType(Enc, BT);
+
+  if (const ArrayType *AT = QT->getAsArrayTypeUnsafe())
+    return appendArrayType(Enc, AT, CGM, TSC, "");
+
+  if (const PointerType *PT = QT->getAs<PointerType>())
+    return appendPointerType(Enc, PT, CGM, TSC);
+
+  if (const EnumType *ET = QT->getAs<EnumType>())
+    return appendEnumType(Enc, ET, TSC, QT.getBaseTypeIdentifier());
+
+  if (const RecordType *RT = QT->getAsStructureType())
+    return appendRecordType(Enc, RT, CGM, TSC, QT.getBaseTypeIdentifier());
+
+  if (const RecordType *RT = QT->getAsUnionType())
+    return appendRecordType(Enc, RT, CGM, TSC, QT.getBaseTypeIdentifier());
+
+  if (const FunctionType *FT = QT->getAs<FunctionType>())
+    return appendFunctionType(Enc, FT, CGM, TSC);
+
+  return false;
+}
+
+static bool getTypeString(SmallStringEnc &Enc, const Decl *D,
+                          CodeGen::CodeGenModule &CGM, TypeStringCache &TSC) {
+  if (!D)
+    return false;
+
+  if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
+    if (FD->getLanguageLinkage() != CLanguageLinkage)
+      return false;
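+    // (Editorial note: per the ABI summary above, TypeStrings are only
+    // required for C-linkage symbols, so C++ declarations are skipped.)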
+    return appendType(Enc, FD->getType(), CGM, TSC);
+  }
+
+  if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
+    if (VD->getLanguageLinkage() != CLanguageLinkage)
+      return false;
+    QualType QT = VD->getType().getCanonicalType();
+    if (const ArrayType *AT = QT->getAsArrayTypeUnsafe()) {
+      // Global ArrayTypes are given a size of '*' if the size is unknown.
+      appendQualifier(Enc, QT);
+      return appendArrayType(Enc, AT, CGM, TSC, "*");
+    }
+    return appendType(Enc, QT, CGM, TSC);
+  }
+  return false;
+}
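A hedged sketch of the intended call pattern; the consuming target hook
(XCore, which attaches the string as metadata) and the cache lifetime are
assumptions here:

    TypeStringCache TSC;          // reused across decls to cache encodings
    SmallStringEnc Enc;
    if (getTypeString(Enc, D, CGM, TSC)) {
      // e.g. for  extern "C" int f(void);  Enc now holds "f{si}(0)".
      // Only C-language-linkage decls are encoded, per the checks above.
    }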
+
 //===----------------------------------------------------------------------===//
 // Driver code
 //===----------------------------------------------------------------------===//
@@ -6135,17 +6466,16 @@
   case llvm::Triple::mips64el:
     return *(TheTargetCodeGenInfo = new MIPSTargetCodeGenInfo(Types, false));
 
-  case llvm::Triple::arm64: {
-    ARM64ABIInfo::ABIKind Kind = ARM64ABIInfo::AAPCS;
-    if (strcmp(getTarget().getABI(), "darwinpcs") == 0)
-      Kind = ARM64ABIInfo::DarwinPCS;
-
-    return *(TheTargetCodeGenInfo = new ARM64TargetCodeGenInfo(Types, Kind));
-  }
-
   case llvm::Triple::aarch64:
   case llvm::Triple::aarch64_be:
-    return *(TheTargetCodeGenInfo = new AArch64TargetCodeGenInfo(Types));
+  case llvm::Triple::arm64:
+  case llvm::Triple::arm64_be: {
+    AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS;
+    if (strcmp(getTarget().getABI(), "darwinpcs") == 0)
+      Kind = AArch64ABIInfo::DarwinPCS;
+
+    return *(TheTargetCodeGenInfo = new AArch64TargetCodeGenInfo(Types, Kind));
+  }
 
   case llvm::Triple::arm:
   case llvm::Triple::armeb:
@@ -6220,8 +6550,6 @@
 
     switch (Triple.getOS()) {
     case llvm::Triple::Win32:
-    case llvm::Triple::MinGW32:
-    case llvm::Triple::Cygwin:
       return *(TheTargetCodeGenInfo = new WinX86_64TargetCodeGenInfo(Types));
     case llvm::Triple::NaCl:
       return *(TheTargetCodeGenInfo = new NaClX86_64TargetCodeGenInfo(Types,
diff --git a/lib/CodeGen/TargetInfo.h b/lib/CodeGen/TargetInfo.h
index 6c3ab64..2616820 100644
--- a/lib/CodeGen/TargetInfo.h
+++ b/lib/CodeGen/TargetInfo.h
@@ -21,189 +21,195 @@
 #include "llvm/ADT/StringRef.h"
 
 namespace llvm {
-  class Constant;
-  class GlobalValue;
-  class Type;
-  class Value;
+class Constant;
+class GlobalValue;
+class Type;
+class Value;
 }
 
 namespace clang {
-  class ABIInfo;
-  class Decl;
+class ABIInfo;
+class Decl;
 
-  namespace CodeGen {
-    class CallArgList;
-    class CodeGenModule;
-    class CodeGenFunction;
-    class CGFunctionInfo;
+namespace CodeGen {
+class CallArgList;
+class CodeGenModule;
+class CodeGenFunction;
+class CGFunctionInfo;
+}
+
+/// TargetCodeGenInfo - This class organizes various target-specific
+/// code-generation issues, like target-specific attributes, builtins and so
+/// on.
+class TargetCodeGenInfo {
+  ABIInfo *Info;
+
+public:
+  // WARNING: Acquires the ownership of ABIInfo.
+  TargetCodeGenInfo(ABIInfo *info = nullptr) : Info(info) {}
+  virtual ~TargetCodeGenInfo();
+
+  /// getABIInfo() - Returns ABI info helper for the target.
+  const ABIInfo &getABIInfo() const { return *Info; }
+
+  /// SetTargetAttributes - Provides a convenient hook to handle extra
+  /// target-specific attributes for the given global.
+  virtual void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+                                   CodeGen::CodeGenModule &M) const {}
+
+  /// emitTargetMD - Provides a convenient hook to handle extra
+  /// target-specific metadata for the given global.
+  virtual void emitTargetMD(const Decl *D, llvm::GlobalValue *GV,
+                            CodeGen::CodeGenModule &M) const {}
+
+  /// Determines the size of struct _Unwind_Exception on this platform,
+  /// in 8-bit units.  The Itanium ABI defines this as:
+  ///   struct _Unwind_Exception {
+  ///     uint64 exception_class;
+  ///     _Unwind_Exception_Cleanup_Fn exception_cleanup;
+  ///     uint64 private_1;
+  ///     uint64 private_2;
+  ///   };
+  virtual unsigned getSizeOfUnwindException() const;
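On LP64 Itanium targets each of the four fields above occupies 8 bytes, so
the size works out to 4 * 8 = 32, which is what the default implementation
returns; targets with a different unwind ABI override this hook.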
+
+  /// Controls whether __builtin_extend_pointer should sign-extend
+  /// pointers to uint64_t or zero-extend them (the default).  Has
+  /// no effect for targets:
+  ///   - that have 64-bit pointers, or
+  ///   - that cannot address through registers larger than pointers, or
+  ///   - that implicitly ignore/truncate the top bits when addressing
+  ///     through such registers.
+  virtual bool extendPointerWithSExt() const { return false; }
+
+  /// Determines the DWARF register number for the stack pointer, for
+  /// exception-handling purposes.  Implements __builtin_dwarf_sp_column.
+  ///
+  /// Returns -1 if the operation is unsupported by this target.
+  virtual int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const {
+    return -1;
   }
 
-  /// TargetCodeGenInfo - This class organizes various target-specific
-  /// codegeneration issues, like target-specific attributes, builtins and so
-  /// on.
-  class TargetCodeGenInfo {
-    ABIInfo *Info;
-  public:
-    // WARNING: Acquires the ownership of ABIInfo.
-    TargetCodeGenInfo(ABIInfo *info = 0):Info(info) { }
-    virtual ~TargetCodeGenInfo();
+  /// Initializes the given DWARF EH register-size table, a char*.
+  /// Implements __builtin_init_dwarf_reg_size_table.
+  ///
+  /// Returns true if the operation is unsupported by this target.
+  virtual bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
+                                       llvm::Value *Address) const {
+    return true;
+  }
 
-    /// getABIInfo() - Returns ABI info helper for the target.
-    const ABIInfo& getABIInfo() const { return *Info; }
+  /// Performs the code-generation required to convert a return
+  /// address as stored by the system into the actual address of the
+  /// next instruction that will be executed.
+  ///
+  /// Used by __builtin_extract_return_addr().
+  virtual llvm::Value *decodeReturnAddress(CodeGen::CodeGenFunction &CGF,
+                                           llvm::Value *Address) const {
+    return Address;
+  }
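Standard use of the builtin this hook backs, for reference:

    void *pc = __builtin_extract_return_addr(__builtin_return_address(0));
    /* decodeReturnAddress implements the extraction; encodeReturnAddress
       below is the inverse, used by __builtin_frob_return_addr. */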
 
-    /// SetTargetAttributes - Provides a convenient hook to handle extra
-    /// target-specific attributes for the given global.
-    virtual void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
-                                     CodeGen::CodeGenModule &M) const { }
+  /// Performs the code-generation required to convert the address
+  /// of an instruction into a return address suitable for storage
+  /// by the system in a return slot.
+  ///
+  /// Used by __builtin_frob_return_addr().
+  virtual llvm::Value *encodeReturnAddress(CodeGen::CodeGenFunction &CGF,
+                                           llvm::Value *Address) const {
+    return Address;
+  }
 
-    /// Determines the size of struct _Unwind_Exception on this platform,
-    /// in 8-bit units.  The Itanium ABI defines this as:
-    ///   struct _Unwind_Exception {
-    ///     uint64 exception_class;
-    ///     _Unwind_Exception_Cleanup_Fn exception_cleanup;
-    ///     uint64 private_1;
-    ///     uint64 private_2;
-    ///   };
-    virtual unsigned getSizeOfUnwindException() const;
+  /// Corrects the low-level LLVM type for a given constraint and "usual"
+  /// type.
+  ///
+  /// \returns A pointer to a new LLVM type, possibly the same as the original
+  /// on success; nullptr on failure.
+  virtual llvm::Type *adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
+                                          StringRef Constraint,
+                                          llvm::Type *Ty) const {
+    return Ty;
+  }
 
-    /// Controls whether __builtin_extend_pointer should sign-extend
-    /// pointers to uint64_t or zero-extend them (the default).  Has
-    /// no effect for targets:
-    ///   - that have 64-bit pointers, or
-    ///   - that cannot address through registers larger than pointers, or
-    ///   - that implicitly ignore/truncate the top bits when addressing
-    ///     through such registers.
-    virtual bool extendPointerWithSExt() const { return false; }
+  /// doesReturnSlotInterfereWithArgs - Return true if the target uses an
+  /// argument slot for an 'sret' type.
+  virtual bool doesReturnSlotInterfereWithArgs() const { return true; }
 
-    /// Determines the DWARF register number for the stack pointer, for
-    /// exception-handling purposes.  Implements __builtin_dwarf_sp_column.
-    ///
-    /// Returns -1 if the operation is unsupported by this target.
-    virtual int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const {
-      return -1;
-    }
+  /// Retrieve the address of a function to call immediately before
+  /// calling objc_retainAutoreleasedReturnValue.  The
+  /// implementation of objc_autoreleaseReturnValue sniffs the
+  /// instruction stream following its return address to decide
+  /// whether it's a call to objc_retainAutoreleasedReturnValue.
+  /// This can be prohibitively expensive, depending on the
+  /// relocation model, and so on some targets it instead sniffs for
+  /// a particular instruction sequence.  This function returns
+  /// that instruction sequence in inline assembly, which will be
+  /// empty if none is required.
+  virtual StringRef getARCRetainAutoreleasedReturnValueMarker() const {
+    return "";
+  }
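For example, the ARM implementation in TargetInfo.cpp returns a marker
along these lines (string recalled from the sources; treat as illustrative):

    return "mov\tr7, r7\t\t@ marker for objc_retainAutoreleaseReturnValue";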
 
-    /// Initializes the given DWARF EH register-size table, a char*.
-    /// Implements __builtin_init_dwarf_reg_size_table.
-    ///
-    /// Returns true if the operation is unsupported by this target.
-    virtual bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
-                                         llvm::Value *Address) const {
-      return true;
-    }
+  /// Return a constant used by UBSan as a signature to identify functions
+  /// possessing type information, or nullptr if the platform is unsupported.
+  virtual llvm::Constant *
+  getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const {
+    return nullptr;
+  }
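A minimal sketch of a target override, assuming the 0xC105CAFE magic used
by the existing x86 implementations:

    llvm::Constant *
    getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override {
      // Prologue signature constant; the exact value is an assumption here.
      return llvm::ConstantInt::get(CGM.Int32Ty, 0xC105CAFE);
    }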
 
-    /// Performs the code-generation required to convert a return
-    /// address as stored by the system into the actual address of the
-    /// next instruction that will be executed.
-    ///
-    /// Used by __builtin_extract_return_addr().
-    virtual llvm::Value *decodeReturnAddress(CodeGen::CodeGenFunction &CGF,
-                                             llvm::Value *Address) const {
-      return Address;
-    }
+  /// Determine whether a call to an unprototyped function under
+  /// the given calling convention should use the variadic
+  /// convention or the non-variadic convention.
+  ///
+  /// There's a good reason to make a platform's variadic calling
+  /// convention be different from its non-variadic calling
+  /// convention: the non-variadic arguments can be passed in
+  /// registers (better for performance), and the variadic arguments
+  /// can be passed on the stack (also better for performance).  If
+  /// this is done, however, unprototyped functions *must* use the
+  /// non-variadic convention, because C99 states that a call
+  /// through an unprototyped function type must succeed if the
+  /// function was defined with a non-variadic prototype with
+  /// compatible parameters.  Therefore, splitting the conventions
+  /// makes it impossible to call a variadic function through an
+  /// unprototyped type.  Since function prototypes came out in the
+  /// late 1970s, this is probably an acceptable trade-off.
+  /// Nonetheless, not all platforms are willing to make it, and in
+  /// particular x86-64 bends over backwards to make the
+  /// conventions compatible.
+  ///
+  /// The default is false.  This is correct whenever:
+  ///   - the conventions are exactly the same, because it does not
+  ///     matter and the resulting IR will be somewhat prettier in
+  ///     certain cases; or
+  ///   - the conventions are substantively different in how they pass
+  ///     arguments, because in this case using the variadic convention
+  ///     will lead to C99 violations.
+  ///
+  /// However, some platforms make the conventions identical except
+  /// for passing additional out-of-band information to a variadic
+  /// function: for example, x86-64 passes the number of SSE
+  /// arguments in %al.  On these platforms, it is desirable to
+  /// call unprototyped functions using the variadic convention so
+  /// that unprototyped calls to varargs functions still succeed.
+  ///
+  /// Relatedly, platforms which pass the fixed arguments to this:
+  ///   A foo(B, C, D);
+  /// differently than they would pass them to this:
+  ///   A foo(B, C, D, ...);
+  /// may need to adjust the debugger-support code in Sema to do the
+  /// right thing when calling a function with no known signature.
+  virtual bool isNoProtoCallVariadic(const CodeGen::CallArgList &args,
+                                     const FunctionNoProtoType *fnType) const;
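The C99 constraint discussed above, as a minimal illustration:

    int f();                        /* unprototyped declaration */
    int f(int a, int b) { return a + b; }
    int g(void) { return f(1, 2); } /* valid C99: must reach the definition,
                                       so unprototyped calls cannot use a
                                       distinct variadic convention */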
 
-    /// Performs the code-generation required to convert the address
-    /// of an instruction into a return address suitable for storage
-    /// by the system in a return slot.
-    ///
-    /// Used by __builtin_frob_return_addr().
-    virtual llvm::Value *encodeReturnAddress(CodeGen::CodeGenFunction &CGF,
-                                             llvm::Value *Address) const {
-      return Address;
-    }
+  /// Gets the linker options necessary to link a dependent library on this
+  /// platform.
+  virtual void getDependentLibraryOption(llvm::StringRef Lib,
+                                         llvm::SmallString<24> &Opt) const;
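Illustrative output (spelling per the implementations in TargetInfo.cpp;
treat as an assumption): for Lib == "foo" the default produces "-lfoo",
while MSVC-style COFF targets produce "/DEFAULTLIB:foo.lib".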
 
-    /// Corrects the low-level LLVM type for a given constraint and "usual"
-    /// type.
-    ///
-    /// \returns A pointer to a new LLVM type, possibly the same as the original
-    /// on success; 0 on failure.
-    virtual llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
-                                            StringRef Constraint,
-                                            llvm::Type* Ty) const {
-      return Ty;
-    }
-
-    /// doesReturnSlotInterfereWithArgs - Return true if the target uses an
-    /// argument slot for an 'sret' type.
-    virtual bool doesReturnSlotInterfereWithArgs() const { return true; }
-
-    /// Retrieve the address of a function to call immediately before
-    /// calling objc_retainAutoreleasedReturnValue.  The
-    /// implementation of objc_autoreleaseReturnValue sniffs the
-    /// instruction stream following its return address to decide
-    /// whether it's a call to objc_retainAutoreleasedReturnValue.
-    /// This can be prohibitively expensive, depending on the
-    /// relocation model, and so on some targets it instead sniffs for
-    /// a particular instruction sequence.  This functions returns
-    /// that instruction sequence in inline assembly, which will be
-    /// empty if none is required.
-    virtual StringRef getARCRetainAutoreleasedReturnValueMarker() const {
-      return "";
-    }
-
-    /// Return a constant used by UBSan as a signature to identify functions
-    /// possessing type information, or 0 if the platform is unsupported.
-    virtual llvm::Constant *getUBSanFunctionSignature(
-        CodeGen::CodeGenModule &CGM) const {
-      return 0;
-    }
-
-    /// Determine whether a call to an unprototyped functions under
-    /// the given calling convention should use the variadic
-    /// convention or the non-variadic convention.
-    ///
-    /// There's a good reason to make a platform's variadic calling
-    /// convention be different from its non-variadic calling
-    /// convention: the non-variadic arguments can be passed in
-    /// registers (better for performance), and the variadic arguments
-    /// can be passed on the stack (also better for performance).  If
-    /// this is done, however, unprototyped functions *must* use the
-    /// non-variadic convention, because C99 states that a call
-    /// through an unprototyped function type must succeed if the
-    /// function was defined with a non-variadic prototype with
-    /// compatible parameters.  Therefore, splitting the conventions
-    /// makes it impossible to call a variadic function through an
-    /// unprototyped type.  Since function prototypes came out in the
-    /// late 1970s, this is probably an acceptable trade-off.
-    /// Nonetheless, not all platforms are willing to make it, and in
-    /// particularly x86-64 bends over backwards to make the
-    /// conventions compatible.
-    ///
-    /// The default is false.  This is correct whenever:
-    ///   - the conventions are exactly the same, because it does not
-    ///     matter and the resulting IR will be somewhat prettier in
-    ///     certain cases; or
-    ///   - the conventions are substantively different in how they pass
-    ///     arguments, because in this case using the variadic convention
-    ///     will lead to C99 violations.
-    ///
-    /// However, some platforms make the conventions identical except
-    /// for passing additional out-of-band information to a variadic
-    /// function: for example, x86-64 passes the number of SSE
-    /// arguments in %al.  On these platforms, it is desirable to
-    /// call unprototyped functions using the variadic convention so
-    /// that unprototyped calls to varargs functions still succeed.
-    ///
-    /// Relatedly, platforms which pass the fixed arguments to this:
-    ///   A foo(B, C, D);
-    /// differently than they would pass them to this:
-    ///   A foo(B, C, D, ...);
-    /// may need to adjust the debugger-support code in Sema to do the
-    /// right thing when calling a function with no know signature.
-    virtual bool isNoProtoCallVariadic(const CodeGen::CallArgList &args,
-                                       const FunctionNoProtoType *fnType) const;
-
-    /// Gets the linker options necessary to link a dependent library on this
-    /// platform.
-    virtual void getDependentLibraryOption(llvm::StringRef Lib,
-                                           llvm::SmallString<24> &Opt) const;
-
-    /// Gets the linker options necessary to detect object file mismatches on
-    /// this platform.
-    virtual void getDetectMismatchOption(llvm::StringRef Name,
-                                         llvm::StringRef Value,
-                                         llvm::SmallString<32> &Opt) const {}
-  };
+  /// Gets the linker options necessary to detect object file mismatches on
+  /// this platform.
+  virtual void getDetectMismatchOption(llvm::StringRef Name,
+                                       llvm::StringRef Value,
+                                       llvm::SmallString<32> &Opt) const {}
+};
 }
 
 #endif // CLANG_CODEGEN_TARGETINFO_H
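For completeness, the getDetectMismatchOption hook declared above backs
#pragma detect_mismatch; on MSVC-style targets the emitted option looks
like this (illustrative):

    #pragma detect_mismatch("myLib_version", "9")
    /* emitted linker option: /FAILIFMISMATCH:"myLib_version=9" */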