Update aosp/master clang for rebase to r239765

Change-Id: I0393bcc952590a7226af8c4b58534a8ee5fd2d99
diff --git a/lib/ARCMigrate/ARCMT.cpp b/lib/ARCMigrate/ARCMT.cpp
index 0a61cfe..f266eaf 100644
--- a/lib/ARCMigrate/ARCMT.cpp
+++ b/lib/ARCMigrate/ARCMT.cpp
@@ -432,7 +432,7 @@
   ARCMTMacroTrackerPPCallbacks(std::vector<SourceLocation> &ARCMTMacroLocs)
     : ARCMTMacroLocs(ARCMTMacroLocs) { }
 
-  void MacroExpands(const Token &MacroNameTok, const MacroDirective *MD,
+  void MacroExpands(const Token &MacroNameTok, const MacroDefinition &MD,
                     SourceRange Range, const MacroArgs *Args) override {
     if (MacroNameTok.getIdentifierInfo()->getName() == getARCMTMacroName())
       ARCMTMacroLocs.push_back(MacroNameTok.getLocation());
diff --git a/lib/ARCMigrate/ObjCMT.cpp b/lib/ARCMigrate/ObjCMT.cpp
index 9e7be20..8c2e0f4 100644
--- a/lib/ARCMigrate/ObjCMT.cpp
+++ b/lib/ARCMigrate/ObjCMT.cpp
@@ -468,7 +468,7 @@
   ASTContext &Context = NS.getASTContext();
   bool LParenAdded = false;
   std::string PropertyString = "@property ";
-  if (UseNsIosOnlyMacro && Context.Idents.get("NS_NONATOMIC_IOSONLY").hasMacroDefinition()) {
+  if (UseNsIosOnlyMacro && NS.isMacroDefined("NS_NONATOMIC_IOSONLY")) {
     PropertyString += "(NS_NONATOMIC_IOSONLY";
     LParenAdded = true;
   } else if (!Atomic) {
@@ -1277,7 +1277,7 @@
 
   QualType RT = OM->getReturnType();
   if (!TypeIsInnerPointer(RT) ||
-      !Ctx.Idents.get("NS_RETURNS_INNER_POINTER").hasMacroDefinition())
+      !NSAPIObj->isMacroDefined("NS_RETURNS_INNER_POINTER"))
     return;
   
   edit::Commit commit(*Editor);
@@ -1288,9 +1288,9 @@
 void ObjCMigrateASTConsumer::migratePropertyNsReturnsInnerPointer(ASTContext &Ctx,
                                                                   ObjCPropertyDecl *P) {
   QualType T = P->getType();
-  
+
   if (!TypeIsInnerPointer(T) ||
-      !Ctx.Idents.get("NS_RETURNS_INNER_POINTER").hasMacroDefinition())
+      !NSAPIObj->isMacroDefined("NS_RETURNS_INNER_POINTER"))
     return;
   edit::Commit commit(*Editor);
   commit.insertBefore(P->getLocEnd(), " NS_RETURNS_INNER_POINTER ");
@@ -1408,7 +1408,7 @@
 void ObjCMigrateASTConsumer::AnnotateImplicitBridging(ASTContext &Ctx) {
   if (CFFunctionIBCandidates.empty())
     return;
-  if (!Ctx.Idents.get("CF_IMPLICIT_BRIDGING_ENABLED").hasMacroDefinition()) {
+  if (!NSAPIObj->isMacroDefined("CF_IMPLICIT_BRIDGING_ENABLED")) {
     CFFunctionIBCandidates.clear();
     FileId = FileID();
     return;
@@ -1483,16 +1483,14 @@
     RetEffect Ret = CE.getReturnValue();
     const char *AnnotationString = nullptr;
     if (Ret.getObjKind() == RetEffect::CF) {
-      if (Ret.isOwned() &&
-          Ctx.Idents.get("CF_RETURNS_RETAINED").hasMacroDefinition())
+      if (Ret.isOwned() && NSAPIObj->isMacroDefined("CF_RETURNS_RETAINED"))
         AnnotationString = " CF_RETURNS_RETAINED";
       else if (Ret.notOwned() &&
-               Ctx.Idents.get("CF_RETURNS_NOT_RETAINED").hasMacroDefinition())
+               NSAPIObj->isMacroDefined("CF_RETURNS_NOT_RETAINED"))
         AnnotationString = " CF_RETURNS_NOT_RETAINED";
     }
     else if (Ret.getObjKind() == RetEffect::ObjC) {
-      if (Ret.isOwned() &&
-          Ctx.Idents.get("NS_RETURNS_RETAINED").hasMacroDefinition())
+      if (Ret.isOwned() && NSAPIObj->isMacroDefined("NS_RETURNS_RETAINED"))
         AnnotationString = " NS_RETURNS_RETAINED";
     }
     
@@ -1509,13 +1507,13 @@
     const ParmVarDecl *pd = *pi;
     ArgEffect AE = AEArgs[i];
     if (AE == DecRef && !pd->hasAttr<CFConsumedAttr>() &&
-        Ctx.Idents.get("CF_CONSUMED").hasMacroDefinition()) {
+        NSAPIObj->isMacroDefined("CF_CONSUMED")) {
       edit::Commit commit(*Editor);
       commit.insertBefore(pd->getLocation(), "CF_CONSUMED ");
       Editor->commit(commit);
     }
     else if (AE == DecRefMsg && !pd->hasAttr<NSConsumedAttr>() &&
-             Ctx.Idents.get("NS_CONSUMED").hasMacroDefinition()) {
+             NSAPIObj->isMacroDefined("NS_CONSUMED")) {
       edit::Commit commit(*Editor);
       commit.insertBefore(pd->getLocation(), "NS_CONSUMED ");
       Editor->commit(commit);
@@ -1600,11 +1598,10 @@
     RetEffect Ret = CE.getReturnValue();
     const char *AnnotationString = nullptr;
     if (Ret.getObjKind() == RetEffect::CF) {
-      if (Ret.isOwned() &&
-          Ctx.Idents.get("CF_RETURNS_RETAINED").hasMacroDefinition())
+      if (Ret.isOwned() && NSAPIObj->isMacroDefined("CF_RETURNS_RETAINED"))
         AnnotationString = " CF_RETURNS_RETAINED";
       else if (Ret.notOwned() &&
-               Ctx.Idents.get("CF_RETURNS_NOT_RETAINED").hasMacroDefinition())
+               NSAPIObj->isMacroDefined("CF_RETURNS_NOT_RETAINED"))
         AnnotationString = " CF_RETURNS_NOT_RETAINED";
     }
     else if (Ret.getObjKind() == RetEffect::ObjC) {
@@ -1618,8 +1615,7 @@
           break;
           
         default:
-          if (Ret.isOwned() &&
-              Ctx.Idents.get("NS_RETURNS_RETAINED").hasMacroDefinition())
+          if (Ret.isOwned() && NSAPIObj->isMacroDefined("NS_RETURNS_RETAINED"))
             AnnotationString = " NS_RETURNS_RETAINED";
           break;
       }
@@ -1638,7 +1634,7 @@
     const ParmVarDecl *pd = *pi;
     ArgEffect AE = AEArgs[i];
     if (AE == DecRef && !pd->hasAttr<CFConsumedAttr>() &&
-        Ctx.Idents.get("CF_CONSUMED").hasMacroDefinition()) {
+        NSAPIObj->isMacroDefined("CF_CONSUMED")) {
       edit::Commit commit(*Editor);
       commit.insertBefore(pd->getLocation(), "CF_CONSUMED ");
       Editor->commit(commit);
@@ -1658,12 +1654,12 @@
                                   MethodDecl->hasAttr<NSReturnsRetainedAttr>() ||
                                   MethodDecl->hasAttr<NSReturnsNotRetainedAttr>() ||
                                   MethodDecl->hasAttr<NSReturnsAutoreleasedAttr>());
-  
-  if (CE.getReceiver() ==  DecRefMsg &&
+
+  if (CE.getReceiver() == DecRefMsg &&
       !MethodDecl->hasAttr<NSConsumesSelfAttr>() &&
       MethodDecl->getMethodFamily() != OMF_init &&
       MethodDecl->getMethodFamily() != OMF_release &&
-      Ctx.Idents.get("NS_CONSUMES_SELF").hasMacroDefinition()) {
+      NSAPIObj->isMacroDefined("NS_CONSUMES_SELF")) {
     edit::Commit commit(*Editor);
     commit.insertBefore(MethodDecl->getLocEnd(), " NS_CONSUMES_SELF");
     Editor->commit(commit);
@@ -1729,7 +1725,7 @@
   const ObjCInterfaceDecl *IFace = ImplD->getClassInterface();
   if (!IFace || IFace->hasDesignatedInitializers())
     return;
-  if (!Ctx.Idents.get("NS_DESIGNATED_INITIALIZER").hasMacroDefinition())
+  if (!NSAPIObj->isMacroDefined("NS_DESIGNATED_INITIALIZER"))
     return;
 
   for (const auto *MD : ImplD->instance_methods()) {
@@ -2287,7 +2283,7 @@
       continue;
     }
 
-    remap.push_back(std::make_pair(I->first->getName(), TempFile));
+    remap.emplace_back(I->first->getName(), TempFile);
   }
 
   return hasErrorOccurred;
diff --git a/lib/ARCMigrate/TransAPIUses.cpp b/lib/ARCMigrate/TransAPIUses.cpp
index 544cb0a..40c8a07 100644
--- a/lib/ARCMigrate/TransAPIUses.cpp
+++ b/lib/ARCMigrate/TransAPIUses.cpp
@@ -95,7 +95,7 @@
       Pass.TA.clearDiagnostic(diag::err_unavailable,
                               diag::err_unavailable_message,
                               E->getSelectorLoc(0));
-      Pass.TA.replace(E->getSourceRange(), getNilString(Pass.Ctx));
+      Pass.TA.replace(E->getSourceRange(), getNilString(Pass));
     }
     return true;
   }
diff --git a/lib/ARCMigrate/TransRetainReleaseDealloc.cpp b/lib/ARCMigrate/TransRetainReleaseDealloc.cpp
index bcbc9e9..7db1a1c 100644
--- a/lib/ARCMigrate/TransRetainReleaseDealloc.cpp
+++ b/lib/ARCMigrate/TransRetainReleaseDealloc.cpp
@@ -145,7 +145,7 @@
       // when an exception is thrown.
       Pass.TA.replace(RecContainer->getSourceRange(), RecRange);
       std::string str = " = ";
-      str += getNilString(Pass.Ctx);
+      str += getNilString(Pass);
       Pass.TA.insertAfterToken(RecRange.getEnd(), str);
       return true;
     }
diff --git a/lib/ARCMigrate/TransUnusedInitDelegate.cpp b/lib/ARCMigrate/TransUnusedInitDelegate.cpp
index 98571c0..70370ec 100644
--- a/lib/ARCMigrate/TransUnusedInitDelegate.cpp
+++ b/lib/ARCMigrate/TransUnusedInitDelegate.cpp
@@ -58,7 +58,7 @@
       SourceRange ExprRange = ME->getSourceRange();
       Pass.TA.insert(ExprRange.getBegin(), "if (!(self = ");
       std::string retStr = ")) return ";
-      retStr += getNilString(Pass.Ctx);
+      retStr += getNilString(Pass);
       Pass.TA.insertAfterToken(ExprRange.getEnd(), retStr);
     }
     return true;
diff --git a/lib/ARCMigrate/Transforms.cpp b/lib/ARCMigrate/Transforms.cpp
index 6ff7b6b..56d3af7 100644
--- a/lib/ARCMigrate/Transforms.cpp
+++ b/lib/ARCMigrate/Transforms.cpp
@@ -16,6 +16,7 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Lex/Lexer.h"
+#include "clang/Lex/Preprocessor.h"
 #include "clang/Sema/Sema.h"
 #include "clang/Sema/SemaDiagnostic.h"
 #include "llvm/ADT/DenseSet.h"
@@ -212,11 +213,8 @@
   return false;  
 }
 
-StringRef trans::getNilString(ASTContext &Ctx) {
-  if (Ctx.Idents.get("nil").hasMacroDefinition())
-    return "nil";
-  else
-    return "0";
+StringRef trans::getNilString(MigrationPass &Pass) {
+  return Pass.SemaRef.PP.isMacroDefined("nil") ? "nil" : "0";
 }
 
 namespace {
diff --git a/lib/ARCMigrate/Transforms.h b/lib/ARCMigrate/Transforms.h
index 12551d2..7e3dd34 100644
--- a/lib/ARCMigrate/Transforms.h
+++ b/lib/ARCMigrate/Transforms.h
@@ -180,7 +180,7 @@
 bool hasSideEffects(Expr *E, ASTContext &Ctx);
 bool isGlobalVar(Expr *E);
 /// \brief Returns "nil" or "0" if 'nil' macro is not actually defined.
-StringRef getNilString(ASTContext &Ctx);
+StringRef getNilString(MigrationPass &Pass);
 
 template <typename BODY_TRANS>
 class BodyTransform : public RecursiveASTVisitor<BodyTransform<BODY_TRANS> > {
diff --git a/lib/AST/ASTContext.cpp b/lib/AST/ASTContext.cpp
index 899f8c5..4a831d9 100644
--- a/lib/AST/ASTContext.cpp
+++ b/lib/AST/ASTContext.cpp
@@ -866,6 +866,31 @@
   BumpAlloc.PrintStats();
 }
 
+void ASTContext::mergeDefinitionIntoModule(NamedDecl *ND, Module *M,
+                                           bool NotifyListeners) {
+  if (NotifyListeners)
+    if (auto *Listener = getASTMutationListener())
+      Listener->RedefinedHiddenDefinition(ND, M);
+
+  if (getLangOpts().ModulesLocalVisibility)
+    MergedDefModules[ND].push_back(M);
+  else
+    ND->setHidden(false);
+}
+
+void ASTContext::deduplicateMergedDefinitonsFor(NamedDecl *ND) {
+  auto It = MergedDefModules.find(ND);
+  if (It == MergedDefModules.end())
+    return;
+
+  auto &Merged = It->second;
+  llvm::DenseSet<Module*> Found;
+  for (Module *&M : Merged)
+    if (!Found.insert(M).second)
+      M = nullptr;
+  Merged.erase(std::remove(Merged.begin(), Merged.end(), nullptr), Merged.end());
+}
+
 ExternCContextDecl *ASTContext::getExternCContextDecl() const {
   if (!ExternCContext)
     ExternCContext = ExternCContextDecl::Create(*this, getTranslationUnitDecl());
@@ -1335,7 +1360,7 @@
       }
       Align = std::max(Align, getPreferredTypeAlign(T.getTypePtr()));
       if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
-        if (VD->hasGlobalStorage())
+        if (VD->hasGlobalStorage() && !ForAlignof)
           Align = std::max(Align, getTargetInfo().getMinGlobalAlign());
       }
     }
@@ -1797,11 +1822,16 @@
   TypeInfo TI = getTypeInfo(T);
   unsigned ABIAlign = TI.Align;
 
+  T = T->getBaseElementTypeUnsafe();
+
+  // The preferred alignment of member pointers is that of a pointer.
+  if (T->isMemberPointerType())
+    return getPreferredTypeAlign(getPointerDiffType().getTypePtr());
+
   if (Target->getTriple().getArch() == llvm::Triple::xcore)
     return ABIAlign;  // Never overalign on XCore.
 
   // Double and long long should be naturally aligned if possible.
-  T = T->getBaseElementTypeUnsafe();
   if (const ComplexType *CT = T->getAs<ComplexType>())
     T = CT->getElementType().getTypePtr();
   if (const EnumType *ET = T->getAs<EnumType>())
@@ -1817,6 +1847,13 @@
   return ABIAlign;
 }
 
+/// getTargetDefaultAlignForAttributeAligned - Return the default alignment
+/// for __attribute__((aligned)) on this target, to be used if no alignment
+/// value is specified.
+unsigned ASTContext::getTargetDefaultAlignForAttributeAligned(void) const {
+  return getTargetInfo().getDefaultAlignForAttributeAligned();
+}
+
 /// getAlignOfGlobalVar - Return the alignment in bits that should be given
 /// to a global variable of the specified type.
 unsigned ASTContext::getAlignOfGlobalVar(QualType T) const {
@@ -4894,7 +4931,7 @@
 bool ASTContext::isMSStaticDataMemberInlineDefinition(const VarDecl *VD) const {
   return getLangOpts().MSVCCompat && VD->isStaticDataMember() &&
          VD->getType()->isIntegralOrEnumerationType() &&
-         !VD->getFirstDecl()->isOutOfLine() && VD->getFirstDecl()->hasInit();
+         VD->isFirstDecl() && !VD->isOutOfLine() && VD->hasInit();
 }
 
 static inline 
diff --git a/lib/AST/ASTDumper.cpp b/lib/AST/ASTDumper.cpp
index 711c329..60cbb06 100644
--- a/lib/AST/ASTDumper.cpp
+++ b/lib/AST/ASTDumper.cpp
@@ -977,8 +977,10 @@
     dumpSourceRange(D->getSourceRange());
     OS << ' ';
     dumpLocation(D->getLocation());
-    if (Module *M = D->getOwningModule())
+    if (Module *M = D->getImportedOwningModule())
       OS << " in " << M->getFullModuleName();
+    else if (Module *M = D->getLocalOwningModule())
+      OS << " in (local) " << M->getFullModuleName();
     if (const NamedDecl *ND = dyn_cast<NamedDecl>(D))
       if (ND->isHidden())
         OS << " hidden";
diff --git a/lib/AST/ASTImporter.cpp b/lib/AST/ASTImporter.cpp
index eb9b00a..911f168 100644
--- a/lib/AST/ASTImporter.cpp
+++ b/lib/AST/ASTImporter.cpp
@@ -81,7 +81,7 @@
     // Importing declarations                            
     bool ImportDeclParts(NamedDecl *D, DeclContext *&DC, 
                          DeclContext *&LexicalDC, DeclarationName &Name, 
-                         SourceLocation &Loc);
+                         NamedDecl *&ToD, SourceLocation &Loc);
     void ImportDefinitionIfNeeded(Decl *FromD, Decl *ToD = nullptr);
     void ImportDeclarationNameLoc(const DeclarationNameInfo &From,
                                   DeclarationNameInfo& To);
@@ -168,7 +168,44 @@
     Decl *VisitVarTemplateSpecializationDecl(VarTemplateSpecializationDecl *D);
 
     // Importing statements
+    DeclGroupRef ImportDeclGroup(DeclGroupRef DG);
+
     Stmt *VisitStmt(Stmt *S);
+    Stmt *VisitDeclStmt(DeclStmt *S);
+    Stmt *VisitNullStmt(NullStmt *S);
+    Stmt *VisitCompoundStmt(CompoundStmt *S);
+    Stmt *VisitCaseStmt(CaseStmt *S);
+    Stmt *VisitDefaultStmt(DefaultStmt *S);
+    Stmt *VisitLabelStmt(LabelStmt *S);
+    Stmt *VisitAttributedStmt(AttributedStmt *S);
+    Stmt *VisitIfStmt(IfStmt *S);
+    Stmt *VisitSwitchStmt(SwitchStmt *S);
+    Stmt *VisitWhileStmt(WhileStmt *S);
+    Stmt *VisitDoStmt(DoStmt *S);
+    Stmt *VisitForStmt(ForStmt *S);
+    Stmt *VisitGotoStmt(GotoStmt *S);
+    Stmt *VisitIndirectGotoStmt(IndirectGotoStmt *S);
+    Stmt *VisitContinueStmt(ContinueStmt *S);
+    Stmt *VisitBreakStmt(BreakStmt *S);
+    Stmt *VisitReturnStmt(ReturnStmt *S);
+    // FIXME: GCCAsmStmt
+    // FIXME: MSAsmStmt
+    // FIXME: SEHExceptStmt
+    // FIXME: SEHFinallyStmt
+    // FIXME: SEHTryStmt
+    // FIXME: SEHLeaveStmt
+    // FIXME: CapturedStmt
+    Stmt *VisitCXXCatchStmt(CXXCatchStmt *S);
+    Stmt *VisitCXXTryStmt(CXXTryStmt *S);
+    Stmt *VisitCXXForRangeStmt(CXXForRangeStmt *S);
+    // FIXME: MSDependentExistsStmt
+    Stmt *VisitObjCForCollectionStmt(ObjCForCollectionStmt *S);
+    Stmt *VisitObjCAtCatchStmt(ObjCAtCatchStmt *S);
+    Stmt *VisitObjCAtFinallyStmt(ObjCAtFinallyStmt *S);
+    Stmt *VisitObjCAtTryStmt(ObjCAtTryStmt *S);
+    Stmt *VisitObjCAtSynchronizedStmt(ObjCAtSynchronizedStmt *S);
+    Stmt *VisitObjCAtThrowStmt(ObjCAtThrowStmt *S);
+    Stmt *VisitObjCAutoreleasePoolStmt(ObjCAutoreleasePoolStmt *S);
 
     // Importing expressions
     Expr *VisitExpr(Expr *E);
@@ -182,6 +219,9 @@
     Expr *VisitCompoundAssignOperator(CompoundAssignOperator *E);
     Expr *VisitImplicitCastExpr(ImplicitCastExpr *E);
     Expr *VisitCStyleCastExpr(CStyleCastExpr *E);
+    Expr *VisitCXXConstructExpr(CXXConstructExpr *E);
+    Expr *VisitMemberExpr(MemberExpr *E);
+    Expr *VisitCallExpr(CallExpr *E);
   };
 }
 using namespace clang;
@@ -1830,6 +1870,7 @@
 bool ASTNodeImporter::ImportDeclParts(NamedDecl *D, DeclContext *&DC, 
                                       DeclContext *&LexicalDC, 
                                       DeclarationName &Name, 
+                                      NamedDecl *&ToD,
                                       SourceLocation &Loc) {
   // Import the context of this declaration.
   DC = Importer.ImportContext(D->getDeclContext());
@@ -1850,6 +1891,7 @@
   
   // Import the location of this declaration.
   Loc = Importer.Import(D->getLocation());
+  ToD = cast_or_null<NamedDecl>(Importer.GetAlreadyImportedOrNull(D));
   return false;
 }
 
@@ -2031,7 +2073,7 @@
 
 bool ASTNodeImporter::ImportDefinition(VarDecl *From, VarDecl *To,
                                        ImportDefinitionKind Kind) {
-  if (To->getDefinition())
+  if (To->getAnyInitializer())
     return false;
 
   // FIXME: Can we really import any initializer? Alternatively, we could force
@@ -2261,8 +2303,11 @@
   DeclContext *DC, *LexicalDC;
   DeclarationName Name;
   SourceLocation Loc;
-  if (ImportDeclParts(D, DC, LexicalDC, Name, Loc))
+  NamedDecl *ToD;
+  if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
     return nullptr;
+  if (ToD)
+    return ToD;
 
   NamespaceDecl *MergeWithNamespace = nullptr;
   if (!Name) {
@@ -2329,8 +2374,11 @@
   DeclContext *DC, *LexicalDC;
   DeclarationName Name;
   SourceLocation Loc;
-  if (ImportDeclParts(D, DC, LexicalDC, Name, Loc))
+  NamedDecl *ToD;
+  if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
     return nullptr;
+  if (ToD)
+    return ToD;
 
   // If this typedef is not in block scope, determine whether we've
   // seen a typedef with the same name (that we can merge with) or any
@@ -2403,8 +2451,11 @@
   DeclContext *DC, *LexicalDC;
   DeclarationName Name;
   SourceLocation Loc;
-  if (ImportDeclParts(D, DC, LexicalDC, Name, Loc))
+  NamedDecl *ToD;
+  if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
     return nullptr;
+  if (ToD)
+    return ToD;
 
   // Figure out what enum name we're looking for.
   unsigned IDNS = Decl::IDNS_Tag;
@@ -2488,8 +2539,11 @@
   DeclContext *DC, *LexicalDC;
   DeclarationName Name;
   SourceLocation Loc;
-  if (ImportDeclParts(D, DC, LexicalDC, Name, Loc))
+  NamedDecl *ToD;
+  if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
     return nullptr;
+  if (ToD)
+    return ToD;
 
   // Figure out what structure name we're looking for.
   unsigned IDNS = Decl::IDNS_Tag;
@@ -2614,8 +2668,11 @@
   DeclContext *DC, *LexicalDC;
   DeclarationName Name;
   SourceLocation Loc;
-  if (ImportDeclParts(D, DC, LexicalDC, Name, Loc))
+  NamedDecl *ToD;
+  if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
     return nullptr;
+  if (ToD)
+    return ToD;
 
   QualType T = Importer.Import(D->getType());
   if (T.isNull())
@@ -2670,8 +2727,11 @@
   DeclContext *DC, *LexicalDC;
   DeclarationName Name;
   SourceLocation Loc;
-  if (ImportDeclParts(D, DC, LexicalDC, Name, Loc))
+  NamedDecl *ToD;
+  if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
     return nullptr;
+  if (ToD)
+    return ToD;
 
   // Try to find a function in our own ("to") context with the same name, same
   // type, and in the same context as the function we're importing.
@@ -2763,10 +2823,11 @@
   // Create the imported function.
   TypeSourceInfo *TInfo = Importer.Import(D->getTypeSourceInfo());
   FunctionDecl *ToFunction = nullptr;
+  SourceLocation InnerLocStart = Importer.Import(D->getInnerLocStart());
   if (CXXConstructorDecl *FromConstructor = dyn_cast<CXXConstructorDecl>(D)) {
     ToFunction = CXXConstructorDecl::Create(Importer.getToContext(),
                                             cast<CXXRecordDecl>(DC),
-                                            D->getInnerLocStart(),
+                                            InnerLocStart,
                                             NameInfo, T, TInfo, 
                                             FromConstructor->isExplicit(),
                                             D->isInlineSpecified(), 
@@ -2775,7 +2836,7 @@
   } else if (isa<CXXDestructorDecl>(D)) {
     ToFunction = CXXDestructorDecl::Create(Importer.getToContext(),
                                            cast<CXXRecordDecl>(DC),
-                                           D->getInnerLocStart(),
+                                           InnerLocStart,
                                            NameInfo, T, TInfo,
                                            D->isInlineSpecified(),
                                            D->isImplicit());
@@ -2783,7 +2844,7 @@
                                            = dyn_cast<CXXConversionDecl>(D)) {
     ToFunction = CXXConversionDecl::Create(Importer.getToContext(), 
                                            cast<CXXRecordDecl>(DC),
-                                           D->getInnerLocStart(),
+                                           InnerLocStart,
                                            NameInfo, T, TInfo,
                                            D->isInlineSpecified(),
                                            FromConversion->isExplicit(),
@@ -2792,7 +2853,7 @@
   } else if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D)) {
     ToFunction = CXXMethodDecl::Create(Importer.getToContext(), 
                                        cast<CXXRecordDecl>(DC),
-                                       D->getInnerLocStart(),
+                                       InnerLocStart,
                                        NameInfo, T, TInfo,
                                        Method->getStorageClass(),
                                        Method->isInlineSpecified(),
@@ -2800,7 +2861,7 @@
                                        Importer.Import(D->getLocEnd()));
   } else {
     ToFunction = FunctionDecl::Create(Importer.getToContext(), DC,
-                                      D->getInnerLocStart(),
+                                      InnerLocStart,
                                       NameInfo, T, TInfo, D->getStorageClass(),
                                       D->isInlineSpecified(),
                                       D->hasWrittenPrototype(),
@@ -2831,6 +2892,13 @@
     ToFunction->setType(T);
   }
 
+  // Import the body, if any.
+  if (Stmt *FromBody = D->getBody()) {
+    if (Stmt *ToBody = Importer.Import(FromBody)) {
+      ToFunction->setBody(ToBody);
+    }
+  }
+
   // FIXME: Other bits to merge?
 
   // Add this function to the lexical context.
@@ -2877,8 +2945,11 @@
   DeclContext *DC, *LexicalDC;
   DeclarationName Name;
   SourceLocation Loc;
-  if (ImportDeclParts(D, DC, LexicalDC, Name, Loc))
+  NamedDecl *ToD;
+  if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
     return nullptr;
+  if (ToD)
+    return ToD;
 
   // Determine whether we've already imported this field. 
   SmallVector<NamedDecl *, 2> FoundDecls;
@@ -2933,8 +3004,11 @@
   DeclContext *DC, *LexicalDC;
   DeclarationName Name;
   SourceLocation Loc;
-  if (ImportDeclParts(D, DC, LexicalDC, Name, Loc))
+  NamedDecl *ToD;
+  if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
     return nullptr;
+  if (ToD)
+    return ToD;
 
   // Determine whether we've already imported this field. 
   SmallVector<NamedDecl *, 2> FoundDecls;
@@ -3000,8 +3074,11 @@
   DeclContext *DC, *LexicalDC;
   DeclarationName Name;
   SourceLocation Loc;
-  if (ImportDeclParts(D, DC, LexicalDC, Name, Loc))
+  NamedDecl *ToD;
+  if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
     return nullptr;
+  if (ToD)
+    return ToD;
 
   // Determine whether we've already imported this ivar 
   SmallVector<NamedDecl *, 2> FoundDecls;
@@ -3050,8 +3127,11 @@
   DeclContext *DC, *LexicalDC;
   DeclarationName Name;
   SourceLocation Loc;
-  if (ImportDeclParts(D, DC, LexicalDC, Name, Loc))
+  NamedDecl *ToD;
+  if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
     return nullptr;
+  if (ToD)
+    return ToD;
 
   // Try to find a variable in our own ("to") context with the same name and
   // in the same context as the variable we're importing.
@@ -3159,6 +3239,10 @@
   Importer.Imported(D, ToVar);
   LexicalDC->addDeclInternal(ToVar);
 
+  if (!D->isFileVarDecl() &&
+      D->isUsed())
+    ToVar->setIsUsed();
+
   // Merge the initializer.
   if (ImportDefinition(D, ToVar))
     return nullptr;
@@ -3218,6 +3302,10 @@
                                             T, TInfo, D->getStorageClass(),
                                             /*FIXME: Default argument*/nullptr);
   ToParm->setHasInheritedDefaultArg(D->hasInheritedDefaultArg());
+
+  if (D->isUsed())
+    ToParm->setIsUsed();
+
   return Importer.Imported(D, ToParm);
 }
 
@@ -3226,8 +3314,11 @@
   DeclContext *DC, *LexicalDC;
   DeclarationName Name;
   SourceLocation Loc;
-  if (ImportDeclParts(D, DC, LexicalDC, Name, Loc))
+  NamedDecl *ToD;
+  if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
     return nullptr;
+  if (ToD)
+    return ToD;
 
   SmallVector<NamedDecl *, 2> FoundDecls;
   DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
@@ -3337,8 +3428,11 @@
   DeclContext *DC, *LexicalDC;
   DeclarationName Name;
   SourceLocation Loc;
-  if (ImportDeclParts(D, DC, LexicalDC, Name, Loc))
+  NamedDecl *ToD;
+  if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
     return nullptr;
+  if (ToD)
+    return ToD;
 
   ObjCInterfaceDecl *ToInterface
     = cast_or_null<ObjCInterfaceDecl>(Importer.Import(D->getClassInterface()));
@@ -3461,8 +3555,11 @@
   DeclContext *DC, *LexicalDC;
   DeclarationName Name;
   SourceLocation Loc;
-  if (ImportDeclParts(D, DC, LexicalDC, Name, Loc))
+  NamedDecl *ToD;
+  if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
     return nullptr;
+  if (ToD)
+    return ToD;
 
   ObjCProtocolDecl *MergeWithProtocol = nullptr;
   SmallVector<NamedDecl *, 2> FoundDecls;
@@ -3636,8 +3733,11 @@
   DeclContext *DC, *LexicalDC;
   DeclarationName Name;
   SourceLocation Loc;
-  if (ImportDeclParts(D, DC, LexicalDC, Name, Loc))
+  NamedDecl *ToD;
+  if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
     return nullptr;
+  if (ToD)
+    return ToD;
 
   // Look for an existing interface with the same name.
   ObjCInterfaceDecl *MergeWithIface = nullptr;
@@ -3791,8 +3891,11 @@
   DeclContext *DC, *LexicalDC;
   DeclarationName Name;
   SourceLocation Loc;
-  if (ImportDeclParts(D, DC, LexicalDC, Name, Loc))
+  NamedDecl *ToD;
+  if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
     return nullptr;
+  if (ToD)
+    return ToD;
 
   // Check whether we have already imported this property.
   SmallVector<NamedDecl *, 2> FoundDecls;
@@ -4022,8 +4125,11 @@
   DeclContext *DC, *LexicalDC;
   DeclarationName Name;
   SourceLocation Loc;
-  if (ImportDeclParts(D, DC, LexicalDC, Name, Loc))
+  NamedDecl *ToD;
+  if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
     return nullptr;
+  if (ToD)
+    return ToD;
 
   // We may already have a template of the same name; try to find and match it.
   if (!DC->isFunctionOrMethod()) {
@@ -4210,8 +4316,11 @@
   DeclContext *DC, *LexicalDC;
   DeclarationName Name;
   SourceLocation Loc;
-  if (ImportDeclParts(D, DC, LexicalDC, Name, Loc))
+  NamedDecl *ToD;
+  if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
     return nullptr;
+  if (ToD)
+    return ToD;
 
   // We may already have a template of the same name; try to find and match it.
   assert(!DC->isFunctionOrMethod() &&
@@ -4393,10 +4502,457 @@
 // Import Statements
 //----------------------------------------------------------------------------
 
-Stmt *ASTNodeImporter::VisitStmt(Stmt *S) {
-  Importer.FromDiag(S->getLocStart(), diag::err_unsupported_ast_node)
-    << S->getStmtClassName();
-  return nullptr;
+DeclGroupRef ASTNodeImporter::ImportDeclGroup(DeclGroupRef DG) {
+  if (DG.isNull())
+    return DeclGroupRef::Create(Importer.getToContext(), nullptr, 0);
+  size_t NumDecls = DG.end() - DG.begin();
+  SmallVector<Decl *, 1> ToDecls(NumDecls);
+  auto &_Importer = this->Importer;
+  std::transform(DG.begin(), DG.end(), ToDecls.begin(),
+    [&_Importer](Decl *D) -> Decl * {
+      return _Importer.Import(D);
+    });
+  return DeclGroupRef::Create(Importer.getToContext(),
+                              ToDecls.begin(),
+                              NumDecls);
+}
+
+Stmt *ASTNodeImporter::VisitStmt(Stmt *S) {
+  Importer.FromDiag(S->getLocStart(), diag::err_unsupported_ast_node)
+    << S->getStmtClassName();
+  return nullptr;
+}
+
+Stmt *ASTNodeImporter::VisitDeclStmt(DeclStmt *S) {
+  DeclGroupRef ToDG = ImportDeclGroup(S->getDeclGroup());
+  for (Decl *ToD : ToDG) {
+    if (!ToD)
+      return nullptr;
+  }
+  SourceLocation ToStartLoc = Importer.Import(S->getStartLoc());
+  SourceLocation ToEndLoc = Importer.Import(S->getEndLoc());
+  return new (Importer.getToContext()) DeclStmt(ToDG, ToStartLoc, ToEndLoc);
+}
+
+Stmt *ASTNodeImporter::VisitNullStmt(NullStmt *S) {
+  SourceLocation ToSemiLoc = Importer.Import(S->getSemiLoc());
+  return new (Importer.getToContext()) NullStmt(ToSemiLoc,
+                                                S->hasLeadingEmptyMacro());
+}
+
+Stmt *ASTNodeImporter::VisitCompoundStmt(CompoundStmt *S) {
+  SmallVector<Stmt *, 4> ToStmts(S->size());
+  auto &_Importer = this->Importer;
+  std::transform(S->body_begin(), S->body_end(), ToStmts.begin(),
+    [&_Importer](Stmt *CS) -> Stmt * {
+      return _Importer.Import(CS);
+    });
+  for (Stmt *ToS : ToStmts) {
+    if (!ToS)
+      return nullptr;
+  }
+  SourceLocation ToLBraceLoc = Importer.Import(S->getLBracLoc());
+  SourceLocation ToRBraceLoc = Importer.Import(S->getRBracLoc());
+  return new (Importer.getToContext()) CompoundStmt(Importer.getToContext(),
+                                                    ToStmts,
+                                                    ToLBraceLoc, ToRBraceLoc);
+}
+
+Stmt *ASTNodeImporter::VisitCaseStmt(CaseStmt *S) {
+  Expr *ToLHS = Importer.Import(S->getLHS());
+  if (!ToLHS)
+    return nullptr;
+  Expr *ToRHS = Importer.Import(S->getRHS());
+  if (!ToRHS && S->getRHS())
+    return nullptr;
+  SourceLocation ToCaseLoc = Importer.Import(S->getCaseLoc());
+  SourceLocation ToEllipsisLoc = Importer.Import(S->getEllipsisLoc());
+  SourceLocation ToColonLoc = Importer.Import(S->getColonLoc());
+  return new (Importer.getToContext()) CaseStmt(ToLHS, ToRHS,
+                                                ToCaseLoc, ToEllipsisLoc,
+                                                ToColonLoc);
+}
+
+Stmt *ASTNodeImporter::VisitDefaultStmt(DefaultStmt *S) {
+  SourceLocation ToDefaultLoc = Importer.Import(S->getDefaultLoc());
+  SourceLocation ToColonLoc = Importer.Import(S->getColonLoc());
+  Stmt *ToSubStmt = Importer.Import(S->getSubStmt());
+  if (!ToSubStmt && S->getSubStmt())
+    return nullptr;
+  return new (Importer.getToContext()) DefaultStmt(ToDefaultLoc, ToColonLoc,
+                                                   ToSubStmt);
+}
+
+Stmt *ASTNodeImporter::VisitLabelStmt(LabelStmt *S) {
+  SourceLocation ToIdentLoc = Importer.Import(S->getIdentLoc());
+  LabelDecl *ToLabelDecl =
+    cast_or_null<LabelDecl>(Importer.Import(S->getDecl()));
+  if (!ToLabelDecl && S->getDecl())
+    return nullptr;
+  Stmt *ToSubStmt = Importer.Import(S->getSubStmt());
+  if (!ToSubStmt && S->getSubStmt())
+    return nullptr;
+  return new (Importer.getToContext()) LabelStmt(ToIdentLoc, ToLabelDecl,
+                                                 ToSubStmt);
+}
+
+Stmt *ASTNodeImporter::VisitAttributedStmt(AttributedStmt *S) {
+  SourceLocation ToAttrLoc = Importer.Import(S->getAttrLoc());
+  ArrayRef<const Attr*> FromAttrs(S->getAttrs());
+  SmallVector<const Attr *, 1> ToAttrs(FromAttrs.size());
+  ASTContext &_ToContext = Importer.getToContext();
+  std::transform(FromAttrs.begin(), FromAttrs.end(), ToAttrs.begin(),
+    [&_ToContext](const Attr *A) -> const Attr * {
+      return A->clone(_ToContext);
+    });
+  for (const Attr *ToA : ToAttrs) {
+    if (!ToA)
+      return nullptr;
+  }
+  Stmt *ToSubStmt = Importer.Import(S->getSubStmt());
+  if (!ToSubStmt && S->getSubStmt())
+    return nullptr;
+  return AttributedStmt::Create(Importer.getToContext(), ToAttrLoc,
+                                ToAttrs, ToSubStmt);
+}
+
+Stmt *ASTNodeImporter::VisitIfStmt(IfStmt *S) {
+  SourceLocation ToIfLoc = Importer.Import(S->getIfLoc());
+  VarDecl *ToConditionVariable = nullptr;
+  if (VarDecl *FromConditionVariable = S->getConditionVariable()) {
+    ToConditionVariable =
+      dyn_cast_or_null<VarDecl>(Importer.Import(FromConditionVariable));
+    if (!ToConditionVariable)
+      return nullptr;
+  }
+  Expr *ToCondition = Importer.Import(S->getCond());
+  if (!ToCondition && S->getCond())
+    return nullptr;
+  Stmt *ToThenStmt = Importer.Import(S->getThen());
+  if (!ToThenStmt && S->getThen())
+    return nullptr;
+  SourceLocation ToElseLoc = Importer.Import(S->getElseLoc());
+  Stmt *ToElseStmt = Importer.Import(S->getElse());
+  if (!ToElseStmt && S->getElse())
+    return nullptr;
+  return new (Importer.getToContext()) IfStmt(Importer.getToContext(),
+                                              ToIfLoc, ToConditionVariable,
+                                              ToCondition, ToThenStmt,
+                                              ToElseLoc, ToElseStmt);
+}
+
+Stmt *ASTNodeImporter::VisitSwitchStmt(SwitchStmt *S) {
+  VarDecl *ToConditionVariable = nullptr;
+  if (VarDecl *FromConditionVariable = S->getConditionVariable()) {
+    ToConditionVariable =
+      dyn_cast_or_null<VarDecl>(Importer.Import(FromConditionVariable));
+    if (!ToConditionVariable)
+      return nullptr;
+  }
+  Expr *ToCondition = Importer.Import(S->getCond());
+  if (!ToCondition && S->getCond())
+    return nullptr;
+  SwitchStmt *ToStmt = new (Importer.getToContext()) SwitchStmt(
+                         Importer.getToContext(), ToConditionVariable,
+                         ToCondition);
+  Stmt *ToBody = Importer.Import(S->getBody());
+  if (!ToBody && S->getBody())
+    return nullptr;
+  ToStmt->setBody(ToBody);
+  ToStmt->setSwitchLoc(Importer.Import(S->getSwitchLoc()));
+  // Now we have to re-chain the cases.
+  SwitchCase *LastChainedSwitchCase = nullptr;
+  for (SwitchCase *SC = S->getSwitchCaseList(); SC != nullptr;
+       SC = SC->getNextSwitchCase()) {
+    SwitchCase *ToSC = dyn_cast_or_null<SwitchCase>(Importer.Import(SC));
+    if (!ToSC)
+      return nullptr;
+    if (LastChainedSwitchCase)
+      LastChainedSwitchCase->setNextSwitchCase(ToSC);
+    else
+      ToStmt->setSwitchCaseList(ToSC);
+    LastChainedSwitchCase = ToSC;
+  }
+  return ToStmt;
+}
+
+Stmt *ASTNodeImporter::VisitWhileStmt(WhileStmt *S) {
+  VarDecl *ToConditionVariable = nullptr;
+  if (VarDecl *FromConditionVariable = S->getConditionVariable()) {
+    ToConditionVariable =
+      dyn_cast_or_null<VarDecl>(Importer.Import(FromConditionVariable));
+    if (!ToConditionVariable)
+      return nullptr;
+  }
+  Expr *ToCondition = Importer.Import(S->getCond());
+  if (!ToCondition && S->getCond())
+    return nullptr;
+  Stmt *ToBody = Importer.Import(S->getBody());
+  if (!ToBody && S->getBody())
+    return nullptr;
+  SourceLocation ToWhileLoc = Importer.Import(S->getWhileLoc());
+  return new (Importer.getToContext()) WhileStmt(Importer.getToContext(),
+                                                 ToConditionVariable,
+                                                 ToCondition, ToBody,
+                                                 ToWhileLoc);
+}
+
+Stmt *ASTNodeImporter::VisitDoStmt(DoStmt *S) {
+  Stmt *ToBody = Importer.Import(S->getBody());
+  if (!ToBody && S->getBody())
+    return nullptr;
+  Expr *ToCondition = Importer.Import(S->getCond());
+  if (!ToCondition && S->getCond())
+    return nullptr;
+  SourceLocation ToDoLoc = Importer.Import(S->getDoLoc());
+  SourceLocation ToWhileLoc = Importer.Import(S->getWhileLoc());
+  SourceLocation ToRParenLoc = Importer.Import(S->getRParenLoc());
+  return new (Importer.getToContext()) DoStmt(ToBody, ToCondition,
+                                              ToDoLoc, ToWhileLoc,
+                                              ToRParenLoc);
+}
+
+Stmt *ASTNodeImporter::VisitForStmt(ForStmt *S) {
+  Stmt *ToInit = Importer.Import(S->getInit());
+  if (!ToInit && S->getInit())
+    return nullptr;
+  Expr *ToCondition = Importer.Import(S->getCond());
+  if (!ToCondition && S->getCond())
+    return nullptr;
+  VarDecl *ToConditionVariable = nullptr;
+  if (VarDecl *FromConditionVariable = S->getConditionVariable()) {
+    ToConditionVariable =
+      dyn_cast_or_null<VarDecl>(Importer.Import(FromConditionVariable));
+    if (!ToConditionVariable)
+      return nullptr;
+  }
+  Expr *ToInc = Importer.Import(S->getInc());
+  if (!ToInc && S->getInc())
+    return nullptr;
+  Stmt *ToBody = Importer.Import(S->getBody());
+  if (!ToBody && S->getBody())
+    return nullptr;
+  SourceLocation ToForLoc = Importer.Import(S->getForLoc());
+  SourceLocation ToLParenLoc = Importer.Import(S->getLParenLoc());
+  SourceLocation ToRParenLoc = Importer.Import(S->getRParenLoc());
+  return new (Importer.getToContext()) ForStmt(Importer.getToContext(),
+                                               ToInit, ToCondition,
+                                               ToConditionVariable,
+                                               ToInc, ToBody,
+                                               ToForLoc, ToLParenLoc,
+                                               ToRParenLoc);
+}
+
+Stmt *ASTNodeImporter::VisitGotoStmt(GotoStmt *S) {
+  LabelDecl *ToLabel = nullptr;
+  if (LabelDecl *FromLabel = S->getLabel()) {
+    ToLabel = dyn_cast_or_null<LabelDecl>(Importer.Import(FromLabel));
+    if (!ToLabel)
+      return nullptr;
+  }
+  SourceLocation ToGotoLoc = Importer.Import(S->getGotoLoc());
+  SourceLocation ToLabelLoc = Importer.Import(S->getLabelLoc());
+  return new (Importer.getToContext()) GotoStmt(ToLabel,
+                                                ToGotoLoc, ToLabelLoc);
+}
+
+Stmt *ASTNodeImporter::VisitIndirectGotoStmt(IndirectGotoStmt *S) {
+  SourceLocation ToGotoLoc = Importer.Import(S->getGotoLoc());
+  SourceLocation ToStarLoc = Importer.Import(S->getStarLoc());
+  Expr *ToTarget = Importer.Import(S->getTarget());
+  if (!ToTarget && S->getTarget())
+    return nullptr;
+  return new (Importer.getToContext()) IndirectGotoStmt(ToGotoLoc, ToStarLoc,
+                                                        ToTarget);
+}
+
+Stmt *ASTNodeImporter::VisitContinueStmt(ContinueStmt *S) {
+  SourceLocation ToContinueLoc = Importer.Import(S->getContinueLoc());
+  return new (Importer.getToContext()) ContinueStmt(ToContinueLoc);
+}
+
+Stmt *ASTNodeImporter::VisitBreakStmt(BreakStmt *S) {
+  SourceLocation ToBreakLoc = Importer.Import(S->getBreakLoc());
+  return new (Importer.getToContext()) BreakStmt(ToBreakLoc);
+}
+
+Stmt *ASTNodeImporter::VisitReturnStmt(ReturnStmt *S) {
+  SourceLocation ToRetLoc = Importer.Import(S->getReturnLoc());
+  Expr *ToRetExpr = Importer.Import(S->getRetValue());
+  if (!ToRetExpr && S->getRetValue())
+    return nullptr;
+  VarDecl *NRVOCandidate = const_cast<VarDecl*>(S->getNRVOCandidate());
+  VarDecl *ToNRVOCandidate = cast_or_null<VarDecl>(Importer.Import(NRVOCandidate));
+  if (!ToNRVOCandidate && NRVOCandidate)
+    return nullptr;
+  return new (Importer.getToContext()) ReturnStmt(ToRetLoc, ToRetExpr,
+                                                  ToNRVOCandidate);
+}
+
+Stmt *ASTNodeImporter::VisitCXXCatchStmt(CXXCatchStmt *S) {
+  SourceLocation ToCatchLoc = Importer.Import(S->getCatchLoc());
+  VarDecl *ToExceptionDecl = nullptr;
+  if (VarDecl *FromExceptionDecl = S->getExceptionDecl()) {
+    ToExceptionDecl =
+      dyn_cast_or_null<VarDecl>(Importer.Import(FromExceptionDecl));
+    if (!ToExceptionDecl)
+      return nullptr;
+  }
+  Stmt *ToHandlerBlock = Importer.Import(S->getHandlerBlock());
+  if (!ToHandlerBlock && S->getHandlerBlock())
+    return nullptr;
+  return new (Importer.getToContext()) CXXCatchStmt(ToCatchLoc,
+                                                    ToExceptionDecl,
+                                                    ToHandlerBlock);
+}
+
+Stmt *ASTNodeImporter::VisitCXXTryStmt(CXXTryStmt *S) {
+  SourceLocation ToTryLoc = Importer.Import(S->getTryLoc());
+  Stmt *ToTryBlock = Importer.Import(S->getTryBlock());
+  if (!ToTryBlock && S->getTryBlock())
+    return nullptr;
+  SmallVector<Stmt *, 1> ToHandlers(S->getNumHandlers());
+  for (unsigned HI = 0, HE = S->getNumHandlers(); HI != HE; ++HI) {
+    CXXCatchStmt *FromHandler = S->getHandler(HI);
+    if (Stmt *ToHandler = Importer.Import(FromHandler))
+      ToHandlers[HI] = ToHandler;
+    else
+      return nullptr;
+  }
+  return CXXTryStmt::Create(Importer.getToContext(), ToTryLoc, ToTryBlock,
+                            ToHandlers);
+}
+
+Stmt *ASTNodeImporter::VisitCXXForRangeStmt(CXXForRangeStmt *S) {
+  DeclStmt *ToRange =
+    dyn_cast_or_null<DeclStmt>(Importer.Import(S->getRangeStmt()));
+  if (!ToRange && S->getRangeStmt())
+    return nullptr;
+  DeclStmt *ToBeginEnd =
+    dyn_cast_or_null<DeclStmt>(Importer.Import(S->getBeginEndStmt()));
+  if (!ToBeginEnd && S->getBeginEndStmt())
+    return nullptr;
+  Expr *ToCond = Importer.Import(S->getCond());
+  if (!ToCond && S->getCond())
+    return nullptr;
+  Expr *ToInc = Importer.Import(S->getInc());
+  if (!ToInc && S->getInc())
+    return nullptr;
+  DeclStmt *ToLoopVar =
+    dyn_cast_or_null<DeclStmt>(Importer.Import(S->getLoopVarStmt()));
+  if (!ToLoopVar && S->getLoopVarStmt())
+    return nullptr;
+  Stmt *ToBody = Importer.Import(S->getBody());
+  if (!ToBody && S->getBody())
+    return nullptr;
+  SourceLocation ToForLoc = Importer.Import(S->getForLoc());
+  SourceLocation ToColonLoc = Importer.Import(S->getColonLoc());
+  SourceLocation ToRParenLoc = Importer.Import(S->getRParenLoc());
+  return new (Importer.getToContext()) CXXForRangeStmt(ToRange, ToBeginEnd,
+                                                       ToCond, ToInc,
+                                                       ToLoopVar, ToBody,
+                                                       ToForLoc, ToColonLoc,
+                                                       ToRParenLoc);
+}
+
+Stmt *ASTNodeImporter::VisitObjCForCollectionStmt(ObjCForCollectionStmt *S) {
+  Stmt *ToElem = Importer.Import(S->getElement());
+  if (!ToElem && S->getElement())
+    return nullptr;
+  Expr *ToCollect = Importer.Import(S->getCollection());
+  if (!ToCollect && S->getCollection())
+    return nullptr;
+  Stmt *ToBody = Importer.Import(S->getBody());
+  if (!ToBody && S->getBody())
+    return nullptr;
+  SourceLocation ToForLoc = Importer.Import(S->getForLoc());
+  SourceLocation ToRParenLoc = Importer.Import(S->getRParenLoc());
+  return new (Importer.getToContext()) ObjCForCollectionStmt(ToElem,
+                                                             ToCollect,
+                                                             ToBody, ToForLoc,
+                                                             ToRParenLoc);
+}
+
+Stmt *ASTNodeImporter::VisitObjCAtCatchStmt(ObjCAtCatchStmt *S) {
+  SourceLocation ToAtCatchLoc = Importer.Import(S->getAtCatchLoc());
+  SourceLocation ToRParenLoc = Importer.Import(S->getRParenLoc());
+  VarDecl *ToExceptionDecl = nullptr;
+  if (VarDecl *FromExceptionDecl = S->getCatchParamDecl()) {
+    ToExceptionDecl =
+      dyn_cast_or_null<VarDecl>(Importer.Import(FromExceptionDecl));
+    if (!ToExceptionDecl)
+      return nullptr;
+  }
+  Stmt *ToBody = Importer.Import(S->getCatchBody());
+  if (!ToBody && S->getCatchBody())
+    return nullptr;
+  return new (Importer.getToContext()) ObjCAtCatchStmt(ToAtCatchLoc,
+                                                       ToRParenLoc,
+                                                       ToExceptionDecl,
+                                                       ToBody);
+}
+
+Stmt *ASTNodeImporter::VisitObjCAtFinallyStmt(ObjCAtFinallyStmt *S) {
+  SourceLocation ToAtFinallyLoc = Importer.Import(S->getAtFinallyLoc());
+  Stmt *ToAtFinallyStmt = Importer.Import(S->getFinallyBody());
+  if (!ToAtFinallyStmt && S->getFinallyBody())
+    return nullptr;
+  return new (Importer.getToContext()) ObjCAtFinallyStmt(ToAtFinallyLoc,
+                                                         ToAtFinallyStmt);
+}
+
+Stmt *ASTNodeImporter::VisitObjCAtTryStmt(ObjCAtTryStmt *S) {
+  SourceLocation ToAtTryLoc = Importer.Import(S->getAtTryLoc());
+  Stmt *ToAtTryStmt = Importer.Import(S->getTryBody());
+  if (!ToAtTryStmt && S->getTryBody())
+    return nullptr;
+  SmallVector<Stmt *, 1> ToCatchStmts(S->getNumCatchStmts());
+  for (unsigned CI = 0, CE = S->getNumCatchStmts(); CI != CE; ++CI) {
+    ObjCAtCatchStmt *FromCatchStmt = S->getCatchStmt(CI);
+    if (Stmt *ToCatchStmt = Importer.Import(FromCatchStmt))
+      ToCatchStmts[CI] = ToCatchStmt;
+    else
+      return nullptr;
+  }
+  Stmt *ToAtFinallyStmt = Importer.Import(S->getFinallyStmt());
+  if (!ToAtFinallyStmt && S->getFinallyStmt())
+    return nullptr;
+  return ObjCAtTryStmt::Create(Importer.getToContext(),
+                               ToAtTryLoc, ToAtTryStmt,
+                               ToCatchStmts.begin(), ToCatchStmts.size(),
+                               ToAtFinallyStmt);
+}
+
+Stmt *ASTNodeImporter::VisitObjCAtSynchronizedStmt
+  (ObjCAtSynchronizedStmt *S) {
+  SourceLocation ToAtSynchronizedLoc =
+    Importer.Import(S->getAtSynchronizedLoc());
+  Expr *ToSynchExpr = Importer.Import(S->getSynchExpr());
+  if (!ToSynchExpr && S->getSynchExpr())
+    return nullptr;
+  Stmt *ToSynchBody = Importer.Import(S->getSynchBody());
+  if (!ToSynchBody && S->getSynchBody())
+    return nullptr;
+  return new (Importer.getToContext()) ObjCAtSynchronizedStmt(
+    ToAtSynchronizedLoc, ToSynchExpr, ToSynchBody);
+}
+
+Stmt *ASTNodeImporter::VisitObjCAtThrowStmt(ObjCAtThrowStmt *S) {
+  SourceLocation ToAtThrowLoc = Importer.Import(S->getThrowLoc());
+  Expr *ToThrow = Importer.Import(S->getThrowExpr());
+  if (!ToThrow && S->getThrowExpr())
+    return nullptr;
+  return new (Importer.getToContext()) ObjCAtThrowStmt(ToAtThrowLoc, ToThrow);
+}
+
+Stmt *ASTNodeImporter::VisitObjCAutoreleasePoolStmt
+  (ObjCAutoreleasePoolStmt *S) {
+  SourceLocation ToAtLoc = Importer.Import(S->getAtLoc());
+  Stmt *ToSubStmt = Importer.Import(S->getSubStmt());
+  if (!ToSubStmt && S->getSubStmt())
+    return nullptr;
+  return new (Importer.getToContext()) ObjCAutoreleasePoolStmt(ToAtLoc,
+                                                               ToSubStmt);
 }
 
 //----------------------------------------------------------------------------
@@ -4607,6 +5163,107 @@
                                 Importer.Import(E->getRParenLoc()));
 }
 
+Expr *ASTNodeImporter::VisitCXXConstructExpr(CXXConstructExpr *E) {
+  QualType T = Importer.Import(E->getType());
+  if (T.isNull())
+    return nullptr;
+
+  CXXConstructorDecl *ToCCD = dyn_cast_or_null<CXXConstructorDecl>(
+    Importer.Import(E->getConstructor()));
+  if (!ToCCD && E->getConstructor())
+    return nullptr;
+
+  size_t NumArgs = E->getNumArgs();
+  SmallVector<Expr *, 1> ToArgs(NumArgs);
+  ASTImporter &_Importer = Importer;
+  std::transform(E->arg_begin(), E->arg_end(), ToArgs.begin(),
+    [&_Importer](Expr *AE) -> Expr * {
+      return _Importer.Import(AE);
+    });
+  for (Expr *ToA : ToArgs) {
+    if (!ToA)
+      return nullptr;
+  }
+
+  return CXXConstructExpr::Create(Importer.getToContext(), T,
+                                  Importer.Import(E->getLocation()),
+                                  ToCCD, E->isElidable(),
+                                  ToArgs, E->hadMultipleCandidates(),
+                                  E->isListInitialization(),
+                                  E->isStdInitListInitialization(),
+                                  E->requiresZeroInitialization(),
+                                  E->getConstructionKind(),
+                                  Importer.Import(E->getParenOrBraceRange()));
+}
+
+Expr *ASTNodeImporter::VisitMemberExpr(MemberExpr *E) {
+  QualType T = Importer.Import(E->getType());
+  if (T.isNull())
+    return nullptr;
+
+  Expr *ToBase = Importer.Import(E->getBase());
+  if (!ToBase && E->getBase())
+    return nullptr;
+
+  ValueDecl *ToMember = dyn_cast_or_null<ValueDecl>(Importer.Import(E->getMemberDecl()));
+  if (!ToMember && E->getMemberDecl())
+    return nullptr;
+
+  DeclAccessPair ToFoundDecl = DeclAccessPair::make(
+    dyn_cast_or_null<NamedDecl>(Importer.Import(E->getFoundDecl().getDecl())),
+    E->getFoundDecl().getAccess());
+
+  DeclarationNameInfo ToMemberNameInfo(
+    Importer.Import(E->getMemberNameInfo().getName()),
+    Importer.Import(E->getMemberNameInfo().getLoc()));
+
+  if (E->hasExplicitTemplateArgs()) {
+    return nullptr; // FIXME: handle template arguments
+  }
+
+  return MemberExpr::Create(Importer.getToContext(), ToBase,
+                            E->isArrow(),
+                            Importer.Import(E->getOperatorLoc()),
+                            Importer.Import(E->getQualifierLoc()),
+                            Importer.Import(E->getTemplateKeywordLoc()),
+                            ToMember, ToFoundDecl, ToMemberNameInfo,
+                            nullptr, T, E->getValueKind(),
+                            E->getObjectKind());
+}
+
+Expr *ASTNodeImporter::VisitCallExpr(CallExpr *E) {
+  QualType T = Importer.Import(E->getType());
+  if (T.isNull())
+    return nullptr;
+
+  Expr *ToCallee = Importer.Import(E->getCallee());
+  if (!ToCallee && E->getCallee())
+    return nullptr;
+
+  unsigned NumArgs = E->getNumArgs();
+
+  llvm::SmallVector<Expr *, 2> ToArgs(NumArgs);
+
+  for (unsigned ai = 0, ae = NumArgs; ai != ae; ++ai) {
+    Expr *FromArg = E->getArg(ai);
+    Expr *ToArg = Importer.Import(FromArg);
+    if (!ToArg)
+      return nullptr;
+    ToArgs[ai] = ToArg;
+  }
+
+  Expr **ToArgs_Copied = new (Importer.getToContext())
+    Expr*[NumArgs];
+
+  for (unsigned ai = 0, ae = NumArgs; ai != ae; ++ai)
+    ToArgs_Copied[ai] = ToArgs[ai];
+
+  return new (Importer.getToContext())
+    CallExpr(Importer.getToContext(), ToCallee,
+             ArrayRef<Expr*>(ToArgs_Copied, NumArgs), T, E->getValueKind(),
+             Importer.Import(E->getRParenLoc()));
+}
+
 ASTImporter::ASTImporter(ASTContext &ToContext, FileManager &ToFileManager,
                          ASTContext &FromContext, FileManager &FromFileManager,
                          bool MinimalImport)
@@ -4658,6 +5315,17 @@
                         FromTSI->getTypeLoc().getLocStart());
 }
 
+Decl *ASTImporter::GetAlreadyImportedOrNull(Decl *FromD) {
+  llvm::DenseMap<Decl *, Decl *>::iterator Pos = ImportedDecls.find(FromD);
+  if (Pos != ImportedDecls.end()) {
+    Decl *ToD = Pos->second;
+    ASTNodeImporter(*this).ImportDefinitionIfNeeded(FromD, ToD);
+    return ToD;
+  } else {
+    return nullptr;
+  }
+}
+
 Decl *ASTImporter::Import(Decl *FromD) {
   if (!FromD)
     return nullptr;
@@ -4949,8 +5617,9 @@
   FileID ToFileID = Import(Decomposed.first);
   if (ToFileID.isInvalid())
     return SourceLocation();
-  return ToSM.getLocForStartOfFile(ToFileID)
-             .getLocWithOffset(Decomposed.second);
+  SourceLocation ret = ToSM.getLocForStartOfFile(ToFileID)
+                           .getLocWithOffset(Decomposed.second);
+  return ret;
 }
 
 SourceRange ASTImporter::Import(SourceRange FromRange) {
@@ -4974,7 +5643,7 @@
   // Map the FileID for to the "to" source manager.
   FileID ToID;
   const SrcMgr::ContentCache *Cache = FromSLoc.getFile().getContentCache();
-  if (Cache->OrigEntry) {
+  if (Cache->OrigEntry && Cache->OrigEntry->getDir()) {
     // FIXME: We probably want to use getVirtualFile(), so we don't hit the
     // disk again
     // FIXME: We definitely want to re-use the existing MemoryBuffer, rather
diff --git a/lib/AST/Decl.cpp b/lib/AST/Decl.cpp
index 628c9b0..8eff4c4 100644
--- a/lib/AST/Decl.cpp
+++ b/lib/AST/Decl.cpp
@@ -44,6 +44,12 @@
   return !getLexicalDeclContext()->Equals(getDeclContext());
 }
 
+TranslationUnitDecl::TranslationUnitDecl(ASTContext &ctx)
+    : Decl(TranslationUnit, nullptr, SourceLocation()),
+      DeclContext(TranslationUnit), Ctx(ctx), AnonymousNamespace(nullptr) {
+  Hidden = Ctx.getLangOpts().ModulesLocalVisibility;
+}
+
 //===----------------------------------------------------------------------===//
 // NamedDecl Implementation
 //===----------------------------------------------------------------------===//
@@ -894,13 +900,13 @@
   if (const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(D)) {
     // If the type of the function uses a type with unique-external
     // linkage, it's not legally usable from outside this translation unit.
-    // But only look at the type-as-written. If this function has an auto-deduced
-    // return type, we can't compute the linkage of that type because it could
-    // require looking at the linkage of this function, and we don't need this
-    // for correctness because the type is not part of the function's
-    // signature.
-    // FIXME: This is a hack. We should be able to solve this circularity and the
-    // one in getLVForNamespaceScopeDecl for Functions some other way.
+    // But only look at the type-as-written. If this function has an
+    // auto-deduced return type, we can't compute the linkage of that type
+    // because it could require looking at the linkage of this function, and we
+    // don't need this for correctness because the type is not part of the
+    // function's signature.
+    // FIXME: This is a hack. We should be able to solve this circularity and
+    // the one in getLVForNamespaceScopeDecl for Functions some other way.
     {
       QualType TypeAsWritten = MD->getType();
       if (TypeSourceInfo *TSI = MD->getTypeSourceInfo())
@@ -1769,6 +1775,8 @@
                 "VarDeclBitfields too large!");
   static_assert(sizeof(ParmVarDeclBitfields) <= sizeof(unsigned),
                 "ParmVarDeclBitfields too large!");
+  static_assert(sizeof(NonParmVarDeclBitfields) <= sizeof(unsigned),
+                "NonParmVarDeclBitfields too large!");
   AllBits = 0;
   VarDeclBits.SClass = SC;
   // Everything else is implicitly initialized to false.
@@ -1795,9 +1803,12 @@
 VarDecl::TLSKind VarDecl::getTLSKind() const {
   switch (VarDeclBits.TSCSpec) {
   case TSCS_unspecified:
-    if (hasAttr<ThreadAttr>())
-      return TLS_Static;
-    return TLS_None;
+    if (!hasAttr<ThreadAttr>())
+      return TLS_None;
+    return getASTContext().getLangOpts().isCompatibleWithMSVC(
+               LangOptions::MSVC2015)
+               ? TLS_Dynamic
+               : TLS_Static;
   case TSCS___thread: // Fall through.
   case TSCS__Thread_local:
       return TLS_Static;
@@ -1915,9 +1926,13 @@
   if (hasInit())
     return Definition;
 
-  if (hasAttr<AliasAttr>() || hasAttr<SelectAnyAttr>())
+  if (hasAttr<AliasAttr>())
     return Definition;
 
+  if (const auto *SAA = getAttr<SelectAnyAttr>())
+    if (!SAA->isInherited())
+      return Definition;
+
   // A variable template specialization (other than a static data member
   // template or an explicit specialization) is a declaration until we
   // instantiate its initializer.
@@ -2568,10 +2583,6 @@
     IsInline = true;
 }
 
-const FunctionDecl *FunctionDecl::getCanonicalDecl() const {
-  return getFirstDecl();
-}
-
 FunctionDecl *FunctionDecl::getCanonicalDecl() { return getFirstDecl(); }
 
 /// \brief Returns a value indicating whether this function
@@ -3931,10 +3942,17 @@
 
 void TypedefNameDecl::anchor() { }
 
-TagDecl *TypedefNameDecl::getAnonDeclWithTypedefName() const {
-  if (auto *TT = getTypeSourceInfo()->getType()->getAs<TagType>())
-    if (TT->getDecl()->getTypedefNameForAnonDecl() == this)
+TagDecl *TypedefNameDecl::getAnonDeclWithTypedefName(bool AnyRedecl) const {
+  if (auto *TT = getTypeSourceInfo()->getType()->getAs<TagType>()) {
+    auto *OwningTypedef = TT->getDecl()->getTypedefNameForAnonDecl();
+    auto *ThisTypedef = this;
+    if (AnyRedecl && OwningTypedef) {
+      OwningTypedef = OwningTypedef->getCanonicalDecl();
+      ThisTypedef = ThisTypedef->getCanonicalDecl();
+    }
+    if (OwningTypedef == ThisTypedef)
       return TT->getDecl();
+  }
 
   return nullptr;
 }
diff --git a/lib/AST/DeclBase.cpp b/lib/AST/DeclBase.cpp
index 2f0fffea..70bd16f 100644
--- a/lib/AST/DeclBase.cpp
+++ b/lib/AST/DeclBase.cpp
@@ -66,6 +66,12 @@
 void *Decl::operator new(std::size_t Size, const ASTContext &Ctx,
                          DeclContext *Parent, std::size_t Extra) {
   assert(!Parent || &Parent->getParentASTContext() == &Ctx);
+  // With local visibility enabled, we track the owning module even for local
+  // declarations.
+  if (Ctx.getLangOpts().ModulesLocalVisibility) {
+    void *Buffer = ::operator new(sizeof(Module *) + Size + Extra, Ctx);
+    return new (Buffer) Module*(nullptr) + 1;
+  }
   return ::operator new(Size + Extra, Ctx);
 }
 
@@ -74,6 +80,10 @@
   return getASTContext().getExternalSource()->getModule(getOwningModuleID());
 }
 
+bool Decl::hasLocalOwningModuleStorage() const {
+  return getASTContext().getLangOpts().ModulesLocalVisibility;
+}
+
 const char *Decl::getDeclKindName() const {
   switch (DeclKind) {
   default: llvm_unreachable("Declaration not in DeclNodes.inc!");
diff --git a/lib/AST/DeclCXX.cpp b/lib/AST/DeclCXX.cpp
index 8dc62dd..b00b8a0 100644
--- a/lib/AST/DeclCXX.cpp
+++ b/lib/AST/DeclCXX.cpp
@@ -1315,6 +1315,28 @@
   return Dtor;
 }
 
+bool CXXRecordDecl::isAnyDestructorNoReturn() const {
+  // Destructor is noreturn.
+  if (const CXXDestructorDecl *Destructor = getDestructor())
+    if (Destructor->isNoReturn())
+      return true;
+
+  // Check base classes destructor for noreturn.
+  for (const auto &Base : bases())
+    if (Base.getType()->getAsCXXRecordDecl()->isAnyDestructorNoReturn())
+      return true;
+
+  // Check fields for noreturn.
+  for (const auto *Field : fields())
+    if (const CXXRecordDecl *RD =
+            Field->getType()->getBaseElementTypeUnsafe()->getAsCXXRecordDecl())
+      if (RD->isAnyDestructorNoReturn())
+        return true;
+
+  // All destructors are not noreturn.
+  return false;
+}
+
 void CXXRecordDecl::completeDefinition() {
   completeDefinition(nullptr);
 }
diff --git a/lib/AST/DeclPrinter.cpp b/lib/AST/DeclPrinter.cpp
index c0f3e17..d8cd40e 100644
--- a/lib/AST/DeclPrinter.cpp
+++ b/lib/AST/DeclPrinter.cpp
@@ -733,8 +733,10 @@
 void DeclPrinter::VisitStaticAssertDecl(StaticAssertDecl *D) {
   Out << "static_assert(";
   D->getAssertExpr()->printPretty(Out, nullptr, Policy, Indentation);
-  Out << ", ";
-  D->getMessage()->printPretty(Out, nullptr, Policy, Indentation);
+  if (StringLiteral *SL = D->getMessage()) {
+    Out << ", ";
+    SL->printPretty(Out, nullptr, Policy, Indentation);
+  }
   Out << ")";
 }
 
diff --git a/lib/AST/DeclTemplate.cpp b/lib/AST/DeclTemplate.cpp
index 6374a92..2544c85 100644
--- a/lib/AST/DeclTemplate.cpp
+++ b/lib/AST/DeclTemplate.cpp
@@ -124,6 +124,12 @@
   }
 }
 
+namespace clang {
+void *allocateDefaultArgStorageChain(const ASTContext &C) {
+  return new (C) char[sizeof(void*) * 2];
+}
+}
+
 //===----------------------------------------------------------------------===//
 // RedeclarableTemplateDecl Implementation
 //===----------------------------------------------------------------------===//
@@ -504,14 +510,14 @@
 
 SourceLocation TemplateTypeParmDecl::getDefaultArgumentLoc() const {
   return hasDefaultArgument()
-    ? DefaultArgument->getTypeLoc().getBeginLoc()
-    : SourceLocation();
+             ? getDefaultArgumentInfo()->getTypeLoc().getBeginLoc()
+             : SourceLocation();
 }
 
 SourceRange TemplateTypeParmDecl::getSourceRange() const {
   if (hasDefaultArgument() && !defaultArgumentWasInherited())
     return SourceRange(getLocStart(),
-                       DefaultArgument->getTypeLoc().getEndLoc());
+                       getDefaultArgumentInfo()->getTypeLoc().getEndLoc());
   else
     return TypeDecl::getSourceRange();
 }
@@ -543,10 +549,8 @@
                                                  unsigned NumExpandedTypes,
                                                 TypeSourceInfo **ExpandedTInfos)
   : DeclaratorDecl(NonTypeTemplateParm, DC, IdLoc, Id, T, TInfo, StartLoc),
-    TemplateParmPosition(D, P), DefaultArgumentAndInherited(nullptr, false),
-    ParameterPack(true), ExpandedParameterPack(true),
-    NumExpandedTypes(NumExpandedTypes)
-{
+    TemplateParmPosition(D, P), ParameterPack(true),
+    ExpandedParameterPack(true), NumExpandedTypes(NumExpandedTypes) {
   if (ExpandedTypes && ExpandedTInfos) {
     void **TypesAndInfos = reinterpret_cast<void **>(this + 1);
     for (unsigned I = 0; I != NumExpandedTypes; ++I) {
@@ -621,8 +625,7 @@
     IdentifierInfo *Id, TemplateParameterList *Params,
     unsigned NumExpansions, TemplateParameterList * const *Expansions)
   : TemplateDecl(TemplateTemplateParm, DC, L, Id, Params),
-    TemplateParmPosition(D, P), DefaultArgument(),
-    DefaultArgumentWasInherited(false), ParameterPack(true),
+    TemplateParmPosition(D, P), ParameterPack(true),
     ExpandedParameterPack(true), NumExpandedParams(NumExpansions) {
   if (Expansions)
     std::memcpy(reinterpret_cast<void*>(this + 1), Expansions,
@@ -663,6 +666,14 @@
                                nullptr, NumExpansions, nullptr);
 }
 
+void TemplateTemplateParmDecl::setDefaultArgument(
+    const ASTContext &C, const TemplateArgumentLoc &DefArg) {
+  if (DefArg.getArgument().isNull())
+    DefaultArgument.set(nullptr);
+  else
+    DefaultArgument.set(new (C) TemplateArgumentLoc(DefArg));
+}
+
 //===----------------------------------------------------------------------===//
 // TemplateArgumentList Implementation
 //===----------------------------------------------------------------------===//
diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp
index 76a4da2..36f4139 100644
--- a/lib/AST/Expr.cpp
+++ b/lib/AST/Expr.cpp
@@ -1238,7 +1238,7 @@
   return FDecl->getBuiltinID();
 }
 
-bool CallExpr::isUnevaluatedBuiltinCall(ASTContext &Ctx) const {
+bool CallExpr::isUnevaluatedBuiltinCall(const ASTContext &Ctx) const {
   if (unsigned BI = getBuiltinCallee())
     return Ctx.BuiltinInfo.isUnevaluated(BI);
   return false;
@@ -2772,6 +2772,11 @@
     const Expr *Exp = cast<CompoundLiteralExpr>(this)->getInitializer();
     return Exp->isConstantInitializer(Ctx, false, Culprit);
   }
+  case DesignatedInitUpdateExprClass: {
+    const DesignatedInitUpdateExpr *DIUE = cast<DesignatedInitUpdateExpr>(this);
+    return DIUE->getBase()->isConstantInitializer(Ctx, false, Culprit) &&
+           DIUE->getUpdater()->isConstantInitializer(Ctx, false, Culprit);
+  }
   case InitListExprClass: {
     const InitListExpr *ILE = cast<InitListExpr>(this);
     if (ILE->getType()->isArrayType()) {
@@ -2818,6 +2823,7 @@
     break;
   }
   case ImplicitValueInitExprClass:
+  case NoInitExprClass:
     return true;
   case ParenExprClass:
     return cast<ParenExpr>(this)->getSubExpr()
@@ -2881,6 +2887,28 @@
   return false;
 }
 
+namespace {
+  /// \brief Look for any side effects within a Stmt.
+  class SideEffectFinder : public ConstEvaluatedExprVisitor<SideEffectFinder> {
+    typedef ConstEvaluatedExprVisitor<SideEffectFinder> Inherited;
+    const bool IncludePossibleEffects;
+    bool HasSideEffects;
+
+  public:
+    explicit SideEffectFinder(const ASTContext &Context, bool IncludePossible)
+      : Inherited(Context),
+        IncludePossibleEffects(IncludePossible), HasSideEffects(false) { }
+
+    bool hasSideEffects() const { return HasSideEffects; }
+
+    void VisitExpr(const Expr *E) {
+      if (!HasSideEffects &&
+          E->HasSideEffects(Context, IncludePossibleEffects))
+        HasSideEffects = true;
+    }
+  };
+}
+
 bool Expr::HasSideEffects(const ASTContext &Ctx,
                           bool IncludePossibleEffects) const {
   // In circumstances where we care about definite side effects instead of
@@ -2925,6 +2953,7 @@
   case UnaryExprOrTypeTraitExprClass:
   case AddrLabelExprClass:
   case GNUNullExprClass:
+  case NoInitExprClass:
   case CXXBoolLiteralExprClass:
   case CXXNullPtrLiteralExprClass:
   case CXXThisExprClass:
@@ -2967,7 +2996,6 @@
   case CompoundAssignOperatorClass:
   case VAArgExprClass:
   case AtomicExprClass:
-  case StmtExprClass:
   case CXXThrowExprClass:
   case CXXNewExprClass:
   case CXXDeleteExprClass:
@@ -2975,6 +3003,13 @@
     // These always have a side-effect.
     return true;
 
+  case StmtExprClass: {
+    // StmtExprs have a side-effect if any substatement does.
+    SideEffectFinder Finder(Ctx, IncludePossibleEffects);
+    Finder.Visit(cast<StmtExpr>(this)->getSubStmt());
+    return Finder.hasSideEffects();
+  }
+
   case ParenExprClass:
   case ArraySubscriptExprClass:
   case MemberExprClass:
@@ -2983,6 +3018,7 @@
   case CompoundLiteralExprClass:
   case ExtVectorElementExprClass:
   case DesignatedInitExprClass:
+  case DesignatedInitUpdateExprClass:
   case ParenListExprClass:
   case CXXPseudoDestructorExprClass:
   case CXXStdInitializerListExprClass:
@@ -3128,21 +3164,21 @@
 
 namespace {
   /// \brief Look for a call to a non-trivial function within an expression.
-  class NonTrivialCallFinder : public EvaluatedExprVisitor<NonTrivialCallFinder>
+  class NonTrivialCallFinder : public ConstEvaluatedExprVisitor<NonTrivialCallFinder>
   {
-    typedef EvaluatedExprVisitor<NonTrivialCallFinder> Inherited;
-    
+    typedef ConstEvaluatedExprVisitor<NonTrivialCallFinder> Inherited;
+
     bool NonTrivial;
     
   public:
-    explicit NonTrivialCallFinder(ASTContext &Context) 
+    explicit NonTrivialCallFinder(const ASTContext &Context)
       : Inherited(Context), NonTrivial(false) { }
     
     bool hasNonTrivialCall() const { return NonTrivial; }
-    
-    void VisitCallExpr(CallExpr *E) {
-      if (CXXMethodDecl *Method
-          = dyn_cast_or_null<CXXMethodDecl>(E->getCalleeDecl())) {
+
+    void VisitCallExpr(const CallExpr *E) {
+      if (const CXXMethodDecl *Method
+          = dyn_cast_or_null<const CXXMethodDecl>(E->getCalleeDecl())) {
         if (Method->isTrivial()) {
           // Recurse to children of the call.
           Inherited::VisitStmt(E);
@@ -3152,8 +3188,8 @@
       
       NonTrivial = true;
     }
-    
-    void VisitCXXConstructExpr(CXXConstructExpr *E) {
+
+    void VisitCXXConstructExpr(const CXXConstructExpr *E) {
       if (E->getConstructor()->isTrivial()) {
         // Recurse to children of the call.
         Inherited::VisitStmt(E);
@@ -3162,8 +3198,8 @@
       
       NonTrivial = true;
     }
-    
-    void VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) {
+
+    void VisitCXXBindTemporaryExpr(const CXXBindTemporaryExpr *E) {
       if (E->getTemporary()->getDestructor()->isTrivial()) {
         Inherited::VisitStmt(E);
         return;
@@ -3174,7 +3210,7 @@
   };
 }
 
-bool Expr::hasNonTrivialCall(ASTContext &Ctx) {
+bool Expr::hasNonTrivialCall(const ASTContext &Ctx) const {
   NonTrivialCallFinder Finder(Ctx);
   Finder.Visit(this);
   return Finder.hasNonTrivialCall();  
@@ -3989,6 +4025,25 @@
   NumDesignators = NumDesignators - 1 + NumNewDesignators;
 }
 
+DesignatedInitUpdateExpr::DesignatedInitUpdateExpr(const ASTContext &C,
+    SourceLocation lBraceLoc, Expr *baseExpr, SourceLocation rBraceLoc)
+  : Expr(DesignatedInitUpdateExprClass, baseExpr->getType(), VK_RValue,
+         OK_Ordinary, false, false, false, false) {
+  BaseAndUpdaterExprs[0] = baseExpr;
+
+  InitListExpr *ILE = new (C) InitListExpr(C, lBraceLoc, None, rBraceLoc);
+  ILE->setType(baseExpr->getType());
+  BaseAndUpdaterExprs[1] = ILE;
+}
+
+SourceLocation DesignatedInitUpdateExpr::getLocStart() const {
+  return getBase()->getLocStart();
+}
+
+SourceLocation DesignatedInitUpdateExpr::getLocEnd() const {
+  return getBase()->getLocEnd();
+}
+
 ParenListExpr::ParenListExpr(const ASTContext& C, SourceLocation lparenloc,
                              ArrayRef<Expr*> exprs,
                              SourceLocation rparenloc)
diff --git a/lib/AST/ExprCXX.cpp b/lib/AST/ExprCXX.cpp
index f23b3eb..d6f2ce6 100644
--- a/lib/AST/ExprCXX.cpp
+++ b/lib/AST/ExprCXX.cpp
@@ -1027,6 +1027,11 @@
   return new (Mem) LambdaExpr(EmptyShell(), NumCaptures, NumArrayIndexVars > 0);
 }
 
+bool LambdaExpr::isInitCapture(const LambdaCapture *C) const {
+  return (C->capturesVariable() && C->getCapturedVar()->isInitCapture() &&
+          (getCallOperator() == C->getCapturedVar()->getDeclContext()));
+}
+
 LambdaExpr::capture_iterator LambdaExpr::capture_begin() const {
   return getLambdaClass()->getLambdaData().Captures;
 }
diff --git a/lib/AST/ExprClassification.cpp b/lib/AST/ExprClassification.cpp
index 3073a53..9cc612e 100644
--- a/lib/AST/ExprClassification.cpp
+++ b/lib/AST/ExprClassification.cpp
@@ -183,6 +183,8 @@
   case Expr::ObjCIndirectCopyRestoreExprClass:
   case Expr::AtomicExprClass:
   case Expr::CXXFoldExprClass:
+  case Expr::NoInitExprClass:
+  case Expr::DesignatedInitUpdateExprClass:
     return Cl::CL_PRValue;
 
     // Next come the complicated cases.
@@ -606,7 +608,7 @@
   if (CT.isConstQualified())
     return Cl::CM_ConstQualified;
   if (CT.getQualifiers().getAddressSpace() == LangAS::opencl_constant)
-    return Cl::CM_ConstQualified;
+    return Cl::CM_ConstAddrSpace;
 
   // Arrays are not modifiable, only their elements are.
   if (CT->isArrayType())
@@ -672,6 +674,7 @@
     llvm_unreachable("CM_LValueCast and CL_LValue don't match");
   case Cl::CM_NoSetterProperty: return MLV_NoSetterProperty;
   case Cl::CM_ConstQualified: return MLV_ConstQualified;
+  case Cl::CM_ConstAddrSpace: return MLV_ConstAddrSpace;
   case Cl::CM_ArrayType: return MLV_ArrayType;
   case Cl::CM_IncompleteType: return MLV_IncompleteType;
   }
diff --git a/lib/AST/ExprConstant.cpp b/lib/AST/ExprConstant.cpp
index 280ba57..8e472f1 100644
--- a/lib/AST/ExprConstant.cpp
+++ b/lib/AST/ExprConstant.cpp
@@ -3727,8 +3727,9 @@
   // Skip this for non-union classes with no fields; in that case, the defaulted
   // copy/move does not actually read the object.
   const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(Callee);
-  if (MD && MD->isDefaulted() && MD->isTrivial() &&
-      (MD->getParent()->isUnion() || hasFields(MD->getParent()))) {
+  if (MD && MD->isDefaulted() &&
+      (MD->getParent()->isUnion() ||
+       (MD->isTrivial() && hasFields(MD->getParent())))) {
     assert(This &&
            (MD->isCopyAssignmentOperator() || MD->isMoveAssignmentOperator()));
     LValue RHS;
@@ -3792,11 +3793,9 @@
   // Skip this for empty non-union classes; we should not perform an
   // lvalue-to-rvalue conversion on them because their copy constructor does not
   // actually read them.
-  if (Definition->isDefaulted() &&
-      ((Definition->isCopyConstructor() && Definition->isTrivial()) ||
-       (Definition->isMoveConstructor() && Definition->isTrivial())) &&
+  if (Definition->isDefaulted() && Definition->isCopyOrMoveConstructor() &&
       (Definition->getParent()->isUnion() ||
-       hasFields(Definition->getParent()))) {
+       (Definition->isTrivial() && hasFields(Definition->getParent())))) {
     LValue RHS;
     RHS.setFrom(Info.Ctx, ArgValues[0]);
     return handleLValueToRValueConversion(Info, Args[0], Args[0]->getType(),
@@ -4277,6 +4276,9 @@
 
     BlockScopeRAII Scope(Info);
     const CompoundStmt *CS = E->getSubStmt();
+    if (CS->body_empty())
+      return true;
+
     for (CompoundStmt::const_body_iterator BI = CS->body_begin(),
                                            BE = CS->body_end();
          /**/; ++BI) {
@@ -4302,6 +4304,8 @@
         return false;
       }
     }
+
+    llvm_unreachable("Return from function from the loop above.");
   }
 
   /// Visit a value which is evaluated, but whose value is ignored.
@@ -8671,6 +8675,8 @@
   case Expr::CompoundLiteralExprClass:
   case Expr::ExtVectorElementExprClass:
   case Expr::DesignatedInitExprClass:
+  case Expr::NoInitExprClass:
+  case Expr::DesignatedInitUpdateExprClass:
   case Expr::ImplicitValueInitExprClass:
   case Expr::ParenListExprClass:
   case Expr::VAArgExprClass:
diff --git a/lib/AST/ItaniumCXXABI.cpp b/lib/AST/ItaniumCXXABI.cpp
index 7420782..7503cbf 100644
--- a/lib/AST/ItaniumCXXABI.cpp
+++ b/lib/AST/ItaniumCXXABI.cpp
@@ -106,7 +106,7 @@
     TargetInfo::IntType PtrDiff = Target.getPtrDiffType(0);
     uint64_t Width = Target.getTypeWidth(PtrDiff);
     unsigned Align = Target.getTypeAlign(PtrDiff);
-    if (MPT->getPointeeType()->isFunctionType())
+    if (MPT->isMemberFunctionPointer())
       Width = 2 * Width;
     return std::make_pair(Width, Align);
   }
diff --git a/lib/AST/ItaniumMangle.cpp b/lib/AST/ItaniumMangle.cpp
index 6e55655..98e1006 100644
--- a/lib/AST/ItaniumMangle.cpp
+++ b/lib/AST/ItaniumMangle.cpp
@@ -42,8 +42,8 @@
 
 namespace {
 
-/// \brief Retrieve the declaration context that should be used when mangling 
-/// the given declaration.
+/// Retrieve the declaration context that should be used when mangling the given
+/// declaration.
 static const DeclContext *getEffectiveDeclContext(const Decl *D) {
   // The ABI assumes that lambda closure types that occur within 
   // default arguments live in the context of the function. However, due to
@@ -210,7 +210,7 @@
   /// @}
 };
 
-/// CXXNameMangler - Manage the mangling of a single name.
+/// Manage the mangling of a single name.
 class CXXNameMangler {
   ItaniumMangleContextImpl &Context;
   raw_ostream &Out;
@@ -221,7 +221,7 @@
   const NamedDecl *Structor;
   unsigned StructorType;
 
-  /// SeqID - The next subsitution sequence number.
+  /// The next substitution sequence number.
   unsigned SeqID;
 
   class FunctionTypeDepthState {
@@ -536,7 +536,7 @@
   return DC;
 }
 
-/// isStd - Return whether a given namespace is the 'std' namespace.
+/// Return whether a given namespace is the 'std' namespace.
 static bool isStd(const NamespaceDecl *NS) {
   if (!IgnoreLinkageSpecDecls(getEffectiveParentContext(NS))
                                 ->isTranslationUnit())
@@ -2010,7 +2010,11 @@
   case BuiltinType::Half: Out << "Dh"; break;
   case BuiltinType::Float: Out << 'f'; break;
   case BuiltinType::Double: Out << 'd'; break;
-  case BuiltinType::LongDouble: Out << 'e'; break;
+  case BuiltinType::LongDouble:
+    Out << (getASTContext().getTargetInfo().useFloat128ManglingForLongDouble()
+                ? 'g'
+                : 'e');
+    break;
   case BuiltinType::NullPtr: Out << "Dn"; break;
 
 #define BUILTIN_TYPE(Id, SingletonId)
@@ -2311,6 +2315,7 @@
       EltName = "Poly16";
       break;
     case BuiltinType::ULong:
+    case BuiltinType::ULongLong:
       EltName = "Poly64";
       break;
     default:
@@ -2675,7 +2680,9 @@
   // These all can only appear in local or variable-initialization
   // contexts and so should never appear in a mangling.
   case Expr::AddrLabelExprClass:
+  case Expr::DesignatedInitUpdateExprClass:
   case Expr::ImplicitValueInitExprClass:
+  case Expr::NoInitExprClass:
   case Expr::ParenListExprClass:
   case Expr::LambdaExprClass:
   case Expr::MSPropertyRefExprClass:
@@ -2884,9 +2891,9 @@
 
   case Expr::UnresolvedMemberExprClass: {
     const UnresolvedMemberExpr *ME = cast<UnresolvedMemberExpr>(E);
-    mangleMemberExpr(ME->getBase(), ME->isArrow(),
-                     ME->getQualifier(), nullptr, ME->getMemberName(),
-                     Arity);
+    mangleMemberExpr(ME->isImplicitAccess() ? nullptr : ME->getBase(),
+                     ME->isArrow(), ME->getQualifier(), nullptr,
+                     ME->getMemberName(), Arity);
     if (ME->hasExplicitTemplateArgs())
       mangleTemplateArgs(ME->getExplicitTemplateArgs());
     break;
@@ -2895,8 +2902,9 @@
   case Expr::CXXDependentScopeMemberExprClass: {
     const CXXDependentScopeMemberExpr *ME
       = cast<CXXDependentScopeMemberExpr>(E);
-    mangleMemberExpr(ME->getBase(), ME->isArrow(),
-                     ME->getQualifier(), ME->getFirstQualifierFoundInScope(),
+    mangleMemberExpr(ME->isImplicitAccess() ? nullptr : ME->getBase(),
+                     ME->isArrow(), ME->getQualifier(),
+                     ME->getFirstQualifierFoundInScope(),
                      ME->getMember(), Arity);
     if (ME->hasExplicitTemplateArgs())
       mangleTemplateArgs(ME->getExplicitTemplateArgs());
@@ -3638,8 +3646,8 @@
   return mangleSubstitution(reinterpret_cast<uintptr_t>(ND));
 }
 
-/// \brief Determine whether the given type has any qualifiers that are
-/// relevant for substitutions.
+/// Determine whether the given type has any qualifiers that are relevant for
+/// substitutions.
 static bool hasMangledSubstitutionQualifiers(QualType T) {
   Qualifiers Qs = T.getQualifiers();
   return Qs.getCVRQualifiers() || Qs.hasAddressSpace();
@@ -3685,8 +3693,8 @@
     T->isSpecificBuiltinType(BuiltinType::Char_U);
 }
 
-/// isCharSpecialization - Returns whether a given type is a template
-/// specialization of a given name with a single argument of type char.
+/// Returns whether a given type is a template specialization of a given name
+/// with a single argument of type char.
 static bool isCharSpecialization(QualType T, const char *Name) {
   if (T.isNull())
     return false;
@@ -3836,8 +3844,8 @@
 
 //
 
-/// \brief Mangles the name of the declaration D and emits that name to the
-/// given output stream.
+/// Mangles the name of the declaration D and emits that name to the given
+/// output stream.
 ///
 /// If the declaration D requires a mangled name, this routine will emit that
 /// mangled name to \p os and return true. Otherwise, \p os will be unchanged
@@ -3929,8 +3937,7 @@
   Mangler.mangleFunctionEncoding(DD);
 }
 
-/// mangleGuardVariable - Returns the mangled name for a guard variable
-/// for the passed in VarDecl.
+/// Returns the mangled name for a guard variable for the passed in VarDecl.
 void ItaniumMangleContextImpl::mangleStaticGuardVariable(const VarDecl *D,
                                                          raw_ostream &Out) {
   //  <special-name> ::= GV <object name>       # Guard variable for one-time
diff --git a/lib/AST/MicrosoftCXXABI.cpp b/lib/AST/MicrosoftCXXABI.cpp
index fb3beff..aba6796 100644
--- a/lib/AST/MicrosoftCXXABI.cpp
+++ b/lib/AST/MicrosoftCXXABI.cpp
@@ -31,11 +31,12 @@
   llvm::DenseMap<const Type *, unsigned> ManglingNumbers;
   unsigned LambdaManglingNumber;
   unsigned StaticLocalNumber;
+  unsigned StaticThreadlocalNumber;
 
 public:
   MicrosoftNumberingContext()
       : MangleNumberingContext(), LambdaManglingNumber(0),
-        StaticLocalNumber(0) {}
+        StaticLocalNumber(0), StaticThreadlocalNumber(0) {}
 
   unsigned getManglingNumber(const CXXMethodDecl *CallOperator) override {
     return ++LambdaManglingNumber;
@@ -47,6 +48,8 @@
   }
 
   unsigned getStaticLocalNumber(const VarDecl *VD) override {
+    if (VD->getTLSKind())
+      return ++StaticThreadlocalNumber;
     return ++StaticLocalNumber;
   }
 
@@ -176,8 +179,9 @@
 //     // slot.
 //     void *FunctionPointerOrVirtualThunk;
 //
-//     // An offset to add to the address of the vbtable pointer after (possibly)
-//     // selecting the virtual base but before resolving and calling the function.
+//     // An offset to add to the address of the vbtable pointer after
+//     // (possibly) selecting the virtual base but before resolving and calling
+//     // the function.
 //     // Only needed if the class has any virtual bases or bases at a non-zero
 //     // offset.
 //     int NonVirtualBaseAdjustment;
@@ -213,29 +217,28 @@
 
 std::pair<uint64_t, unsigned> MicrosoftCXXABI::getMemberPointerWidthAndAlign(
     const MemberPointerType *MPT) const {
-  const TargetInfo &Target = Context.getTargetInfo();
-  assert(Target.getTriple().getArch() == llvm::Triple::x86 ||
-         Target.getTriple().getArch() == llvm::Triple::x86_64);
-  unsigned Ptrs, Ints;
-  std::tie(Ptrs, Ints) = getMSMemberPointerSlots(MPT);
   // The nominal struct is laid out with pointers followed by ints and aligned
   // to a pointer width if any are present and an int width otherwise.
+  const TargetInfo &Target = Context.getTargetInfo();
   unsigned PtrSize = Target.getPointerWidth(0);
   unsigned IntSize = Target.getIntWidth();
+
+  unsigned Ptrs, Ints;
+  std::tie(Ptrs, Ints) = getMSMemberPointerSlots(MPT);
   uint64_t Width = Ptrs * PtrSize + Ints * IntSize;
   unsigned Align;
 
   // When MSVC does x86_32 record layout, it aligns aggregate member pointers to
   // 8 bytes.  However, __alignof usually returns 4 for data memptrs and 8 for
   // function memptrs.
-  if (Ptrs + Ints > 1 && Target.getTriple().getArch() == llvm::Triple::x86)
-    Align = 8 * 8;
+  if (Ptrs + Ints > 1 && Target.getTriple().isArch32Bit())
+    Align = 64;
   else if (Ptrs)
     Align = Target.getPointerAlign(0);
   else
     Align = Target.getIntAlign();
 
-  if (Target.getTriple().getArch() == llvm::Triple::x86_64)
+  if (Target.getTriple().isArch64Bit())
     Width = llvm::RoundUpToAlignment(Width, Align);
   return std::make_pair(Width, Align);
 }
diff --git a/lib/AST/MicrosoftMangle.cpp b/lib/AST/MicrosoftMangle.cpp
index 3689102..db5b48e 100644
--- a/lib/AST/MicrosoftMangle.cpp
+++ b/lib/AST/MicrosoftMangle.cpp
@@ -147,6 +147,8 @@
   void mangleReferenceTemporary(const VarDecl *, unsigned ManglingNumber,
                                 raw_ostream &) override;
   void mangleStaticGuardVariable(const VarDecl *D, raw_ostream &Out) override;
+  void mangleThreadSafeStaticGuardVariable(const VarDecl *D, unsigned GuardNum,
+                                           raw_ostream &Out) override;
   void mangleDynamicInitializer(const VarDecl *D, raw_ostream &Out) override;
   void mangleDynamicAtExitDestructor(const VarDecl *D,
                                      raw_ostream &Out) override;
@@ -248,7 +250,7 @@
 
   void mangle(const NamedDecl *D, StringRef Prefix = "\01?");
   void mangleName(const NamedDecl *ND);
-  void mangleFunctionEncoding(const FunctionDecl *FD);
+  void mangleFunctionEncoding(const FunctionDecl *FD, bool ShouldMangle);
   void mangleVariableEncoding(const VarDecl *VD);
   void mangleMemberDataPointer(const CXXRecordDecl *RD, const ValueDecl *VD);
   void mangleMemberFunctionPointer(const CXXRecordDecl *RD,
@@ -275,7 +277,7 @@
   void mangleQualifiers(Qualifiers Quals, bool IsMember);
   void mangleRefQualifier(RefQualifierKind RefQualifier);
   void manglePointerCVQualifiers(Qualifiers Quals);
-  void manglePointerExtQualifiers(Qualifiers Quals, const Type *PointeeType);
+  void manglePointerExtQualifiers(Qualifiers Quals, QualType PointeeType);
 
   void mangleUnscopedTemplateName(const TemplateDecl *ND);
   void
@@ -289,6 +291,7 @@
 #define ABSTRACT_TYPE(CLASS, PARENT)
 #define NON_CANONICAL_TYPE(CLASS, PARENT)
 #define TYPE(CLASS, PARENT) void mangleType(const CLASS##Type *T, \
+                                            Qualifiers Quals, \
                                             SourceRange Range);
 #include "clang/AST/TypeNodes.def"
 #undef ABSTRACT_TYPE
@@ -381,7 +384,7 @@
   Out << Prefix;
   mangleName(D);
   if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D))
-    mangleFunctionEncoding(FD);
+    mangleFunctionEncoding(FD, Context.shouldMangleDeclName(FD));
   else if (const VarDecl *VD = dyn_cast<VarDecl>(D))
     mangleVariableEncoding(VD);
   else {
@@ -394,7 +397,8 @@
   }
 }
 
-void MicrosoftCXXNameMangler::mangleFunctionEncoding(const FunctionDecl *FD) {
+void MicrosoftCXXNameMangler::mangleFunctionEncoding(const FunctionDecl *FD,
+                                                     bool ShouldMangle) {
   // <type-encoding> ::= <function-class> <function-type>
 
   // Since MSVC operates on the type as written and not the canonical type, it
@@ -409,13 +413,20 @@
   // extern "C" functions can hold entities that must be mangled.
   // As it stands, these functions still need to get expressed in the full
   // external name.  They have their class and type omitted, replaced with '9'.
-  if (Context.shouldMangleDeclName(FD)) {
-    // First, the function class.
+  if (ShouldMangle) {
+    // We would like to mangle all extern "C" functions using this additional
+    // component but this would break compatibility with MSVC's behavior.
+    // Instead, do this when we know that compatibility isn't important (in
+    // other words, when it is an overloaded extern "C" function).
+    if (FD->isExternC() && FD->hasAttr<OverloadableAttr>())
+      Out << "$$J0";
+
     mangleFunctionClass(FD);
 
     mangleFunctionType(FT, FD);
-  } else
+  } else {
     Out << '9';
+  }
 }
 
 void MicrosoftCXXNameMangler::mangleVariableEncoding(const VarDecl *VD) {
@@ -451,7 +462,7 @@
       Ty->isMemberPointerType()) {
     mangleType(Ty, SR, QMM_Drop);
     manglePointerExtQualifiers(
-        Ty.getDesugaredType(getASTContext()).getLocalQualifiers(), nullptr);
+        Ty.getDesugaredType(getASTContext()).getLocalQualifiers(), QualType());
     if (const MemberPointerType *MPT = Ty->getAs<MemberPointerType>()) {
       mangleQualifiers(MPT->getPointeeType().getQualifiers(), true);
       // Member pointers are suffixed with a back reference to the member
@@ -554,7 +565,7 @@
       }
     } else {
       mangleName(MD);
-      mangleFunctionEncoding(MD);
+      mangleFunctionEncoding(MD, /*ShouldMangle=*/true);
     }
   } else {
     // Null single inheritance member functions are encoded as a simple nullptr.
@@ -1169,10 +1180,13 @@
           cast<ValueDecl>(ND));
     } else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)) {
       const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD);
-      if (MD && MD->isInstance())
+      if (MD && MD->isInstance()) {
         mangleMemberFunctionPointer(MD->getParent()->getMostRecentDecl(), MD);
-      else
-        mangle(FD, "$1?");
+      } else {
+        Out << "$1?";
+        mangleName(FD);
+        mangleFunctionEncoding(FD, /*ShouldMangle=*/true);
+      }
     } else {
       mangle(ND, TA.getParamTypeForDecl()->isReferenceType() ? "$E?" : "$1?");
     }
@@ -1208,7 +1222,8 @@
           isa<TemplateTemplateParmDecl>(Parm))
         // MSVC 2015 changed the mangling for empty expanded template packs,
         // use the old mangling for link compatibility for old versions.
-        Out << (Context.getASTContext().getLangOpts().isCompatibleWithMSVC(19)
+        Out << (Context.getASTContext().getLangOpts().isCompatibleWithMSVC(
+                    LangOptions::MSVC2015)
                     ? "$$V"
                     : "$$$V");
       else if (isa<NonTypeTemplateParmDecl>(Parm))
@@ -1337,11 +1352,11 @@
   }
 }
 
-void
-MicrosoftCXXNameMangler::manglePointerExtQualifiers(Qualifiers Quals,
-                                                    const Type *PointeeType) {
+void MicrosoftCXXNameMangler::manglePointerExtQualifiers(Qualifiers Quals,
+                                                         QualType PointeeType) {
   bool HasRestrict = Quals.hasRestrict();
-  if (PointersAre64Bit && (!PointeeType || !PointeeType->isFunctionType()))
+  if (PointersAre64Bit &&
+      (PointeeType.isNull() || !PointeeType->isFunctionType()))
     Out << 'E';
 
   if (HasRestrict)
@@ -1377,29 +1392,38 @@
   // e.g.
   // void (*x)(void) will not form a backreference with void x(void)
   void *TypePtr;
-  if (const DecayedType *DT = T->getAs<DecayedType>()) {
-    TypePtr = DT->getOriginalType().getCanonicalType().getAsOpaquePtr();
+  if (const auto *DT = T->getAs<DecayedType>()) {
+    QualType OriginalType = DT->getOriginalType();
+    // All decayed ArrayTypes should be treated identically; as if they were
+    // a decayed IncompleteArrayType.
+    if (const auto *AT = getASTContext().getAsArrayType(OriginalType))
+      OriginalType = getASTContext().getIncompleteArrayType(
+          AT->getElementType(), AT->getSizeModifier(),
+          AT->getIndexTypeCVRQualifiers());
+
+    TypePtr = OriginalType.getCanonicalType().getAsOpaquePtr();
     // If the original parameter was textually written as an array,
     // instead treat the decayed parameter like it's const.
     //
     // e.g.
     // int [] -> int * const
-    if (DT->getOriginalType()->isArrayType())
+    if (OriginalType->isArrayType())
       T = T.withConst();
-  } else
+  } else {
     TypePtr = T.getCanonicalType().getAsOpaquePtr();
+  }
 
   ArgBackRefMap::iterator Found = TypeBackReferences.find(TypePtr);
 
   if (Found == TypeBackReferences.end()) {
-    size_t OutSizeBefore = Out.GetNumBytesInBuffer();
+    size_t OutSizeBefore = Out.tell();
 
     mangleType(T, Range, QMM_Drop);
 
     // See if it's worth creating a back reference.
     // Only types longer than 1 character are considered
     // and only 10 back references slots are available:
-    bool LongerThanOneChar = (Out.GetNumBytesInBuffer() - OutSizeBefore > 1);
+    bool LongerThanOneChar = (Out.tell() - OutSizeBefore > 1);
     if (LongerThanOneChar && TypeBackReferences.size() < 10) {
       size_t Size = TypeBackReferences.size();
       TypeBackReferences[TypePtr] = Size;
@@ -1427,7 +1451,7 @@
   }
 
   bool IsPointer = T->isAnyPointerType() || T->isMemberPointerType() ||
-                   T->isBlockPointerType();
+                   T->isReferenceType() || T->isBlockPointerType();
 
   switch (QMM) {
   case QMM_Drop:
@@ -1454,11 +1478,6 @@
     break;
   }
 
-  // We have to mangle these now, while we still have enough information.
-  if (IsPointer) {
-    manglePointerCVQualifiers(Quals);
-    manglePointerExtQualifiers(Quals, T->getPointeeType().getTypePtr());
-  }
   const Type *ty = T.getTypePtr();
 
   switch (ty->getTypeClass()) {
@@ -1469,7 +1488,7 @@
     return;
 #define TYPE(CLASS, PARENT) \
   case Type::CLASS: \
-    mangleType(cast<CLASS##Type>(ty), Range); \
+    mangleType(cast<CLASS##Type>(ty), Quals, Range); \
     break;
 #include "clang/AST/TypeNodes.def"
 #undef ABSTRACT_TYPE
@@ -1478,7 +1497,7 @@
   }
 }
 
-void MicrosoftCXXNameMangler::mangleType(const BuiltinType *T,
+void MicrosoftCXXNameMangler::mangleType(const BuiltinType *T, Qualifiers,
                                          SourceRange Range) {
   //  <type>         ::= <builtin-type>
   //  <builtin-type> ::= X  # void
@@ -1564,7 +1583,7 @@
 }
 
 // <type>          ::= <function-type>
-void MicrosoftCXXNameMangler::mangleType(const FunctionProtoType *T,
+void MicrosoftCXXNameMangler::mangleType(const FunctionProtoType *T, Qualifiers,
                                          SourceRange) {
   // Structors only appear in decls, so at this point we know it's not a
   // structor type.
@@ -1578,7 +1597,7 @@
   }
 }
 void MicrosoftCXXNameMangler::mangleType(const FunctionNoProtoType *T,
-                                         SourceRange) {
+                                         Qualifiers, SourceRange) {
   llvm_unreachable("Can't mangle K&R function prototypes");
 }
 
@@ -1614,7 +1633,7 @@
   // this pointer.
   if (HasThisQuals) {
     Qualifiers Quals = Qualifiers::fromCVRMask(Proto->getTypeQuals());
-    manglePointerExtQualifiers(Quals, /*PointeeType=*/nullptr);
+    manglePointerExtQualifiers(Quals, /*PointeeType=*/QualType());
     mangleRefQualifier(Proto->getRefQualifier());
     mangleQualifiers(Quals, /*IsMember=*/false);
   }
@@ -1745,8 +1764,9 @@
         else
           Out << 'Q';
     }
-  } else
+  } else {
     Out << 'Y';
+  }
 }
 void MicrosoftCXXNameMangler::mangleCallingConvention(CallingConv CC) {
   // <calling-convention> ::= A # __cdecl
@@ -1794,7 +1814,7 @@
 }
 
 void MicrosoftCXXNameMangler::mangleType(const UnresolvedUsingType *T,
-                                         SourceRange Range) {
+                                         Qualifiers, SourceRange Range) {
   // Probably should be mangled as a template instantiation; need to see what
   // VC does first.
   DiagnosticsEngine &Diags = Context.getDiags();
@@ -1809,10 +1829,12 @@
 // <struct-type> ::= U <name>
 // <class-type>  ::= V <name>
 // <enum-type>   ::= W4 <name>
-void MicrosoftCXXNameMangler::mangleType(const EnumType *T, SourceRange) {
+void MicrosoftCXXNameMangler::mangleType(const EnumType *T, Qualifiers,
+                                         SourceRange) {
   mangleType(cast<TagType>(T)->getDecl());
 }
-void MicrosoftCXXNameMangler::mangleType(const RecordType *T, SourceRange) {
+void MicrosoftCXXNameMangler::mangleType(const RecordType *T, Qualifiers,
+                                         SourceRange) {
   mangleType(cast<TagType>(T)->getDecl());
 }
 void MicrosoftCXXNameMangler::mangleType(const TagDecl *TD) {
@@ -1847,39 +1869,41 @@
   manglePointerCVQualifiers(T->getElementType().getQualifiers());
   mangleType(T->getElementType(), SourceRange());
 }
-void MicrosoftCXXNameMangler::mangleType(const ConstantArrayType *T,
+void MicrosoftCXXNameMangler::mangleType(const ConstantArrayType *T, Qualifiers,
                                          SourceRange) {
   llvm_unreachable("Should have been special cased");
 }
-void MicrosoftCXXNameMangler::mangleType(const VariableArrayType *T,
+void MicrosoftCXXNameMangler::mangleType(const VariableArrayType *T, Qualifiers,
                                          SourceRange) {
   llvm_unreachable("Should have been special cased");
 }
 void MicrosoftCXXNameMangler::mangleType(const DependentSizedArrayType *T,
-                                         SourceRange) {
+                                         Qualifiers, SourceRange) {
   llvm_unreachable("Should have been special cased");
 }
 void MicrosoftCXXNameMangler::mangleType(const IncompleteArrayType *T,
-                                         SourceRange) {
+                                         Qualifiers, SourceRange) {
   llvm_unreachable("Should have been special cased");
 }
 void MicrosoftCXXNameMangler::mangleArrayType(const ArrayType *T) {
   QualType ElementTy(T, 0);
   SmallVector<llvm::APInt, 3> Dimensions;
   for (;;) {
-    if (const ConstantArrayType *CAT =
-            getASTContext().getAsConstantArrayType(ElementTy)) {
+    if (ElementTy->isConstantArrayType()) {
+      const ConstantArrayType *CAT =
+          getASTContext().getAsConstantArrayType(ElementTy);
       Dimensions.push_back(CAT->getSize());
       ElementTy = CAT->getElementType();
+    } else if (ElementTy->isIncompleteArrayType()) {
+      const IncompleteArrayType *IAT =
+          getASTContext().getAsIncompleteArrayType(ElementTy);
+      Dimensions.push_back(llvm::APInt(32, 0));
+      ElementTy = IAT->getElementType();
     } else if (ElementTy->isVariableArrayType()) {
       const VariableArrayType *VAT =
         getASTContext().getAsVariableArrayType(ElementTy);
-      DiagnosticsEngine &Diags = Context.getDiags();
-      unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
-        "cannot mangle this variable-length array yet");
-      Diags.Report(VAT->getSizeExpr()->getExprLoc(), DiagID)
-        << VAT->getBracketsRange();
-      return;
+      Dimensions.push_back(llvm::APInt(32, 0));
+      ElementTy = VAT->getElementType();
     } else if (ElementTy->isDependentSizedArrayType()) {
       // The dependent expression has to be folded into a constant (TODO).
       const DependentSizedArrayType *DSAT =
@@ -1890,12 +1914,9 @@
       Diags.Report(DSAT->getSizeExpr()->getExprLoc(), DiagID)
         << DSAT->getBracketsRange();
       return;
-    } else if (const IncompleteArrayType *IAT =
-                   getASTContext().getAsIncompleteArrayType(ElementTy)) {
-      Dimensions.push_back(llvm::APInt(32, 0));
-      ElementTy = IAT->getElementType();
+    } else {
+      break;
     }
-    else break;
   }
   Out << 'Y';
   // <dimension-count> ::= <number> # number of extra dimensions
@@ -1908,9 +1929,11 @@
 // <type>                   ::= <pointer-to-member-type>
 // <pointer-to-member-type> ::= <pointer-cvr-qualifiers> <cvr-qualifiers>
 //                                                          <class name> <type>
-void MicrosoftCXXNameMangler::mangleType(const MemberPointerType *T,
+void MicrosoftCXXNameMangler::mangleType(const MemberPointerType *T, Qualifiers Quals,
                                          SourceRange Range) {
   QualType PointeeType = T->getPointeeType();
+  manglePointerCVQualifiers(Quals);
+  manglePointerExtQualifiers(Quals, PointeeType);
   if (const FunctionProtoType *FPT = PointeeType->getAs<FunctionProtoType>()) {
     Out << '8';
     mangleName(T->getClass()->castAs<RecordType>()->getDecl());
@@ -1923,7 +1946,7 @@
 }
 
 void MicrosoftCXXNameMangler::mangleType(const TemplateTypeParmType *T,
-                                         SourceRange Range) {
+                                         Qualifiers, SourceRange Range) {
   DiagnosticsEngine &Diags = Context.getDiags();
   unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
     "cannot mangle this template type parameter type yet");
@@ -1931,9 +1954,8 @@
     << Range;
 }
 
-void MicrosoftCXXNameMangler::mangleType(
-                                       const SubstTemplateTypeParmPackType *T,
-                                       SourceRange Range) {
+void MicrosoftCXXNameMangler::mangleType(const SubstTemplateTypeParmPackType *T,
+                                         Qualifiers, SourceRange Range) {
   DiagnosticsEngine &Diags = Context.getDiags();
   unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
     "cannot mangle this substituted parameter pack yet");
@@ -1944,40 +1966,46 @@
 // <type> ::= <pointer-type>
 // <pointer-type> ::= E? <pointer-cvr-qualifiers> <cvr-qualifiers> <type>
 //                       # the E is required for 64-bit non-static pointers
-void MicrosoftCXXNameMangler::mangleType(const PointerType *T,
+void MicrosoftCXXNameMangler::mangleType(const PointerType *T, Qualifiers Quals,
                                          SourceRange Range) {
-  QualType PointeeTy = T->getPointeeType();
-  mangleType(PointeeTy, Range);
+  QualType PointeeType = T->getPointeeType();
+  manglePointerCVQualifiers(Quals);
+  manglePointerExtQualifiers(Quals, PointeeType);
+  mangleType(PointeeType, Range);
 }
 void MicrosoftCXXNameMangler::mangleType(const ObjCObjectPointerType *T,
-                                         SourceRange Range) {
+                                         Qualifiers Quals, SourceRange Range) {
+  QualType PointeeType = T->getPointeeType();
+  manglePointerCVQualifiers(Quals);
+  manglePointerExtQualifiers(Quals, PointeeType);
   // Object pointers never have qualifiers.
   Out << 'A';
-  manglePointerExtQualifiers(Qualifiers(), T->getPointeeType().getTypePtr());
-  mangleType(T->getPointeeType(), Range);
+  mangleType(PointeeType, Range);
 }
 
 // <type> ::= <reference-type>
 // <reference-type> ::= A E? <cvr-qualifiers> <type>
 //                 # the E is required for 64-bit non-static lvalue references
 void MicrosoftCXXNameMangler::mangleType(const LValueReferenceType *T,
-                                         SourceRange Range) {
-  Out << 'A';
-  manglePointerExtQualifiers(Qualifiers(), T->getPointeeType().getTypePtr());
-  mangleType(T->getPointeeType(), Range);
+                                         Qualifiers Quals, SourceRange Range) {
+  QualType PointeeType = T->getPointeeType();
+  Out << (Quals.hasVolatile() ? 'B' : 'A');
+  manglePointerExtQualifiers(Quals, PointeeType);
+  mangleType(PointeeType, Range);
 }
 
 // <type> ::= <r-value-reference-type>
 // <r-value-reference-type> ::= $$Q E? <cvr-qualifiers> <type>
 //                 # the E is required for 64-bit non-static rvalue references
 void MicrosoftCXXNameMangler::mangleType(const RValueReferenceType *T,
-                                         SourceRange Range) {
-  Out << "$$Q";
-  manglePointerExtQualifiers(Qualifiers(), T->getPointeeType().getTypePtr());
-  mangleType(T->getPointeeType(), Range);
+                                         Qualifiers Quals, SourceRange Range) {
+  QualType PointeeType = T->getPointeeType();
+  Out << (Quals.hasVolatile() ? "$$R" : "$$Q");
+  manglePointerExtQualifiers(Quals, PointeeType);
+  mangleType(PointeeType, Range);
 }
 
-void MicrosoftCXXNameMangler::mangleType(const ComplexType *T,
+void MicrosoftCXXNameMangler::mangleType(const ComplexType *T, Qualifiers,
                                          SourceRange Range) {
   DiagnosticsEngine &Diags = Context.getDiags();
   unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
@@ -1986,41 +2014,47 @@
     << Range;
 }
 
-void MicrosoftCXXNameMangler::mangleType(const VectorType *T,
+void MicrosoftCXXNameMangler::mangleType(const VectorType *T, Qualifiers Quals,
                                          SourceRange Range) {
   const BuiltinType *ET = T->getElementType()->getAs<BuiltinType>();
   assert(ET && "vectors with non-builtin elements are unsupported");
   uint64_t Width = getASTContext().getTypeSize(T);
   // Pattern match exactly the typedefs in our intrinsic headers.  Anything that
   // doesn't match the Intel types uses a custom mangling below.
-  bool IntelVector = true;
-  if (Width == 64 && ET->getKind() == BuiltinType::LongLong) {
-    Out << "T__m64";
-  } else if (Width == 128 || Width == 256) {
-    if (ET->getKind() == BuiltinType::Float)
-      Out << "T__m" << Width;
-    else if (ET->getKind() == BuiltinType::LongLong)
-      Out << "T__m" << Width << 'i';
-    else if (ET->getKind() == BuiltinType::Double)
-      Out << "U__m" << Width << 'd';
-    else
-      IntelVector = false;
+  bool IsBuiltin = true;
+  llvm::Triple::ArchType AT =
+      getASTContext().getTargetInfo().getTriple().getArch();
+  if (AT == llvm::Triple::x86 || AT == llvm::Triple::x86_64) {
+    if (Width == 64 && ET->getKind() == BuiltinType::LongLong) {
+      Out << "T__m64";
+    } else if (Width >= 128) {
+      if (ET->getKind() == BuiltinType::Float)
+        Out << "T__m" << Width;
+      else if (ET->getKind() == BuiltinType::LongLong)
+        Out << "T__m" << Width << 'i';
+      else if (ET->getKind() == BuiltinType::Double)
+        Out << "U__m" << Width << 'd';
+      else
+        IsBuiltin = false;
+    } else {
+      IsBuiltin = false;
+    }
   } else {
-    IntelVector = false;
+    IsBuiltin = false;
   }
 
-  if (!IntelVector) {
+  if (!IsBuiltin) {
     // The MS ABI doesn't have a special mangling for vector types, so we define
     // our own mangling to handle uses of __vector_size__ on user-specified
     // types, and for extensions like __v4sf.
     Out << "T__clang_vec" << T->getNumElements() << '_';
-    mangleType(ET, Range);
+    mangleType(ET, Quals, Range);
   }
 
   Out << "@@";
 }
 
-void MicrosoftCXXNameMangler::mangleType(const ExtVectorType *T,
+void MicrosoftCXXNameMangler::mangleType(const ExtVectorType *T, Qualifiers,
                                          SourceRange Range) {
   DiagnosticsEngine &Diags = Context.getDiags();
   unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
@@ -2029,7 +2063,7 @@
     << Range;
 }
 void MicrosoftCXXNameMangler::mangleType(const DependentSizedExtVectorType *T,
-                                         SourceRange Range) {
+                                         Qualifiers, SourceRange Range) {
   DiagnosticsEngine &Diags = Context.getDiags();
   unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
     "cannot mangle this dependent-sized extended vector type yet");
@@ -2037,14 +2071,14 @@
     << Range;
 }
 
-void MicrosoftCXXNameMangler::mangleType(const ObjCInterfaceType *T,
+void MicrosoftCXXNameMangler::mangleType(const ObjCInterfaceType *T, Qualifiers,
                                          SourceRange) {
   // ObjC interfaces have structs underlying them.
   Out << 'U';
   mangleName(T->getDecl());
 }
 
-void MicrosoftCXXNameMangler::mangleType(const ObjCObjectType *T,
+void MicrosoftCXXNameMangler::mangleType(const ObjCObjectType *T, Qualifiers,
                                          SourceRange Range) {
   // We don't allow overloading by different protocol qualification,
   // so mangling them isn't necessary.
@@ -2052,20 +2086,23 @@
 }
 
 void MicrosoftCXXNameMangler::mangleType(const BlockPointerType *T,
-                                         SourceRange Range) {
+                                         Qualifiers Quals, SourceRange Range) {
+  QualType PointeeType = T->getPointeeType();
+  manglePointerCVQualifiers(Quals);
+  manglePointerExtQualifiers(Quals, PointeeType);
+
   Out << "_E";
 
-  QualType pointee = T->getPointeeType();
-  mangleFunctionType(pointee->castAs<FunctionProtoType>());
+  mangleFunctionType(PointeeType->castAs<FunctionProtoType>());
 }
 
 void MicrosoftCXXNameMangler::mangleType(const InjectedClassNameType *,
-                                         SourceRange) {
+                                         Qualifiers, SourceRange) {
   llvm_unreachable("Cannot mangle injected class name type.");
 }
 
 void MicrosoftCXXNameMangler::mangleType(const TemplateSpecializationType *T,
-                                         SourceRange Range) {
+                                         Qualifiers, SourceRange Range) {
   DiagnosticsEngine &Diags = Context.getDiags();
   unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
     "cannot mangle this template specialization type yet");
@@ -2073,7 +2110,7 @@
     << Range;
 }
 
-void MicrosoftCXXNameMangler::mangleType(const DependentNameType *T,
+void MicrosoftCXXNameMangler::mangleType(const DependentNameType *T, Qualifiers,
                                          SourceRange Range) {
   DiagnosticsEngine &Diags = Context.getDiags();
   unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
@@ -2083,8 +2120,8 @@
 }
 
 void MicrosoftCXXNameMangler::mangleType(
-                                 const DependentTemplateSpecializationType *T,
-                                 SourceRange Range) {
+    const DependentTemplateSpecializationType *T, Qualifiers,
+    SourceRange Range) {
   DiagnosticsEngine &Diags = Context.getDiags();
   unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
     "cannot mangle this dependent template specialization type yet");
@@ -2092,7 +2129,7 @@
     << Range;
 }
 
-void MicrosoftCXXNameMangler::mangleType(const PackExpansionType *T,
+void MicrosoftCXXNameMangler::mangleType(const PackExpansionType *T, Qualifiers,
                                          SourceRange Range) {
   DiagnosticsEngine &Diags = Context.getDiags();
   unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
@@ -2101,7 +2138,7 @@
     << Range;
 }
 
-void MicrosoftCXXNameMangler::mangleType(const TypeOfType *T,
+void MicrosoftCXXNameMangler::mangleType(const TypeOfType *T, Qualifiers,
                                          SourceRange Range) {
   DiagnosticsEngine &Diags = Context.getDiags();
   unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
@@ -2110,7 +2147,7 @@
     << Range;
 }
 
-void MicrosoftCXXNameMangler::mangleType(const TypeOfExprType *T,
+void MicrosoftCXXNameMangler::mangleType(const TypeOfExprType *T, Qualifiers,
                                          SourceRange Range) {
   DiagnosticsEngine &Diags = Context.getDiags();
   unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
@@ -2119,7 +2156,7 @@
     << Range;
 }
 
-void MicrosoftCXXNameMangler::mangleType(const DecltypeType *T,
+void MicrosoftCXXNameMangler::mangleType(const DecltypeType *T, Qualifiers,
                                          SourceRange Range) {
   DiagnosticsEngine &Diags = Context.getDiags();
   unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
@@ -2129,7 +2166,7 @@
 }
 
 void MicrosoftCXXNameMangler::mangleType(const UnaryTransformType *T,
-                                         SourceRange Range) {
+                                         Qualifiers, SourceRange Range) {
   DiagnosticsEngine &Diags = Context.getDiags();
   unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
     "cannot mangle this unary transform type yet");
@@ -2137,7 +2174,8 @@
     << Range;
 }
 
-void MicrosoftCXXNameMangler::mangleType(const AutoType *T, SourceRange Range) {
+void MicrosoftCXXNameMangler::mangleType(const AutoType *T, Qualifiers,
+                                         SourceRange Range) {
   assert(T->getDeducedType().isNull() && "expecting a dependent type!");
 
   DiagnosticsEngine &Diags = Context.getDiags();
@@ -2147,7 +2185,7 @@
     << Range;
 }
 
-void MicrosoftCXXNameMangler::mangleType(const AtomicType *T,
+void MicrosoftCXXNameMangler::mangleType(const AtomicType *T, Qualifiers,
                                          SourceRange Range) {
   DiagnosticsEngine &Diags = Context.getDiags();
   unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
@@ -2513,18 +2551,18 @@
   getDiags().Report(VD->getLocation(), DiagID);
 }
 
+void MicrosoftMangleContextImpl::mangleThreadSafeStaticGuardVariable(
+    const VarDecl *VD, unsigned GuardNum, raw_ostream &Out) {
+  MicrosoftCXXNameMangler Mangler(*this, Out);
+
+  Mangler.getStream() << "\01?$TSS" << GuardNum << '@';
+  Mangler.mangleNestedName(VD);
+}
+
 void MicrosoftMangleContextImpl::mangleStaticGuardVariable(const VarDecl *VD,
                                                            raw_ostream &Out) {
-  // TODO: This is not correct, especially with respect to VS "14".  VS "14"
-  // utilizes thread local variables to implement thread safe, re-entrant
-  // initialization for statics.  They no longer differentiate between an
-  // externally visible and non-externally visible static with respect to
-  // mangling, they all get $TSS <number>.
-  //
-  // N.B. This means that they can get more than 32 static variable guards in a
-  // scope.  It also means that they broke compatibility with their own ABI.
-
   // <guard-name> ::= ?_B <postfix> @5 <scope-depth>
+  //              ::= ?__J <postfix> @5 <scope-depth>
   //              ::= ?$S <guard-num> @ <postfix> @4IA
 
   // The first mangling is what MSVC uses to guard static locals in inline
@@ -2536,8 +2574,11 @@
   MicrosoftCXXNameMangler Mangler(*this, Out);
 
   bool Visible = VD->isExternallyVisible();
-  // <operator-name> ::= ?_B # local static guard
-  Mangler.getStream() << (Visible ? "\01??_B" : "\01?$S1@");
+  if (Visible) {
+    Mangler.getStream() << (VD->getTLSKind() ? "\01??__J" : "\01??_B");
+  } else {
+    Mangler.getStream() << "\01?$S1@";
+  }
   unsigned ScopeDepth = 0;
   if (Visible && !getNextDiscriminator(VD, ScopeDepth))
     // If we do not have a discriminator and are emitting a guard variable for
diff --git a/lib/AST/NSAPI.cpp b/lib/AST/NSAPI.cpp
index 033a87b..2749100 100644
--- a/lib/AST/NSAPI.cpp
+++ b/lib/AST/NSAPI.cpp
@@ -505,6 +505,11 @@
   return StringRef();
 }
 
+bool NSAPI::isMacroDefined(StringRef Id) const {
+  // FIXME: Check whether the relevant module macros are visible.
+  return Ctx.Idents.get(Id).hasMacroDefinition();
+}
+
 bool NSAPI::isObjCTypedef(QualType T,
                           StringRef name, IdentifierInfo *&II) const {
   if (!Ctx.getLangOpts().ObjC1)
diff --git a/lib/AST/RecordLayoutBuilder.cpp b/lib/AST/RecordLayoutBuilder.cpp
index ba92587..2101a55 100644
--- a/lib/AST/RecordLayoutBuilder.cpp
+++ b/lib/AST/RecordLayoutBuilder.cpp
@@ -2379,8 +2379,9 @@
   // In 64-bit mode we always perform an alignment step after laying out vbases.
   // In 32-bit mode we do not.  The check to see if we need to perform alignment
   // checks the RequiredAlignment field and performs alignment if it isn't 0.
-  RequiredAlignment = Context.getTargetInfo().getPointerWidth(0) == 64 ?
-                      CharUnits::One() : CharUnits::Zero();
+  RequiredAlignment = Context.getTargetInfo().getTriple().isArch64Bit()
+                          ? CharUnits::One()
+                          : CharUnits::Zero();
   // Compute the maximum field alignment.
   MaxFieldAlignment = CharUnits::Zero();
   // Honor the default struct packing maximum alignment flag.
@@ -2417,7 +2418,8 @@
   // injection.
   PointerInfo.Size =
       Context.toCharUnitsFromBits(Context.getTargetInfo().getPointerWidth(0));
-  PointerInfo.Alignment = PointerInfo.Size;
+  PointerInfo.Alignment =
+      Context.toCharUnitsFromBits(Context.getTargetInfo().getPointerAlign(0));
   // Respect pragma pack.
   if (!MaxFieldAlignment.isZero())
     PointerInfo.Alignment = std::min(PointerInfo.Alignment, MaxFieldAlignment);
@@ -2974,11 +2976,11 @@
   // Look up the cache entry.  Since we're working with the first
   // declaration, its parent must be the class definition, which is
   // the correct key for the KeyFunctions hash.
-  llvm::DenseMap<const CXXRecordDecl*, LazyDeclPtr>::iterator
-    I = KeyFunctions.find(Method->getParent());
+  const auto &Map = KeyFunctions;
+  auto I = Map.find(Method->getParent());
 
   // If it's not cached, there's nothing to do.
-  if (I == KeyFunctions.end()) return;
+  if (I == Map.end()) return;
 
   // If it is cached, check whether it's the target method, and if so,
   // remove it from the cache. Note, the call to 'get' might invalidate
diff --git a/lib/AST/Stmt.cpp b/lib/AST/Stmt.cpp
index 0e8652f..09bb17b 100644
--- a/lib/AST/Stmt.cpp
+++ b/lib/AST/Stmt.cpp
@@ -592,7 +592,7 @@
       SourceLocation EndLoc =
           getAsmString()->getLocationOfByte(CurPtr - StrStart, SM, LO, TI);
 
-      Pieces.push_back(AsmStringPiece(N, Str, BeginLoc, EndLoc));
+      Pieces.emplace_back(N, std::move(Str), BeginLoc, EndLoc);
       continue;
     }
 
@@ -626,7 +626,7 @@
       SourceLocation EndLoc =
           getAsmString()->getLocationOfByte(NameEnd + 1 - StrStart, SM, LO, TI);
 
-      Pieces.push_back(AsmStringPiece(N, Str, BeginLoc, EndLoc));
+      Pieces.emplace_back(N, std::move(Str), BeginLoc, EndLoc);
 
       CurPtr = NameEnd+1;
       continue;
@@ -1581,10 +1581,7 @@
 
 const OMPClause *
 OMPExecutableDirective::getSingleClause(OpenMPClauseKind K) const {
-  auto ClauseFilter =
-      [=](const OMPClause *C) -> bool { return C->getClauseKind() == K; };
-  OMPExecutableDirective::filtered_clause_iterator<decltype(ClauseFilter)> I(
-      clauses(), ClauseFilter);
+  auto &&I = getClausesOfKind(K);
 
   if (I) {
     auto *Clause = *I;
diff --git a/lib/AST/StmtPrinter.cpp b/lib/AST/StmtPrinter.cpp
index b68f3a3..aabe974 100644
--- a/lib/AST/StmtPrinter.cpp
+++ b/lib/AST/StmtPrinter.cpp
@@ -396,8 +396,9 @@
     }
 
     VisitStringLiteral(Node->getOutputConstraintLiteral(i));
-    OS << " ";
+    OS << " (";
     Visit(Node->getOutputExpr(i));
+    OS << ")";
   }
 
   // Inputs
@@ -415,8 +416,9 @@
     }
 
     VisitStringLiteral(Node->getInputConstraintLiteral(i));
-    OS << " ";
+    OS << " (";
     Visit(Node->getInputExpr(i));
+    OS << ")";
   }
 
   // Clobbers
@@ -1395,13 +1397,16 @@
 }
 
 void StmtPrinter::VisitDesignatedInitExpr(DesignatedInitExpr *Node) {
+  bool NeedsEquals = true;
   for (DesignatedInitExpr::designators_iterator D = Node->designators_begin(),
                       DEnd = Node->designators_end();
        D != DEnd; ++D) {
     if (D->isFieldDesignator()) {
       if (D->getDotLoc().isInvalid()) {
-        if (IdentifierInfo *II = D->getFieldName())
+        if (IdentifierInfo *II = D->getFieldName()) {
           OS << II->getName() << ":";
+          NeedsEquals = false;
+        }
       } else {
         OS << "." << D->getFieldName()->getName();
       }
@@ -1418,10 +1423,29 @@
     }
   }
 
-  OS << " = ";
+  if (NeedsEquals)
+    OS << " = ";
+  else
+    OS << " ";
   PrintExpr(Node->getInit());
 }
 
+void StmtPrinter::VisitDesignatedInitUpdateExpr(
+    DesignatedInitUpdateExpr *Node) {
+  OS << "{";
+  OS << "/*base*/";
+  PrintExpr(Node->getBase());
+  OS << ", ";
+
+  OS << "/*updater*/";
+  PrintExpr(Node->getUpdater());
+  OS << "}";
+}
+
+void StmtPrinter::VisitNoInitExpr(NoInitExpr *Node) {
+  OS << "/*no init*/";
+}
+
 void StmtPrinter::VisitImplicitValueInitExpr(ImplicitValueInitExpr *Node) {
   if (Policy.LangOpts.CPlusPlus) {
     OS << "/*implicit*/";
@@ -1758,7 +1782,7 @@
       break;
 
     case LCK_ByRef:
-      if (Node->getCaptureDefault() != LCD_ByRef || C->isInitCapture())
+      if (Node->getCaptureDefault() != LCD_ByRef || Node->isInitCapture(C))
         OS << '&';
       OS << C->getCapturedVar()->getName();
       break;
@@ -1770,7 +1794,7 @@
       llvm_unreachable("VLA type in explicit captures.");
     }
 
-    if (C->isInitCapture())
+    if (Node->isInitCapture(C))
       PrintExpr(C->getCapturedVar()->getInit());
   }
   OS << ']';
diff --git a/lib/AST/StmtProfile.cpp b/lib/AST/StmtProfile.cpp
index f6df1ca..fb5350e 100644
--- a/lib/AST/StmtProfile.cpp
+++ b/lib/AST/StmtProfile.cpp
@@ -298,8 +298,12 @@
 void OMPClauseProfiler::VisitOMPProcBindClause(const OMPProcBindClause *C) { }
 
 void OMPClauseProfiler::VisitOMPScheduleClause(const OMPScheduleClause *C) {
-  if (C->getChunkSize())
+  if (C->getChunkSize()) {
     Profiler->VisitStmt(C->getChunkSize());
+    if (C->getHelperChunkSize()) {
+      Profiler->VisitStmt(C->getChunkSize());
+    }
+  }
 }
 
 void OMPClauseProfiler::VisitOMPOrderedClause(const OMPOrderedClause *) {}
@@ -740,6 +744,18 @@
   }
 }
 
+// Seems that if VisitInitListExpr() only works on the syntactic form of an
+// InitListExpr, then a DesignatedInitUpdateExpr is not encountered.
+void StmtProfiler::VisitDesignatedInitUpdateExpr(
+    const DesignatedInitUpdateExpr *S) {
+  llvm_unreachable("Unexpected DesignatedInitUpdateExpr in syntactic form of "
+                   "initializer");
+}
+
+void StmtProfiler::VisitNoInitExpr(const NoInitExpr *S) {
+  llvm_unreachable("Unexpected NoInitExpr in syntactic form of initializer");
+}
+
 void StmtProfiler::VisitImplicitValueInitExpr(const ImplicitValueInitExpr *S) {
   VisitExpr(S);
 }
diff --git a/lib/AST/Type.cpp b/lib/AST/Type.cpp
index 0eb5d8c..09bb769 100644
--- a/lib/AST/Type.cpp
+++ b/lib/AST/Type.cpp
@@ -729,7 +729,7 @@
 bool Type::isSignedIntegerOrEnumerationType() const {
   if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType)) {
     return BT->getKind() >= BuiltinType::Char_S &&
-    BT->getKind() <= BuiltinType::Int128;
+           BT->getKind() <= BuiltinType::Int128;
   }
   
   if (const EnumType *ET = dyn_cast<EnumType>(CanonicalType)) {
diff --git a/lib/AST/VTableBuilder.cpp b/lib/AST/VTableBuilder.cpp
index 9a768a9..ca5f0aa 100644
--- a/lib/AST/VTableBuilder.cpp
+++ b/lib/AST/VTableBuilder.cpp
@@ -13,9 +13,11 @@
 
 #include "clang/AST/VTableBuilder.h"
 #include "clang/AST/ASTContext.h"
+#include "clang/AST/ASTDiagnostic.h"
 #include "clang/AST/CXXInheritance.h"
 #include "clang/AST/RecordLayout.h"
 #include "clang/Basic/TargetInfo.h"
+#include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
@@ -216,7 +218,7 @@
 #endif
 }
 
-static BaseOffset ComputeBaseOffset(ASTContext &Context, 
+static BaseOffset ComputeBaseOffset(const ASTContext &Context,
                                     const CXXRecordDecl *DerivedRD,
                                     const CXXBasePath &Path) {
   CharUnits NonVirtualOffset = CharUnits::Zero();
@@ -255,7 +257,7 @@
   
 }
 
-static BaseOffset ComputeBaseOffset(ASTContext &Context, 
+static BaseOffset ComputeBaseOffset(const ASTContext &Context,
                                     const CXXRecordDecl *BaseRD,
                                     const CXXRecordDecl *DerivedRD) {
   CXXBasePaths Paths(/*FindAmbiguities=*/false,
@@ -2736,8 +2738,9 @@
     CharUnits ThisOffset = Overrider.Offset;
     CharUnits LastVBaseOffset;
 
-    // For each path from the overrider to the parents of the overridden methods,
-    // traverse the path, calculating the this offset in the most derived class.
+    // For each path from the overrider to the parents of the overridden
+    // methods, traverse the path, calculating the this offset in the most
+    // derived class.
     for (int J = 0, F = Path.size(); J != F; ++J) {
       const CXXBasePathElement &Element = Path[J];
       QualType CurTy = Element.Base->getType();
@@ -2969,7 +2972,8 @@
   const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
 
   // See if this class expands a vftable of the base we look at, which is either
-  // the one defined by the vfptr base path or the primary base of the current class.
+  // the one defined by the vfptr base path or the primary base of the current
+  // class.
   const CXXRecordDecl *NextBase = nullptr, *NextLastVBase = LastVBase;
   CharUnits NextBaseOffset;
   if (BaseDepth < WhichVFPtr.PathToBaseWithVPtr.size()) {
@@ -3027,7 +3031,8 @@
                                   ThisAdjustmentOffset);
 
     if (OverriddenMD) {
-      // If MD overrides anything in this vftable, we need to update the entries.
+      // If MD overrides anything in this vftable, we need to update the
+      // entries.
       MethodInfoMapTy::iterator OverriddenMDIterator =
           MethodInfoMap.find(OverriddenMD);
 
@@ -3442,55 +3447,176 @@
   llvm::DeleteContainerSeconds(VBaseInfo);
 }
 
-static bool
-findPathForVPtr(ASTContext &Context, const ASTRecordLayout &MostDerivedLayout,
-                const CXXRecordDecl *RD, CharUnits Offset,
-                llvm::SmallPtrSetImpl<const CXXRecordDecl *> &VBasesSeen,
-                VPtrInfo::BasePath &FullPath, VPtrInfo *Info) {
-  if (RD == Info->BaseWithVPtr && Offset == Info->FullOffsetInMDC) {
-    Info->PathToBaseWithVPtr = FullPath;
-    return true;
+namespace {
+typedef llvm::SetVector<BaseSubobject, std::vector<BaseSubobject>,
+                        llvm::DenseSet<BaseSubobject>> FullPathTy;
+}
+
+// This recursive function finds all paths from a subobject centered at
+// (RD, Offset) to the subobject located at BaseWithVPtr.
+static void findPathsToSubobject(ASTContext &Context,
+                                 const ASTRecordLayout &MostDerivedLayout,
+                                 const CXXRecordDecl *RD, CharUnits Offset,
+                                 BaseSubobject BaseWithVPtr,
+                                 FullPathTy &FullPath,
+                                 std::list<FullPathTy> &Paths) {
+  if (BaseSubobject(RD, Offset) == BaseWithVPtr) {
+    Paths.push_back(FullPath);
+    return;
   }
 
   const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
 
-  // Recurse with non-virtual bases first.
-  // FIXME: Does this need to be in layout order? Virtual bases will be in base
-  // specifier order, which isn't necessarily layout order.
-  SmallVector<CXXBaseSpecifier, 4> Bases(RD->bases_begin(), RD->bases_end());
-  std::stable_partition(Bases.begin(), Bases.end(),
-                        [](CXXBaseSpecifier bs) { return !bs.isVirtual(); });
-
-  for (const auto &B : Bases) {
-    const CXXRecordDecl *Base = B.getType()->getAsCXXRecordDecl();
-    CharUnits NewOffset;
-    if (!B.isVirtual())
-      NewOffset = Offset + Layout.getBaseClassOffset(Base);
-    else {
-      if (!VBasesSeen.insert(Base).second)
-        return false;
-      NewOffset = MostDerivedLayout.getVBaseClassOffset(Base);
-    }
-    FullPath.push_back(Base);
-    if (findPathForVPtr(Context, MostDerivedLayout, Base, NewOffset, VBasesSeen,
-                        FullPath, Info))
-      return true;
+  for (const CXXBaseSpecifier &BS : RD->bases()) {
+    const CXXRecordDecl *Base = BS.getType()->getAsCXXRecordDecl();
+    CharUnits NewOffset = BS.isVirtual()
+                              ? MostDerivedLayout.getVBaseClassOffset(Base)
+                              : Offset + Layout.getBaseClassOffset(Base);
+    FullPath.insert(BaseSubobject(Base, NewOffset));
+    findPathsToSubobject(Context, MostDerivedLayout, Base, NewOffset,
+                         BaseWithVPtr, FullPath, Paths);
     FullPath.pop_back();
   }
-  return false;
+}
+
+// Return the paths which are not subsets of other paths.
+static void removeRedundantPaths(std::list<FullPathTy> &FullPaths) {
+  FullPaths.remove_if([&](const FullPathTy &SpecificPath) {
+    for (const FullPathTy &OtherPath : FullPaths) {
+      if (&SpecificPath == &OtherPath)
+        continue;
+      if (std::all_of(SpecificPath.begin(), SpecificPath.end(),
+                      [&](const BaseSubobject &BSO) {
+                        return OtherPath.count(BSO) != 0;
+                      })) {
+        return true;
+      }
+    }
+    return false;
+  });
+}
+
+static CharUnits getOffsetOfFullPath(ASTContext &Context,
+                                     const CXXRecordDecl *RD,
+                                     const FullPathTy &FullPath) {
+  const ASTRecordLayout &MostDerivedLayout =
+      Context.getASTRecordLayout(RD);
+  CharUnits Offset = CharUnits::fromQuantity(-1);
+  for (const BaseSubobject &BSO : FullPath) {
+    const CXXRecordDecl *Base = BSO.getBase();
+    // The first entry in the path is always the most derived record, skip it.
+    if (Base == RD) {
+      assert(Offset.getQuantity() == -1);
+      Offset = CharUnits::Zero();
+      continue;
+    }
+    assert(Offset.getQuantity() != -1);
+    const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
+    // While we know which base has to be traversed, we don't know if that base
+    // was a virtual base.
+    const CXXBaseSpecifier *BaseBS = std::find_if(
+        RD->bases_begin(), RD->bases_end(), [&](const CXXBaseSpecifier &BS) {
+          return BS.getType()->getAsCXXRecordDecl() == Base;
+        });
+    Offset = BaseBS->isVirtual() ? MostDerivedLayout.getVBaseClassOffset(Base)
+                                 : Offset + Layout.getBaseClassOffset(Base);
+    RD = Base;
+  }
+  return Offset;
+}
+
+// We want to select the path which introduces the most covariant overrides.  If
+// two paths introduce overrides which the other path doesn't contain, issue a
+// diagnostic.
+static const FullPathTy *selectBestPath(ASTContext &Context,
+                                        const CXXRecordDecl *RD, VPtrInfo *Info,
+                                        std::list<FullPathTy> &FullPaths) {
+  // Handle some easy cases first.
+  if (FullPaths.empty())
+    return nullptr;
+  if (FullPaths.size() == 1)
+    return &FullPaths.front();
+
+  const FullPathTy *BestPath = nullptr;
+  typedef std::set<const CXXMethodDecl *> OverriderSetTy;
+  OverriderSetTy LastOverrides;
+  for (const FullPathTy &SpecificPath : FullPaths) {
+    assert(!SpecificPath.empty());
+    OverriderSetTy CurrentOverrides;
+    const CXXRecordDecl *TopLevelRD = SpecificPath.begin()->getBase();
+    // Find the distance from the start of the path to the subobject with the
+    // VPtr.
+    CharUnits BaseOffset =
+        getOffsetOfFullPath(Context, TopLevelRD, SpecificPath);
+    FinalOverriders Overriders(TopLevelRD, CharUnits::Zero(), TopLevelRD);
+    for (const CXXMethodDecl *MD : Info->BaseWithVPtr->methods()) {
+      if (!MD->isVirtual())
+        continue;
+      FinalOverriders::OverriderInfo OI =
+          Overriders.getOverrider(MD->getCanonicalDecl(), BaseOffset);
+      const CXXMethodDecl *OverridingMethod = OI.Method;
+      // Only overriders which have a return adjustment introduce problematic
+      // thunks.
+      if (ComputeReturnAdjustmentBaseOffset(Context, OverridingMethod, MD)
+              .isEmpty())
+        continue;
+      // It's possible that the overrider isn't in this path.  If so, skip it
+      // because this path didn't introduce it.
+      const CXXRecordDecl *OverridingParent = OverridingMethod->getParent();
+      if (std::none_of(SpecificPath.begin(), SpecificPath.end(),
+                       [&](const BaseSubobject &BSO) {
+                         return BSO.getBase() == OverridingParent;
+                       }))
+        continue;
+      CurrentOverrides.insert(OverridingMethod);
+    }
+    OverriderSetTy NewOverrides =
+        llvm::set_difference(CurrentOverrides, LastOverrides);
+    if (NewOverrides.empty())
+      continue;
+    OverriderSetTy MissingOverrides =
+        llvm::set_difference(LastOverrides, CurrentOverrides);
+    if (MissingOverrides.empty()) {
+      // This path is a strict improvement over the last path, let's use it.
+      BestPath = &SpecificPath;
+      std::swap(CurrentOverrides, LastOverrides);
+    } else {
+      // This path introduces an overrider with a conflicting covariant thunk.
+      DiagnosticsEngine &Diags = Context.getDiagnostics();
+      const CXXMethodDecl *CovariantMD = *NewOverrides.begin();
+      const CXXMethodDecl *ConflictMD = *MissingOverrides.begin();
+      Diags.Report(RD->getLocation(), diag::err_vftable_ambiguous_component)
+          << RD;
+      Diags.Report(CovariantMD->getLocation(), diag::note_covariant_thunk)
+          << CovariantMD;
+      Diags.Report(ConflictMD->getLocation(), diag::note_covariant_thunk)
+          << ConflictMD;
+    }
+  }
+  // Go with the path that introduced the most covariant overrides.  If there is
+  // no such path, pick the first path.
+  return BestPath ? BestPath : &FullPaths.front();
 }
 
 static void computeFullPathsForVFTables(ASTContext &Context,
                                         const CXXRecordDecl *RD,
                                         VPtrInfoVector &Paths) {
-  llvm::SmallPtrSet<const CXXRecordDecl*, 4> VBasesSeen;
   const ASTRecordLayout &MostDerivedLayout = Context.getASTRecordLayout(RD);
-  VPtrInfo::BasePath FullPath;
+  FullPathTy FullPath;
+  std::list<FullPathTy> FullPaths;
   for (VPtrInfo *Info : Paths) {
-    findPathForVPtr(Context, MostDerivedLayout, RD, CharUnits::Zero(),
-                    VBasesSeen, FullPath, Info);
-    VBasesSeen.clear();
+    findPathsToSubobject(
+        Context, MostDerivedLayout, RD, CharUnits::Zero(),
+        BaseSubobject(Info->BaseWithVPtr, Info->FullOffsetInMDC), FullPath,
+        FullPaths);
     FullPath.clear();
+    removeRedundantPaths(FullPaths);
+    Info->PathToBaseWithVPtr.clear();
+    if (const FullPathTy *BestPath =
+            selectBestPath(Context, RD, Info, FullPaths))
+      for (const BaseSubobject &BSO : *BestPath)
+        Info->PathToBaseWithVPtr.push_back(BSO.getBase());
+    FullPaths.clear();
   }
 }
 
diff --git a/lib/ASTMatchers/ASTMatchFinder.cpp b/lib/ASTMatchers/ASTMatchFinder.cpp
index c5f3063..e3b666e 100644
--- a/lib/ASTMatchers/ASTMatchFinder.cpp
+++ b/lib/ASTMatchers/ASTMatchFinder.cpp
@@ -912,37 +912,37 @@
 
 void MatchFinder::addMatcher(const DeclarationMatcher &NodeMatch,
                              MatchCallback *Action) {
-  Matchers.DeclOrStmt.push_back(std::make_pair(NodeMatch, Action));
+  Matchers.DeclOrStmt.emplace_back(NodeMatch, Action);
   Matchers.AllCallbacks.push_back(Action);
 }
 
 void MatchFinder::addMatcher(const TypeMatcher &NodeMatch,
                              MatchCallback *Action) {
-  Matchers.Type.push_back(std::make_pair(NodeMatch, Action));
+  Matchers.Type.emplace_back(NodeMatch, Action);
   Matchers.AllCallbacks.push_back(Action);
 }
 
 void MatchFinder::addMatcher(const StatementMatcher &NodeMatch,
                              MatchCallback *Action) {
-  Matchers.DeclOrStmt.push_back(std::make_pair(NodeMatch, Action));
+  Matchers.DeclOrStmt.emplace_back(NodeMatch, Action);
   Matchers.AllCallbacks.push_back(Action);
 }
 
 void MatchFinder::addMatcher(const NestedNameSpecifierMatcher &NodeMatch,
                              MatchCallback *Action) {
-  Matchers.NestedNameSpecifier.push_back(std::make_pair(NodeMatch, Action));
+  Matchers.NestedNameSpecifier.emplace_back(NodeMatch, Action);
   Matchers.AllCallbacks.push_back(Action);
 }
 
 void MatchFinder::addMatcher(const NestedNameSpecifierLocMatcher &NodeMatch,
                              MatchCallback *Action) {
-  Matchers.NestedNameSpecifierLoc.push_back(std::make_pair(NodeMatch, Action));
+  Matchers.NestedNameSpecifierLoc.emplace_back(NodeMatch, Action);
   Matchers.AllCallbacks.push_back(Action);
 }
 
 void MatchFinder::addMatcher(const TypeLocMatcher &NodeMatch,
                              MatchCallback *Action) {
-  Matchers.TypeLoc.push_back(std::make_pair(NodeMatch, Action));
+  Matchers.TypeLoc.emplace_back(NodeMatch, Action);
   Matchers.AllCallbacks.push_back(Action);
 }
 
diff --git a/lib/ASTMatchers/Dynamic/Diagnostics.cpp b/lib/ASTMatchers/Dynamic/Diagnostics.cpp
index f6d3449..72f1271 100644
--- a/lib/ASTMatchers/Dynamic/Diagnostics.cpp
+++ b/lib/ASTMatchers/Dynamic/Diagnostics.cpp
@@ -14,7 +14,7 @@
 namespace dynamic {
 Diagnostics::ArgStream Diagnostics::pushContextFrame(ContextType Type,
                                                      SourceRange Range) {
-  ContextStack.push_back(ContextFrame());
+  ContextStack.emplace_back();
   ContextFrame& data = ContextStack.back();
   data.Type = Type;
   data.Range = Range;
@@ -65,10 +65,10 @@
 
 Diagnostics::ArgStream Diagnostics::addError(const SourceRange &Range,
                                              ErrorType Error) {
-  Errors.push_back(ErrorContent());
+  Errors.emplace_back();
   ErrorContent &Last = Errors.back();
   Last.ContextStack = ContextStack;
-  Last.Messages.push_back(ErrorContent::Message());
+  Last.Messages.emplace_back();
   Last.Messages.back().Range = Range;
   Last.Messages.back().Type = Error;
   return ArgStream(&Last.Messages.back().Args);
diff --git a/lib/ASTMatchers/Dynamic/Registry.cpp b/lib/ASTMatchers/Dynamic/Registry.cpp
index e46e6da..59c204d 100644
--- a/lib/ASTMatchers/Dynamic/Registry.cpp
+++ b/lib/ASTMatchers/Dynamic/Registry.cpp
@@ -128,6 +128,7 @@
   REGISTER_MATCHER(constructorDecl);
   REGISTER_MATCHER(containsDeclaration);
   REGISTER_MATCHER(continueStmt);
+  REGISTER_MATCHER(conversionDecl);
   REGISTER_MATCHER(cStyleCastExpr);
   REGISTER_MATCHER(ctorInitializer);
   REGISTER_MATCHER(CUDAKernelCallExpr);
@@ -310,6 +311,7 @@
   REGISTER_MATCHER(specifiesTypeLoc);
   REGISTER_MATCHER(statementCountIs);
   REGISTER_MATCHER(staticCastExpr);
+  REGISTER_MATCHER(staticAssertDecl);
   REGISTER_MATCHER(stmt);
   REGISTER_MATCHER(stringLiteral);
   REGISTER_MATCHER(substNonTypeTemplateParmExpr);
diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp
index 94fa1d9..b2fdd27 100644
--- a/lib/Analysis/CFG.cpp
+++ b/lib/Analysis/CFG.cpp
@@ -203,9 +203,9 @@
   return D;
 }
 
-/// BlockScopePosPair - Structure for specifying position in CFG during its
-/// build process. It consists of CFGBlock that specifies position in CFG graph
-/// and  LocalScope::const_iterator that specifies position in LocalScope graph.
+/// Structure for specifying position in CFG during its build process. It
+/// consists of CFGBlock that specifies position in CFG and
+/// LocalScope::const_iterator that specifies position in LocalScope graph.
 struct BlockScopePosPair {
   BlockScopePosPair() : block(nullptr) {}
   BlockScopePosPair(CFGBlock *b, LocalScope::const_iterator scopePos)
@@ -1095,6 +1095,19 @@
       // generating destructors for the second time.
       return Visit(cast<ExprWithCleanups>(Init)->getSubExpr());
     }
+    if (BuildOpts.AddCXXDefaultInitExprInCtors) {
+      if (CXXDefaultInitExpr *Default = dyn_cast<CXXDefaultInitExpr>(Init)) {
+        // In general, appending the expression wrapped by a CXXDefaultInitExpr
+        // may cause the same Expr to appear more than once in the CFG. Doing it
+        // here is safe because there's only one initializer per field.
+        autoCreateBlock();
+        appendStmt(Block, Default);
+        if (Stmt *Child = Default->getExpr())
+          if (CFGBlock *R = Visit(Child))
+            Block = R;
+        return Block;
+      }
+    }
     return Visit(Init);
   }
 
@@ -1179,8 +1192,7 @@
     }
     Ty = Context->getBaseElementType(Ty);
 
-    const CXXDestructorDecl *Dtor = Ty->getAsCXXRecordDecl()->getDestructor();
-    if (Dtor->isNoReturn())
+    if (Ty->getAsCXXRecordDecl()->isAnyDestructorNoReturn())
       Block = createNoReturnBlock();
     else
       autoCreateBlock();
@@ -3682,7 +3694,7 @@
 
     const CXXDestructorDecl *Dtor = E->getTemporary()->getDestructor();
 
-    if (Dtor->isNoReturn()) {
+    if (Dtor->getParent()->isAnyDestructorNoReturn()) {
       // If the destructor is marked as a no-return destructor, we need to
       // create a new block for the destructor which does not have as a
       // successor anything built thus far. Control won't flow out of this
diff --git a/lib/Analysis/FormatString.cpp b/lib/Analysis/FormatString.cpp
index 1b60894..0948bc0 100644
--- a/lib/Analysis/FormatString.cpp
+++ b/lib/Analysis/FormatString.cpp
@@ -799,7 +799,8 @@
   llvm_unreachable("Invalid LengthModifier Kind!");
 }
 
-bool FormatSpecifier::hasStandardConversionSpecifier(const LangOptions &LangOpt) const {
+bool FormatSpecifier::hasStandardConversionSpecifier(
+    const LangOptions &LangOpt) const {
   switch (CS.getKind()) {
     case ConversionSpecifier::cArg:
     case ConversionSpecifier::dArg:
diff --git a/lib/Analysis/UninitializedValues.cpp b/lib/Analysis/UninitializedValues.cpp
index 3c7bc4e..f2f7919 100644
--- a/lib/Analysis/UninitializedValues.cpp
+++ b/lib/Analysis/UninitializedValues.cpp
@@ -36,7 +36,7 @@
 static bool isTrackedVar(const VarDecl *vd, const DeclContext *dc) {
   if (vd->isLocalVarDecl() && !vd->hasGlobalStorage() &&
       !vd->isExceptionVariable() && !vd->isInitCapture() &&
-      vd->getDeclContext() == dc) {
+      !vd->isImplicit() && vd->getDeclContext() == dc) {
     QualType ty = vd->getType();
     return ty->isScalarType() || ty->isVectorType() || ty->isRecordType();
   }
diff --git a/lib/Basic/Android.mk b/lib/Basic/Android.mk
index a5bd987..fe8061a 100644
--- a/lib/Basic/Android.mk
+++ b/lib/Basic/Android.mk
@@ -25,6 +25,7 @@
   CharInfo.cpp \
   Diagnostic.cpp \
   DiagnosticIDs.cpp \
+  DiagnosticOptions.cpp \
   FileManager.cpp \
   FileSystemStatCache.cpp \
   IdentifierTable.cpp \
diff --git a/lib/Basic/CMakeLists.txt b/lib/Basic/CMakeLists.txt
index 50a06d9..cfad8c3 100644
--- a/lib/Basic/CMakeLists.txt
+++ b/lib/Basic/CMakeLists.txt
@@ -61,6 +61,7 @@
   CharInfo.cpp
   Diagnostic.cpp
   DiagnosticIDs.cpp
+  DiagnosticOptions.cpp
   FileManager.cpp
   FileSystemStatCache.cpp
   IdentifierTable.cpp
diff --git a/lib/Basic/Diagnostic.cpp b/lib/Basic/Diagnostic.cpp
index 631b978..7f5a15d 100644
--- a/lib/Basic/Diagnostic.cpp
+++ b/lib/Basic/Diagnostic.cpp
@@ -112,7 +112,7 @@
 
   // Create a DiagState and DiagStatePoint representing diagnostic changes
   // through command-line.
-  DiagStates.push_back(DiagState());
+  DiagStates.emplace_back();
   DiagStatePoints.push_back(DiagStatePoint(&DiagStates.back(), FullSourceLoc()));
 }
 
@@ -321,18 +321,10 @@
   NumDiagArgs = 0;
 
   DiagRanges.clear();
-  DiagRanges.reserve(storedDiag.range_size());
-  for (StoredDiagnostic::range_iterator
-         RI = storedDiag.range_begin(),
-         RE = storedDiag.range_end(); RI != RE; ++RI)
-    DiagRanges.push_back(*RI);
+  DiagRanges.append(storedDiag.range_begin(), storedDiag.range_end());
 
   DiagFixItHints.clear();
-  DiagFixItHints.reserve(storedDiag.fixit_size());
-  for (StoredDiagnostic::fixit_iterator
-         FI = storedDiag.fixit_begin(),
-         FE = storedDiag.fixit_end(); FI != FE; ++FI)
-    DiagFixItHints.push_back(*FI);
+  DiagFixItHints.append(storedDiag.fixit_begin(), storedDiag.fixit_end());
 
   assert(Client && "DiagnosticConsumer not set!");
   Level DiagLevel = storedDiag.getLevel();
diff --git a/lib/Basic/DiagnosticOptions.cpp b/lib/Basic/DiagnosticOptions.cpp
new file mode 100644
index 0000000..f54a0ef
--- /dev/null
+++ b/lib/Basic/DiagnosticOptions.cpp
@@ -0,0 +1,24 @@
+//===--- DiagnosticOptions.cpp - C Language Family Diagnostic Handling ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements the DiagnosticOptions related interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/DiagnosticOptions.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace clang {
+
+raw_ostream& operator<<(raw_ostream& Out, DiagnosticLevelMask M) {
+  using UT = std::underlying_type<DiagnosticLevelMask>::type;
+  return Out << static_cast<UT>(M);
+}
+
+} // end namespace clang
diff --git a/lib/Basic/FileSystemStatCache.cpp b/lib/Basic/FileSystemStatCache.cpp
index 83e42bd..187ea37 100644
--- a/lib/Basic/FileSystemStatCache.cpp
+++ b/lib/Basic/FileSystemStatCache.cpp
@@ -15,19 +15,8 @@
 #include "clang/Basic/VirtualFileSystem.h"
 #include "llvm/Support/Path.h"
 
-// FIXME: This is terrible, we need this for ::close.
-#if !defined(_MSC_VER) && !defined(__MINGW32__)
-#include <unistd.h>
-#include <sys/uio.h>
-#else
-#include <io.h>
-#endif
 using namespace clang;
 
-#if defined(_MSC_VER)
-#define S_ISDIR(s) ((_S_IFDIR & s) !=0)
-#endif
-
 void FileSystemStatCache::anchor() { }
 
 static void copyStatusToFileData(const vfs::Status &Status,
diff --git a/lib/Basic/IdentifierTable.cpp b/lib/Basic/IdentifierTable.cpp
index bd2840d..4e06352 100644
--- a/lib/Basic/IdentifierTable.cpp
+++ b/lib/Basic/IdentifierTable.cpp
@@ -35,7 +35,7 @@
   HasMacro = false;
   HadMacro = false;
   IsExtension = false;
-  IsCXX11CompatKeyword = false;
+  IsFutureCompatKeyword = false;
   IsPoisoned = false;
   IsCPPOperatorKeyword = false;
   NeedsHandleIdentifier = false;
@@ -109,7 +109,8 @@
     KEYNOOPENCL = 0x02000,
     WCHARSUPPORT = 0x04000,
     HALFSUPPORT = 0x08000,
-    KEYALL = (0xffff & ~KEYNOMS18 &
+    KEYCONCEPTS = 0x10000,
+    KEYALL = (0x1ffff & ~KEYNOMS18 &
               ~KEYNOOPENCL) // KEYNOMS18 and KEYNOOPENCL are used to exclude.
   };
 
@@ -143,6 +144,7 @@
   // We treat bridge casts as objective-C keywords so we can warn on them
   // in non-arc mode.
   if (LangOpts.ObjC2 && (Flags & KEYARC)) return KS_Enabled;
+  if (LangOpts.ConceptsTS && (Flags & KEYCONCEPTS)) return KS_Enabled;
   if (LangOpts.CPlusPlus && (Flags & KEYCXX11)) return KS_Future;
   return KS_Disabled;
 }
@@ -157,7 +159,7 @@
 
   // Don't add this keyword under MSVCCompat.
   if (LangOpts.MSVCCompat && (Flags & KEYNOMS18) &&
-      !LangOpts.isCompatibleWithMSVC(19))
+      !LangOpts.isCompatibleWithMSVC(LangOptions::MSVC2015))
     return;
 
   // Don't add this keyword under OpenCL.
@@ -170,7 +172,7 @@
   IdentifierInfo &Info =
       Table.get(Keyword, AddResult == KS_Future ? tok::identifier : TokenCode);
   Info.setIsExtensionToken(AddResult == KS_Extension);
-  Info.setIsCXX11CompatKeyword(AddResult == KS_Future);
+  Info.setIsFutureCompatKeyword(AddResult == KS_Future);
 }
 
 /// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
@@ -215,6 +217,12 @@
   if (LangOpts.ParseUnknownAnytype)
     AddKeyword("__unknown_anytype", tok::kw___unknown_anytype, KEYALL,
                LangOpts, *this);
+
+  // FIXME: __declspec isn't really a CUDA extension, however it is required for
+  // supporting cuda_builtin_vars.h, which uses __declspec(property). Once that
+  // has been rewritten in terms of something more generic, remove this code.
+  if (LangOpts.CUDA)
+    AddKeyword("__declspec", tok::kw___declspec, KEYALL, LangOpts, *this);
 }
 
 /// \brief Checks if the specified token kind represents a keyword in the
diff --git a/lib/Basic/Module.cpp b/lib/Basic/Module.cpp
index 5fad1a9..7308665 100644
--- a/lib/Basic/Module.cpp
+++ b/lib/Basic/Module.cpp
@@ -25,14 +25,14 @@
 using namespace clang;
 
 Module::Module(StringRef Name, SourceLocation DefinitionLoc, Module *Parent,
-               bool IsFramework, bool IsExplicit)
+               bool IsFramework, bool IsExplicit, unsigned VisibilityID)
     : Name(Name), DefinitionLoc(DefinitionLoc), Parent(Parent), Directory(),
-      Umbrella(), ASTFile(nullptr), IsMissingRequirement(false),
-      IsAvailable(true), IsFromModuleFile(false), IsFramework(IsFramework),
-      IsExplicit(IsExplicit), IsSystem(false), IsExternC(false),
-      IsInferred(false), InferSubmodules(false), InferExplicitSubmodules(false),
-      InferExportWildcard(false), ConfigMacrosExhaustive(false),
-      NameVisibility(Hidden) {
+      Umbrella(), ASTFile(nullptr), VisibilityID(VisibilityID),
+      IsMissingRequirement(false), IsAvailable(true), IsFromModuleFile(false),
+      IsFramework(IsFramework), IsExplicit(IsExplicit), IsSystem(false),
+      IsExternC(false), IsInferred(false), InferSubmodules(false),
+      InferExplicitSubmodules(false), InferExportWildcard(false),
+      ConfigMacrosExhaustive(false), NameVisibility(Hidden) {
   if (Parent) {
     if (!Parent->isAvailable())
       IsAvailable = false;
@@ -138,11 +138,11 @@
   return Result;
 }
 
-const DirectoryEntry *Module::getUmbrellaDir() const {
-  if (const FileEntry *Header = getUmbrellaHeader())
-    return Header->getDir();
+Module::DirectoryName Module::getUmbrellaDir() const {
+  if (Header U = getUmbrellaHeader())
+    return {"", U.Entry->getDir()};
   
-  return Umbrella.dyn_cast<const DirectoryEntry *>();
+  return {UmbrellaAsWritten, Umbrella.dyn_cast<const DirectoryEntry *>()};
 }
 
 ArrayRef<const FileEntry *> Module::getTopHeaders(FileManager &FileMgr) {
@@ -334,15 +334,15 @@
     OS << "\n";
   }
   
-  if (const FileEntry *UmbrellaHeader = getUmbrellaHeader()) {
+  if (Header H = getUmbrellaHeader()) {
     OS.indent(Indent + 2);
     OS << "umbrella header \"";
-    OS.write_escaped(UmbrellaHeader->getName());
+    OS.write_escaped(H.NameAsWritten);
     OS << "\"\n";
-  } else if (const DirectoryEntry *UmbrellaDir = getUmbrellaDir()) {
+  } else if (DirectoryName D = getUmbrellaDir()) {
     OS.indent(Indent + 2);
     OS << "umbrella \"";
-    OS.write_escaped(UmbrellaDir->getName());
+    OS.write_escaped(D.NameAsWritten);
     OS << "\"\n";    
   }
 
@@ -475,4 +475,47 @@
   print(llvm::errs());
 }
 
+void VisibleModuleSet::setVisible(Module *M, SourceLocation Loc,
+                                  VisibleCallback Vis, ConflictCallback Cb) {
+  if (isVisible(M))
+    return;
 
+  ++Generation;
+
+  struct Visiting {
+    Module *M;
+    Visiting *ExportedBy;
+  };
+
+  std::function<void(Visiting)> VisitModule = [&](Visiting V) {
+    // Modules that aren't available cannot be made visible.
+    if (!V.M->isAvailable())
+      return;
+
+    // Nothing to do for a module that's already visible.
+    unsigned ID = V.M->getVisibilityID();
+    if (ImportLocs.size() <= ID)
+      ImportLocs.resize(ID + 1);
+    else if (ImportLocs[ID].isValid())
+      return;
+
+    ImportLocs[ID] = Loc;
+    Vis(M);
+
+    // Make any exported modules visible.
+    SmallVector<Module *, 16> Exports;
+    V.M->getExportedModules(Exports);
+    for (Module *E : Exports)
+      VisitModule({E, &V});
+
+    for (auto &C : V.M->Conflicts) {
+      if (isVisible(C.Other)) {
+        llvm::SmallVector<Module*, 8> Path;
+        for (Visiting *I = &V; I; I = I->ExportedBy)
+          Path.push_back(I->M);
+        Cb(Path, C.Other, C.Message);
+      }
+    }
+  };
+  VisitModule({M, nullptr});
+}
diff --git a/lib/Basic/Sanitizers.cpp b/lib/Basic/Sanitizers.cpp
index e9aaa36..8c4884b 100644
--- a/lib/Basic/Sanitizers.cpp
+++ b/lib/Basic/Sanitizers.cpp
@@ -11,25 +11,48 @@
 //
 //===----------------------------------------------------------------------===//
 #include "clang/Basic/Sanitizers.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/MathExtras.h"
 
 using namespace clang;
 
-SanitizerSet::SanitizerSet() : Kinds(0) {}
+SanitizerSet::SanitizerSet() : Mask(0) {}
 
-bool SanitizerSet::has(SanitizerKind K) const {
-  unsigned Bit = static_cast<unsigned>(K);
-  return Kinds & (1 << Bit);
+bool SanitizerSet::has(SanitizerMask K) const {
+  assert(llvm::countPopulation(K) == 1);
+  return Mask & K;
 }
 
-void SanitizerSet::set(SanitizerKind K, bool Value) {
-  unsigned Bit = static_cast<unsigned>(K);
-  Kinds = Value ? (Kinds | (1 << Bit)) : (Kinds & ~(1 << Bit));
+void SanitizerSet::set(SanitizerMask K, bool Value) {
+  assert(llvm::countPopulation(K) == 1);
+  Mask = Value ? (Mask | K) : (Mask & ~K);
 }
 
 void SanitizerSet::clear() {
-  Kinds = 0;
+  Mask = 0;
 }
 
 bool SanitizerSet::empty() const {
-  return Kinds == 0;
+  return Mask == 0;
+}
+
+SanitizerMask clang::parseSanitizerValue(StringRef Value, bool AllowGroups) {
+  SanitizerMask ParsedKind = llvm::StringSwitch<SanitizerMask>(Value)
+#define SANITIZER(NAME, ID) .Case(NAME, SanitizerKind::ID)
+#define SANITIZER_GROUP(NAME, ID, ALIAS)                                       \
+  .Case(NAME, AllowGroups ? SanitizerKind::ID##Group : 0)
+#include "clang/Basic/Sanitizers.def"
+    .Default(0);
+  return ParsedKind;
+}
+
+SanitizerMask clang::expandSanitizerGroups(SanitizerMask Kinds) {
+#define SANITIZER(NAME, ID)
+#define SANITIZER_GROUP(NAME, ID, ALIAS)                                       \
+  if (Kinds & SanitizerKind::ID##Group)                                        \
+    Kinds |= SanitizerKind::ID;
+#include "clang/Basic/Sanitizers.def"
+  return Kinds;
 }
diff --git a/lib/Basic/SourceLocation.cpp b/lib/Basic/SourceLocation.cpp
index 6b885a7..d254e86 100644
--- a/lib/Basic/SourceLocation.cpp
+++ b/lib/Basic/SourceLocation.cpp
@@ -134,7 +134,7 @@
 
 StringRef FullSourceLoc::getBufferData(bool *Invalid) const {
   assert(isValid());
-  return SrcMgr->getBuffer(SrcMgr->getFileID(*this), Invalid)->getBuffer();;
+  return SrcMgr->getBuffer(SrcMgr->getFileID(*this), Invalid)->getBuffer();
 }
 
 std::pair<FileID, unsigned> FullSourceLoc::getDecomposedLoc() const {
diff --git a/lib/Basic/TargetInfo.cpp b/lib/Basic/TargetInfo.cpp
index 871bbd5..330258b 100644
--- a/lib/Basic/TargetInfo.cpp
+++ b/lib/Basic/TargetInfo.cpp
@@ -36,6 +36,7 @@
   LongWidth = LongAlign = 32;
   LongLongWidth = LongLongAlign = 64;
   SuitableAlign = 64;
+  DefaultAlignForAttributeAligned = 128;
   MinGlobalAlign = 0;
   HalfWidth = 16;
   HalfAlign = 16;
diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp
index fd8215b..9d8757a 100644
--- a/lib/Basic/Targets.cpp
+++ b/lib/Basic/Targets.cpp
@@ -27,6 +27,7 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetParser.h"
 #include <algorithm>
 #include <memory>
 using namespace clang;
@@ -388,7 +389,7 @@
     if (Triple.getEnvironment() == llvm::Triple::Android) {
       Builder.defineMacro("__ANDROID__", "1");
       unsigned Maj, Min, Rev;
-      Triple.getOSVersion(Maj, Min, Rev);
+      Triple.getEnvironmentVersion(Maj, Min, Rev);
       this->PlatformName = "android";
       this->PlatformMinVersion = VersionTuple(Maj, Min, Rev);
     }
@@ -657,7 +658,7 @@
       // FIXME We cannot encode the revision information into 32-bits
       Builder.defineMacro("_MSC_BUILD", Twine(1));
 
-      if (Opts.CPlusPlus11 && Opts.isCompatibleWithMSVC(19))
+      if (Opts.CPlusPlus11 && Opts.isCompatibleWithMSVC(LangOptions::MSVC2015))
         Builder.defineMacro("_HAS_CHAR16_T_LANGUAGE_SUPPORT", Twine(1));
     }
 
@@ -991,6 +992,12 @@
   bool hasSjLjLowering() const override {
     return true;
   }
+
+  bool useFloat128ManglingForLongDouble() const override {
+    return LongDoubleWidth == 128 &&
+           LongDoubleFormat == &llvm::APFloat::PPCDoubleDouble &&
+           getTriple().isOSBinFormatELF();
+  }
 };
 
 const Builtin::Info PPCTargetInfo::BuiltinInfo[] = {
@@ -1028,6 +1035,7 @@
 
     if (Feature == "power8-vector") {
       HasP8Vector = true;
+      HasVSX = true;
       continue;
     }
 
@@ -1038,6 +1046,7 @@
 
     if (Feature == "direct-move") {
       HasDirectMove = true;
+      HasVSX = true;
       continue;
     }
 
@@ -1207,6 +1216,14 @@
     Builder.defineMacro("__CRYPTO__");
   if (HasHTM)
     Builder.defineMacro("__HTM__");
+  if (getTriple().getArch() == llvm::Triple::ppc64le ||
+      (defs & ArchDefinePwr8) || (CPU == "pwr8")) {
+    Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
+    Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
+    Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");
+    if (PointerWidth == 64)
+      Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
+  }
 
   // FIXME: The following are not yet generated here by Clang, but are
   //        generated by GCC:
@@ -1695,6 +1712,10 @@
     GK_SEA_ISLANDS
   } GPU;
 
+  bool hasFP64:1;
+  bool hasFMAF:1;
+  bool hasLDEXPF:1;
+
 public:
   R600TargetInfo(const llvm::Triple &Triple)
       : TargetInfo(Triple) {
@@ -1702,9 +1723,15 @@
     if (Triple.getArch() == llvm::Triple::amdgcn) {
       DescriptionString = DescriptionStringSI;
       GPU = GK_SOUTHERN_ISLANDS;
+      hasFP64 = true;
+      hasFMAF = true;
+      hasLDEXPF = true;
     } else {
       DescriptionString = DescriptionStringR600;
       GPU = GK_R600;
+      hasFP64 = false;
+      hasFMAF = false;
+      hasLDEXPF = false;
     }
     AddrSpaceMap = &R600AddrSpaceMap;
     UseAddrSpaceMapMangling = true;
@@ -1751,8 +1778,13 @@
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override {
     Builder.defineMacro("__R600__");
-    if (GPU >= GK_SOUTHERN_ISLANDS && Opts.OpenCL)
+    if (hasFMAF)
+      Builder.defineMacro("__HAS_FMAF__");
+    if (hasLDEXPF)
+      Builder.defineMacro("__HAS_LDEXPF__");
+    if (hasFP64 && Opts.OpenCL) {
       Builder.defineMacro("cl_khr_fp64");
+    }
   }
 
   BuiltinVaListKind getBuiltinVaListKind() const override {
@@ -1810,16 +1842,25 @@
     case GK_EVERGREEN:
     case GK_NORTHERN_ISLANDS:
       DescriptionString = DescriptionStringR600;
+      hasFP64 = false;
+      hasFMAF = false;
+      hasLDEXPF = false;
       break;
     case GK_R600_DOUBLE_OPS:
     case GK_R700_DOUBLE_OPS:
     case GK_EVERGREEN_DOUBLE_OPS:
     case GK_CAYMAN:
       DescriptionString = DescriptionStringR600DoubleOps;
+      hasFP64 = true;
+      hasFMAF = true;
+      hasLDEXPF = false;
       break;
     case GK_SOUTHERN_ISLANDS:
     case GK_SEA_ISLANDS:
       DescriptionString = DescriptionStringSI;
+      hasFP64 = true;
+      hasFMAF = true;
+      hasLDEXPF = true;
       break;
     }
 
@@ -3531,8 +3572,9 @@
     DoubleAlign = LongLongAlign = 64;
     bool IsWinCOFF =
         getTriple().isOSWindows() && getTriple().isOSBinFormatCOFF();
-    DescriptionString = IsWinCOFF ? "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-S32"
-                                  : "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-S32";
+    DescriptionString = IsWinCOFF
+                            ? "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+                            : "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32";
   }
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override {
@@ -3560,11 +3602,8 @@
 };
 } // end anonymous namespace
 
-static void addMinGWDefines(const LangOptions &Opts, MacroBuilder &Builder) {
-  Builder.defineMacro("__MSVCRT__");
-  Builder.defineMacro("__MINGW32__");
-
-  // Mingw defines __declspec(a) to __attribute__((a)).  Clang supports
+static void addCygMingDefines(const LangOptions &Opts, MacroBuilder &Builder) {
+  // Mingw and cygwin define __declspec(a) to __attribute__((a)).  Clang supports
   // __declspec natively under -fms-extensions, but we define a no-op __declspec
   // macro anyway for pre-processor compatibility.
   if (Opts.MicrosoftExt)
@@ -3587,6 +3626,12 @@
   }
 }
 
+static void addMinGWDefines(const LangOptions &Opts, MacroBuilder &Builder) {
+  Builder.defineMacro("__MSVCRT__");
+  Builder.defineMacro("__MINGW32__");
+  addCygMingDefines(Opts, Builder);
+}
+
 namespace {
 // x86-32 MinGW target
 class MinGWX86_32TargetInfo : public WindowsX86_32TargetInfo {
@@ -3611,7 +3656,7 @@
     TLSSupported = false;
     WCharType = UnsignedShort;
     DoubleAlign = LongLongAlign = 64;
-    DescriptionString = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-S32";
+    DescriptionString = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32";
   }
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override {
@@ -3619,6 +3664,7 @@
     Builder.defineMacro("_X86_");
     Builder.defineMacro("__CYGWIN__");
     Builder.defineMacro("__CYGWIN32__");
+    addCygMingDefines(Opts, Builder);
     DefineStd(Builder, "unix", Opts);
     if (Opts.CPlusPlus)
       Builder.defineMacro("_GNU_SOURCE");
@@ -4145,8 +4191,13 @@
     return false;
   }
 
+  // FIXME: This should be based on Arch attributes, not CPU names.
   void getDefaultFeatures(llvm::StringMap<bool> &Features) const override {
     StringRef ArchName = getTriple().getArchName();
+    unsigned ArchKind = llvm::ARMTargetParser::parseArch(ArchName);
+    bool IsV8 = (ArchKind == llvm::ARM::AK_ARMV8A ||
+                 ArchKind == llvm::ARM::AK_ARMV8_1A);
+
     if (CPU == "arm1136jf-s" || CPU == "arm1176jzf-s" || CPU == "mpcore")
       Features["vfp2"] = true;
     else if (CPU == "cortex-a8" || CPU == "cortex-a9") {
@@ -4163,25 +4214,15 @@
       Features["neon"] = true;
       Features["hwdiv"] = true;
       Features["hwdiv-arm"] = true;
-    } else if (CPU == "cyclone") {
-      Features["v8fp"] = true;
-      Features["neon"] = true;
-      Features["hwdiv"] = true;
-      Features["hwdiv-arm"] = true;
-    } else if (CPU == "cortex-a53" || CPU == "cortex-a57" || CPU == "cortex-a72") {
+    } else if (CPU == "cyclone" || CPU == "cortex-a53" || CPU == "cortex-a57" ||
+               CPU == "cortex-a72") {
       Features["fp-armv8"] = true;
       Features["neon"] = true;
       Features["hwdiv"] = true;
       Features["hwdiv-arm"] = true;
       Features["crc"] = true;
       Features["crypto"] = true;
-    } else if (CPU == "cortex-r5" || CPU == "cortex-r7" ||
-               // Enable the hwdiv extension for all v8a AArch32 cores by
-               // default.
-               ArchName == "armv8a" || ArchName == "armv8" ||
-               ArchName == "armebv8a" || ArchName == "armebv8" ||
-               ArchName == "thumbv8a" || ArchName == "thumbv8" ||
-               ArchName == "thumbebv8a" || ArchName == "thumbebv8") {
+    } else if (CPU == "cortex-r5" || CPU == "cortex-r7" || IsV8) {
       Features["hwdiv"] = true;
       Features["hwdiv-arm"] = true;
     } else if (CPU == "cortex-m3" || CPU == "cortex-m4" || CPU == "cortex-m7" ||
@@ -4244,12 +4285,10 @@
       Features.push_back("-neonfp");
 
     // Remove front-end specific options which the backend handles differently.
-    const StringRef FrontEndFeatures[] = { "+soft-float", "+soft-float-abi" };
-    for (const auto &FEFeature : FrontEndFeatures) {
-      auto Feature = std::find(Features.begin(), Features.end(), FEFeature);
-      if (Feature != Features.end())
-        Features.erase(Feature);
-    }
+    auto Feature =
+        std::find(Features.begin(), Features.end(), "+soft-float-abi");
+    if (Feature != Features.end())
+      Features.erase(Feature);
 
     return true;
   }
@@ -4264,47 +4303,71 @@
         .Case("hwdiv-arm", HWDiv & HWDivARM)
         .Default(false);
   }
-  // FIXME: Should we actually have some table instead of these switches?
-  static const char *getCPUDefineSuffix(StringRef Name) {
-    return llvm::StringSwitch<const char *>(Name)
-        .Cases("arm8", "arm810", "4")
-        .Cases("strongarm", "strongarm110", "strongarm1100", "strongarm1110",
-               "4")
-        .Cases("arm7tdmi", "arm7tdmi-s", "arm710t", "arm720t", "arm9", "4T")
-        .Cases("arm9tdmi", "arm920", "arm920t", "arm922t", "arm940t", "4T")
-        .Case("ep9312", "4T")
-        .Cases("arm10tdmi", "arm1020t", "5T")
-        .Cases("arm9e", "arm946e-s", "arm966e-s", "arm968e-s", "5TE")
-        .Case("arm926ej-s", "5TEJ")
-        .Cases("arm10e", "arm1020e", "arm1022e", "5TE")
-        .Cases("xscale", "iwmmxt", "5TE")
-        .Case("arm1136j-s", "6J")
-        .Case("arm1136jf-s", "6")
-        .Cases("mpcorenovfp", "mpcore", "6K")
-        .Cases("arm1176jz-s", "arm1176jzf-s", "6K")
-        .Cases("arm1156t2-s", "arm1156t2f-s", "6T2")
-        .Cases("cortex-a5", "cortex-a7", "cortex-a8", "7A")
-        .Cases("cortex-a9", "cortex-a12", "cortex-a15", "cortex-a17", "krait",
-               "7A")
-        .Cases("cortex-r4", "cortex-r4f", "cortex-r5", "cortex-r7", "7R")
-        .Case("swift", "7S")
-        .Case("cyclone", "8A")
-        .Cases("sc300", "cortex-m3", "7M")
-        .Cases("cortex-m4", "cortex-m7", "7EM")
-        .Cases("sc000", "cortex-m0", "cortex-m0plus", "cortex-m1", "6M")
-        .Cases("cortex-a53", "cortex-a57", "cortex-a72", "8A")
-        .Default(nullptr);
+  const char *getCPUDefineSuffix(StringRef Name) const {
+    if(Name == "generic") {
+      auto subarch = getTriple().getSubArch();
+      switch (subarch) {
+        case llvm::Triple::SubArchType::ARMSubArch_v8_1a: 
+          return "8_1A";
+        default:
+          break;
+      }
+    }
+
+    unsigned ArchKind = llvm::ARMTargetParser::parseCPUArch(Name);
+    if (ArchKind == llvm::ARM::AK_INVALID)
+      return "";
+
+    // For most sub-arches, the build attribute CPU name is enough.
+    // For Cortex variants, it's slightly different.
+    switch(ArchKind) {
+    default:
+      return llvm::ARMTargetParser::getCPUAttr(ArchKind);
+    case llvm::ARM::AK_ARMV6M:
+    case llvm::ARM::AK_ARMV6SM:
+      return "6M";
+    case llvm::ARM::AK_ARMV7:
+    case llvm::ARM::AK_ARMV7A:
+    case llvm::ARM::AK_ARMV7S:
+      return "7A";
+    case llvm::ARM::AK_ARMV7R:
+      return "7R";
+    case llvm::ARM::AK_ARMV7M:
+      return "7M";
+    case llvm::ARM::AK_ARMV7EM:
+      return "7EM";
+    case llvm::ARM::AK_ARMV8A:
+      return "8A";
+    case llvm::ARM::AK_ARMV8_1A:
+      return "8_1A";
+    }
   }
-  static const char *getCPUProfile(StringRef Name) {
-    return llvm::StringSwitch<const char *>(Name)
-        .Cases("cortex-a5", "cortex-a7", "cortex-a8", "A")
-        .Cases("cortex-a9", "cortex-a12", "cortex-a15", "cortex-a17", "krait",
-               "A")
-        .Cases("cortex-a53", "cortex-a57", "cortex-a72", "A")
-        .Cases("cortex-m3", "cortex-m4", "cortex-m0", "cortex-m0plus", "M")
-        .Cases("cortex-m1", "cortex-m7", "sc000", "sc300", "M")
-        .Cases("cortex-r4",  "cortex-r4f", "cortex-r5", "cortex-r7", "R")
-        .Default("");
+  const char *getCPUProfile(StringRef Name) const {
+    if(Name == "generic") {
+      auto subarch = getTriple().getSubArch();
+      switch (subarch) {
+        case llvm::Triple::SubArchType::ARMSubArch_v8_1a: 
+          return "A";
+        default:
+          break;
+      }
+    }
+
+    unsigned CPUArch = llvm::ARMTargetParser::parseCPUArch(Name);
+    if (CPUArch == llvm::ARM::AK_INVALID)
+      return "";
+
+    StringRef ArchName = llvm::ARMTargetParser::getArchName(CPUArch);
+    switch(llvm::ARMTargetParser::parseArchProfile(ArchName)) {
+      case llvm::ARM::PK_A:
+        return "A";
+      case llvm::ARM::PK_R:
+        return "R";
+      case llvm::ARM::PK_M:
+        return "M";
+      default:
+        return "";
+    }
   }
   bool setCPU(const std::string &Name) override {
     if (!getCPUDefineSuffix(Name))
@@ -4331,6 +4394,7 @@
     // We check both CPUArchVer and ArchName because when only triple is
     // specified, the default CPU is arm1136j-s.
     return ArchName.endswith("v6t2") || ArchName.endswith("v7") ||
+           ArchName.endswith("v8.1a") ||
            ArchName.endswith("v8") || CPUArch == "6T2" || CPUArchVer >= 7;
   }
   void getTargetDefines(const LangOptions &Opts,
@@ -4907,6 +4971,12 @@
 
     if (Crypto)
       Builder.defineMacro("__ARM_FEATURE_CRYPTO");
+
+    // All of the __sync_(bool|val)_compare_and_swap_(1|2|4|8) builtins work.
+    Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
+    Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
+    Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");
+    Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
   }
 
   void getTargetBuiltins(const Builtin::Info *&Records,
@@ -5316,14 +5386,18 @@
   static const char * const GCCRegNames[];
   bool SoftFloat;
 public:
-  SparcTargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {}
+  SparcTargetInfo(const llvm::Triple &Triple)
+      : TargetInfo(Triple), SoftFloat(false) {}
 
   bool handleTargetFeatures(std::vector<std::string> &Features,
                             DiagnosticsEngine &Diags) override {
-    SoftFloat = false;
-    for (unsigned i = 0, e = Features.size(); i != e; ++i)
-      if (Features[i] == "+soft-float")
-        SoftFloat = true;
+    // The backend doesn't actually handle soft float yet, but in case someone
+    // is using the support for the front end continue to support it.
+    auto Feature = std::find(Features.begin(), Features.end(), "+soft-float");
+    if (Feature != Features.end()) {
+      SoftFloat = true;
+      Features.erase(Feature);
+    }
     return true;
   }
   void getTargetDefines(const LangOptions &Opts,
@@ -5433,6 +5507,16 @@
 public:
   SparcV8TargetInfo(const llvm::Triple &Triple) : SparcTargetInfo(Triple) {
     DescriptionString = "E-m:e-p:32:32-i64:64-f128:64-n32-S64";
+    // NetBSD uses long (same as llvm default); everyone else uses int.
+    if (getTriple().getOS() == llvm::Triple::NetBSD) {
+      SizeType = UnsignedLong;
+      IntPtrType = SignedLong;
+      PtrDiffType = SignedLong;
+    } else {
+      SizeType = UnsignedInt;
+      IntPtrType = SignedInt;
+      PtrDiffType = SignedInt;
+    }
   }
 
   void getTargetDefines(const LangOptions &Opts,
@@ -5442,6 +5526,15 @@
   }
 };
 
+// SPARCV8el is the 32-bit little-endian mode selected by Triple::sparcel.
+class SparcV8elTargetInfo : public SparcV8TargetInfo {
+ public:
+  SparcV8elTargetInfo(const llvm::Triple &Triple) : SparcV8TargetInfo(Triple) {
+    DescriptionString = "e-m:e-p:32:32-i64:64-f128:64-n32-S64";
+    BigEndian = false;
+  }
+};
+
 // SPARC v9 is the 64-bit mode selected by Triple::sparcv9.
 class SparcV9TargetInfo : public SparcTargetInfo {
 public:
@@ -5496,24 +5589,16 @@
   }
 };
 
-class SolarisSparcV8TargetInfo : public SolarisTargetInfo<SparcV8TargetInfo> {
-public:
-  SolarisSparcV8TargetInfo(const llvm::Triple &Triple)
-      : SolarisTargetInfo<SparcV8TargetInfo>(Triple) {
-    SizeType = UnsignedInt;
-    PtrDiffType = SignedInt;
-  }
-};
-
 class SystemZTargetInfo : public TargetInfo {
   static const Builtin::Info BuiltinInfo[];
   static const char *const GCCRegNames[];
   std::string CPU;
   bool HasTransactionalExecution;
+  bool HasVector;
 
 public:
   SystemZTargetInfo(const llvm::Triple &Triple)
-    : TargetInfo(Triple), CPU("z10"), HasTransactionalExecution(false) {
+    : TargetInfo(Triple), CPU("z10"), HasTransactionalExecution(false), HasVector(false) {
     IntMaxType = SignedLong;
     Int64Type = SignedLong;
     TLSSupported = true;
@@ -5523,6 +5608,7 @@
     LongDoubleWidth = 128;
     LongDoubleAlign = 64;
     LongDoubleFormat = &llvm::APFloat::IEEEquad;
+    DefaultAlignForAttributeAligned = 64;
     MinGlobalAlign = 16;
     DescriptionString = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64";
     MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
@@ -5565,6 +5651,7 @@
       .Case("z10", true)
       .Case("z196", true)
       .Case("zEC12", true)
+      .Case("z13", true)
       .Default(false);
 
     return CPUKnown;
@@ -5572,6 +5659,10 @@
   void getDefaultFeatures(llvm::StringMap<bool> &Features) const override {
     if (CPU == "zEC12")
       Features["transactional-execution"] = true;
+    if (CPU == "z13") {
+      Features["transactional-execution"] = true;
+      Features["vector"] = true;
+    }
   }
 
   bool handleTargetFeatures(std::vector<std::string> &Features,
@@ -5580,6 +5671,14 @@
     for (unsigned i = 0, e = Features.size(); i != e; ++i) {
       if (Features[i] == "+transactional-execution")
         HasTransactionalExecution = true;
+      if (Features[i] == "+vector")
+        HasVector = true;
+    }
+    // If we use the vector ABI, vector types are 64-bit aligned.
+    if (HasVector) {
+      MaxVectorAlign = 64;
+      DescriptionString = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64"
+                          "-v128:64-a:8:16-n32:64";
     }
     return true;
   }
@@ -5588,8 +5687,15 @@
     return llvm::StringSwitch<bool>(Feature)
         .Case("systemz", true)
         .Case("htm", HasTransactionalExecution)
+        .Case("vx", HasVector)
         .Default(false);
   }
+
+  StringRef getABI() const override {
+    if (HasVector)
+      return "vector";
+    return "";
+  }
 };
 
 const Builtin::Info SystemZTargetInfo::BuiltinInfo[] = {
@@ -5792,6 +5898,60 @@
                           unsigned &NumAliases) const override {}
   };
 
+class BPFTargetInfo : public TargetInfo {
+public:
+  BPFTargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) {
+    LongWidth = LongAlign = PointerWidth = PointerAlign = 64;
+    SizeType    = UnsignedLong;
+    PtrDiffType = SignedLong;
+    IntPtrType  = SignedLong;
+    IntMaxType  = SignedLong;
+    Int64Type   = SignedLong;
+    RegParmMax = 5;
+    if (Triple.getArch() == llvm::Triple::bpfeb) {
+      BigEndian = true;
+      DescriptionString = "E-m:e-p:64:64-i64:64-n32:64-S128";
+    } else {
+      BigEndian = false;
+      DescriptionString = "e-m:e-p:64:64-i64:64-n32:64-S128";
+    }
+    MaxAtomicPromoteWidth = 64;
+    MaxAtomicInlineWidth = 64;
+    TLSSupported = false;
+  }
+  void getTargetDefines(const LangOptions &Opts,
+                        MacroBuilder &Builder) const override {
+    DefineStd(Builder, "bpf", Opts);
+    Builder.defineMacro("__BPF__");
+  }
+  bool hasFeature(StringRef Feature) const override {
+    return Feature == "bpf";
+  }
+
+  void getTargetBuiltins(const Builtin::Info *&Records,
+                         unsigned &NumRecords) const override {}
+  const char *getClobbers() const override {
+    return "";
+  }
+  BuiltinVaListKind getBuiltinVaListKind() const override {
+    return TargetInfo::VoidPtrBuiltinVaList;
+  }
+  void getGCCRegNames(const char * const *&Names,
+                      unsigned &NumNames) const override {
+    Names = nullptr;
+    NumNames = 0;
+  }
+  bool validateAsmConstraint(const char *&Name,
+                             TargetInfo::ConstraintInfo &info) const override {
+    return true;
+  }
+  void getGCCRegAliases(const GCCRegAlias *&Aliases,
+                        unsigned &NumAliases) const override {
+    Aliases = nullptr;
+    NumAliases = 0;
+  }
+};
+
 class MipsTargetInfoBase : public TargetInfo {
   virtual void setDescriptionString() = 0;
 
@@ -6081,12 +6241,6 @@
         IsNan2008 = false;
     }
 
-    // Remove front-end specific options.
-    std::vector<std::string>::iterator it =
-      std::find(Features.begin(), Features.end(), "+soft-float");
-    if (it != Features.end())
-      Features.erase(it);
-
     setDescriptionString();
 
     return true;
@@ -6796,6 +6950,10 @@
       return new ARMbeTargetInfo(Triple);
     }
 
+  case llvm::Triple::bpfeb:
+  case llvm::Triple::bpfel:
+    return new BPFTargetInfo(Triple);
+
   case llvm::Triple::msp430:
     return new MSP430TargetInfo(Triple);
 
@@ -6863,10 +7021,10 @@
 
   case llvm::Triple::le32:
     switch (os) {
-      case llvm::Triple::NaCl:
-        return new NaClTargetInfo<PNaClTargetInfo>(Triple);
-      default:
-        return nullptr;
+    case llvm::Triple::NaCl:
+      return new NaClTargetInfo<PNaClTargetInfo>(Triple);
+    default:
+      return nullptr;
     }
 
   case llvm::Triple::le64:
@@ -6930,7 +7088,7 @@
     case llvm::Triple::Linux:
       return new LinuxTargetInfo<SparcV8TargetInfo>(Triple);
     case llvm::Triple::Solaris:
-      return new SolarisSparcV8TargetInfo(Triple);
+      return new SolarisTargetInfo<SparcV8TargetInfo>(Triple);
     case llvm::Triple::NetBSD:
       return new NetBSDTargetInfo<SparcV8TargetInfo>(Triple);
     case llvm::Triple::OpenBSD:
@@ -6941,6 +7099,21 @@
       return new SparcV8TargetInfo(Triple);
     }
 
+  // The 'sparcel' architecture copies all the above cases except for Solaris.
+  case llvm::Triple::sparcel:
+    switch (os) {
+    case llvm::Triple::Linux:
+      return new LinuxTargetInfo<SparcV8elTargetInfo>(Triple);
+    case llvm::Triple::NetBSD:
+      return new NetBSDTargetInfo<SparcV8elTargetInfo>(Triple);
+    case llvm::Triple::OpenBSD:
+      return new OpenBSDTargetInfo<SparcV8elTargetInfo>(Triple);
+    case llvm::Triple::RTEMS:
+      return new RTEMSTargetInfo<SparcV8elTargetInfo>(Triple);
+    default:
+      return new SparcV8elTargetInfo(Triple);
+    }
+
   case llvm::Triple::sparcv9:
     switch (os) {
     case llvm::Triple::Linux:
@@ -6973,6 +7146,8 @@
       return new DarwinI386TargetInfo(Triple);
 
     switch (os) {
+    case llvm::Triple::CloudABI:
+      return new CloudABITargetInfo<X86_32TargetInfo>(Triple);
     case llvm::Triple::Linux: {
       switch (Triple.getEnvironment()) {
       default:
@@ -7067,18 +7242,18 @@
       return new X86_64TargetInfo(Triple);
     }
 
-    case llvm::Triple::spir: {
-      if (Triple.getOS() != llvm::Triple::UnknownOS ||
-          Triple.getEnvironment() != llvm::Triple::UnknownEnvironment)
-        return nullptr;
-      return new SPIR32TargetInfo(Triple);
-    }
-    case llvm::Triple::spir64: {
-      if (Triple.getOS() != llvm::Triple::UnknownOS ||
-          Triple.getEnvironment() != llvm::Triple::UnknownEnvironment)
-        return nullptr;
-      return new SPIR64TargetInfo(Triple);
-    }
+  case llvm::Triple::spir: {
+    if (Triple.getOS() != llvm::Triple::UnknownOS ||
+        Triple.getEnvironment() != llvm::Triple::UnknownEnvironment)
+      return nullptr;
+    return new SPIR32TargetInfo(Triple);
+  }
+  case llvm::Triple::spir64: {
+    if (Triple.getOS() != llvm::Triple::UnknownOS ||
+        Triple.getEnvironment() != llvm::Triple::UnknownEnvironment)
+      return nullptr;
+    return new SPIR64TargetInfo(Triple);
+  }
   }
 }
 
diff --git a/lib/CodeGen/ABIInfo.h b/lib/CodeGen/ABIInfo.h
index 7e7f7fa..cc8652e 100644
--- a/lib/CodeGen/ABIInfo.h
+++ b/lib/CodeGen/ABIInfo.h
@@ -87,6 +87,8 @@
     virtual bool isHomogeneousAggregateSmallEnough(const Type *Base,
                                                    uint64_t Members) const;
 
+    virtual bool shouldSignExtUnsignedType(QualType Ty) const;
+
     bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
                                 uint64_t &Members) const;
 
diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp
index 7bc351a..30e9ebf 100644
--- a/lib/CodeGen/BackendUtil.cpp
+++ b/lib/CodeGen/BackendUtil.cpp
@@ -189,7 +189,14 @@
   const PassManagerBuilderWrapper &BuilderWrapper =
       static_cast<const PassManagerBuilderWrapper&>(Builder);
   const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts();
-  PM.add(createSanitizerCoverageModulePass(CGOpts.SanitizeCoverage));
+  SanitizerCoverageOptions Opts;
+  Opts.CoverageType =
+      static_cast<SanitizerCoverageOptions::Type>(CGOpts.SanitizeCoverageType);
+  Opts.IndirectCalls = CGOpts.SanitizeCoverageIndirectCalls;
+  Opts.TraceBB = CGOpts.SanitizeCoverageTraceBB;
+  Opts.TraceCmp = CGOpts.SanitizeCoverageTraceCmp;
+  Opts.Use8bitCounters = CGOpts.SanitizeCoverage8bitCounters;
+  PM.add(createSanitizerCoverageModulePass(Opts));
 }
 
 static void addAddressSanitizerPasses(const PassManagerBuilder &Builder,
@@ -276,7 +283,6 @@
   PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP;
   PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop;
 
-  PMBuilder.DisableTailCalls = CodeGenOpts.DisableTailCalls;
   PMBuilder.DisableUnitAtATime = !CodeGenOpts.UnitAtATime;
   PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops;
   PMBuilder.MergeFunctions = CodeGenOpts.MergeFunctions;
@@ -306,7 +312,9 @@
                            addBoundsCheckingPass);
   }
 
-  if (CodeGenOpts.SanitizeCoverage) {
+  if (CodeGenOpts.SanitizeCoverageType ||
+      CodeGenOpts.SanitizeCoverageIndirectCalls ||
+      CodeGenOpts.SanitizeCoverageTraceCmp) {
     PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast,
                            addSanitizerCoveragePass);
     PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0,
@@ -394,6 +402,7 @@
   if (CodeGenOpts.ProfileInstrGenerate) {
     InstrProfOptions Options;
     Options.NoRedZone = CodeGenOpts.DisableRedZone;
+    Options.InstrProfileOutput = CodeGenOpts.InstrProfileOutput;
     MPM->add(createInstrProfilingPass(Options));
   }
 
@@ -443,10 +452,8 @@
   std::string FeaturesStr;
   if (!TargetOpts.Features.empty()) {
     SubtargetFeatures Features;
-    for (std::vector<std::string>::const_iterator
-           it = TargetOpts.Features.begin(),
-           ie = TargetOpts.Features.end(); it != ie; ++it)
-      Features.AddFeature(*it);
+    for (const std::string &Feature : TargetOpts.Features)
+      Features.AddFeature(Feature);
     FeaturesStr = Features.getString();
   }
 
@@ -470,6 +477,9 @@
 
   llvm::TargetOptions Options;
 
+  if (!TargetOpts.Reciprocals.empty())
+    Options.Reciprocals = TargetRecip(TargetOpts.Reciprocals);
+
   Options.ThreadModel =
     llvm::StringSwitch<llvm::ThreadModel::Model>(CodeGenOpts.ThreadModel)
       .Case("posix", llvm::ThreadModel::POSIX)
@@ -481,15 +491,6 @@
   if (CodeGenOpts.CompressDebugSections)
     Options.CompressDebugSections = true;
 
-  // Set frame pointer elimination mode.
-  if (!CodeGenOpts.DisableFPElim) {
-    Options.NoFramePointerElim = false;
-  } else if (CodeGenOpts.OmitLeafFramePointer) {
-    Options.NoFramePointerElim = false;
-  } else {
-    Options.NoFramePointerElim = true;
-  }
-
   if (CodeGenOpts.UseInitArray)
     Options.UseInitArray = true;
 
@@ -521,9 +522,7 @@
   Options.NoNaNsFPMath = CodeGenOpts.NoNaNsFPMath;
   Options.NoZerosInBSS = CodeGenOpts.NoZeroInitializedInBSS;
   Options.UnsafeFPMath = CodeGenOpts.UnsafeFPMath;
-  Options.UseSoftFloat = CodeGenOpts.SoftFloat;
   Options.StackAlignmentOverride = CodeGenOpts.StackAlignment;
-  Options.DisableTailCalls = CodeGenOpts.DisableTailCalls;
   Options.TrapFuncName = CodeGenOpts.TrapFuncName;
   Options.PositionIndependentExecutable = LangOpts.PIELevel != 0;
   Options.FunctionSections = CodeGenOpts.FunctionSections;
@@ -570,8 +569,7 @@
   // Add ObjC ARC final-cleanup optimizations. This is done as part of the
   // "codegen" passes so that it isn't run multiple times when there is
   // inlining happening.
-  if (LangOpts.ObjCAutoRefCount &&
-      CodeGenOpts.OptimizationLevel > 0)
+  if (CodeGenOpts.OptimizationLevel > 0)
     PM->add(createObjCARCContractPass());
 
   if (TM->addPassesToEmitFile(*PM, OS, CGFT,
@@ -625,10 +623,9 @@
     PrettyStackTraceString CrashInfo("Per-function optimization");
 
     PerFunctionPasses->doInitialization();
-    for (Module::iterator I = TheModule->begin(),
-           E = TheModule->end(); I != E; ++I)
-      if (!I->isDeclaration())
-        PerFunctionPasses->run(*I);
+    for (Function &F : *TheModule)
+      if (!F.isDeclaration())
+        PerFunctionPasses->run(F);
     PerFunctionPasses->doFinalization();
   }
 
diff --git a/lib/CodeGen/CGAtomic.cpp b/lib/CodeGen/CGAtomic.cpp
index 2de9cb2..da82249 100644
--- a/lib/CodeGen/CGAtomic.cpp
+++ b/lib/CodeGen/CGAtomic.cpp
@@ -215,6 +215,17 @@
         llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent,
         bool IsWeak = false);
 
+    /// \brief Emits atomic update.
+    /// \param AO Atomic ordering.
+    /// \param UpdateOp Update operation for the current lvalue.
+    void EmitAtomicUpdate(llvm::AtomicOrdering AO,
+                          const llvm::function_ref<RValue(RValue)> &UpdateOp,
+                          bool IsVolatile);
+    /// \brief Emits atomic update.
+    /// \param AO Atomic ordering.
+    void EmitAtomicUpdate(llvm::AtomicOrdering AO, RValue UpdateRVal,
+                          bool IsVolatile);
+
     /// Materialize an atomic r-value in atomic-layout memory.
     llvm::Value *materializeRValue(RValue rvalue) const;
 
@@ -235,16 +246,31 @@
     /// \brief Emits atomic load as LLVM instruction.
     llvm::Value *EmitAtomicLoadOp(llvm::AtomicOrdering AO, bool IsVolatile);
     /// \brief Emits atomic compare-and-exchange op as a libcall.
-    std::pair<RValue, llvm::Value *> EmitAtomicCompareExchangeLibcall(
-        RValue Expected, RValue DesiredAddr,
+    llvm::Value *EmitAtomicCompareExchangeLibcall(
+        llvm::Value *ExpectedAddr, llvm::Value *DesiredAddr,
         llvm::AtomicOrdering Success = llvm::SequentiallyConsistent,
         llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent);
     /// \brief Emits atomic compare-and-exchange op as LLVM instruction.
-    std::pair<RValue, llvm::Value *> EmitAtomicCompareExchangeOp(
-        RValue Expected, RValue Desired,
+    std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchangeOp(
+        llvm::Value *ExpectedVal, llvm::Value *DesiredVal,
         llvm::AtomicOrdering Success = llvm::SequentiallyConsistent,
         llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent,
         bool IsWeak = false);
+    /// \brief Emit atomic update as libcalls.
+    void
+    EmitAtomicUpdateLibcall(llvm::AtomicOrdering AO,
+                            const llvm::function_ref<RValue(RValue)> &UpdateOp,
+                            bool IsVolatile);
+    /// \brief Emit atomic update as LLVM instructions.
+    void EmitAtomicUpdateOp(llvm::AtomicOrdering AO,
+                            const llvm::function_ref<RValue(RValue)> &UpdateOp,
+                            bool IsVolatile);
+    /// \brief Emit atomic update as libcalls.
+    void EmitAtomicUpdateLibcall(llvm::AtomicOrdering AO, RValue UpdateRVal,
+                                 bool IsVolatile);
+    /// \brief Emit atomic update as LLVM instructions.
+    void EmitAtomicUpdateOp(llvm::AtomicOrdering AO, RValue UpdateRal,
+                            bool IsVolatile);
   };
 }
 
@@ -1313,12 +1339,10 @@
                                        getAtomicAlignment().getQuantity());
 }
 
-std::pair<RValue, llvm::Value *> AtomicInfo::EmitAtomicCompareExchangeOp(
-    RValue Expected, RValue Desired, llvm::AtomicOrdering Success,
-    llvm::AtomicOrdering Failure, bool IsWeak) {
+std::pair<llvm::Value *, llvm::Value *> AtomicInfo::EmitAtomicCompareExchangeOp(
+    llvm::Value *ExpectedVal, llvm::Value *DesiredVal,
+    llvm::AtomicOrdering Success, llvm::AtomicOrdering Failure, bool IsWeak) {
   // Do the atomic store.
-  auto *ExpectedVal = convertRValueToInt(Expected);
-  auto *DesiredVal = convertRValueToInt(Desired);
   auto *Addr = emitCastToAtomicIntPointer(getAtomicAddress());
   auto *Inst = CGF.Builder.CreateAtomicCmpXchg(Addr, ExpectedVal, DesiredVal,
                                                Success, Failure);
@@ -1329,20 +1353,16 @@
   // Okay, turn that back into the original value type.
   auto *PreviousVal = CGF.Builder.CreateExtractValue(Inst, /*Idxs=*/0);
   auto *SuccessFailureVal = CGF.Builder.CreateExtractValue(Inst, /*Idxs=*/1);
-  return std::make_pair(
-      ConvertIntToValueOrAtomic(PreviousVal, AggValueSlot::ignored(),
-                                SourceLocation(), /*AsValue=*/false),
-      SuccessFailureVal);
+  return std::make_pair(PreviousVal, SuccessFailureVal);
 }
 
-std::pair<RValue, llvm::Value *>
-AtomicInfo::EmitAtomicCompareExchangeLibcall(RValue Expected, RValue Desired,
+llvm::Value *
+AtomicInfo::EmitAtomicCompareExchangeLibcall(llvm::Value *ExpectedAddr,
+                                             llvm::Value *DesiredAddr,
                                              llvm::AtomicOrdering Success,
                                              llvm::AtomicOrdering Failure) {
   // bool __atomic_compare_exchange(size_t size, void *obj, void *expected,
   // void *desired, int success, int failure);
-  auto *ExpectedAddr = materializeRValue(Expected);
-  auto *DesiredAddr = materializeRValue(Desired);
   CallArgList Args;
   Args.add(RValue::get(getAtomicSizeValue()), CGF.getContext().getSizeType());
   Args.add(RValue::get(CGF.EmitCastToVoidPtr(getAtomicAddress())),
@@ -1360,10 +1380,7 @@
   auto SuccessFailureRVal = emitAtomicLibcall(CGF, "__atomic_compare_exchange",
                                               CGF.getContext().BoolTy, Args);
 
-  return std::make_pair(
-      convertTempToRValue(ExpectedAddr, AggValueSlot::ignored(),
-                          SourceLocation(), /*AsValue=*/false),
-      SuccessFailureRVal.getScalarVal());
+  return SuccessFailureRVal.getScalarVal();
 }
 
 std::pair<RValue, llvm::Value *> AtomicInfo::EmitAtomicCompareExchange(
@@ -1376,14 +1393,247 @@
   // Check whether we should use a library call.
   if (shouldUseLibcall()) {
     // Produce a source address.
-    return EmitAtomicCompareExchangeLibcall(Expected, Desired, Success,
-                                            Failure);
+    auto *ExpectedAddr = materializeRValue(Expected);
+    auto *DesiredAddr = materializeRValue(Desired);
+    auto *Res = EmitAtomicCompareExchangeLibcall(ExpectedAddr, DesiredAddr,
+                                                 Success, Failure);
+    return std::make_pair(
+        convertTempToRValue(ExpectedAddr, AggValueSlot::ignored(),
+                            SourceLocation(), /*AsValue=*/false),
+        Res);
   }
 
   // If we've got a scalar value of the right size, try to avoid going
   // through memory.
-  return EmitAtomicCompareExchangeOp(Expected, Desired, Success, Failure,
-                                     IsWeak);
+  auto *ExpectedVal = convertRValueToInt(Expected);
+  auto *DesiredVal = convertRValueToInt(Desired);
+  auto Res = EmitAtomicCompareExchangeOp(ExpectedVal, DesiredVal, Success,
+                                         Failure, IsWeak);
+  return std::make_pair(
+      ConvertIntToValueOrAtomic(Res.first, AggValueSlot::ignored(),
+                                SourceLocation(), /*AsValue=*/false),
+      Res.second);
+}
+
+static void
+EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics, RValue OldRVal,
+                      const llvm::function_ref<RValue(RValue)> &UpdateOp,
+                      llvm::Value *DesiredAddr) {
+  llvm::Value *Ptr = nullptr;
+  LValue UpdateLVal;
+  RValue UpRVal;
+  LValue AtomicLVal = Atomics.getAtomicLValue();
+  LValue DesiredLVal;
+  if (AtomicLVal.isSimple()) {
+    UpRVal = OldRVal;
+    DesiredLVal =
+        LValue::MakeAddr(DesiredAddr, AtomicLVal.getType(),
+                         AtomicLVal.getAlignment(), CGF.CGM.getContext());
+  } else {
+    // Build new lvalue for temp address
+    Ptr = Atomics.materializeRValue(OldRVal);
+    if (AtomicLVal.isBitField()) {
+      UpdateLVal =
+          LValue::MakeBitfield(Ptr, AtomicLVal.getBitFieldInfo(),
+                               AtomicLVal.getType(), AtomicLVal.getAlignment());
+      DesiredLVal =
+          LValue::MakeBitfield(DesiredAddr, AtomicLVal.getBitFieldInfo(),
+                               AtomicLVal.getType(), AtomicLVal.getAlignment());
+    } else if (AtomicLVal.isVectorElt()) {
+      UpdateLVal = LValue::MakeVectorElt(Ptr, AtomicLVal.getVectorIdx(),
+                                         AtomicLVal.getType(),
+                                         AtomicLVal.getAlignment());
+      DesiredLVal = LValue::MakeVectorElt(
+          DesiredAddr, AtomicLVal.getVectorIdx(), AtomicLVal.getType(),
+          AtomicLVal.getAlignment());
+    } else {
+      assert(AtomicLVal.isExtVectorElt());
+      UpdateLVal = LValue::MakeExtVectorElt(Ptr, AtomicLVal.getExtVectorElts(),
+                                            AtomicLVal.getType(),
+                                            AtomicLVal.getAlignment());
+      DesiredLVal = LValue::MakeExtVectorElt(
+          DesiredAddr, AtomicLVal.getExtVectorElts(), AtomicLVal.getType(),
+          AtomicLVal.getAlignment());
+    }
+    UpdateLVal.setTBAAInfo(AtomicLVal.getTBAAInfo());
+    DesiredLVal.setTBAAInfo(AtomicLVal.getTBAAInfo());
+    UpRVal = CGF.EmitLoadOfLValue(UpdateLVal, SourceLocation());
+  }
+  // Store new value in the corresponding memory area
+  RValue NewRVal = UpdateOp(UpRVal);
+  if (NewRVal.isScalar()) {
+    CGF.EmitStoreThroughLValue(NewRVal, DesiredLVal);
+  } else {
+    assert(NewRVal.isComplex());
+    CGF.EmitStoreOfComplex(NewRVal.getComplexVal(), DesiredLVal,
+                           /*isInit=*/false);
+  }
+}
+
+void AtomicInfo::EmitAtomicUpdateLibcall(
+    llvm::AtomicOrdering AO, const llvm::function_ref<RValue(RValue)> &UpdateOp,
+    bool IsVolatile) {
+  auto Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
+
+  llvm::Value *ExpectedAddr = CreateTempAlloca();
+
+  EmitAtomicLoadLibcall(ExpectedAddr, AO, IsVolatile);
+  auto *ContBB = CGF.createBasicBlock("atomic_cont");
+  auto *ExitBB = CGF.createBasicBlock("atomic_exit");
+  CGF.EmitBlock(ContBB);
+  auto *DesiredAddr = CreateTempAlloca();
+  if ((LVal.isBitField() && BFI.Size != ValueSizeInBits) ||
+      requiresMemSetZero(
+          getAtomicAddress()->getType()->getPointerElementType())) {
+    auto *OldVal = CGF.Builder.CreateAlignedLoad(
+        ExpectedAddr, getAtomicAlignment().getQuantity());
+    CGF.Builder.CreateAlignedStore(OldVal, DesiredAddr,
+                                   getAtomicAlignment().getQuantity());
+  }
+  auto OldRVal = convertTempToRValue(ExpectedAddr, AggValueSlot::ignored(),
+                                    SourceLocation(), /*AsValue=*/false);
+  EmitAtomicUpdateValue(CGF, *this, OldRVal, UpdateOp, DesiredAddr);
+  auto *Res =
+      EmitAtomicCompareExchangeLibcall(ExpectedAddr, DesiredAddr, AO, Failure);
+  CGF.Builder.CreateCondBr(Res, ExitBB, ContBB);
+  CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
+}
+
+void AtomicInfo::EmitAtomicUpdateOp(
+    llvm::AtomicOrdering AO, const llvm::function_ref<RValue(RValue)> &UpdateOp,
+    bool IsVolatile) {
+  auto Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
+
+  // Do the atomic load.
+  auto *OldVal = EmitAtomicLoadOp(AO, IsVolatile);
+  // For non-simple lvalues perform compare-and-swap procedure.
+  auto *ContBB = CGF.createBasicBlock("atomic_cont");
+  auto *ExitBB = CGF.createBasicBlock("atomic_exit");
+  auto *CurBB = CGF.Builder.GetInsertBlock();
+  CGF.EmitBlock(ContBB);
+  llvm::PHINode *PHI = CGF.Builder.CreatePHI(OldVal->getType(),
+                                             /*NumReservedValues=*/2);
+  PHI->addIncoming(OldVal, CurBB);
+  auto *NewAtomicAddr = CreateTempAlloca();
+  auto *NewAtomicIntAddr = emitCastToAtomicIntPointer(NewAtomicAddr);
+  if ((LVal.isBitField() && BFI.Size != ValueSizeInBits) ||
+      requiresMemSetZero(
+          getAtomicAddress()->getType()->getPointerElementType())) {
+    CGF.Builder.CreateAlignedStore(PHI, NewAtomicIntAddr,
+                                   getAtomicAlignment().getQuantity());
+  }
+  auto OldRVal = ConvertIntToValueOrAtomic(PHI, AggValueSlot::ignored(),
+                                           SourceLocation(), /*AsValue=*/false);
+  EmitAtomicUpdateValue(CGF, *this, OldRVal, UpdateOp, NewAtomicAddr);
+  auto *DesiredVal = CGF.Builder.CreateAlignedLoad(
+      NewAtomicIntAddr, getAtomicAlignment().getQuantity());
+  // Try to write new value using cmpxchg operation
+  auto Res = EmitAtomicCompareExchangeOp(PHI, DesiredVal, AO, Failure);
+  PHI->addIncoming(Res.first, CGF.Builder.GetInsertBlock());
+  CGF.Builder.CreateCondBr(Res.second, ExitBB, ContBB);
+  CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
+}
+
+static void EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics,
+                                  RValue UpdateRVal, llvm::Value *DesiredAddr) {
+  LValue AtomicLVal = Atomics.getAtomicLValue();
+  LValue DesiredLVal;
+  // Build new lvalue for temp address
+  if (AtomicLVal.isBitField()) {
+    DesiredLVal =
+        LValue::MakeBitfield(DesiredAddr, AtomicLVal.getBitFieldInfo(),
+                             AtomicLVal.getType(), AtomicLVal.getAlignment());
+  } else if (AtomicLVal.isVectorElt()) {
+    DesiredLVal =
+        LValue::MakeVectorElt(DesiredAddr, AtomicLVal.getVectorIdx(),
+                              AtomicLVal.getType(), AtomicLVal.getAlignment());
+  } else {
+    assert(AtomicLVal.isExtVectorElt());
+    DesiredLVal = LValue::MakeExtVectorElt(
+        DesiredAddr, AtomicLVal.getExtVectorElts(), AtomicLVal.getType(),
+        AtomicLVal.getAlignment());
+  }
+  DesiredLVal.setTBAAInfo(AtomicLVal.getTBAAInfo());
+  // Store new value in the corresponding memory area
+  assert(UpdateRVal.isScalar());
+  CGF.EmitStoreThroughLValue(UpdateRVal, DesiredLVal);
+}
+
+void AtomicInfo::EmitAtomicUpdateLibcall(llvm::AtomicOrdering AO,
+                                         RValue UpdateRVal, bool IsVolatile) {
+  auto Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
+
+  llvm::Value *ExpectedAddr = CreateTempAlloca();
+
+  EmitAtomicLoadLibcall(ExpectedAddr, AO, IsVolatile);
+  auto *ContBB = CGF.createBasicBlock("atomic_cont");
+  auto *ExitBB = CGF.createBasicBlock("atomic_exit");
+  CGF.EmitBlock(ContBB);
+  auto *DesiredAddr = CreateTempAlloca();
+  if ((LVal.isBitField() && BFI.Size != ValueSizeInBits) ||
+      requiresMemSetZero(
+          getAtomicAddress()->getType()->getPointerElementType())) {
+    auto *OldVal = CGF.Builder.CreateAlignedLoad(
+        ExpectedAddr, getAtomicAlignment().getQuantity());
+    CGF.Builder.CreateAlignedStore(OldVal, DesiredAddr,
+                                   getAtomicAlignment().getQuantity());
+  }
+  EmitAtomicUpdateValue(CGF, *this, UpdateRVal, DesiredAddr);
+  auto *Res =
+      EmitAtomicCompareExchangeLibcall(ExpectedAddr, DesiredAddr, AO, Failure);
+  CGF.Builder.CreateCondBr(Res, ExitBB, ContBB);
+  CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
+}
+
+void AtomicInfo::EmitAtomicUpdateOp(llvm::AtomicOrdering AO, RValue UpdateRVal,
+                                    bool IsVolatile) {
+  auto Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
+
+  // Do the atomic load.
+  auto *OldVal = EmitAtomicLoadOp(AO, IsVolatile);
+  // For non-simple lvalues perform compare-and-swap procedure.
+  auto *ContBB = CGF.createBasicBlock("atomic_cont");
+  auto *ExitBB = CGF.createBasicBlock("atomic_exit");
+  auto *CurBB = CGF.Builder.GetInsertBlock();
+  CGF.EmitBlock(ContBB);
+  llvm::PHINode *PHI = CGF.Builder.CreatePHI(OldVal->getType(),
+                                             /*NumReservedValues=*/2);
+  PHI->addIncoming(OldVal, CurBB);
+  auto *NewAtomicAddr = CreateTempAlloca();
+  auto *NewAtomicIntAddr = emitCastToAtomicIntPointer(NewAtomicAddr);
+  if ((LVal.isBitField() && BFI.Size != ValueSizeInBits) ||
+      requiresMemSetZero(
+          getAtomicAddress()->getType()->getPointerElementType())) {
+    CGF.Builder.CreateAlignedStore(PHI, NewAtomicIntAddr,
+                                   getAtomicAlignment().getQuantity());
+  }
+  EmitAtomicUpdateValue(CGF, *this, UpdateRVal, NewAtomicAddr);
+  auto *DesiredVal = CGF.Builder.CreateAlignedLoad(
+      NewAtomicIntAddr, getAtomicAlignment().getQuantity());
+  // Try to write new value using cmpxchg operation
+  auto Res = EmitAtomicCompareExchangeOp(PHI, DesiredVal, AO, Failure);
+  PHI->addIncoming(Res.first, CGF.Builder.GetInsertBlock());
+  CGF.Builder.CreateCondBr(Res.second, ExitBB, ContBB);
+  CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
+}
+
+void AtomicInfo::EmitAtomicUpdate(
+    llvm::AtomicOrdering AO, const llvm::function_ref<RValue(RValue)> &UpdateOp,
+    bool IsVolatile) {
+  if (shouldUseLibcall()) {
+    EmitAtomicUpdateLibcall(AO, UpdateOp, IsVolatile);
+  } else {
+    EmitAtomicUpdateOp(AO, UpdateOp, IsVolatile);
+  }
+}
+
+void AtomicInfo::EmitAtomicUpdate(llvm::AtomicOrdering AO, RValue UpdateRVal,
+                                  bool IsVolatile) {
+  if (shouldUseLibcall()) {
+    EmitAtomicUpdateLibcall(AO, UpdateRVal, IsVolatile);
+  } else {
+    EmitAtomicUpdateOp(AO, UpdateRVal, IsVolatile);
+  }
 }
 
 void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue lvalue,
@@ -1465,46 +1715,8 @@
     return;
   }
 
-  // Atomic load of prev value.
-  RValue OldRVal =
-      atomics.EmitAtomicLoad(AggValueSlot::ignored(), SourceLocation(),
-                             /*AsValue=*/false, AO, IsVolatile);
-  // For non-simple lvalues perform compare-and-swap procedure.
-  auto *ContBB = createBasicBlock("atomic_cont");
-  auto *ExitBB = createBasicBlock("atomic_exit");
-  auto *CurBB = Builder.GetInsertBlock();
-  EmitBlock(ContBB);
-  llvm::PHINode *PHI = Builder.CreatePHI(OldRVal.getScalarVal()->getType(),
-                                         /*NumReservedValues=*/2);
-  PHI->addIncoming(OldRVal.getScalarVal(), CurBB);
-  RValue OriginalRValue = RValue::get(PHI);
-  // Build new lvalue for temp address
-  auto *Ptr = atomics.materializeRValue(OriginalRValue);
-  // Build new lvalue for temp address
-  LValue UpdateLVal;
-  if (LVal.isBitField())
-    UpdateLVal = LValue::MakeBitfield(Ptr, LVal.getBitFieldInfo(),
-                                      LVal.getType(), LVal.getAlignment());
-  else if (LVal.isVectorElt())
-    UpdateLVal = LValue::MakeVectorElt(Ptr, LVal.getVectorIdx(), LVal.getType(),
-                                       LVal.getAlignment());
-  else {
-    assert(LVal.isExtVectorElt());
-    UpdateLVal = LValue::MakeExtVectorElt(Ptr, LVal.getExtVectorElts(),
-                                          LVal.getType(), LVal.getAlignment());
-  }
-  UpdateLVal.setTBAAInfo(LVal.getTBAAInfo());
-  // Store new value in the corresponding memory area
-  EmitStoreThroughLValue(rvalue, UpdateLVal);
-  // Load new value
-  RValue NewRValue = RValue::get(EmitLoadOfScalar(
-      Ptr, LVal.isVolatile(), atomics.getAtomicAlignment().getQuantity(),
-      atomics.getAtomicType(), SourceLocation()));
-  // Try to write new value using cmpxchg operation
-  auto Pair = atomics.EmitAtomicCompareExchange(OriginalRValue, NewRValue, AO);
-  PHI->addIncoming(Pair.first.getScalarVal(), ContBB);
-  Builder.CreateCondBr(Pair.second, ExitBB, ContBB);
-  EmitBlock(ExitBB, /*IsFinished=*/true);
+  // Emit simple atomic update operation.
+  atomics.EmitAtomicUpdate(AO, rvalue, IsVolatile);
 }
 
 /// Emit a compare-and-exchange op for atomic type.
@@ -1529,72 +1741,9 @@
 
 void CodeGenFunction::EmitAtomicUpdate(
     LValue LVal, llvm::AtomicOrdering AO,
-    const std::function<RValue(RValue)> &UpdateOp, bool IsVolatile) {
+    const llvm::function_ref<RValue(RValue)> &UpdateOp, bool IsVolatile) {
   AtomicInfo Atomics(*this, LVal);
-  LValue AtomicLVal = Atomics.getAtomicLValue();
-
-  // Atomic load of prev value.
-  RValue OldRVal =
-      Atomics.EmitAtomicLoad(AggValueSlot::ignored(), SourceLocation(),
-                             /*AsValue=*/false, AO, IsVolatile);
-  bool IsScalar = OldRVal.isScalar();
-  auto *OldVal =
-      IsScalar ? OldRVal.getScalarVal() : Atomics.convertRValueToInt(OldRVal);
-  // For non-simple lvalues perform compare-and-swap procedure.
-  auto *ContBB = createBasicBlock("atomic_cont");
-  auto *ExitBB = createBasicBlock("atomic_exit");
-  auto *CurBB = Builder.GetInsertBlock();
-  EmitBlock(ContBB);
-  llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(),
-                                         /*NumReservedValues=*/2);
-  PHI->addIncoming(OldVal, CurBB);
-  RValue OriginalRValue =
-      IsScalar ? RValue::get(PHI) : Atomics.ConvertIntToValueOrAtomic(
-                                        PHI, AggValueSlot::ignored(),
-                                        SourceLocation(), /*AsValue=*/false);
-  // Build new lvalue for temp address
-  LValue UpdateLVal;
-  llvm::Value *Ptr = nullptr;
-  RValue UpRVal;
-  if (AtomicLVal.isSimple()) {
-    UpRVal = OriginalRValue;
-  } else {
-    // Build new lvalue for temp address
-    Ptr = Atomics.materializeRValue(OriginalRValue);
-    if (AtomicLVal.isBitField())
-      UpdateLVal =
-          LValue::MakeBitfield(Ptr, AtomicLVal.getBitFieldInfo(),
-                               AtomicLVal.getType(), AtomicLVal.getAlignment());
-    else if (AtomicLVal.isVectorElt())
-      UpdateLVal = LValue::MakeVectorElt(Ptr, AtomicLVal.getVectorIdx(),
-                                         AtomicLVal.getType(),
-                                         AtomicLVal.getAlignment());
-    else {
-      assert(AtomicLVal.isExtVectorElt());
-      UpdateLVal = LValue::MakeExtVectorElt(Ptr, AtomicLVal.getExtVectorElts(),
-                                            AtomicLVal.getType(),
-                                            AtomicLVal.getAlignment());
-    }
-    UpdateLVal.setTBAAInfo(LVal.getTBAAInfo());
-    UpRVal = EmitLoadOfLValue(UpdateLVal, SourceLocation());
-  }
-  // Store new value in the corresponding memory area
-  RValue NewRVal = UpdateOp(UpRVal);
-  if (!AtomicLVal.isSimple()) {
-    EmitStoreThroughLValue(NewRVal, UpdateLVal);
-    // Load new value
-    NewRVal = RValue::get(
-        EmitLoadOfScalar(Ptr, AtomicLVal.isVolatile(),
-                         Atomics.getAtomicAlignment().getQuantity(),
-                         Atomics.getAtomicType(), SourceLocation()));
-  }
-  // Try to write new value using cmpxchg operation
-  auto Pair = Atomics.EmitAtomicCompareExchange(OriginalRValue, NewRVal, AO);
-  OldVal = IsScalar ? Pair.first.getScalarVal()
-                    : Atomics.convertRValueToInt(Pair.first);
-  PHI->addIncoming(OldVal, ContBB);
-  Builder.CreateCondBr(Pair.second, ExitBB, ContBB);
-  EmitBlock(ExitBB, /*IsFinished=*/true);
+  Atomics.EmitAtomicUpdate(AO, UpdateOp, IsVolatile);
 }
 
 void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) {
diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp
index 202996b..3fd344c 100644
--- a/lib/CodeGen/CGBlocks.cpp
+++ b/lib/CodeGen/CGBlocks.cpp
@@ -1221,8 +1221,7 @@
     EmitLambdaBlockInvokeBody();
   else {
     PGO.assignRegionCounters(blockDecl, fn);
-    RegionCounter Cnt = getPGORegionCounter(blockDecl->getBody());
-    Cnt.beginRegion(Builder);
+    incrementProfileCounter(blockDecl->getBody());
     EmitStmt(blockDecl->getBody());
   }
 
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 2653d7c..2b9631d 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -205,7 +205,7 @@
          "arguments have the same integer width?)");
 
   llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
-  llvm::Value *Tmp = CGF.Builder.CreateCall2(Callee, X, Y);
+  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
   Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
   return CGF.Builder.CreateExtractValue(Tmp, 0);
 }
@@ -254,8 +254,8 @@
 
     DstPtr = Builder.CreateBitCast(DstPtr, Type);
     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
-    return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy),
-                                           DstPtr, SrcPtr));
+    return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
+                                          {DstPtr, SrcPtr}));
   }
   case Builtin::BI__builtin_abs:
   case Builtin::BI__builtin_labs:
@@ -333,7 +333,7 @@
 
     llvm::Type *ResultType = ConvertType(E->getType());
     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
-    Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef);
+    Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
     if (Result->getType() != ResultType)
       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                      "cast");
@@ -350,7 +350,7 @@
 
     llvm::Type *ResultType = ConvertType(E->getType());
     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
-    Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef);
+    Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
     if (Result->getType() != ResultType)
       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                      "cast");
@@ -366,9 +366,9 @@
     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
 
     llvm::Type *ResultType = ConvertType(E->getType());
-    Value *Tmp = Builder.CreateAdd(Builder.CreateCall2(F, ArgValue,
-                                                       Builder.getTrue()),
-                                   llvm::ConstantInt::get(ArgType, 1));
+    Value *Tmp =
+        Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
+                          llvm::ConstantInt::get(ArgType, 1));
     Value *Zero = llvm::Constant::getNullValue(ArgType);
     Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
     Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
@@ -421,8 +421,8 @@
       return RValue::get(ArgValue);
 
     Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
-    Value *Result = Builder.CreateCall2(FnExpect, ArgValue, ExpectedValue,
-                                        "expval");
+    Value *Result =
+        Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
     return RValue::get(Result);
   }
   case Builtin::BI__builtin_assume_aligned: {
@@ -473,7 +473,8 @@
     // FIXME: Get right address space.
     llvm::Type *Tys[] = { ResType, Builder.getInt8PtrTy(0) };
     Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys);
-    return RValue::get(Builder.CreateCall2(F, EmitScalarExpr(E->getArg(0)),CI));
+    return RValue::get(
+        Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0)), CI}));
   }
   case Builtin::BI__builtin_prefetch: {
     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
@@ -484,25 +485,25 @@
       llvm::ConstantInt::get(Int32Ty, 3);
     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
-    return RValue::get(Builder.CreateCall4(F, Address, RW, Locality, Data));
+    return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
   }
   case Builtin::BI__builtin_readcyclecounter: {
     Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
-    return RValue::get(Builder.CreateCall(F));
+    return RValue::get(Builder.CreateCall(F, {}));
   }
   case Builtin::BI__builtin___clear_cache: {
     Value *Begin = EmitScalarExpr(E->getArg(0));
     Value *End = EmitScalarExpr(E->getArg(1));
     Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
-    return RValue::get(Builder.CreateCall2(F, Begin, End));
+    return RValue::get(Builder.CreateCall(F, {Begin, End}));
   }
   case Builtin::BI__builtin_trap: {
     Value *F = CGM.getIntrinsic(Intrinsic::trap);
-    return RValue::get(Builder.CreateCall(F));
+    return RValue::get(Builder.CreateCall(F, {}));
   }
   case Builtin::BI__debugbreak: {
     Value *F = CGM.getIntrinsic(Intrinsic::debugtrap);
-    return RValue::get(Builder.CreateCall(F));
+    return RValue::get(Builder.CreateCall(F, {}));
   }
   case Builtin::BI__builtin_unreachable: {
     if (SanOpts.has(SanitizerKind::Unreachable)) {
@@ -527,7 +528,7 @@
     Value *Exponent = EmitScalarExpr(E->getArg(1));
     llvm::Type *ArgType = Base->getType();
     Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
-    return RValue::get(Builder.CreateCall2(F, Base, Exponent));
+    return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
   }
 
   case Builtin::BI__builtin_isgreater:
@@ -697,6 +698,8 @@
     std::pair<llvm::Value*, unsigned> Dest =
         EmitPointerWithAlignment(E->getArg(0));
     Value *SizeVal = EmitScalarExpr(E->getArg(1));
+    EmitNonNullArgCheck(RValue::get(Dest.first), E->getArg(0)->getType(),
+                        E->getArg(0)->getExprLoc(), FD, 0);
     Builder.CreateMemSet(Dest.first, Builder.getInt8(0), SizeVal,
                          Dest.second, false);
     return RValue::get(Dest.first);
@@ -709,6 +712,10 @@
         EmitPointerWithAlignment(E->getArg(1));
     Value *SizeVal = EmitScalarExpr(E->getArg(2));
     unsigned Align = std::min(Dest.second, Src.second);
+    EmitNonNullArgCheck(RValue::get(Dest.first), E->getArg(0)->getType(),
+                        E->getArg(0)->getExprLoc(), FD, 0);
+    EmitNonNullArgCheck(RValue::get(Src.first), E->getArg(1)->getType(),
+                        E->getArg(1)->getExprLoc(), FD, 1);
     Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false);
     return RValue::get(Dest.first);
   }
@@ -766,6 +773,10 @@
         EmitPointerWithAlignment(E->getArg(1));
     Value *SizeVal = EmitScalarExpr(E->getArg(2));
     unsigned Align = std::min(Dest.second, Src.second);
+    EmitNonNullArgCheck(RValue::get(Dest.first), E->getArg(0)->getType(),
+                        E->getArg(0)->getExprLoc(), FD, 0);
+    EmitNonNullArgCheck(RValue::get(Src.first), E->getArg(1)->getType(),
+                        E->getArg(1)->getExprLoc(), FD, 1);
     Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false);
     return RValue::get(Dest.first);
   }
@@ -776,6 +787,8 @@
     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                          Builder.getInt8Ty());
     Value *SizeVal = EmitScalarExpr(E->getArg(2));
+    EmitNonNullArgCheck(RValue::get(Dest.first), E->getArg(0)->getType(),
+                        E->getArg(0)->getExprLoc(), FD, 0);
     Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false);
     return RValue::get(Dest.first);
   }
@@ -858,7 +871,7 @@
     Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
                                   ? Intrinsic::eh_return_i32
                                   : Intrinsic::eh_return_i64);
-    Builder.CreateCall2(F, Int, Ptr);
+    Builder.CreateCall(F, {Int, Ptr});
     Builder.CreateUnreachable();
 
     // We do need to preserve an insertion point.
@@ -868,7 +881,7 @@
   }
   case Builtin::BI__builtin_unwind_init: {
     Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
-    return RValue::get(Builder.CreateCall(F));
+    return RValue::get(Builder.CreateCall(F, {}));
   }
   case Builtin::BI__builtin_extend_pointer: {
     // Extends a pointer to the size of an _Unwind_Word, which is
@@ -907,7 +920,7 @@
 
     // Store the stack pointer to the setjmp buffer.
     Value *StackAddr =
-      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
+        Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave), {});
     Value *StackSaveSlot =
       Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2));
     Builder.CreateStore(StackAddr, StackSaveSlot);
@@ -1413,7 +1426,7 @@
     Value *Exponent = EmitScalarExpr(E->getArg(1));
     llvm::Type *ArgType = Base->getType();
     Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
-    return RValue::get(Builder.CreateCall2(F, Base, Exponent));
+    return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
   }
 
   case Builtin::BIfma:
@@ -1426,9 +1439,9 @@
     Value *FirstArg = EmitScalarExpr(E->getArg(0));
     llvm::Type *ArgType = FirstArg->getType();
     Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
-    return RValue::get(Builder.CreateCall3(F, FirstArg,
-                                              EmitScalarExpr(E->getArg(1)),
-                                              EmitScalarExpr(E->getArg(2))));
+    return RValue::get(
+        Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
+                               EmitScalarExpr(E->getArg(2))}));
   }
 
   case Builtin::BI__builtin_signbit:
@@ -2915,7 +2928,7 @@
     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
 
     // NEON intrinsic puts accumulator first, unlike the LLVM fma.
-    return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
+    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
   }
   case NEON::BI__builtin_neon_vld1_v:
   case NEON::BI__builtin_neon_vld1q_v:
@@ -2928,7 +2941,7 @@
   case NEON::BI__builtin_neon_vld4_v:
   case NEON::BI__builtin_neon_vld4q_v: {
     Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty);
-    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, NameHint);
+    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
     return Builder.CreateStore(Ops[1], Ops[0]);
@@ -3266,6 +3279,66 @@
   }
 }
 
+// Generates the IR for the read/write special register builtin,
+// ValueType is the type of the value that is to be written or read,
+// RegisterType is the type of the register being written to or read from.
+static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
+                                         const CallExpr *E,
+                                         llvm::Type *RegisterType,
+                                         llvm::Type *ValueType, bool IsRead) {
+  // write and register intrinsics only support 32 and 64 bit operations.
+  assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
+          && "Unsupported size for register.");
+
+  CodeGen::CGBuilderTy &Builder = CGF.Builder;
+  CodeGen::CodeGenModule &CGM = CGF.CGM;
+  LLVMContext &Context = CGM.getLLVMContext();
+
+  const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
+  StringRef SysReg = cast<StringLiteral>(SysRegStrExpr)->getString();
+
+  llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
+  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
+  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
+
+  llvm::Type *Types[] = { RegisterType };
+
+  bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
+  assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
+            && "Can't fit 64-bit value in 32-bit register");
+
+  if (IsRead) {
+    llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
+    llvm::Value *Call = Builder.CreateCall(F, Metadata);
+
+    if (MixedTypes)
+      // Read into 64 bit register and then truncate result to 32 bit.
+      return Builder.CreateTrunc(Call, ValueType);
+
+    if (ValueType->isPointerTy())
+      // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
+      return Builder.CreateIntToPtr(Call, ValueType);
+
+    return Call;
+  }
+
+  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
+  llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
+  if (MixedTypes) {
+    // Extend 32 bit write value to 64 bit to pass to write.
+    ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
+    return Builder.CreateCall(F, { Metadata, ArgValue });
+  }
+
+  if (ValueType->isPointerTy()) {
+    // Have VoidPtrTy ArgValue but want to return an i32/i64.
+    ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
+    return Builder.CreateCall(F, { Metadata, ArgValue });
+  }
+
+  return Builder.CreateCall(F, { Metadata, ArgValue });
+}
+
 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
                                            const CallExpr *E) {
   if (auto Hint = GetValueForARMHint(BuiltinID))
@@ -3288,7 +3361,7 @@
                 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
                                  /*SideEffects=*/true);
 
-    return Builder.CreateCall(Emit);
+    return Builder.CreateCall(Emit, {});
   }
 
   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
@@ -3305,7 +3378,7 @@
     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
 
     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
-    return Builder.CreateCall4(F, Address, RW, Locality, IsData);
+    return Builder.CreateCall(F, {Address, RW, Locality, IsData});
   }
 
   if (BuiltinID == ARM::BI__builtin_arm_rbit) {
@@ -3403,7 +3476,7 @@
     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
-    return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "strexd");
+    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
   }
 
   if (BuiltinID == ARM::BI__builtin_arm_strex ||
@@ -3427,12 +3500,12 @@
                                        ? Intrinsic::arm_stlex
                                        : Intrinsic::arm_strex,
                                    StoreAddr->getType());
-    return Builder.CreateCall2(F, StoreVal, StoreAddr, "strex");
+    return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
   }
 
   if (BuiltinID == ARM::BI__builtin_arm_clrex) {
     Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
-    return Builder.CreateCall(F);
+    return Builder.CreateCall(F, {});
   }
 
   // CRC32
@@ -3468,16 +3541,54 @@
       Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
 
       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
-      Value *Res = Builder.CreateCall2(F, Arg0, Arg1a);
-      return Builder.CreateCall2(F, Res, Arg1b);
+      Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
+      return Builder.CreateCall(F, {Res, Arg1b});
     } else {
       Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
 
       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
-      return Builder.CreateCall2(F, Arg0, Arg1);
+      return Builder.CreateCall(F, {Arg0, Arg1});
     }
   }
 
+  if (BuiltinID == ARM::BI__builtin_arm_rsr ||
+      BuiltinID == ARM::BI__builtin_arm_rsr64 ||
+      BuiltinID == ARM::BI__builtin_arm_rsrp ||
+      BuiltinID == ARM::BI__builtin_arm_wsr ||
+      BuiltinID == ARM::BI__builtin_arm_wsr64 ||
+      BuiltinID == ARM::BI__builtin_arm_wsrp) {
+
+    bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
+                  BuiltinID == ARM::BI__builtin_arm_rsr64 ||
+                  BuiltinID == ARM::BI__builtin_arm_rsrp;
+
+    bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
+                            BuiltinID == ARM::BI__builtin_arm_wsrp;
+
+    bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
+                   BuiltinID == ARM::BI__builtin_arm_wsr64;
+
+    llvm::Type *ValueType;
+    llvm::Type *RegisterType;
+    if (IsPointerBuiltin) {
+      ValueType = VoidPtrTy;
+      RegisterType = Int32Ty;
+    } else if (Is64Bit) {
+      ValueType = RegisterType = Int64Ty;
+    } else {
+      ValueType = RegisterType = Int32Ty;
+    }
+
+    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
+  }
+
+  // Find out if any arguments are required to be integer constant
+  // expressions.
+  unsigned ICEArguments = 0;
+  ASTContext::GetBuiltinTypeError Error;
+  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
+  assert(Error == ASTContext::GE_None && "Should not codegen an error");
+
   SmallVector<Value*, 4> Ops;
   llvm::Value *Align = nullptr;
   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
@@ -3540,7 +3651,17 @@
         continue;
       }
     }
-    Ops.push_back(EmitScalarExpr(E->getArg(i)));
+
+    if ((ICEArguments & (1 << i)) == 0) {
+      Ops.push_back(EmitScalarExpr(E->getArg(i)));
+    } else {
+      // If this is required to be a constant, constant fold it so that we know
+      // that the generated intrinsic gets a ConstantInt.
+      llvm::APSInt Result;
+      bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
+      assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
+      Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
+    }
   }
 
   switch (BuiltinID) {
@@ -3650,7 +3771,7 @@
       // Load the value as a one-element vector.
       Ty = llvm::VectorType::get(VTy->getElementType(), 1);
       Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty);
-      Value *Ld = Builder.CreateCall2(F, Ops[0], Align);
+      Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
       // Combine them.
       SmallVector<Constant*, 2> Indices;
       Indices.push_back(ConstantInt::get(Int32Ty, 1-Lane));
@@ -3685,7 +3806,7 @@
       default: llvm_unreachable("unknown vld_dup intrinsic?");
       }
       Function *F = CGM.getIntrinsic(Int, Ty);
-      Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup");
+      Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
       return Builder.CreateStore(Ops[1], Ops[0]);
@@ -3754,7 +3875,7 @@
     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
-    Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]);
+    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
   case NEON::BI__builtin_neon_vsri_n_v:
   case NEON::BI__builtin_neon_vsriq_n_v:
@@ -4000,38 +4121,6 @@
   return Op;
 }
 
-Value *CodeGenFunction::
-emitVectorWrappedScalar8Intrinsic(unsigned Int, SmallVectorImpl<Value*> &Ops,
-                                  const char *Name) {
-  // i8 is not a legal types for AArch64, so we can't just use
-  // a normal overloaded intrinsic call for these scalar types. Instead
-  // we'll build 64-bit vectors w/ lane zero being our input values and
-  // perform the operation on that. The back end can pattern match directly
-  // to the scalar instruction.
-  Ops[0] = vectorWrapScalar8(Ops[0]);
-  Ops[1] = vectorWrapScalar8(Ops[1]);
-  llvm::Type *VTy = llvm::VectorType::get(Int8Ty, 8);
-  Value *V = EmitNeonCall(CGM.getIntrinsic(Int, VTy), Ops, Name);
-  Constant *CI = ConstantInt::get(SizeTy, 0);
-  return Builder.CreateExtractElement(V, CI, "lane0");
-}
-
-Value *CodeGenFunction::
-emitVectorWrappedScalar16Intrinsic(unsigned Int, SmallVectorImpl<Value*> &Ops,
-                                   const char *Name) {
-  // i16 is not a legal types for AArch64, so we can't just use
-  // a normal overloaded intrinsic call for these scalar types. Instead
-  // we'll build 64-bit vectors w/ lane zero being our input values and
-  // perform the operation on that. The back end can pattern match directly
-  // to the scalar instruction.
-  Ops[0] = vectorWrapScalar16(Ops[0]);
-  Ops[1] = vectorWrapScalar16(Ops[1]);
-  llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
-  Value *V = EmitNeonCall(CGM.getIntrinsic(Int, VTy), Ops, Name);
-  Constant *CI = ConstantInt::get(SizeTy, 0);
-  return Builder.CreateExtractElement(V, CI, "lane0");
-}
-
 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
                                                const CallExpr *E) {
   unsigned HintID = static_cast<unsigned>(-1);
@@ -4082,7 +4171,7 @@
     // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
     // PLDL3STRM or PLDL2STRM.
     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
-    return Builder.CreateCall4(F, Address, RW, Locality, IsData);
+    return Builder.CreateCall(F, {Address, RW, Locality, IsData});
   }
 
   if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
@@ -4177,9 +4266,11 @@
     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
                                          Int8PtrTy);
-    return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "stxp");
-  } else if (BuiltinID == AArch64::BI__builtin_arm_strex ||
-             BuiltinID == AArch64::BI__builtin_arm_stlex) {
+    return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
+  }
+
+  if (BuiltinID == AArch64::BI__builtin_arm_strex ||
+      BuiltinID == AArch64::BI__builtin_arm_stlex) {
     Value *StoreVal = EmitScalarExpr(E->getArg(0));
     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
 
@@ -4199,12 +4290,12 @@
                                        ? Intrinsic::aarch64_stlxr
                                        : Intrinsic::aarch64_stxr,
                                    StoreAddr->getType());
-    return Builder.CreateCall2(F, StoreVal, StoreAddr, "stxr");
+    return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
   }
 
   if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
-    return Builder.CreateCall(F);
+    return Builder.CreateCall(F, {});
   }
 
   // CRC32
@@ -4236,12 +4327,60 @@
     llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
     Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
 
-    return Builder.CreateCall2(F, Arg0, Arg1);
+    return Builder.CreateCall(F, {Arg0, Arg1});
   }
 
+  if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
+      BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
+      BuiltinID == AArch64::BI__builtin_arm_rsrp ||
+      BuiltinID == AArch64::BI__builtin_arm_wsr ||
+      BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
+      BuiltinID == AArch64::BI__builtin_arm_wsrp) {
+
+    bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
+                  BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
+                  BuiltinID == AArch64::BI__builtin_arm_rsrp;
+
+    bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
+                            BuiltinID == AArch64::BI__builtin_arm_wsrp;
+
+    bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
+                   BuiltinID != AArch64::BI__builtin_arm_wsr;
+
+    llvm::Type *ValueType;
+    llvm::Type *RegisterType = Int64Ty;
+    if (IsPointerBuiltin) {
+      ValueType = VoidPtrTy;
+    } else if (Is64Bit) {
+      ValueType = Int64Ty;
+    } else {
+      ValueType = Int32Ty;
+    }
+
+    return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
+  }
+
+  // Find out if any arguments are required to be integer constant
+  // expressions.
+  unsigned ICEArguments = 0;
+  ASTContext::GetBuiltinTypeError Error;
+  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
+  assert(Error == ASTContext::GE_None && "Should not codegen an error");
+
   llvm::SmallVector<Value*, 4> Ops;
-  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++)
-    Ops.push_back(EmitScalarExpr(E->getArg(i)));
+  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
+    if ((ICEArguments & (1 << i)) == 0) {
+      Ops.push_back(EmitScalarExpr(E->getArg(i)));
+    } else {
+      // If this is required to be a constant, constant fold it so that we know
+      // that the generated intrinsic gets a ConstantInt.
+      llvm::APSInt Result;
+      bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
+      assert(IsConst && "Constant arg isn't actually constant?");
+      (void)IsConst;
+      Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
+    }
+  }
 
   auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
@@ -4631,8 +4770,8 @@
                                    : Intrinsic::aarch64_neon_srshl;
     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
     Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
-    Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Int64Ty), Ops[1],
-                                 Builder.CreateSExt(Ops[2], Int64Ty));
+    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
+                                {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
     return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
   }
   case NEON::BI__builtin_neon_vshld_n_s64:
@@ -4802,7 +4941,7 @@
       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
       Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
       Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
-      Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
+      Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
       return Builder.CreateBitCast(Result, Ty);
     }
     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
@@ -4816,7 +4955,7 @@
                                                cast<ConstantInt>(Ops[3]));
     Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
 
-    return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
+    return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
   }
   case NEON::BI__builtin_neon_vfmaq_laneq_v: {
     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
@@ -4825,7 +4964,7 @@
 
     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
-    return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
+    return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
   }
   case NEON::BI__builtin_neon_vfmas_lane_f32:
   case NEON::BI__builtin_neon_vfmas_laneq_f32:
@@ -4835,7 +4974,7 @@
     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
-    return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
+    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
   }
   case NEON::BI__builtin_neon_vfms_v:
   case NEON::BI__builtin_neon_vfmsq_v: {  // Only used for FP types
@@ -5920,7 +6059,7 @@
     Value *Locality = EmitScalarExpr(E->getArg(1));
     Value *Data = ConstantInt::get(Int32Ty, 1);
     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
-    return Builder.CreateCall4(F, Address, RW, Locality, Data);
+    return Builder.CreateCall(F, {Address, RW, Locality, Data});
   }
   case X86::BI__builtin_ia32_vec_init_v8qi:
   case X86::BI__builtin_ia32_vec_init_v4hi:
@@ -6115,7 +6254,7 @@
       break;
     }
 
-    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
+    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {});
     Builder.CreateStore(Builder.CreateExtractValue(Call, 0), Ops[0]);
     return Builder.CreateExtractValue(Call, 1);
   }
@@ -6395,7 +6534,7 @@
   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
 
   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
-  return CGF.Builder.CreateCall3(F, Src0, Src1, Src2);
+  return CGF.Builder.CreateCall(F, {Src0, Src1, Src2});
 }
 
 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
@@ -6406,7 +6545,7 @@
   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
 
   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
-  return CGF.Builder.CreateCall2(F, Src0, Src1);
+  return CGF.Builder.CreateCall(F, {Src0, Src1});
 }
 
 Value *CodeGenFunction::EmitR600BuiltinExpr(unsigned BuiltinID,
@@ -6427,7 +6566,7 @@
     llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale,
                                            X->getType());
 
-    llvm::Value *Tmp = Builder.CreateCall3(Callee, X, Y, Z);
+    llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
 
     llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
     llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
@@ -6450,7 +6589,7 @@
     llvm::Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_div_fmas,
                                       Src0->getType());
     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
-    return Builder.CreateCall4(F, Src0, Src1, Src2, Src3ToBool);
+    return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
   }
   case R600::BI__builtin_amdgpu_div_fixup:
   case R600::BI__builtin_amdgpu_div_fixupf:
@@ -6478,6 +6617,24 @@
   }
 }
 
+/// Handle a SystemZ function in which the final argument is a pointer
+/// to an int that receives the post-instruction CC value.  At the LLVM level
+/// this is represented as a function that returns a {result, cc} pair.
+static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
+                                         unsigned IntrinsicID,
+                                         const CallExpr *E) {
+  unsigned NumArgs = E->getNumArgs() - 1;
+  SmallVector<Value *, 8> Args(NumArgs);
+  for (unsigned I = 0; I < NumArgs; ++I)
+    Args[I] = CGF.EmitScalarExpr(E->getArg(I));
+  Value *CCPtr = CGF.EmitScalarExpr(E->getArg(NumArgs));
+  Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
+  Value *Call = CGF.Builder.CreateCall(F, Args);
+  Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
+  CGF.Builder.CreateStore(CC, CCPtr);
+  return CGF.Builder.CreateExtractValue(Call, 0);
+}
+
 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
                                                const CallExpr *E) {
   switch (BuiltinID) {
@@ -6485,19 +6642,19 @@
     Value *TDB = EmitScalarExpr(E->getArg(0));
     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
-    return Builder.CreateCall2(F, TDB, Control);
+    return Builder.CreateCall(F, {TDB, Control});
   }
   case SystemZ::BI__builtin_tbegin_nofloat: {
     Value *TDB = EmitScalarExpr(E->getArg(0));
     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
-    return Builder.CreateCall2(F, TDB, Control);
+    return Builder.CreateCall(F, {TDB, Control});
   }
   case SystemZ::BI__builtin_tbeginc: {
     Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
-    return Builder.CreateCall2(F, TDB, Control);
+    return Builder.CreateCall(F, {TDB, Control});
   }
   case SystemZ::BI__builtin_tabort: {
     Value *Data = EmitScalarExpr(E->getArg(0));
@@ -6508,9 +6665,196 @@
     Value *Address = EmitScalarExpr(E->getArg(0));
     Value *Data = EmitScalarExpr(E->getArg(1));
     Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
-    return Builder.CreateCall2(F, Data, Address);
+    return Builder.CreateCall(F, {Data, Address});
   }
 
+  // Vector builtins.  Note that most vector builtins are mapped automatically
+  // to target-specific LLVM intrinsics.  The ones handled specially here can
+  // be represented via standard LLVM IR, which is preferable to enable common
+  // LLVM optimizations.
+
+  case SystemZ::BI__builtin_s390_vpopctb:
+  case SystemZ::BI__builtin_s390_vpopcth:
+  case SystemZ::BI__builtin_s390_vpopctf:
+  case SystemZ::BI__builtin_s390_vpopctg: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
+    return Builder.CreateCall(F, X);
+  }
+
+  case SystemZ::BI__builtin_s390_vclzb:
+  case SystemZ::BI__builtin_s390_vclzh:
+  case SystemZ::BI__builtin_s390_vclzf:
+  case SystemZ::BI__builtin_s390_vclzg: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
+    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
+    return Builder.CreateCall(F, {X, Undef});
+  }
+
+  case SystemZ::BI__builtin_s390_vctzb:
+  case SystemZ::BI__builtin_s390_vctzh:
+  case SystemZ::BI__builtin_s390_vctzf:
+  case SystemZ::BI__builtin_s390_vctzg: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
+    Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
+    return Builder.CreateCall(F, {X, Undef});
+  }
+
+  case SystemZ::BI__builtin_s390_vfsqdb: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
+    return Builder.CreateCall(F, X);
+  }
+  case SystemZ::BI__builtin_s390_vfmadb: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    Value *Y = EmitScalarExpr(E->getArg(1));
+    Value *Z = EmitScalarExpr(E->getArg(2));
+    Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
+    return Builder.CreateCall(F, {X, Y, Z});
+  }
+  case SystemZ::BI__builtin_s390_vfmsdb: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    Value *Y = EmitScalarExpr(E->getArg(1));
+    Value *Z = EmitScalarExpr(E->getArg(2));
+    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
+    Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
+    return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
+  }
+  case SystemZ::BI__builtin_s390_vflpdb: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
+    return Builder.CreateCall(F, X);
+  }
+  case SystemZ::BI__builtin_s390_vflndb: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
+    Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
+    return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
+  }
+  case SystemZ::BI__builtin_s390_vfidb: {
+    llvm::Type *ResultType = ConvertType(E->getType());
+    Value *X = EmitScalarExpr(E->getArg(0));
+    // Constant-fold the M4 and M5 mask arguments.
+    llvm::APSInt M4, M5;
+    bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
+    bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
+    assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
+    (void)IsConstM4; (void)IsConstM5;
+    // Check whether this instance of vfidb can be represented via a LLVM
+    // standard intrinsic.  We only support some combinations of M4 and M5.
+    Intrinsic::ID ID = Intrinsic::not_intrinsic;
+    switch (M4.getZExtValue()) {
+    default: break;
+    case 0:  // IEEE-inexact exception allowed
+      switch (M5.getZExtValue()) {
+      default: break;
+      case 0: ID = Intrinsic::rint; break;
+      }
+      break;
+    case 4:  // IEEE-inexact exception suppressed
+      switch (M5.getZExtValue()) {
+      default: break;
+      case 0: ID = Intrinsic::nearbyint; break;
+      case 1: ID = Intrinsic::round; break;
+      case 5: ID = Intrinsic::trunc; break;
+      case 6: ID = Intrinsic::ceil; break;
+      case 7: ID = Intrinsic::floor; break;
+      }
+      break;
+    }
+    if (ID != Intrinsic::not_intrinsic) {
+      Function *F = CGM.getIntrinsic(ID, ResultType);
+      return Builder.CreateCall(F, X);
+    }
+    Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
+    Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
+    Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
+    return Builder.CreateCall(F, {X, M4Value, M5Value});
+  }
+
+  // Vector intrinsics that output the post-instruction CC value.
+
+#define INTRINSIC_WITH_CC(NAME) \
+    case SystemZ::BI__builtin_##NAME: \
+      return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
+
+  INTRINSIC_WITH_CC(s390_vpkshs);
+  INTRINSIC_WITH_CC(s390_vpksfs);
+  INTRINSIC_WITH_CC(s390_vpksgs);
+
+  INTRINSIC_WITH_CC(s390_vpklshs);
+  INTRINSIC_WITH_CC(s390_vpklsfs);
+  INTRINSIC_WITH_CC(s390_vpklsgs);
+
+  INTRINSIC_WITH_CC(s390_vceqbs);
+  INTRINSIC_WITH_CC(s390_vceqhs);
+  INTRINSIC_WITH_CC(s390_vceqfs);
+  INTRINSIC_WITH_CC(s390_vceqgs);
+
+  INTRINSIC_WITH_CC(s390_vchbs);
+  INTRINSIC_WITH_CC(s390_vchhs);
+  INTRINSIC_WITH_CC(s390_vchfs);
+  INTRINSIC_WITH_CC(s390_vchgs);
+
+  INTRINSIC_WITH_CC(s390_vchlbs);
+  INTRINSIC_WITH_CC(s390_vchlhs);
+  INTRINSIC_WITH_CC(s390_vchlfs);
+  INTRINSIC_WITH_CC(s390_vchlgs);
+
+  INTRINSIC_WITH_CC(s390_vfaebs);
+  INTRINSIC_WITH_CC(s390_vfaehs);
+  INTRINSIC_WITH_CC(s390_vfaefs);
+
+  INTRINSIC_WITH_CC(s390_vfaezbs);
+  INTRINSIC_WITH_CC(s390_vfaezhs);
+  INTRINSIC_WITH_CC(s390_vfaezfs);
+
+  INTRINSIC_WITH_CC(s390_vfeebs);
+  INTRINSIC_WITH_CC(s390_vfeehs);
+  INTRINSIC_WITH_CC(s390_vfeefs);
+
+  INTRINSIC_WITH_CC(s390_vfeezbs);
+  INTRINSIC_WITH_CC(s390_vfeezhs);
+  INTRINSIC_WITH_CC(s390_vfeezfs);
+
+  INTRINSIC_WITH_CC(s390_vfenebs);
+  INTRINSIC_WITH_CC(s390_vfenehs);
+  INTRINSIC_WITH_CC(s390_vfenefs);
+
+  INTRINSIC_WITH_CC(s390_vfenezbs);
+  INTRINSIC_WITH_CC(s390_vfenezhs);
+  INTRINSIC_WITH_CC(s390_vfenezfs);
+
+  INTRINSIC_WITH_CC(s390_vistrbs);
+  INTRINSIC_WITH_CC(s390_vistrhs);
+  INTRINSIC_WITH_CC(s390_vistrfs);
+
+  INTRINSIC_WITH_CC(s390_vstrcbs);
+  INTRINSIC_WITH_CC(s390_vstrchs);
+  INTRINSIC_WITH_CC(s390_vstrcfs);
+
+  INTRINSIC_WITH_CC(s390_vstrczbs);
+  INTRINSIC_WITH_CC(s390_vstrczhs);
+  INTRINSIC_WITH_CC(s390_vstrczfs);
+
+  INTRINSIC_WITH_CC(s390_vfcedbs);
+  INTRINSIC_WITH_CC(s390_vfchdbs);
+  INTRINSIC_WITH_CC(s390_vfchedbs);
+
+  INTRINSIC_WITH_CC(s390_vftcidb);
+
+#undef INTRINSIC_WITH_CC
+
   default:
     return nullptr;
   }
diff --git a/lib/CodeGen/CGCUDANV.cpp b/lib/CodeGen/CGCUDANV.cpp
index fb11751..67d0ab7 100644
--- a/lib/CodeGen/CGCUDANV.cpp
+++ b/lib/CodeGen/CGCUDANV.cpp
@@ -20,7 +20,6 @@
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
-#include <vector>
 
 using namespace clang;
 using namespace CodeGen;
@@ -30,29 +29,66 @@
 class CGNVCUDARuntime : public CGCUDARuntime {
 
 private:
-  llvm::Type *IntTy, *SizeTy;
-  llvm::PointerType *CharPtrTy, *VoidPtrTy;
+  llvm::Type *IntTy, *SizeTy, *VoidTy;
+  llvm::PointerType *CharPtrTy, *VoidPtrTy, *VoidPtrPtrTy;
+
+  /// Convenience reference to LLVM Context
+  llvm::LLVMContext &Context;
+  /// Convenience reference to the current module
+  llvm::Module &TheModule;
+  /// Keeps track of kernel launch stubs emitted in this module
+  llvm::SmallVector<llvm::Function *, 16> EmittedKernels;
+  /// Keeps track of variables containing handles of GPU binaries. Populated by
+  /// ModuleCtorFunction() and used to create corresponding cleanup calls in
+  /// ModuleDtorFunction()
+  llvm::SmallVector<llvm::GlobalVariable *, 16> GpuBinaryHandles;
 
   llvm::Constant *getSetupArgumentFn() const;
   llvm::Constant *getLaunchFn() const;
 
+  /// Creates a function to register all kernel stubs generated in this module.
+  llvm::Function *makeRegisterKernelsFn();
+
+  /// Helper function that generates a constant string and returns a pointer to
+  /// the start of the string.  The result of this function can be used anywhere
+  /// where the C code specifies const char*.
+  llvm::Constant *makeConstantString(const std::string &Str,
+                                     const std::string &Name = "",
+                                     unsigned Alignment = 0) {
+    llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
+                               llvm::ConstantInt::get(SizeTy, 0)};
+    auto *ConstStr = CGM.GetAddrOfConstantCString(Str, Name.c_str());
+    return llvm::ConstantExpr::getGetElementPtr(ConstStr->getValueType(),
+                                                ConstStr, Zeros);
+  }
+
+  void emitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args);
+
 public:
   CGNVCUDARuntime(CodeGenModule &CGM);
 
-  void EmitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args) override;
+  void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
+  /// Creates module constructor function
+  llvm::Function *makeModuleCtorFunction() override;
+  /// Creates module destructor function
+  llvm::Function *makeModuleDtorFunction() override;
 };
 
 }
 
-CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) : CGCUDARuntime(CGM) {
+CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
+    : CGCUDARuntime(CGM), Context(CGM.getLLVMContext()),
+      TheModule(CGM.getModule()) {
   CodeGen::CodeGenTypes &Types = CGM.getTypes();
   ASTContext &Ctx = CGM.getContext();
 
   IntTy = Types.ConvertType(Ctx.IntTy);
   SizeTy = Types.ConvertType(Ctx.getSizeType());
+  VoidTy = llvm::Type::getVoidTy(Context);
 
   CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
   VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
+  VoidPtrPtrTy = VoidPtrTy->getPointerTo();
 }
 
 llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const {
@@ -68,14 +104,17 @@
 
 llvm::Constant *CGNVCUDARuntime::getLaunchFn() const {
   // cudaError_t cudaLaunch(char *)
-  std::vector<llvm::Type*> Params;
-  Params.push_back(CharPtrTy);
-  return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
-                                                           Params, false),
-                                   "cudaLaunch");
+  return CGM.CreateRuntimeFunction(
+      llvm::FunctionType::get(IntTy, CharPtrTy, false), "cudaLaunch");
 }
 
-void CGNVCUDARuntime::EmitDeviceStubBody(CodeGenFunction &CGF,
+void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
+                                     FunctionArgList &Args) {
+  EmittedKernels.push_back(CGF.CurFn);
+  emitDeviceStubBody(CGF, Args);
+}
+
+void CGNVCUDARuntime::emitDeviceStubBody(CodeGenFunction &CGF,
                                          FunctionArgList &Args) {
   // Build the argument value list and the argument stack struct type.
   SmallVector<llvm::Value *, 16> ArgValues;
@@ -87,8 +126,7 @@
     assert(isa<llvm::PointerType>(V->getType()) && "Arg type not PointerType");
     ArgTypes.push_back(cast<llvm::PointerType>(V->getType())->getElementType());
   }
-  llvm::StructType *ArgStackTy = llvm::StructType::get(
-      CGF.getLLVMContext(), ArgTypes);
+  llvm::StructType *ArgStackTy = llvm::StructType::get(Context, ArgTypes);
 
   llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
 
@@ -120,6 +158,160 @@
   CGF.EmitBlock(EndBlock);
 }
 
+/// Creates internal function to register all kernel stubs generated in this
+/// module with the CUDA runtime.
+/// \code
+/// void __cuda_register_kernels(void** GpuBinaryHandle) {
+///    __cudaRegisterFunction(GpuBinaryHandle,Kernel0,...);
+///    ...
+///    __cudaRegisterFunction(GpuBinaryHandle,KernelM,...);
+/// }
+/// \endcode
+llvm::Function *CGNVCUDARuntime::makeRegisterKernelsFn() {
+  llvm::Function *RegisterKernelsFunc = llvm::Function::Create(
+      llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
+      llvm::GlobalValue::InternalLinkage, "__cuda_register_kernels", &TheModule);
+  llvm::BasicBlock *EntryBB =
+      llvm::BasicBlock::Create(Context, "entry", RegisterKernelsFunc);
+  CGBuilderTy Builder(Context);
+  Builder.SetInsertPoint(EntryBB);
+
+  // void __cudaRegisterFunction(void **, const char *, char *, const char *,
+  //                             int, uint3*, uint3*, dim3*, dim3*, int*)
+  std::vector<llvm::Type *> RegisterFuncParams = {
+      VoidPtrPtrTy, CharPtrTy, CharPtrTy, CharPtrTy, IntTy,
+      VoidPtrTy,    VoidPtrTy, VoidPtrTy, VoidPtrTy, IntTy->getPointerTo()};
+  llvm::Constant *RegisterFunc = CGM.CreateRuntimeFunction(
+      llvm::FunctionType::get(IntTy, RegisterFuncParams, false),
+      "__cudaRegisterFunction");
+
+  // Extract GpuBinaryHandle passed as the first argument to
+  // __cuda_register_kernels() and generate __cudaRegisterFunction() call for
+  // each emitted kernel.
+  llvm::Argument &GpuBinaryHandlePtr = *RegisterKernelsFunc->arg_begin();
+  for (llvm::Function *Kernel : EmittedKernels) {
+    llvm::Constant *KernelName = makeConstantString(Kernel->getName());
+    llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy);
+    llvm::Value *args[] = {
+        &GpuBinaryHandlePtr, Builder.CreateBitCast(Kernel, VoidPtrTy),
+        KernelName, KernelName, llvm::ConstantInt::get(IntTy, -1), NullPtr,
+        NullPtr, NullPtr, NullPtr,
+        llvm::ConstantPointerNull::get(IntTy->getPointerTo())};
+    Builder.CreateCall(RegisterFunc, args);
+  }
+
+  Builder.CreateRetVoid();
+  return RegisterKernelsFunc;
+}
+
+/// Creates a global constructor function for the module:
+/// \code
+/// void __cuda_module_ctor(void*) {
+///     Handle0 = __cudaRegisterFatBinary(GpuBinaryBlob0);
+///     __cuda_register_kernels(Handle0);
+///     ...
+///     HandleN = __cudaRegisterFatBinary(GpuBinaryBlobN);
+///     __cuda_register_kernels(HandleN);
+/// }
+/// \endcode
+llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
+  // void __cuda_register_kernels(void** handle);
+  llvm::Function *RegisterKernelsFunc = makeRegisterKernelsFn();
+  // void ** __cudaRegisterFatBinary(void *);
+  llvm::Constant *RegisterFatbinFunc = CGM.CreateRuntimeFunction(
+      llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false),
+      "__cudaRegisterFatBinary");
+  // struct { int magic; int version; void *gpu_binary; void *dont_care; };
+  llvm::StructType *FatbinWrapperTy =
+      llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy, nullptr);
+
+  llvm::Function *ModuleCtorFunc = llvm::Function::Create(
+      llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
+      llvm::GlobalValue::InternalLinkage, "__cuda_module_ctor", &TheModule);
+  llvm::BasicBlock *CtorEntryBB =
+      llvm::BasicBlock::Create(Context, "entry", ModuleCtorFunc);
+  CGBuilderTy CtorBuilder(Context);
+
+  CtorBuilder.SetInsertPoint(CtorEntryBB);
+
+  // For each GPU binary, register it with the CUDA runtime and store returned
+  // handle in a global variable and save the handle in GpuBinaryHandles vector
+  // to be cleaned up in destructor on exit. Then associate all known kernels
+  // with the GPU binary handle so CUDA runtime can figure out what to call on
+  // the GPU side.
+  for (const std::string &GpuBinaryFileName :
+       CGM.getCodeGenOpts().CudaGpuBinaryFileNames) {
+    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GpuBinaryOrErr =
+        llvm::MemoryBuffer::getFileOrSTDIN(GpuBinaryFileName);
+    if (std::error_code EC = GpuBinaryOrErr.getError()) {
+      CGM.getDiags().Report(diag::err_cannot_open_file) << GpuBinaryFileName
+                                                        << EC.message();
+      continue;
+    }
+
+    // Create initialized wrapper structure that points to the loaded GPU binary
+    llvm::Constant *Values[] = {
+        llvm::ConstantInt::get(IntTy, 0x466243b1), // Fatbin wrapper magic.
+        llvm::ConstantInt::get(IntTy, 1),          // Fatbin version.
+        makeConstantString(GpuBinaryOrErr.get()->getBuffer(), "", 16), // Data.
+        llvm::ConstantPointerNull::get(VoidPtrTy)}; // Unused in fatbin v1.
+    llvm::GlobalVariable *FatbinWrapper = new llvm::GlobalVariable(
+        TheModule, FatbinWrapperTy, true, llvm::GlobalValue::InternalLinkage,
+        llvm::ConstantStruct::get(FatbinWrapperTy, Values),
+        "__cuda_fatbin_wrapper");
+
+    // GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper);
+    llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
+        RegisterFatbinFunc,
+        CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy));
+    llvm::GlobalVariable *GpuBinaryHandle = new llvm::GlobalVariable(
+        TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage,
+        llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle");
+    CtorBuilder.CreateStore(RegisterFatbinCall, GpuBinaryHandle, false);
+
+    // Call __cuda_register_kernels(GpuBinaryHandle);
+    CtorBuilder.CreateCall(RegisterKernelsFunc, RegisterFatbinCall);
+
+    // Save GpuBinaryHandle so we can unregister it in destructor.
+    GpuBinaryHandles.push_back(GpuBinaryHandle);
+  }
+
+  CtorBuilder.CreateRetVoid();
+  return ModuleCtorFunc;
+}
+
+/// Creates a global destructor function that unregisters all GPU code blobs
+/// registered by constructor.
+/// \code
+/// void __cuda_module_dtor(void*) {
+///     __cudaUnregisterFatBinary(Handle0);
+///     ...
+///     __cudaUnregisterFatBinary(HandleN);
+/// }
+/// \endcode
+llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
+  // void __cudaUnregisterFatBinary(void ** handle);
+  llvm::Constant *UnregisterFatbinFunc = CGM.CreateRuntimeFunction(
+      llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
+      "__cudaUnregisterFatBinary");
+
+  llvm::Function *ModuleDtorFunc = llvm::Function::Create(
+      llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
+      llvm::GlobalValue::InternalLinkage, "__cuda_module_dtor", &TheModule);
+  llvm::BasicBlock *DtorEntryBB =
+      llvm::BasicBlock::Create(Context, "entry", ModuleDtorFunc);
+  CGBuilderTy DtorBuilder(Context);
+  DtorBuilder.SetInsertPoint(DtorEntryBB);
+
+  for (llvm::GlobalVariable *GpuBinaryHandle : GpuBinaryHandles) {
+    DtorBuilder.CreateCall(UnregisterFatbinFunc,
+                           DtorBuilder.CreateLoad(GpuBinaryHandle, false));
+  }
+
+  DtorBuilder.CreateRetVoid();
+  return ModuleDtorFunc;
+}
+
 CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
   return new CGNVCUDARuntime(CGM);
 }
diff --git a/lib/CodeGen/CGCUDARuntime.h b/lib/CodeGen/CGCUDARuntime.h
index 8c162fb..dcacf97 100644
--- a/lib/CodeGen/CGCUDARuntime.h
+++ b/lib/CodeGen/CGCUDARuntime.h
@@ -16,6 +16,10 @@
 #ifndef LLVM_CLANG_LIB_CODEGEN_CGCUDARUNTIME_H
 #define LLVM_CLANG_LIB_CODEGEN_CGCUDARUNTIME_H
 
+namespace llvm {
+class Function;
+}
+
 namespace clang {
 
 class CUDAKernelCallExpr;
@@ -39,10 +43,17 @@
   virtual RValue EmitCUDAKernelCallExpr(CodeGenFunction &CGF,
                                         const CUDAKernelCallExpr *E,
                                         ReturnValueSlot ReturnValue);
-  
-  virtual void EmitDeviceStubBody(CodeGenFunction &CGF,
-                                  FunctionArgList &Args) = 0;
 
+  /// Emits a kernel launch stub.
+  virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0;
+
+  /// Constructs and returns a module initialization function or nullptr if it's
+  /// not needed. Must be called after all kernels have been emitted.
+  virtual llvm::Function *makeModuleCtorFunction() = 0;
+
+  /// Returns a module cleanup function or nullptr if it's not needed.
+  /// Must be called after ModuleCtorFunction.
+  virtual llvm::Function *makeModuleDtorFunction() = 0;
 };
 
 /// Creates an instance of a CUDA runtime class.
diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp
index 9f0e67e..7d7ed78 100644
--- a/lib/CodeGen/CGCXX.cpp
+++ b/lib/CodeGen/CGCXX.cpp
@@ -182,8 +182,8 @@
     return true;
 
   // Create the alias with no name.
-  auto *Alias = llvm::GlobalAlias::create(AliasType->getElementType(), 0,
-                                          Linkage, "", Aliasee, &getModule());
+  auto *Alias =
+      llvm::GlobalAlias::create(AliasType, Linkage, "", Aliasee, &getModule());
 
   // Switch any previous uses to the alias.
   if (Entry) {
@@ -218,6 +218,8 @@
   }
 
   setFunctionLinkage(GD, Fn);
+  setFunctionDLLStorageClass(GD, Fn);
+
   CodeGenFunction(*this).GenerateCode(GD, Fn, FnInfo);
   setFunctionDefinitionAttributes(MD, Fn);
   SetLLVMFunctionAttributesForDefinition(MD, Fn);
@@ -231,8 +233,7 @@
   if (auto *CD = dyn_cast<CXXConstructorDecl>(MD)) {
     GD = GlobalDecl(CD, toCXXCtorType(Type));
   } else {
-    auto *DD = dyn_cast<CXXDestructorDecl>(MD);
-    GD = GlobalDecl(DD, toCXXDtorType(Type));
+    GD = GlobalDecl(cast<CXXDestructorDecl>(MD), toCXXDtorType(Type));
   }
 
   StringRef Name = getMangledName(GD);
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index c031bd7..6903073 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -30,6 +30,7 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <sstream>
 using namespace clang;
@@ -1464,6 +1465,8 @@
       FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
     }
 
+    FuncAttrs.addAttribute("disable-tail-calls",
+                           llvm::toStringRef(CodeGenOpts.DisableTailCalls));
     FuncAttrs.addAttribute("less-precise-fpmad",
                            llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD));
     FuncAttrs.addAttribute("no-infs-fp-math",
@@ -1480,24 +1483,62 @@
     if (!CodeGenOpts.StackRealignment)
       FuncAttrs.addAttribute("no-realign-stack");
 
-    // Add target-cpu and target-features work if they differ from the defaults.
-    std::string &CPU = getTarget().getTargetOpts().CPU;
-    if (CPU != "" && CPU != getTarget().getTriple().getArchName())
-      FuncAttrs.addAttribute("target-cpu", getTarget().getTargetOpts().CPU);
+    // Add target-cpu and target-features attributes to functions. If
+    // we have a decl for the function and it has a target attribute then
+    // parse that and add it to the feature set.
+    StringRef TargetCPU = getTarget().getTargetOpts().CPU;
 
-    // TODO: FeaturesAsWritten gets us the features on the command line,
-    // for canonicalization purposes we might want to avoid putting features
-    // in the target-features set if we know it'll be one of the default
-    // features in the backend, e.g. corei7-avx and +avx.
-    std::vector<std::string> &Features =
-        getTarget().getTargetOpts().FeaturesAsWritten;
+    // TODO: Features gets us the features on the command line including
+    // feature dependencies. For canonicalization purposes we might want to
+    // avoid putting features in the target-features set if we know it'll be
+    // one of the default features in the backend, e.g. corei7-avx and +avx or
+    // figure out non-explicit dependencies.
+    std::vector<std::string> Features(getTarget().getTargetOpts().Features);
+
+    // TODO: The target attribute complicates this further by allowing multiple
+    // additional features to be tacked on to the feature string for a
+    // particular function. For now we simply append to the set of features and
+    // let backend resolution fix them up.
+    const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl);
+    if (FD) {
+      if (const TargetAttr *TD = FD->getAttr<TargetAttr>()) {
+        StringRef FeaturesStr = TD->getFeatures();
+        SmallVector<StringRef, 1> AttrFeatures;
+        FeaturesStr.split(AttrFeatures, ",");
+
+        // Grab the various features and prepend a "+" to turn on the feature to
+        // the backend and add them to our existing set of Features.
+        for (auto &Feature : AttrFeatures) {
+          // While we're here iterating check for a different target cpu.
+          if (Feature.startswith("arch="))
+            TargetCPU = Feature.split("=").second;
+	  else if (Feature.startswith("tune="))
+	    // We don't support cpu tuning this way currently.
+	    ;
+	  else if (Feature.startswith("fpmath="))
+	    // TODO: Support the fpmath option this way. It will require checking
+	    // overall feature validity for the function with the rest of the
+	    // attributes on the function.
+	    ;
+	  else if (Feature.startswith("mno-"))
+            Features.push_back("-" + Feature.split("-").second.str());
+          else
+            Features.push_back("+" + Feature.str());
+	}
+      }
+    }
+
+    // Now add the target-cpu and target-features to the function.
+    if (TargetCPU != "")
+      FuncAttrs.addAttribute("target-cpu", TargetCPU);
     if (!Features.empty()) {
-      std::stringstream S;
+      std::stringstream TargetFeatures;
       std::copy(Features.begin(), Features.end(),
-                std::ostream_iterator<std::string>(S, ","));
+                std::ostream_iterator<std::string>(TargetFeatures, ","));
+
       // The drop_back gets rid of the trailing space.
       FuncAttrs.addAttribute("target-features",
-                             StringRef(S.str()).drop_back(1));
+                             StringRef(TargetFeatures.str()).drop_back(1));
     }
   }
 
@@ -1587,8 +1628,12 @@
     case ABIArgInfo::Extend:
       if (ParamType->isSignedIntegerOrEnumerationType())
         Attrs.addAttribute(llvm::Attribute::SExt);
-      else if (ParamType->isUnsignedIntegerOrEnumerationType())
-        Attrs.addAttribute(llvm::Attribute::ZExt);
+      else if (ParamType->isUnsignedIntegerOrEnumerationType()) {
+        if (getTypes().getABIInfo().shouldSignExtUnsignedType(ParamType))
+          Attrs.addAttribute(llvm::Attribute::SExt);
+        else
+          Attrs.addAttribute(llvm::Attribute::ZExt);
+      }
       // FALL THROUGH
     case ABIArgInfo::Direct:
       if (ArgNo == 0 && FI.isChainCall())
@@ -1812,8 +1857,7 @@
         ArgVals.push_back(ValueAndIsPtr(V, HavePointer));
       } else {
         // Load scalar value from indirect argument.
-        CharUnits Alignment = getContext().getTypeAlignInChars(Ty);
-        V = EmitLoadOfScalar(V, false, Alignment.getQuantity(), Ty,
+        V = EmitLoadOfScalar(V, false, ArgI.getIndirectAlign(), Ty,
                              Arg->getLocStart());
 
         if (isPromoted)
@@ -2216,7 +2260,28 @@
   if (!CGF.ReturnValue->hasOneUse()) {
     llvm::BasicBlock *IP = CGF.Builder.GetInsertBlock();
     if (IP->empty()) return nullptr;
-    llvm::StoreInst *store = dyn_cast<llvm::StoreInst>(&IP->back());
+    llvm::Instruction *I = &IP->back();
+
+    // Skip lifetime markers
+    for (llvm::BasicBlock::reverse_iterator II = IP->rbegin(),
+                                            IE = IP->rend();
+         II != IE; ++II) {
+      if (llvm::IntrinsicInst *Intrinsic =
+              dyn_cast<llvm::IntrinsicInst>(&*II)) {
+        if (Intrinsic->getIntrinsicID() == llvm::Intrinsic::lifetime_end) {
+          const llvm::Value *CastAddr = Intrinsic->getArgOperand(1);
+          ++II;
+          if (II == IE)
+            break;
+          if (isa<llvm::BitCastInst>(&*II) && (CastAddr == &*II))
+            continue;
+        }
+      }
+      I = &*II;
+      break;
+    }
+
+    llvm::StoreInst *store = dyn_cast<llvm::StoreInst>(I);
     if (!store) return nullptr;
     if (store->getPointerOperand() != CGF.ReturnValue) return nullptr;
     assert(!store->isAtomic() && !store->isVolatile()); // see below
@@ -2314,7 +2379,8 @@
 
       // If there is a dominating store to ReturnValue, we can elide
       // the load, zap the store, and usually zap the alloca.
-      if (llvm::StoreInst *SI = findDominatingStoreToReturnValue(*this)) {
+      if (llvm::StoreInst *SI =
+              findDominatingStoreToReturnValue(*this)) {
         // Reuse the debug location from the store unless there is
         // cleanup code to be emitted between the store and return
         // instruction.
@@ -2669,7 +2735,7 @@
 
   // Save the stack.
   llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::stacksave);
-  StackBase = CGF.Builder.CreateCall(F, "inalloca.save");
+  StackBase = CGF.Builder.CreateCall(F, {}, "inalloca.save");
 
   // Control gets really tied up in landing pads, so we have to spill the
   // stacksave to an alloca to avoid violating SSA form.
@@ -2692,27 +2758,28 @@
   }
 }
 
-static void emitNonNullArgCheck(CodeGenFunction &CGF, RValue RV,
-                                QualType ArgType, SourceLocation ArgLoc,
-                                const FunctionDecl *FD, unsigned ParmNum) {
-  if (!CGF.SanOpts.has(SanitizerKind::NonnullAttribute) || !FD)
+void CodeGenFunction::EmitNonNullArgCheck(RValue RV, QualType ArgType,
+                                          SourceLocation ArgLoc,
+                                          const FunctionDecl *FD,
+                                          unsigned ParmNum) {
+  if (!SanOpts.has(SanitizerKind::NonnullAttribute) || !FD)
     return;
   auto PVD = ParmNum < FD->getNumParams() ? FD->getParamDecl(ParmNum) : nullptr;
   unsigned ArgNo = PVD ? PVD->getFunctionScopeIndex() : ParmNum;
   auto NNAttr = getNonNullAttr(FD, PVD, ArgType, ArgNo);
   if (!NNAttr)
     return;
-  CodeGenFunction::SanitizerScope SanScope(&CGF);
+  SanitizerScope SanScope(this);
   assert(RV.isScalar());
   llvm::Value *V = RV.getScalarVal();
   llvm::Value *Cond =
-      CGF.Builder.CreateICmpNE(V, llvm::Constant::getNullValue(V->getType()));
+      Builder.CreateICmpNE(V, llvm::Constant::getNullValue(V->getType()));
   llvm::Constant *StaticData[] = {
-      CGF.EmitCheckSourceLocation(ArgLoc),
-      CGF.EmitCheckSourceLocation(NNAttr->getLocation()),
-      llvm::ConstantInt::get(CGF.Int32Ty, ArgNo + 1),
+      EmitCheckSourceLocation(ArgLoc),
+      EmitCheckSourceLocation(NNAttr->getLocation()),
+      llvm::ConstantInt::get(Int32Ty, ArgNo + 1),
   };
-  CGF.EmitCheck(std::make_pair(Cond, SanitizerKind::NonnullAttribute),
+  EmitCheck(std::make_pair(Cond, SanitizerKind::NonnullAttribute),
                 "nonnull_arg", StaticData, None);
 }
 
@@ -2740,7 +2807,7 @@
     for (int I = ArgTypes.size() - 1; I >= 0; --I) {
       CallExpr::const_arg_iterator Arg = ArgBeg + I;
       EmitCallArg(Args, *Arg, ArgTypes[I]);
-      emitNonNullArgCheck(*this, Args.back().RV, ArgTypes[I], Arg->getExprLoc(),
+      EmitNonNullArgCheck(Args.back().RV, ArgTypes[I], Arg->getExprLoc(),
                           CalleeDecl, ParamsToSkip + I);
     }
 
@@ -2754,7 +2821,7 @@
     CallExpr::const_arg_iterator Arg = ArgBeg + I;
     assert(Arg != ArgEnd);
     EmitCallArg(Args, *Arg, ArgTypes[I]);
-    emitNonNullArgCheck(*this, Args.back().RV, ArgTypes[I], Arg->getExprLoc(),
+    EmitNonNullArgCheck(Args.back().RV, ArgTypes[I], Arg->getExprLoc(),
                         CalleeDecl, ParamsToSkip + I);
   }
 }
@@ -2948,7 +3015,6 @@
     call->setCallingConv(getRuntimeCC());
     Builder.CreateUnreachable();
   }
-  PGO.setCurrentRegionUnreachable();
 }
 
 /// Emits a call or invoke instruction to the given nullary runtime
@@ -3055,10 +3121,18 @@
   // If the call returns a temporary with struct return, create a temporary
   // alloca to hold the result, unless one is given to us.
   llvm::Value *SRetPtr = nullptr;
+  size_t UnusedReturnSize = 0;
   if (RetAI.isIndirect() || RetAI.isInAlloca()) {
     SRetPtr = ReturnValue.getValue();
-    if (!SRetPtr)
+    if (!SRetPtr) {
       SRetPtr = CreateMemTemp(RetTy);
+      if (HaveInsertPoint() && ReturnValue.isUnused()) {
+        uint64_t size =
+            CGM.getDataLayout().getTypeAllocSize(ConvertTypeForMem(RetTy));
+        if (EmitLifetimeStart(size, SRetPtr))
+          UnusedReturnSize = size;
+      }
+    }
     if (IRFunctionArgs.hasSRetArg()) {
       IRCallArgs[IRFunctionArgs.getSRetArgNo()] = SRetPtr;
     } else {
@@ -3390,6 +3464,10 @@
   // insertion point; this allows the rest of IRgen to discard
   // unreachable code.
   if (CS.doesNotReturn()) {
+    if (UnusedReturnSize)
+      EmitLifetimeEnd(llvm::ConstantInt::get(Int64Ty, UnusedReturnSize),
+                      SRetPtr);
+
     Builder.CreateUnreachable();
     Builder.ClearInsertionPoint();
 
@@ -3418,8 +3496,13 @@
   RValue Ret = [&] {
     switch (RetAI.getKind()) {
     case ABIArgInfo::InAlloca:
-    case ABIArgInfo::Indirect:
-      return convertTempToRValue(SRetPtr, RetTy, SourceLocation());
+    case ABIArgInfo::Indirect: {
+      RValue ret = convertTempToRValue(SRetPtr, RetTy, SourceLocation());
+      if (UnusedReturnSize)
+        EmitLifetimeEnd(llvm::ConstantInt::get(Int64Ty, UnusedReturnSize),
+                        SRetPtr);
+      return ret;
+    }
 
     case ABIArgInfo::Ignore:
       // If we are ignoring an argument that had a result, make sure to
diff --git a/lib/CodeGen/CGCall.h b/lib/CodeGen/CGCall.h
index b228733..7a4708e 100644
--- a/lib/CodeGen/CGCall.h
+++ b/lib/CodeGen/CGCall.h
@@ -155,17 +155,25 @@
   /// ReturnValueSlot - Contains the address where the return value of a 
   /// function can be stored, and whether the address is volatile or not.
   class ReturnValueSlot {
-    llvm::PointerIntPair<llvm::Value *, 1, bool> Value;
+    llvm::PointerIntPair<llvm::Value *, 2, unsigned int> Value;
+
+    // Return value slot flags
+    enum Flags {
+      IS_VOLATILE = 0x1,
+      IS_UNUSED = 0x2,
+    };
 
   public:
     ReturnValueSlot() {}
-    ReturnValueSlot(llvm::Value *Value, bool IsVolatile)
-      : Value(Value, IsVolatile) {}
+    ReturnValueSlot(llvm::Value *Value, bool IsVolatile, bool IsUnused = false)
+      : Value(Value,
+              (IsVolatile ? IS_VOLATILE : 0) | (IsUnused ? IS_UNUSED : 0)) {}
 
     bool isNull() const { return !getValue(); }
-    
-    bool isVolatile() const { return Value.getInt(); }
+
+    bool isVolatile() const { return Value.getInt() & IS_VOLATILE; }
     llvm::Value *getValue() const { return Value.getPointer(); }
+    bool isUnused() const { return Value.getInt() & IS_UNUSED; }
   };
   
 }  // end namespace CodeGen
diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp
index bd15c12..1320cd3 100644
--- a/lib/CodeGen/CGClass.cpp
+++ b/lib/CodeGen/CGClass.cpp
@@ -29,31 +29,31 @@
 using namespace clang;
 using namespace CodeGen;
 
-static CharUnits 
-ComputeNonVirtualBaseClassOffset(ASTContext &Context, 
+static CharUnits
+ComputeNonVirtualBaseClassOffset(ASTContext &Context,
                                  const CXXRecordDecl *DerivedClass,
                                  CastExpr::path_const_iterator Start,
                                  CastExpr::path_const_iterator End) {
   CharUnits Offset = CharUnits::Zero();
-  
+
   const CXXRecordDecl *RD = DerivedClass;
-  
+
   for (CastExpr::path_const_iterator I = Start; I != End; ++I) {
     const CXXBaseSpecifier *Base = *I;
     assert(!Base->isVirtual() && "Should not see virtual bases here!");
 
     // Get the layout.
     const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
-    
-    const CXXRecordDecl *BaseDecl = 
+
+    const CXXRecordDecl *BaseDecl =
       cast<CXXRecordDecl>(Base->getType()->getAs<RecordType>()->getDecl());
-    
+
     // Add the offset.
     Offset += Layout.getBaseClassOffset(BaseDecl);
-    
+
     RD = BaseDecl;
   }
-  
+
   return Offset;
 }
 
@@ -63,15 +63,15 @@
                                    CastExpr::path_const_iterator PathEnd) {
   assert(PathBegin != PathEnd && "Base path should not be empty!");
 
-  CharUnits Offset = 
+  CharUnits Offset =
     ComputeNonVirtualBaseClassOffset(getContext(), ClassDecl,
                                      PathBegin, PathEnd);
   if (Offset.isZero())
     return nullptr;
 
-  llvm::Type *PtrDiffTy = 
+  llvm::Type *PtrDiffTy =
   Types.ConvertType(getContext().getPointerDiffType());
-  
+
   return llvm::ConstantInt::get(PtrDiffTy, Offset.getQuantity());
 }
 
@@ -128,7 +128,7 @@
   } else {
     baseOffset = virtualOffset;
   }
-  
+
   // Apply the base offset.
   ptr = CGF.Builder.CreateBitCast(ptr, CGF.Int8PtrTy);
   ptr = CGF.Builder.CreateInBoundsGEP(ptr, baseOffset, "add.ptr");
@@ -150,7 +150,7 @@
   // *start* with a step down to the correct virtual base subobject,
   // and hence will not require any further steps.
   if ((*Start)->isVirtual()) {
-    VBase = 
+    VBase =
       cast<CXXRecordDecl>((*Start)->getType()->getAs<RecordType>()->getDecl());
     ++Start;
   }
@@ -158,7 +158,7 @@
   // Compute the static offset of the ultimate destination within its
   // allocating subobject (the virtual base, if there is one, or else
   // the "complete" object that we see).
-  CharUnits NonVirtualOffset = 
+  CharUnits NonVirtualOffset =
     ComputeNonVirtualBaseClassOffset(getContext(), VBase ? VBase : Derived,
                                      Start, PathEnd);
 
@@ -173,7 +173,7 @@
   }
 
   // Get the base pointer type.
-  llvm::Type *BasePtrTy = 
+  llvm::Type *BasePtrTy =
     ConvertType((PathEnd[-1])->getType())->getPointerTo();
 
   QualType DerivedTy = getContext().getRecordType(Derived);
@@ -198,7 +198,7 @@
     origBB = Builder.GetInsertBlock();
     llvm::BasicBlock *notNullBB = createBasicBlock("cast.notnull");
     endBB = createBasicBlock("cast.end");
-    
+
     llvm::Value *isNull = Builder.CreateIsNull(Value);
     Builder.CreateCondBr(isNull, endBB, notNullBB);
     EmitBlock(notNullBB);
@@ -217,10 +217,10 @@
   }
 
   // Apply both offsets.
-  Value = ApplyNonVirtualAndVirtualOffset(*this, Value, 
+  Value = ApplyNonVirtualAndVirtualOffset(*this, Value,
                                           NonVirtualOffset,
                                           VirtualOffset);
-  
+
   // Cast to the destination type.
   Value = Builder.CreateBitCast(Value, BasePtrTy);
 
@@ -229,13 +229,13 @@
     llvm::BasicBlock *notNullBB = Builder.GetInsertBlock();
     Builder.CreateBr(endBB);
     EmitBlock(endBB);
-    
+
     llvm::PHINode *PHI = Builder.CreatePHI(BasePtrTy, 2, "cast.result");
     PHI->addIncoming(Value, notNullBB);
     PHI->addIncoming(llvm::Constant::getNullValue(BasePtrTy), origBB);
     Value = PHI;
   }
-  
+
   return Value;
 }
 
@@ -253,7 +253,7 @@
 
   llvm::Value *NonVirtualOffset =
     CGM.GetNonVirtualBaseClassOffset(Derived, PathBegin, PathEnd);
-  
+
   if (!NonVirtualOffset) {
     // No offset, we can just cast back.
     return Builder.CreateBitCast(Value, DerivedPtrTy);
@@ -267,12 +267,12 @@
     CastNull = createBasicBlock("cast.null");
     CastNotNull = createBasicBlock("cast.notnull");
     CastEnd = createBasicBlock("cast.end");
-    
+
     llvm::Value *IsNull = Builder.CreateIsNull(Value);
     Builder.CreateCondBr(IsNull, CastNull, CastNotNull);
     EmitBlock(CastNotNull);
   }
-  
+
   // Apply the offset.
   Value = Builder.CreateBitCast(Value, Int8PtrTy);
   Value = Builder.CreateGEP(Value, Builder.CreateNeg(NonVirtualOffset),
@@ -286,14 +286,14 @@
     EmitBlock(CastNull);
     Builder.CreateBr(CastEnd);
     EmitBlock(CastEnd);
-    
+
     llvm::PHINode *PHI = Builder.CreatePHI(Value->getType(), 2);
     PHI->addIncoming(Value, CastNotNull);
-    PHI->addIncoming(llvm::Constant::getNullValue(Value->getType()), 
+    PHI->addIncoming(llvm::Constant::getNullValue(Value->getType()),
                      CastNull);
     Value = PHI;
   }
-  
+
   return Value;
 }
 
@@ -304,7 +304,7 @@
     // This constructor/destructor does not need a VTT parameter.
     return nullptr;
   }
-  
+
   const CXXRecordDecl *RD = cast<CXXMethodDecl>(CurCodeDecl)->getParent();
   const CXXRecordDecl *Base = cast<CXXMethodDecl>(GD.getDecl())->getParent();
 
@@ -324,15 +324,15 @@
     SubVTTIndex = 0;
   } else {
     const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
-    CharUnits BaseOffset = ForVirtualBase ? 
-      Layout.getVBaseClassOffset(Base) : 
+    CharUnits BaseOffset = ForVirtualBase ?
+      Layout.getVBaseClassOffset(Base) :
       Layout.getBaseClassOffset(Base);
 
-    SubVTTIndex = 
+    SubVTTIndex =
       CGM.getVTables().getSubVTTIndex(RD, BaseSubobject(Base, BaseOffset));
     assert(SubVTTIndex != 0 && "Sub-VTT index must be greater than zero!");
   }
-  
+
   if (CGM.getCXXABI().NeedsVTTParameter(CurGD)) {
     // A VTT parameter was passed to the constructor, use it.
     VTT = LoadCXXVTT();
@@ -359,7 +359,7 @@
         cast<CXXMethodDecl>(CGF.CurCodeDecl)->getParent();
 
       const CXXDestructorDecl *D = BaseClass->getDestructor();
-      llvm::Value *Addr = 
+      llvm::Value *Addr =
         CGF.GetAddressOfDirectBaseInCompleteClass(CGF.LoadCXXThis(),
                                                   DerivedClass, BaseClass,
                                                   BaseIsVirtual);
@@ -370,29 +370,29 @@
 
   /// A visitor which checks whether an initializer uses 'this' in a
   /// way which requires the vtable to be properly set.
-  struct DynamicThisUseChecker : EvaluatedExprVisitor<DynamicThisUseChecker> {
-    typedef EvaluatedExprVisitor<DynamicThisUseChecker> super;
+  struct DynamicThisUseChecker : ConstEvaluatedExprVisitor<DynamicThisUseChecker> {
+    typedef ConstEvaluatedExprVisitor<DynamicThisUseChecker> super;
 
     bool UsesThis;
 
-    DynamicThisUseChecker(ASTContext &C) : super(C), UsesThis(false) {}
+    DynamicThisUseChecker(const ASTContext &C) : super(C), UsesThis(false) {}
 
     // Black-list all explicit and implicit references to 'this'.
     //
     // Do we need to worry about external references to 'this' derived
     // from arbitrary code?  If so, then anything which runs arbitrary
     // external code might potentially access the vtable.
-    void VisitCXXThisExpr(CXXThisExpr *E) { UsesThis = true; }
+    void VisitCXXThisExpr(const CXXThisExpr *E) { UsesThis = true; }
   };
 }
 
 static bool BaseInitializerUsesThis(ASTContext &C, const Expr *Init) {
   DynamicThisUseChecker Checker(C);
-  Checker.Visit(const_cast<Expr*>(Init));
+  Checker.Visit(Init);
   return Checker.UsesThis;
 }
 
-static void EmitBaseInitializer(CodeGenFunction &CGF, 
+static void EmitBaseInitializer(CodeGenFunction &CGF,
                                 const CXXRecordDecl *ClassDecl,
                                 CXXCtorInitializer *BaseInit,
                                 CXXCtorType CtorType) {
@@ -400,7 +400,7 @@
          "Must have base initializer!");
 
   llvm::Value *ThisPtr = CGF.LoadCXXThis();
-  
+
   const Type *BaseType = BaseInit->getBaseClass();
   CXXRecordDecl *BaseClassDecl =
     cast<CXXRecordDecl>(BaseType->getAs<RecordType>()->getDecl());
@@ -419,7 +419,7 @@
 
   // We can pretend to be a complete class because it only matters for
   // virtual bases, and we only do virtual bases for complete ctors.
-  llvm::Value *V = 
+  llvm::Value *V =
     CGF.GetAddressOfDirectBaseInCompleteClass(ThisPtr, ClassDecl,
                                               BaseClassDecl,
                                               isBaseVirtual);
@@ -431,8 +431,8 @@
                           AggValueSlot::IsNotAliased);
 
   CGF.EmitAggExpr(BaseInit->getInit(), AggSlot);
-  
-  if (CGF.CGM.getLangOpts().Exceptions && 
+
+  if (CGF.CGM.getLangOpts().Exceptions &&
       !BaseClassDecl->hasTrivialDestructor())
     CGF.EHStack.pushCleanup<CallBaseDtor>(EHCleanup, BaseClassDecl,
                                           isBaseVirtual);
@@ -491,17 +491,17 @@
   llvm::Value *IndexVar
     = CGF.GetAddrOfLocalVar(ArrayIndexes[Index]);
   assert(IndexVar && "Array index variable not loaded");
-  
+
   // Initialize this index variable to zero.
   llvm::Value* Zero
     = llvm::Constant::getNullValue(
                               CGF.ConvertType(CGF.getContext().getSizeType()));
   CGF.Builder.CreateStore(Zero, IndexVar);
-                                   
+
   // Start the loop with a block that tests the condition.
   llvm::BasicBlock *CondBlock = CGF.createBasicBlock("for.cond");
   llvm::BasicBlock *AfterFor = CGF.createBasicBlock("for.end");
-  
+
   CGF.EmitBlock(CondBlock);
 
   llvm::BasicBlock *ForBody = CGF.createBasicBlock("for.body");
@@ -513,7 +513,7 @@
     llvm::ConstantInt::get(Counter->getType(), NumElements);
   llvm::Value *IsLess = CGF.Builder.CreateICmpULT(Counter, NumElementsPtr,
                                                   "isless");
-                                   
+
   // If the condition is true, execute the body.
   CGF.Builder.CreateCondBr(IsLess, ForBody, AfterFor);
 
@@ -540,6 +540,23 @@
   CGF.EmitBlock(AfterFor, true);
 }
 
+/// Returns true if a call to the given special member function can be
+/// lowered as a memcpy of the object representation rather than emitted as
+/// a real call. Only copy/move constructors and copy/move assignment
+/// operators are candidates; any other member function returns false.
+static bool isMemcpyEquivalentSpecialMember(const CXXMethodDecl *D) {
+  auto *CD = dyn_cast<CXXConstructorDecl>(D);
+  if (!(CD && CD->isCopyOrMoveConstructor()) &&
+      !D->isCopyAssignmentOperator() && !D->isMoveAssignmentOperator())
+    return false;
+
+  // We can emit a memcpy for a trivial copy or move constructor/assignment
+  // (but not when the class may have extra padding inserted, since the
+  // padding bytes must not be copied as-is).
+  if (D->isTrivial() && !D->getParent()->mayInsertExtraPadding())
+    return true;
+
+  // We *must* emit a memcpy for a defaulted union copy or move op.
+  if (D->getParent()->isUnion() && D->isDefaulted())
+    return true;
+
+  return false;
+}
+
 static void EmitMemberInitializer(CodeGenFunction &CGF,
                                   const CXXRecordDecl *ClassDecl,
                                   CXXCtorInitializer *MemberInit,
@@ -549,7 +566,7 @@
   assert(MemberInit->isAnyMemberInitializer() &&
          "Must have member initializer!");
   assert(MemberInit->getInit() && "Must have initializer!");
-  
+
   // non-static data member initializers.
   FieldDecl *Field = MemberInit->getAnyMember();
   QualType FieldType = Field->getType();
@@ -581,14 +598,14 @@
     QualType BaseElementTy = CGF.getContext().getBaseElementType(Array);
     CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(MemberInit->getInit());
     if (BaseElementTy.isPODType(CGF.getContext()) ||
-        (CE && CE->getConstructor()->isTrivial())) {
+        (CE && isMemcpyEquivalentSpecialMember(CE->getConstructor()))) {
       unsigned SrcArgIndex =
           CGF.CGM.getCXXABI().getSrcArgforCopyCtor(Constructor, Args);
       llvm::Value *SrcPtr
         = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(Args[SrcArgIndex]));
       LValue ThisRHSLV = CGF.MakeNaturalAlignAddrLValue(SrcPtr, RecordTy);
       LValue Src = CGF.EmitLValueForFieldInitialization(ThisRHSLV, Field);
-      
+
       // Copy the aggregate.
       CGF.EmitAggregateCopy(LHS.getAddress(), Src.getAddress(), FieldType,
                             LHS.isVolatileQualified());
@@ -622,28 +639,28 @@
     llvm::Value *ArrayIndexVar = nullptr;
     if (ArrayIndexes.size()) {
       llvm::Type *SizeTy = ConvertType(getContext().getSizeType());
-      
+
       // The LHS is a pointer to the first object we'll be constructing, as
       // a flat array.
       QualType BaseElementTy = getContext().getBaseElementType(FieldType);
       llvm::Type *BasePtr = ConvertType(BaseElementTy);
       BasePtr = llvm::PointerType::getUnqual(BasePtr);
-      llvm::Value *BaseAddrPtr = Builder.CreateBitCast(LHS.getAddress(), 
+      llvm::Value *BaseAddrPtr = Builder.CreateBitCast(LHS.getAddress(),
                                                        BasePtr);
       LHS = MakeAddrLValue(BaseAddrPtr, BaseElementTy);
-      
+
       // Create an array index that will be used to walk over all of the
       // objects we're constructing.
       ArrayIndexVar = CreateTempAlloca(SizeTy, "object.index");
       llvm::Value *Zero = llvm::Constant::getNullValue(SizeTy);
       Builder.CreateStore(Zero, ArrayIndexVar);
-      
-      
+
+
       // Emit the block variables for the array indices, if any.
       for (unsigned I = 0, N = ArrayIndexes.size(); I != N; ++I)
         EmitAutoVarDecl(*ArrayIndexes[I]);
     }
-    
+
     EmitAggMemberInitializer(*this, LHS, Init, ArrayIndexVar, FieldType,
                              ArrayIndexes, 0);
   }
@@ -763,9 +780,9 @@
     if (PoisonSize < AsanAlignment || !SSV[i].Size ||
         (NextField % AsanAlignment) != 0)
       continue;
-    Builder.CreateCall2(
-        F, Builder.CreateAdd(ThisPtr, Builder.getIntN(PtrSize, EndOffset)),
-        Builder.getIntN(PtrSize, PoisonSize));
+    Builder.CreateCall(
+        F, {Builder.CreateAdd(ThisPtr, Builder.getIntN(PtrSize, EndOffset)),
+            Builder.getIntN(PtrSize, PoisonSize)});
   }
 }
 
@@ -797,8 +814,7 @@
   if (IsTryBody)
     EnterCXXTryStmt(*cast<CXXTryStmt>(Body), true);
 
-  RegionCounter Cnt = getPGORegionCounter(Body);
-  Cnt.beginRegion(Builder);
+  incrementProfileCounter(Body);
 
   RunCleanupsScope RunCleanups(*this);
 
@@ -851,7 +867,7 @@
   public:
     FieldMemcpyizer(CodeGenFunction &CGF, const CXXRecordDecl *ClassDecl,
                     const VarDecl *SrcRec)
-      : CGF(CGF), ClassDecl(ClassDecl), SrcRec(SrcRec), 
+      : CGF(CGF), ClassDecl(ClassDecl), SrcRec(SrcRec),
         RecLayout(CGF.getContext().getASTRecordLayout(ClassDecl)),
         FirstField(nullptr), LastField(nullptr), FirstFieldOffset(0),
         LastFieldOffset(0), LastAddedFieldIndex(0) {}
@@ -877,7 +893,7 @@
       unsigned LastFieldSize =
         LastField->isBitField() ?
           LastField->getBitWidthValue(CGF.getContext()) :
-          CGF.getContext().getTypeSize(LastField->getType()); 
+          CGF.getContext().getTypeSize(LastField->getType());
       uint64_t MemcpySizeBits =
         LastFieldOffset + LastFieldSize - FirstByteOffset +
         CGF.getContext().getCharWidth() - 1;
@@ -1022,8 +1038,8 @@
       QualType FieldType = Field->getType();
       CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(MemberInit->getInit());
 
-      // Bail out on non-POD, not-trivially-constructable members.
-      if (!(CE && CE->getConstructor()->isTrivial()) &&
+      // Bail out on non-memcpyable, not-trivially-copyable members.
+      if (!(CE && isMemcpyEquivalentSpecialMember(CE->getConstructor())) &&
           !(FieldType.isTriviallyCopyableType(CGF.getContext()) ||
             FieldType->isReferenceType()))
         return false;
@@ -1128,9 +1144,7 @@
         return Field;
       } else if (CXXMemberCallExpr *MCE = dyn_cast<CXXMemberCallExpr>(S)) {
         CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(MCE->getCalleeDecl());
-        if (!(MD && (MD->isCopyAssignmentOperator() ||
-                       MD->isMoveAssignmentOperator()) &&
-              MD->isTrivial()))
+        if (!(MD && isMemcpyEquivalentSpecialMember(MD)))
           return nullptr;
         MemberExpr *IOA = dyn_cast<MemberExpr>(MCE->getImplicitObjectArgument());
         if (!IOA)
@@ -1190,7 +1204,7 @@
       if (F) {
         addMemcpyableField(F);
         AggregatedStmts.push_back(S);
-      } else {  
+      } else {
         emitAggregatedStmts();
         CGF.EmitStmt(S);
       }
@@ -1275,7 +1289,7 @@
 FieldHasTrivialDestructorBody(ASTContext &Context, const FieldDecl *Field);
 
 static bool
-HasTrivialDestructorBody(ASTContext &Context, 
+HasTrivialDestructorBody(ASTContext &Context,
                          const CXXRecordDecl *BaseClassDecl,
                          const CXXRecordDecl *MostDerivedClassDecl)
 {
@@ -1310,7 +1324,7 @@
         cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl());
       if (!HasTrivialDestructorBody(Context, VirtualBase,
                                     MostDerivedClassDecl))
-        return false;      
+        return false;
     }
   }
 
@@ -1326,7 +1340,7 @@
   const RecordType *RT = FieldBaseElementType->getAs<RecordType>();
   if (!RT)
     return true;
-  
+
   CXXRecordDecl *FieldClassDecl = cast<CXXRecordDecl>(RT->getDecl());
   return HasTrivialDestructorBody(Context, FieldClassDecl, FieldClassDecl);
 }
@@ -1352,6 +1366,10 @@
   const CXXDestructorDecl *Dtor = cast<CXXDestructorDecl>(CurGD.getDecl());
   CXXDtorType DtorType = CurGD.getDtorType();
 
+  Stmt *Body = Dtor->getBody();
+  if (Body)
+    incrementProfileCounter(Body);
+
   // The call to operator delete in a deleting destructor happens
   // outside of the function-try-block, which means it's always
   // possible to delegate the destructor body to the complete
@@ -1364,8 +1382,6 @@
     return;
   }
 
-  Stmt *Body = Dtor->getBody();
-
   // If the body is a function-try-block, enter the try before
   // anything else.
   bool isTryBody = (Body && isa<CXXTryStmt>(Body));
@@ -1375,11 +1391,11 @@
 
   // Enter the epilogue cleanups.
   RunCleanupsScope DtorEpilogue(*this);
-  
+
   // If this is the complete variant, just invoke the base variant;
   // the epilogue will destruct the virtual bases.  But we can't do
   // this optimization if the body is a function-try-block, because
-  // we'd introduce *two* handler blocks.  In the Microsoft ABI, we 
+  // we'd introduce *two* handler blocks.  In the Microsoft ABI, we
   // always delegate because we might not have a definition in this TU.
   switch (DtorType) {
   case Dtor_Comdat:
@@ -1400,13 +1416,10 @@
       break;
     }
     // Fallthrough: act like we're in the base variant.
-      
+
   case Dtor_Base:
     assert(Body);
 
-    RegionCounter Cnt = getPGORegionCounter(Body);
-    Cnt.beginRegion(Builder);
-
     // Enter the cleanup scopes for fields and non-virtual bases.
     EnterDtorCleanups(Dtor, Dtor_Base);
 
@@ -1448,7 +1461,7 @@
 
   AssignmentMemcpyizer AM(*this, AssignOp, Args);
   for (auto *I : RootCS->body())
-    AM.emitAssignment(I);  
+    AM.emitAssignment(I);
   AM.finish();
 }
 
@@ -1509,7 +1522,7 @@
       LValue ThisLV = CGF.MakeAddrLValue(thisValue, RecordTy);
       LValue LV = CGF.EmitLValueForField(ThisLV, field);
       assert(LV.isSimple());
-      
+
       CGF.emitDestroy(LV.getAddress(), field->getType(), destroyer,
                       flags.isForNormalCleanup() && useEHCleanupForArray);
     }
@@ -1527,7 +1540,7 @@
   // The deleting-destructor phase just needs to call the appropriate
   // operator delete that Sema picked up.
   if (DtorType == Dtor_Deleting) {
-    assert(DD->getOperatorDelete() && 
+    assert(DD->getOperatorDelete() &&
            "operator delete missing - EnterDtorCleanups");
     if (CXXStructorImplicitParamValue) {
       // If there is an implicit param to the deleting dtor, it's a boolean
@@ -1554,7 +1567,7 @@
     for (const auto &Base : ClassDecl->vbases()) {
       CXXRecordDecl *BaseClassDecl
         = cast<CXXRecordDecl>(Base.getType()->getAs<RecordType>()->getDecl());
-    
+
       // Ignore trivial destructors.
       if (BaseClassDecl->hasTrivialDestructor())
         continue;
@@ -1568,15 +1581,15 @@
   }
 
   assert(DtorType == Dtor_Base);
-  
+
   // Destroy non-virtual bases.
   for (const auto &Base : ClassDecl->bases()) {
     // Ignore virtual bases.
     if (Base.isVirtual())
       continue;
-    
+
     CXXRecordDecl *BaseClassDecl = Base.getType()->getAsCXXRecordDecl();
-    
+
     // Ignore trivial destructors.
     if (BaseClassDecl->hasTrivialDestructor())
       continue;
@@ -1657,7 +1670,7 @@
     zeroCheckBranch = Builder.CreateCondBr(iszero, loopBB, loopBB);
     EmitBlock(loopBB);
   }
-      
+
   // Find the end of the array.
   llvm::Value *arrayEnd = Builder.CreateInBoundsGEP(arrayBegin, numElements,
                                                     "arrayctor.end");
@@ -1677,15 +1690,15 @@
   // Zero initialize the storage, if requested.
   if (zeroInitialize)
     EmitNullInitialization(cur, type);
-  
-  // C++ [class.temporary]p4: 
+
+  // C++ [class.temporary]p4:
   // There are two contexts in which temporaries are destroyed at a different
   // point than the end of the full-expression. The first context is when a
-  // default constructor is called to initialize an element of an array. 
-  // If the constructor has one or more default arguments, the destruction of 
-  // every temporary created in a default argument expression is sequenced 
+  // default constructor is called to initialize an element of an array.
+  // If the constructor has one or more default arguments, the destruction of
+  // every temporary created in a default argument expression is sequenced
   // before the construction of the next array element, if any.
-  
+
   {
     RunCleanupsScope Scope(*this);
 
@@ -1734,18 +1747,23 @@
                                              bool ForVirtualBase,
                                              bool Delegating, llvm::Value *This,
                                              const CXXConstructExpr *E) {
-  // If this is a trivial constructor, just emit what's needed.
-  if (D->isTrivial() && !D->getParent()->mayInsertExtraPadding()) {
-    if (E->getNumArgs() == 0) {
-      // Trivial default constructor, no codegen required.
-      assert(D->isDefaultConstructor() &&
-             "trivial 0-arg ctor not a default ctor");
-      return;
-    }
+  // C++11 [class.mfct.non-static]p2:
+  //   If a non-static member function of a class X is called for an object that
+  //   is not of type X, or of a type derived from X, the behavior is undefined.
+  // FIXME: Provide a source location here.
+  EmitTypeCheck(CodeGenFunction::TCK_ConstructorCall, SourceLocation(), This,
+                getContext().getRecordType(D->getParent()));
 
+  if (D->isTrivial() && D->isDefaultConstructor()) {
+    assert(E->getNumArgs() == 0 && "trivial default ctor with args");
+    return;
+  }
+
+  // If this is a trivial constructor, just emit what's needed. If this is a
+  // union copy constructor, we must emit a memcpy, because the AST does not
+  // model that copy.
+  if (isMemcpyEquivalentSpecialMember(D)) {
     assert(E->getNumArgs() == 1 && "unexpected argcount for trivial ctor");
-    assert(D->isCopyOrMoveConstructor() &&
-           "trivial 1-arg ctor not a copy/move ctor");
 
     const Expr *Arg = E->getArg(0);
     QualType SrcTy = Arg->getType();
@@ -1755,13 +1773,6 @@
     return;
   }
 
-  // C++11 [class.mfct.non-static]p2:
-  //   If a non-static member function of a class X is called for an object that
-  //   is not of type X, or of a type derived from X, the behavior is undefined.
-  // FIXME: Provide a source location here.
-  EmitTypeCheck(CodeGenFunction::TCK_ConstructorCall, SourceLocation(), This,
-                getContext().getRecordType(D->getParent()));
-
   CallArgList Args;
 
   // Push the this ptr.
@@ -1786,8 +1797,7 @@
 CodeGenFunction::EmitSynthesizedCXXCopyCtorCall(const CXXConstructorDecl *D,
                                         llvm::Value *This, llvm::Value *Src,
                                         const CXXConstructExpr *E) {
-  if (D->isTrivial() &&
-      !D->getParent()->mayInsertExtraPadding()) {
+  if (isMemcpyEquivalentSpecialMember(D)) {
     assert(E->getNumArgs() == 1 && "unexpected argcount for trivial ctor");
     assert(D->isCopyOrMoveConstructor() &&
            "trivial 1-arg ctor not a copy/move ctor");
@@ -1799,14 +1809,14 @@
   llvm::Value *Callee = CGM.getAddrOfCXXStructor(D, StructorType::Complete);
   assert(D->isInstance() &&
          "Trying to emit a member call expr on a static method!");
-  
+
   const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>();
-  
+
   CallArgList Args;
-  
+
   // Push the this ptr.
   Args.add(RValue::get(This), D->getThisType(getContext()));
-  
+
   // Push the src ptr.
   QualType QT = *(FPT->param_type_begin());
   llvm::Type *t = CGM.getTypes().ConvertType(QT);
@@ -1949,7 +1959,7 @@
 }
 
 void
-CodeGenFunction::InitializeVTablePointer(BaseSubobject Base, 
+CodeGenFunction::InitializeVTablePointer(BaseSubobject Base,
                                          const CXXRecordDecl *NearestVBase,
                                          CharUnits OffsetFromNearestVBase,
                                          const CXXRecordDecl *VTableClass) {
@@ -1972,7 +1982,7 @@
   // Compute where to store the address point.
   llvm::Value *VirtualOffset = nullptr;
   CharUnits NonVirtualOffset = CharUnits::Zero();
-  
+
   if (NeedsVirtualOffset) {
     // We need to use the virtual base offset offset because the virtual base
     // might have a different offset in the most derived class.
@@ -1985,12 +1995,12 @@
     // We can just use the base offset in the complete class.
     NonVirtualOffset = Base.getBaseOffset();
   }
-  
+
   // Apply the offsets.
   llvm::Value *VTableField = LoadCXXThis();
-  
+
   if (!NonVirtualOffset.isZero() || VirtualOffset)
-    VTableField = ApplyNonVirtualAndVirtualOffset(*this, VTableField, 
+    VTableField = ApplyNonVirtualAndVirtualOffset(*this, VTableField,
                                                   NonVirtualOffset,
                                                   VirtualOffset);
 
@@ -2007,7 +2017,7 @@
 }
 
 void
-CodeGenFunction::InitializeVTablePointers(BaseSubobject Base, 
+CodeGenFunction::InitializeVTablePointers(BaseSubobject Base,
                                           const CXXRecordDecl *NearestVBase,
                                           CharUnits OffsetFromNearestVBase,
                                           bool BaseIsNonVirtualPrimaryBase,
@@ -2020,7 +2030,7 @@
     InitializeVTablePointer(Base, NearestVBase, OffsetFromNearestVBase,
                             VTableClass);
   }
-  
+
   const CXXRecordDecl *RD = Base.getBase();
 
   // Traverse bases.
@@ -2041,7 +2051,7 @@
       if (!VBases.insert(BaseDecl).second)
         continue;
 
-      const ASTRecordLayout &Layout = 
+      const ASTRecordLayout &Layout =
         getContext().getASTRecordLayout(VTableClass);
 
       BaseOffset = Layout.getVBaseClassOffset(BaseDecl);
@@ -2051,15 +2061,15 @@
       const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
 
       BaseOffset = Base.getBaseOffset() + Layout.getBaseClassOffset(BaseDecl);
-      BaseOffsetFromNearestVBase = 
+      BaseOffsetFromNearestVBase =
         OffsetFromNearestVBase + Layout.getBaseClassOffset(BaseDecl);
       BaseDeclIsNonVirtualPrimaryBase = Layout.getPrimaryBase() == BaseDecl;
     }
-    
-    InitializeVTablePointers(BaseSubobject(BaseDecl, BaseOffset), 
+
+    InitializeVTablePointers(BaseSubobject(BaseDecl, BaseOffset),
                              I.isVirtual() ? BaseDecl : NearestVBase,
                              BaseOffsetFromNearestVBase,
-                             BaseDeclIsNonVirtualPrimaryBase, 
+                             BaseDeclIsNonVirtualPrimaryBase,
                              VTableClass, VBases);
   }
 }
@@ -2071,7 +2081,7 @@
 
   // Initialize the vtable pointers for this class and all of its bases.
   VisitedVirtualBasesSetTy VBases;
-  InitializeVTablePointers(BaseSubobject(RD, CharUnits::Zero()), 
+  InitializeVTablePointers(BaseSubobject(RD, CharUnits::Zero()),
                            /*NearestVBase=*/nullptr,
                            /*OffsetFromNearestVBase=*/CharUnits::Zero(),
                            /*BaseIsNonVirtualPrimaryBase=*/false, RD, VBases);
@@ -2195,9 +2205,9 @@
   llvm::Value *BitSetName = llvm::MetadataAsValue::get(
       getLLVMContext(), llvm::MDString::get(getLLVMContext(), Out.str()));
 
-  llvm::Value *BitSetTest = Builder.CreateCall2(
+  llvm::Value *BitSetTest = Builder.CreateCall(
       CGM.getIntrinsic(llvm::Intrinsic::bitset_test),
-      Builder.CreateBitCast(VTable, CGM.Int8PtrTy), BitSetName);
+      {Builder.CreateBitCast(VTable, CGM.Int8PtrTy), BitSetName});
 
   llvm::BasicBlock *ContBlock = createBasicBlock("vtable.check.cont");
   llvm::BasicBlock *TrapBlock = createBasicBlock("vtable.check.trap");
@@ -2205,7 +2215,7 @@
   Builder.CreateCondBr(BitSetTest, ContBlock, TrapBlock);
 
   EmitBlock(TrapBlock);
-  Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::trap));
+  Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::trap), {});
   Builder.CreateUnreachable();
 
   EmitBlock(ContBlock);
@@ -2274,7 +2284,7 @@
       // This is a record decl. We know the type and can devirtualize it.
       return VD->getType()->isRecordType();
     }
-    
+
     return false;
   }
 
@@ -2288,11 +2298,11 @@
   // We can always devirtualize calls on temporary object expressions.
   if (isa<CXXConstructExpr>(Base))
     return true;
-  
+
   // And calls on bound temporaries.
   if (isa<CXXBindTemporaryExpr>(Base))
     return true;
-  
+
   // Check if this is a call expr that returns a record type.
   if (const CallExpr *CE = dyn_cast<CallExpr>(Base))
     return CE->getCallReturnType(getContext())->isRecordType();
@@ -2324,7 +2334,7 @@
   // We don't need to separately arrange the call arguments because
   // the call can't be variadic anyway --- it's impossible to forward
   // variadic arguments.
-  
+
   // Now emit our call.
   RValue RV = EmitCall(calleeFnInfo, callee, returnSlot,
                        callArgs, callOperator);
@@ -2352,7 +2362,7 @@
   for (auto param : BD->params())
     EmitDelegateCallArg(CallArgs, param, param->getLocStart());
 
-  assert(!Lambda->isGenericLambda() && 
+  assert(!Lambda->isGenericLambda() &&
             "generic lambda interconversion to block not implemented");
   EmitForwardingCallToLambda(Lambda->getLambdaCallOperator(), CallArgs);
 }
@@ -2390,7 +2400,7 @@
     const TemplateArgumentList *TAL = MD->getTemplateSpecializationArgs();
     FunctionTemplateDecl *CallOpTemplate = CallOp->getDescribedFunctionTemplate();
     void *InsertPos = nullptr;
-    FunctionDecl *CorrespondingCallOpSpecialization = 
+    FunctionDecl *CorrespondingCallOpSpecialization =
         CallOpTemplate->findSpecialization(TAL->asArray(), InsertPos);
     assert(CorrespondingCallOpSpecialization);
     CallOp = cast<CXXMethodDecl>(CorrespondingCallOpSpecialization);
diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp
index 299969a..d97e405 100644
--- a/lib/CodeGen/CGCleanup.cpp
+++ b/lib/CodeGen/CGCleanup.cpp
@@ -125,6 +125,17 @@
   return StartOfData;
 }
 
+/// Returns true if every scope between the innermost scope and \p Old
+/// (exclusive) is an EHCleanupScope that is flagged as a lifetime marker.
+/// Any non-cleanup scope, or a cleanup that is not a lifetime marker,
+/// makes the result false. An empty range is trivially true.
+bool EHScopeStack::containsOnlyLifetimeMarkers(
+    EHScopeStack::stable_iterator Old) const {
+  for (EHScopeStack::iterator it = begin(); stabilize(it) != Old; it++) {
+    EHCleanupScope *cleanup = dyn_cast<EHCleanupScope>(&*it);
+    if (!cleanup || !cleanup->isLifetimeMarker())
+      return false;
+  }
+
+  return true;
+}
+
 EHScopeStack::stable_iterator
 EHScopeStack::getInnermostActiveNormalCleanup() const {
   for (stable_iterator si = getInnermostNormalCleanup(), se = stable_end();
@@ -748,7 +759,15 @@
           Scope.getNumBranchAfters() == 1) {
         assert(!BranchThroughDest || !IsActive);
 
-        // TODO: clean up the possibly dead stores to the cleanup dest slot.
+        // Clean up the possibly dead store to the cleanup dest slot.
+        llvm::Instruction *NormalCleanupDestSlot =
+            cast<llvm::Instruction>(getNormalCleanupDestSlot());
+        if (NormalCleanupDestSlot->hasOneUse()) {
+          NormalCleanupDestSlot->user_back()->eraseFromParent();
+          NormalCleanupDestSlot->eraseFromParent();
+          NormalCleanupDest = nullptr;
+        }
+
         llvm::BasicBlock *BranchAfter = Scope.getBranchAfterBlock(0);
         InstsToAppend.push_back(llvm::BranchInst::Create(BranchAfter));
 
diff --git a/lib/CodeGen/CGCleanup.h b/lib/CodeGen/CGCleanup.h
index 5f94aec..81c6412 100644
--- a/lib/CodeGen/CGCleanup.h
+++ b/lib/CodeGen/CGCleanup.h
@@ -62,6 +62,9 @@
     /// Whether this cleanup is currently active.
     unsigned IsActive : 1;
 
+    /// Whether this cleanup is a lifetime marker
+    unsigned IsLifetimeMarker : 1;
+
     /// Whether the normal cleanup should test the activation flag.
     unsigned TestFlagInNormalCleanup : 1;
 
@@ -75,7 +78,7 @@
     /// The number of fixups required by enclosing scopes (not including
     /// this one).  If this is the top cleanup scope, all the fixups
     /// from this index onwards belong to this scope.
-    unsigned FixupDepth : 32 - 17 - NumCommonBits; // currently 13    
+    unsigned FixupDepth : 32 - 18 - NumCommonBits; // currently 13
   };
 
   class FilterBitFields {
@@ -272,6 +275,7 @@
     CleanupBits.IsNormalCleanup = isNormal;
     CleanupBits.IsEHCleanup = isEH;
     CleanupBits.IsActive = isActive;
+    CleanupBits.IsLifetimeMarker = false;
     CleanupBits.TestFlagInNormalCleanup = false;
     CleanupBits.TestFlagInEHCleanup = false;
     CleanupBits.CleanupSize = cleanupSize;
@@ -295,6 +299,9 @@
   bool isActive() const { return CleanupBits.IsActive; }
   void setActive(bool A) { CleanupBits.IsActive = A; }
 
+  bool isLifetimeMarker() const { return CleanupBits.IsLifetimeMarker; }
+  void setLifetimeMarker() { CleanupBits.IsLifetimeMarker = true; }
+
   llvm::AllocaInst *getActiveFlag() const { return ActiveFlag; }
   void setActiveFlag(llvm::AllocaInst *Var) { ActiveFlag = Var; }
 
diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp
index 4af49c2..48458db 100644
--- a/lib/CodeGen/CGDebugInfo.cpp
+++ b/lib/CodeGen/CGDebugInfo.cpp
@@ -75,8 +75,8 @@
       else {
         // Construct a location that has a valid scope, but no line info.
         assert(!DI->LexicalBlockStack.empty());
-        llvm::DIDescriptor Scope(DI->LexicalBlockStack.back());
-        CGF.Builder.SetCurrentDebugLocation(llvm::DebugLoc::get(0, 0, Scope));
+        CGF.Builder.SetCurrentDebugLocation(
+            llvm::DebugLoc::get(0, 0, DI->LexicalBlockStack.back()));
       }
     } else
       DI->EmitLocation(CGF.Builder, TemporaryLocation);
@@ -120,37 +120,33 @@
     return;
 
   SourceManager &SM = CGM.getContext().getSourceManager();
-  auto *Scope = cast<llvm::MDScope>(LexicalBlockStack.back());
+  auto *Scope = cast<llvm::DIScope>(LexicalBlockStack.back());
   PresumedLoc PCLoc = SM.getPresumedLoc(CurLoc);
 
   if (PCLoc.isInvalid() || Scope->getFilename() == PCLoc.getFilename())
     return;
 
-  if (auto *LBF = dyn_cast<llvm::MDLexicalBlockFile>(Scope)) {
-    llvm::DIDescriptor D = DBuilder.createLexicalBlockFile(
-        LBF->getScope(), getOrCreateFile(CurLoc));
-    llvm::MDNode *N = D;
+  if (auto *LBF = dyn_cast<llvm::DILexicalBlockFile>(Scope)) {
     LexicalBlockStack.pop_back();
-    LexicalBlockStack.emplace_back(N);
-  } else if (isa<llvm::MDLexicalBlock>(Scope) ||
-             isa<llvm::MDSubprogram>(Scope)) {
-    llvm::DIDescriptor D =
-        DBuilder.createLexicalBlockFile(Scope, getOrCreateFile(CurLoc));
-    llvm::MDNode *N = D;
+    LexicalBlockStack.emplace_back(DBuilder.createLexicalBlockFile(
+        LBF->getScope(), getOrCreateFile(CurLoc)));
+  } else if (isa<llvm::DILexicalBlock>(Scope) ||
+             isa<llvm::DISubprogram>(Scope)) {
     LexicalBlockStack.pop_back();
-    LexicalBlockStack.emplace_back(N);
+    LexicalBlockStack.emplace_back(
+        DBuilder.createLexicalBlockFile(Scope, getOrCreateFile(CurLoc)));
   }
 }
 
 /// getContextDescriptor - Get context info for the decl.
-llvm::DIScope CGDebugInfo::getContextDescriptor(const Decl *Context) {
+llvm::DIScope *CGDebugInfo::getContextDescriptor(const Decl *Context) {
   if (!Context)
     return TheCU;
 
   auto I = RegionMap.find(Context);
   if (I != RegionMap.end()) {
     llvm::Metadata *V = I->second;
-    return dyn_cast_or_null<llvm::MDScope>(V);
+    return dyn_cast_or_null<llvm::DIScope>(V);
   }
 
   // Check namespace.
@@ -247,7 +243,7 @@
 }
 
 /// getOrCreateFile - Get the file debug info descriptor for the input location.
-llvm::DIFile CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
+llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
   if (!Loc.isValid())
     // If Location is not valid then use main input file.
     return DBuilder.createFile(TheCU->getFilename(), TheCU->getDirectory());
@@ -266,17 +262,18 @@
   if (it != DIFileCache.end()) {
     // Verify that the information still exists.
     if (llvm::Metadata *V = it->second)
-      return cast<llvm::MDFile>(V);
+      return cast<llvm::DIFile>(V);
   }
 
-  llvm::DIFile F = DBuilder.createFile(PLoc.getFilename(), getCurrentDirname());
+  llvm::DIFile *F =
+      DBuilder.createFile(PLoc.getFilename(), getCurrentDirname());
 
   DIFileCache[fname].reset(F);
   return F;
 }
 
 /// getOrCreateMainFile - Get the file info for main compile unit.
-llvm::DIFile CGDebugInfo::getOrCreateMainFile() {
+llvm::DIFile *CGDebugInfo::getOrCreateMainFile() {
   return DBuilder.createFile(TheCU->getFilename(), TheCU->getDirectory());
 }
 
@@ -382,12 +379,13 @@
       DebugKind <= CodeGenOptions::DebugLineTablesOnly
           ? llvm::DIBuilder::LineTablesOnly
           : llvm::DIBuilder::FullDebug,
+      0 /* DWOid */,
       DebugKind != CodeGenOptions::LocTrackingOnly);
 }
 
 /// CreateType - Get the Basic type from the cache or create a new
 /// one if necessary.
-llvm::DIType CGDebugInfo::CreateType(const BuiltinType *BT) {
+llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
   llvm::dwarf::TypeKind Encoding;
   StringRef BTName;
   switch (BT->getKind()) {
@@ -399,7 +397,7 @@
   case BuiltinType::NullPtr:
     return DBuilder.createNullPtrType();
   case BuiltinType::Void:
-    return llvm::DIType();
+    return nullptr;
   case BuiltinType::ObjCClass:
     if (!ClassTy)
       ClassTy = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type,
@@ -422,11 +420,11 @@
 
     unsigned Size = CGM.getContext().getTypeSize(CGM.getContext().VoidPtrTy);
 
-    llvm::DIType ISATy = DBuilder.createPointerType(ClassTy, Size);
+    auto *ISATy = DBuilder.createPointerType(ClassTy, Size);
 
     ObjTy =
         DBuilder.createStructType(TheCU, "objc_object", getOrCreateMainFile(),
-                                  0, 0, 0, 0, llvm::DIType(), llvm::DIArray());
+                                  0, 0, 0, 0, nullptr, llvm::DINodeArray());
 
     DBuilder.replaceArrays(
         ObjTy,
@@ -523,11 +521,10 @@
   // Bit size, align and offset of the type.
   uint64_t Size = CGM.getContext().getTypeSize(BT);
   uint64_t Align = CGM.getContext().getTypeAlign(BT);
-  llvm::DIType DbgTy = DBuilder.createBasicType(BTName, Size, Align, Encoding);
-  return DbgTy;
+  return DBuilder.createBasicType(BTName, Size, Align, Encoding);
 }
 
-llvm::DIType CGDebugInfo::CreateType(const ComplexType *Ty) {
+llvm::DIType *CGDebugInfo::CreateType(const ComplexType *Ty) {
   // Bit size, align and offset of the type.
   llvm::dwarf::TypeKind Encoding = llvm::dwarf::DW_ATE_complex_float;
   if (Ty->isComplexIntegerType())
@@ -535,15 +532,13 @@
 
   uint64_t Size = CGM.getContext().getTypeSize(Ty);
   uint64_t Align = CGM.getContext().getTypeAlign(Ty);
-  llvm::DIType DbgTy =
-      DBuilder.createBasicType("complex", Size, Align, Encoding);
-
-  return DbgTy;
+  return DBuilder.createBasicType("complex", Size, Align, Encoding);
 }
 
 /// CreateCVRType - Get the qualified type from the cache or create
 /// a new one if necessary.
-llvm::DIType CGDebugInfo::CreateQualifiedType(QualType Ty, llvm::DIFile Unit) {
+llvm::DIType *CGDebugInfo::CreateQualifiedType(QualType Ty,
+                                               llvm::DIFile *Unit) {
   QualifierCollector Qc;
   const Type *T = Qc.strip(Ty);
 
@@ -569,17 +564,15 @@
     return getOrCreateType(QualType(T, 0), Unit);
   }
 
-  llvm::DIType FromTy = getOrCreateType(Qc.apply(CGM.getContext(), T), Unit);
+  auto *FromTy = getOrCreateType(Qc.apply(CGM.getContext(), T), Unit);
 
   // No need to fill in the Name, Line, Size, Alignment, Offset in case of
   // CVR derived types.
-  llvm::DIType DbgTy = DBuilder.createQualifiedType(Tag, FromTy);
-
-  return DbgTy;
+  return DBuilder.createQualifiedType(Tag, FromTy);
 }
 
-llvm::DIType CGDebugInfo::CreateType(const ObjCObjectPointerType *Ty,
-                                     llvm::DIFile Unit) {
+llvm::DIType *CGDebugInfo::CreateType(const ObjCObjectPointerType *Ty,
+                                      llvm::DIFile *Unit) {
 
   // The frontend treats 'id' as a typedef to an ObjCObjectType,
   // whereas 'id<protocol>' is treated as an ObjCPointerType. For the
@@ -587,12 +580,12 @@
   if (Ty->isObjCQualifiedIdType())
     return getOrCreateType(CGM.getContext().getObjCIdType(), Unit);
 
-  llvm::DIType DbgTy = CreatePointerLikeType(llvm::dwarf::DW_TAG_pointer_type,
-                                             Ty, Ty->getPointeeType(), Unit);
-  return DbgTy;
+  return CreatePointerLikeType(llvm::dwarf::DW_TAG_pointer_type, Ty,
+                               Ty->getPointeeType(), Unit);
 }
 
-llvm::DIType CGDebugInfo::CreateType(const PointerType *Ty, llvm::DIFile Unit) {
+llvm::DIType *CGDebugInfo::CreateType(const PointerType *Ty,
+                                      llvm::DIFile *Unit) {
   return CreatePointerLikeType(llvm::dwarf::DW_TAG_pointer_type, Ty,
                                Ty->getPointeeType(), Unit);
 }
@@ -601,7 +594,7 @@
 /// on their mangled names, if they're external.
 static SmallString<256> getUniqueTagTypeName(const TagType *Ty,
                                              CodeGenModule &CGM,
-                                             llvm::DICompileUnit TheCU) {
+                                             llvm::DICompileUnit *TheCU) {
   SmallString<256> FullName;
   // FIXME: ODR should apply to ObjC++ exactly the same wasy it does to C++.
   // For now, only apply ODR with C++.
@@ -637,13 +630,13 @@
 }
 
 // Creates a forward declaration for a RecordDecl in the given context.
-llvm::MDCompositeType *
+llvm::DICompositeType *
 CGDebugInfo::getOrCreateRecordFwdDecl(const RecordType *Ty,
-                                      llvm::MDScope *Ctx) {
+                                      llvm::DIScope *Ctx) {
   const RecordDecl *RD = Ty->getDecl();
-  if (llvm::MDType *T = getTypeOrNull(CGM.getContext().getRecordType(RD)))
-    return cast<llvm::MDCompositeType>(T);
-  llvm::DIFile DefUnit = getOrCreateFile(RD->getLocation());
+  if (llvm::DIType *T = getTypeOrNull(CGM.getContext().getRecordType(RD)))
+    return cast<llvm::DICompositeType>(T);
+  llvm::DIFile *DefUnit = getOrCreateFile(RD->getLocation());
   unsigned Line = getLineNumber(RD->getLocation());
   StringRef RDName = getClassName(RD);
 
@@ -658,19 +651,19 @@
 
   // Create the type.
   SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU);
-  llvm::MDCompositeType *RetTy = DBuilder.createReplaceableCompositeType(
+  llvm::DICompositeType *RetTy = DBuilder.createReplaceableCompositeType(
       getTagForRecord(RD), RDName, Ctx, DefUnit, Line, 0, Size, Align,
-      llvm::DebugNode::FlagFwdDecl, FullName);
+      llvm::DINode::FlagFwdDecl, FullName);
   ReplaceMap.emplace_back(
       std::piecewise_construct, std::make_tuple(Ty),
       std::make_tuple(static_cast<llvm::Metadata *>(RetTy)));
   return RetTy;
 }
 
-llvm::DIType CGDebugInfo::CreatePointerLikeType(llvm::dwarf::Tag Tag,
-                                                const Type *Ty,
-                                                QualType PointeeTy,
-                                                llvm::DIFile Unit) {
+llvm::DIType *CGDebugInfo::CreatePointerLikeType(llvm::dwarf::Tag Tag,
+                                                 const Type *Ty,
+                                                 QualType PointeeTy,
+                                                 llvm::DIFile *Unit) {
   if (Tag == llvm::dwarf::DW_TAG_reference_type ||
       Tag == llvm::dwarf::DW_TAG_rvalue_reference_type)
     return DBuilder.createReferenceType(Tag, getOrCreateType(PointeeTy, Unit));
@@ -686,8 +679,8 @@
                                     Align);
 }
 
-llvm::DIType CGDebugInfo::getOrCreateStructPtrType(StringRef Name,
-                                                   llvm::DIType &Cache) {
+llvm::DIType *CGDebugInfo::getOrCreateStructPtrType(StringRef Name,
+                                                    llvm::DIType *&Cache) {
   if (Cache)
     return Cache;
   Cache = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, Name,
@@ -697,18 +690,16 @@
   return Cache;
 }
 
-llvm::DIType CGDebugInfo::CreateType(const BlockPointerType *Ty,
-                                     llvm::DIFile Unit) {
+llvm::DIType *CGDebugInfo::CreateType(const BlockPointerType *Ty,
+                                      llvm::DIFile *Unit) {
   if (BlockLiteralGeneric)
     return BlockLiteralGeneric;
 
   SmallVector<llvm::Metadata *, 8> EltTys;
-  llvm::DIType FieldTy;
   QualType FType;
   uint64_t FieldSize, FieldOffset;
   unsigned FieldAlign;
-  llvm::DIArray Elements;
-  llvm::DIType EltTy, DescTy;
+  llvm::DINodeArray Elements;
 
   FieldOffset = 0;
   FType = CGM.getContext().UnsignedLongTy;
@@ -718,17 +709,17 @@
   Elements = DBuilder.getOrCreateArray(EltTys);
   EltTys.clear();
 
-  unsigned Flags = llvm::DebugNode::FlagAppleBlock;
+  unsigned Flags = llvm::DINode::FlagAppleBlock;
   unsigned LineNo = getLineNumber(CurLoc);
 
-  EltTy = DBuilder.createStructType(Unit, "__block_descriptor", Unit, LineNo,
-                                    FieldOffset, 0, Flags, llvm::DIType(),
-                                    Elements);
+  auto *EltTy =
+      DBuilder.createStructType(Unit, "__block_descriptor", Unit, LineNo,
+                                FieldOffset, 0, Flags, nullptr, Elements);
 
   // Bit size, align and offset of the type.
   uint64_t Size = CGM.getContext().getTypeSize(Ty);
 
-  DescTy = DBuilder.createPointerType(EltTy, Size);
+  auto *DescTy = DBuilder.createPointerType(EltTy, Size);
 
   FieldOffset = 0;
   FType = CGM.getContext().getPointerType(CGM.getContext().VoidTy);
@@ -740,29 +731,27 @@
   EltTys.push_back(CreateMemberType(Unit, FType, "__FuncPtr", &FieldOffset));
 
   FType = CGM.getContext().getPointerType(CGM.getContext().VoidTy);
-  FieldTy = DescTy;
   FieldSize = CGM.getContext().getTypeSize(Ty);
   FieldAlign = CGM.getContext().getTypeAlign(Ty);
-  FieldTy =
-      DBuilder.createMemberType(Unit, "__descriptor", Unit, LineNo, FieldSize,
-                                FieldAlign, FieldOffset, 0, FieldTy);
-  EltTys.push_back(FieldTy);
+  EltTys.push_back(DBuilder.createMemberType(Unit, "__descriptor", Unit, LineNo,
+                                             FieldSize, FieldAlign, FieldOffset,
+                                             0, DescTy));
 
   FieldOffset += FieldSize;
   Elements = DBuilder.getOrCreateArray(EltTys);
 
-  EltTy = DBuilder.createStructType(Unit, "__block_literal_generic", Unit,
-                                    LineNo, FieldOffset, 0, Flags,
-                                    llvm::DIType(), Elements);
+  EltTy =
+      DBuilder.createStructType(Unit, "__block_literal_generic", Unit, LineNo,
+                                FieldOffset, 0, Flags, nullptr, Elements);
 
   BlockLiteralGeneric = DBuilder.createPointerType(EltTy, Size);
   return BlockLiteralGeneric;
 }
 
-llvm::DIType CGDebugInfo::CreateType(const TemplateSpecializationType *Ty,
-                                     llvm::DIFile Unit) {
+llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty,
+                                      llvm::DIFile *Unit) {
   assert(Ty->isTypeAlias());
-  llvm::DIType Src = getOrCreateType(Ty->getAliasedType(), Unit);
+  llvm::DIType *Src = getOrCreateType(Ty->getAliasedType(), Unit);
 
   SmallString<128> NS;
   llvm::raw_svector_ostream OS(NS);
@@ -782,7 +771,8 @@
       getContextDescriptor(cast<Decl>(AliasDecl->getDeclContext())));
 }
 
-llvm::DIType CGDebugInfo::CreateType(const TypedefType *Ty, llvm::DIFile Unit) {
+llvm::DIType *CGDebugInfo::CreateType(const TypedefType *Ty,
+                                      llvm::DIFile *Unit) {
   // We don't set size information, but do specify where the typedef was
   // declared.
   SourceLocation Loc = Ty->getDecl()->getLocation();
@@ -794,8 +784,8 @@
       getContextDescriptor(cast<Decl>(Ty->getDecl()->getDeclContext())));
 }
 
-llvm::DIType CGDebugInfo::CreateType(const FunctionType *Ty,
-                                     llvm::DIFile Unit) {
+llvm::DIType *CGDebugInfo::CreateType(const FunctionType *Ty,
+                                      llvm::DIFile *Unit) {
   SmallVector<llvm::Metadata *, 16> EltTys;
 
   // Add the result type at least.
@@ -812,11 +802,11 @@
       EltTys.push_back(DBuilder.createUnspecifiedParameter());
   }
 
-  llvm::DITypeArray EltTypeArray = DBuilder.getOrCreateTypeArray(EltTys);
+  llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(EltTys);
   return DBuilder.createSubroutineType(Unit, EltTypeArray);
 }
 
-/// Convert an AccessSpecifier into the corresponding DIDescriptor flag.
+/// Convert an AccessSpecifier into the corresponding DINode flag.
 /// As an optimization, return 0 if the access specifier equals the
 /// default for the containing type.
 static unsigned getAccessFlag(AccessSpecifier Access, const RecordDecl *RD) {
@@ -831,25 +821,25 @@
 
   switch (Access) {
   case clang::AS_private:
-    return llvm::DebugNode::FlagPrivate;
+    return llvm::DINode::FlagPrivate;
   case clang::AS_protected:
-    return llvm::DebugNode::FlagProtected;
+    return llvm::DINode::FlagProtected;
   case clang::AS_public:
-    return llvm::DebugNode::FlagPublic;
+    return llvm::DINode::FlagPublic;
   case clang::AS_none:
     return 0;
   }
   llvm_unreachable("unexpected access enumerator");
 }
 
-llvm::DIType CGDebugInfo::createFieldType(
+llvm::DIType *CGDebugInfo::createFieldType(
     StringRef name, QualType type, uint64_t sizeInBitsOverride,
     SourceLocation loc, AccessSpecifier AS, uint64_t offsetInBits,
-    llvm::DIFile tunit, llvm::DIScope scope, const RecordDecl *RD) {
-  llvm::DIType debugType = getOrCreateType(type, tunit);
+    llvm::DIFile *tunit, llvm::DIScope *scope, const RecordDecl *RD) {
+  llvm::DIType *debugType = getOrCreateType(type, tunit);
 
   // Get the location for the field.
-  llvm::DIFile file = getOrCreateFile(loc);
+  llvm::DIFile *file = getOrCreateFile(loc);
   unsigned line = getLineNumber(loc);
 
   uint64_t SizeInBits = 0;
@@ -871,7 +861,7 @@
 /// CollectRecordLambdaFields - Helper for CollectRecordFields.
 void CGDebugInfo::CollectRecordLambdaFields(
     const CXXRecordDecl *CXXDecl, SmallVectorImpl<llvm::Metadata *> &elements,
-    llvm::DIType RecordTy) {
+    llvm::DIType *RecordTy) {
   // For C++11 Lambdas a Field will be the same as a Capture, but the Capture
   // has the name and the location of the variable so we should iterate over
   // both concurrently.
@@ -884,14 +874,14 @@
     const LambdaCapture &C = *I;
     if (C.capturesVariable()) {
       VarDecl *V = C.getCapturedVar();
-      llvm::DIFile VUnit = getOrCreateFile(C.getLocation());
+      llvm::DIFile *VUnit = getOrCreateFile(C.getLocation());
       StringRef VName = V->getName();
       uint64_t SizeInBitsOverride = 0;
       if (Field->isBitField()) {
         SizeInBitsOverride = Field->getBitWidthValue(CGM.getContext());
         assert(SizeInBitsOverride && "found named 0-width bitfield");
       }
-      llvm::DIType fieldType = createFieldType(
+      llvm::DIType *fieldType = createFieldType(
           VName, Field->getType(), SizeInBitsOverride, C.getLocation(),
           Field->getAccess(), layout.getFieldOffset(fieldno), VUnit, RecordTy,
           CXXDecl);
@@ -902,9 +892,9 @@
       // by using AT_object_pointer for the function and having that be
       // used as 'this' for semantic references.
       FieldDecl *f = *Field;
-      llvm::DIFile VUnit = getOrCreateFile(f->getLocation());
+      llvm::DIFile *VUnit = getOrCreateFile(f->getLocation());
       QualType type = f->getType();
-      llvm::DIType fieldType = createFieldType(
+      llvm::DIType *fieldType = createFieldType(
           "this", type, 0, f->getLocation(), f->getAccess(),
           layout.getFieldOffset(fieldno), VUnit, RecordTy, CXXDecl);
 
@@ -914,14 +904,14 @@
 }
 
 /// Helper for CollectRecordFields.
-llvm::DIDerivedType CGDebugInfo::CreateRecordStaticField(const VarDecl *Var,
-                                                         llvm::DIType RecordTy,
-                                                         const RecordDecl *RD) {
+llvm::DIDerivedType *
+CGDebugInfo::CreateRecordStaticField(const VarDecl *Var, llvm::DIType *RecordTy,
+                                     const RecordDecl *RD) {
   // Create the descriptor for the static variable, with or without
   // constant initializers.
   Var = Var->getCanonicalDecl();
-  llvm::DIFile VUnit = getOrCreateFile(Var->getLocation());
-  llvm::DIType VTy = getOrCreateType(Var->getType(), VUnit);
+  llvm::DIFile *VUnit = getOrCreateFile(Var->getLocation());
+  llvm::DIType *VTy = getOrCreateType(Var->getType(), VUnit);
 
   unsigned LineNumber = getLineNumber(Var->getLocation());
   StringRef VName = Var->getName();
@@ -937,7 +927,7 @@
   }
 
   unsigned Flags = getAccessFlag(Var->getAccess(), RD);
-  llvm::DIDerivedType GV = DBuilder.createStaticMemberType(
+  llvm::DIDerivedType *GV = DBuilder.createStaticMemberType(
       RecordTy, VName, VUnit, LineNumber, VTy, Flags, C);
   StaticDataMemberCache[Var->getCanonicalDecl()].reset(GV);
   return GV;
@@ -945,8 +935,8 @@
 
 /// CollectRecordNormalField - Helper for CollectRecordFields.
 void CGDebugInfo::CollectRecordNormalField(
-    const FieldDecl *field, uint64_t OffsetInBits, llvm::DIFile tunit,
-    SmallVectorImpl<llvm::Metadata *> &elements, llvm::DIType RecordTy,
+    const FieldDecl *field, uint64_t OffsetInBits, llvm::DIFile *tunit,
+    SmallVectorImpl<llvm::Metadata *> &elements, llvm::DIType *RecordTy,
     const RecordDecl *RD) {
   StringRef name = field->getName();
   QualType type = field->getType();
@@ -961,7 +951,7 @@
     assert(SizeInBitsOverride && "found named 0-width bitfield");
   }
 
-  llvm::DIType fieldType =
+  llvm::DIType *fieldType =
       createFieldType(name, type, SizeInBitsOverride, field->getLocation(),
                       field->getAccess(), OffsetInBits, tunit, RecordTy, RD);
 
@@ -971,9 +961,9 @@
 /// CollectRecordFields - A helper function to collect debug info for
 /// record fields. This is used while creating debug info entry for a Record.
 void CGDebugInfo::CollectRecordFields(
-    const RecordDecl *record, llvm::DIFile tunit,
+    const RecordDecl *record, llvm::DIFile *tunit,
     SmallVectorImpl<llvm::Metadata *> &elements,
-    llvm::DICompositeType RecordTy) {
+    llvm::DICompositeType *RecordTy) {
   const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(record);
 
   if (CXXDecl && CXXDecl->isLambda())
@@ -993,7 +983,7 @@
         if (MI != StaticDataMemberCache.end()) {
           assert(MI->second &&
                  "Static data member declaration should still exist");
-          elements.push_back(cast<llvm::MDDerivedTypeBase>(MI->second));
+          elements.push_back(cast<llvm::DIDerivedTypeBase>(MI->second));
         } else {
           auto Field = CreateRecordStaticField(V, RecordTy, record);
           elements.push_back(Field);
@@ -1011,22 +1001,22 @@
 /// getOrCreateMethodType - CXXMethodDecl's type is a FunctionType. This
 /// function type is not updated to include implicit "this" pointer. Use this
 /// routine to get a method type which includes "this" pointer.
-llvm::MDSubroutineType *
+llvm::DISubroutineType *
 CGDebugInfo::getOrCreateMethodType(const CXXMethodDecl *Method,
-                                   llvm::DIFile Unit) {
+                                   llvm::DIFile *Unit) {
   const FunctionProtoType *Func = Method->getType()->getAs<FunctionProtoType>();
   if (Method->isStatic())
-    return cast_or_null<llvm::MDSubroutineType>(
+    return cast_or_null<llvm::DISubroutineType>(
         getOrCreateType(QualType(Func, 0), Unit));
   return getOrCreateInstanceMethodType(Method->getThisType(CGM.getContext()),
                                        Func, Unit);
 }
 
-llvm::MDSubroutineType *CGDebugInfo::getOrCreateInstanceMethodType(
-    QualType ThisPtr, const FunctionProtoType *Func, llvm::DIFile Unit) {
+llvm::DISubroutineType *CGDebugInfo::getOrCreateInstanceMethodType(
+    QualType ThisPtr, const FunctionProtoType *Func, llvm::DIFile *Unit) {
   // Add "this" pointer.
-  llvm::DITypeArray Args(
-      cast<llvm::MDSubroutineType>(getOrCreateType(QualType(Func, 0), Unit))
+  llvm::DITypeRefArray Args(
+      cast<llvm::DISubroutineType>(getOrCreateType(QualType(Func, 0), Unit))
           ->getTypeArray());
   assert(Args.size() && "Invalid number of arguments!");
 
@@ -1044,8 +1034,8 @@
     unsigned AS = CGM.getContext().getTargetAddressSpace(PointeeTy);
     uint64_t Size = CGM.getTarget().getPointerWidth(AS);
     uint64_t Align = CGM.getContext().getTypeAlign(ThisPtrTy);
-    llvm::DIType PointeeType = getOrCreateType(PointeeTy, Unit);
-    llvm::DIType ThisPtrType =
+    llvm::DIType *PointeeType = getOrCreateType(PointeeTy, Unit);
+    llvm::DIType *ThisPtrType =
         DBuilder.createPointerType(PointeeType, Size, Align);
     TypeCache[ThisPtr.getAsOpaquePtr()].reset(ThisPtrType);
     // TODO: This and the artificial type below are misleading, the
@@ -1054,7 +1044,7 @@
     ThisPtrType = DBuilder.createObjectPointerType(ThisPtrType);
     Elts.push_back(ThisPtrType);
   } else {
-    llvm::DIType ThisPtrType = getOrCreateType(ThisPtr, Unit);
+    llvm::DIType *ThisPtrType = getOrCreateType(ThisPtr, Unit);
     TypeCache[ThisPtr.getAsOpaquePtr()].reset(ThisPtrType);
     ThisPtrType = DBuilder.createObjectPointerType(ThisPtrType);
     Elts.push_back(ThisPtrType);
@@ -1064,13 +1054,13 @@
   for (unsigned i = 1, e = Args.size(); i != e; ++i)
     Elts.push_back(Args[i]);
 
-  llvm::DITypeArray EltTypeArray = DBuilder.getOrCreateTypeArray(Elts);
+  llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(Elts);
 
   unsigned Flags = 0;
   if (Func->getExtProtoInfo().RefQualifier == RQ_LValue)
-    Flags |= llvm::DebugNode::FlagLValueReference;
+    Flags |= llvm::DINode::FlagLValueReference;
   if (Func->getExtProtoInfo().RefQualifier == RQ_RValue)
-    Flags |= llvm::DebugNode::FlagRValueReference;
+    Flags |= llvm::DINode::FlagRValueReference;
 
   return DBuilder.createSubroutineType(Unit, EltTypeArray, Flags);
 }
@@ -1085,16 +1075,15 @@
   return false;
 }
 
-/// CreateCXXMemberFunction - A helper function to create a DISubprogram for
+/// CreateCXXMemberFunction - A helper function to create a subprogram for
 /// a single member function GlobalDecl.
-llvm::DISubprogram
-CGDebugInfo::CreateCXXMemberFunction(const CXXMethodDecl *Method,
-                                     llvm::DIFile Unit, llvm::DIType RecordTy) {
+llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction(
+    const CXXMethodDecl *Method, llvm::DIFile *Unit, llvm::DIType *RecordTy) {
   bool IsCtorOrDtor =
       isa<CXXConstructorDecl>(Method) || isa<CXXDestructorDecl>(Method);
 
   StringRef MethodName = getFunctionName(Method);
-  llvm::MDSubroutineType *MethodTy = getOrCreateMethodType(Method, Unit);
+  llvm::DISubroutineType *MethodTy = getOrCreateMethodType(Method, Unit);
 
   // Since a single ctor/dtor corresponds to multiple functions, it doesn't
   // make sense to give a single ctor/dtor a linkage name.
@@ -1103,7 +1092,7 @@
     MethodLinkageName = CGM.getMangledName(Method);
 
   // Get the location for the method.
-  llvm::DIFile MethodDefUnit;
+  llvm::DIFile *MethodDefUnit = nullptr;
   unsigned MethodLine = 0;
   if (!Method->isImplicit()) {
     MethodDefUnit = getOrCreateFile(Method->getLocation());
@@ -1111,7 +1100,7 @@
   }
 
   // Collect virtual method info.
-  llvm::DIType ContainingType;
+  llvm::DIType *ContainingType = nullptr;
   unsigned Virtuality = 0;
   unsigned VIndex = 0;
 
@@ -1134,25 +1123,25 @@
 
   unsigned Flags = 0;
   if (Method->isImplicit())
-    Flags |= llvm::DebugNode::FlagArtificial;
+    Flags |= llvm::DINode::FlagArtificial;
   Flags |= getAccessFlag(Method->getAccess(), Method->getParent());
   if (const CXXConstructorDecl *CXXC = dyn_cast<CXXConstructorDecl>(Method)) {
     if (CXXC->isExplicit())
-      Flags |= llvm::DebugNode::FlagExplicit;
+      Flags |= llvm::DINode::FlagExplicit;
   } else if (const CXXConversionDecl *CXXC =
                  dyn_cast<CXXConversionDecl>(Method)) {
     if (CXXC->isExplicit())
-      Flags |= llvm::DebugNode::FlagExplicit;
+      Flags |= llvm::DINode::FlagExplicit;
   }
   if (Method->hasPrototype())
-    Flags |= llvm::DebugNode::FlagPrototyped;
+    Flags |= llvm::DINode::FlagPrototyped;
   if (Method->getRefQualifier() == RQ_LValue)
-    Flags |= llvm::DebugNode::FlagLValueReference;
+    Flags |= llvm::DINode::FlagLValueReference;
   if (Method->getRefQualifier() == RQ_RValue)
-    Flags |= llvm::DebugNode::FlagRValueReference;
+    Flags |= llvm::DINode::FlagRValueReference;
 
-  llvm::DIArray TParamsArray = CollectFunctionTemplateParams(Method, Unit);
-  llvm::DISubprogram SP = DBuilder.createMethod(
+  llvm::DINodeArray TParamsArray = CollectFunctionTemplateParams(Method, Unit);
+  llvm::DISubprogram *SP = DBuilder.createMethod(
       RecordTy, MethodName, MethodLinkageName, MethodDefUnit, MethodLine,
       MethodTy, /*isLocalToUnit=*/false,
       /* isDefinition=*/false, Virtuality, VIndex, ContainingType, Flags,
@@ -1167,8 +1156,8 @@
 /// C++ member functions. This is used while creating debug info entry for
 /// a Record.
 void CGDebugInfo::CollectCXXMemberFunctions(
-    const CXXRecordDecl *RD, llvm::DIFile Unit,
-    SmallVectorImpl<llvm::Metadata *> &EltTys, llvm::DIType RecordTy) {
+    const CXXRecordDecl *RD, llvm::DIFile *Unit,
+    SmallVectorImpl<llvm::Metadata *> &EltTys, llvm::DIType *RecordTy) {
 
   // Since we want more than just the individual member decls if we
   // have templated functions iterate over every declaration to gather
@@ -1206,10 +1195,9 @@
 /// CollectCXXBases - A helper function to collect debug info for
 /// C++ base classes. This is used while creating debug info entry for
 /// a Record.
-void CGDebugInfo::CollectCXXBases(const CXXRecordDecl *RD, llvm::DIFile Unit,
+void CGDebugInfo::CollectCXXBases(const CXXRecordDecl *RD, llvm::DIFile *Unit,
                                   SmallVectorImpl<llvm::Metadata *> &EltTys,
-                                  llvm::DIType RecordTy) {
-
+                                  llvm::DIType *RecordTy) {
   const ASTRecordLayout &RL = CGM.getContext().getASTRecordLayout(RD);
   for (const auto &BI : RD->bases()) {
     unsigned BFlags = 0;
@@ -1231,24 +1219,24 @@
         BaseOffset =
             4 * CGM.getMicrosoftVTableContext().getVBTableIndex(RD, Base);
       }
-      BFlags = llvm::DebugNode::FlagVirtual;
+      BFlags = llvm::DINode::FlagVirtual;
     } else
       BaseOffset = CGM.getContext().toBits(RL.getBaseClassOffset(Base));
     // FIXME: Inconsistent units for BaseOffset. It is in bytes when
     // BI->isVirtual() and bits when not.
 
     BFlags |= getAccessFlag(BI.getAccessSpecifier(), RD);
-    llvm::DIType DTy = DBuilder.createInheritance(
+    llvm::DIType *DTy = DBuilder.createInheritance(
         RecordTy, getOrCreateType(BI.getType(), Unit), BaseOffset, BFlags);
     EltTys.push_back(DTy);
   }
 }
 
 /// CollectTemplateParams - A helper function to collect template parameters.
-llvm::DIArray
+llvm::DINodeArray
 CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList,
                                    ArrayRef<TemplateArgument> TAList,
-                                   llvm::DIFile Unit) {
+                                   llvm::DIFile *Unit) {
   SmallVector<llvm::Metadata *, 16> TemplateParams;
   for (unsigned i = 0, e = TAList.size(); i != e; ++i) {
     const TemplateArgument &TA = TAList[i];
@@ -1257,23 +1245,20 @@
       Name = TPList->getParam(i)->getName();
     switch (TA.getKind()) {
     case TemplateArgument::Type: {
-      llvm::DIType TTy = getOrCreateType(TA.getAsType(), Unit);
-      llvm::DITemplateTypeParameter TTP =
-          DBuilder.createTemplateTypeParameter(TheCU, Name, TTy);
-      TemplateParams.push_back(TTP);
+      llvm::DIType *TTy = getOrCreateType(TA.getAsType(), Unit);
+      TemplateParams.push_back(
+          DBuilder.createTemplateTypeParameter(TheCU, Name, TTy));
     } break;
     case TemplateArgument::Integral: {
-      llvm::DIType TTy = getOrCreateType(TA.getIntegralType(), Unit);
-      llvm::DITemplateValueParameter TVP =
-          DBuilder.createTemplateValueParameter(
-              TheCU, Name, TTy,
-              llvm::ConstantInt::get(CGM.getLLVMContext(), TA.getAsIntegral()));
-      TemplateParams.push_back(TVP);
+      llvm::DIType *TTy = getOrCreateType(TA.getIntegralType(), Unit);
+      TemplateParams.push_back(DBuilder.createTemplateValueParameter(
+          TheCU, Name, TTy,
+          llvm::ConstantInt::get(CGM.getLLVMContext(), TA.getAsIntegral())));
     } break;
     case TemplateArgument::Declaration: {
       const ValueDecl *D = TA.getAsDecl();
       QualType T = TA.getParamTypeForDecl().getDesugaredType(CGM.getContext());
-      llvm::DIType TTy = getOrCreateType(T, Unit);
+      llvm::DIType *TTy = getOrCreateType(T, Unit);
       llvm::Constant *V = nullptr;
       const CXXMethodDecl *MD;
       // Variable pointer template parameters have a value that is the address
@@ -1297,15 +1282,13 @@
             CGM.getContext().toCharUnitsFromBits((int64_t)fieldOffset);
         V = CGM.getCXXABI().EmitMemberDataPointer(MPT, chars);
       }
-      llvm::DITemplateValueParameter TVP =
-          DBuilder.createTemplateValueParameter(
-              TheCU, Name, TTy,
-              cast_or_null<llvm::Constant>(V->stripPointerCasts()));
-      TemplateParams.push_back(TVP);
+      TemplateParams.push_back(DBuilder.createTemplateValueParameter(
+          TheCU, Name, TTy,
+          cast_or_null<llvm::Constant>(V->stripPointerCasts())));
     } break;
     case TemplateArgument::NullPtr: {
       QualType T = TA.getNullPtrType();
-      llvm::DIType TTy = getOrCreateType(T, Unit);
+      llvm::DIType *TTy = getOrCreateType(T, Unit);
       llvm::Constant *V = nullptr;
       // Special case member data pointer null values since they're actually -1
       // instead of zero.
@@ -1320,24 +1303,19 @@
           V = CGM.getCXXABI().EmitNullMemberPointer(MPT);
       if (!V)
         V = llvm::ConstantInt::get(CGM.Int8Ty, 0);
-      llvm::DITemplateValueParameter TVP =
-          DBuilder.createTemplateValueParameter(TheCU, Name, TTy,
-                                                cast<llvm::Constant>(V));
-      TemplateParams.push_back(TVP);
+      TemplateParams.push_back(DBuilder.createTemplateValueParameter(
+          TheCU, Name, TTy, cast<llvm::Constant>(V)));
     } break;
-    case TemplateArgument::Template: {
-      llvm::DITemplateValueParameter
-      TVP = DBuilder.createTemplateTemplateParameter(
-          TheCU, Name, llvm::DIType(),
-          TA.getAsTemplate().getAsTemplateDecl()->getQualifiedNameAsString());
-      TemplateParams.push_back(TVP);
-    } break;
-    case TemplateArgument::Pack: {
-      llvm::DITemplateValueParameter TVP = DBuilder.createTemplateParameterPack(
-          TheCU, Name, llvm::DIType(),
-          CollectTemplateParams(nullptr, TA.getPackAsArray(), Unit));
-      TemplateParams.push_back(TVP);
-    } break;
+    case TemplateArgument::Template:
+      TemplateParams.push_back(DBuilder.createTemplateTemplateParameter(
+          TheCU, Name, nullptr,
+          TA.getAsTemplate().getAsTemplateDecl()->getQualifiedNameAsString()));
+      break;
+    case TemplateArgument::Pack:
+      TemplateParams.push_back(DBuilder.createTemplateParameterPack(
+          TheCU, Name, nullptr,
+          CollectTemplateParams(nullptr, TA.getPackAsArray(), Unit)));
+      break;
     case TemplateArgument::Expression: {
       const Expr *E = TA.getAsExpr();
       QualType T = E->getType();
@@ -1345,11 +1323,9 @@
         T = CGM.getContext().getLValueReferenceType(T);
       llvm::Constant *V = CGM.EmitConstantExpr(E, T);
       assert(V && "Expression in template argument isn't constant");
-      llvm::DIType TTy = getOrCreateType(T, Unit);
-      llvm::DITemplateValueParameter TVP =
-          DBuilder.createTemplateValueParameter(
-              TheCU, Name, TTy, cast<llvm::Constant>(V->stripPointerCasts()));
-      TemplateParams.push_back(TVP);
+      llvm::DIType *TTy = getOrCreateType(T, Unit);
+      TemplateParams.push_back(DBuilder.createTemplateValueParameter(
+          TheCU, Name, TTy, cast<llvm::Constant>(V->stripPointerCasts())));
     } break;
     // And the following should never occur:
     case TemplateArgument::TemplateExpansion:
@@ -1363,8 +1339,9 @@
 
 /// CollectFunctionTemplateParams - A helper function to collect debug
 /// info for function template parameters.
-llvm::DIArray CGDebugInfo::CollectFunctionTemplateParams(const FunctionDecl *FD,
-                                                         llvm::DIFile Unit) {
+llvm::DINodeArray
+CGDebugInfo::CollectFunctionTemplateParams(const FunctionDecl *FD,
+                                           llvm::DIFile *Unit) {
   if (FD->getTemplatedKind() ==
       FunctionDecl::TK_FunctionTemplateSpecialization) {
     const TemplateParameterList *TList = FD->getTemplateSpecializationInfo()
@@ -1373,13 +1350,13 @@
     return CollectTemplateParams(
         TList, FD->getTemplateSpecializationArgs()->asArray(), Unit);
   }
-  return llvm::DIArray();
+  return llvm::DINodeArray();
 }
 
 /// CollectCXXTemplateParams - A helper function to collect debug info for
 /// template parameters.
-llvm::DIArray CGDebugInfo::CollectCXXTemplateParams(
-    const ClassTemplateSpecializationDecl *TSpecial, llvm::DIFile Unit) {
+llvm::DINodeArray CGDebugInfo::CollectCXXTemplateParams(
+    const ClassTemplateSpecializationDecl *TSpecial, llvm::DIFile *Unit) {
   // Always get the full list of parameters, not just the ones from
   // the specialization.
   TemplateParameterList *TPList =
@@ -1389,7 +1366,7 @@
 }
 
 /// getOrCreateVTablePtrType - Return debug info descriptor for vtable.
-llvm::DIType CGDebugInfo::getOrCreateVTablePtrType(llvm::DIFile Unit) {
+llvm::DIType *CGDebugInfo::getOrCreateVTablePtrType(llvm::DIFile *Unit) {
   if (VTablePtrType)
     return VTablePtrType;
 
@@ -1397,10 +1374,10 @@
 
   /* Function type */
   llvm::Metadata *STy = getOrCreateType(Context.IntTy, Unit);
-  llvm::DITypeArray SElements = DBuilder.getOrCreateTypeArray(STy);
-  llvm::DIType SubTy = DBuilder.createSubroutineType(Unit, SElements);
+  llvm::DITypeRefArray SElements = DBuilder.getOrCreateTypeArray(STy);
+  llvm::DIType *SubTy = DBuilder.createSubroutineType(Unit, SElements);
   unsigned Size = Context.getTypeSize(Context.VoidPtrTy);
-  llvm::DIType vtbl_ptr_type =
+  llvm::DIType *vtbl_ptr_type =
       DBuilder.createPointerType(SubTy, Size, 0, "__vtbl_ptr_type");
   VTablePtrType = DBuilder.createPointerType(vtbl_ptr_type, Size);
   return VTablePtrType;
@@ -1414,7 +1391,7 @@
 
 /// CollectVTableInfo - If the C++ class has vtable info then insert appropriate
 /// debug info entry in EltTys vector.
-void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile Unit,
+void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile *Unit,
                                     SmallVectorImpl<llvm::Metadata *> &EltTys) {
   const ASTRecordLayout &RL = CGM.getContext().getASTRecordLayout(RD);
 
@@ -1427,26 +1404,26 @@
     return;
 
   unsigned Size = CGM.getContext().getTypeSize(CGM.getContext().VoidPtrTy);
-  llvm::DIType VPTR = DBuilder.createMemberType(
+  llvm::DIType *VPTR = DBuilder.createMemberType(
       Unit, getVTableName(RD), Unit, 0, Size, 0, 0,
-      llvm::DebugNode::FlagArtificial, getOrCreateVTablePtrType(Unit));
+      llvm::DINode::FlagArtificial, getOrCreateVTablePtrType(Unit));
   EltTys.push_back(VPTR);
 }
 
 /// getOrCreateRecordType - Emit record type's standalone debug info.
-llvm::DIType CGDebugInfo::getOrCreateRecordType(QualType RTy,
-                                                SourceLocation Loc) {
+llvm::DIType *CGDebugInfo::getOrCreateRecordType(QualType RTy,
+                                                 SourceLocation Loc) {
   assert(DebugKind >= CodeGenOptions::LimitedDebugInfo);
-  llvm::DIType T = getOrCreateType(RTy, getOrCreateFile(Loc));
+  llvm::DIType *T = getOrCreateType(RTy, getOrCreateFile(Loc));
   return T;
 }
 
 /// getOrCreateInterfaceType - Emit an objective c interface type standalone
 /// debug info.
-llvm::DIType CGDebugInfo::getOrCreateInterfaceType(QualType D,
-                                                   SourceLocation Loc) {
+llvm::DIType *CGDebugInfo::getOrCreateInterfaceType(QualType D,
+                                                    SourceLocation Loc) {
   assert(DebugKind >= CodeGenOptions::LimitedDebugInfo);
-  llvm::DIType T = getOrCreateType(D, getOrCreateFile(Loc));
+  llvm::DIType *T = getOrCreateType(D, getOrCreateFile(Loc));
   RetainedTypes.push_back(D.getAsOpaquePtr());
   return T;
 }
@@ -1457,9 +1434,9 @@
   QualType Ty = CGM.getContext().getEnumType(ED);
   void *TyPtr = Ty.getAsOpaquePtr();
   auto I = TypeCache.find(TyPtr);
-  if (I == TypeCache.end() || !cast<llvm::MDType>(I->second)->isForwardDecl())
+  if (I == TypeCache.end() || !cast<llvm::DIType>(I->second)->isForwardDecl())
     return;
-  llvm::DIType Res = CreateTypeDefinition(Ty->castAs<EnumType>());
+  llvm::DIType *Res = CreateTypeDefinition(Ty->castAs<EnumType>());
   assert(!Res->isForwardDecl());
   TypeCache[TyPtr].reset(Res);
 }
@@ -1479,7 +1456,7 @@
       return;
 
   QualType Ty = CGM.getContext().getRecordType(RD);
-  llvm::DIType T = getTypeOrNull(Ty);
+  llvm::DIType *T = getTypeOrNull(Ty);
   if (T && T->isForwardDecl())
     completeClassData(RD);
 }
@@ -1490,9 +1467,9 @@
   QualType Ty = CGM.getContext().getRecordType(RD);
   void *TyPtr = Ty.getAsOpaquePtr();
   auto I = TypeCache.find(TyPtr);
-  if (I != TypeCache.end() && !cast<llvm::MDType>(I->second)->isForwardDecl())
+  if (I != TypeCache.end() && !cast<llvm::DIType>(I->second)->isForwardDecl())
     return;
-  llvm::DIType Res = CreateTypeDefinition(Ty->castAs<RecordType>());
+  llvm::DIType *Res = CreateTypeDefinition(Ty->castAs<RecordType>());
   assert(!Res->isForwardDecl());
   TypeCache[TyPtr].reset(Res);
 }
@@ -1541,9 +1518,9 @@
 }
 
 /// CreateType - get structure or union type.
-llvm::DIType CGDebugInfo::CreateType(const RecordType *Ty) {
+llvm::DIType *CGDebugInfo::CreateType(const RecordType *Ty) {
   RecordDecl *RD = Ty->getDecl();
-  llvm::DIType T = cast_or_null<llvm::MDType>(getTypeOrNull(QualType(Ty, 0)));
+  llvm::DIType *T = cast_or_null<llvm::DIType>(getTypeOrNull(QualType(Ty, 0)));
   if (T || shouldOmitDefinition(DebugKind, RD, CGM.getLangOpts())) {
     if (!T)
       T = getOrCreateRecordFwdDecl(
@@ -1554,11 +1531,11 @@
   return CreateTypeDefinition(Ty);
 }
 
-llvm::DIType CGDebugInfo::CreateTypeDefinition(const RecordType *Ty) {
+llvm::DIType *CGDebugInfo::CreateTypeDefinition(const RecordType *Ty) {
   RecordDecl *RD = Ty->getDecl();
 
   // Get overall information about the record type for the debug info.
-  llvm::DIFile DefUnit = getOrCreateFile(RD->getLocation());
+  llvm::DIFile *DefUnit = getOrCreateFile(RD->getLocation());
 
   // Records and classes and unions can all be recursive.  To handle them, we
   // first generate a debug descriptor for the struct as a forward declaration.
@@ -1568,7 +1545,7 @@
   // uses of the forward declaration with the final definition.
 
   auto *FwdDecl =
-      cast<llvm::MDCompositeType>(getOrCreateLimitedType(Ty, DefUnit));
+      cast<llvm::DICompositeType>(getOrCreateLimitedType(Ty, DefUnit));
 
   const RecordDecl *D = RD->getDefinition();
   if (!D || !D->isCompleteDefinition())
@@ -1603,20 +1580,20 @@
   LexicalBlockStack.pop_back();
   RegionMap.erase(Ty->getDecl());
 
-  llvm::DIArray Elements = DBuilder.getOrCreateArray(EltTys);
+  llvm::DINodeArray Elements = DBuilder.getOrCreateArray(EltTys);
   DBuilder.replaceArrays(FwdDecl, Elements);
 
   if (FwdDecl->isTemporary())
     FwdDecl =
-        llvm::MDNode::replaceWithPermanent(llvm::TempMDCompositeType(FwdDecl));
+        llvm::MDNode::replaceWithPermanent(llvm::TempDICompositeType(FwdDecl));
 
   RegionMap[Ty->getDecl()].reset(FwdDecl);
   return FwdDecl;
 }
 
 /// CreateType - get objective-c object type.
-llvm::DIType CGDebugInfo::CreateType(const ObjCObjectType *Ty,
-                                     llvm::DIFile Unit) {
+llvm::DIType *CGDebugInfo::CreateType(const ObjCObjectType *Ty,
+                                      llvm::DIFile *Unit) {
   // Ignore protocols.
   return getOrCreateType(Ty->getBaseType(), Unit);
 }
@@ -1646,14 +1623,14 @@
 }
 
 /// CreateType - get objective-c interface type.
-llvm::DIType CGDebugInfo::CreateType(const ObjCInterfaceType *Ty,
-                                     llvm::DIFile Unit) {
+llvm::DIType *CGDebugInfo::CreateType(const ObjCInterfaceType *Ty,
+                                      llvm::DIFile *Unit) {
   ObjCInterfaceDecl *ID = Ty->getDecl();
   if (!ID)
-    return llvm::DIType();
+    return nullptr;
 
   // Get overall information about the record type for the debug info.
-  llvm::DIFile DefUnit = getOrCreateFile(ID->getLocation());
+  llvm::DIFile *DefUnit = getOrCreateFile(ID->getLocation());
   unsigned Line = getLineNumber(ID->getLocation());
   auto RuntimeLang =
       static_cast<llvm::dwarf::SourceLanguage>(TheCU->getSourceLanguage());
@@ -1662,7 +1639,7 @@
   // debug type since we won't be able to lay out the entire type.
   ObjCInterfaceDecl *Def = ID->getDefinition();
   if (!Def || !Def->getImplementation()) {
-    llvm::DIType FwdDecl = DBuilder.createReplaceableCompositeType(
+    llvm::DIType *FwdDecl = DBuilder.createReplaceableCompositeType(
         llvm::dwarf::DW_TAG_structure_type, ID->getName(), TheCU, DefUnit, Line,
         RuntimeLang);
     ObjCInterfaceCache.push_back(ObjCInterfaceCacheEntry(Ty, FwdDecl, Unit));
@@ -1672,10 +1649,10 @@
   return CreateTypeDefinition(Ty, Unit);
 }
 
-llvm::DIType CGDebugInfo::CreateTypeDefinition(const ObjCInterfaceType *Ty,
-                                               llvm::DIFile Unit) {
+llvm::DIType *CGDebugInfo::CreateTypeDefinition(const ObjCInterfaceType *Ty,
+                                                llvm::DIFile *Unit) {
   ObjCInterfaceDecl *ID = Ty->getDecl();
-  llvm::DIFile DefUnit = getOrCreateFile(ID->getLocation());
+  llvm::DIFile *DefUnit = getOrCreateFile(ID->getLocation());
   unsigned Line = getLineNumber(ID->getLocation());
   unsigned RuntimeLang = TheCU->getSourceLanguage();
 
@@ -1685,11 +1662,11 @@
 
   unsigned Flags = 0;
   if (ID->getImplementation())
-    Flags |= llvm::DebugNode::FlagObjcClassComplete;
+    Flags |= llvm::DINode::FlagObjcClassComplete;
 
-  llvm::MDCompositeType *RealDecl = DBuilder.createStructType(
-      Unit, ID->getName(), DefUnit, Line, Size, Align, Flags, llvm::DIType(),
-      llvm::DIArray(), RuntimeLang);
+  llvm::DICompositeType *RealDecl = DBuilder.createStructType(
+      Unit, ID->getName(), DefUnit, Line, Size, Align, Flags, nullptr,
+      llvm::DINodeArray(), RuntimeLang);
 
   QualType QTy(Ty, 0);
   TypeCache[QTy.getAsOpaquePtr()].reset(RealDecl);
@@ -1703,19 +1680,19 @@
 
   ObjCInterfaceDecl *SClass = ID->getSuperClass();
   if (SClass) {
-    llvm::DIType SClassTy =
+    llvm::DIType *SClassTy =
         getOrCreateType(CGM.getContext().getObjCInterfaceType(SClass), Unit);
     if (!SClassTy)
-      return llvm::DIType();
+      return nullptr;
 
-    llvm::DIType InhTag = DBuilder.createInheritance(RealDecl, SClassTy, 0, 0);
+    llvm::DIType *InhTag = DBuilder.createInheritance(RealDecl, SClassTy, 0, 0);
     EltTys.push_back(InhTag);
   }
 
   // Create entries for all of the properties.
   for (const auto *PD : ID->properties()) {
     SourceLocation Loc = PD->getLocation();
-    llvm::DIFile PUnit = getOrCreateFile(Loc);
+    llvm::DIFile *PUnit = getOrCreateFile(Loc);
     unsigned PLine = getLineNumber(Loc);
     ObjCMethodDecl *Getter = PD->getGetterMethodDecl();
     ObjCMethodDecl *Setter = PD->getSetterMethodDecl();
@@ -1733,9 +1710,9 @@
   unsigned FieldNo = 0;
   for (ObjCIvarDecl *Field = ID->all_declared_ivar_begin(); Field;
        Field = Field->getNextIvar(), ++FieldNo) {
-    llvm::DIType FieldTy = getOrCreateType(Field->getType(), Unit);
+    llvm::DIType *FieldTy = getOrCreateType(Field->getType(), Unit);
     if (!FieldTy)
-      return llvm::DIType();
+      return nullptr;
 
     StringRef FieldName = Field->getName();
 
@@ -1744,7 +1721,7 @@
       continue;
 
     // Get the location for the field.
-    llvm::DIFile FieldDefUnit = getOrCreateFile(Field->getLocation());
+    llvm::DIFile *FieldDefUnit = getOrCreateFile(Field->getLocation());
     unsigned FieldLine = getLineNumber(Field->getLocation());
     QualType FType = Field->getType();
     uint64_t FieldSize = 0;
@@ -1777,11 +1754,11 @@
 
     unsigned Flags = 0;
     if (Field->getAccessControl() == ObjCIvarDecl::Protected)
-      Flags = llvm::DebugNode::FlagProtected;
+      Flags = llvm::DINode::FlagProtected;
     else if (Field->getAccessControl() == ObjCIvarDecl::Private)
-      Flags = llvm::DebugNode::FlagPrivate;
+      Flags = llvm::DINode::FlagPrivate;
     else if (Field->getAccessControl() == ObjCIvarDecl::Public)
-      Flags = llvm::DebugNode::FlagPublic;
+      Flags = llvm::DINode::FlagPublic;
 
     llvm::MDNode *PropertyNode = nullptr;
     if (ObjCImplementationDecl *ImpD = ID->getImplementation()) {
@@ -1789,7 +1766,7 @@
               ImpD->FindPropertyImplIvarDecl(Field->getIdentifier())) {
         if (ObjCPropertyDecl *PD = PImpD->getPropertyDecl()) {
           SourceLocation Loc = PD->getLocation();
-          llvm::DIFile PUnit = getOrCreateFile(Loc);
+          llvm::DIFile *PUnit = getOrCreateFile(Loc);
           unsigned PLine = getLineNumber(Loc);
           ObjCMethodDecl *Getter = PD->getGetterMethodDecl();
           ObjCMethodDecl *Setter = PD->getSetterMethodDecl();
@@ -1810,15 +1787,16 @@
     EltTys.push_back(FieldTy);
   }
 
-  llvm::DIArray Elements = DBuilder.getOrCreateArray(EltTys);
+  llvm::DINodeArray Elements = DBuilder.getOrCreateArray(EltTys);
   DBuilder.replaceArrays(RealDecl, Elements);
 
   LexicalBlockStack.pop_back();
   return RealDecl;
 }
 
-llvm::DIType CGDebugInfo::CreateType(const VectorType *Ty, llvm::DIFile Unit) {
-  llvm::DIType ElementTy = getOrCreateType(Ty->getElementType(), Unit);
+llvm::DIType *CGDebugInfo::CreateType(const VectorType *Ty,
+                                      llvm::DIFile *Unit) {
+  llvm::DIType *ElementTy = getOrCreateType(Ty->getElementType(), Unit);
   int64_t Count = Ty->getNumElements();
   if (Count == 0)
     // If number of elements are not known then this is an unbounded array.
@@ -1826,7 +1804,7 @@
     Count = -1;
 
   llvm::Metadata *Subscript = DBuilder.getOrCreateSubrange(0, Count);
-  llvm::DIArray SubscriptArray = DBuilder.getOrCreateArray(Subscript);
+  llvm::DINodeArray SubscriptArray = DBuilder.getOrCreateArray(Subscript);
 
   uint64_t Size = CGM.getContext().getTypeSize(Ty);
   uint64_t Align = CGM.getContext().getTypeAlign(Ty);
@@ -1834,7 +1812,7 @@
   return DBuilder.createVectorType(Size, Align, ElementTy, SubscriptArray);
 }
 
-llvm::DIType CGDebugInfo::CreateType(const ArrayType *Ty, llvm::DIFile Unit) {
+llvm::DIType *CGDebugInfo::CreateType(const ArrayType *Ty, llvm::DIFile *Unit) {
   uint64_t Size;
   uint64_t Align;
 
@@ -1880,32 +1858,33 @@
     EltTy = Ty->getElementType();
   }
 
-  llvm::DIArray SubscriptArray = DBuilder.getOrCreateArray(Subscripts);
+  llvm::DINodeArray SubscriptArray = DBuilder.getOrCreateArray(Subscripts);
 
-  llvm::DIType DbgTy = DBuilder.createArrayType(
-      Size, Align, getOrCreateType(EltTy, Unit), SubscriptArray);
-  return DbgTy;
+  return DBuilder.createArrayType(Size, Align, getOrCreateType(EltTy, Unit),
+                                  SubscriptArray);
 }
 
-llvm::DIType CGDebugInfo::CreateType(const LValueReferenceType *Ty,
-                                     llvm::DIFile Unit) {
+llvm::DIType *CGDebugInfo::CreateType(const LValueReferenceType *Ty,
+                                      llvm::DIFile *Unit) {
   return CreatePointerLikeType(llvm::dwarf::DW_TAG_reference_type, Ty,
                                Ty->getPointeeType(), Unit);
 }
 
-llvm::DIType CGDebugInfo::CreateType(const RValueReferenceType *Ty,
-                                     llvm::DIFile Unit) {
+llvm::DIType *CGDebugInfo::CreateType(const RValueReferenceType *Ty,
+                                      llvm::DIFile *Unit) {
   return CreatePointerLikeType(llvm::dwarf::DW_TAG_rvalue_reference_type, Ty,
                                Ty->getPointeeType(), Unit);
 }
 
-llvm::DIType CGDebugInfo::CreateType(const MemberPointerType *Ty,
-                                     llvm::DIFile U) {
-  llvm::DIType ClassType = getOrCreateType(QualType(Ty->getClass(), 0), U);
-  if (!Ty->getPointeeType()->isFunctionType())
+llvm::DIType *CGDebugInfo::CreateType(const MemberPointerType *Ty,
+                                      llvm::DIFile *U) {
+  uint64_t Size = CGM.getCXXABI().isTypeInfoCalculable(QualType(Ty, 0))
+                      ? CGM.getContext().getTypeSize(Ty)
+                      : 0;
+  llvm::DIType *ClassType = getOrCreateType(QualType(Ty->getClass(), 0), U);
+  if (Ty->isMemberDataPointerType())
     return DBuilder.createMemberPointerType(
-      getOrCreateType(Ty->getPointeeType(), U), ClassType,
-      CGM.getContext().getTypeSize(Ty));
+        getOrCreateType(Ty->getPointeeType(), U), ClassType, Size);
 
   const FunctionProtoType *FPT =
       Ty->getPointeeType()->getAs<FunctionProtoType>();
@@ -1913,17 +1892,17 @@
       getOrCreateInstanceMethodType(CGM.getContext().getPointerType(QualType(
                                         Ty->getClass(), FPT->getTypeQuals())),
                                     FPT, U),
-      ClassType, CGM.getContext().getTypeSize(Ty));
+      ClassType, Size);
 }
 
-llvm::DIType CGDebugInfo::CreateType(const AtomicType *Ty, llvm::DIFile U) {
+llvm::DIType *CGDebugInfo::CreateType(const AtomicType *Ty, llvm::DIFile *U) {
   // Ignore the atomic wrapping
   // FIXME: What is the correct representation?
   return getOrCreateType(Ty->getValueType(), U);
 }
 
 /// CreateEnumType - get enumeration type.
-llvm::DIType CGDebugInfo::CreateEnumType(const EnumType *Ty) {
+llvm::DIType *CGDebugInfo::CreateEnumType(const EnumType *Ty) {
   const EnumDecl *ED = Ty->getDecl();
   uint64_t Size = 0;
   uint64_t Align = 0;
@@ -1937,14 +1916,14 @@
   // If this is just a forward declaration, construct an appropriately
   // marked node and just return it.
   if (!ED->getDefinition()) {
-    llvm::MDScope *EDContext =
+    llvm::DIScope *EDContext =
         getContextDescriptor(cast<Decl>(ED->getDeclContext()));
-    llvm::DIFile DefUnit = getOrCreateFile(ED->getLocation());
+    llvm::DIFile *DefUnit = getOrCreateFile(ED->getLocation());
     unsigned Line = getLineNumber(ED->getLocation());
     StringRef EDName = ED->getName();
-    llvm::DIType RetTy = DBuilder.createReplaceableCompositeType(
+    llvm::DIType *RetTy = DBuilder.createReplaceableCompositeType(
         llvm::dwarf::DW_TAG_enumeration_type, EDName, EDContext, DefUnit, Line,
-        0, Size, Align, llvm::DebugNode::FlagFwdDecl, FullName);
+        0, Size, Align, llvm::DINode::FlagFwdDecl, FullName);
     ReplaceMap.emplace_back(
         std::piecewise_construct, std::make_tuple(Ty),
         std::make_tuple(static_cast<llvm::Metadata *>(RetTy)));
@@ -1954,7 +1933,7 @@
   return CreateTypeDefinition(Ty);
 }
 
-llvm::DIType CGDebugInfo::CreateTypeDefinition(const EnumType *Ty) {
+llvm::DIType *CGDebugInfo::CreateTypeDefinition(const EnumType *Ty) {
   const EnumDecl *ED = Ty->getDecl();
   uint64_t Size = 0;
   uint64_t Align = 0;
@@ -1965,7 +1944,7 @@
 
   SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU);
 
-  // Create DIEnumerator elements for each enumerator.
+  // Create elements for each enumerator.
   SmallVector<llvm::Metadata *, 16> Enumerators;
   ED = ED->getDefinition();
   for (const auto *Enum : ED->enumerators()) {
@@ -1974,19 +1953,17 @@
   }
 
   // Return a CompositeType for the enum itself.
-  llvm::DIArray EltArray = DBuilder.getOrCreateArray(Enumerators);
+  llvm::DINodeArray EltArray = DBuilder.getOrCreateArray(Enumerators);
 
-  llvm::DIFile DefUnit = getOrCreateFile(ED->getLocation());
+  llvm::DIFile *DefUnit = getOrCreateFile(ED->getLocation());
   unsigned Line = getLineNumber(ED->getLocation());
-  llvm::MDScope *EnumContext =
+  llvm::DIScope *EnumContext =
       getContextDescriptor(cast<Decl>(ED->getDeclContext()));
-  llvm::DIType ClassTy = ED->isFixed()
-                             ? getOrCreateType(ED->getIntegerType(), DefUnit)
-                             : llvm::DIType();
-  llvm::DIType DbgTy =
-      DBuilder.createEnumerationType(EnumContext, ED->getName(), DefUnit, Line,
-                                     Size, Align, EltArray, ClassTy, FullName);
-  return DbgTy;
+  llvm::DIType *ClassTy =
+      ED->isFixed() ? getOrCreateType(ED->getIntegerType(), DefUnit) : nullptr;
+  return DBuilder.createEnumerationType(EnumContext, ED->getName(), DefUnit,
+                                        Line, Size, Align, EltArray, ClassTy,
+                                        FullName);
 }
 
 static QualType UnwrapTypeForDebugInfo(QualType T, const ASTContext &C) {
@@ -2046,7 +2023,7 @@
 
 /// getType - Get the type from the cache or return null type if it doesn't
 /// exist.
-llvm::DIType CGDebugInfo::getTypeOrNull(QualType Ty) {
+llvm::DIType *CGDebugInfo::getTypeOrNull(QualType Ty) {
 
   // Unwrap the type as needed for debug information.
   Ty = UnwrapTypeForDebugInfo(Ty, CGM.getContext());
@@ -2055,7 +2032,7 @@
   if (it != TypeCache.end()) {
     // Verify that the debug info still exists.
     if (llvm::Metadata *V = it->second)
-      return cast<llvm::MDType>(V);
+      return cast<llvm::DIType>(V);
   }
 
   return nullptr;
@@ -2074,18 +2051,18 @@
 
 /// getOrCreateType - Get the type from the cache or create a new
 /// one if necessary.
-llvm::DIType CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile Unit) {
+llvm::DIType *CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile *Unit) {
   if (Ty.isNull())
-    return llvm::DIType();
+    return nullptr;
 
   // Unwrap the type as needed for debug information.
   Ty = UnwrapTypeForDebugInfo(Ty, CGM.getContext());
 
-  if (llvm::DIType T = getTypeOrNull(Ty))
+  if (auto *T = getTypeOrNull(Ty))
     return T;
 
   // Otherwise create the type.
-  llvm::DIType Res = CreateTypeNode(Ty, Unit);
+  llvm::DIType *Res = CreateTypeNode(Ty, Unit);
   void *TyPtr = Ty.getAsOpaquePtr();
 
   // And update the type cache.
@@ -2121,7 +2098,7 @@
 }
 
 /// CreateTypeNode - Create a new debug type node.
-llvm::DIType CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile Unit) {
+llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) {
   // Handle qualifiers, which recursively handles what they refer to.
   if (Ty.hasLocalQualifiers())
     return CreateQualifiedType(Ty, Unit);
@@ -2203,11 +2180,11 @@
 
 /// getOrCreateLimitedType - Get the type from the cache or create a new
 /// limited type if necessary.
-llvm::DIType CGDebugInfo::getOrCreateLimitedType(const RecordType *Ty,
-                                                 llvm::DIFile Unit) {
+llvm::DIType *CGDebugInfo::getOrCreateLimitedType(const RecordType *Ty,
+                                                  llvm::DIFile *Unit) {
   QualType QTy(Ty, 0);
 
-  auto *T = cast_or_null<llvm::MDCompositeTypeBase>(getTypeOrNull(QTy));
+  auto *T = cast_or_null<llvm::DICompositeTypeBase>(getTypeOrNull(QTy));
 
   // We may have cached a forward decl when we could have created
   // a non-forward decl. Go ahead and create a non-forward decl
@@ -2216,12 +2193,12 @@
     return T;
 
   // Otherwise create the type.
-  llvm::MDCompositeType *Res = CreateLimitedType(Ty);
+  llvm::DICompositeType *Res = CreateLimitedType(Ty);
 
   // Propagate members from the declaration to the definition
   // CreateType(const RecordType*) will overwrite this with the members in the
   // correct order if the full type is needed.
-  DBuilder.replaceArrays(Res, T ? T->getElements() : llvm::DIArray());
+  DBuilder.replaceArrays(Res, T ? T->getElements() : llvm::DINodeArray());
 
   // And update the type cache.
   TypeCache[QTy.getAsOpaquePtr()].reset(Res);
@@ -2229,20 +2206,20 @@
 }
 
 // TODO: Currently used for context chains when limiting debug info.
-llvm::MDCompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) {
+llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) {
   RecordDecl *RD = Ty->getDecl();
 
   // Get overall information about the record type for the debug info.
-  llvm::DIFile DefUnit = getOrCreateFile(RD->getLocation());
+  llvm::DIFile *DefUnit = getOrCreateFile(RD->getLocation());
   unsigned Line = getLineNumber(RD->getLocation());
   StringRef RDName = getClassName(RD);
 
-  llvm::MDScope *RDContext =
+  llvm::DIScope *RDContext =
       getContextDescriptor(cast<Decl>(RD->getDeclContext()));
 
   // If we ended up creating the type during the context chain construction,
   // just return that.
-  auto *T = cast_or_null<llvm::MDCompositeType>(
+  auto *T = cast_or_null<llvm::DICompositeType>(
       getTypeOrNull(CGM.getContext().getRecordType(RD)));
   if (T && (!T->isForwardDecl() || !RD->getDefinition()))
     return T;
@@ -2258,7 +2235,7 @@
 
   SmallString<256> FullName = getUniqueTagTypeName(Ty, CGM, TheCU);
 
-  llvm::MDCompositeType *RealDecl = DBuilder.createReplaceableCompositeType(
+  llvm::DICompositeType *RealDecl = DBuilder.createReplaceableCompositeType(
       getTagForRecord(RD), RDName, RDContext, DefUnit, Line, 0, Size, Align, 0,
       FullName);
 
@@ -2267,15 +2244,15 @@
 
   if (const ClassTemplateSpecializationDecl *TSpecial =
           dyn_cast<ClassTemplateSpecializationDecl>(RD))
-    DBuilder.replaceArrays(RealDecl, llvm::DIArray(),
+    DBuilder.replaceArrays(RealDecl, llvm::DINodeArray(),
                            CollectCXXTemplateParams(TSpecial, DefUnit));
   return RealDecl;
 }
 
 void CGDebugInfo::CollectContainingType(const CXXRecordDecl *RD,
-                                        llvm::MDCompositeType *RealDecl) {
+                                        llvm::DICompositeType *RealDecl) {
   // A class's primary base or the class itself contains the vtable.
-  llvm::MDCompositeType *ContainingType = nullptr;
+  llvm::DICompositeType *ContainingType = nullptr;
   const ASTRecordLayout &RL = CGM.getContext().getASTRecordLayout(RD);
   if (const CXXRecordDecl *PBase = RL.getPrimaryBase()) {
     // Seek non-virtual primary base root.
@@ -2287,7 +2264,7 @@
       else
         break;
     }
-    ContainingType = cast<llvm::MDCompositeType>(
+    ContainingType = cast<llvm::DICompositeType>(
         getOrCreateType(QualType(PBase->getTypeForDecl(), 0),
                         getOrCreateFile(RD->getLocation())));
   } else if (RD->isDynamicClass())
@@ -2297,26 +2274,29 @@
 }
 
 /// CreateMemberType - Create new member and increase Offset by FType's size.
-llvm::DIType CGDebugInfo::CreateMemberType(llvm::DIFile Unit, QualType FType,
-                                           StringRef Name, uint64_t *Offset) {
-  llvm::DIType FieldTy = CGDebugInfo::getOrCreateType(FType, Unit);
+llvm::DIType *CGDebugInfo::CreateMemberType(llvm::DIFile *Unit, QualType FType,
+                                            StringRef Name, uint64_t *Offset) {
+  llvm::DIType *FieldTy = CGDebugInfo::getOrCreateType(FType, Unit);
   uint64_t FieldSize = CGM.getContext().getTypeSize(FType);
   unsigned FieldAlign = CGM.getContext().getTypeAlign(FType);
-  llvm::DIType Ty = DBuilder.createMemberType(Unit, Name, Unit, 0, FieldSize,
-                                              FieldAlign, *Offset, 0, FieldTy);
+  llvm::DIType *Ty = DBuilder.createMemberType(Unit, Name, Unit, 0, FieldSize,
+                                               FieldAlign, *Offset, 0, FieldTy);
   *Offset += FieldSize;
   return Ty;
 }
 
-void CGDebugInfo::collectFunctionDeclProps(
-    GlobalDecl GD, llvm::DIFile Unit, StringRef &Name, StringRef &LinkageName,
-    llvm::MDScope *&FDContext, llvm::DIArray &TParamsArray, unsigned &Flags) {
+void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit,
+                                           StringRef &Name,
+                                           StringRef &LinkageName,
+                                           llvm::DIScope *&FDContext,
+                                           llvm::DINodeArray &TParamsArray,
+                                           unsigned &Flags) {
   const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
   Name = getFunctionName(FD);
   // Use mangled name as linkage name for C/C++ functions.
   if (FD->hasPrototype()) {
     LinkageName = CGM.getMangledName(GD);
-    Flags |= llvm::DebugNode::FlagPrototyped;
+    Flags |= llvm::DINode::FlagPrototyped;
   }
   // No need to replicate the linkage name if it isn't different from the
   // subprogram name, no need to have it at all unless coverage is enabled or
@@ -2339,10 +2319,10 @@
   }
 }
 
-void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile &Unit,
+void CGDebugInfo::collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit,
                                       unsigned &LineNo, QualType &T,
                                       StringRef &Name, StringRef &LinkageName,
-                                      llvm::MDScope *&VDContext) {
+                                      llvm::DIScope *&VDContext) {
   Unit = getOrCreateFile(VD->getLocation());
   LineNo = getLineNumber(VD->getLocation());
 
@@ -2384,14 +2364,14 @@
   VDContext = getContextDescriptor(dyn_cast<Decl>(DC));
 }
 
-llvm::DISubprogram
+llvm::DISubprogram *
 CGDebugInfo::getFunctionForwardDeclaration(const FunctionDecl *FD) {
-  llvm::DIArray TParamsArray;
+  llvm::DINodeArray TParamsArray;
   StringRef Name, LinkageName;
   unsigned Flags = 0;
   SourceLocation Loc = FD->getLocation();
-  llvm::DIFile Unit = getOrCreateFile(Loc);
-  llvm::MDScope *DContext = Unit;
+  llvm::DIFile *Unit = getOrCreateFile(Loc);
+  llvm::DIScope *DContext = Unit;
   unsigned Line = getLineNumber(Loc);
 
   collectFunctionDeclProps(FD, Unit, Name, LinkageName, DContext,
@@ -2403,7 +2383,7 @@
   QualType FnType =
     CGM.getContext().getFunctionType(FD->getReturnType(), ArgTypes,
                                      FunctionProtoType::ExtProtoInfo());
-  llvm::MDSubprogram *SP = DBuilder.createTempFunctionFwdDecl(
+  llvm::DISubprogram *SP = DBuilder.createTempFunctionFwdDecl(
       DContext, Name, LinkageName, Unit, Line,
       getOrCreateFunctionType(FD, FnType, Unit), !FD->isExternallyVisible(),
       false /*declaration*/, 0, Flags, CGM.getLangOpts().Optimize, nullptr,
@@ -2415,21 +2395,19 @@
   return SP;
 }
 
-llvm::DIGlobalVariable
+llvm::DIGlobalVariable *
 CGDebugInfo::getGlobalVariableForwardDeclaration(const VarDecl *VD) {
   QualType T;
   StringRef Name, LinkageName;
   SourceLocation Loc = VD->getLocation();
-  llvm::DIFile Unit = getOrCreateFile(Loc);
-  llvm::MDScope *DContext = Unit;
+  llvm::DIFile *Unit = getOrCreateFile(Loc);
+  llvm::DIScope *DContext = Unit;
   unsigned Line = getLineNumber(Loc);
 
   collectVarDeclProps(VD, Unit, Line, T, Name, LinkageName, DContext);
-  llvm::DIGlobalVariable GV =
-    DBuilder.createTempGlobalVariableFwdDecl(DContext, Name, LinkageName, Unit,
-                                             Line, getOrCreateType(T, Unit),
-                                             !VD->isExternallyVisible(),
-                                             nullptr, nullptr);
+  auto *GV = DBuilder.createTempGlobalVariableFwdDecl(
+      DContext, Name, LinkageName, Unit, Line, getOrCreateType(T, Unit),
+      !VD->isExternallyVisible(), nullptr, nullptr);
   FwdDeclReplaceMap.emplace_back(
       std::piecewise_construct,
       std::make_tuple(cast<VarDecl>(VD->getCanonicalDecl())),
@@ -2437,7 +2415,7 @@
   return GV;
 }
 
-llvm::DebugNode *CGDebugInfo::getDeclarationOrDefinition(const Decl *D) {
+llvm::DINode *CGDebugInfo::getDeclarationOrDefinition(const Decl *D) {
   // We only need a declaration (not a definition) of the type - so use whatever
   // we would otherwise do to get a type for a pointee. (forward declarations in
   // limited debug info, full definitions (if the type definition is available)
@@ -2448,7 +2426,7 @@
   auto I = DeclCache.find(D->getCanonicalDecl());
 
   if (I != DeclCache.end())
-    return dyn_cast_or_null<llvm::DebugNode>(I->second);
+    return dyn_cast_or_null<llvm::DINode>(I->second);
 
   // No definition for now. Emit a forward definition that might be
   // merged with a potential upcoming definition.
@@ -2462,29 +2440,27 @@
 
 /// getFunctionDeclaration - Return debug info descriptor to describe method
 /// declaration for the given method definition.
-llvm::DISubprogram CGDebugInfo::getFunctionDeclaration(const Decl *D) {
+llvm::DISubprogram *CGDebugInfo::getFunctionDeclaration(const Decl *D) {
   if (!D || DebugKind <= CodeGenOptions::DebugLineTablesOnly)
-    return llvm::DISubprogram();
+    return nullptr;
 
   const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
   if (!FD)
-    return llvm::DISubprogram();
+    return nullptr;
 
   // Setup context.
-  llvm::DIScope S = getContextDescriptor(cast<Decl>(D->getDeclContext()));
+  auto *S = getContextDescriptor(cast<Decl>(D->getDeclContext()));
 
   auto MI = SPCache.find(FD->getCanonicalDecl());
   if (MI == SPCache.end()) {
     if (const CXXMethodDecl *MD =
             dyn_cast<CXXMethodDecl>(FD->getCanonicalDecl())) {
-      llvm::DICompositeType T = cast<llvm::MDCompositeType>(S);
-      llvm::DISubprogram SP =
-          CreateCXXMemberFunction(MD, getOrCreateFile(MD->getLocation()), T);
-      return SP;
+      return CreateCXXMemberFunction(MD, getOrCreateFile(MD->getLocation()),
+                                     cast<llvm::DICompositeType>(S));
     }
   }
   if (MI != SPCache.end()) {
-    auto *SP = dyn_cast_or_null<llvm::MDSubprogram>(MI->second);
+    auto *SP = dyn_cast_or_null<llvm::DISubprogram>(MI->second);
     if (SP && !SP->isDefinition())
       return SP;
   }
@@ -2492,24 +2468,22 @@
   for (auto NextFD : FD->redecls()) {
     auto MI = SPCache.find(NextFD->getCanonicalDecl());
     if (MI != SPCache.end()) {
-      auto *SP = dyn_cast_or_null<llvm::MDSubprogram>(MI->second);
+      auto *SP = dyn_cast_or_null<llvm::DISubprogram>(MI->second);
       if (SP && !SP->isDefinition())
         return SP;
     }
   }
-  return llvm::DISubprogram();
+  return nullptr;
 }
 
-// getOrCreateFunctionType - Construct DIType. If it is a c++ method, include
+// getOrCreateFunctionType - Construct type. If it is a c++ method, include
 // implicit parameter "this".
-llvm::MDSubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D,
+llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D,
                                                              QualType FnType,
-                                                             llvm::DIFile F) {
+                                                             llvm::DIFile *F) {
   if (!D || DebugKind <= CodeGenOptions::DebugLineTablesOnly)
-    // Create fake but valid subroutine type. Otherwise
-    // llvm::DISubprogram::Verify() would return false, and
-    // subprogram DIE will miss DW_AT_decl_file and
-    // DW_AT_decl_line fields.
+    // Create fake but valid subroutine type. Otherwise -verify would fail, and
+    // subprogram DIE will miss DW_AT_decl_file and DW_AT_decl_line fields.
     return DBuilder.createSubroutineType(F,
                                          DBuilder.getOrCreateTypeArray(None));
 
@@ -2530,11 +2504,10 @@
     Elts.push_back(getOrCreateType(ResultTy, F));
     // "self" pointer is always first argument.
     QualType SelfDeclTy = OMethod->getSelfDecl()->getType();
-    llvm::DIType SelfTy = getOrCreateType(SelfDeclTy, F);
-    Elts.push_back(CreateSelfType(SelfDeclTy, SelfTy));
+    Elts.push_back(CreateSelfType(SelfDeclTy, getOrCreateType(SelfDeclTy, F)));
     // "_cmd" pointer is always second argument.
-    llvm::DIType CmdTy = getOrCreateType(OMethod->getCmdDecl()->getType(), F);
-    Elts.push_back(DBuilder.createArtificialType(CmdTy));
+    Elts.push_back(DBuilder.createArtificialType(
+        getOrCreateType(OMethod->getCmdDecl()->getType(), F)));
     // Get rest of the arguments.
     for (const auto *PI : OMethod->params())
       Elts.push_back(getOrCreateType(PI->getType(), F));
@@ -2542,7 +2515,7 @@
     if (OMethod->isVariadic())
       Elts.push_back(DBuilder.createUnspecifiedParameter());
 
-    llvm::DITypeArray EltTypeArray = DBuilder.getOrCreateTypeArray(Elts);
+    llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(Elts);
     return DBuilder.createSubroutineType(F, EltTypeArray);
   }
 
@@ -2556,11 +2529,11 @@
         for (unsigned i = 0, e = FPT->getNumParams(); i != e; ++i)
           EltTys.push_back(getOrCreateType(FPT->getParamType(i), F));
       EltTys.push_back(DBuilder.createUnspecifiedParameter());
-      llvm::DITypeArray EltTypeArray = DBuilder.getOrCreateTypeArray(EltTys);
+      llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(EltTys);
       return DBuilder.createSubroutineType(F, EltTypeArray);
     }
 
-  return cast<llvm::MDSubroutineType>(getOrCreateType(FnType, F));
+  return cast<llvm::DISubroutineType>(getOrCreateType(FnType, F));
 }
 
 /// EmitFunctionStart - Constructs the debug code for entering a function.
@@ -2577,20 +2550,19 @@
   bool HasDecl = (D != nullptr);
 
   unsigned Flags = 0;
-  llvm::DIFile Unit = getOrCreateFile(Loc);
-  llvm::MDScope *FDContext = Unit;
-  llvm::DIArray TParamsArray;
+  llvm::DIFile *Unit = getOrCreateFile(Loc);
+  llvm::DIScope *FDContext = Unit;
+  llvm::DINodeArray TParamsArray;
   if (!HasDecl) {
     // Use llvm function name.
     LinkageName = Fn->getName();
   } else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
-    // If there is a DISubprogram for this function available then use it.
+    // If there is a subprogram for this function available then use it.
     auto FI = SPCache.find(FD->getCanonicalDecl());
     if (FI != SPCache.end()) {
-      auto *SP = dyn_cast_or_null<llvm::MDSubprogram>(FI->second);
+      auto *SP = dyn_cast_or_null<llvm::DISubprogram>(FI->second);
       if (SP && SP->isDefinition()) {
-        llvm::MDNode *SPN = SP;
-        LexicalBlockStack.emplace_back(SPN);
+        LexicalBlockStack.emplace_back(SP);
         RegionMap[D].reset(SP);
         return;
       }
@@ -2599,17 +2571,17 @@
                              TParamsArray, Flags);
   } else if (const ObjCMethodDecl *OMD = dyn_cast<ObjCMethodDecl>(D)) {
     Name = getObjCMethodName(OMD);
-    Flags |= llvm::DebugNode::FlagPrototyped;
+    Flags |= llvm::DINode::FlagPrototyped;
   } else {
     // Use llvm function name.
     Name = Fn->getName();
-    Flags |= llvm::DebugNode::FlagPrototyped;
+    Flags |= llvm::DINode::FlagPrototyped;
   }
   if (!Name.empty() && Name[0] == '\01')
     Name = Name.substr(1);
 
   if (!HasDecl || D->isImplicit()) {
-    Flags |= llvm::DebugNode::FlagArtificial;
+    Flags |= llvm::DINode::FlagArtificial;
     // Artificial functions without a location should not silently reuse CurLoc.
     if (Loc.isInvalid())
       CurLoc = SourceLocation();
@@ -2622,7 +2594,7 @@
   // FunctionDecls. When/if we fix this we can have FDContext be TheCU/null for
   // all subprograms instead of the actual context since subprogram definitions
   // are emitted as CU level entities by the backend.
-  llvm::DISubprogram SP = DBuilder.createFunction(
+  llvm::DISubprogram *SP = DBuilder.createFunction(
       FDContext, Name, LinkageName, Unit, LineNo,
       getOrCreateFunctionType(D, FnType, Unit), Fn->hasInternalLinkage(),
       true /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize, Fn,
@@ -2634,8 +2606,7 @@
     DeclCache[D->getCanonicalDecl()].reset(static_cast<llvm::Metadata *>(SP));
 
   // Push the function onto the lexical block stack.
-  llvm::MDNode *SPN = SP;
-  LexicalBlockStack.emplace_back(SPN);
+  LexicalBlockStack.emplace_back(SP);
 
   if (HasDecl)
     RegionMap[D].reset(SP);
@@ -2662,11 +2633,9 @@
   llvm::MDNode *Back = nullptr;
   if (!LexicalBlockStack.empty())
     Back = LexicalBlockStack.back().get();
-  llvm::DIDescriptor D = DBuilder.createLexicalBlock(
-      cast<llvm::MDScope>(Back), getOrCreateFile(CurLoc), getLineNumber(CurLoc),
-      getColumnNumber(CurLoc));
-  llvm::MDNode *DN = D;
-  LexicalBlockStack.emplace_back(DN);
+  LexicalBlockStack.emplace_back(DBuilder.createLexicalBlock(
+      cast<llvm::DIScope>(Back), getOrCreateFile(CurLoc), getLineNumber(CurLoc),
+      getColumnNumber(CurLoc)));
 }
 
 /// EmitLexicalBlockStart - Constructs the debug code for entering a declarative
@@ -2719,15 +2688,15 @@
 
 // EmitTypeForVarWithBlocksAttr - Build up structure info for the byref.
 // See BuildByRefType.
-llvm::DIType CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
-                                                       uint64_t *XOffset) {
+llvm::DIType *CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
+                                                        uint64_t *XOffset) {
 
   SmallVector<llvm::Metadata *, 5> EltTys;
   QualType FType;
   uint64_t FieldSize, FieldOffset;
   unsigned FieldAlign;
 
-  llvm::DIFile Unit = getOrCreateFile(VD->getLocation());
+  llvm::DIFile *Unit = getOrCreateFile(VD->getLocation());
   QualType Type = VD->getType();
 
   FieldOffset = 0;
@@ -2774,7 +2743,7 @@
   }
 
   FType = Type;
-  llvm::DIType FieldTy = getOrCreateType(FType, Unit);
+  llvm::DIType *FieldTy = getOrCreateType(FType, Unit);
   FieldSize = CGM.getContext().getTypeSize(FType);
   FieldAlign = CGM.getContext().toBits(Align);
 
@@ -2784,12 +2753,12 @@
   EltTys.push_back(FieldTy);
   FieldOffset += FieldSize;
 
-  llvm::DIArray Elements = DBuilder.getOrCreateArray(EltTys);
+  llvm::DINodeArray Elements = DBuilder.getOrCreateArray(EltTys);
 
-  unsigned Flags = llvm::DebugNode::FlagBlockByrefStruct;
+  unsigned Flags = llvm::DINode::FlagBlockByrefStruct;
 
   return DBuilder.createStructType(Unit, "", Unit, 0, FieldOffset, 0, Flags,
-                                   llvm::DIType(), Elements);
+                                   nullptr, Elements);
 }
 
 /// EmitDeclare - Emit local variable declaration debug info.
@@ -2802,10 +2771,10 @@
   bool Unwritten =
       VD->isImplicit() || (isa<Decl>(VD->getDeclContext()) &&
                            cast<Decl>(VD->getDeclContext())->isImplicit());
-  llvm::DIFile Unit;
+  llvm::DIFile *Unit = nullptr;
   if (!Unwritten)
     Unit = getOrCreateFile(VD->getLocation());
-  llvm::DIType Ty;
+  llvm::DIType *Ty;
   uint64_t XOffset = 0;
   if (VD->hasAttr<BlocksAttr>())
     Ty = EmitTypeForVarWithBlocksAttr(VD, &XOffset);
@@ -2827,20 +2796,20 @@
   SmallVector<int64_t, 9> Expr;
   unsigned Flags = 0;
   if (VD->isImplicit())
-    Flags |= llvm::DebugNode::FlagArtificial;
+    Flags |= llvm::DINode::FlagArtificial;
   // If this is the first argument and it is implicit then
   // give it an object pointer flag.
   // FIXME: There has to be a better way to do this, but for static
   // functions there won't be an implicit param at arg1 and
   // otherwise it is 'self' or 'this'.
   if (isa<ImplicitParamDecl>(VD) && ArgNo == 1)
-    Flags |= llvm::DebugNode::FlagObjectPointer;
+    Flags |= llvm::DINode::FlagObjectPointer;
   if (llvm::Argument *Arg = dyn_cast<llvm::Argument>(Storage))
     if (Arg->getType()->isPointerTy() && !Arg->hasByValAttr() &&
         !VD->getType()->isPointerType())
       Expr.push_back(llvm::dwarf::DW_OP_deref);
 
-  auto *Scope = cast<llvm::MDScope>(LexicalBlockStack.back());
+  auto *Scope = cast<llvm::DIScope>(LexicalBlockStack.back());
 
   StringRef Name = VD->getName();
   if (!Name.empty()) {
@@ -2858,8 +2827,8 @@
       Expr.push_back(offset.getQuantity());
 
       // Create the descriptor for the variable.
-      llvm::DIVariable D = DBuilder.createLocalVariable(
-          Tag, Scope, VD->getName(), Unit, Line, Ty, ArgNo);
+      auto *D = DBuilder.createLocalVariable(Tag, Scope, VD->getName(), Unit,
+                                             Line, Ty, ArgNo);
 
       // Insert an llvm.dbg.declare into the current block.
       DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr),
@@ -2873,8 +2842,15 @@
     // all union fields.
     const RecordDecl *RD = cast<RecordDecl>(RT->getDecl());
     if (RD->isUnion() && RD->isAnonymousStructOrUnion()) {
+      // GDB has trouble finding local variables in anonymous unions, so we emit
+      // artificial local variables for each of the members.
+      //
+      // FIXME: Remove this code as soon as GDB supports this.
+      // The debug info verifier in LLVM operates based on the assumption that a
+      // variable has the same size as its storage and we had to disable the check
+      // for artificial variables.
       for (const auto *Field : RD->fields()) {
-        llvm::DIType FieldTy = getOrCreateType(Field->getType(), Unit);
+        llvm::DIType *FieldTy = getOrCreateType(Field->getType(), Unit);
         StringRef FieldName = Field->getName();
 
         // Ignore unnamed fields. Do not ignore unnamed records.
@@ -2882,21 +2858,21 @@
           continue;
 
         // Use VarDecl's Tag, Scope and Line number.
-        llvm::DIVariable D = DBuilder.createLocalVariable(
+        auto *D = DBuilder.createLocalVariable(
             Tag, Scope, FieldName, Unit, Line, FieldTy,
-            CGM.getLangOpts().Optimize, Flags, ArgNo);
+            CGM.getLangOpts().Optimize, Flags | llvm::DINode::FlagArtificial,
+            ArgNo);
 
         // Insert an llvm.dbg.declare into the current block.
         DBuilder.insertDeclare(Storage, D, DBuilder.createExpression(Expr),
                                llvm::DebugLoc::get(Line, Column, Scope),
                                Builder.GetInsertBlock());
       }
-      return;
     }
   }
 
   // Create the descriptor for the variable.
-  llvm::DIVariable D =
+  auto *D =
       DBuilder.createLocalVariable(Tag, Scope, Name, Unit, Line, Ty,
                                    CGM.getLangOpts().Optimize, Flags, ArgNo);
 
@@ -2919,9 +2895,9 @@
 /// never happen though, since creating a type for the implicit self
 /// argument implies that we already parsed the interface definition
 /// and the ivar declarations in the implementation.
-llvm::DIType CGDebugInfo::CreateSelfType(const QualType &QualTy,
-                                         llvm::DIType Ty) {
-  llvm::DIType CachedTy = getTypeOrNull(QualTy);
+llvm::DIType *CGDebugInfo::CreateSelfType(const QualType &QualTy,
+                                          llvm::DIType *Ty) {
+  llvm::DIType *CachedTy = getTypeOrNull(QualTy);
   if (CachedTy)
     Ty = CachedTy;
   return DBuilder.createObjectPointerType(Ty);
@@ -2939,8 +2915,8 @@
   bool isByRef = VD->hasAttr<BlocksAttr>();
 
   uint64_t XOffset = 0;
-  llvm::DIFile Unit = getOrCreateFile(VD->getLocation());
-  llvm::DIType Ty;
+  llvm::DIFile *Unit = getOrCreateFile(VD->getLocation());
+  llvm::DIType *Ty;
   if (isByRef)
     Ty = EmitTypeForVarWithBlocksAttr(VD, &XOffset);
   else
@@ -2981,9 +2957,9 @@
   }
 
   // Create the descriptor for the variable.
-  llvm::DIVariable D = DBuilder.createLocalVariable(
+  auto *D = DBuilder.createLocalVariable(
       llvm::dwarf::DW_TAG_auto_variable,
-      cast<llvm::MDLocalScope>(LexicalBlockStack.back()), VD->getName(), Unit,
+      cast<llvm::DILocalScope>(LexicalBlockStack.back()), VD->getName(), Unit,
       Line, Ty);
 
   // Insert an llvm.dbg.declare into the current block.
@@ -3026,7 +3002,7 @@
 
   // Collect some general information about the block's location.
   SourceLocation loc = blockDecl->getCaretLocation();
-  llvm::DIFile tunit = getOrCreateFile(loc);
+  llvm::DIFile *tunit = getOrCreateFile(loc);
   unsigned line = getLineNumber(loc);
   unsigned column = getColumnNumber(loc);
 
@@ -3109,7 +3085,7 @@
     const VarDecl *variable = capture->getVariable();
     StringRef name = variable->getName();
 
-    llvm::DIType fieldType;
+    llvm::DIType *fieldType;
     if (capture->isByRef()) {
       TypeInfo PtrInfo = C.getTypeInfo(C.VoidPtrTy);
 
@@ -3131,21 +3107,20 @@
   llvm::raw_svector_ostream(typeName) << "__block_literal_"
                                       << CGM.getUniqueBlockCount();
 
-  llvm::DIArray fieldsArray = DBuilder.getOrCreateArray(fields);
+  llvm::DINodeArray fieldsArray = DBuilder.getOrCreateArray(fields);
 
-  llvm::DIType type =
-      DBuilder.createStructType(tunit, typeName.str(), tunit, line,
-                                CGM.getContext().toBits(block.BlockSize),
-                                CGM.getContext().toBits(block.BlockAlign), 0,
-                                llvm::DIType(), fieldsArray);
+  llvm::DIType *type = DBuilder.createStructType(
+      tunit, typeName.str(), tunit, line,
+      CGM.getContext().toBits(block.BlockSize),
+      CGM.getContext().toBits(block.BlockAlign), 0, nullptr, fieldsArray);
   type = DBuilder.createPointerType(type, CGM.PointerWidthInBits);
 
   // Get overall information about the block.
-  unsigned flags = llvm::DebugNode::FlagArtificial;
-  auto *scope = cast<llvm::MDLocalScope>(LexicalBlockStack.back());
+  unsigned flags = llvm::DINode::FlagArtificial;
+  auto *scope = cast<llvm::DILocalScope>(LexicalBlockStack.back());
 
   // Create the descriptor for the parameter.
-  llvm::DIVariable debugVar = DBuilder.createLocalVariable(
+  auto *debugVar = DBuilder.createLocalVariable(
       llvm::dwarf::DW_TAG_arg_variable, scope, Arg->getName(), tunit, line,
       type, CGM.getLangOpts().Optimize, flags, ArgNo);
 
@@ -3164,35 +3139,35 @@
 
 /// If D is an out-of-class definition of a static data member of a class, find
 /// its corresponding in-class declaration.
-llvm::DIDerivedType
+llvm::DIDerivedType *
 CGDebugInfo::getOrCreateStaticDataMemberDeclarationOrNull(const VarDecl *D) {
   if (!D->isStaticDataMember())
-    return llvm::DIDerivedType();
+    return nullptr;
 
   auto MI = StaticDataMemberCache.find(D->getCanonicalDecl());
   if (MI != StaticDataMemberCache.end()) {
     assert(MI->second && "Static data member declaration should still exist");
-    return cast<llvm::MDDerivedTypeBase>(MI->second);
+    return cast<llvm::DIDerivedType>(MI->second);
   }
 
   // If the member wasn't found in the cache, lazily construct and add it to the
   // type (used when a limited form of the type is emitted).
   auto DC = D->getDeclContext();
-  llvm::DICompositeType Ctxt =
-      cast<llvm::MDCompositeType>(getContextDescriptor(cast<Decl>(DC)));
+  auto *Ctxt =
+      cast<llvm::DICompositeType>(getContextDescriptor(cast<Decl>(DC)));
   return CreateRecordStaticField(D, Ctxt, cast<RecordDecl>(DC));
 }
 
 /// Recursively collect all of the member fields of a global anonymous decl and
 /// create static variables for them. The first time this is called it needs
 /// to be on a union and then from there we can have additional unnamed fields.
-llvm::DIGlobalVariable CGDebugInfo::CollectAnonRecordDecls(
-    const RecordDecl *RD, llvm::DIFile Unit, unsigned LineNo,
-    StringRef LinkageName, llvm::GlobalVariable *Var, llvm::MDScope *DContext) {
-  llvm::DIGlobalVariable GV;
+llvm::DIGlobalVariable *CGDebugInfo::CollectAnonRecordDecls(
+    const RecordDecl *RD, llvm::DIFile *Unit, unsigned LineNo,
+    StringRef LinkageName, llvm::GlobalVariable *Var, llvm::DIScope *DContext) {
+  llvm::DIGlobalVariable *GV = nullptr;
 
   for (const auto *Field : RD->fields()) {
-    llvm::DIType FieldTy = getOrCreateType(Field->getType(), Unit);
+    llvm::DIType *FieldTy = getOrCreateType(Field->getType(), Unit);
     StringRef FieldName = Field->getName();
 
     // Ignore unnamed fields, but recurse into anonymous records.
@@ -3204,9 +3179,9 @@
       continue;
     }
     // Use VarDecl's Tag, Scope and Line number.
-    GV = DBuilder.createGlobalVariable(
-        DContext, FieldName, LinkageName, Unit, LineNo, FieldTy,
-        Var->hasInternalLinkage(), Var, llvm::DIDerivedType());
+    GV = DBuilder.createGlobalVariable(DContext, FieldName, LinkageName, Unit,
+                                       LineNo, FieldTy,
+                                       Var->hasInternalLinkage(), Var, nullptr);
   }
   return GV;
 }
@@ -3216,8 +3191,8 @@
                                      const VarDecl *D) {
   assert(DebugKind >= CodeGenOptions::LimitedDebugInfo);
   // Create global variable debug descriptor.
-  llvm::DIFile Unit;
-  llvm::MDScope *DContext = nullptr;
+  llvm::DIFile *Unit = nullptr;
+  llvm::DIScope *DContext = nullptr;
   unsigned LineNo;
   StringRef DeclName, LinkageName;
   QualType T;
@@ -3225,7 +3200,7 @@
 
   // Attempt to store one global variable for the declaration - even if we
   // emit a lot of fields.
-  llvm::DIGlobalVariable GV;
+  llvm::DIGlobalVariable *GV = nullptr;
 
   // If this is an anonymous union then we'll want to emit a global
   // variable for each member of the anonymous union so that it's possible
@@ -3249,15 +3224,17 @@
                                      llvm::Constant *Init) {
   assert(DebugKind >= CodeGenOptions::LimitedDebugInfo);
   // Create the descriptor for the variable.
-  llvm::DIFile Unit = getOrCreateFile(VD->getLocation());
+  llvm::DIFile *Unit = getOrCreateFile(VD->getLocation());
   StringRef Name = VD->getName();
-  llvm::DIType Ty = getOrCreateType(VD->getType(), Unit);
+  llvm::DIType *Ty = getOrCreateType(VD->getType(), Unit);
   if (const EnumConstantDecl *ECD = dyn_cast<EnumConstantDecl>(VD)) {
     const EnumDecl *ED = cast<EnumDecl>(ECD->getDeclContext());
     assert(isa<EnumType>(ED->getTypeForDecl()) && "Enum without EnumType?");
     Ty = getOrCreateType(QualType(ED->getTypeForDecl(), 0), Unit);
   }
-  // Do not use DIGlobalVariable for enums.
+  // Do not use global variables for enums.
+  //
+  // FIXME: why not?
   if (Ty->getTag() == llvm::dwarf::DW_TAG_enumeration_type)
     return;
   // Do not emit separate definitions for function local const/statics.
@@ -3274,7 +3251,7 @@
     return;
   }
 
-  llvm::MDScope *DContext =
+  llvm::DIScope *DContext =
       getContextDescriptor(dyn_cast<Decl>(VD->getDeclContext()));
 
   auto &GV = DeclCache[VD];
@@ -3285,9 +3262,9 @@
       true, Init, getOrCreateStaticDataMemberDeclarationOrNull(VarD)));
 }
 
-llvm::DIScope CGDebugInfo::getCurrentContextDescriptor(const Decl *D) {
+llvm::DIScope *CGDebugInfo::getCurrentContextDescriptor(const Decl *D) {
   if (!LexicalBlockStack.empty())
-    return cast<llvm::MDScope>(LexicalBlockStack.back());
+    return LexicalBlockStack.back();
   return getContextDescriptor(D);
 }
 
@@ -3308,21 +3285,21 @@
   // Emitting one decl is sufficient - debuggers can detect that this is an
   // overloaded name & provide lookup for all the overloads.
   const UsingShadowDecl &USD = **UD.shadow_begin();
-  if (llvm::DebugNode *Target =
+  if (llvm::DINode *Target =
           getDeclarationOrDefinition(USD.getUnderlyingDecl()))
     DBuilder.createImportedDeclaration(
         getCurrentContextDescriptor(cast<Decl>(USD.getDeclContext())), Target,
         getLineNumber(USD.getLocation()));
 }
 
-llvm::DIImportedEntity
+llvm::DIImportedEntity *
 CGDebugInfo::EmitNamespaceAlias(const NamespaceAliasDecl &NA) {
   if (CGM.getCodeGenOpts().getDebugInfo() < CodeGenOptions::LimitedDebugInfo)
-    return llvm::DIImportedEntity();
+    return nullptr;
   auto &VH = NamespaceAliasCache[&NA];
   if (VH)
-    return cast<llvm::MDImportedEntity>(VH);
-  llvm::DIImportedEntity R;
+    return cast<llvm::DIImportedEntity>(VH);
+  llvm::DIImportedEntity *R;
   if (const NamespaceAliasDecl *Underlying =
           dyn_cast<NamespaceAliasDecl>(NA.getAliasedNamespace()))
     // This could cache & dedup here rather than relying on metadata deduping.
@@ -3341,19 +3318,19 @@
 
 /// getOrCreateNamesSpace - Return namespace descriptor for the given
 /// namespace decl.
-llvm::DINameSpace
+llvm::DINamespace *
 CGDebugInfo::getOrCreateNameSpace(const NamespaceDecl *NSDecl) {
   NSDecl = NSDecl->getCanonicalDecl();
   auto I = NameSpaceCache.find(NSDecl);
   if (I != NameSpaceCache.end())
-    return cast<llvm::MDNamespace>(I->second);
+    return cast<llvm::DINamespace>(I->second);
 
   unsigned LineNo = getLineNumber(NSDecl->getLocation());
-  llvm::DIFile FileD = getOrCreateFile(NSDecl->getLocation());
-  llvm::MDScope *Context =
+  llvm::DIFile *FileD = getOrCreateFile(NSDecl->getLocation());
+  llvm::DIScope *Context =
       getContextDescriptor(dyn_cast<Decl>(NSDecl->getDeclContext()));
-  llvm::DINameSpace NS =
-    DBuilder.createNameSpace(Context, NSDecl->getName(), FileD, LineNo);
+  llvm::DINamespace *NS =
+      DBuilder.createNameSpace(Context, NSDecl->getName(), FileD, LineNo);
   NameSpaceCache[NSDecl].reset(NS);
   return NS;
 }
@@ -3363,28 +3340,28 @@
   // element and the size(), so don't cache/reference them.
   for (size_t i = 0; i != ObjCInterfaceCache.size(); ++i) {
     ObjCInterfaceCacheEntry E = ObjCInterfaceCache[i];
-    llvm::MDType *Ty = E.Type->getDecl()->getDefinition()
+    llvm::DIType *Ty = E.Type->getDecl()->getDefinition()
                            ? CreateTypeDefinition(E.Type, E.Unit)
                            : E.Decl;
-    DBuilder.replaceTemporary(llvm::TempMDType(E.Decl), Ty);
+    DBuilder.replaceTemporary(llvm::TempDIType(E.Decl), Ty);
   }
 
   for (auto p : ReplaceMap) {
     assert(p.second);
-    auto *Ty = cast<llvm::MDType>(p.second);
+    auto *Ty = cast<llvm::DIType>(p.second);
     assert(Ty->isForwardDecl());
 
     auto it = TypeCache.find(p.first);
     assert(it != TypeCache.end());
     assert(it->second);
 
-    DBuilder.replaceTemporary(llvm::TempMDType(Ty),
-                               cast<llvm::MDType>(it->second));
+    DBuilder.replaceTemporary(llvm::TempDIType(Ty),
+                              cast<llvm::DIType>(it->second));
   }
 
   for (const auto &p : FwdDeclReplaceMap) {
     assert(p.second);
-    llvm::DIDescriptor FwdDecl(cast<llvm::MDNode>(p.second));
+    llvm::TempMDNode FwdDecl(cast<llvm::MDNode>(p.second));
     llvm::Metadata *Repl;
 
     auto it = DeclCache.find(p.first);
@@ -3396,15 +3373,14 @@
     else
       Repl = it->second;
 
-    DBuilder.replaceTemporary(llvm::TempMDNode(FwdDecl),
-                              cast<llvm::MDNode>(Repl));
+    DBuilder.replaceTemporary(std::move(FwdDecl), cast<llvm::MDNode>(Repl));
   }
 
   // We keep our own list of retained types, because we need to look
   // up the final type in the type cache.
   for (std::vector<void *>::const_iterator RI = RetainedTypes.begin(),
          RE = RetainedTypes.end(); RI != RE; ++RI)
-    DBuilder.retainType(cast<llvm::MDType>(TypeCache[*RI]));
+    DBuilder.retainType(cast<llvm::DIType>(TypeCache[*RI]));
 
   DBuilder.finalize();
 }
@@ -3413,7 +3389,7 @@
   if (CGM.getCodeGenOpts().getDebugInfo() < CodeGenOptions::LimitedDebugInfo)
     return;
 
-  if (llvm::DIType DieTy = getOrCreateType(Ty, getOrCreateMainFile()))
+  if (auto *DieTy = getOrCreateType(Ty, getOrCreateMainFile()))
     // Don't ignore in case of explicit cast where it is referenced indirectly.
     DBuilder.retainType(DieTy);
 }
diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h
index 6fcceed..8509e07 100644
--- a/lib/CodeGen/CGDebugInfo.h
+++ b/lib/CodeGen/CGDebugInfo.h
@@ -52,27 +52,30 @@
   CodeGenModule &CGM;
   const CodeGenOptions::DebugInfoKind DebugKind;
   llvm::DIBuilder DBuilder;
-  llvm::DICompileUnit TheCU;
+  llvm::DICompileUnit *TheCU = nullptr;
   SourceLocation CurLoc;
-  llvm::DIType VTablePtrType;
-  llvm::DIType ClassTy;
-  llvm::MDCompositeType *ObjTy = nullptr;
-  llvm::DIType SelTy;
-  llvm::DIType OCLImage1dDITy, OCLImage1dArrayDITy, OCLImage1dBufferDITy;
-  llvm::DIType OCLImage2dDITy, OCLImage2dArrayDITy;
-  llvm::DIType OCLImage3dDITy;
-  llvm::DIType OCLEventDITy;
-  llvm::DIType BlockLiteralGeneric;
+  llvm::DIType *VTablePtrType = nullptr;
+  llvm::DIType *ClassTy = nullptr;
+  llvm::DICompositeType *ObjTy = nullptr;
+  llvm::DIType *SelTy = nullptr;
+  llvm::DIType *OCLImage1dDITy = nullptr;
+  llvm::DIType *OCLImage1dArrayDITy = nullptr;
+  llvm::DIType *OCLImage1dBufferDITy = nullptr;
+  llvm::DIType *OCLImage2dDITy = nullptr;
+  llvm::DIType *OCLImage2dArrayDITy = nullptr;
+  llvm::DIType *OCLImage3dDITy = nullptr;
+  llvm::DIType *OCLEventDITy = nullptr;
+  llvm::DIType *BlockLiteralGeneric = nullptr;
 
   /// \brief Cache of previously constructed Types.
   llvm::DenseMap<const void *, llvm::TrackingMDRef> TypeCache;
 
   struct ObjCInterfaceCacheEntry {
     const ObjCInterfaceType *Type;
-    llvm::DIType Decl;
-    llvm::DIFile Unit;
-    ObjCInterfaceCacheEntry(const ObjCInterfaceType *Type, llvm::DIType Decl,
-                            llvm::DIFile Unit)
+    llvm::DIType *Decl;
+    llvm::DIFile *Unit;
+    ObjCInterfaceCacheEntry(const ObjCInterfaceType *Type, llvm::DIType *Decl,
+                            llvm::DIFile *Unit)
         : Type(Type), Decl(Decl), Unit(Unit) {}
   };
 
@@ -93,7 +96,7 @@
       FwdDeclReplaceMap;
 
   // LexicalBlockStack - Keep track of our current nested lexical block.
-  std::vector<llvm::TrackingMDNodeRef> LexicalBlockStack;
+  std::vector<llvm::TypedTrackingMDRef<llvm::DIScope>> LexicalBlockStack;
   llvm::DenseMap<const Decl *, llvm::TrackingMDRef> RegionMap;
   // FnBeginRegionCount - Keep track of LexicalBlockStack counter at the
   // beginning of a function. This is used to pop unbalanced regions at
@@ -117,97 +120,94 @@
 
   /// Helper functions for getOrCreateType.
   unsigned Checksum(const ObjCInterfaceDecl *InterfaceDecl);
-  llvm::DIType CreateType(const BuiltinType *Ty);
-  llvm::DIType CreateType(const ComplexType *Ty);
-  llvm::DIType CreateQualifiedType(QualType Ty, llvm::DIFile Fg);
-  llvm::DIType CreateType(const TypedefType *Ty, llvm::DIFile Fg);
-  llvm::DIType CreateType(const TemplateSpecializationType *Ty, llvm::DIFile Fg);
-  llvm::DIType CreateType(const ObjCObjectPointerType *Ty,
-                          llvm::DIFile F);
-  llvm::DIType CreateType(const PointerType *Ty, llvm::DIFile F);
-  llvm::DIType CreateType(const BlockPointerType *Ty, llvm::DIFile F);
-  llvm::DIType CreateType(const FunctionType *Ty, llvm::DIFile F);
-  llvm::DIType CreateType(const RecordType *Tyg);
-  llvm::DIType CreateTypeDefinition(const RecordType *Ty);
-  llvm::MDCompositeType *CreateLimitedType(const RecordType *Ty);
+  llvm::DIType *CreateType(const BuiltinType *Ty);
+  llvm::DIType *CreateType(const ComplexType *Ty);
+  llvm::DIType *CreateQualifiedType(QualType Ty, llvm::DIFile *Fg);
+  llvm::DIType *CreateType(const TypedefType *Ty, llvm::DIFile *Fg);
+  llvm::DIType *CreateType(const TemplateSpecializationType *Ty,
+                           llvm::DIFile *Fg);
+  llvm::DIType *CreateType(const ObjCObjectPointerType *Ty, llvm::DIFile *F);
+  llvm::DIType *CreateType(const PointerType *Ty, llvm::DIFile *F);
+  llvm::DIType *CreateType(const BlockPointerType *Ty, llvm::DIFile *F);
+  llvm::DIType *CreateType(const FunctionType *Ty, llvm::DIFile *F);
+  llvm::DIType *CreateType(const RecordType *Tyg);
+  llvm::DIType *CreateTypeDefinition(const RecordType *Ty);
+  llvm::DICompositeType *CreateLimitedType(const RecordType *Ty);
   void CollectContainingType(const CXXRecordDecl *RD,
-                             llvm::MDCompositeType *CT);
-  llvm::DIType CreateType(const ObjCInterfaceType *Ty, llvm::DIFile F);
-  llvm::DIType CreateTypeDefinition(const ObjCInterfaceType *Ty, llvm::DIFile F);
-  llvm::DIType CreateType(const ObjCObjectType *Ty, llvm::DIFile F);
-  llvm::DIType CreateType(const VectorType *Ty, llvm::DIFile F);
-  llvm::DIType CreateType(const ArrayType *Ty, llvm::DIFile F);
-  llvm::DIType CreateType(const LValueReferenceType *Ty, llvm::DIFile F);
-  llvm::DIType CreateType(const RValueReferenceType *Ty, llvm::DIFile Unit);
-  llvm::DIType CreateType(const MemberPointerType *Ty, llvm::DIFile F);
-  llvm::DIType CreateType(const AtomicType *Ty, llvm::DIFile F);
-  llvm::DIType CreateEnumType(const EnumType *Ty);
-  llvm::DIType CreateTypeDefinition(const EnumType *Ty);
-  llvm::DIType CreateSelfType(const QualType &QualTy, llvm::DIType Ty);
-  llvm::DIType getTypeOrNull(const QualType);
-  llvm::MDSubroutineType *getOrCreateMethodType(const CXXMethodDecl *Method,
-                                                llvm::DIFile F);
-  llvm::MDSubroutineType *
+                             llvm::DICompositeType *CT);
+  llvm::DIType *CreateType(const ObjCInterfaceType *Ty, llvm::DIFile *F);
+  llvm::DIType *CreateTypeDefinition(const ObjCInterfaceType *Ty,
+                                     llvm::DIFile *F);
+  llvm::DIType *CreateType(const ObjCObjectType *Ty, llvm::DIFile *F);
+  llvm::DIType *CreateType(const VectorType *Ty, llvm::DIFile *F);
+  llvm::DIType *CreateType(const ArrayType *Ty, llvm::DIFile *F);
+  llvm::DIType *CreateType(const LValueReferenceType *Ty, llvm::DIFile *F);
+  llvm::DIType *CreateType(const RValueReferenceType *Ty, llvm::DIFile *Unit);
+  llvm::DIType *CreateType(const MemberPointerType *Ty, llvm::DIFile *F);
+  llvm::DIType *CreateType(const AtomicType *Ty, llvm::DIFile *F);
+  llvm::DIType *CreateEnumType(const EnumType *Ty);
+  llvm::DIType *CreateTypeDefinition(const EnumType *Ty);
+  llvm::DIType *CreateSelfType(const QualType &QualTy, llvm::DIType *Ty);
+  llvm::DIType *getTypeOrNull(const QualType);
+  llvm::DISubroutineType *getOrCreateMethodType(const CXXMethodDecl *Method,
+                                                llvm::DIFile *F);
+  llvm::DISubroutineType *
   getOrCreateInstanceMethodType(QualType ThisPtr, const FunctionProtoType *Func,
-                                llvm::DIFile Unit);
-  llvm::MDSubroutineType *
-  getOrCreateFunctionType(const Decl *D, QualType FnType, llvm::DIFile F);
-  llvm::DIType getOrCreateVTablePtrType(llvm::DIFile F);
-  llvm::DINameSpace getOrCreateNameSpace(const NamespaceDecl *N);
-  llvm::DIType getOrCreateTypeDeclaration(QualType PointeeTy, llvm::DIFile F);
-  llvm::DIType CreatePointerLikeType(llvm::dwarf::Tag Tag,
-                                     const Type *Ty, QualType PointeeTy,
-                                     llvm::DIFile F);
+                                llvm::DIFile *Unit);
+  llvm::DISubroutineType *
+  getOrCreateFunctionType(const Decl *D, QualType FnType, llvm::DIFile *F);
+  llvm::DIType *getOrCreateVTablePtrType(llvm::DIFile *F);
+  llvm::DINamespace *getOrCreateNameSpace(const NamespaceDecl *N);
+  llvm::DIType *getOrCreateTypeDeclaration(QualType PointeeTy, llvm::DIFile *F);
+  llvm::DIType *CreatePointerLikeType(llvm::dwarf::Tag Tag, const Type *Ty,
+                                      QualType PointeeTy, llvm::DIFile *F);
 
   llvm::Value *getCachedInterfaceTypeOrNull(const QualType Ty);
-  llvm::DIType getOrCreateStructPtrType(StringRef Name, llvm::DIType &Cache);
+  llvm::DIType *getOrCreateStructPtrType(StringRef Name, llvm::DIType *&Cache);
 
-  llvm::DISubprogram CreateCXXMemberFunction(const CXXMethodDecl *Method,
-                                             llvm::DIFile F,
-                                             llvm::DIType RecordTy);
+  llvm::DISubprogram *CreateCXXMemberFunction(const CXXMethodDecl *Method,
+                                              llvm::DIFile *F,
+                                              llvm::DIType *RecordTy);
 
-  void CollectCXXMemberFunctions(const CXXRecordDecl *Decl, llvm::DIFile F,
+  void CollectCXXMemberFunctions(const CXXRecordDecl *Decl, llvm::DIFile *F,
                                  SmallVectorImpl<llvm::Metadata *> &E,
-                                 llvm::DIType T);
+                                 llvm::DIType *T);
 
-  void CollectCXXBases(const CXXRecordDecl *Decl, llvm::DIFile F,
+  void CollectCXXBases(const CXXRecordDecl *Decl, llvm::DIFile *F,
                        SmallVectorImpl<llvm::Metadata *> &EltTys,
-                       llvm::DIType RecordTy);
+                       llvm::DIType *RecordTy);
 
-  llvm::DIArray
-  CollectTemplateParams(const TemplateParameterList *TPList,
-                        ArrayRef<TemplateArgument> TAList,
-                        llvm::DIFile Unit);
-  llvm::DIArray
-  CollectFunctionTemplateParams(const FunctionDecl *FD, llvm::DIFile Unit);
-  llvm::DIArray
+  llvm::DINodeArray CollectTemplateParams(const TemplateParameterList *TPList,
+                                          ArrayRef<TemplateArgument> TAList,
+                                          llvm::DIFile *Unit);
+  llvm::DINodeArray CollectFunctionTemplateParams(const FunctionDecl *FD,
+                                                  llvm::DIFile *Unit);
+  llvm::DINodeArray
   CollectCXXTemplateParams(const ClassTemplateSpecializationDecl *TS,
-                           llvm::DIFile F);
+                           llvm::DIFile *F);
 
-  llvm::DIType createFieldType(StringRef name, QualType type,
-                               uint64_t sizeInBitsOverride, SourceLocation loc,
-                               AccessSpecifier AS,
-                               uint64_t offsetInBits,
-                               llvm::DIFile tunit,
-                               llvm::DIScope scope,
-                               const RecordDecl* RD = nullptr);
+  llvm::DIType *createFieldType(StringRef name, QualType type,
+                                uint64_t sizeInBitsOverride, SourceLocation loc,
+                                AccessSpecifier AS, uint64_t offsetInBits,
+                                llvm::DIFile *tunit, llvm::DIScope *scope,
+                                const RecordDecl *RD = nullptr);
 
   // Helpers for collecting fields of a record.
   void CollectRecordLambdaFields(const CXXRecordDecl *CXXDecl,
                                  SmallVectorImpl<llvm::Metadata *> &E,
-                                 llvm::DIType RecordTy);
-  llvm::DIDerivedType CreateRecordStaticField(const VarDecl *Var,
-                                              llvm::DIType RecordTy,
-                                              const RecordDecl* RD);
+                                 llvm::DIType *RecordTy);
+  llvm::DIDerivedType *CreateRecordStaticField(const VarDecl *Var,
+                                               llvm::DIType *RecordTy,
+                                               const RecordDecl *RD);
   void CollectRecordNormalField(const FieldDecl *Field, uint64_t OffsetInBits,
-                                llvm::DIFile F,
+                                llvm::DIFile *F,
                                 SmallVectorImpl<llvm::Metadata *> &E,
-                                llvm::DIType RecordTy, const RecordDecl *RD);
-  void CollectRecordFields(const RecordDecl *Decl, llvm::DIFile F,
+                                llvm::DIType *RecordTy, const RecordDecl *RD);
+  void CollectRecordFields(const RecordDecl *Decl, llvm::DIFile *F,
                            SmallVectorImpl<llvm::Metadata *> &E,
-                           llvm::DICompositeType RecordTy);
+                           llvm::DICompositeType *RecordTy);
 
-  void CollectVTableInfo(const CXXRecordDecl *Decl, llvm::DIFile F,
+  void CollectVTableInfo(const CXXRecordDecl *Decl, llvm::DIFile *F,
                          SmallVectorImpl<llvm::Metadata *> &EltTys);
 
   // CreateLexicalBlock - Create a new lexical block node and push it on
@@ -290,15 +290,14 @@
   void EmitUsingDecl(const UsingDecl &UD);
 
   /// \brief Emit C++ namespace alias.
-  llvm::DIImportedEntity EmitNamespaceAlias(const NamespaceAliasDecl &NA);
+  llvm::DIImportedEntity *EmitNamespaceAlias(const NamespaceAliasDecl &NA);
 
   /// \brief Emit record type's standalone debug info.
-  llvm::DIType getOrCreateRecordType(QualType Ty, SourceLocation L);
+  llvm::DIType *getOrCreateRecordType(QualType Ty, SourceLocation L);
 
   /// \brief Emit an objective c interface type standalone
   /// debug info.
-  llvm::DIType getOrCreateInterfaceType(QualType Ty,
-                                        SourceLocation Loc);
+  llvm::DIType *getOrCreateInterfaceType(QualType Ty, SourceLocation Loc);
 
   void completeType(const EnumDecl *ED);
   void completeType(const RecordDecl *RD);
@@ -316,20 +315,17 @@
 
   // EmitTypeForVarWithBlocksAttr - Build up structure info for the byref.
   // See BuildByRefType.
-  llvm::DIType EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
-                                            uint64_t *OffSet);
+  llvm::DIType *EmitTypeForVarWithBlocksAttr(const VarDecl *VD,
+                                             uint64_t *OffSet);
 
   /// \brief Get context info for the decl.
-  llvm::DIScope getContextDescriptor(const Decl *Decl);
+  llvm::DIScope *getContextDescriptor(const Decl *Decl);
 
-  llvm::DIScope getCurrentContextDescriptor(const Decl *Decl);
+  llvm::DIScope *getCurrentContextDescriptor(const Decl *Decl);
 
   /// \brief Create a forward decl for a RecordType in a given context.
-  llvm::MDCompositeType *getOrCreateRecordFwdDecl(const RecordType *,
-                                                  llvm::MDScope *);
-
-  /// \brief Create a set of decls for the context chain.
-  llvm::DIDescriptor createContextChain(const Decl *Decl);
+  llvm::DICompositeType *getOrCreateRecordFwdDecl(const RecordType *,
+                                                  llvm::DIScope *);
 
   /// \brief Return current directory name.
   StringRef getCurrentDirname();
@@ -339,57 +335,58 @@
 
   /// \brief Get the file debug info descriptor for the input
   /// location.
-  llvm::DIFile getOrCreateFile(SourceLocation Loc);
+  llvm::DIFile *getOrCreateFile(SourceLocation Loc);
 
   /// \brief Get the file info for main compile unit.
-  llvm::DIFile getOrCreateMainFile();
+  llvm::DIFile *getOrCreateMainFile();
 
   /// \brief Get the type from the cache or create a new type if
   /// necessary.
-  llvm::DIType getOrCreateType(QualType Ty, llvm::DIFile Fg);
+  llvm::DIType *getOrCreateType(QualType Ty, llvm::DIFile *Fg);
 
   /// \brief Get the type from the cache or create a new
   /// partial type if necessary.
-  llvm::DIType getOrCreateLimitedType(const RecordType *Ty, llvm::DIFile F);
+  llvm::DIType *getOrCreateLimitedType(const RecordType *Ty, llvm::DIFile *F);
 
   /// \brief Create type metadata for a source language type.
-  llvm::DIType CreateTypeNode(QualType Ty, llvm::DIFile Fg);
+  llvm::DIType *CreateTypeNode(QualType Ty, llvm::DIFile *Fg);
 
   /// \brief return the underlying ObjCInterfaceDecl
   /// if Ty is an ObjCInterface or a pointer to one.
   ObjCInterfaceDecl* getObjCInterfaceDecl(QualType Ty);
 
   /// \brief Create new member and increase Offset by FType's size.
-  llvm::DIType CreateMemberType(llvm::DIFile Unit, QualType FType,
-                                StringRef Name, uint64_t *Offset);
+  llvm::DIType *CreateMemberType(llvm::DIFile *Unit, QualType FType,
+                                 StringRef Name, uint64_t *Offset);
 
   /// \brief Retrieve the DIDescriptor, if any, for the canonical form of this
   /// declaration.
-  llvm::DebugNode *getDeclarationOrDefinition(const Decl *D);
+  llvm::DINode *getDeclarationOrDefinition(const Decl *D);
 
   /// \brief Return debug info descriptor to describe method
   /// declaration for the given method definition.
-  llvm::DISubprogram getFunctionDeclaration(const Decl *D);
+  llvm::DISubprogram *getFunctionDeclaration(const Decl *D);
 
   /// Return debug info descriptor to describe in-class static data member
   /// declaration for the given out-of-class definition.
-  llvm::DIDerivedType
+  llvm::DIDerivedType *
   getOrCreateStaticDataMemberDeclarationOrNull(const VarDecl *D);
 
-  /// \brief Create a DISubprogram describing the forward
+  /// \brief Create a subprogram describing the forward
   /// decalration represented in the given FunctionDecl.
-  llvm::DISubprogram getFunctionForwardDeclaration(const FunctionDecl *FD);
+  llvm::DISubprogram *getFunctionForwardDeclaration(const FunctionDecl *FD);
 
-  /// \brief Create a DIGlobalVariable describing the forward
-  /// decalration represented in the given VarDecl.
-  llvm::DIGlobalVariable getGlobalVariableForwardDeclaration(const VarDecl *VD);
+  /// \brief Create a global variable describing the forward declaration
+  /// represented in the given VarDecl.
+  llvm::DIGlobalVariable *
+  getGlobalVariableForwardDeclaration(const VarDecl *VD);
 
   /// Return a global variable that represents one of the collection of
   /// global variables created for an anonmyous union.
-  llvm::DIGlobalVariable
-  CollectAnonRecordDecls(const RecordDecl *RD, llvm::DIFile Unit,
+  llvm::DIGlobalVariable *
+  CollectAnonRecordDecls(const RecordDecl *RD, llvm::DIFile *Unit,
                          unsigned LineNo, StringRef LinkageName,
-                         llvm::GlobalVariable *Var, llvm::MDScope *DContext);
+                         llvm::GlobalVariable *Var, llvm::DIScope *DContext);
 
   /// \brief Get function name for the given FunctionDecl. If the
   /// name is constructed on demand (e.g. C++ destructor) then the name
@@ -421,15 +418,16 @@
 
   /// \brief Collect various properties of a FunctionDecl.
   /// \param GD  A GlobalDecl whose getDecl() must return a FunctionDecl.
-  void collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile Unit,
+  void collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit,
                                 StringRef &Name, StringRef &LinkageName,
-                                llvm::MDScope *&FDContext,
-                                llvm::DIArray &TParamsArray, unsigned &Flags);
+                                llvm::DIScope *&FDContext,
+                                llvm::DINodeArray &TParamsArray,
+                                unsigned &Flags);
 
   /// \brief Collect various properties of a VarDecl.
-  void collectVarDeclProps(const VarDecl *VD, llvm::DIFile &Unit,
+  void collectVarDeclProps(const VarDecl *VD, llvm::DIFile *&Unit,
                            unsigned &LineNo, QualType &T, StringRef &Name,
-                           StringRef &LinkageName, llvm::MDScope *&VDContext);
+                           StringRef &LinkageName, llvm::DIScope *&VDContext);
 
   /// \brief Allocate a copy of \p A using the DebugInfoNames allocator
   /// and return a reference to it. If multiple arguments are given the strings
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp
index f1ccb09..07dbce4 100644
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "CodeGenFunction.h"
+#include "CGCleanup.h"
 #include "CGDebugInfo.h"
 #include "CGOpenCLRuntime.h"
 #include "CodeGenModule.h"
@@ -155,6 +156,8 @@
   assert(!D.isExternallyVisible() && "name shouldn't matter");
   std::string ContextName;
   const DeclContext *DC = D.getDeclContext();
+  if (auto *CD = dyn_cast<CapturedDecl>(DC))
+    DC = cast<DeclContext>(CD->getNonClosureContext());
   if (const auto *FD = dyn_cast<FunctionDecl>(DC))
     ContextName = CGM.getMangledName(FD);
   else if (const auto *BD = dyn_cast<BlockDecl>(DC))
@@ -516,10 +519,7 @@
       : Addr(addr), Size(size) {}
 
     void Emit(CodeGenFunction &CGF, Flags flags) override {
-      llvm::Value *castAddr = CGF.Builder.CreateBitCast(Addr, CGF.Int8PtrTy);
-      CGF.Builder.CreateCall2(CGF.CGM.getLLVMLifetimeEndFn(),
-                              Size, castAddr)
-        ->setDoesNotThrow();
+      CGF.EmitLifetimeEnd(Size, Addr);
     }
   };
 }
@@ -840,21 +840,6 @@
          canEmitInitWithFewStoresAfterMemset(Init, StoreBudget);
 }
 
-/// Should we use the LLVM lifetime intrinsics for the given local variable?
-static bool shouldUseLifetimeMarkers(CodeGenFunction &CGF, const VarDecl &D,
-                                     unsigned Size) {
-  // For now, only in optimized builds.
-  if (CGF.CGM.getCodeGenOpts().OptimizationLevel == 0)
-    return false;
-
-  // Limit the size of marked objects to 32 bytes. We don't want to increase
-  // compile time by marking tiny objects.
-  unsigned SizeThreshold = 32;
-
-  return Size > SizeThreshold;
-}
-
-
 /// EmitAutoVarDecl - Emit code and set up an entry in LocalDeclMap for a
 /// variable declaration with auto, register, or no storage class specifier.
 /// These turn into simple stack objects, or GlobalValues depending on target.
@@ -864,6 +849,35 @@
   EmitAutoVarCleanups(emission);
 }
 
+/// Emit a lifetime.begin marker if some criteria are satisfied.
+/// \return a pointer to the temporary size Value if a marker was emitted, null
+/// otherwise
+llvm::Value *CodeGenFunction::EmitLifetimeStart(uint64_t Size,
+                                                llvm::Value *Addr) {
+  // For now, only in optimized builds.
+  if (CGM.getCodeGenOpts().OptimizationLevel == 0)
+    return nullptr;
+
+  // Disable lifetime markers in msan builds.
+  // FIXME: Remove this when msan works with lifetime markers.
+  if (getLangOpts().Sanitize.has(SanitizerKind::Memory))
+    return nullptr;
+
+  llvm::Value *SizeV = llvm::ConstantInt::get(Int64Ty, Size);
+  Addr = Builder.CreateBitCast(Addr, Int8PtrTy);
+  llvm::CallInst *C =
+      Builder.CreateCall(CGM.getLLVMLifetimeStartFn(), {SizeV, Addr});
+  C->setDoesNotThrow();
+  return SizeV;
+}
+
+void CodeGenFunction::EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr) {
+  Addr = Builder.CreateBitCast(Addr, Int8PtrTy);
+  llvm::CallInst *C =
+      Builder.CreateCall(CGM.getLLVMLifetimeEndFn(), {Size, Addr});
+  C->setDoesNotThrow();
+}
+
 /// EmitAutoVarAlloca - Emit the alloca and debug information for a
 /// local variable.  Does not emit initialization or destruction.
 CodeGenFunction::AutoVarEmission
@@ -959,13 +973,8 @@
       // Emit a lifetime intrinsic if meaningful.  There's no point
       // in doing this if we don't have a valid insertion point (?).
       uint64_t size = CGM.getDataLayout().getTypeAllocSize(LTy);
-      if (HaveInsertPoint() && shouldUseLifetimeMarkers(*this, D, size)) {
-        llvm::Value *sizeV = llvm::ConstantInt::get(Int64Ty, size);
-
-        emission.SizeForLifetimeMarkers = sizeV;
-        llvm::Value *castAddr = Builder.CreateBitCast(Alloc, Int8PtrTy);
-        Builder.CreateCall2(CGM.getLLVMLifetimeStartFn(), sizeV, castAddr)
-          ->setDoesNotThrow();
+      if (HaveInsertPoint()) {
+        emission.SizeForLifetimeMarkers = EmitLifetimeStart(size, Alloc);
       } else {
         assert(!emission.useLifetimeMarkers());
       }
@@ -978,7 +987,7 @@
       llvm::Value *Stack = CreateTempAlloca(Int8PtrTy, "saved_stack");
 
       llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::stacksave);
-      llvm::Value *V = Builder.CreateCall(F);
+      llvm::Value *V = Builder.CreateCall(F, {});
 
       Builder.CreateStore(V, Stack);
 
@@ -1311,6 +1320,8 @@
     EHStack.pushCleanup<CallLifetimeEnd>(NormalCleanup,
                                          emission.getAllocatedAddress(),
                                          emission.getSizeForLifetimeMarkers());
+    EHCleanupScope &cleanup = cast<EHCleanupScope>(*EHStack.begin());
+    cleanup.setLifetimeMarker();
   }
 
   // Check the type for a cleanup.
diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp
index eb4ddc7..50a4996 100644
--- a/lib/CodeGen/CGDeclCXX.cpp
+++ b/lib/CodeGen/CGDeclCXX.cpp
@@ -259,6 +259,8 @@
       Fn->setSection(Section);
   }
 
+  SetLLVMFunctionAttributes(nullptr, getTypes().arrangeNullaryFunction(), Fn);
+
   Fn->setCallingConv(getRuntimeCC());
 
   if (!getLangOpts().Exceptions)
@@ -271,6 +273,8 @@
       Fn->addFnAttr(llvm::Attribute::SanitizeThread);
     if (getLangOpts().Sanitize.has(SanitizerKind::Memory))
       Fn->addFnAttr(llvm::Attribute::SanitizeMemory);
+    if (getLangOpts().Sanitize.has(SanitizerKind::SafeStack))
+      Fn->addFnAttr(llvm::Attribute::SafeStack);
   }
 
   return Fn;
@@ -429,7 +433,7 @@
     // priority emitted above.
     FileName = llvm::sys::path::filename(MainFile->getName());
   } else {
-    FileName = SmallString<128>("<null>");
+    FileName = "<null>";
   }
 
   for (size_t i = 0; i < FileName.size(); ++i) {
diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp
index ff12a9a..d9a3f0b 100644
--- a/lib/CodeGen/CGException.cpp
+++ b/lib/CodeGen/CGException.cpp
@@ -60,7 +60,10 @@
     name = "_ZSt9terminatev";
   } else if (getLangOpts().CPlusPlus &&
              getTarget().getCXXABI().isMicrosoft()) {
-    name = "\01?terminate@@YAXXZ";
+    if (getLangOpts().isCompatibleWithMSVC(LangOptions::MSVC2015))
+      name = "__std_terminate";
+    else
+      name = "\01?terminate@@YAXXZ";
   } else if (getLangOpts().ObjC1 &&
              getLangOpts().ObjCRuntime.hasTerminate())
     name = "objc_terminate";
@@ -955,8 +958,7 @@
     CGM.getCXXABI().emitBeginCatch(*this, C);
 
     // Emit the PGO counter increment.
-    RegionCounter CatchCnt = getPGORegionCounter(C);
-    CatchCnt.beginRegion(Builder);
+    incrementProfileCounter(C);
 
     // Perform the body of the catch.
     EmitStmt(C->getHandlerBlock());
@@ -984,9 +986,8 @@
       Builder.CreateBr(ContBB);
   }
 
-  RegionCounter ContCnt = getPGORegionCounter(&S);
   EmitBlock(ContBB);
-  ContCnt.beginRegion(Builder);
+  incrementProfileCounter(&S);
 }
 
 namespace {
@@ -1305,7 +1306,7 @@
 
   void Emit(CodeGenFunction &CGF, Flags F) override {
     ASTContext &Context = CGF.getContext();
-    QualType ArgTys[2] = {Context.BoolTy, Context.VoidPtrTy};
+    QualType ArgTys[2] = {Context.UnsignedCharTy, Context.VoidPtrTy};
     FunctionProtoType::ExtProtoInfo EPI;
     const auto *FTP = cast<FunctionType>(
         Context.getFunctionType(Context.VoidTy, ArgTys, EPI));
@@ -1412,9 +1413,9 @@
         InsertPair.first->second = ParentCGF.EscapedLocals.size() - 1;
       int FrameEscapeIdx = InsertPair.first->second;
       // call i8* @llvm.framerecover(i8* bitcast(@parentFn), i8* %fp, i32 N)
-      RecoverCall =
-          Builder.CreateCall3(FrameRecoverFn, ParentI8Fn, ParentFP,
-                              llvm::ConstantInt::get(Int32Ty, FrameEscapeIdx));
+      RecoverCall = Builder.CreateCall(
+          FrameRecoverFn, {ParentI8Fn, ParentFP,
+                           llvm::ConstantInt::get(Int32Ty, FrameEscapeIdx)});
 
     } else {
       // If the parent didn't have an alloca, we're doing some nested outlining.
@@ -1502,7 +1503,8 @@
     CGM.getCXXABI().getMangleContext().mangleSEHFilterExpression(Parent, OS);
   }
 
-  startOutlinedSEHHelper(ParentCGF, Name, getContext().IntTy, Args, FilterExpr);
+  startOutlinedSEHHelper(ParentCGF, Name, getContext().LongTy, Args,
+                         FilterExpr);
 
   // Mark finally block calls as nounwind and noinline to make LLVM's job a
   // little easier.
@@ -1514,7 +1516,7 @@
 
   // Emit the original filter expression, convert to i32, and return.
   llvm::Value *R = EmitScalarExpr(FilterExpr);
-  R = Builder.CreateIntCast(R, CGM.IntTy,
+  R = Builder.CreateIntCast(R, ConvertType(getContext().LongTy),
                             FilterExpr->getType()->isSignedIntegerType());
   Builder.CreateStore(R, ReturnValue);
 
@@ -1532,7 +1534,8 @@
   FunctionArgList Args;
   Args.push_back(ImplicitParamDecl::Create(
       getContext(), nullptr, StartLoc,
-      &getContext().Idents.get("abnormal_termination"), getContext().BoolTy));
+      &getContext().Idents.get("abnormal_termination"),
+      getContext().UnsignedCharTy));
   Args.push_back(ImplicitParamDecl::Create(
       getContext(), nullptr, StartLoc,
       &getContext().Idents.get("frame_pointer"), getContext().VoidPtrTy));
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index 4147317..1ed45a3 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -31,6 +31,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/MathExtras.h"
 
 using namespace clang;
 using namespace CodeGen;
@@ -499,7 +500,7 @@
 
   SanitizerScope SanScope(this);
 
-  SmallVector<std::pair<llvm::Value *, SanitizerKind>, 3> Checks;
+  SmallVector<std::pair<llvm::Value *, SanitizerMask>, 3> Checks;
   llvm::BasicBlock *Done = nullptr;
 
   bool AllowNullPointers = TCK == TCK_DowncastPointer || TCK == TCK_Upcast ||
@@ -534,7 +535,7 @@
     llvm::Value *Min = Builder.getFalse();
     llvm::Value *CastAddr = Builder.CreateBitCast(Address, Int8PtrTy);
     llvm::Value *LargeEnough =
-        Builder.CreateICmpUGE(Builder.CreateCall2(F, CastAddr, Min),
+        Builder.CreateICmpUGE(Builder.CreateCall(F, {CastAddr, Min}),
                               llvm::ConstantInt::get(IntPtrTy, Size));
     Checks.push_back(std::make_pair(LargeEnough, SanitizerKind::ObjectSize));
   }
@@ -1204,7 +1205,7 @@
         EmitCheckSourceLocation(Loc),
         EmitCheckTypeDescriptor(Ty)
       };
-      SanitizerKind Kind = NeedsEnumCheck ? SanitizerKind::Enum : SanitizerKind::Bool;
+      SanitizerMask Kind = NeedsEnumCheck ? SanitizerKind::Enum : SanitizerKind::Bool;
       EmitCheck(std::make_pair(Check, Kind), "load_invalid_value", StaticArgs,
                 EmitCheckValue(Load));
     }
@@ -1719,8 +1720,8 @@
   llvm::Value *Value = Src.getScalarVal();
   if (OrigTy->isPointerTy())
     Value = Builder.CreatePtrToInt(Value, Ty);
-  Builder.CreateCall2(F, llvm::MetadataAsValue::get(Ty->getContext(), RegName),
-                      Value);
+  Builder.CreateCall(
+      F, {llvm::MetadataAsValue::get(Ty->getContext(), RegName), Value});
 }
 
 // setObjCGCLValueClass - sets class of the lvalue for the purpose of
@@ -2243,7 +2244,8 @@
 };
 }
 
-static CheckRecoverableKind getRecoverableKind(SanitizerKind Kind) {
+static CheckRecoverableKind getRecoverableKind(SanitizerMask Kind) {
+  assert(llvm::countPopulation(Kind) == 1);
   switch (Kind) {
   case SanitizerKind::Vptr:
     return CheckRecoverableKind::AlwaysRecoverable;
@@ -2290,7 +2292,7 @@
 }
 
 void CodeGenFunction::EmitCheck(
-    ArrayRef<std::pair<llvm::Value *, SanitizerKind>> Checked,
+    ArrayRef<std::pair<llvm::Value *, SanitizerMask>> Checked,
     StringRef CheckName, ArrayRef<llvm::Constant *> StaticArgs,
     ArrayRef<llvm::Value *> DynamicArgs) {
   assert(IsSanitizerScope);
@@ -2402,7 +2404,7 @@
     Builder.CreateCondBr(Checked, Cont, TrapBB);
     EmitBlock(TrapBB);
     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::trap);
-    llvm::CallInst *TrapCall = Builder.CreateCall(F);
+    llvm::CallInst *TrapCall = Builder.CreateCall(F, {});
     TrapCall->setDoesNotReturn();
     TrapCall->setDoesNotThrow();
     Builder.CreateUnreachable();
@@ -2860,7 +2862,6 @@
   }
 
   OpaqueValueMapping binding(*this, expr);
-  RegionCounter Cnt = getPGORegionCounter(expr);
 
   const Expr *condExpr = expr->getCond();
   bool CondExprBool;
@@ -2871,7 +2872,7 @@
     if (!ContainsLabel(dead)) {
       // If the true case is live, we need to track its region.
       if (CondExprBool)
-        Cnt.beginRegion(Builder);
+        incrementProfileCounter(expr);
       return EmitLValue(live);
     }
   }
@@ -2881,11 +2882,11 @@
   llvm::BasicBlock *contBlock = createBasicBlock("cond.end");
 
   ConditionalEvaluation eval(*this);
-  EmitBranchOnBoolExpr(condExpr, lhsBlock, rhsBlock, Cnt.getCount());
+  EmitBranchOnBoolExpr(condExpr, lhsBlock, rhsBlock, getProfileCount(expr));
 
   // Any temporaries created here are conditional.
   EmitBlock(lhsBlock);
-  Cnt.beginRegion(Builder);
+  incrementProfileCounter(expr);
   eval.begin(*this);
   Optional<LValue> lhs =
       EmitLValueOrThrowExpression(*this, expr->getTrueExpr());
diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp
index 6b4cf68..883b76b 100644
--- a/lib/CodeGen/CGExprAgg.cpp
+++ b/lib/CodeGen/CGExprAgg.cpp
@@ -34,6 +34,7 @@
   CodeGenFunction &CGF;
   CGBuilderTy &Builder;
   AggValueSlot Dest;
+  bool IsResultUnused;
 
   /// We want to use 'dest' as the return slot except under two
   /// conditions:
@@ -48,7 +49,7 @@
     if (!shouldUseDestForReturnSlot())
       return ReturnValueSlot();
 
-    return ReturnValueSlot(Dest.getAddr(), Dest.isVolatile());
+    return ReturnValueSlot(Dest.getAddr(), Dest.isVolatile(), IsResultUnused);
   }
 
   AggValueSlot EnsureSlot(QualType T) {
@@ -61,9 +62,9 @@
   }
 
 public:
-  AggExprEmitter(CodeGenFunction &cgf, AggValueSlot Dest)
-    : CGF(cgf), Builder(CGF.Builder), Dest(Dest) {
-  }
+  AggExprEmitter(CodeGenFunction &cgf, AggValueSlot Dest, bool IsResultUnused)
+    : CGF(cgf), Builder(CGF.Builder), Dest(Dest),
+    IsResultUnused(IsResultUnused) { }
 
   //===--------------------------------------------------------------------===//
   //                               Utilities
@@ -159,10 +160,12 @@
     EmitAggLoadOfLValue(E);
   }
 
+  void VisitDesignatedInitUpdateExpr(DesignatedInitUpdateExpr *E);
   void VisitAbstractConditionalOperator(const AbstractConditionalOperator *CO);
   void VisitChooseExpr(const ChooseExpr *CE);
   void VisitInitListExpr(InitListExpr *E);
   void VisitImplicitValueInitExpr(ImplicitValueInitExpr *E);
+  void VisitNoInitExpr(NoInitExpr *E) { } // Do nothing.
   void VisitCXXDefaultArgExpr(CXXDefaultArgExpr *DAE) {
     Visit(DAE->getExpr());
   }
@@ -584,7 +587,12 @@
   }
       
   case CK_ToUnion: {
-    if (Dest.isIgnored()) break;
+    // Evaluate even if the destination is ignored.
+    if (Dest.isIgnored()) {
+      CGF.EmitAnyExpr(E->getSubExpr(), AggValueSlot::ignored(),
+                      /*ignoreResult=*/true);
+      break;
+    }
 
     // GCC union extension
     QualType Ty = E->getSubExpr()->getType();
@@ -916,16 +924,16 @@
   // Bind the common expression if necessary.
   CodeGenFunction::OpaqueValueMapping binding(CGF, E);
 
-  RegionCounter Cnt = CGF.getPGORegionCounter(E);
   CodeGenFunction::ConditionalEvaluation eval(CGF);
-  CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock, Cnt.getCount());
+  CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock,
+                           CGF.getProfileCount(E));
 
   // Save whether the destination's lifetime is externally managed.
   bool isExternallyDestructed = Dest.isExternallyDestructed();
 
   eval.begin(CGF);
   CGF.EmitBlock(LHSBlock);
-  Cnt.beginRegion(Builder);
+  CGF.incrementProfileCounter(E);
   Visit(E->getTrueExpr());
   eval.end(CGF);
 
@@ -1050,6 +1058,9 @@
     return;
   } else if (isa<ImplicitValueInitExpr>(E) || isa<CXXScalarValueInitExpr>(E)) {
     return EmitNullInitializationToLValue(LV);
+  } else if (isa<NoInitExpr>(E)) {
+    // Do nothing.
+    return;
   } else if (type->isReferenceType()) {
     RValue RV = CGF.EmitReferenceBindingToExpr(E);
     return CGF.EmitStoreThroughLValue(RV, LV);
@@ -1270,6 +1281,15 @@
     cleanupDominator->eraseFromParent();
 }
 
+void AggExprEmitter::VisitDesignatedInitUpdateExpr(DesignatedInitUpdateExpr *E) {
+  AggValueSlot Dest = EnsureSlot(E->getType());
+
+  LValue DestLV = CGF.MakeAddrLValue(Dest.getAddr(), E->getType(),
+                                     Dest.getAlignment());
+  EmitInitializationToLValue(E->getBase(), DestLV);
+  VisitInitListExpr(E->getUpdater());
+}
+
 //===----------------------------------------------------------------------===//
 //                        Entry Points into this File
 //===----------------------------------------------------------------------===//
@@ -1389,7 +1409,7 @@
   // Optimize the slot if possible.
   CheckAggExprForMemSetUse(Slot, E, *this);
  
-  AggExprEmitter(*this, Slot).Visit(const_cast<Expr*>(E));
+  AggExprEmitter(*this, Slot, Slot.isIgnored()).Visit(const_cast<Expr*>(E));
 }
 
 LValue CodeGenFunction::EmitAggExprToLValue(const Expr *E) {
@@ -1415,7 +1435,8 @@
       assert((Record->hasTrivialCopyConstructor() || 
               Record->hasTrivialCopyAssignment() ||
               Record->hasTrivialMoveConstructor() ||
-              Record->hasTrivialMoveAssignment()) &&
+              Record->hasTrivialMoveAssignment() ||
+              Record->isUnion()) &&
              "Trying to aggregate-copy a type without a trivial copy/move "
              "constructor or assignment operator");
       // Ignore empty classes in C++.
@@ -1446,7 +1467,34 @@
   if (alignment.isZero())
     alignment = TypeInfo.second;
 
-  // FIXME: Handle variable sized types.
+  llvm::Value *SizeVal = nullptr;
+  if (TypeInfo.first.isZero()) {
+    // But note that getTypeInfo returns 0 for a VLA.
+    if (auto *VAT = dyn_cast_or_null<VariableArrayType>(
+            getContext().getAsArrayType(Ty))) {
+      QualType BaseEltTy;
+      SizeVal = emitArrayLength(VAT, BaseEltTy, DestPtr);
+      TypeInfo = getContext().getTypeInfoDataSizeInChars(BaseEltTy);
+      std::pair<CharUnits, CharUnits> LastElementTypeInfo;
+      if (!isAssignment)
+        LastElementTypeInfo = getContext().getTypeInfoInChars(BaseEltTy);
+      assert(!TypeInfo.first.isZero());
+      SizeVal = Builder.CreateNUWMul(
+          SizeVal,
+          llvm::ConstantInt::get(SizeTy, TypeInfo.first.getQuantity()));
+      if (!isAssignment) {
+        SizeVal = Builder.CreateNUWSub(
+            SizeVal,
+            llvm::ConstantInt::get(SizeTy, TypeInfo.first.getQuantity()));
+        SizeVal = Builder.CreateNUWAdd(
+            SizeVal, llvm::ConstantInt::get(
+                         SizeTy, LastElementTypeInfo.first.getQuantity()));
+      }
+    }
+  }
+  if (!SizeVal) {
+    SizeVal = llvm::ConstantInt::get(SizeTy, TypeInfo.first.getQuantity());
+  }
 
   // FIXME: If we have a volatile struct, the optimizer can remove what might
   // appear to be `extra' memory ops:
@@ -1477,9 +1525,6 @@
   } else if (const RecordType *RecordTy = Ty->getAs<RecordType>()) {
     RecordDecl *Record = RecordTy->getDecl();
     if (Record->hasObjectMember()) {
-      CharUnits size = TypeInfo.first;
-      llvm::Type *SizeTy = ConvertType(getContext().getSizeType());
-      llvm::Value *SizeVal = llvm::ConstantInt::get(SizeTy, size.getQuantity());
       CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, DestPtr, SrcPtr, 
                                                     SizeVal);
       return;
@@ -1488,10 +1533,6 @@
     QualType BaseType = getContext().getBaseElementType(Ty);
     if (const RecordType *RecordTy = BaseType->getAs<RecordType>()) {
       if (RecordTy->getDecl()->hasObjectMember()) {
-        CharUnits size = TypeInfo.first;
-        llvm::Type *SizeTy = ConvertType(getContext().getSizeType());
-        llvm::Value *SizeVal = 
-          llvm::ConstantInt::get(SizeTy, size.getQuantity());
         CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, DestPtr, SrcPtr, 
                                                       SizeVal);
         return;
@@ -1504,9 +1545,6 @@
   // the optimizer wishes to expand it in to scalar memory operations.
   llvm::MDNode *TBAAStructTag = CGM.getTBAAStructInfo(Ty);
 
-  Builder.CreateMemCpy(DestPtr, SrcPtr,
-                       llvm::ConstantInt::get(IntPtrTy, 
-                                              TypeInfo.first.getQuantity()),
-                       alignment.getQuantity(), isVolatile,
-                       /*TBAATag=*/nullptr, TBAAStructTag);
+  Builder.CreateMemCpy(DestPtr, SrcPtr, SizeVal, alignment.getQuantity(),
+                       isVolatile, /*TBAATag=*/nullptr, TBAAStructTag);
 }
diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp
index 4bffad3..b3353ba 100644
--- a/lib/CodeGen/CGExprCXX.cpp
+++ b/lib/CodeGen/CGExprCXX.cpp
@@ -173,7 +173,7 @@
     This = EmitLValue(Base).getAddress();
 
 
-  if (MD->isTrivial()) {
+  if (MD->isTrivial() || (MD->isDefaulted() && MD->getParent()->isUnion())) {
     if (isa<CXXDestructorDecl>(MD)) return RValue::get(nullptr);
     if (isa<CXXConstructorDecl>(MD) && 
         cast<CXXConstructorDecl>(MD)->isDefaultConstructor())
@@ -690,7 +690,7 @@
       llvm::Value *tsmV =
         llvm::ConstantInt::get(CGF.SizeTy, typeSizeMultiplier);
       llvm::Value *result =
-        CGF.Builder.CreateCall2(umul_with_overflow, size, tsmV);
+          CGF.Builder.CreateCall(umul_with_overflow, {size, tsmV});
 
       llvm::Value *overflowed = CGF.Builder.CreateExtractValue(result, 1);
       if (hasOverflow)
@@ -729,7 +729,7 @@
 
       llvm::Value *cookieSizeV = llvm::ConstantInt::get(CGF.SizeTy, cookieSize);
       llvm::Value *result =
-        CGF.Builder.CreateCall2(uadd_with_overflow, size, cookieSizeV);
+          CGF.Builder.CreateCall(uadd_with_overflow, {size, cookieSizeV});
 
       llvm::Value *overflowed = CGF.Builder.CreateExtractValue(result, 1);
       if (hasOverflow)
@@ -958,6 +958,25 @@
     if (ILE->getNumInits() == 0 && TryMemsetInitialization())
       return;
 
+  // If we have a struct whose every field is value-initialized, we can
+  // usually use memset.
+  if (auto *ILE = dyn_cast<InitListExpr>(Init)) {
+    if (const RecordType *RType = ILE->getType()->getAs<RecordType>()) {
+      if (RType->getDecl()->isStruct()) {
+        unsigned NumFields = 0;
+        for (auto *Field : RType->getDecl()->fields())
+          if (!Field->isUnnamedBitfield())
+            ++NumFields;
+        if (ILE->getNumInits() == NumFields)
+          for (unsigned i = 0, e = ILE->getNumInits(); i != e; ++i)
+            if (!isa<ImplicitValueInitExpr>(ILE->getInit(i)))
+              --NumFields;
+        if (ILE->getNumInits() == NumFields && TryMemsetInitialization())
+          return;
+      }
+    }
+  }
+
   // Create the loop blocks.
   llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
   llvm::BasicBlock *LoopBB = createBasicBlock("new.loop");
diff --git a/lib/CodeGen/CGExprComplex.cpp b/lib/CodeGen/CGExprComplex.cpp
index dead1b5..27d1c68 100644
--- a/lib/CodeGen/CGExprComplex.cpp
+++ b/lib/CodeGen/CGExprComplex.cpp
@@ -949,13 +949,14 @@
   // Bind the common expression if necessary.
   CodeGenFunction::OpaqueValueMapping binding(CGF, E);
 
-  RegionCounter Cnt = CGF.getPGORegionCounter(E);
+
   CodeGenFunction::ConditionalEvaluation eval(CGF);
-  CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock, Cnt.getCount());
+  CGF.EmitBranchOnBoolExpr(E->getCond(), LHSBlock, RHSBlock,
+                           CGF.getProfileCount(E));
 
   eval.begin(CGF);
   CGF.EmitBlock(LHSBlock);
-  Cnt.beginRegion(Builder);
+  CGF.incrementProfileCounter(E);
   ComplexPairTy LHS = Visit(E->getTrueExpr());
   LHSBlock = Builder.GetInsertBlock();
   CGF.EmitBranch(ContBlock);
diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp
index b1cf99c..acfb9b6 100644
--- a/lib/CodeGen/CGExprConstant.cpp
+++ b/lib/CodeGen/CGExprConstant.cpp
@@ -33,6 +33,7 @@
 //===----------------------------------------------------------------------===//
 
 namespace {
+class ConstExprEmitter;
 class ConstStructBuilder {
   CodeGenModule &CGM;
   CodeGenFunction *CGF;
@@ -42,6 +43,10 @@
   CharUnits LLVMStructAlignment;
   SmallVector<llvm::Constant *, 32> Elements;
 public:
+  static llvm::Constant *BuildStruct(CodeGenModule &CGM, CodeGenFunction *CGF,
+                                     ConstExprEmitter *Emitter,
+                                     llvm::ConstantStruct *Base,
+                                     InitListExpr *Updater);
   static llvm::Constant *BuildStruct(CodeGenModule &CGM, CodeGenFunction *CGF,
                                      InitListExpr *ILE);
   static llvm::Constant *BuildStruct(CodeGenModule &CGM, CodeGenFunction *CGF,
@@ -68,6 +73,8 @@
   void ConvertStructToPacked();
 
   bool Build(InitListExpr *ILE);
+  bool Build(ConstExprEmitter *Emitter, llvm::ConstantStruct *Base,
+             InitListExpr *Updater);
   void Build(const APValue &Val, const RecordDecl *RD, bool IsPrimaryBase,
              const CXXRecordDecl *VTableClass, CharUnits BaseOffset);
   llvm::Constant *Finalize(QualType Ty);
@@ -547,6 +554,17 @@
 
 llvm::Constant *ConstStructBuilder::BuildStruct(CodeGenModule &CGM,
                                                 CodeGenFunction *CGF,
+                                                ConstExprEmitter *Emitter,
+                                                llvm::ConstantStruct *Base,
+                                                InitListExpr *Updater) {
+  ConstStructBuilder Builder(CGM, CGF);
+  if (!Builder.Build(Emitter, Base, Updater))
+    return nullptr;
+  return Builder.Finalize(Updater->getType());
+}
+
+llvm::Constant *ConstStructBuilder::BuildStruct(CodeGenModule &CGM,
+                                                CodeGenFunction *CGF,
                                                 InitListExpr *ILE) {
   ConstStructBuilder Builder(CGM, CGF);
 
@@ -818,6 +836,82 @@
     return nullptr;
   }
 
+  llvm::Constant *EmitDesignatedInitUpdater(llvm::Constant *Base,
+                                            InitListExpr *Updater) {
+    QualType ExprType = Updater->getType();
+
+    if (ExprType->isArrayType()) {
+      llvm::ArrayType *AType = cast<llvm::ArrayType>(ConvertType(ExprType));
+      llvm::Type *ElemType = AType->getElementType();
+
+      unsigned NumInitElements = Updater->getNumInits();
+      unsigned NumElements = AType->getNumElements();
+      
+      std::vector<llvm::Constant *> Elts;
+      Elts.reserve(NumElements);
+
+      if (llvm::ConstantDataArray *DataArray =
+            dyn_cast<llvm::ConstantDataArray>(Base))
+        for (unsigned i = 0; i != NumElements; ++i)
+          Elts.push_back(DataArray->getElementAsConstant(i));
+      else if (llvm::ConstantArray *Array =
+                 dyn_cast<llvm::ConstantArray>(Base))
+        for (unsigned i = 0; i != NumElements; ++i)
+          Elts.push_back(Array->getOperand(i));
+      else
+        return nullptr; // FIXME: other array types not implemented
+
+      llvm::Constant *fillC = nullptr;
+      if (Expr *filler = Updater->getArrayFiller())
+        if (!isa<NoInitExpr>(filler))
+          fillC = CGM.EmitConstantExpr(filler, filler->getType(), CGF);
+      bool RewriteType = (fillC && fillC->getType() != ElemType);
+
+      for (unsigned i = 0; i != NumElements; ++i) {
+        Expr *Init = nullptr;
+        if (i < NumInitElements)
+          Init = Updater->getInit(i);
+
+        if (!Init && fillC)
+          Elts[i] = fillC;
+        else if (!Init || isa<NoInitExpr>(Init))
+          ; // Do nothing.
+        else if (InitListExpr *ChildILE = dyn_cast<InitListExpr>(Init))
+          Elts[i] = EmitDesignatedInitUpdater(Elts[i], ChildILE);
+        else
+          Elts[i] = CGM.EmitConstantExpr(Init, Init->getType(), CGF);
+ 
+       if (!Elts[i])
+          return nullptr;
+        RewriteType |= (Elts[i]->getType() != ElemType);
+      }
+
+      if (RewriteType) {
+        std::vector<llvm::Type *> Types;
+        Types.reserve(NumElements);
+        for (unsigned i = 0; i != NumElements; ++i)
+          Types.push_back(Elts[i]->getType());
+        llvm::StructType *SType = llvm::StructType::get(AType->getContext(),
+                                                        Types, true);
+        return llvm::ConstantStruct::get(SType, Elts);
+      }
+
+      return llvm::ConstantArray::get(AType, Elts);
+    }
+
+    if (ExprType->isRecordType())
+      return ConstStructBuilder::BuildStruct(CGM, CGF, this,
+                 dyn_cast<llvm::ConstantStruct>(Base), Updater);
+
+    return nullptr;
+  }
+
+  llvm::Constant *VisitDesignatedInitUpdateExpr(DesignatedInitUpdateExpr *E) {
+    return EmitDesignatedInitUpdater(
+               CGM.EmitConstantExpr(E->getBase(), E->getType(), CGF),
+               E->getUpdater());
+  }  
+
   llvm::Constant *VisitCXXConstructExpr(CXXConstructExpr *E) {
     if (!E->getConstructor()->isTrivial())
       return nullptr;
@@ -1003,6 +1097,68 @@
 
 }  // end anonymous namespace.
 
+bool ConstStructBuilder::Build(ConstExprEmitter *Emitter,
+                               llvm::ConstantStruct *Base,
+                               InitListExpr *Updater) {
+  assert(Base && "base expression should not be empty");
+
+  QualType ExprType = Updater->getType();
+  RecordDecl *RD = ExprType->getAs<RecordType>()->getDecl();
+  const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD);
+  const llvm::StructLayout *BaseLayout = CGM.getDataLayout().getStructLayout(
+                                           Base->getType());
+  unsigned FieldNo = -1;
+  unsigned ElementNo = 0;
+
+  for (FieldDecl *Field : RD->fields()) {
+    ++FieldNo;
+
+    if (RD->isUnion() && Updater->getInitializedFieldInUnion() != Field)
+      continue;
+
+    // Skip anonymous bitfields.
+    if (Field->isUnnamedBitfield())
+      continue;
+
+    llvm::Constant *EltInit = Base->getOperand(ElementNo);
+
+    // Bail out if the type of the ConstantStruct does not have the same layout
+    // as the type of the InitListExpr.
+    if (CGM.getTypes().ConvertType(Field->getType()) != EltInit->getType() ||
+        Layout.getFieldOffset(ElementNo) !=
+          BaseLayout->getElementOffsetInBits(ElementNo))
+      return false;
+
+    // Get the initializer. If we encounter an empty field or a NoInitExpr,
+    // we use values from the base expression.
+    Expr *Init = nullptr;
+    if (ElementNo < Updater->getNumInits())
+      Init = Updater->getInit(ElementNo);
+
+    if (!Init || isa<NoInitExpr>(Init))
+      ; // Do nothing.
+    else if (InitListExpr *ChildILE = dyn_cast<InitListExpr>(Init))
+      EltInit = Emitter->EmitDesignatedInitUpdater(EltInit, ChildILE);
+    else
+      EltInit = CGM.EmitConstantExpr(Init, Field->getType(), CGF);
+
+    ++ElementNo;
+
+    if (!EltInit)
+      return false;
+
+    if (!Field->isBitField())
+      AppendField(Field, Layout.getFieldOffset(FieldNo), EltInit);
+    else if (llvm::ConstantInt *CI = dyn_cast<llvm::ConstantInt>(EltInit))
+      AppendBitField(Field, Layout.getFieldOffset(FieldNo), CI);
+    else
+      // Initializing a bitfield with a non-trivial constant?
+      return false;
+  }
+
+  return true;
+}
+
 llvm::Constant *CodeGenModule::EmitConstantInit(const VarDecl &D,
                                                 CodeGenFunction *CGF) {
   // Make a quick check if variable can be default NULL initialized
@@ -1349,8 +1505,14 @@
     }
 
     // For unions, stop after the first named field.
-    if (record->isUnion() && Field->getDeclName())
-      break;
+    if (record->isUnion()) {
+      if (Field->getIdentifier())
+        break;
+      if (const auto *FieldRD =
+              dyn_cast_or_null<RecordDecl>(Field->getType()->getAsTagDecl()))
+        if (FieldRD->findFirstNamedDataMember())
+          break;
+    }
   }
 
   // Fill in the virtual bases, if we're working with the complete object.
@@ -1408,10 +1570,6 @@
 
     llvm::Constant *Element = EmitNullConstant(ElementTy);
     unsigned NumElements = CAT->getSize().getZExtValue();
-    
-    if (Element->isNullValue())
-      return llvm::ConstantAggregateZero::get(ATy);
-    
     SmallVector<llvm::Constant *, 8> Array(NumElements, Element);
     return llvm::ConstantArray::get(ATy, Array);
   }
@@ -1421,8 +1579,7 @@
     return ::EmitNullConstant(*this, RD, /*complete object*/ true);
   }
 
-  assert(T->isMemberPointerType() && "Should only see member pointers here!");
-  assert(!T->getAs<MemberPointerType>()->getPointeeType()->isFunctionType() &&
+  assert(T->isMemberDataPointerType() &&
          "Should only see pointers to data members here!");
 
   return getCXXABI().EmitNullMemberPointer(T->castAs<MemberPointerType>());
diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp
index 658bd3e..08c81c0 100644
--- a/lib/CodeGen/CGExprScalar.cpp
+++ b/lib/CodeGen/CGExprScalar.cpp
@@ -85,7 +85,7 @@
     return CGF.EmitCheckedLValue(E, TCK);
   }
 
-  void EmitBinOpCheck(ArrayRef<std::pair<Value *, SanitizerKind>> Checks,
+  void EmitBinOpCheck(ArrayRef<std::pair<Value *, SanitizerMask>> Checks,
                       const BinOpInfo &Info);
 
   Value *EmitLoadOfLValue(LValue LV, SourceLocation Loc) {
@@ -349,10 +349,9 @@
     return EmitScalarPrePostIncDec(E, LV, true, true);
   }
 
-  llvm::Value *EmitAddConsiderOverflowBehavior(const UnaryOperator *E,
-                                               llvm::Value *InVal,
-                                               llvm::Value *NextVal,
-                                               bool IsInc);
+  llvm::Value *EmitIncDecConsiderOverflowBehavior(const UnaryOperator *E,
+                                                  llvm::Value *InVal,
+                                                  bool IsInc);
 
   llvm::Value *EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
                                        bool isInc, bool isPre);
@@ -917,7 +916,7 @@
 /// operation). The check passes if all values in \p Checks (which are \c i1),
 /// are \c true.
 void ScalarExprEmitter::EmitBinOpCheck(
-    ArrayRef<std::pair<Value *, SanitizerKind>> Checks, const BinOpInfo &Info) {
+    ArrayRef<std::pair<Value *, SanitizerMask>> Checks, const BinOpInfo &Info) {
   assert(CGF.IsSanitizerScope);
   StringRef CheckName;
   SmallVector<llvm::Constant *, 4> StaticData;
@@ -1610,26 +1609,32 @@
 //                             Unary Operators
 //===----------------------------------------------------------------------===//
 
-llvm::Value *ScalarExprEmitter::
-EmitAddConsiderOverflowBehavior(const UnaryOperator *E,
-                                llvm::Value *InVal,
-                                llvm::Value *NextVal, bool IsInc) {
+static BinOpInfo createBinOpInfoFromIncDec(const UnaryOperator *E,
+                                           llvm::Value *InVal, bool IsInc) {
+  BinOpInfo BinOp;
+  BinOp.LHS = InVal;
+  BinOp.RHS = llvm::ConstantInt::get(InVal->getType(), 1, false);
+  BinOp.Ty = E->getType();
+  BinOp.Opcode = IsInc ? BO_Add : BO_Sub;
+  BinOp.FPContractable = false;
+  BinOp.E = E;
+  return BinOp;
+}
+
+llvm::Value *ScalarExprEmitter::EmitIncDecConsiderOverflowBehavior(
+    const UnaryOperator *E, llvm::Value *InVal, bool IsInc) {
+  llvm::Value *Amount =
+      llvm::ConstantInt::get(InVal->getType(), IsInc ? 1 : -1, true);
+  StringRef Name = IsInc ? "inc" : "dec";
   switch (CGF.getLangOpts().getSignedOverflowBehavior()) {
   case LangOptions::SOB_Defined:
-    return Builder.CreateAdd(InVal, NextVal, IsInc ? "inc" : "dec");
+    return Builder.CreateAdd(InVal, Amount, Name);
   case LangOptions::SOB_Undefined:
     if (!CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow))
-      return Builder.CreateNSWAdd(InVal, NextVal, IsInc ? "inc" : "dec");
+      return Builder.CreateNSWAdd(InVal, Amount, Name);
     // Fall through.
   case LangOptions::SOB_Trapping:
-    BinOpInfo BinOp;
-    BinOp.LHS = InVal;
-    BinOp.RHS = NextVal;
-    BinOp.Ty = E->getType();
-    BinOp.Opcode = BO_Add;
-    BinOp.FPContractable = false;
-    BinOp.E = E;
-    return EmitOverflowCheckedBinOp(BinOp);
+    return EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec(E, InVal, IsInc));
   }
   llvm_unreachable("Unknown SignedOverflowBehaviorTy");
 }
@@ -1707,27 +1712,20 @@
 
   // Most common case by far: integer increment.
   } else if (type->isIntegerType()) {
-
-    llvm::Value *amt = llvm::ConstantInt::get(value->getType(), amount, true);
-
     // Note that signed integer inc/dec with width less than int can't
     // overflow because of promotion rules; we're just eliding a few steps here.
     bool CanOverflow = value->getType()->getIntegerBitWidth() >=
                        CGF.IntTy->getIntegerBitWidth();
     if (CanOverflow && type->isSignedIntegerOrEnumerationType()) {
-      value = EmitAddConsiderOverflowBehavior(E, value, amt, isInc);
+      value = EmitIncDecConsiderOverflowBehavior(E, value, isInc);
     } else if (CanOverflow && type->isUnsignedIntegerType() &&
                CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) {
-      BinOpInfo BinOp;
-      BinOp.LHS = value;
-      BinOp.RHS = llvm::ConstantInt::get(value->getType(), 1, false);
-      BinOp.Ty = E->getType();
-      BinOp.Opcode = isInc ? BO_Add : BO_Sub;
-      BinOp.FPContractable = false;
-      BinOp.E = E;
-      value = EmitOverflowCheckedBinOp(BinOp);
-    } else
+      value =
+          EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec(E, value, isInc));
+    } else {
+      llvm::Value *amt = llvm::ConstantInt::get(value->getType(), amount, true);
       value = Builder.CreateAdd(value, amt, isInc ? "inc" : "dec");
+    }
 
   // Next most common: pointer increment.
   } else if (const PointerType *ptr = type->getAs<PointerType>()) {
@@ -2233,7 +2231,7 @@
 
 void ScalarExprEmitter::EmitUndefinedBehaviorIntegerDivAndRemCheck(
     const BinOpInfo &Ops, llvm::Value *Zero, bool isDiv) {
-  SmallVector<std::pair<llvm::Value *, SanitizerKind>, 2> Checks;
+  SmallVector<std::pair<llvm::Value *, SanitizerMask>, 2> Checks;
 
   if (CGF.SanOpts.has(SanitizerKind::IntegerDivideByZero)) {
     Checks.push_back(std::make_pair(Builder.CreateICmpNE(Ops.RHS, Zero),
@@ -2345,7 +2343,7 @@
 
   llvm::Function *intrinsic = CGF.CGM.getIntrinsic(IID, opTy);
 
-  Value *resultAndOverflow = Builder.CreateCall2(intrinsic, Ops.LHS, Ops.RHS);
+  Value *resultAndOverflow = Builder.CreateCall(intrinsic, {Ops.LHS, Ops.RHS});
   Value *result = Builder.CreateExtractValue(resultAndOverflow, 0);
   Value *overflow = Builder.CreateExtractValue(resultAndOverflow, 1);
 
@@ -2358,7 +2356,7 @@
     if (!isSigned || CGF.SanOpts.has(SanitizerKind::SignedIntegerOverflow)) {
       CodeGenFunction::SanitizerScope SanScope(&CGF);
       llvm::Value *NotOverflow = Builder.CreateNot(overflow);
-      SanitizerKind Kind = isSigned ? SanitizerKind::SignedIntegerOverflow
+      SanitizerMask Kind = isSigned ? SanitizerKind::SignedIntegerOverflow
                               : SanitizerKind::UnsignedIntegerOverflow;
       EmitBinOpCheck(std::make_pair(NotOverflow, Kind), Ops);
     } else
@@ -2525,10 +2523,9 @@
         "neg");
   }
 
-  Value *FMulAdd =
-    Builder.CreateCall3(
+  Value *FMulAdd = Builder.CreateCall(
       CGF.CGM.getIntrinsic(llvm::Intrinsic::fmuladd, Addend->getType()),
-                           MulOp0, MulOp1, Addend);
+      {MulOp0, MulOp1, Addend});
    MulOp->eraseFromParent();
 
    return FMulAdd;
@@ -2723,7 +2720,7 @@
   else if ((SanitizeBase || SanitizeExponent) &&
            isa<llvm::IntegerType>(Ops.LHS->getType())) {
     CodeGenFunction::SanitizerScope SanScope(&CGF);
-    SmallVector<std::pair<Value *, SanitizerKind>, 2> Checks;
+    SmallVector<std::pair<Value *, SanitizerMask>, 2> Checks;
     llvm::Value *WidthMinusOne = GetWidthMinusOneValue(Ops.LHS, RHS);
     llvm::Value *ValidExponent = Builder.CreateICmpULE(RHS, WidthMinusOne);
 
@@ -2906,7 +2903,7 @@
 
       Value *CR6Param = Builder.getInt32(CR6);
       llvm::Function *F = CGF.CGM.getIntrinsic(ID);
-      Result = Builder.CreateCall3(F, CR6Param, FirstVecArg, SecondVecArg, "");
+      Result = Builder.CreateCall(F, {CR6Param, FirstVecArg, SecondVecArg});
       return EmitScalarConversion(Result, CGF.getContext().BoolTy, E->getType());
     }
 
@@ -3035,11 +3032,9 @@
 }
 
 Value *ScalarExprEmitter::VisitBinLAnd(const BinaryOperator *E) {
-  RegionCounter Cnt = CGF.getPGORegionCounter(E);
-
   // Perform vector logical and on comparisons with zero vectors.
   if (E->getType()->isVectorType()) {
-    Cnt.beginRegion(Builder);
+    CGF.incrementProfileCounter(E);
 
     Value *LHS = Visit(E->getLHS());
     Value *RHS = Visit(E->getRHS());
@@ -3062,7 +3057,7 @@
   bool LHSCondVal;
   if (CGF.ConstantFoldsToSimpleInteger(E->getLHS(), LHSCondVal)) {
     if (LHSCondVal) { // If we have 1 && X, just emit X.
-      Cnt.beginRegion(Builder);
+      CGF.incrementProfileCounter(E);
 
       Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
       // ZExt result to int or bool.
@@ -3080,7 +3075,8 @@
   CodeGenFunction::ConditionalEvaluation eval(CGF);
 
   // Branch on the LHS first.  If it is false, go to the failure (cont) block.
-  CGF.EmitBranchOnBoolExpr(E->getLHS(), RHSBlock, ContBlock, Cnt.getCount());
+  CGF.EmitBranchOnBoolExpr(E->getLHS(), RHSBlock, ContBlock,
+                           CGF.getProfileCount(E->getRHS()));
 
   // Any edges into the ContBlock are now from an (indeterminate number of)
   // edges from this first condition.  All of these values will be false.  Start
@@ -3093,7 +3089,7 @@
 
   eval.begin(CGF);
   CGF.EmitBlock(RHSBlock);
-  Cnt.beginRegion(Builder);
+  CGF.incrementProfileCounter(E);
   Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
   eval.end(CGF);
 
@@ -3114,11 +3110,9 @@
 }
 
 Value *ScalarExprEmitter::VisitBinLOr(const BinaryOperator *E) {
-  RegionCounter Cnt = CGF.getPGORegionCounter(E);
-
   // Perform vector logical or on comparisons with zero vectors.
   if (E->getType()->isVectorType()) {
-    Cnt.beginRegion(Builder);
+    CGF.incrementProfileCounter(E);
 
     Value *LHS = Visit(E->getLHS());
     Value *RHS = Visit(E->getRHS());
@@ -3141,7 +3135,7 @@
   bool LHSCondVal;
   if (CGF.ConstantFoldsToSimpleInteger(E->getLHS(), LHSCondVal)) {
     if (!LHSCondVal) { // If we have 0 || X, just emit X.
-      Cnt.beginRegion(Builder);
+      CGF.incrementProfileCounter(E);
 
       Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
       // ZExt result to int or bool.
@@ -3160,7 +3154,8 @@
 
   // Branch on the LHS first.  If it is true, go to the success (cont) block.
   CGF.EmitBranchOnBoolExpr(E->getLHS(), ContBlock, RHSBlock,
-                           Cnt.getParentCount() - Cnt.getCount());
+                           CGF.getCurrentProfileCount() -
+                               CGF.getProfileCount(E->getRHS()));
 
   // Any edges into the ContBlock are now from an (indeterminate number of)
   // edges from this first condition.  All of these values will be true.  Start
@@ -3175,7 +3170,7 @@
 
   // Emit the RHS condition as a bool value.
   CGF.EmitBlock(RHSBlock);
-  Cnt.beginRegion(Builder);
+  CGF.incrementProfileCounter(E);
   Value *RHSCond = CGF.EvaluateExprAsBool(E->getRHS());
 
   eval.end(CGF);
@@ -3226,7 +3221,6 @@
 
   // Bind the common expression if necessary.
   CodeGenFunction::OpaqueValueMapping binding(CGF, E);
-  RegionCounter Cnt = CGF.getPGORegionCounter(E);
 
   Expr *condExpr = E->getCond();
   Expr *lhsExpr = E->getTrueExpr();
@@ -3242,7 +3236,7 @@
     // If the dead side doesn't have labels we need, just emit the Live part.
     if (!CGF.ContainsLabel(dead)) {
       if (CondExprBool)
-        Cnt.beginRegion(Builder);
+        CGF.incrementProfileCounter(E);
       Value *Result = Visit(live);
 
       // If the live part is a throw expression, it acts like it has a void
@@ -3259,7 +3253,7 @@
   // the select function.
   if (CGF.getLangOpts().OpenCL
       && condExpr->getType()->isVectorType()) {
-    Cnt.beginRegion(Builder);
+    CGF.incrementProfileCounter(E);
 
     llvm::Value *CondV = CGF.EmitScalarExpr(condExpr);
     llvm::Value *LHS = Visit(lhsExpr);
@@ -3304,7 +3298,7 @@
   // safe to evaluate the LHS and RHS unconditionally.
   if (isCheapEnoughToEvaluateUnconditionally(lhsExpr, CGF) &&
       isCheapEnoughToEvaluateUnconditionally(rhsExpr, CGF)) {
-    Cnt.beginRegion(Builder);
+    CGF.incrementProfileCounter(E);
 
     llvm::Value *CondV = CGF.EvaluateExprAsBool(condExpr);
     llvm::Value *LHS = Visit(lhsExpr);
@@ -3322,10 +3316,11 @@
   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("cond.end");
 
   CodeGenFunction::ConditionalEvaluation eval(CGF);
-  CGF.EmitBranchOnBoolExpr(condExpr, LHSBlock, RHSBlock, Cnt.getCount());
+  CGF.EmitBranchOnBoolExpr(condExpr, LHSBlock, RHSBlock,
+                           CGF.getProfileCount(lhsExpr));
 
   CGF.EmitBlock(LHSBlock);
-  Cnt.beginRegion(Builder);
+  CGF.incrementProfileCounter(E);
   eval.begin(CGF);
   Value *LHS = Visit(lhsExpr);
   eval.end(CGF);
diff --git a/lib/CodeGen/CGLoopInfo.cpp b/lib/CodeGen/CGLoopInfo.cpp
index 011ae7e..1163d63 100644
--- a/lib/CodeGen/CGLoopInfo.cpp
+++ b/lib/CodeGen/CGLoopInfo.cpp
@@ -8,13 +8,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "CGLoopInfo.h"
+#include "clang/AST/Attr.h"
+#include "clang/Sema/LoopHint.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Metadata.h"
-using namespace clang;
-using namespace CodeGen;
+using namespace clang::CodeGen;
 using namespace llvm;
 
 static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs) {
@@ -77,7 +78,34 @@
   LoopID = createMetadata(Header->getContext(), Attrs);
 }
 
-void LoopInfoStack::push(BasicBlock *Header) {
+void LoopInfoStack::push(BasicBlock *Header,
+                         ArrayRef<const clang::Attr *> Attrs) {
+  for (const auto *Attr : Attrs) {
+    const LoopHintAttr *LH = dyn_cast<LoopHintAttr>(Attr);
+
+    // Skip non-loop-hint attributes.
+    if (!LH)
+      continue;
+
+    LoopHintAttr::OptionType Option = LH->getOption();
+    LoopHintAttr::LoopHintState State = LH->getState();
+    switch (Option) {
+    case LoopHintAttr::Vectorize:
+    case LoopHintAttr::Interleave:
+      if (State == LoopHintAttr::AssumeSafety) {
+        // Apply "llvm.mem.parallel_loop_access" metadata to load/stores.
+        setParallel(true);
+      }
+      break;
+    case LoopHintAttr::VectorizeWidth:
+    case LoopHintAttr::InterleaveCount:
+    case LoopHintAttr::Unroll:
+    case LoopHintAttr::UnrollCount:
+      // Nothing to do here for these loop hints.
+      break;
+    }
+  }
+
   Active.push_back(LoopInfo(Header, StagedAttrs));
   // Clear the attributes so nested loops do not inherit them.
   StagedAttrs.clear();
diff --git a/lib/CodeGen/CGLoopInfo.h b/lib/CodeGen/CGLoopInfo.h
index aee1621..2249937 100644
--- a/lib/CodeGen/CGLoopInfo.h
+++ b/lib/CodeGen/CGLoopInfo.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_CLANG_LIB_CODEGEN_CGLOOPINFO_H
 #define LLVM_CLANG_LIB_CODEGEN_CGLOOPINFO_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/Value.h"
@@ -27,6 +28,7 @@
 } // end namespace llvm
 
 namespace clang {
+class Attr;
 namespace CodeGen {
 
 /// \brief Attributes that may be specified on loops.
@@ -86,7 +88,8 @@
 
   /// \brief Begin a new structured loop. The set of staged attributes will be
   /// applied to the loop and then cleared.
-  void push(llvm::BasicBlock *Header);
+  void push(llvm::BasicBlock *Header,
+            llvm::ArrayRef<const Attr *> Attrs = llvm::None);
 
   /// \brief End the current loop.
   void pop();
diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp
index dfad13a..9981fcc 100644
--- a/lib/CodeGen/CGObjC.cpp
+++ b/lib/CodeGen/CGObjC.cpp
@@ -497,8 +497,7 @@
   StartObjCMethod(OMD, OMD->getClassInterface());
   PGO.assignRegionCounters(OMD, CurFn);
   assert(isa<CompoundStmt>(OMD->getBody()));
-  RegionCounter Cnt = getPGORegionCounter(OMD->getBody());
-  Cnt.beginRegion(Builder);
+  incrementProfileCounter(OMD->getBody());
   EmitCompoundStmtWithoutScope(*cast<CompoundStmt>(OMD->getBody()));
   FinishFunction(OMD->getBodyRBrace());
 }
@@ -1503,11 +1502,11 @@
   // If the limit pointer was zero to begin with, the collection is
   // empty; skip all this. Set the branch weight assuming this has the same
   // probability of exiting the loop as any other loop exit.
-  uint64_t EntryCount = PGO.getCurrentRegionCount();
-  RegionCounter Cnt = getPGORegionCounter(&S);
-  Builder.CreateCondBr(Builder.CreateICmpEQ(initialBufferLimit, zero, "iszero"),
-                       EmptyBB, LoopInitBB,
-                       PGO.createBranchWeights(EntryCount, Cnt.getCount()));
+  uint64_t EntryCount = getCurrentProfileCount();
+  Builder.CreateCondBr(
+      Builder.CreateICmpEQ(initialBufferLimit, zero, "iszero"), EmptyBB,
+      LoopInitBB,
+      createProfileWeights(EntryCount, getProfileCount(S.getBody())));
 
   // Otherwise, initialize the loop.
   EmitBlock(LoopInitBB);
@@ -1536,7 +1535,7 @@
   llvm::PHINode *count = Builder.CreatePHI(UnsignedLongLTy, 3, "forcoll.count");
   count->addIncoming(initialBufferLimit, LoopInitBB);
 
-  Cnt.beginRegion(Builder);
+  incrementProfileCounter(&S);
 
   // Check whether the mutations value has changed from where it was
   // at start.  StateMutationsPtr should actually be invariant between
@@ -1648,9 +1647,9 @@
   // Set the branch weights based on the simplifying assumption that this is
   // like a while-loop, i.e., ignoring that the false branch fetches more
   // elements and then returns to the loop.
-  Builder.CreateCondBr(Builder.CreateICmpULT(indexPlusOne, count),
-                       LoopBodyBB, FetchMoreBB,
-                       PGO.createBranchWeights(Cnt.getCount(), EntryCount));
+  Builder.CreateCondBr(
+      Builder.CreateICmpULT(indexPlusOne, count), LoopBodyBB, FetchMoreBB,
+      createProfileWeights(getProfileCount(S.getBody()), EntryCount));
 
   index->addIncoming(indexPlusOne, AfterBody.getBlock());
   count->addIncoming(count, AfterBody.getBlock());
@@ -1981,7 +1980,8 @@
   }
 
   // Call the marker asm if we made one, which we do only at -O0.
-  if (marker) Builder.CreateCall(marker);
+  if (marker)
+    Builder.CreateCall(marker, {});
 
   return emitARCValueOperation(*this, value,
                      CGM.getARCEntrypoints().objc_retainAutoreleasedReturnValue,
@@ -2996,13 +2996,9 @@
   
   SmallVector<Expr*, 4> ConstructorArgs;
   ConstructorArgs.push_back(&SRC);
-  CXXConstructExpr::arg_iterator A = CXXConstExpr->arg_begin();
-  ++A;
-  
-  for (CXXConstructExpr::arg_iterator AEnd = CXXConstExpr->arg_end();
-       A != AEnd; ++A)
-    ConstructorArgs.push_back(*A);
-  
+  ConstructorArgs.append(std::next(CXXConstExpr->arg_begin()),
+                         CXXConstExpr->arg_end());
+
   CXXConstructExpr *TheCXXConstructExpr =
     CXXConstructExpr::Create(C, Ty, SourceLocation(),
                              CXXConstExpr->getConstructor(),
diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp
index 981fe90..b52d623 100644
--- a/lib/CodeGen/CGObjCGNU.cpp
+++ b/lib/CodeGen/CGObjCGNU.cpp
@@ -46,54 +46,49 @@
 /// avoids constructing the type more than once if it's used more than once.
 class LazyRuntimeFunction {
   CodeGenModule *CGM;
-  std::vector<llvm::Type*> ArgTys;
+  llvm::FunctionType *FTy;
   const char *FunctionName;
   llvm::Constant *Function;
-  public:
-    /// Constructor leaves this class uninitialized, because it is intended to
-    /// be used as a field in another class and not all of the types that are
-    /// used as arguments will necessarily be available at construction time.
-    LazyRuntimeFunction()
+
+public:
+  /// Constructor leaves this class uninitialized, because it is intended to
+  /// be used as a field in another class and not all of the types that are
+  /// used as arguments will necessarily be available at construction time.
+  LazyRuntimeFunction()
       : CGM(nullptr), FunctionName(nullptr), Function(nullptr) {}
 
-    /// Initialises the lazy function with the name, return type, and the types
-    /// of the arguments.
-    LLVM_END_WITH_NULL
-    void init(CodeGenModule *Mod, const char *name,
-        llvm::Type *RetTy, ...) {
-       CGM =Mod;
-       FunctionName = name;
-       Function = nullptr;
-       ArgTys.clear();
-       va_list Args;
-       va_start(Args, RetTy);
-         while (llvm::Type *ArgTy = va_arg(Args, llvm::Type*))
-           ArgTys.push_back(ArgTy);
-       va_end(Args);
-       // Push the return type on at the end so we can pop it off easily
-       ArgTys.push_back(RetTy);
-   }
-   /// Overloaded cast operator, allows the class to be implicitly cast to an
-   /// LLVM constant.
-   operator llvm::Constant*() {
-     if (!Function) {
-       if (!FunctionName) return nullptr;
-       // We put the return type on the end of the vector, so pop it back off
-       llvm::Type *RetTy = ArgTys.back();
-       ArgTys.pop_back();
-       llvm::FunctionType *FTy = llvm::FunctionType::get(RetTy, ArgTys, false);
-       Function =
-         cast<llvm::Constant>(CGM->CreateRuntimeFunction(FTy, FunctionName));
-       // We won't need to use the types again, so we may as well clean up the
-       // vector now
-       ArgTys.resize(0);
-     }
-     return Function;
-   }
-   operator llvm::Function*() {
-     return cast<llvm::Function>((llvm::Constant*)*this);
-   }
+  /// Initialises the lazy function with the name, return type, and the types
+  /// of the arguments.
+  LLVM_END_WITH_NULL
+  void init(CodeGenModule *Mod, const char *name, llvm::Type *RetTy, ...) {
+    CGM = Mod;
+    FunctionName = name;
+    Function = nullptr;
+    std::vector<llvm::Type *> ArgTys;
+    va_list Args;
+    va_start(Args, RetTy);
+    while (llvm::Type *ArgTy = va_arg(Args, llvm::Type *))
+      ArgTys.push_back(ArgTy);
+    va_end(Args);
+    FTy = llvm::FunctionType::get(RetTy, ArgTys, false);
+  }
 
+  llvm::FunctionType *getType() { return FTy; }
+
+  /// Overloaded cast operator, allows the class to be implicitly cast to an
+  /// LLVM constant.
+  operator llvm::Constant *() {
+    if (!Function) {
+      if (!FunctionName)
+        return nullptr;
+      Function =
+          cast<llvm::Constant>(CGM->CreateRuntimeFunction(FTy, FunctionName));
+    }
+    return Function;
+  }
+  operator llvm::Function *() {
+    return cast<llvm::Function>((llvm::Constant *)*this);
+  }
 };
 
 
@@ -1060,9 +1055,9 @@
   }
   if (!SelValue) {
     SelValue = llvm::GlobalAlias::create(
-        SelectorTy->getElementType(), 0, llvm::GlobalValue::PrivateLinkage,
+        SelectorTy, llvm::GlobalValue::PrivateLinkage,
         ".objc_selector_" + Sel.getAsString(), &TheModule);
-    Types.push_back(TypedSelector(TypeEncoding, SelValue));
+    Types.emplace_back(TypeEncoding, SelValue);
   }
 
   if (lval) {
@@ -1266,14 +1261,14 @@
     if (IsClassMessage)  {
       if (!MetaClassPtrAlias) {
         MetaClassPtrAlias = llvm::GlobalAlias::create(
-            IdTy->getElementType(), 0, llvm::GlobalValue::InternalLinkage,
+            IdTy, llvm::GlobalValue::InternalLinkage,
             ".objc_metaclass_ref" + Class->getNameAsString(), &TheModule);
       }
       ReceiverClass = MetaClassPtrAlias;
     } else {
       if (!ClassPtrAlias) {
         ClassPtrAlias = llvm::GlobalAlias::create(
-            IdTy->getElementType(), 0, llvm::GlobalValue::InternalLinkage,
+            IdTy, llvm::GlobalValue::InternalLinkage,
             ".objc_class_ref" + Class->getNameAsString(), &TheModule);
       }
       ReceiverClass = ClassPtrAlias;
@@ -2126,9 +2121,8 @@
   // Get the class declaration for which the alias is specified.
   ObjCInterfaceDecl *ClassDecl =
     const_cast<ObjCInterfaceDecl *>(OAD->getClassInterface());
-  std::string ClassName = ClassDecl->getNameAsString();
-  std::string AliasName = OAD->getNameAsString();
-  ClassAliases.push_back(ClassAliasPair(ClassName,AliasName));
+  ClassAliases.emplace_back(ClassDecl->getNameAsString(),
+                            OAD->getNameAsString());
 }
 
 void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) {
@@ -2570,8 +2564,8 @@
             true);
        if (TheClass) {
          TheClass = llvm::ConstantExpr::getBitCast(TheClass, PtrTy);
-         Builder.CreateCall2(RegisterAlias, TheClass,
-            MakeConstantString(iter->second));
+         Builder.CreateCall(RegisterAlias,
+                            {TheClass, MakeConstantString(iter->second)});
        }
     }
     // Jump to end:
@@ -2687,7 +2681,7 @@
                                           llvm::Value *AddrWeakObj) {
   CGBuilderTy &B = CGF.Builder;
   AddrWeakObj = EnforceType(B, AddrWeakObj, PtrToIdTy);
-  return B.CreateCall(WeakReadFn, AddrWeakObj);
+  return B.CreateCall(WeakReadFn.getType(), WeakReadFn, AddrWeakObj);
 }
 
 void CGObjCGNU::EmitObjCWeakAssign(CodeGenFunction &CGF,
@@ -2695,7 +2689,7 @@
   CGBuilderTy &B = CGF.Builder;
   src = EnforceType(B, src, IdTy);
   dst = EnforceType(B, dst, PtrToIdTy);
-  B.CreateCall2(WeakAssignFn, src, dst);
+  B.CreateCall(WeakAssignFn.getType(), WeakAssignFn, {src, dst});
 }
 
 void CGObjCGNU::EmitObjCGlobalAssign(CodeGenFunction &CGF,
@@ -2704,11 +2698,9 @@
   CGBuilderTy &B = CGF.Builder;
   src = EnforceType(B, src, IdTy);
   dst = EnforceType(B, dst, PtrToIdTy);
-  if (!threadlocal)
-    B.CreateCall2(GlobalAssignFn, src, dst);
-  else
-    // FIXME. Add threadloca assign API
-    llvm_unreachable("EmitObjCGlobalAssign - Threal Local API NYI");
+  // FIXME: Add threadlocal assign API
+  assert(!threadlocal && "EmitObjCGlobalAssign - Threal Local API NYI");
+  B.CreateCall(GlobalAssignFn.getType(), GlobalAssignFn, {src, dst});
 }
 
 void CGObjCGNU::EmitObjCIvarAssign(CodeGenFunction &CGF,
@@ -2717,7 +2709,7 @@
   CGBuilderTy &B = CGF.Builder;
   src = EnforceType(B, src, IdTy);
   dst = EnforceType(B, dst, IdTy);
-  B.CreateCall3(IvarAssignFn, src, dst, ivarOffset);
+  B.CreateCall(IvarAssignFn.getType(), IvarAssignFn, {src, dst, ivarOffset});
 }
 
 void CGObjCGNU::EmitObjCStrongCastAssign(CodeGenFunction &CGF,
@@ -2725,7 +2717,7 @@
   CGBuilderTy &B = CGF.Builder;
   src = EnforceType(B, src, IdTy);
   dst = EnforceType(B, dst, PtrToIdTy);
-  B.CreateCall2(StrongCastAssignFn, src, dst);
+  B.CreateCall(StrongCastAssignFn.getType(), StrongCastAssignFn, {src, dst});
 }
 
 void CGObjCGNU::EmitGCMemmoveCollectable(CodeGenFunction &CGF,
@@ -2736,7 +2728,7 @@
   DestPtr = EnforceType(B, DestPtr, PtrTy);
   SrcPtr = EnforceType(B, SrcPtr, PtrTy);
 
-  B.CreateCall3(MemMoveFn, DestPtr, SrcPtr, Size);
+  B.CreateCall(MemMoveFn.getType(), MemMoveFn, {DestPtr, SrcPtr, Size});
 }
 
 llvm::GlobalVariable *CGObjCGNU::ObjCIvarOffsetVariable(
diff --git a/lib/CodeGen/CGObjCRuntime.cpp b/lib/CodeGen/CGObjCRuntime.cpp
index 3d013da..5290a87 100644
--- a/lib/CodeGen/CGObjCRuntime.cpp
+++ b/lib/CodeGen/CGObjCRuntime.cpp
@@ -160,7 +160,7 @@
 
     void Emit(CodeGenFunction &CGF, Flags flags) override {
       if (!MightThrow) {
-        CGF.Builder.CreateCall(Fn)->setDoesNotThrow();
+        CGF.Builder.CreateCall(Fn, {})->setDoesNotThrow();
         return;
       }
 
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index 5988c78..1238acc 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -641,12 +641,12 @@
   }
   case OMPRTL__kmpc_copyprivate: {
     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
-    // kmp_int32 cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
+    // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
     // kmp_int32 didit);
     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
     auto *CpyFnTy =
         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
-    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
                                 CGM.Int32Ty};
     llvm::FunctionType *FnTy =
@@ -710,6 +710,52 @@
         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
     break;
   }
+  case OMPRTL__kmpc_omp_task_begin_if0: {
+    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
+    // kmp_task_t *new_task);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
+                                CGM.VoidPtrTy};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+    RTLFn =
+        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
+    break;
+  }
+  case OMPRTL__kmpc_omp_task_complete_if0: {
+    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
+    // kmp_task_t *new_task);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
+                                CGM.VoidPtrTy};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy,
+                                      /*Name=*/"__kmpc_omp_task_complete_if0");
+    break;
+  }
+  case OMPRTL__kmpc_ordered: {
+    // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
+    break;
+  }
+  case OMPRTL__kmpc_end_ordered: {
+    // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
+    break;
+  }
+  case OMPRTL__kmpc_omp_taskwait: {
+    // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
+    break;
+  }
   }
   return RTLFn;
 }
@@ -762,6 +808,23 @@
   return CGM.CreateRuntimeFunction(FnTy, Name);
 }
 
+llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
+                                                            bool IVSigned) {
+  assert((IVSize == 32 || IVSize == 64) &&
+         "IV size is not compatible with the omp runtime");
+  auto Name =
+      IVSize == 32
+          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
+          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
+  llvm::Type *TypeParams[] = {
+      getIdentTyPointerTy(), // loc
+      CGM.Int32Ty,           // tid
+  };
+  llvm::FunctionType *FnTy =
+      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+  return CGM.CreateRuntimeFunction(FnTy, Name);
+}
+
 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
                                                             bool IVSigned) {
   assert((IVSize == 32 || IVSize == 64) &&
@@ -934,43 +997,112 @@
   return nullptr;
 }
 
-void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
-                                       llvm::Value *OutlinedFn,
-                                       llvm::Value *CapturedStruct) {
-  // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/)
-  llvm::Value *Args[] = {
-      emitUpdateLocation(CGF, Loc),
-      CGF.Builder.getInt32(1), // Number of arguments after 'microtask' argument
-      // (there is only one additional argument - 'context')
-      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()),
-      CGF.EmitCastToVoidPtr(CapturedStruct)};
-  auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
-  CGF.EmitRuntimeCall(RTLFn, Args);
+/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
+/// function. Here is the logic:
+/// if (Cond) {
+///   ThenGen();
+/// } else {
+///   ElseGen();
+/// }
+static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
+                            const RegionCodeGenTy &ThenGen,
+                            const RegionCodeGenTy &ElseGen) {
+  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
+
+  // If the condition constant folds and can be elided, try to avoid emitting
+  // the condition and the dead arm of the if/else.
+  bool CondConstant;
+  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
+    CodeGenFunction::RunCleanupsScope Scope(CGF);
+    if (CondConstant) {
+      ThenGen(CGF);
+    } else {
+      ElseGen(CGF);
+    }
+    return;
+  }
+
+  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
+  // emit the conditional branch.
+  auto ThenBlock = CGF.createBasicBlock("omp_if.then");
+  auto ElseBlock = CGF.createBasicBlock("omp_if.else");
+  auto ContBlock = CGF.createBasicBlock("omp_if.end");
+  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
+
+  // Emit the 'then' code.
+  CGF.EmitBlock(ThenBlock);
+  {
+    CodeGenFunction::RunCleanupsScope ThenScope(CGF);
+    ThenGen(CGF);
+  }
+  CGF.EmitBranch(ContBlock);
+  // Emit the 'else' code if present.
+  {
+    // There is no need to emit line number for unconditional branch.
+    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
+    CGF.EmitBlock(ElseBlock);
+  }
+  {
+    CodeGenFunction::RunCleanupsScope ThenScope(CGF);
+    ElseGen(CGF);
+  }
+  {
+    // There is no need to emit line number for unconditional branch.
+    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
+    CGF.EmitBranch(ContBlock);
+  }
+  // Emit the continuation block for code after the if.
+  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
 }
 
-void CGOpenMPRuntime::emitSerialCall(CodeGenFunction &CGF, SourceLocation Loc,
-                                     llvm::Value *OutlinedFn,
-                                     llvm::Value *CapturedStruct) {
-  auto ThreadID = getThreadID(CGF, Loc);
-  // Build calls:
-  // __kmpc_serialized_parallel(&Loc, GTid);
-  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), ThreadID};
-  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
-                      Args);
+void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
+                                       llvm::Value *OutlinedFn,
+                                       llvm::Value *CapturedStruct,
+                                       const Expr *IfCond) {
+  auto *RTLoc = emitUpdateLocation(CGF, Loc);
+  auto &&ThenGen =
+      [this, OutlinedFn, CapturedStruct, RTLoc](CodeGenFunction &CGF) {
+        // Build call __kmpc_fork_call(loc, 1, microtask,
+        // captured_struct/*context*/)
+        llvm::Value *Args[] = {
+            RTLoc,
+            CGF.Builder.getInt32(
+                1), // Number of arguments after 'microtask' argument
+            // (there is only one additional argument - 'context')
+            CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()),
+            CGF.EmitCastToVoidPtr(CapturedStruct)};
+        auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
+        CGF.EmitRuntimeCall(RTLFn, Args);
+      };
+  auto &&ElseGen = [this, OutlinedFn, CapturedStruct, RTLoc, Loc](
+      CodeGenFunction &CGF) {
+    auto ThreadID = getThreadID(CGF, Loc);
+    // Build calls:
+    // __kmpc_serialized_parallel(&Loc, GTid);
+    llvm::Value *Args[] = {RTLoc, ThreadID};
+    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
+                        Args);
 
-  // OutlinedFn(&GTid, &zero, CapturedStruct);
-  auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
-  auto Int32Ty =
-      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
-  auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr");
-  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
-  llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct};
-  CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
+    // OutlinedFn(&GTid, &zero, CapturedStruct);
+    auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
+    auto Int32Ty = CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32,
+                                                          /*Signed*/ true);
+    auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr");
+    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+    llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct};
+    CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
 
-  // __kmpc_end_serialized_parallel(&Loc, GTid);
-  llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
-  CGF.EmitRuntimeCall(
-      createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
+    // __kmpc_end_serialized_parallel(&Loc, GTid);
+    llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
+    CGF.EmitRuntimeCall(
+        createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
+  };
+  if (IfCond) {
+    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
+  } else {
+    CodeGenFunction::RunCleanupsScope Scope(CGF);
+    ThenGen(CGF);
+  }
 }
 
 // If we're inside an (outlined) parallel region, use the region info's
@@ -1022,16 +1154,16 @@
 }
 
 namespace {
-class CallEndCleanup : public EHScopeStack::Cleanup {
-public:
-  typedef ArrayRef<llvm::Value *> CleanupValuesTy;
-private:
+template <size_t N> class CallEndCleanup : public EHScopeStack::Cleanup {
   llvm::Value *Callee;
-  llvm::SmallVector<llvm::Value *, 8> Args;
+  llvm::Value *Args[N];
 
 public:
-  CallEndCleanup(llvm::Value *Callee, CleanupValuesTy Args)
-      : Callee(Callee), Args(Args.begin(), Args.end()) {}
+  CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs)
+      : Callee(Callee) {
+    assert(CleanupArgs.size() == N);
+    std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args));
+  }
   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
     CGF.EmitRuntimeCall(Callee, Args);
   }
@@ -1052,7 +1184,7 @@
                            getCriticalRegionLock(CriticalName)};
     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
     // Build a call to __kmpc_end_critical
-    CGF.EHStack.pushCleanup<CallEndCleanup>(
+    CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
         llvm::makeArrayRef(Args));
     emitInlinedDirective(CGF, CriticalOpGen);
@@ -1088,9 +1220,11 @@
   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
   auto *IsMaster =
       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
+  typedef CallEndCleanup<std::extent<decltype(Args)>::value>
+      MasterCallEndCleanup;
   emitIfStmt(CGF, IsMaster, [&](CodeGenFunction &CGF) -> void {
     CodeGenFunction::RunCleanupsScope Scope(CGF);
-    CGF.EHStack.pushCleanup<CallEndCleanup>(
+    CGF.EHStack.pushCleanup<MasterCallEndCleanup>(
         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
         llvm::makeArrayRef(Args));
     MasterOpGen(CGF);
@@ -1153,7 +1287,9 @@
             CGF.Builder.CreateStructGEP(nullptr, RHS, I),
             CGM.PointerAlignInBytes),
         CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
-    CGF.EmitOMPCopy(CGF, CopyprivateVars[I]->getType(), DestAddr, SrcAddr,
+    auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
+    QualType Type = VD->getType();
+    CGF.EmitOMPCopy(CGF, Type, DestAddr, SrcAddr,
                     cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()),
                     cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()),
                     AssignmentOps[I]);
@@ -1187,15 +1323,18 @@
     // int32 did_it = 0;
     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
-    CGF.InitTempAlloca(DidIt, CGF.Builder.getInt32(0));
+    CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(0), DidIt,
+                                   DidIt->getAlignment());
   }
   // Prepare arguments and build a call to __kmpc_single
   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
   auto *IsSingle =
       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
+  typedef CallEndCleanup<std::extent<decltype(Args)>::value>
+      SingleCallEndCleanup;
   emitIfStmt(CGF, IsSingle, [&](CodeGenFunction &CGF) -> void {
     CodeGenFunction::RunCleanupsScope Scope(CGF);
-    CGF.EHStack.pushCleanup<CallEndCleanup>(
+    CGF.EHStack.pushCleanup<SingleCallEndCleanup>(
         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
         llvm::makeArrayRef(Args));
     SingleOpGen(CGF);
@@ -1228,8 +1367,8 @@
     auto *CpyFn = emitCopyprivateCopyFunction(
         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
-    auto *BufSize = CGF.Builder.getInt32(
-        C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity());
+    auto *BufSize = llvm::ConstantInt::get(
+        CGM.SizeTy, C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity());
     auto *CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                                CGF.VoidPtrTy);
     auto *DidItVal =
@@ -1237,7 +1376,7 @@
     llvm::Value *Args[] = {
         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
         getThreadID(CGF, Loc),        // i32 <gtid>
-        BufSize,                      // i32 <buf_size>
+        BufSize,                      // size_t <buf_size>
         CL,                           // void *<copyprivate list>
         CpyFn,                        // void (*) (void *, void *) <copy_func>
         DidItVal                      // i32 did_it
@@ -1246,6 +1385,25 @@
   }
 }
 
+void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
+                                        const RegionCodeGenTy &OrderedOpGen,
+                                        SourceLocation Loc) {
+  // __kmpc_ordered(ident_t *, gtid);
+  // OrderedOpGen();
+  // __kmpc_end_ordered(ident_t *, gtid);
+  // Prepare arguments and build a call to __kmpc_ordered
+  {
+    CodeGenFunction::RunCleanupsScope Scope(CGF);
+    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
+    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
+    // Build a call to __kmpc_end_ordered
+    CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
+        NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
+        llvm::makeArrayRef(Args));
+    emitInlinedDirective(CGF, OrderedOpGen);
+  }
+}
+
 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       OpenMPDirectiveKind Kind) {
   // Build call __kmpc_cancel_barrier(loc, thread_id);
@@ -1288,51 +1446,61 @@
   OMP_sch_auto = 38,
   /// \brief Lower bound for 'ordered' versions.
   OMP_ord_lower = 64,
-  /// \brief Lower bound for 'nomerge' versions.
-  OMP_nm_lower = 160,
+  OMP_ord_static_chunked = 65,
+  OMP_ord_static = 66,
+  OMP_ord_dynamic_chunked = 67,
+  OMP_ord_guided_chunked = 68,
+  OMP_ord_runtime = 69,
+  OMP_ord_auto = 70,
+  OMP_sch_default = OMP_sch_static,
 };
 
 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
-                                          bool Chunked) {
+                                          bool Chunked, bool Ordered) {
   switch (ScheduleKind) {
   case OMPC_SCHEDULE_static:
-    return Chunked ? OMP_sch_static_chunked : OMP_sch_static;
+    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
+                   : (Ordered ? OMP_ord_static : OMP_sch_static);
   case OMPC_SCHEDULE_dynamic:
-    return OMP_sch_dynamic_chunked;
+    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
   case OMPC_SCHEDULE_guided:
-    return OMP_sch_guided_chunked;
-  case OMPC_SCHEDULE_auto:
-    return OMP_sch_auto;
+    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
   case OMPC_SCHEDULE_runtime:
-    return OMP_sch_runtime;
+    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
+  case OMPC_SCHEDULE_auto:
+    return Ordered ? OMP_ord_auto : OMP_sch_auto;
   case OMPC_SCHEDULE_unknown:
     assert(!Chunked && "chunk was specified but schedule kind not known");
-    return OMP_sch_static;
+    return Ordered ? OMP_ord_static : OMP_sch_static;
   }
   llvm_unreachable("Unexpected runtime schedule");
 }
 
 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked) const {
-  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
+  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
   return Schedule == OMP_sch_static;
 }
 
 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
-  auto Schedule = getRuntimeSchedule(ScheduleKind, /* Chunked */ false);
+  auto Schedule =
+      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
   return Schedule != OMP_sch_static;
 }
 
 void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
                                   OpenMPScheduleClauseKind ScheduleKind,
-                                  unsigned IVSize, bool IVSigned,
+                                  unsigned IVSize, bool IVSigned, bool Ordered,
                                   llvm::Value *IL, llvm::Value *LB,
                                   llvm::Value *UB, llvm::Value *ST,
                                   llvm::Value *Chunk) {
-  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr);
-  if (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked) {
+  OpenMPSchedType Schedule =
+      getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
+  if (Ordered ||
+      (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
+       Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked)) {
     // Call __kmpc_dispatch_init(
     //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
     //          kmp_int[32|64] lower, kmp_int[32|64] upper,
@@ -1357,12 +1525,13 @@
     //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
     //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
     if (Chunk == nullptr) {
-      assert(Schedule == OMP_sch_static &&
+      assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
              "expected static non-chunked schedule");
       // If the Chunk was not specified in the clause - use default value 1.
       Chunk = CGF.Builder.getIntN(IVSize, 1);
     } else
-      assert(Schedule == OMP_sch_static_chunked &&
+      assert((Schedule == OMP_sch_static_chunked ||
+              Schedule == OMP_ord_static_chunked) &&
              "expected static chunked schedule");
     llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
                             getThreadID(CGF, Loc),
@@ -1378,12 +1547,8 @@
   }
 }
 
-void CGOpenMPRuntime::emitForFinish(CodeGenFunction &CGF, SourceLocation Loc,
-                                    OpenMPScheduleClauseKind ScheduleKind) {
-  assert((ScheduleKind == OMPC_SCHEDULE_static ||
-          ScheduleKind == OMPC_SCHEDULE_unknown) &&
-         "Non-static schedule kinds are not yet implemented");
-  (void)ScheduleKind;
+void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
+                                          SourceLocation Loc) {
   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
                          getThreadID(CGF, Loc)};
@@ -1391,6 +1556,16 @@
                       Args);
 }
 
+void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
+                                                 SourceLocation Loc,
+                                                 unsigned IVSize,
+                                                 bool IVSigned) {
+  // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
+  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
+                         getThreadID(CGF, Loc)};
+  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
+}
+
 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                           SourceLocation Loc, unsigned IVSize,
                                           bool IVSigned, llvm::Value *IL,
@@ -1468,16 +1643,49 @@
   DC->addDecl(Field);
 }
 
-static QualType createKmpTaskTRecordDecl(CodeGenModule &CGM,
-                                         QualType KmpInt32Ty,
-                                         QualType KmpRoutineEntryPointerQTy) {
+namespace {
+struct PrivateHelpersTy {
+  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
+                   const VarDecl *PrivateElemInit)
+      : Original(Original), PrivateCopy(PrivateCopy),
+        PrivateElemInit(PrivateElemInit) {}
+  const VarDecl *Original;
+  const VarDecl *PrivateCopy;
+  const VarDecl *PrivateElemInit;
+};
+typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
+} // namespace
+
+static RecordDecl *
+createPrivatesRecordDecl(CodeGenModule &CGM,
+                         const ArrayRef<PrivateDataTy> Privates) {
+  if (!Privates.empty()) {
+    auto &C = CGM.getContext();
+    // Build struct .kmp_privates.t {
+    //         /*  private vars  */
+    //       };
+    auto *RD = C.buildImplicitRecord(".kmp_privates.t");
+    RD->startDefinition();
+    for (auto &&Pair : Privates) {
+      auto Type = Pair.second.Original->getType();
+      Type = Type.getNonReferenceType();
+      addFieldToRecordDecl(C, RD, Type);
+    }
+    RD->completeDefinition();
+    return RD;
+  }
+  return nullptr;
+}
+
+static RecordDecl *
+createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
+                         QualType KmpRoutineEntryPointerQTy) {
   auto &C = CGM.getContext();
   // Build struct kmp_task_t {
   //         void *              shareds;
   //         kmp_routine_entry_t routine;
   //         kmp_int32           part_id;
   //         kmp_routine_entry_t destructors;
-  //         /*  private vars  */
   //       };
   auto *RD = C.buildImplicitRecord("kmp_task_t");
   RD->startDefinition();
@@ -1485,29 +1693,48 @@
   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
   addFieldToRecordDecl(C, RD, KmpInt32Ty);
   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
-  // TODO: add private fields.
   RD->completeDefinition();
-  return C.getRecordType(RD);
+  return RD;
+}
+
+static RecordDecl *
+createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
+                                     const ArrayRef<PrivateDataTy> Privates) {
+  auto &C = CGM.getContext();
+  // Build struct kmp_task_t_with_privates {
+  //         kmp_task_t task_data;
+  //         .kmp_privates_t. privates;
+  //       };
+  auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
+  RD->startDefinition();
+  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
+  if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
+    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
+  }
+  RD->completeDefinition();
+  return RD;
 }
 
 /// \brief Emit a proxy function which accepts kmp_task_t as the second
 /// argument.
 /// \code
 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
-///   TaskFunction(gtid, tt->part_id, tt->shareds);
+///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
+///   tt->shareds);
 ///   return 0;
 /// }
 /// \endcode
 static llvm::Value *
 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
-                      QualType KmpInt32Ty, QualType KmpTaskTPtrQTy,
+                      QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
+                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
-                      llvm::Type *KmpTaskTTy) {
+                      llvm::Value *TaskPrivatesMap) {
   auto &C = CGM.getContext();
   FunctionArgList Args;
   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
-                                /*Id=*/nullptr, KmpTaskTPtrQTy);
+                                /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
   Args.push_back(&GtidArg);
   Args.push_back(&TaskTypeArg);
   FunctionType::ExtInfo Info;
@@ -1523,27 +1750,42 @@
   CGF.disableDebugInfo();
   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
 
-  // TaskFunction(gtid, tt->part_id, tt->shareds);
+  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
+  // tt->task_data.shareds);
   auto *GtidParam = CGF.EmitLoadOfScalar(
       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false,
       C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
-  auto TaskTypeArgAddr = CGF.EmitLoadOfScalar(
-      CGF.GetAddrOfLocalVar(&TaskTypeArg), /*Volatile=*/false,
-      CGM.PointerAlignInBytes, KmpTaskTPtrQTy, Loc);
-  auto *PartidPtr = CGF.Builder.CreateStructGEP(KmpTaskTTy, TaskTypeArgAddr,
-                                                /*Idx=*/KmpTaskTPartId);
-  auto *PartidParam = CGF.EmitLoadOfScalar(
-      PartidPtr, /*Volatile=*/false,
-      C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
-  auto *SharedsPtr = CGF.Builder.CreateStructGEP(KmpTaskTTy, TaskTypeArgAddr,
-                                                 /*Idx=*/KmpTaskTShareds);
-  auto *SharedsParam =
-      CGF.EmitLoadOfScalar(SharedsPtr, /*Volatile=*/false,
-                           CGM.PointerAlignInBytes, C.VoidPtrTy, Loc);
-  llvm::Value *CallArgs[] = {
-      GtidParam, PartidParam,
-      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-          SharedsParam, CGF.ConvertTypeForMem(SharedsPtrTy))};
+  auto *TaskTypeArgAddr = CGF.Builder.CreateAlignedLoad(
+      CGF.GetAddrOfLocalVar(&TaskTypeArg), CGM.PointerAlignInBytes);
+  LValue TDBase =
+      CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy);
+  auto *KmpTaskTWithPrivatesQTyRD =
+      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
+  LValue Base =
+      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
+  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
+  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
+  auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
+  auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();
+
+  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
+  auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
+  auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+      CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
+      CGF.ConvertTypeForMem(SharedsPtrTy));
+
+  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
+  llvm::Value *PrivatesParam;
+  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
+    auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
+    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+        PrivatesLVal.getAddress(), CGF.VoidPtrTy);
+  } else {
+    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
+  }
+
+  llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
+                             TaskPrivatesMap, SharedsParam};
   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
   CGF.EmitStoreThroughLValue(
       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
@@ -1552,28 +1794,216 @@
   return TaskEntry;
 }
 
-void CGOpenMPRuntime::emitTaskCall(
-    CodeGenFunction &CGF, SourceLocation Loc, bool Tied,
-    llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
-    llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds) {
+static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
+                                            SourceLocation Loc,
+                                            QualType KmpInt32Ty,
+                                            QualType KmpTaskTWithPrivatesPtrQTy,
+                                            QualType KmpTaskTWithPrivatesQTy) {
   auto &C = CGM.getContext();
+  FunctionArgList Args;
+  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
+  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
+                                /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
+  Args.push_back(&GtidArg);
+  Args.push_back(&TaskTypeArg);
+  FunctionType::ExtInfo Info;
+  auto &DestructorFnInfo =
+      CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
+                                                    /*isVariadic=*/false);
+  auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
+  auto *DestructorFn =
+      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
+                             ".omp_task_destructor.", &CGM.getModule());
+  CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, DestructorFnInfo, DestructorFn);
+  CodeGenFunction CGF(CGM);
+  CGF.disableDebugInfo();
+  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
+                    Args);
+
+  auto *TaskTypeArgAddr = CGF.Builder.CreateAlignedLoad(
+      CGF.GetAddrOfLocalVar(&TaskTypeArg), CGM.PointerAlignInBytes);
+  LValue Base =
+      CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy);
+  auto *KmpTaskTWithPrivatesQTyRD =
+      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
+  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
+  Base = CGF.EmitLValueForField(Base, *FI);
+  for (auto *Field :
+       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
+    if (auto DtorKind = Field->getType().isDestructedType()) {
+      auto FieldLValue = CGF.EmitLValueForField(Base, Field);
+      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
+    }
+  }
+  CGF.FinishFunction();
+  return DestructorFn;
+}
+
+/// \brief Emit a privates mapping function for correct handling of private and
+/// firstprivate variables.
+/// \code
+/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
+/// **noalias priv1,...,  <tyn> **noalias privn) {
+///   *priv1 = &.privates.priv1;
+///   ...;
+///   *privn = &.privates.privn;
+/// }
+/// \endcode
+static llvm::Value *
+emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
+                               const ArrayRef<const Expr *> PrivateVars,
+                               const ArrayRef<const Expr *> FirstprivateVars,
+                               QualType PrivatesQTy,
+                               const ArrayRef<PrivateDataTy> Privates) {
+  auto &C = CGM.getContext();
+  FunctionArgList Args;
+  ImplicitParamDecl TaskPrivatesArg(
+      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+      C.getPointerType(PrivatesQTy).withConst().withRestrict());
+  Args.push_back(&TaskPrivatesArg);
+  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
+  unsigned Counter = 1;
+  for (auto *E: PrivateVars) {
+    Args.push_back(ImplicitParamDecl::Create(
+        C, /*DC=*/nullptr, Loc,
+        /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
+                            .withConst()
+                            .withRestrict()));
+    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+    PrivateVarsPos[VD] = Counter;
+    ++Counter;
+  }
+  for (auto *E : FirstprivateVars) {
+    Args.push_back(ImplicitParamDecl::Create(
+        C, /*DC=*/nullptr, Loc,
+        /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
+                            .withConst()
+                            .withRestrict()));
+    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+    PrivateVarsPos[VD] = Counter;
+    ++Counter;
+  }
+  FunctionType::ExtInfo Info;
+  auto &TaskPrivatesMapFnInfo =
+      CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info,
+                                                    /*isVariadic=*/false);
+  auto *TaskPrivatesMapTy =
+      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
+  auto *TaskPrivatesMap = llvm::Function::Create(
+      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
+      ".omp_task_privates_map.", &CGM.getModule());
+  CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskPrivatesMapFnInfo,
+                                TaskPrivatesMap);
+  TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
+  CodeGenFunction CGF(CGM);
+  CGF.disableDebugInfo();
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
+                    TaskPrivatesMapFnInfo, Args);
+
+  // *privi = &.privates.privi;
+  auto *TaskPrivatesArgAddr = CGF.Builder.CreateAlignedLoad(
+      CGF.GetAddrOfLocalVar(&TaskPrivatesArg), CGM.PointerAlignInBytes);
+  LValue Base =
+      CGF.MakeNaturalAlignAddrLValue(TaskPrivatesArgAddr, PrivatesQTy);
+  auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
+  Counter = 0;
+  for (auto *Field : PrivatesQTyRD->fields()) {
+    auto FieldLVal = CGF.EmitLValueForField(Base, Field);
+    auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
+    auto RefLVal = CGF.MakeNaturalAlignAddrLValue(CGF.GetAddrOfLocalVar(VD),
+                                                  VD->getType());
+    auto RefLoadRVal = CGF.EmitLoadOfLValue(RefLVal, Loc);
+    CGF.EmitStoreOfScalar(
+        FieldLVal.getAddress(),
+        CGF.MakeNaturalAlignAddrLValue(RefLoadRVal.getScalarVal(),
+                                       RefLVal.getType()->getPointeeType()));
+    ++Counter;
+  }
+  CGF.FinishFunction();
+  return TaskPrivatesMap;
+}
+
+static int array_pod_sort_comparator(const PrivateDataTy *P1,
+                                     const PrivateDataTy *P2) {
+  return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
+}
+
+void CGOpenMPRuntime::emitTaskCall(
+    CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
+    bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
+    llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds,
+    const Expr *IfCond, const ArrayRef<const Expr *> PrivateVars,
+    const ArrayRef<const Expr *> PrivateCopies,
+    const ArrayRef<const Expr *> FirstprivateVars,
+    const ArrayRef<const Expr *> FirstprivateCopies,
+    const ArrayRef<const Expr *> FirstprivateInits) {
+  auto &C = CGM.getContext();
+  llvm::SmallVector<PrivateDataTy, 8> Privates;
+  // Aggregate privates and sort them by the alignment.
+  auto I = PrivateCopies.begin();
+  for (auto *E : PrivateVars) {
+    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+    Privates.push_back(std::make_pair(
+        C.getTypeAlignInChars(VD->getType()),
+        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
+                         /*PrivateElemInit=*/nullptr)));
+    ++I;
+  }
+  I = FirstprivateCopies.begin();
+  auto IElemInitRef = FirstprivateInits.begin();
+  for (auto *E : FirstprivateVars) {
+    auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+    Privates.push_back(std::make_pair(
+        C.getTypeAlignInChars(VD->getType()),
+        PrivateHelpersTy(
+            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
+            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
+    ++I, ++IElemInitRef;
+  }
+  llvm::array_pod_sort(Privates.begin(), Privates.end(),
+                       array_pod_sort_comparator);
   auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
   // Build type kmp_routine_entry_t (if not built yet).
   emitKmpRoutineEntryT(KmpInt32Ty);
+  // Build type kmp_task_t (if not built yet).
+  if (KmpTaskTQTy.isNull()) {
+    KmpTaskTQTy = C.getRecordType(
+        createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy));
+  }
+  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
   // Build particular struct kmp_task_t for the given task.
-  auto KmpTaskQTy =
-      createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy);
-  QualType KmpTaskTPtrQTy = C.getPointerType(KmpTaskQTy);
-  auto *KmpTaskTTy = CGF.ConvertType(KmpTaskQTy);
-  auto *KmpTaskTPtrTy = KmpTaskTTy->getPointerTo();
-  auto KmpTaskTySize = CGM.getSize(C.getTypeSizeInChars(KmpTaskQTy));
+  auto *KmpTaskTWithPrivatesQTyRD =
+      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
+  auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
+  QualType KmpTaskTWithPrivatesPtrQTy =
+      C.getPointerType(KmpTaskTWithPrivatesQTy);
+  auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
+  auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
+  auto KmpTaskTWithPrivatesTySize =
+      CGM.getSize(C.getTypeSizeInChars(KmpTaskTWithPrivatesQTy));
   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
 
+  // Emit initial values for private copies (if any).
+  llvm::Value *TaskPrivatesMap = nullptr;
+  auto *TaskPrivatesMapTy =
+      std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
+                3)
+          ->getType();
+  if (!Privates.empty()) {
+    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
+    TaskPrivatesMap = emitTaskPrivateMappingFunction(
+        CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates);
+    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+        TaskPrivatesMap, TaskPrivatesMapTy);
+  } else {
+    TaskPrivatesMap = llvm::ConstantPointerNull::get(
+        cast<llvm::PointerType>(TaskPrivatesMapTy));
+  }
   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
   // kmp_task_t *tt);
-  auto *TaskEntry =
-      emitProxyTaskFunction(CGM, Loc, KmpInt32Ty, KmpTaskTPtrQTy, SharedsPtrTy,
-                            TaskFunction, KmpTaskTTy);
+  auto *TaskEntry = emitProxyTaskFunction(
+      CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy,
+      KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap);
 
   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
@@ -1592,41 +2022,151 @@
           : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
   auto SharedsSize = C.getTypeSizeInChars(SharedsTy);
-  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
-                              getThreadID(CGF, Loc), TaskFlags, KmpTaskTySize,
-                              CGM.getSize(SharedsSize),
-                              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-                                  TaskEntry, KmpRoutineEntryPtrTy)};
+  llvm::Value *AllocArgs[] = {
+      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
+      KmpTaskTWithPrivatesTySize, CGM.getSize(SharedsSize),
+      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
+                                                      KmpRoutineEntryPtrTy)};
   auto *NewTask = CGF.EmitRuntimeCall(
       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
-  auto *NewTaskNewTaskTTy =
-      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(NewTask, KmpTaskTPtrTy);
+  auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+      NewTask, KmpTaskTWithPrivatesPtrTy);
+  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
+                                               KmpTaskTWithPrivatesQTy);
+  LValue TDBase =
+      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
   // Fill the data in the resulting kmp_task_t record.
   // Copy shareds if there are any.
-  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty())
-    CGF.EmitAggregateCopy(
-        CGF.EmitLoadOfScalar(
-            CGF.Builder.CreateStructGEP(KmpTaskTTy, NewTaskNewTaskTTy,
-                                        /*Idx=*/KmpTaskTShareds),
-            /*Volatile=*/false, CGM.PointerAlignInBytes, SharedsPtrTy, Loc),
-        Shareds, SharedsTy);
-  // TODO: generate function with destructors for privates.
+  llvm::Value *KmpTaskSharedsPtr = nullptr;
+  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
+    KmpTaskSharedsPtr = CGF.EmitLoadOfScalar(
+        CGF.EmitLValueForField(
+            TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
+        Loc);
+    CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
+  }
+  // Emit initial values for private copies (if any).
+  bool NeedsCleanup = false;
+  if (!Privates.empty()) {
+    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
+    auto PrivatesBase = CGF.EmitLValueForField(Base, *FI);
+    FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
+    LValue SharedsBase;
+    if (!FirstprivateVars.empty()) {
+      SharedsBase = CGF.MakeNaturalAlignAddrLValue(
+          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+              KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
+          SharedsTy);
+    }
+    CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
+        cast<CapturedStmt>(*D.getAssociatedStmt()));
+    for (auto &&Pair : Privates) {
+      auto *VD = Pair.second.PrivateCopy;
+      auto *Init = VD->getAnyInitializer();
+      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
+      if (Init) {
+        if (auto *Elem = Pair.second.PrivateElemInit) {
+          auto *OriginalVD = Pair.second.Original;
+          auto *SharedField = CapturesInfo.lookup(OriginalVD);
+          auto SharedRefLValue =
+              CGF.EmitLValueForField(SharedsBase, SharedField);
+          QualType Type = OriginalVD->getType();
+          if (Type->isArrayType()) {
+            // Initialize firstprivate array.
+            if (!isa<CXXConstructExpr>(Init) ||
+                CGF.isTrivialInitializer(Init)) {
+              // Perform simple memcpy.
+              CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
+                                      SharedRefLValue.getAddress(), Type);
+            } else {
+              // Initialize firstprivate array using element-by-element
+              // initialization.
+              CGF.EmitOMPAggregateAssign(
+                  PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
+                  Type, [&CGF, Elem, Init, &CapturesInfo](
+                            llvm::Value *DestElement, llvm::Value *SrcElement) {
+                    // Clean up any temporaries needed by the initialization.
+                    CodeGenFunction::OMPPrivateScope InitScope(CGF);
+                    InitScope.addPrivate(Elem, [SrcElement]() -> llvm::Value *{
+                      return SrcElement;
+                    });
+                    (void)InitScope.Privatize();
+                    // Emit initialization for single element.
+                    auto *OldCapturedStmtInfo = CGF.CapturedStmtInfo;
+                    CGF.CapturedStmtInfo = &CapturesInfo;
+                    CGF.EmitAnyExprToMem(Init, DestElement,
+                                         Init->getType().getQualifiers(),
+                                         /*IsInitializer=*/false);
+                    CGF.CapturedStmtInfo = OldCapturedStmtInfo;
+                  });
+            }
+          } else {
+            CodeGenFunction::OMPPrivateScope InitScope(CGF);
+            InitScope.addPrivate(Elem, [SharedRefLValue]() -> llvm::Value *{
+              return SharedRefLValue.getAddress();
+            });
+            (void)InitScope.Privatize();
+            auto *OldCapturedStmtInfo = CGF.CapturedStmtInfo;
+            CGF.CapturedStmtInfo = &CapturesInfo;
+            CGF.EmitExprAsInit(Init, VD, PrivateLValue,
+                               /*capturedByInit=*/false);
+            CGF.CapturedStmtInfo = OldCapturedStmtInfo;
+          }
+        } else {
+          CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
+        }
+      }
+      NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType();
+      ++FI;
+    }
+  }
   // Provide pointer to function with destructors for privates.
-  CGF.Builder.CreateAlignedStore(
-      llvm::ConstantPointerNull::get(
-          cast<llvm::PointerType>(KmpRoutineEntryPtrTy)),
-      CGF.Builder.CreateStructGEP(KmpTaskTTy, NewTaskNewTaskTTy,
-                                  /*Idx=*/KmpTaskTDestructors),
-      CGM.PointerAlignInBytes);
-
+  llvm::Value *DestructorFn =
+      NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty,
+                                             KmpTaskTWithPrivatesPtrQTy,
+                                             KmpTaskTWithPrivatesQTy)
+                   : llvm::ConstantPointerNull::get(
+                         cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
+  LValue Destructor = CGF.EmitLValueForField(
+      TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors));
+  CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+                            DestructorFn, KmpRoutineEntryPtrTy),
+                        Destructor);
   // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
   // libcall.
   // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
   // *new_task);
-  llvm::Value *TaskArgs[] = {emitUpdateLocation(CGF, Loc),
-                             getThreadID(CGF, Loc), NewTask};
-  // TODO: add check for untied tasks.
-  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
+  auto *ThreadID = getThreadID(CGF, Loc);
+  llvm::Value *TaskArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID, NewTask};
+  auto &&ThenCodeGen = [this, &TaskArgs](CodeGenFunction &CGF) {
+    // TODO: add check for untied tasks.
+    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
+  };
+  typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value>
+      IfCallEndCleanup;
+  auto &&ElseCodeGen =
+      [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry](
+          CodeGenFunction &CGF) {
+        CodeGenFunction::RunCleanupsScope LocalScope(CGF);
+        CGF.EmitRuntimeCall(
+            createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs);
+        // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
+        // kmp_task_t *new_task);
+        CGF.EHStack.pushCleanup<IfCallEndCleanup>(
+            NormalAndEHCleanup,
+            createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
+            llvm::makeArrayRef(TaskArgs));
+
+        // Call proxy_task_entry(gtid, new_task);
+        llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
+        CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
+      };
+  if (IfCond) {
+    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
+  } else {
+    CodeGenFunction::RunCleanupsScope Scope(CGF);
+    ThenCodeGen(CGF);
+  }
 }
 
 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
@@ -1728,6 +2268,7 @@
   //  ...
   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
   //  ...
+  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
   // break;
   // default:;
   // }
@@ -1804,11 +2345,12 @@
         ThreadId,  // i32 <gtid>
         Lock       // kmp_critical_name *&<lock>
     };
-    CGF.EHStack.pushCleanup<CallEndCleanup>(
-        NormalAndEHCleanup,
-        createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
-                                         : OMPRTL__kmpc_end_reduce),
-        llvm::makeArrayRef(EndArgs));
+    CGF.EHStack
+        .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
+            NormalAndEHCleanup,
+            createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
+                                             : OMPRTL__kmpc_end_reduce),
+            llvm::makeArrayRef(EndArgs));
     for (auto *E : ReductionOps) {
       CGF.EmitIgnoredExpr(E);
     }
@@ -1827,28 +2369,43 @@
 
   {
     CodeGenFunction::RunCleanupsScope Scope(CGF);
+    if (!WithNowait) {
+      // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
+      llvm::Value *EndArgs[] = {
+          IdentTLoc, // ident_t *<loc>
+          ThreadId,  // i32 <gtid>
+          Lock       // kmp_critical_name *&<lock>
+      };
+      CGF.EHStack
+          .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
+              NormalAndEHCleanup,
+              createRuntimeFunction(OMPRTL__kmpc_end_reduce),
+              llvm::makeArrayRef(EndArgs));
+    }
     auto I = LHSExprs.begin();
     for (auto *E : ReductionOps) {
       const Expr *XExpr = nullptr;
       const Expr *EExpr = nullptr;
       const Expr *UpExpr = nullptr;
       BinaryOperatorKind BO = BO_Comma;
-      // Try to emit update expression as a simple atomic.
-      if (auto *ACO = dyn_cast<AbstractConditionalOperator>(E)) {
-        // If this is a conditional operator, analyze it's condition for
-        // min/max reduction operator.
-        E = ACO->getCond();
-      }
       if (auto *BO = dyn_cast<BinaryOperator>(E)) {
         if (BO->getOpcode() == BO_Assign) {
           XExpr = BO->getLHS();
           UpExpr = BO->getRHS();
         }
       }
-      // Analyze RHS part of the whole expression.
-      if (UpExpr) {
+      // Try to emit update expression as a simple atomic.
+      auto *RHSExpr = UpExpr;
+      if (RHSExpr) {
+        // Analyze RHS part of the whole expression.
+        if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
+                RHSExpr->IgnoreParenImpCasts())) {
+          // If this is a conditional operator, analyze its condition for
+          // min/max reduction operator.
+          RHSExpr = ACO->getCond();
+        }
         if (auto *BORHS =
-                dyn_cast<BinaryOperator>(UpExpr->IgnoreParenImpCasts())) {
+                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
           EExpr = BORHS->getRHS();
           BO = BORHS->getOpcode();
         }
@@ -1888,6 +2445,15 @@
   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
 }
 
+void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
+                                       SourceLocation Loc) {
+  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
+  // global_tid);
+  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
+  // Ignore return result until untied tasks are supported.
+  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
+}
+
 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &CodeGen) {
   InlinedOpenMPRegionRAII Region(CGF, CodeGen);
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h
index fa59930..f5aa4a5 100644
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -100,7 +100,7 @@
     // new_task);
     OMPRTL__kmpc_omp_task,
     // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
-    // kmp_int32 cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
+    // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
     // kmp_int32 didit);
     OMPRTL__kmpc_copyprivate,
     // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
@@ -118,6 +118,19 @@
     // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
     // kmp_critical_name *lck);
     OMPRTL__kmpc_end_reduce_nowait,
+    // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
+    // kmp_task_t * new_task);
+    OMPRTL__kmpc_omp_task_begin_if0,
+    // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
+    // kmp_task_t * new_task);
+    OMPRTL__kmpc_omp_task_complete_if0,
+    // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
+    OMPRTL__kmpc_ordered,
+    // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
+    OMPRTL__kmpc_end_ordered,
+    // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
+    // global_tid);
+    OMPRTL__kmpc_omp_taskwait,
   };
 
   /// \brief Values for bit flags used in the ident_t to describe the fields.
@@ -219,6 +232,16 @@
   /// \brief Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);
   llvm::Type *KmpRoutineEntryPtrTy;
   QualType KmpRoutineEntryPtrQTy;
+  /// \brief Type typedef struct kmp_task {
+  ///    void *              shareds; /**< pointer to block of pointers to
+  ///    shared vars   */
+  ///    kmp_routine_entry_t routine; /**< pointer to routine to call for
+  ///    executing task */
+  ///    kmp_int32           part_id; /**< part id for the task */
+  ///    kmp_routine_entry_t destructors; /* pointer to function to invoke
+  ///    destructors of firstprivate C++ objects */
+  /// } kmp_task_t;
+  QualType KmpTaskTQTy;
 
   /// \brief Build type kmp_routine_entry_t (if not built yet).
   void emitKmpRoutineEntryT(QualType KmpInt32Ty);
@@ -252,6 +275,10 @@
   /// size \a IVSize and sign \a IVSigned.
   llvm::Constant *createDispatchNextFunction(unsigned IVSize, bool IVSigned);
 
+  /// \brief Returns __kmpc_dispatch_fini_* runtime function for the specified
+  /// size \a IVSize and sign \a IVSigned.
+  llvm::Constant *createDispatchFiniFunction(unsigned IVSize, bool IVSigned);
+
   /// \brief If the specified mangled name is not in the module, create and
   /// return threadprivate cache object. This object is a pointer's worth of
   /// storage that's reserved for use by the OpenMP runtime.
@@ -328,26 +355,20 @@
   ///
   void functionFinished(CodeGenFunction &CGF);
 
-  /// \brief Emits code for parallel call of the \a OutlinedFn with variables
-  /// captured in a record which address is stored in \a CapturedStruct.
+  /// \brief Emits code for parallel or serial call of the \a OutlinedFn with
+  /// variables captured in a record which address is stored in \a
+  /// CapturedStruct.
   /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
   /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
   /// \param CapturedStruct A pointer to the record with the references to
   /// variables used in \a OutlinedFn function.
+  /// \param IfCond Condition in the associated 'if' clause, if it was
+  /// specified, nullptr otherwise.
   ///
   virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                 llvm::Value *OutlinedFn,
-                                llvm::Value *CapturedStruct);
-
-  /// \brief Emits code for serial call of the \a OutlinedFn with variables
-  /// captured in a record which address is stored in \a CapturedStruct.
-  /// \param OutlinedFn Outlined function to be run in serial mode.
-  /// \param CapturedStruct A pointer to the record with the references to
-  /// variables used in \a OutlinedFn function.
-  ///
-  virtual void emitSerialCall(CodeGenFunction &CGF, SourceLocation Loc,
-                              llvm::Value *OutlinedFn,
-                              llvm::Value *CapturedStruct);
+                                llvm::Value *CapturedStruct,
+                                const Expr *IfCond);
 
   /// \brief Emits a critical region.
   /// \param CriticalName Name of the critical region.
@@ -378,6 +399,13 @@
                                 ArrayRef<const Expr *> SrcExprs,
                                 ArrayRef<const Expr *> AssignmentOps);
 
+  /// \brief Emit an ordered region.
+  /// \param OrderedOpGen Generator for the statement associated with the given
+  /// critical region.
+  virtual void emitOrderedRegion(CodeGenFunction &CGF,
+                                 const RegionCodeGenTy &OrderedOpGen,
+                                 SourceLocation Loc);
+
   /// \brief Emit an implicit/explicit barrier for OpenMP threads.
   /// \param Kind Directive for which this implicit barrier call must be
   /// generated. Must be OMPD_barrier for explicit barrier generation.
@@ -411,6 +439,7 @@
   /// \param SchedKind Schedule kind, specified by the 'schedule' clause.
   /// \param IVSize Size of the iteration variable in bits.
   /// \param IVSigned Sign of the interation variable.
+  /// \param Ordered true if loop is ordered, false otherwise.
   /// \param IL Address of the output variable in which the flag of the
   /// last iteration is returned.
   /// \param LB Address of the output variable in which the lower iteration
@@ -424,19 +453,29 @@
   ///
   virtual void emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
                            OpenMPScheduleClauseKind SchedKind, unsigned IVSize,
-                           bool IVSigned, llvm::Value *IL, llvm::Value *LB,
-                           llvm::Value *UB, llvm::Value *ST,
+                           bool IVSigned, bool Ordered, llvm::Value *IL,
+                           llvm::Value *LB, llvm::Value *UB, llvm::Value *ST,
                            llvm::Value *Chunk = nullptr);
 
   /// \brief Call the appropriate runtime routine to notify that we finished
+  /// iteration of the ordered loop with the dynamic scheduling.
+  ///
+  /// \param CGF Reference to current CodeGenFunction.
+  /// \param Loc Clang source location.
+  /// \param IVSize Size of the iteration variable in bits.
+  /// \param IVSigned Sign of the iteration variable.
+  ///
+  virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF,
+                                          SourceLocation Loc, unsigned IVSize,
+                                          bool IVSigned);
+
+  /// \brief Call the appropriate runtime routine to notify that we finished
   /// all the work with current loop.
   ///
   /// \param CGF Reference to current CodeGenFunction.
   /// \param Loc Clang source location.
-  /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause.
   ///
-  virtual void emitForFinish(CodeGenFunction &CGF, SourceLocation Loc,
-                             OpenMPScheduleClauseKind ScheduleKind);
+  virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc);
 
   /// Call __kmpc_dispatch_next(
   ///          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
@@ -495,7 +534,7 @@
                          SourceLocation Loc);
 
   /// \brief Emit task region for the task directive. The task region is
-  /// emmitted in several steps:
+  /// emitted in several steps:
   /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32
   /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
   /// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the
@@ -511,6 +550,7 @@
   /// 4. Emit a call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid,
   /// kmp_task_t *new_task), where new_task is a resulting structure from
   /// previous items.
+  /// \param D Current task directive.
   /// \param Tied true if the task is tied (the task is tied to the thread that
   /// can suspend its task region), false - untied (the task is not tied to any
   /// thread).
@@ -522,10 +562,29 @@
   /// \param SharedsTy A type which contains references the shared variables.
   /// \param Shareds Context with the list of shared variables from the \a
   /// TaskFunction.
-  virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, bool Tied,
+  /// \param IfCond Not a nullptr if 'if' clause was specified, nullptr
+  /// otherwise.
+  /// \param PrivateVars List of references to private variables for the task
+  /// directive.
+  /// \param PrivateCopies List of private copies for each private variable in
+  /// \p PrivateVars.
+  /// \param FirstprivateVars List of references to firstprivate variables for
+  /// the task directive.
+  /// \param FirstprivateCopies List of private copies for each private variable
+  /// in \p FirstprivateVars.
+  /// \param FirstprivateInits List of references to auto generated variables
+  /// used for initialization of a single array element. Used if firstprivate
+  /// variable is of array type.
+  virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
+                            const OMPExecutableDirective &D, bool Tied,
                             llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
                             llvm::Value *TaskFunction, QualType SharedsTy,
-                            llvm::Value *Shareds);
+                            llvm::Value *Shareds, const Expr *IfCond,
+                            const ArrayRef<const Expr *> PrivateVars,
+                            const ArrayRef<const Expr *> PrivateCopies,
+                            const ArrayRef<const Expr *> FirstprivateVars,
+                            const ArrayRef<const Expr *> FirstprivateCopies,
+                            const ArrayRef<const Expr *> FirstprivateInits);
 
   /// \brief Emit code for the directive that does not require outlining.
   ///
@@ -574,6 +633,9 @@
                              ArrayRef<const Expr *> RHSExprs,
                              ArrayRef<const Expr *> ReductionOps,
                              bool WithNowait);
+
+  /// \brief Emit code for 'taskwait' directive.
+  virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc);
 };
 
 } // namespace CodeGen
diff --git a/lib/CodeGen/CGRecordLayoutBuilder.cpp b/lib/CodeGen/CGRecordLayoutBuilder.cpp
index 202ea97..c89d5cc 100644
--- a/lib/CodeGen/CGRecordLayoutBuilder.cpp
+++ b/lib/CodeGen/CGRecordLayoutBuilder.cpp
@@ -99,10 +99,25 @@
   MemberInfo StorageInfo(CharUnits Offset, llvm::Type *Data) {
     return MemberInfo(Offset, MemberInfo::Field, Data);
   }
-  bool useMSABI() {
+
+  /// The Microsoft bitfield layout rule allocates discrete storage
+  /// units of the field's formal type and only combines adjacent
+  /// fields of the same formal type.  We want to emit a layout with
+  /// these discrete storage units instead of combining them into a
+  /// continuous run.
+  bool isDiscreteBitFieldABI() {
     return Context.getTargetInfo().getCXXABI().isMicrosoft() ||
            D->isMsStruct(Context);
   }
+
+  /// The Itanium base layout rule allows virtual bases to overlap
+  /// other bases, which complicates layout in specific ways.
+  ///
+  /// Note specifically that the ms_struct attribute doesn't change this.
+  bool isOverlappingVBaseABI() {
+    return !Context.getTargetInfo().getCXXABI().isMicrosoft();
+  }
+
   /// \brief Wraps llvm::Type::getIntNTy with some implicit arguments.
   llvm::Type *getIntNType(uint64_t NumBits) {
     return llvm::Type::getIntNTy(Types.getLLVMContext(),
@@ -119,8 +134,9 @@
   /// for itanium bitfields that are smaller than their declared type.
   llvm::Type *getStorageType(const FieldDecl *FD) {
     llvm::Type *Type = Types.ConvertTypeForMem(FD->getType());
-    return useMSABI() || !FD->isBitField() ? Type :
-        getIntNType(std::min(FD->getBitWidthValue(Context),
+    if (!FD->isBitField()) return Type;
+    if (isDiscreteBitFieldABI()) return Type;
+    return getIntNType(std::min(FD->getBitWidthValue(Context),
                              (unsigned)Context.toBits(getSize(Type))));
   }
   /// \brief Gets the llvm Basesubobject type from a CXXRecordDecl.
@@ -137,15 +153,10 @@
     return CharUnits::fromQuantity(DataLayout.getABITypeAlignment(Type));
   }
   bool isZeroInitializable(const FieldDecl *FD) {
-    const Type *Type = FD->getType()->getBaseElementTypeUnsafe();
-    if (const MemberPointerType *MPT = Type->getAs<MemberPointerType>())
-      return Types.getCXXABI().isZeroInitializable(MPT);
-    if (const RecordType *RT = Type->getAs<RecordType>())
-      return isZeroInitializable(RT->getDecl());
-    return true;
+    return Types.isZeroInitializable(FD->getType());
   }
   bool isZeroInitializable(const RecordDecl *RD) {
-    return Types.getCGRecordLayout(RD).isZeroInitializable();
+    return Types.isZeroInitializable(RD);
   }
   void appendPaddingBytes(CharUnits Size) {
     if (!Size.isZero())
@@ -303,9 +314,13 @@
     // If this is the case, then we aught not to try and come up with a "better"
     // type, it might not be very easy to come up with a Constant which
     // correctly initializes it.
-    if (!SeenNamedMember && Field->getDeclName()) {
-      SeenNamedMember = true;
-      if (!isZeroInitializable(Field)) {
+    if (!SeenNamedMember) {
+      SeenNamedMember = Field->getIdentifier();
+      if (!SeenNamedMember)
+        if (const auto *FieldRD =
+                dyn_cast_or_null<RecordDecl>(Field->getType()->getAsTagDecl()))
+        SeenNamedMember = FieldRD->findFirstNamedDataMember();
+      if (SeenNamedMember && !isZeroInitializable(Field)) {
         IsZeroInitializable = IsZeroInitializableAsBase = false;
         StorageType = FieldType;
       }
@@ -365,7 +380,7 @@
   // used to determine if the ASTRecordLayout is treating these two bitfields as
   // contiguous.  StartBitOffset is offset of the beginning of the Run.
   uint64_t StartBitOffset, Tail = 0;
-  if (useMSABI()) {
+  if (isDiscreteBitFieldABI()) {
     for (; Field != FieldEnd; ++Field) {
       uint64_t BitOffset = getFieldBitOffset(*Field);
       // Zero-width bitfields end runs.
@@ -438,8 +453,12 @@
   for (const auto &Base : RD->bases()) {
     if (Base.isVirtual())
       continue;
+
+    // Bases can be zero-sized even if not technically empty if they
+    // contain only a trailing array member.
     const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl();
-    if (!BaseDecl->isEmpty())
+    if (!BaseDecl->isEmpty() &&
+        !Context.getASTRecordLayout(BaseDecl).getSize().isZero())
       Members.push_back(MemberInfo(Layout.getBaseClassOffset(BaseDecl),
           MemberInfo::Base, getStorageType(BaseDecl), BaseDecl));
   }
@@ -461,7 +480,7 @@
   // smaller than the nvsize.  Here we check to see if such a base is placed
   // before the nvsize and set the scissor offset to that, instead of the
   // nvsize.
-  if (!useMSABI())
+  if (isOverlappingVBaseABI())
     for (const auto &Base : RD->vbases()) {
       const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl();
       if (BaseDecl->isEmpty())
@@ -482,7 +501,8 @@
     CharUnits Offset = Layout.getVBaseClassOffset(BaseDecl);
     // If the vbase is a primary virtual base of some base, then it doesn't
     // get its own storage location but instead lives inside of that base.
-    if (!useMSABI() && Context.isNearlyEmpty(BaseDecl) &&
+    if (isOverlappingVBaseABI() &&
+        Context.isNearlyEmpty(BaseDecl) &&
         !hasOwnStorage(RD, BaseDecl)) {
       Members.push_back(MemberInfo(Offset, MemberInfo::VBase, nullptr,
                                    BaseDecl));
diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp
index 481fdbe..a79b3e3 100644
--- a/lib/CodeGen/CGStmt.cpp
+++ b/lib/CodeGen/CGStmt.cpp
@@ -423,9 +423,8 @@
     ResolveBranchFixups(Dest.getBlock());
   }
 
-  RegionCounter Cnt = getPGORegionCounter(D->getStmt());
   EmitBlock(Dest.getBlock());
-  Cnt.beginRegion(Builder);
+  incrementProfileCounter(D->getStmt());
 }
 
 /// Change the cleanup scope of the labels in this lexical scope to
@@ -513,7 +512,6 @@
   // C99 6.8.4.1: The first substatement is executed if the expression compares
   // unequal to 0.  The condition must be a scalar type.
   LexicalScope ConditionScope(*this, S.getCond()->getSourceRange());
-  RegionCounter Cnt = getPGORegionCounter(&S);
 
   if (S.getConditionVariable())
     EmitAutoVarDecl(*S.getConditionVariable());
@@ -532,7 +530,7 @@
     // This avoids emitting dead code and simplifies the CFG substantially.
     if (!ContainsLabel(Skipped)) {
       if (CondConstant)
-        Cnt.beginRegion(Builder);
+        incrementProfileCounter(&S);
       if (Executed) {
         RunCleanupsScope ExecutedScope(*this);
         EmitStmt(Executed);
@@ -549,11 +547,12 @@
   if (S.getElse())
     ElseBlock = createBasicBlock("if.else");
 
-  EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock, Cnt.getCount());
+  EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock,
+                       getProfileCount(S.getThen()));
 
   // Emit the 'then' code.
   EmitBlock(ThenBlock);
-  Cnt.beginRegion(Builder);
+  incrementProfileCounter(&S);
   {
     RunCleanupsScope ThenScope(*this);
     EmitStmt(S.getThen());
@@ -678,14 +677,12 @@
 
 void CodeGenFunction::EmitWhileStmt(const WhileStmt &S,
                                     ArrayRef<const Attr *> WhileAttrs) {
-  RegionCounter Cnt = getPGORegionCounter(&S);
-
   // Emit the header for the loop, which will also become
   // the continue target.
   JumpDest LoopHeader = getJumpDestInCurrentScope("while.cond");
   EmitBlock(LoopHeader.getBlock());
 
-  LoopStack.push(LoopHeader.getBlock());
+  LoopStack.push(LoopHeader.getBlock(), WhileAttrs);
 
   // Create an exit block for when the condition fails, which will
   // also become the break target.
@@ -724,9 +721,9 @@
     llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
     if (ConditionScope.requiresCleanups())
       ExitBlock = createBasicBlock("while.exit");
-    llvm::BranchInst *CondBr =
-        Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock,
-                             PGO.createLoopWeights(S.getCond(), Cnt));
+    llvm::BranchInst *CondBr = Builder.CreateCondBr(
+        BoolCondVal, LoopBody, ExitBlock,
+        createProfileWeightsForLoop(S.getCond(), getProfileCount(S.getBody())));
 
     if (ExitBlock != LoopExit.getBlock()) {
       EmitBlock(ExitBlock);
@@ -742,7 +739,7 @@
   {
     RunCleanupsScope BodyScope(*this);
     EmitBlock(LoopBody);
-    Cnt.beginRegion(Builder);
+    incrementProfileCounter(&S);
     EmitStmt(S.getBody());
   }
 
@@ -771,7 +768,7 @@
   JumpDest LoopExit = getJumpDestInCurrentScope("do.end");
   JumpDest LoopCond = getJumpDestInCurrentScope("do.cond");
 
-  RegionCounter Cnt = getPGORegionCounter(&S);
+  uint64_t ParentCount = getCurrentProfileCount();
 
   // Store the blocks to use for break and continue.
   BreakContinueStack.push_back(BreakContinue(LoopExit, LoopCond));
@@ -779,9 +776,9 @@
   // Emit the body of the loop.
   llvm::BasicBlock *LoopBody = createBasicBlock("do.body");
 
-  LoopStack.push(LoopBody);
+  LoopStack.push(LoopBody, DoAttrs);
 
-  EmitBlockWithFallThrough(LoopBody, Cnt);
+  EmitBlockWithFallThrough(LoopBody, &S);
   {
     RunCleanupsScope BodyScope(*this);
     EmitStmt(S.getBody());
@@ -808,9 +805,10 @@
 
   // As long as the condition is true, iterate the loop.
   if (EmitBoolCondBranch) {
-    llvm::BranchInst *CondBr =
-        Builder.CreateCondBr(BoolCondVal, LoopBody, LoopExit.getBlock(),
-                             PGO.createLoopWeights(S.getCond(), Cnt));
+    uint64_t BackedgeCount = getProfileCount(S.getBody()) - ParentCount;
+    llvm::BranchInst *CondBr = Builder.CreateCondBr(
+        BoolCondVal, LoopBody, LoopExit.getBlock(),
+        createProfileWeightsForLoop(S.getCond(), BackedgeCount));
 
     // Attach metadata to loop body conditional branch.
     EmitCondBrHints(LoopBody->getContext(), CondBr, DoAttrs);
@@ -837,8 +835,6 @@
   if (S.getInit())
     EmitStmt(S.getInit());
 
-  RegionCounter Cnt = getPGORegionCounter(&S);
-
   // Start the loop with a block that tests the condition.
   // If there's an increment, the continue scope will be overwritten
   // later.
@@ -846,7 +842,7 @@
   llvm::BasicBlock *CondBlock = Continue.getBlock();
   EmitBlock(CondBlock);
 
-  LoopStack.push(CondBlock);
+  LoopStack.push(CondBlock, ForAttrs);
 
   // If the for loop doesn't have an increment we can just use the
   // condition as the continue block.  Otherwise we'll need to create
@@ -880,9 +876,9 @@
     // C99 6.8.5p2/p4: The first substatement is executed if the expression
     // compares unequal to 0.  The condition must be a scalar type.
     llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
-    llvm::BranchInst *CondBr =
-        Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock,
-                             PGO.createLoopWeights(S.getCond(), Cnt));
+    llvm::BranchInst *CondBr = Builder.CreateCondBr(
+        BoolCondVal, ForBody, ExitBlock,
+        createProfileWeightsForLoop(S.getCond(), getProfileCount(S.getBody())));
 
     // Attach metadata to loop body conditional branch.
     EmitCondBrHints(ForBody->getContext(), CondBr, ForAttrs);
@@ -897,7 +893,7 @@
     // Treat it as a non-zero constant.  Don't even create a new block for the
     // body, just fall into it.
   }
-  Cnt.beginRegion(Builder);
+  incrementProfileCounter(&S);
 
   {
     // Create a separate cleanup scope for the body, in case it is not
@@ -938,15 +934,13 @@
   EmitStmt(S.getRangeStmt());
   EmitStmt(S.getBeginEndStmt());
 
-  RegionCounter Cnt = getPGORegionCounter(&S);
-
   // Start the loop with a block that tests the condition.
   // If there's an increment, the continue scope will be overwritten
   // later.
   llvm::BasicBlock *CondBlock = createBasicBlock("for.cond");
   EmitBlock(CondBlock);
 
-  LoopStack.push(CondBlock);
+  LoopStack.push(CondBlock, ForAttrs);
 
   // If there are any cleanups between here and the loop-exit scope,
   // create a block to stage a loop exit along.
@@ -961,7 +955,8 @@
   // to bool, is true.
   llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
   llvm::BranchInst *CondBr = Builder.CreateCondBr(
-      BoolCondVal, ForBody, ExitBlock, PGO.createLoopWeights(S.getCond(), Cnt));
+      BoolCondVal, ForBody, ExitBlock,
+      createProfileWeightsForLoop(S.getCond(), getProfileCount(S.getBody())));
 
   // Attach metadata to loop body conditional branch.
   EmitCondBrHints(ForBody->getContext(), CondBr, ForAttrs);
@@ -972,7 +967,7 @@
   }
 
   EmitBlock(ForBody);
-  Cnt.beginRegion(Builder);
+  incrementProfileCounter(&S);
 
   // Create a block for the increment. In case of a 'continue', we jump there.
   JumpDest Continue = getJumpDestInCurrentScope("for.inc");
@@ -1138,13 +1133,11 @@
   llvm::APSInt LHS = S.getLHS()->EvaluateKnownConstInt(getContext());
   llvm::APSInt RHS = S.getRHS()->EvaluateKnownConstInt(getContext());
 
-  RegionCounter CaseCnt = getPGORegionCounter(&S);
-
   // Emit the code for this case. We do this first to make sure it is
   // properly chained from our predecessor before generating the
   // switch machinery to enter this block.
   llvm::BasicBlock *CaseDest = createBasicBlock("sw.bb");
-  EmitBlockWithFallThrough(CaseDest, CaseCnt);
+  EmitBlockWithFallThrough(CaseDest, &S);
   EmitStmt(S.getSubStmt());
 
   // If range is empty, do nothing.
@@ -1155,7 +1148,7 @@
   // FIXME: parameters such as this should not be hardcoded.
   if (Range.ult(llvm::APInt(Range.getBitWidth(), 64))) {
     // Range is small enough to add multiple switch instruction cases.
-    uint64_t Total = CaseCnt.getCount();
+    uint64_t Total = getProfileCount(&S);
     unsigned NCases = Range.getZExtValue() + 1;
     // We only have one region counter for the entire set of cases here, so we
     // need to divide the weights evenly between the generated cases, ensuring
@@ -1194,9 +1187,9 @@
 
   llvm::MDNode *Weights = nullptr;
   if (SwitchWeights) {
-    uint64_t ThisCount = CaseCnt.getCount();
+    uint64_t ThisCount = getProfileCount(&S);
     uint64_t DefaultCount = (*SwitchWeights)[0];
-    Weights = PGO.createBranchWeights(ThisCount, DefaultCount);
+    Weights = createProfileWeights(ThisCount, DefaultCount);
 
     // Since we're chaining the switch default through each large case range, we
     // need to update the weight for the default, ie, the first case, to include
@@ -1229,7 +1222,6 @@
     return;
   }
 
-  RegionCounter CaseCnt = getPGORegionCounter(&S);
   llvm::ConstantInt *CaseVal =
     Builder.getInt(S.getLHS()->EvaluateKnownConstInt(getContext()));
 
@@ -1244,7 +1236,7 @@
     // Only do this optimization if there are no cleanups that need emitting.
     if (isObviouslyBranchWithoutCleanups(Block)) {
       if (SwitchWeights)
-        SwitchWeights->push_back(CaseCnt.getCount());
+        SwitchWeights->push_back(getProfileCount(&S));
       SwitchInsn->addCase(CaseVal, Block.getBlock());
 
       // If there was a fallthrough into this case, make sure to redirect it to
@@ -1258,9 +1250,9 @@
   }
 
   llvm::BasicBlock *CaseDest = createBasicBlock("sw.bb");
-  EmitBlockWithFallThrough(CaseDest, CaseCnt);
+  EmitBlockWithFallThrough(CaseDest, &S);
   if (SwitchWeights)
-    SwitchWeights->push_back(CaseCnt.getCount());
+    SwitchWeights->push_back(getProfileCount(&S));
   SwitchInsn->addCase(CaseVal, CaseDest);
 
   // Recursively emitting the statement is acceptable, but is not wonderful for
@@ -1281,12 +1273,11 @@
     llvm::ConstantInt *CaseVal =
       Builder.getInt(CurCase->getLHS()->EvaluateKnownConstInt(getContext()));
 
-    CaseCnt = getPGORegionCounter(NextCase);
     if (SwitchWeights)
-      SwitchWeights->push_back(CaseCnt.getCount());
+      SwitchWeights->push_back(getProfileCount(NextCase));
     if (CGM.getCodeGenOpts().ProfileInstrGenerate) {
       CaseDest = createBasicBlock("sw.bb");
-      EmitBlockWithFallThrough(CaseDest, CaseCnt);
+      EmitBlockWithFallThrough(CaseDest, &S);
     }
 
     SwitchInsn->addCase(CaseVal, CaseDest);
@@ -1302,8 +1293,7 @@
   assert(DefaultBlock->empty() &&
          "EmitDefaultStmt: Default block already defined?");
 
-  RegionCounter Cnt = getPGORegionCounter(&S);
-  EmitBlockWithFallThrough(DefaultBlock, Cnt);
+  EmitBlockWithFallThrough(DefaultBlock, &S);
 
   EmitStmt(S.getSubStmt());
 }
@@ -1525,10 +1515,8 @@
     const SwitchCase *Case = nullptr;
     if (FindCaseStatementsForValue(S, ConstantCondValue, CaseStmts,
                                    getContext(), Case)) {
-      if (Case) {
-        RegionCounter CaseCnt = getPGORegionCounter(Case);
-        CaseCnt.beginRegion(Builder);
-      }
+      if (Case)
+        incrementProfileCounter(Case);
       RunCleanupsScope ExecutedScope(*this);
 
       // Emit the condition variable if needed inside the entire cleanup scope
@@ -1545,8 +1533,7 @@
       // specified series of statements and we're good.
       for (unsigned i = 0, e = CaseStmts.size(); i != e; ++i)
         EmitStmt(CaseStmts[i]);
-      RegionCounter ExitCnt = getPGORegionCounter(&S);
-      ExitCnt.beginRegion(Builder);
+      incrementProfileCounter(&S);
 
       // Now we want to restore the saved switch instance so that nested
       // switches continue to function properly
@@ -1577,7 +1564,7 @@
          Case;
          Case = Case->getNextSwitchCase()) {
       if (isa<DefaultStmt>(Case))
-        DefaultCount = getPGORegionCounter(Case).getCount();
+        DefaultCount = getProfileCount(Case);
       NumCases += 1;
     }
     SwitchWeights = new SmallVector<uint64_t, 16>();
@@ -1626,8 +1613,7 @@
 
   // Emit continuation.
   EmitBlock(SwitchExit.getBlock(), true);
-  RegionCounter ExitCnt = getPGORegionCounter(&S);
-  ExitCnt.beginRegion(Builder);
+  incrementProfileCounter(&S);
 
   if (SwitchWeights) {
     assert(SwitchWeights->size() == 1 + SwitchInsn->getNumCases() &&
@@ -1635,7 +1621,7 @@
     // If there's only one jump destination there's no sense weighting it.
     if (SwitchWeights->size() > 1)
       SwitchInsn->setMetadata(llvm::LLVMContext::MD_prof,
-                              PGO.createBranchWeights(*SwitchWeights));
+                              createProfileWeights(*SwitchWeights));
     delete SwitchWeights;
   }
   SwitchInsn = SavedSwitchInsn;
@@ -1764,6 +1750,16 @@
                                          const TargetInfo::ConstraintInfo &Info,
                                            const Expr *InputExpr,
                                            std::string &ConstraintStr) {
+  // If this can't be a register or memory, i.e., has to be a constant
+  // (immediate or symbolic), try to emit it as such.
+  if (!Info.allowsRegister() && !Info.allowsMemory()) {
+    llvm::APSInt Result;
+    if (InputExpr->EvaluateAsInt(Result, getContext()))
+      return llvm::ConstantInt::get(getLLVMContext(), Result);
+    assert(!Info.requiresImmediateConstant() &&
+           "Required-immediate inlineasm arg isn't constant?");
+  }
+
   if (Info.allowsRegister() || !Info.allowsMemory())
     if (CodeGenFunction::hasScalarEvaluationKind(InputExpr->getType()))
       return EmitScalarExpr(InputExpr);
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
index aa53756..895baa7 100644
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -23,52 +23,6 @@
 //===----------------------------------------------------------------------===//
 //                              OpenMP Directive Emission
 //===----------------------------------------------------------------------===//
-/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
-/// function. Here is the logic:
-/// if (Cond) {
-///   CodeGen(true);
-/// } else {
-///   CodeGen(false);
-/// }
-static void EmitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
-                            const std::function<void(bool)> &CodeGen) {
-  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
-
-  // If the condition constant folds and can be elided, try to avoid emitting
-  // the condition and the dead arm of the if/else.
-  bool CondConstant;
-  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
-    CodeGen(CondConstant);
-    return;
-  }
-
-  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
-  // emit the conditional branch.
-  auto ThenBlock = CGF.createBasicBlock(/*name*/ "omp_if.then");
-  auto ElseBlock = CGF.createBasicBlock(/*name*/ "omp_if.else");
-  auto ContBlock = CGF.createBasicBlock(/*name*/ "omp_if.end");
-  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount*/ 0);
-
-  // Emit the 'then' code.
-  CGF.EmitBlock(ThenBlock);
-  CodeGen(/*ThenBlock*/ true);
-  CGF.EmitBranch(ContBlock);
-  // Emit the 'else' code if present.
-  {
-    // There is no need to emit line number for unconditional branch.
-    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
-    CGF.EmitBlock(ElseBlock);
-  }
-  CodeGen(/*ThenBlock*/ false);
-  {
-    // There is no need to emit line number for unconditional branch.
-    auto NL = ApplyDebugLocation::CreateEmpty(CGF);
-    CGF.EmitBranch(ContBlock);
-  }
-  // Emit the continuation block for code after the if.
-  CGF.EmitBlock(ContBlock, /*IsFinished*/ true);
-}
-
 void CodeGenFunction::EmitOMPAggregateAssign(
     llvm::Value *DestAddr, llvm::Value *SrcAddr, QualType OriginalType,
     const llvm::function_ref<void(llvm::Value *, llvm::Value *)> &CopyGen) {
@@ -160,13 +114,8 @@
 
 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                 OMPPrivateScope &PrivateScope) {
-  auto FirstprivateFilter = [](const OMPClause *C) -> bool {
-    return C->getClauseKind() == OMPC_firstprivate;
-  };
   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
-  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
-           FirstprivateFilter)> I(D.clauses(), FirstprivateFilter);
-       I; ++I) {
+  for (auto &&I = D.getClausesOfKind(OMPC_firstprivate); I; ++I) {
     auto *C = cast<OMPFirstprivateClause>(*I);
     auto IRef = C->varlist_begin();
     auto InitsRef = C->inits().begin();
@@ -183,7 +132,8 @@
                 OrigVD) != nullptr,
             (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
         auto *OriginalAddr = EmitLValue(&DRE).getAddress();
-        if (OrigVD->getType()->isArrayType()) {
+        QualType Type = OrigVD->getType();
+        if (Type->isArrayType()) {
           // Emit VarDecl with copy init for arrays.
           // Get the address of the original variable captured in current
           // captured region.
@@ -193,11 +143,10 @@
             if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
               // Perform simple memcpy.
               EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
-                                  (*IRef)->getType());
+                                  Type);
             } else {
               EmitOMPAggregateAssign(
-                  Emission.getAllocatedAddress(), OriginalAddr,
-                  (*IRef)->getType(),
+                  Emission.getAllocatedAddress(), OriginalAddr, Type,
                   [this, VDInit, Init](llvm::Value *DestElement,
                                        llvm::Value *SrcElement) {
                     // Clean up any temporaries needed by the initialization.
@@ -239,25 +188,24 @@
 void CodeGenFunction::EmitOMPPrivateClause(
     const OMPExecutableDirective &D,
     CodeGenFunction::OMPPrivateScope &PrivateScope) {
-  auto PrivateFilter = [](const OMPClause *C) -> bool {
-    return C->getClauseKind() == OMPC_private;
-  };
-  for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)>
-           I(D.clauses(), PrivateFilter); I; ++I) {
+  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
+  for (auto &&I = D.getClausesOfKind(OMPC_private); I; ++I) {
     auto *C = cast<OMPPrivateClause>(*I);
     auto IRef = C->varlist_begin();
     for (auto IInit : C->private_copies()) {
       auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
-      auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
-      bool IsRegistered =
-          PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * {
-            // Emit private VarDecl with copy init.
-            EmitDecl(*VD);
-            return GetAddrOfLocalVar(VD);
-          });
-      assert(IsRegistered && "private var already registered as private");
-      // Silence the warning about unused variable.
-      (void)IsRegistered;
+      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
+        auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
+        bool IsRegistered =
+            PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
+              // Emit private VarDecl with copy init.
+              EmitDecl(*VD);
+              return GetAddrOfLocalVar(VD);
+            });
+        assert(IsRegistered && "private var already registered as private");
+        // Silence the warning about unused variable.
+        (void)IsRegistered;
+      }
       ++IRef;
     }
   }
@@ -268,20 +216,16 @@
   // operator=(threadprivate_var2, master_threadprivate_var2);
   // ...
   // __kmpc_barrier(&loc, global_tid);
-  auto CopyinFilter = [](const OMPClause *C) -> bool {
-    return C->getClauseKind() == OMPC_copyin;
-  };
   llvm::DenseSet<const VarDecl *> CopiedVars;
   llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
-  for (OMPExecutableDirective::filtered_clause_iterator<decltype(CopyinFilter)>
-           I(D.clauses(), CopyinFilter);
-       I; ++I) {
+  for (auto &&I = D.getClausesOfKind(OMPC_copyin); I; ++I) {
     auto *C = cast<OMPCopyinClause>(*I);
     auto IRef = C->varlist_begin();
     auto ISrcRef = C->source_exprs().begin();
     auto IDestRef = C->destination_exprs().begin();
     for (auto *AssignOp : C->assignment_ops()) {
       auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
+      QualType Type = VD->getType();
       if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
         // Get the address of the master variable.
         auto *MasterAddr = VD->isStaticLocal()
@@ -303,8 +247,8 @@
         }
         auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
         auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
-        EmitOMPCopy(*this, (*IRef)->getType(), PrivateAddr, MasterAddr, DestVD,
-                    SrcVD, AssignOp);
+        EmitOMPCopy(*this, Type, PrivateAddr, MasterAddr, DestVD, SrcVD,
+                    AssignOp);
       }
       ++IRef;
       ++ISrcRef;
@@ -321,14 +265,10 @@
 
 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
     const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
-  auto LastprivateFilter = [](const OMPClause *C) -> bool {
-    return C->getClauseKind() == OMPC_lastprivate;
-  };
   bool HasAtLeastOneLastprivate = false;
   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
-  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
-           LastprivateFilter)> I(D.clauses(), LastprivateFilter);
-       I; ++I) {
+  for (auto &&I = D.getClausesOfKind(OMPC_lastprivate); I; ++I) {
+    HasAtLeastOneLastprivate = true;
     auto *C = cast<OMPLastprivateClause>(*I);
     auto IRef = C->varlist_begin();
     auto IDestRef = C->destination_exprs().begin();
@@ -349,17 +289,18 @@
         // Check if the variable is also a firstprivate: in this case IInit is
         // not generated. Initialization of this variable will happen in codegen
         // for 'firstprivate' clause.
-        if (!IInit)
-          continue;
-        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
-        bool IsRegistered =
-            PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
-              // Emit private VarDecl with copy init.
-              EmitDecl(*VD);
-              return GetAddrOfLocalVar(VD);
-            });
-        assert(IsRegistered && "lastprivate var already registered as private");
-        HasAtLeastOneLastprivate = HasAtLeastOneLastprivate || IsRegistered;
+        if (IInit) {
+          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
+          bool IsRegistered =
+              PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value *{
+                // Emit private VarDecl with copy init.
+                EmitDecl(*VD);
+                return GetAddrOfLocalVar(VD);
+              });
+          assert(IsRegistered &&
+                 "lastprivate var already registered as private");
+          (void)IsRegistered;
+        }
       }
       ++IRef, ++IDestRef;
     }
@@ -379,29 +320,58 @@
   auto *DoneBB = createBasicBlock(".omp.lastprivate.done");
   Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
   EmitBlock(ThenBB);
+  llvm::DenseMap<const Decl *, const Expr *> LoopCountersAndUpdates;
+  const Expr *LastIterVal = nullptr;
+  const Expr *IVExpr = nullptr;
+  const Expr *IncExpr = nullptr;
+  if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
+    LastIterVal =
+        cast<VarDecl>(cast<DeclRefExpr>(LoopDirective->getUpperBoundVariable())
+                          ->getDecl())
+            ->getAnyInitializer();
+    IVExpr = LoopDirective->getIterationVariable();
+    IncExpr = LoopDirective->getInc();
+    auto IUpdate = LoopDirective->updates().begin();
+    for (auto *E : LoopDirective->counters()) {
+      auto *D = cast<DeclRefExpr>(E)->getDecl()->getCanonicalDecl();
+      LoopCountersAndUpdates[D] = *IUpdate;
+      ++IUpdate;
+    }
+  }
   {
-    auto LastprivateFilter = [](const OMPClause *C) -> bool {
-      return C->getClauseKind() == OMPC_lastprivate;
-    };
     llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
-    for (OMPExecutableDirective::filtered_clause_iterator<decltype(
-             LastprivateFilter)> I(D.clauses(), LastprivateFilter);
-         I; ++I) {
+    bool FirstLCV = true;
+    for (auto &&I = D.getClausesOfKind(OMPC_lastprivate); I; ++I) {
       auto *C = cast<OMPLastprivateClause>(*I);
       auto IRef = C->varlist_begin();
       auto ISrcRef = C->source_exprs().begin();
       auto IDestRef = C->destination_exprs().begin();
       for (auto *AssignOp : C->assignment_ops()) {
         auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
-        if (AlreadyEmittedVars.insert(PrivateVD->getCanonicalDecl()).second) {
+        QualType Type = PrivateVD->getType();
+        auto *CanonicalVD = PrivateVD->getCanonicalDecl();
+        if (AlreadyEmittedVars.insert(CanonicalVD).second) {
+          // If lastprivate variable is a loop control variable for loop-based
+          // directive, update its value before copyin back to original
+          // variable.
+          if (auto *UpExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) {
+            if (FirstLCV) {
+              EmitAnyExprToMem(LastIterVal, EmitLValue(IVExpr).getAddress(),
+                               IVExpr->getType().getQualifiers(),
+                               /*IsInitializer=*/false);
+              EmitIgnoredExpr(IncExpr);
+              FirstLCV = false;
+            }
+            EmitIgnoredExpr(UpExpr);
+          }
           auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
           auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
           // Get the address of the original variable.
           auto *OriginalAddr = GetAddrOfLocalVar(DestVD);
           // Get the address of the private variable.
           auto *PrivateAddr = GetAddrOfLocalVar(PrivateVD);
-          EmitOMPCopy(*this, (*IRef)->getType(), OriginalAddr, PrivateAddr,
-                      DestVD, SrcVD, AssignOp);
+          EmitOMPCopy(*this, Type, OriginalAddr, PrivateAddr, DestVD, SrcVD,
+                      AssignOp);
         }
         ++IRef;
         ++ISrcRef;
@@ -415,12 +385,7 @@
 void CodeGenFunction::EmitOMPReductionClauseInit(
     const OMPExecutableDirective &D,
     CodeGenFunction::OMPPrivateScope &PrivateScope) {
-  auto ReductionFilter = [](const OMPClause *C) -> bool {
-    return C->getClauseKind() == OMPC_reduction;
-  };
-  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
-           ReductionFilter)> I(D.clauses(), ReductionFilter);
-       I; ++I) {
+  for (auto &&I = D.getClausesOfKind(OMPC_reduction); I; ++I) {
     auto *C = cast<OMPReductionClause>(*I);
     auto ILHS = C->lhs_exprs().begin();
     auto IRHS = C->rhs_exprs().begin();
@@ -456,13 +421,8 @@
   llvm::SmallVector<const Expr *, 8> LHSExprs;
   llvm::SmallVector<const Expr *, 8> RHSExprs;
   llvm::SmallVector<const Expr *, 8> ReductionOps;
-  auto ReductionFilter = [](const OMPClause *C) -> bool {
-    return C->getClauseKind() == OMPC_reduction;
-  };
   bool HasAtLeastOneReduction = false;
-  for (OMPExecutableDirective::filtered_clause_iterator<decltype(
-           ReductionFilter)> I(D.clauses(), ReductionFilter);
-       I; ++I) {
+  for (auto &&I = D.getClausesOfKind(OMPC_reduction); I; ++I) {
     HasAtLeastOneReduction = true;
     auto *C = cast<OMPReductionClause>(*I);
     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
@@ -479,23 +439,6 @@
   }
 }
 
-/// \brief Emits code for OpenMP parallel directive in the parallel region.
-static void emitOMPParallelCall(CodeGenFunction &CGF,
-                                const OMPExecutableDirective &S,
-                                llvm::Value *OutlinedFn,
-                                llvm::Value *CapturedStruct) {
-  if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads)) {
-    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
-    auto NumThreadsClause = cast<OMPNumThreadsClause>(C);
-    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
-                                         /*IgnoreResultAssign*/ true);
-    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
-        CGF, NumThreads, NumThreadsClause->getLocStart());
-  }
-  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
-                                              CapturedStruct);
-}
-
 static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
                                            const OMPExecutableDirective &S,
                                            const RegionCodeGenTy &CodeGen) {
@@ -503,17 +446,20 @@
   auto CapturedStruct = CGF.GenerateCapturedStmtArgument(*CS);
   auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
       S, *CS->getCapturedDecl()->param_begin(), CodeGen);
-  if (auto C = S.getSingleClause(/*K*/ OMPC_if)) {
-    auto Cond = cast<OMPIfClause>(C)->getCondition();
-    EmitOMPIfClause(CGF, Cond, [&](bool ThenBlock) {
-      if (ThenBlock)
-        emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
-      else
-        CGF.CGM.getOpenMPRuntime().emitSerialCall(CGF, S.getLocStart(),
-                                                  OutlinedFn, CapturedStruct);
-    });
-  } else
-    emitOMPParallelCall(CGF, S, OutlinedFn, CapturedStruct);
+  if (auto C = S.getSingleClause(OMPC_num_threads)) {
+    CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
+    auto NumThreadsClause = cast<OMPNumThreadsClause>(C);
+    auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
+                                         /*IgnoreResultAssign*/ true);
+    CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
+        CGF, NumThreads, NumThreadsClause->getLocStart());
+  }
+  const Expr *IfCond = nullptr;
+  if (auto C = S.getSingleClause(OMPC_if)) {
+    IfCond = cast<OMPIfClause>(C)->getCondition();
+  }
+  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
+                                              CapturedStruct, IfCond);
 }
 
 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
@@ -551,7 +497,8 @@
     EmitIgnoredExpr(I);
   }
   // Update the linear variables.
-  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
+  for (auto &&I = S.getClausesOfKind(OMPC_linear); I; ++I) {
+    auto *C = cast<OMPLinearClause>(*I);
     for (auto U : C->updates()) {
       EmitIgnoredExpr(U);
     }
@@ -576,9 +523,9 @@
 void CodeGenFunction::EmitOMPInnerLoop(
     const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
     const Expr *IncExpr,
-    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen) {
+    const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
+    const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
-  auto Cnt = getPGORegionCounter(&S);
 
   // Start the loop with a block that tests the condition.
   auto CondBlock = createBasicBlock("omp.inner.for.cond");
@@ -594,14 +541,14 @@
   auto LoopBody = createBasicBlock("omp.inner.for.body");
 
   // Emit condition.
-  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, Cnt.getCount());
+  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
   if (ExitBlock != LoopExit.getBlock()) {
     EmitBlock(ExitBlock);
     EmitBranchThroughCleanup(LoopExit);
   }
 
   EmitBlock(LoopBody);
-  Cnt.beginRegion(Builder);
+  incrementProfileCounter(&S);
 
   // Create a block for the increment.
   auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
@@ -612,6 +559,7 @@
   // Emit "IV = IV + 1" and a back-edge to the condition block.
   EmitBlock(Continue.getBlock());
   EmitIgnoredExpr(IncExpr);
+  PostIncGen(*this);
   BreakContinueStack.pop_back();
   EmitBranch(CondBlock);
   LoopStack.pop();
@@ -622,15 +570,36 @@
 void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &S) {
   auto IC = S.counters().begin();
   for (auto F : S.finals()) {
-    if (LocalDeclMap.lookup(cast<DeclRefExpr>((*IC))->getDecl())) {
+    auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
+    if (LocalDeclMap.lookup(OrigVD)) {
+      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
+                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
+                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
+      auto *OrigAddr = EmitLValue(&DRE).getAddress();
+      OMPPrivateScope VarScope(*this);
+      VarScope.addPrivate(OrigVD,
+                          [OrigAddr]() -> llvm::Value *{ return OrigAddr; });
+      (void)VarScope.Privatize();
       EmitIgnoredExpr(F);
     }
     ++IC;
   }
   // Emit the final values of the linear variables.
-  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
+  for (auto &&I = S.getClausesOfKind(OMPC_linear); I; ++I) {
+    auto *C = cast<OMPLinearClause>(*I);
+    auto IC = C->varlist_begin();
     for (auto F : C->finals()) {
+      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
+      DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
+                      CapturedStmtInfo->lookup(OrigVD) != nullptr,
+                      (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
+      auto *OrigAddr = EmitLValue(&DRE).getAddress();
+      OMPPrivateScope VarScope(*this);
+      VarScope.addPrivate(OrigVD,
+                          [OrigAddr]() -> llvm::Value *{ return OrigAddr; });
+      (void)VarScope.Privatize();
       EmitIgnoredExpr(F);
+      ++IC;
     }
   }
 }
@@ -666,23 +635,55 @@
                                     ArrayRef<Expr *> Counters) {
   for (auto *E : Counters) {
     auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
-    bool IsRegistered = LoopScope.addPrivate(VD, [&]() -> llvm::Value * {
+    (void)LoopScope.addPrivate(VD, [&]() -> llvm::Value *{
       // Emit var without initialization.
       auto VarEmission = CGF.EmitAutoVarAlloca(*VD);
       CGF.EmitAutoVarCleanups(VarEmission);
       return VarEmission.getAllocatedAddress();
     });
+  }
+}
+
+static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
+                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
+                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
+  {
+    CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
+    EmitPrivateLoopCounters(CGF, PreCondScope, S.counters());
+    const VarDecl *IVDecl =
+        cast<VarDecl>(cast<DeclRefExpr>(S.getIterationVariable())->getDecl());
+    bool IsRegistered = PreCondScope.addPrivate(IVDecl, [&]() -> llvm::Value *{
+      // Emit var without initialization.
+      auto VarEmission = CGF.EmitAutoVarAlloca(*IVDecl);
+      CGF.EmitAutoVarCleanups(VarEmission);
+      return VarEmission.getAllocatedAddress();
+    });
     assert(IsRegistered && "counter already registered as private");
     // Silence the warning about unused variable.
     (void)IsRegistered;
+    (void)PreCondScope.Privatize();
+    // Initialize internal counter to 0 to calculate initial values of real
+    // counters.
+    LValue IV = CGF.EmitLValue(S.getIterationVariable());
+    CGF.EmitStoreOfScalar(
+        llvm::ConstantInt::getNullValue(
+            IV.getAddress()->getType()->getPointerElementType()),
+        CGF.EmitLValue(S.getIterationVariable()), /*isInit=*/true);
+    // Get initial values of real counters.
+    for (auto I : S.updates()) {
+      CGF.EmitIgnoredExpr(I);
+    }
   }
+  // Check that loop is executed at least one time.
+  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
 }
 
 static void
 EmitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D,
                       CodeGenFunction::OMPPrivateScope &PrivateScope) {
-  for (auto Clause : OMPExecutableDirective::linear_filter(D.clauses())) {
-    for (auto *E : Clause->varlists()) {
+  for (auto &&I = D.getClausesOfKind(OMPC_linear); I; ++I) {
+    auto *C = cast<OMPLinearClause>(*I);
+    for (auto *E : C->varlists()) {
       auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
       bool IsRegistered = PrivateScope.addPrivate(VD, [&]()->llvm::Value * {
         // Emit var without initialization.
@@ -702,7 +703,7 @@
     // Pragma 'simd' code depends on presence of 'lastprivate'.
     // If present, we have to separate last iteration of the loop:
     //
-    // if (LastIteration != 0) {
+    // if (PreCond) {
     //   for (IV in 0..LastIteration-1) BODY;
     //   BODY with updates of lastprivate vars;
     //   <Final counter/linear vars updates>;
@@ -710,10 +711,28 @@
     //
     // otherwise (when there's no lastprivate):
     //
+    // if (PreCond) {
     //   for (IV in 0..LastIteration) BODY;
     //   <Final counter/linear vars updates>;
+    // }
     //
 
+    // Emit: if (PreCond) - begin.
+    // If the condition constant folds and can be elided, avoid emitting the
+    // whole loop.
+    bool CondConstant;
+    llvm::BasicBlock *ContBlock = nullptr;
+    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
+      if (!CondConstant)
+        return;
+    } else {
+      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
+      ContBlock = CGF.createBasicBlock("simd.if.end");
+      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
+                  CGF.getProfileCount(&S));
+      CGF.EmitBlock(ThenBlock);
+      CGF.incrementProfileCounter(&S);
+    }
     // Walk clauses and process safelen/lastprivate.
     bool SeparateIter = false;
     CGF.LoopStack.setParallel();
@@ -744,7 +763,8 @@
     }
 
     // Emit inits for the linear variables.
-    for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
+    for (auto &&I = S.getClausesOfKind(OMPC_linear); I; ++I) {
+      auto *C = cast<OMPLinearClause>(*I);
       for (auto Init : C->inits()) {
         auto *D = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
         CGF.EmitVarDecl(*D);
@@ -759,8 +779,7 @@
 
     // Emit the iterations count variable.
     // If it is not a variable, Sema decided to calculate iterations count on
-    // each
-    // iteration (e.g., it is foldable into a constant).
+    // each iteration (e.g., it is foldable into a constant).
     if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
       CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
       // Emit calculation of the iterations count.
@@ -769,7 +788,8 @@
 
     // Emit the linear steps for the linear clauses.
     // If a step is not constant, it is pre-calculated before the loop.
-    for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
+    for (auto &&I = S.getClausesOfKind(OMPC_linear); I; ++I) {
+      auto *C = cast<OMPLinearClause>(*I);
       if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
         if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
           CGF.EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
@@ -778,49 +798,28 @@
         }
     }
 
-    if (SeparateIter) {
-      // Emit: if (LastIteration > 0) - begin.
-      RegionCounter Cnt = CGF.getPGORegionCounter(&S);
-      auto ThenBlock = CGF.createBasicBlock("simd.if.then");
-      auto ContBlock = CGF.createBasicBlock("simd.if.end");
-      CGF.EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock,
-                               Cnt.getCount());
-      CGF.EmitBlock(ThenBlock);
-      Cnt.beginRegion(CGF.Builder);
-      // Emit 'then' code.
-      {
-        OMPPrivateScope LoopScope(CGF);
-        EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
-        EmitPrivateLinearVars(CGF, S, LoopScope);
-        CGF.EmitOMPPrivateClause(S, LoopScope);
-        (void)LoopScope.Privatize();
-        CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
-                             S.getCond(/*SeparateIter=*/true), S.getInc(),
-                             [&S](CodeGenFunction &CGF) {
-                               CGF.EmitOMPLoopBody(S);
-                               CGF.EmitStopPoint(&S);
-                             });
-        CGF.EmitOMPLoopBody(S, /* SeparateIter */ true);
+    {
+      OMPPrivateScope LoopScope(CGF);
+      EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
+      EmitPrivateLinearVars(CGF, S, LoopScope);
+      CGF.EmitOMPPrivateClause(S, LoopScope);
+      (void)LoopScope.Privatize();
+      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+                           S.getCond(SeparateIter), S.getInc(),
+                           [&S](CodeGenFunction &CGF) {
+                             CGF.EmitOMPLoopBody(S);
+                             CGF.EmitStopPoint(&S);
+                           },
+                           [](CodeGenFunction &) {});
+      if (SeparateIter) {
+        CGF.EmitOMPLoopBody(S, /*SeparateIter=*/true);
       }
-      CGF.EmitOMPSimdFinal(S);
-      // Emit: if (LastIteration != 0) - end.
+    }
+    CGF.EmitOMPSimdFinal(S);
+    // Emit: if (PreCond) - end.
+    if (ContBlock) {
       CGF.EmitBranch(ContBlock);
       CGF.EmitBlock(ContBlock, true);
-    } else {
-      {
-        OMPPrivateScope LoopScope(CGF);
-        EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
-        EmitPrivateLinearVars(CGF, S, LoopScope);
-        CGF.EmitOMPPrivateClause(S, LoopScope);
-        (void)LoopScope.Privatize();
-        CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
-                             S.getCond(/*SeparateIter=*/false), S.getInc(),
-                             [&S](CodeGenFunction &CGF) {
-                               CGF.EmitOMPLoopBody(S);
-                               CGF.EmitStopPoint(&S);
-                             });
-      }
-      CGF.EmitOMPSimdFinal(S);
     }
   };
   CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
@@ -829,15 +828,16 @@
 void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                                           const OMPLoopDirective &S,
                                           OMPPrivateScope &LoopScope,
-                                          llvm::Value *LB, llvm::Value *UB,
-                                          llvm::Value *ST, llvm::Value *IL,
-                                          llvm::Value *Chunk) {
+                                          bool Ordered, llvm::Value *LB,
+                                          llvm::Value *UB, llvm::Value *ST,
+                                          llvm::Value *IL, llvm::Value *Chunk) {
   auto &RT = CGM.getOpenMPRuntime();
 
   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
-  const bool Dynamic = RT.isDynamic(ScheduleKind);
+  const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind);
 
-  assert(!RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
+  assert((Ordered ||
+          !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) &&
          "static non-chunked schedule does not need outer loop");
 
   // Emit outer loop.
@@ -873,7 +873,9 @@
   //
   // while(__kmpc_dispatch_next(&LB, &UB)) {
   //   idx = LB;
-  //   while (idx <= UB) { BODY; ++idx; } // inner loop
+  //   while (idx <= UB) { BODY; ++idx;
+  //   __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
+  //   } // inner loop
   // }
   //
   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
@@ -893,9 +895,10 @@
   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
 
   RT.emitForInit(
-      *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
-      (Dynamic ? EmitAnyExpr(S.getLastIteration()).getScalarVal() : UB), ST,
-      Chunk);
+      *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, Ordered, IL, LB,
+      (DynamicOrOrdered ? EmitAnyExpr(S.getLastIteration()).getScalarVal()
+                        : UB),
+      ST, Chunk);
 
   auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
 
@@ -905,7 +908,7 @@
   LoopStack.push(CondBlock);
 
   llvm::Value *BoolCondVal = nullptr;
-  if (!Dynamic) {
+  if (!DynamicOrOrdered) {
     // UB = min(UB, GlobalUB)
     EmitIgnoredExpr(S.getEnsureUpperBound());
     // IV = LB
@@ -933,23 +936,36 @@
 
   // Emit "IV = LB" (in case of static schedule, we have already calculated new
   // LB for loop condition and emitted it above).
-  if (Dynamic)
+  if (DynamicOrOrdered)
     EmitIgnoredExpr(S.getInit());
 
   // Create a block for the increment.
   auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
 
-  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
-                   S.getCond(/*SeparateIter=*/false), S.getInc(),
-                   [&S](CodeGenFunction &CGF) {
-                     CGF.EmitOMPLoopBody(S);
-                     CGF.EmitStopPoint(&S);
-                   });
+  SourceLocation Loc = S.getLocStart();
+  // Generate !llvm.loop.parallel metadata for loads and stores for loops with
+  // dynamic/guided scheduling and without ordered clause.
+  LoopStack.setParallel((ScheduleKind == OMPC_SCHEDULE_dynamic ||
+                         ScheduleKind == OMPC_SCHEDULE_guided) &&
+                        !Ordered);
+  EmitOMPInnerLoop(
+      S, LoopScope.requiresCleanups(), S.getCond(/*SeparateIter=*/false),
+      S.getInc(),
+      [&S](CodeGenFunction &CGF) {
+        CGF.EmitOMPLoopBody(S);
+        CGF.EmitStopPoint(&S);
+      },
+      [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
+        if (Ordered) {
+          CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
+              CGF, Loc, IVSize, IVSigned);
+        }
+      });
 
   EmitBlock(Continue.getBlock());
   BreakContinueStack.pop_back();
-  if (!Dynamic) {
+  if (!DynamicOrOrdered) {
     // Emit "LB = LB + Stride", "UB = UB + Stride".
     EmitIgnoredExpr(S.getNextLowerBound());
     EmitIgnoredExpr(S.getNextUpperBound());
@@ -961,9 +977,8 @@
   EmitBlock(LoopExit.getBlock());
 
   // Tell the runtime we are done.
-  // FIXME: Also call fini for ordered loops with dynamic scheduling.
-  if (!Dynamic)
-    RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
+  if (!DynamicOrOrdered)
+    RT.emitForStaticFinish(*this, S.getLocEnd());
 }
 
 /// \brief Emit a helper variable and return corresponding lvalue.
@@ -974,6 +989,38 @@
   return CGF.EmitLValue(Helper);
 }
 
+static std::pair<llvm::Value * /*Chunk*/, OpenMPScheduleClauseKind>
+emitScheduleClause(CodeGenFunction &CGF, const OMPLoopDirective &S,
+                   bool OuterRegion) {
+  // Detect the loop schedule kind and chunk.
+  auto ScheduleKind = OMPC_SCHEDULE_unknown;
+  llvm::Value *Chunk = nullptr;
+  if (auto *C =
+          cast_or_null<OMPScheduleClause>(S.getSingleClause(OMPC_schedule))) {
+    ScheduleKind = C->getScheduleKind();
+    if (const auto *Ch = C->getChunkSize()) {
+      if (auto *ImpRef = cast_or_null<DeclRefExpr>(C->getHelperChunkSize())) {
+        if (OuterRegion) {
+          const VarDecl *ImpVar = cast<VarDecl>(ImpRef->getDecl());
+          CGF.EmitVarDecl(*ImpVar);
+          CGF.EmitStoreThroughLValue(
+              CGF.EmitAnyExpr(Ch),
+              CGF.MakeNaturalAlignAddrLValue(CGF.GetAddrOfLocalVar(ImpVar),
+                                             ImpVar->getType()));
+        } else {
+          Ch = ImpRef;
+        }
+      }
+      if (!C->getHelperChunkSize() || !OuterRegion) {
+        Chunk = CGF.EmitScalarExpr(Ch);
+        Chunk = CGF.EmitScalarConversion(Chunk, Ch->getType(),
+                                         S.getIterationVariable()->getType());
+      }
+    }
+  }
+  return std::make_pair(Chunk, ScheduleKind);
+}
+
 bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
   // Emit the loop iteration variable.
   auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
@@ -995,12 +1042,21 @@
   // Check pre-condition.
   {
     // Skip the entire loop if we don't meet the precondition.
-    RegionCounter Cnt = getPGORegionCounter(&S);
-    auto ThenBlock = createBasicBlock("omp.precond.then");
-    auto ContBlock = createBasicBlock("omp.precond.end");
-    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
-    EmitBlock(ThenBlock);
-    Cnt.beginRegion(Builder);
+    // If the condition constant folds and can be elided, avoid emitting the
+    // whole loop.
+    bool CondConstant;
+    llvm::BasicBlock *ContBlock = nullptr;
+    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
+      if (!CondConstant)
+        return false;
+    } else {
+      auto *ThenBlock = createBasicBlock("omp.precond.then");
+      ContBlock = createBasicBlock("omp.precond.end");
+      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
+                  getProfileCount(&S));
+      EmitBlock(ThenBlock);
+      incrementProfileCounter(&S);
+    }
     // Emit 'then' code.
     {
       // Emit helper vars inits.
@@ -1020,34 +1076,33 @@
         CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
                                                OMPD_unknown);
       }
+      EmitOMPPrivateClause(S, LoopScope);
       HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
+      EmitOMPReductionClauseInit(S, LoopScope);
       EmitPrivateLoopCounters(*this, LoopScope, S.counters());
       (void)LoopScope.Privatize();
 
       // Detect the loop schedule kind and chunk.
-      auto ScheduleKind = OMPC_SCHEDULE_unknown;
-      llvm::Value *Chunk = nullptr;
-      if (auto C = cast_or_null<OMPScheduleClause>(
-              S.getSingleClause(OMPC_schedule))) {
-        ScheduleKind = C->getScheduleKind();
-        if (auto Ch = C->getChunkSize()) {
-          Chunk = EmitScalarExpr(Ch);
-          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
-                                       S.getIterationVariable()->getType());
-        }
-      }
+      llvm::Value *Chunk;
+      OpenMPScheduleClauseKind ScheduleKind;
+      auto ScheduleInfo =
+          emitScheduleClause(*this, S, /*OuterRegion=*/false);
+      Chunk = ScheduleInfo.first;
+      ScheduleKind = ScheduleInfo.second;
       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
+      const bool Ordered = S.getSingleClause(OMPC_ordered) != nullptr;
       if (RT.isStaticNonchunked(ScheduleKind,
-                                /* Chunked */ Chunk != nullptr)) {
+                                /* Chunked */ Chunk != nullptr) &&
+          !Ordered) {
         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
         // When no chunk_size is specified, the iteration space is divided into
         // chunks that are approximately equal in size, and at most one chunk is
         // distributed to each thread. Note that the size of the chunks is
         // unspecified in this case.
         RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
-                       IL.getAddress(), LB.getAddress(), UB.getAddress(),
-                       ST.getAddress());
+                       Ordered, IL.getAddress(), LB.getAddress(),
+                       UB.getAddress(), ST.getAddress());
         // UB = min(UB, GlobalUB);
         EmitIgnoredExpr(S.getEnsureUpperBound());
         // IV = LB;
@@ -1058,24 +1113,28 @@
                          [&S](CodeGenFunction &CGF) {
                            CGF.EmitOMPLoopBody(S);
                            CGF.EmitStopPoint(&S);
-                         });
+                         },
+                         [](CodeGenFunction &) {});
         // Tell the runtime we are done.
-        RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
+        RT.emitForStaticFinish(*this, S.getLocStart());
       } else {
         // Emit the outer loop, which requests its work chunk [LB..UB] from
         // runtime and runs the inner loop to process it.
-        EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, LB.getAddress(),
-                            UB.getAddress(), ST.getAddress(), IL.getAddress(),
-                            Chunk);
+        EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, Ordered,
+                            LB.getAddress(), UB.getAddress(), ST.getAddress(),
+                            IL.getAddress(), Chunk);
       }
+      EmitOMPReductionClauseFinal(S);
       // Emit final copy of the lastprivate variables if IsLastIter != 0.
       if (HasLastprivateClause)
         EmitOMPLastprivateClauseFinal(
             S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
     }
     // We're now done with the loop, so jump to the continuation block.
-    EmitBranch(ContBlock);
-    EmitBlock(ContBlock, true);
+    if (ContBlock) {
+      EmitBranch(ContBlock);
+      EmitBlock(ContBlock, true);
+    }
   }
   return HasLastprivateClause;
 }
@@ -1112,7 +1171,8 @@
   auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
   auto *CS = dyn_cast<CompoundStmt>(Stmt);
   if (CS && CS->size() > 1) {
-    auto &&CodeGen = [&S, CS](CodeGenFunction &CGF) {
+    bool HasLastprivates = false;
+    auto &&CodeGen = [&S, CS, &HasLastprivates](CodeGenFunction &CGF) {
       auto &C = CGF.CGM.getContext();
       auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
       // Emit helper vars inits.
@@ -1164,11 +1224,24 @@
         }
         CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
       };
+
+      CodeGenFunction::OMPPrivateScope LoopScope(CGF);
+      if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
+        // Emit implicit barrier to synchronize threads and avoid data races on
+        // initialization of firstprivate variables.
+        CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
+                                                   OMPD_unknown);
+      }
+      CGF.EmitOMPPrivateClause(S, LoopScope);
+      HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
+      CGF.EmitOMPReductionClauseInit(S, LoopScope);
+      (void)LoopScope.Privatize();
+
       // Emit static non-chunked loop.
       CGF.CGM.getOpenMPRuntime().emitForInit(
           CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
-          /*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(),
-          ST.getAddress());
+          /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
+          LB.getAddress(), UB.getAddress(), ST.getAddress());
       // UB = min(UB, GlobalUB);
       auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
       auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
@@ -1177,24 +1250,63 @@
       // IV = LB;
       CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
       // while (idx <= UB) { BODY; ++idx; }
-      CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen);
+      CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
+                           [](CodeGenFunction &) {});
       // Tell the runtime we are done.
-      CGF.CGM.getOpenMPRuntime().emitForFinish(CGF, S.getLocStart(),
-                                               OMPC_SCHEDULE_static);
+      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart());
+      CGF.EmitOMPReductionClauseFinal(S);
+
+      // Emit final copy of the lastprivate variables if IsLastIter != 0.
+      if (HasLastprivates)
+        CGF.EmitOMPLastprivateClauseFinal(
+            S, CGF.Builder.CreateIsNotNull(
+                   CGF.EmitLoadOfScalar(IL, S.getLocStart())));
     };
 
     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, CodeGen);
+    // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
+    // clause. Otherwise the barrier will be generated by the codegen for the
+    // directive.
+    if (HasLastprivates && S.getSingleClause(OMPC_nowait)) {
+      // Emit implicit barrier to synchronize threads and avoid data races on
+      // the final copy-back of lastprivate variables.
+      CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
+                                                 OMPD_unknown);
+    }
     return OMPD_sections;
   }
   // If only one section is found - no need to generate loop, emit as a single
   // region.
-  auto &&CodeGen = [Stmt](CodeGenFunction &CGF) {
+  bool HasFirstprivates;
+  // No need to generate reductions for sections with single section region, we
+  // can use original shared variables for all operations.
+  bool HasReductions = !S.getClausesOfKind(OMPC_reduction).empty();
+  // No need to generate lastprivates for sections with single section region,
+  // we can use original shared variable for all calculations with barrier at
+  // the end of the sections.
+  bool HasLastprivates = !S.getClausesOfKind(OMPC_lastprivate).empty();
+  auto &&CodeGen = [Stmt, &S, &HasFirstprivates](CodeGenFunction &CGF) {
+    CodeGenFunction::OMPPrivateScope SingleScope(CGF);
+    HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope);
+    CGF.EmitOMPPrivateClause(S, SingleScope);
+    (void)SingleScope.Privatize();
+
     CGF.EmitStmt(Stmt);
     CGF.EnsureInsertPoint();
   };
   CGF.CGM.getOpenMPRuntime().emitSingleRegion(CGF, CodeGen, S.getLocStart(),
                                               llvm::None, llvm::None,
                                               llvm::None, llvm::None);
+  // Emit barrier for firstprivates, lastprivates or reductions only if
+  // 'sections' directive has 'nowait' clause. Otherwise the barrier will be
+  // generated by the codegen for the directive.
+  if ((HasFirstprivates || HasLastprivates || HasReductions) &&
+      S.getSingleClause(OMPC_nowait)) {
+    // Emit implicit barrier to synchronize threads and avoid data races on
+    // firstprivate initialization and lastprivate/reduction copy-back.
+    CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
+                                               OMPD_unknown);
+  }
   return OMPD_single;
 }
 
@@ -1224,14 +1336,9 @@
   // Check if there are any 'copyprivate' clauses associated with this
   // 'single'
   // construct.
-  auto CopyprivateFilter = [](const OMPClause *C) -> bool {
-    return C->getClauseKind() == OMPC_copyprivate;
-  };
   // Build a list of copyprivate variables along with helper expressions
   // (<source>, <destination>, <destination>=<source> expressions)
-  typedef OMPExecutableDirective::filtered_clause_iterator<decltype(
-      CopyprivateFilter)> CopyprivateIter;
-  for (CopyprivateIter I(S.clauses(), CopyprivateFilter); I; ++I) {
+  for (auto &&I = S.getClausesOfKind(OMPC_copyprivate); I; ++I) {
     auto *C = cast<OMPCopyprivateClause>(*I);
     CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
     DestExprs.append(C->destination_exprs().begin(),
@@ -1242,16 +1349,26 @@
   }
   LexicalScope Scope(*this, S.getSourceRange());
   // Emit code for 'single' region along with 'copyprivate' clauses
-  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+  bool HasFirstprivates;
+  auto &&CodeGen = [&S, &HasFirstprivates](CodeGenFunction &CGF) {
+    CodeGenFunction::OMPPrivateScope SingleScope(CGF);
+    HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope);
+    CGF.EmitOMPPrivateClause(S, SingleScope);
+    (void)SingleScope.Privatize();
+
     CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
     CGF.EnsureInsertPoint();
   };
   CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
                                           CopyprivateVars, DestExprs, SrcExprs,
                                           AssignmentOps);
-  // Emit an implicit barrier at the end.
-  if (!S.getSingleClause(OMPC_nowait)) {
-    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_single);
+  // Emit an implicit barrier at the end (to avoid data race on firstprivate
+  // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
+  if ((!S.getSingleClause(OMPC_nowait) || HasFirstprivates) &&
+      CopyprivateVars.empty()) {
+    CGM.getOpenMPRuntime().emitBarrierCall(
+        *this, S.getLocStart(),
+        S.getSingleClause(OMPC_nowait) ? OMPD_unknown : OMPD_single);
   }
 }
 
@@ -1279,6 +1396,7 @@
   // Emit directive as a combined directive that consists of two implicit
   // directives: 'parallel' with 'for' directive.
   LexicalScope Scope(*this, S.getSourceRange());
+  (void)emitScheduleClause(*this, S, /*OuterRegion=*/true);
   auto &&CodeGen = [&S](CodeGenFunction &CGF) {
     CGF.EmitOMPWorksharingLoop(S);
     // Emit implicit barrier at the end of parallel region, but this barrier
@@ -1318,11 +1436,84 @@
   auto *PartId = std::next(I);
   // The first function argument for tasks is a thread id, the second one is a
   // part id (0 for tied tasks, >=0 for untied task).
-  auto &&CodeGen = [PartId, &S](CodeGenFunction &CGF) {
+  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
+  // Get list of private variables.
+  llvm::SmallVector<const Expr *, 8> PrivateVars;
+  llvm::SmallVector<const Expr *, 8> PrivateCopies;
+  for (auto &&I = S.getClausesOfKind(OMPC_private); I; ++I) {
+    auto *C = cast<OMPPrivateClause>(*I);
+    auto IRef = C->varlist_begin();
+    for (auto *IInit : C->private_copies()) {
+      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
+      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
+        PrivateVars.push_back(*IRef);
+        PrivateCopies.push_back(IInit);
+      }
+      ++IRef;
+    }
+  }
+  EmittedAsPrivate.clear();
+  // Get list of firstprivate variables.
+  llvm::SmallVector<const Expr *, 8> FirstprivateVars;
+  llvm::SmallVector<const Expr *, 8> FirstprivateCopies;
+  llvm::SmallVector<const Expr *, 8> FirstprivateInits;
+  for (auto &&I = S.getClausesOfKind(OMPC_firstprivate); I; ++I) {
+    auto *C = cast<OMPFirstprivateClause>(*I);
+    auto IRef = C->varlist_begin();
+    auto IElemInitRef = C->inits().begin();
+    for (auto *IInit : C->private_copies()) {
+      auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
+      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
+        FirstprivateVars.push_back(*IRef);
+        FirstprivateCopies.push_back(IInit);
+        FirstprivateInits.push_back(*IElemInitRef);
+      }
+      ++IRef, ++IElemInitRef;
+    }
+  }
+  auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars](
+      CodeGenFunction &CGF) {
+    // Set proper addresses for generated private copies.
+    auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
+    OMPPrivateScope Scope(CGF);
+    if (!PrivateVars.empty() || !FirstprivateVars.empty()) {
+      auto *CopyFn = CGF.Builder.CreateAlignedLoad(
+          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)),
+          CGF.PointerAlignInBytes);
+      auto *PrivatesPtr = CGF.Builder.CreateAlignedLoad(
+          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)),
+          CGF.PointerAlignInBytes);
+      // Map privates.
+      llvm::SmallVector<std::pair<const VarDecl *, llvm::Value *>, 16>
+          PrivatePtrs;
+      llvm::SmallVector<llvm::Value *, 16> CallArgs;
+      CallArgs.push_back(PrivatesPtr);
+      for (auto *E : PrivateVars) {
+        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+        auto *PrivatePtr =
+            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
+        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
+        CallArgs.push_back(PrivatePtr);
+      }
+      for (auto *E : FirstprivateVars) {
+        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+        auto *PrivatePtr =
+            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
+        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
+        CallArgs.push_back(PrivatePtr);
+      }
+      CGF.EmitRuntimeCall(CopyFn, CallArgs);
+      for (auto &&Pair : PrivatePtrs) {
+        auto *Replacement =
+            CGF.Builder.CreateAlignedLoad(Pair.second, CGF.PointerAlignInBytes);
+        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
+      }
+    }
+    (void)Scope.Privatize();
     if (*PartId) {
       // TODO: emit code for untied tasks.
     }
-    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+    CGF.EmitStmt(CS->getCapturedStmt());
   };
   auto OutlinedFn =
       CGM.getOpenMPRuntime().emitTaskOutlinedFunction(S, *I, CodeGen);
@@ -1344,8 +1535,14 @@
     Final.setInt(/*IntVal=*/false);
   }
   auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
-  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), Tied, Final,
-                                      OutlinedFn, SharedsTy, CapturedStruct);
+  const Expr *IfCond = nullptr;
+  if (auto C = S.getSingleClause(OMPC_if)) {
+    IfCond = cast<OMPIfClause>(C)->getCondition();
+  }
+  CGM.getOpenMPRuntime().emitTaskCall(
+      *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy,
+      CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars,
+      FirstprivateCopies, FirstprivateInits);
 }
 
 void CodeGenFunction::EmitOMPTaskyieldDirective(
@@ -1357,8 +1554,8 @@
   CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
 }
 
-void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &) {
-  llvm_unreachable("CodeGen for 'omp taskwait' is not supported yet.");
+void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
+  CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
 }
 
 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
@@ -1372,8 +1569,13 @@
   }(), S.getLocStart());
 }
 
-void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &) {
-  llvm_unreachable("CodeGen for 'omp ordered' is not supported yet.");
+void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
+  LexicalScope Scope(*this, S.getSourceRange());
+  auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+    CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+    CGF.EnsureInsertPoint();
+  };
+  CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart());
 }
 
 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
@@ -1412,6 +1614,35 @@
   return ComplexVal;
 }
 
+static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
+                                  LValue LVal, RValue RVal) {
+  if (LVal.isGlobalReg()) {
+    CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
+  } else {
+    CGF.EmitAtomicStore(RVal, LVal, IsSeqCst ? llvm::SequentiallyConsistent
+                                             : llvm::Monotonic,
+                        LVal.isVolatile(), /*IsInit=*/false);
+  }
+}
+
+static void emitSimpleStore(CodeGenFunction &CGF, LValue LVal, RValue RVal,
+                            QualType RValTy) {
+  switch (CGF.getEvaluationKind(LVal.getType())) {
+  case TEK_Scalar:
+    CGF.EmitStoreThroughLValue(
+        RValue::get(convertToScalarValue(CGF, RVal, RValTy, LVal.getType())),
+        LVal);
+    break;
+  case TEK_Complex:
+    CGF.EmitStoreOfComplex(
+        convertToComplexValue(CGF, RVal, RValTy, LVal.getType()), LVal,
+        /*isInit=*/false);
+    break;
+  case TEK_Aggregate:
+    llvm_unreachable("Must be a scalar or complex.");
+  }
+}
+
 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
                                   const Expr *X, const Expr *V,
                                   SourceLocation Loc) {
@@ -1432,19 +1663,7 @@
   // list.
   if (IsSeqCst)
     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
-  switch (CGF.getEvaluationKind(V->getType())) {
-  case TEK_Scalar:
-    CGF.EmitStoreOfScalar(
-        convertToScalarValue(CGF, Res, X->getType(), V->getType()), VLValue);
-    break;
-  case TEK_Complex:
-    CGF.EmitStoreOfComplex(
-        convertToComplexValue(CGF, Res, X->getType(), V->getType()), VLValue,
-        /*isInit=*/false);
-    break;
-  case TEK_Aggregate:
-    llvm_unreachable("Must be a scalar or complex.");
-  }
+  emitSimpleStore(CGF, VLValue, Res, X->getType().getNonReferenceType());
 }
 
 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
@@ -1452,15 +1671,7 @@
                                    SourceLocation Loc) {
   // x = expr;
   assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
-  LValue XLValue = CGF.EmitLValue(X);
-  RValue ExprRValue = CGF.EmitAnyExpr(E);
-  if (XLValue.isGlobalReg())
-    CGF.EmitStoreThroughGlobalRegLValue(ExprRValue, XLValue);
-  else
-    CGF.EmitAtomicStore(ExprRValue, XLValue,
-                        IsSeqCst ? llvm::SequentiallyConsistent
-                                 : llvm::Monotonic,
-                        XLValue.isVolatile(), /*IsInit=*/false);
+  emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
   // OpenMP, 2.12.6, atomic Construct
   // Any atomic construct with a seq_cst clause forces the atomically
   // performed operation to include an implicit flush operation without a
@@ -1469,21 +1680,24 @@
     CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
 }
 
-bool emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, RValue Update,
-                      BinaryOperatorKind BO, llvm::AtomicOrdering AO,
-                      bool IsXLHSInRHSPart) {
+static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
+                                                RValue Update,
+                                                BinaryOperatorKind BO,
+                                                llvm::AtomicOrdering AO,
+                                                bool IsXLHSInRHSPart) {
   auto &Context = CGF.CGM.getContext();
   // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
   // expression is simple and atomic is allowed for the given type for the
   // target platform.
   if (BO == BO_Comma || !Update.isScalar() ||
-      !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
-      (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
-       (Update.getScalarVal()->getType() !=
-        X.getAddress()->getType()->getPointerElementType())) ||
+      !Update.getScalarVal()->getType()->isIntegerTy() ||
+      !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
+                        (Update.getScalarVal()->getType() !=
+                         X.getAddress()->getType()->getPointerElementType())) ||
+      !X.getAddress()->getType()->getPointerElementType()->isIntegerTy() ||
       !Context.getTargetInfo().hasBuiltinAtomic(
           Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
-    return false;
+    return std::make_pair(false, RValue::get(nullptr));
 
   llvm::AtomicRMWInst::BinOp RMWOp;
   switch (BO) {
@@ -1492,7 +1706,7 @@
     break;
   case BO_Sub:
     if (!IsXLHSInRHSPart)
-      return false;
+      return std::make_pair(false, RValue::get(nullptr));
     RMWOp = llvm::AtomicRMWInst::Sub;
     break;
   case BO_And:
@@ -1518,6 +1732,9 @@
                 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
                                    : llvm::AtomicRMWInst::UMin);
     break;
+  case BO_Assign:
+    RMWOp = llvm::AtomicRMWInst::Xchg;
+    break;
   case BO_Mul:
   case BO_Div:
   case BO_Rem:
@@ -1525,14 +1742,13 @@
   case BO_Shr:
   case BO_LAnd:
   case BO_LOr:
-    return false;
+    return std::make_pair(false, RValue::get(nullptr));
   case BO_PtrMemD:
   case BO_PtrMemI:
   case BO_LE:
   case BO_GE:
   case BO_EQ:
   case BO_NE:
-  case BO_Assign:
   case BO_AddAssign:
   case BO_SubAssign:
   case BO_AndAssign:
@@ -1552,11 +1768,11 @@
         IC, X.getAddress()->getType()->getPointerElementType(),
         X.getType()->hasSignedIntegerRepresentation());
   }
-  CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO);
-  return true;
+  auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO);
+  return std::make_pair(true, RValue::get(Res));
 }
 
-void CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
+std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
     LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
     llvm::AtomicOrdering AO, SourceLocation Loc,
     const llvm::function_ref<RValue(RValue)> &CommonGen) {
@@ -1566,7 +1782,8 @@
   // x--, --x -> xrval - 1;
   // x = x binop expr; -> xrval binop expr
   // x = expr Op x; - > expr binop xrval;
-  if (!emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart)) {
+  auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
+  if (!Res.first) {
     if (X.isGlobalReg()) {
       // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
       // 'xrval'.
@@ -1576,6 +1793,7 @@
       EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
     }
   }
+  return Res;
 }
 
 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
@@ -1605,8 +1823,103 @@
         CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
         return CGF.EmitAnyExpr(UE);
       };
-  CGF.EmitOMPAtomicSimpleUpdateExpr(XLValue, ExprRValue, BOUE->getOpcode(),
-                                    IsXLHSInRHSPart, AO, Loc, Gen);
+  (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
+      XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
+  // OpenMP, 2.12.6, atomic Construct
+  // Any atomic construct with a seq_cst clause forces the atomically
+  // performed operation to include an implicit flush operation without a
+  // list.
+  if (IsSeqCst)
+    CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
+}
+
+static RValue convertToType(CodeGenFunction &CGF, RValue Value,
+                            QualType SourceType, QualType ResType) {
+  switch (CGF.getEvaluationKind(ResType)) {
+  case TEK_Scalar:
+    return RValue::get(convertToScalarValue(CGF, Value, SourceType, ResType));
+  case TEK_Complex: {
+    auto Res = convertToComplexValue(CGF, Value, SourceType, ResType);
+    return RValue::getComplex(Res.first, Res.second);
+  }
+  case TEK_Aggregate:
+    break;
+  }
+  llvm_unreachable("Must be a scalar or complex.");
+}
+
+static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
+                                     bool IsPostfixUpdate, const Expr *V,
+                                     const Expr *X, const Expr *E,
+                                     const Expr *UE, bool IsXLHSInRHSPart,
+                                     SourceLocation Loc) {
+  assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
+  assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
+  RValue NewVVal;
+  LValue VLValue = CGF.EmitLValue(V);
+  LValue XLValue = CGF.EmitLValue(X);
+  RValue ExprRValue = CGF.EmitAnyExpr(E);
+  auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
+  QualType NewVValType;
+  if (UE) {
+    // 'x' is updated with some additional value.
+    assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
+           "Update expr in 'atomic capture' must be a binary operator.");
+    auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
+    // Update expressions are allowed to have the following forms:
+    // x binop= expr; -> xrval + expr;
+    // x++, ++x -> xrval + 1;
+    // x--, --x -> xrval - 1;
+    // x = x binop expr; -> xrval binop expr
+    // x = expr Op x; -> expr binop xrval;
+    auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
+    auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
+    auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
+    NewVValType = XRValExpr->getType();
+    auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
+    auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
+                  IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue {
+      CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
+      CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
+      RValue Res = CGF.EmitAnyExpr(UE);
+      NewVVal = IsPostfixUpdate ? XRValue : Res;
+      return Res;
+    };
+    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
+        XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
+    if (Res.first) {
+      // 'atomicrmw' instruction was generated.
+      if (IsPostfixUpdate) {
+        // Use old value from 'atomicrmw'.
+        NewVVal = Res.second;
+      } else {
+        // 'atomicrmw' does not provide new value, so evaluate it using old
+        // value of 'x'.
+        CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
+        CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
+        NewVVal = CGF.EmitAnyExpr(UE);
+      }
+    }
+  } else {
+    // 'x' is simply rewritten with some 'expr'.
+    NewVValType = X->getType().getNonReferenceType();
+    ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
+                               X->getType().getNonReferenceType());
+    auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue {
+      NewVVal = XRValue;
+      return ExprRValue;
+    };
+    // Try to perform atomicrmw xchg, otherwise simple exchange.
+    auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
+        XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
+        Loc, Gen);
+    if (Res.first) {
+      // 'atomicrmw' instruction was generated.
+      NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
+    }
+  }
+  // Emit post-update store to 'v' of old/new 'x' value.
+  emitSimpleStore(CGF, VLValue, NewVVal, NewVValType);
   // OpenMP, 2.12.6, atomic Construct
   // Any atomic construct with a seq_cst clause forces the atomically
   // performed operation to include an implicit flush operation without a
@@ -1616,9 +1929,10 @@
 }
 
 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
-                              bool IsSeqCst, const Expr *X, const Expr *V,
-                              const Expr *E, const Expr *UE,
-                              bool IsXLHSInRHSPart, SourceLocation Loc) {
+                              bool IsSeqCst, bool IsPostfixUpdate,
+                              const Expr *X, const Expr *V, const Expr *E,
+                              const Expr *UE, bool IsXLHSInRHSPart,
+                              SourceLocation Loc) {
   switch (Kind) {
   case OMPC_read:
     EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
@@ -1631,7 +1945,9 @@
     EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
     break;
   case OMPC_capture:
-    llvm_unreachable("CodeGen for 'omp atomic clause' is not supported yet.");
+    EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
+                             IsXLHSInRHSPart, Loc);
+    break;
   case OMPC_if:
   case OMPC_final:
   case OMPC_num_threads:
@@ -1673,13 +1989,23 @@
 
   const auto *CS =
       S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
-  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS))
+  if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
     enterFullExpression(EWC);
+  }
+  // Processing for statements under 'atomic capture'.
+  if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
+    for (const auto *C : Compound->body()) {
+      if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
+        enterFullExpression(EWC);
+      }
+    }
+  }
 
   LexicalScope Scope(*this, S.getSourceRange());
   auto &&CodeGen = [&S, Kind, IsSeqCst](CodeGenFunction &CGF) {
-    EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
-                      S.getUpdateExpr(), S.isXLHSInRHSPart(), S.getLocStart());
+    EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
+                      S.getV(), S.getExpr(), S.getUpdateExpr(),
+                      S.isXLHSInRHSPart(), S.getLocStart());
   };
   CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
 }
@@ -1691,4 +2017,3 @@
 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
   llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
 }
-
diff --git a/lib/CodeGen/CGVTT.cpp b/lib/CodeGen/CGVTT.cpp
index 895afd7..e3df5a4 100644
--- a/lib/CodeGen/CGVTT.cpp
+++ b/lib/CodeGen/CGVTT.cpp
@@ -177,4 +177,3 @@
   
   return I->second;
 }
-
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 18f505d..5a060b3 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -13,6 +13,7 @@
   ProfileData
   ScalarOpts
   Support
+  Target
   TransformUtils
   )
 
diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp
index 60aac07..7e82fcc 100644
--- a/lib/CodeGen/CodeGenAction.cpp
+++ b/lib/CodeGen/CodeGenAction.cpp
@@ -56,17 +56,17 @@
     std::unique_ptr<llvm::Module> TheModule, LinkModule;
 
   public:
-    BackendConsumer(BackendAction action, DiagnosticsEngine &_Diags,
-                    const CodeGenOptions &compopts,
-                    const TargetOptions &targetopts,
-                    const LangOptions &langopts, bool TimePasses,
-                    const std::string &infile, llvm::Module *LinkModule,
+    BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags,
+                    const CodeGenOptions &CodeGenOpts,
+                    const TargetOptions &TargetOpts,
+                    const LangOptions &LangOpts, bool TimePasses,
+                    const std::string &InFile, llvm::Module *LinkModule,
                     raw_pwrite_stream *OS, LLVMContext &C,
                     CoverageSourceInfo *CoverageInfo = nullptr)
-        : Diags(_Diags), Action(action), CodeGenOpts(compopts),
-          TargetOpts(targetopts), LangOpts(langopts), AsmOutStream(OS),
+        : Diags(Diags), Action(Action), CodeGenOpts(CodeGenOpts),
+          TargetOpts(TargetOpts), LangOpts(LangOpts), AsmOutStream(OS),
           Context(nullptr), LLVMIRGeneration("LLVM IR Generation Time"),
-          Gen(CreateLLVMCodeGen(Diags, infile, compopts, C, CoverageInfo)),
+          Gen(CreateLLVMCodeGen(Diags, InFile, CodeGenOpts, C, CoverageInfo)),
           LinkModule(LinkModule) {
       llvm::TimePassesIsEnabled = TimePasses;
     }
@@ -79,6 +79,11 @@
     }
 
     void Initialize(ASTContext &Ctx) override {
+      if (Context) {
+        assert(Context == &Ctx);
+        return;
+      }
+
       Context = &Ctx;
 
       if (llvm::TimePassesIsEnabled)
@@ -429,13 +434,16 @@
   FileManager &FileMgr = SourceMgr.getFileManager();
   StringRef Filename;
   unsigned Line, Column;
-  D.getLocation(&Filename, &Line, &Column);
   SourceLocation DILoc;
-  const FileEntry *FE = FileMgr.getFile(Filename);
-  if (FE && Line > 0) {
-    // If -gcolumn-info was not used, Column will be 0. This upsets the
-    // source manager, so pass 1 if Column is not set.
-    DILoc = SourceMgr.translateFileLineCol(FE, Line, Column ? Column : 1);
+
+  if (D.isLocationAvailable()) {
+    D.getLocation(&Filename, &Line, &Column);
+    const FileEntry *FE = FileMgr.getFile(Filename);
+    if (FE && Line > 0) {
+      // If -gcolumn-info was not used, Column will be 0. This upsets the
+      // source manager, so pass 1 if Column is not set.
+      DILoc = SourceMgr.translateFileLineCol(FE, Line, Column ? Column : 1);
+    }
   }
 
   // If a location isn't available, try to approximate it using the associated
@@ -450,7 +458,7 @@
       << AddFlagValue(D.getPassName() ? D.getPassName() : "")
       << D.getMsg().str();
 
-  if (DILoc.isInvalid())
+  if (DILoc.isInvalid() && D.isLocationAvailable())
     // If we were not able to translate the file:line:col information
     // back to a SourceLocation, at least emit a note stating that
     // we could not translate this location. This can happen in the
@@ -624,7 +632,7 @@
 std::unique_ptr<ASTConsumer>
 CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
   BackendAction BA = static_cast<BackendAction>(Act);
-  std::unique_ptr<raw_pwrite_stream> OS(GetOutputStream(CI, InFile, BA));
+  raw_pwrite_stream *OS = GetOutputStream(CI, InFile, BA);
   if (BA != Backend_EmitNothing && !OS)
     return nullptr;
 
@@ -661,7 +669,7 @@
   std::unique_ptr<BackendConsumer> Result(new BackendConsumer(
       BA, CI.getDiagnostics(), CI.getCodeGenOpts(), CI.getTargetOpts(),
       CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, InFile,
-      LinkModuleToUse, OS.release(), *VMContext, CoverageInfo));
+      LinkModuleToUse, OS, *VMContext, CoverageInfo));
   BEConsumer = Result.get();
   return std::move(Result);
 }
diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp
index 42c3a42..f370ac2 100644
--- a/lib/CodeGen/CodeGenFunction.cpp
+++ b/lib/CodeGen/CodeGenFunction.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "CodeGenFunction.h"
+#include "CGCleanup.h"
 #include "CGCUDARuntime.h"
 #include "CGCXXABI.h"
 #include "CGDebugInfo.h"
@@ -243,12 +244,13 @@
   // parameters.  Do this in whatever block we're currently in; it's
   // important to do this before we enter the return block or return
   // edges will be *really* confused.
-  bool EmitRetDbgLoc = true;
-  if (EHStack.stable_begin() != PrologueCleanupDepth) {
+  bool HasCleanups = EHStack.stable_begin() != PrologueCleanupDepth;
+  bool HasOnlyLifetimeMarkers =
+      HasCleanups && EHStack.containsOnlyLifetimeMarkers(PrologueCleanupDepth);
+  bool EmitRetDbgLoc = !HasCleanups || HasOnlyLifetimeMarkers;
+  if (HasCleanups) {
     // Make sure the line table doesn't jump back into the body for
     // the ret after it's been at EndLoc.
-    EmitRetDbgLoc = false;
-
     if (CGDebugInfo *DI = getDebugInfo())
       if (OnlySimpleReturnStmts)
         DI->EmitLocation(Builder, EndLoc);
@@ -606,6 +608,22 @@
   if (CGM.isInSanitizerBlacklist(Fn, Loc))
     SanOpts.clear();
 
+  if (D) {
+    // Apply the no_sanitize* attributes to SanOpts.
+    for (auto Attr : D->specific_attrs<NoSanitizeAttr>())
+      SanOpts.Mask &= ~Attr->getMask();
+  }
+
+  // Apply sanitizer attributes to the function.
+  if (SanOpts.has(SanitizerKind::Address))
+    Fn->addFnAttr(llvm::Attribute::SanitizeAddress);
+  if (SanOpts.has(SanitizerKind::Thread))
+    Fn->addFnAttr(llvm::Attribute::SanitizeThread);
+  if (SanOpts.has(SanitizerKind::Memory))
+    Fn->addFnAttr(llvm::Attribute::SanitizeMemory);
+  if (SanOpts.has(SanitizerKind::SafeStack))
+    Fn->addFnAttr(llvm::Attribute::SafeStack);
+
   // Pass inline keyword to optimizer if it appears explicitly on any
   // declaration. Also, in the case of -fno-inline attach NoInline
   // attribute to all function that are not marked AlwaysInline.
@@ -771,8 +789,7 @@
 
 void CodeGenFunction::EmitFunctionBody(FunctionArgList &Args,
                                        const Stmt *Body) {
-  RegionCounter Cnt = getPGORegionCounter(Body);
-  Cnt.beginRegion(Builder);
+  incrementProfileCounter(Body);
   if (const CompoundStmt *S = dyn_cast<CompoundStmt>(Body))
     EmitCompoundStmtWithoutScope(*S);
   else
@@ -784,7 +801,7 @@
 /// emit a branch around the instrumentation code. When not instrumenting,
 /// this just calls EmitBlock().
 void CodeGenFunction::EmitBlockWithFallThrough(llvm::BasicBlock *BB,
-                                               RegionCounter &Cnt) {
+                                               const Stmt *S) {
   llvm::BasicBlock *SkipCountBB = nullptr;
   if (HaveInsertPoint() && CGM.getCodeGenOpts().ProfileInstrGenerate) {
     // When instrumenting for profiling, the fallthrough to certain
@@ -794,7 +811,9 @@
     EmitBranch(SkipCountBB);
   }
   EmitBlock(BB);
-  Cnt.beginRegion(Builder, /*AddIncomingFallThrough=*/true);
+  uint64_t CurrentCount = getCurrentProfileCount();
+  incrementProfileCounter(S);
+  setCurrentProfileCount(getCurrentProfileCount() + CurrentCount);
   if (SkipCountBB)
     EmitBlock(SkipCountBB);
 }
@@ -839,7 +858,7 @@
       ResTy = CGM.getContext().VoidPtrTy;
     CGM.getCXXABI().buildThisParam(*this, Args);
   }
-  
+
   Args.append(FD->param_begin(), FD->param_end());
 
   if (MD && (isa<CXXConstructorDecl>(MD) || isa<CXXDestructorDecl>(MD)))
@@ -875,7 +894,7 @@
   else if (getLangOpts().CUDA &&
            !getLangOpts().CUDAIsDevice &&
            FD->hasAttr<CUDAGlobalAttr>())
-    CGM.getCUDARuntime().EmitDeviceStubBody(*this, Args);
+    CGM.getCUDARuntime().emitDeviceStub(*this, Args);
   else if (isa<CXXConversionDecl>(FD) &&
            cast<CXXConversionDecl>(FD)->isLambdaToBlockPointerConversion()) {
     // The lambda conversion to block pointer is special; the semantics can't be
@@ -912,7 +931,7 @@
                 "missing_return", EmitCheckSourceLocation(FD->getLocation()),
                 None);
     } else if (CGM.getCodeGenOpts().OptimizationLevel == 0)
-      Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::trap));
+      Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::trap), {});
     Builder.CreateUnreachable();
     Builder.ClearInsertionPoint();
   }
@@ -1030,15 +1049,13 @@
 
     // Handle X && Y in a condition.
     if (CondBOp->getOpcode() == BO_LAnd) {
-      RegionCounter Cnt = getPGORegionCounter(CondBOp);
-
       // If we have "1 && X", simplify the code.  "0 && X" would have constant
       // folded if the case was simple enough.
       bool ConstantBool = false;
       if (ConstantFoldsToSimpleInteger(CondBOp->getLHS(), ConstantBool) &&
           ConstantBool) {
         // br(1 && X) -> br(X).
-        Cnt.beginRegion(Builder);
+        incrementProfileCounter(CondBOp);
         return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock,
                                     TrueCount);
       }
@@ -1057,7 +1074,7 @@
       llvm::BasicBlock *LHSTrue = createBasicBlock("land.lhs.true");
       // The counter tells us how often we evaluate RHS, and all of TrueCount
       // can be propagated to that branch.
-      uint64_t RHSCount = Cnt.getCount();
+      uint64_t RHSCount = getProfileCount(CondBOp->getRHS());
 
       ConditionalEvaluation eval(*this);
       {
@@ -1066,8 +1083,10 @@
         EmitBlock(LHSTrue);
       }
 
+      incrementProfileCounter(CondBOp);
+      setCurrentProfileCount(getProfileCount(CondBOp->getRHS()));
+
       // Any temporaries created here are conditional.
-      Cnt.beginRegion(Builder);
       eval.begin(*this);
       EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, TrueCount);
       eval.end(*this);
@@ -1076,15 +1095,13 @@
     }
 
     if (CondBOp->getOpcode() == BO_LOr) {
-      RegionCounter Cnt = getPGORegionCounter(CondBOp);
-
       // If we have "0 || X", simplify the code.  "1 || X" would have constant
       // folded if the case was simple enough.
       bool ConstantBool = false;
       if (ConstantFoldsToSimpleInteger(CondBOp->getLHS(), ConstantBool) &&
           !ConstantBool) {
         // br(0 || X) -> br(X).
-        Cnt.beginRegion(Builder);
+        incrementProfileCounter(CondBOp);
         return EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock,
                                     TrueCount);
       }
@@ -1104,7 +1121,8 @@
       // We have the count for entry to the RHS and for the whole expression
       // being true, so we can divy up True count between the short circuit and
       // the RHS.
-      uint64_t LHSCount = Cnt.getParentCount() - Cnt.getCount();
+      uint64_t LHSCount =
+          getCurrentProfileCount() - getProfileCount(CondBOp->getRHS());
       uint64_t RHSCount = TrueCount - LHSCount;
 
       ConditionalEvaluation eval(*this);
@@ -1114,8 +1132,10 @@
         EmitBlock(LHSFalse);
       }
 
+      incrementProfileCounter(CondBOp);
+      setCurrentProfileCount(getProfileCount(CondBOp->getRHS()));
+
       // Any temporaries created here are conditional.
-      Cnt.beginRegion(Builder);
       eval.begin(*this);
       EmitBranchOnBoolExpr(CondBOp->getRHS(), TrueBlock, FalseBlock, RHSCount);
 
@@ -1129,7 +1149,7 @@
     // br(!x, t, f) -> br(x, f, t)
     if (CondUOp->getOpcode() == UO_LNot) {
       // Negate the count.
-      uint64_t FalseCount = PGO.getCurrentRegionCount() - TrueCount;
+      uint64_t FalseCount = getCurrentProfileCount() - TrueCount;
       // Negate the condition and swap the destination blocks.
       return EmitBranchOnBoolExpr(CondUOp->getSubExpr(), FalseBlock, TrueBlock,
                                   FalseCount);
@@ -1141,9 +1161,9 @@
     llvm::BasicBlock *LHSBlock = createBasicBlock("cond.true");
     llvm::BasicBlock *RHSBlock = createBasicBlock("cond.false");
 
-    RegionCounter Cnt = getPGORegionCounter(CondOp);
     ConditionalEvaluation cond(*this);
-    EmitBranchOnBoolExpr(CondOp->getCond(), LHSBlock, RHSBlock, Cnt.getCount());
+    EmitBranchOnBoolExpr(CondOp->getCond(), LHSBlock, RHSBlock,
+                         getProfileCount(CondOp));
 
     // When computing PGO branch weights, we only know the overall count for
     // the true block. This code is essentially doing tail duplication of the
@@ -1152,13 +1172,14 @@
     // the conditional operator.
     uint64_t LHSScaledTrueCount = 0;
     if (TrueCount) {
-      double LHSRatio = Cnt.getCount() / (double) Cnt.getParentCount();
+      double LHSRatio =
+          getProfileCount(CondOp) / (double)getCurrentProfileCount();
       LHSScaledTrueCount = TrueCount * LHSRatio;
     }
 
     cond.begin(*this);
     EmitBlock(LHSBlock);
-    Cnt.beginRegion(Builder);
+    incrementProfileCounter(CondOp);
     {
       ApplyDebugLocation DL(*this, Cond);
       EmitBranchOnBoolExpr(CondOp->getLHS(), TrueBlock, FalseBlock,
@@ -1187,9 +1208,9 @@
 
   // Create branch weights based on the number of times we get here and the
   // number of times the condition should be true.
-  uint64_t CurrentCount = std::max(PGO.getCurrentRegionCount(), TrueCount);
-  llvm::MDNode *Weights = PGO.createBranchWeights(TrueCount,
-                                                  CurrentCount - TrueCount);
+  uint64_t CurrentCount = std::max(getCurrentProfileCount(), TrueCount);
+  llvm::MDNode *Weights =
+      createProfileWeights(TrueCount, CurrentCount - TrueCount);
 
   // Emit the code with the fully general case.
   llvm::Value *CondV;
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 4e7a7e2..469022d 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -210,8 +210,7 @@
 
     /// \brief Emit the captured statement body.
     virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) {
-      RegionCounter Cnt = CGF.getPGORegionCounter(S);
-      Cnt.beginRegion(CGF.Builder);
+      CGF.incrementProfileCounter(S);
       CGF.EmitStmt(S);
     }
 
@@ -890,12 +889,39 @@
 
   CodeGenPGO PGO;
 
+  /// Calculate branch weights appropriate for PGO data
+  llvm::MDNode *createProfileWeights(uint64_t TrueCount, uint64_t FalseCount);
+  llvm::MDNode *createProfileWeights(ArrayRef<uint64_t> Weights);
+  llvm::MDNode *createProfileWeightsForLoop(const Stmt *Cond,
+                                            uint64_t LoopCount);
+
 public:
-  /// Get a counter for instrumentation of the region associated with the given
-  /// statement.
-  RegionCounter getPGORegionCounter(const Stmt *S) {
-    return RegionCounter(PGO, S);
+  /// Increment the profiler's counter for the given statement.
+  void incrementProfileCounter(const Stmt *S) {
+    if (CGM.getCodeGenOpts().ProfileInstrGenerate)
+      PGO.emitCounterIncrement(Builder, S);
+    PGO.setCurrentStmt(S);
   }
+
+  /// Get the profiler's count for the given statement.
+  uint64_t getProfileCount(const Stmt *S) {
+    Optional<uint64_t> Count = PGO.getStmtCount(S);
+    if (!Count.hasValue())
+      return 0;
+    return *Count;
+  }
+
+  /// Set the profiler's current count.
+  void setCurrentProfileCount(uint64_t Count) {
+    PGO.setCurrentRegionCount(Count);
+  }
+
+  /// Get the profiler's current count. This is generally the count for the most
+  /// recently incremented counter.
+  uint64_t getCurrentProfileCount() {
+    return PGO.getCurrentRegionCount();
+  }
+
 private:
 
   /// SwitchInsn - This is nearest current switch instruction. It is null if
@@ -1221,7 +1247,7 @@
   void EmitDestructorBody(FunctionArgList &Args);
   void emitImplicitAssignmentOperatorBody(FunctionArgList &Args);
   void EmitFunctionBody(FunctionArgList &Args, const Stmt *Body);
-  void EmitBlockWithFallThrough(llvm::BasicBlock *BB, RegionCounter &Cnt);
+  void EmitBlockWithFallThrough(llvm::BasicBlock *BB, const Stmt *S);
 
   void EmitForwardingCallToLambda(const CXXMethodDecl *LambdaCallOperator,
                                   CallArgList &CallArgs);
@@ -1730,6 +1756,9 @@
   void EmitCXXTemporary(const CXXTemporary *Temporary, QualType TempType,
                         llvm::Value *Ptr);
 
+  llvm::Value *EmitLifetimeStart(uint64_t Size, llvm::Value *Addr);
+  void EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr);
+
   llvm::Value *EmitCXXNewExpr(const CXXNewExpr *E);
   void EmitCXXDeleteExpr(const CXXDeleteExpr *E);
 
@@ -2060,7 +2089,9 @@
   /// \param AO Atomic ordering of the generated atomic instructions.
   /// \param CommonGen Code generator for complex expressions that cannot be
   /// expressed through atomicrmw instruction.
-  void EmitOMPAtomicSimpleUpdateExpr(
+  /// \returns <true, OldAtomicValue> if simple 'atomicrmw' instruction was
+  /// generated, <false, RValue::get(nullptr)> otherwise.
+  std::pair<bool, RValue> EmitOMPAtomicSimpleUpdateExpr(
       LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
       llvm::AtomicOrdering AO, SourceLocation Loc,
       const llvm::function_ref<RValue(RValue)> &CommonGen);
@@ -2139,10 +2170,21 @@
   void EmitOMPTargetDirective(const OMPTargetDirective &S);
   void EmitOMPTeamsDirective(const OMPTeamsDirective &S);
 
-  void
-  EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
-                   const Expr *IncExpr,
-                   const llvm::function_ref<void(CodeGenFunction &)> &BodyGen);
+  /// \brief Emit inner loop of the worksharing/simd construct.
+  ///
+  /// \param S Directive, for which the inner loop must be emitted.
+  /// \param RequiresCleanup true, if directive has some associated private
+  /// variables.
+  /// \param LoopCond Boolean condition for loop continuation.
+  /// \param IncExpr Increment expression for loop control variable.
+  /// \param BodyGen Generator for the inner body of the inner loop.
+  /// \param PostIncGen Generator for post-increment code (required for ordered
+  /// loop directives).
+  void EmitOMPInnerLoop(
+      const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
+      const Expr *IncExpr,
+      const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
+      const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen);
 
 private:
 
@@ -2156,9 +2198,9 @@
   bool EmitOMPWorksharingLoop(const OMPLoopDirective &S);
   void EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                            const OMPLoopDirective &S,
-                           OMPPrivateScope &LoopScope, llvm::Value *LB,
-                           llvm::Value *UB, llvm::Value *ST, llvm::Value *IL,
-                           llvm::Value *Chunk);
+                           OMPPrivateScope &LoopScope, bool Ordered,
+                           llvm::Value *LB, llvm::Value *UB, llvm::Value *ST,
+                           llvm::Value *IL, llvm::Value *Chunk);
 
 public:
 
@@ -2230,7 +2272,7 @@
       bool IsWeak = false, AggValueSlot Slot = AggValueSlot::ignored());
 
   void EmitAtomicUpdate(LValue LVal, llvm::AtomicOrdering AO,
-                        const std::function<RValue(RValue)> &UpdateOp,
+                        const llvm::function_ref<RValue(RValue)> &UpdateOp,
                         bool IsVolatile);
 
   /// EmitToMemory - Change a scalar value from its value
@@ -2521,14 +2563,7 @@
   // Helper functions for EmitAArch64BuiltinExpr.
   llvm::Value *vectorWrapScalar8(llvm::Value *Op);
   llvm::Value *vectorWrapScalar16(llvm::Value *Op);
-  llvm::Value *emitVectorWrappedScalar8Intrinsic(
-      unsigned Int, SmallVectorImpl<llvm::Value *> &Ops, const char *Name);
-  llvm::Value *emitVectorWrappedScalar16Intrinsic(
-      unsigned Int, SmallVectorImpl<llvm::Value *> &Ops, const char *Name);
   llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
-  llvm::Value *EmitNeon64Call(llvm::Function *F,
-                              llvm::SmallVectorImpl<llvm::Value *> &O,
-                              const char *name);
 
   llvm::Value *BuildVector(ArrayRef<llvm::Value*> Ops);
   llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
@@ -2793,7 +2828,7 @@
   /// \brief Create a basic block that will call a handler function in a
   /// sanitizer runtime with the provided arguments, and create a conditional
   /// branch to it.
-  void EmitCheck(ArrayRef<std::pair<llvm::Value *, SanitizerKind>> Checked,
+  void EmitCheck(ArrayRef<std::pair<llvm::Value *, SanitizerMask>> Checked,
                  StringRef CheckName, ArrayRef<llvm::Constant *> StaticArgs,
                  ArrayRef<llvm::Value *> DynamicArgs);
 
@@ -2801,6 +2836,11 @@
   /// conditional branch to it, for the -ftrapv checks.
   void EmitTrapCheck(llvm::Value *Checked);
 
+  /// \brief Create a check for a function parameter that may potentially be
+  /// declared as non-null.
+  void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc,
+                           const FunctionDecl *FD, unsigned ParmNum);
+
   /// EmitCallArg - Emit a single call argument.
   void EmitCallArg(CallArgList &args, const Expr *E, QualType ArgType);
 
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index 17b7ddc..af4e6d9 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -205,11 +205,9 @@
 }
 
 void CodeGenModule::applyReplacements() {
-  for (ReplacementsTy::iterator I = Replacements.begin(),
-                                E = Replacements.end();
-       I != E; ++I) {
-    StringRef MangledName = I->first();
-    llvm::Constant *Replacement = I->second;
+  for (auto &I : Replacements) {
+    StringRef MangledName = I.first();
+    llvm::Constant *Replacement = I.second;
     llvm::GlobalValue *Entry = GetGlobalValue(MangledName);
     if (!Entry)
       continue;
@@ -261,9 +259,7 @@
   // and aliases during codegen.
   bool Error = false;
   DiagnosticsEngine &Diags = getDiags();
-  for (std::vector<GlobalDecl>::iterator I = Aliases.begin(),
-         E = Aliases.end(); I != E; ++I) {
-    const GlobalDecl &GD = *I;
+  for (const GlobalDecl &GD : Aliases) {
     const auto *D = cast<ValueDecl>(GD.getDecl());
     const AliasAttr *AA = D->getAttr<AliasAttr>();
     StringRef MangledName = getMangledName(GD);
@@ -310,9 +306,7 @@
   if (!Error)
     return;
 
-  for (std::vector<GlobalDecl>::iterator I = Aliases.begin(),
-         E = Aliases.end(); I != E; ++I) {
-    const GlobalDecl &GD = *I;
+  for (const GlobalDecl &GD : Aliases) {
     StringRef MangledName = getMangledName(GD);
     llvm::GlobalValue *Entry = GetGlobalValue(MangledName);
     auto *Alias = cast<llvm::GlobalAlias>(Entry);
@@ -350,6 +344,13 @@
   if (ObjCRuntime)
     if (llvm::Function *ObjCInitFunction = ObjCRuntime->ModuleInitFunction())
       AddGlobalCtor(ObjCInitFunction);
+  if (Context.getLangOpts().CUDA && !Context.getLangOpts().CUDAIsDevice &&
+      CUDARuntime) {
+    if (llvm::Function *CudaCtorFunction = CUDARuntime->makeModuleCtorFunction())
+      AddGlobalCtor(CudaCtorFunction);
+    if (llvm::Function *CudaDtorFunction = CUDARuntime->makeModuleDtorFunction())
+      AddGlobalDtor(CudaDtorFunction);
+  }
   if (PGOReader && PGOStats.hasDiagnostics())
     PGOStats.reportDiagnostics(getDiags(), getCodeGenOpts().MainFileName);
   EmitCtorList(GlobalCtors, "llvm.global_ctors");
@@ -630,15 +631,14 @@
       Int32Ty, llvm::PointerType::getUnqual(CtorFTy), VoidPtrTy, nullptr);
 
   // Construct the constructor and destructor arrays.
-  SmallVector<llvm::Constant*, 8> Ctors;
-  for (CtorList::const_iterator I = Fns.begin(), E = Fns.end(); I != E; ++I) {
+  SmallVector<llvm::Constant *, 8> Ctors;
+  for (const auto &I : Fns) {
     llvm::Constant *S[] = {
-      llvm::ConstantInt::get(Int32Ty, I->Priority, false),
-      llvm::ConstantExpr::getBitCast(I->Initializer, CtorPFTy),
-      (I->AssociatedData
-           ? llvm::ConstantExpr::getBitCast(I->AssociatedData, VoidPtrTy)
-           : llvm::Constant::getNullValue(VoidPtrTy))
-    };
+        llvm::ConstantInt::get(Int32Ty, I.Priority, false),
+        llvm::ConstantExpr::getBitCast(I.Initializer, CtorPFTy),
+        (I.AssociatedData
+             ? llvm::ConstantExpr::getBitCast(I.AssociatedData, VoidPtrTy)
+             : llvm::Constant::getNullValue(VoidPtrTy))};
     Ctors.push_back(llvm::ConstantStruct::get(CtorStructTy, S));
   }
 
@@ -669,6 +669,25 @@
   return getLLVMLinkageForDeclarator(D, Linkage, /*isConstantVariable=*/false);
 }
 
+void CodeGenModule::setFunctionDLLStorageClass(GlobalDecl GD, llvm::Function *F) {
+  const auto *FD = cast<FunctionDecl>(GD.getDecl());
+
+  if (const auto *Dtor = dyn_cast_or_null<CXXDestructorDecl>(FD)) {
+    if (getCXXABI().useThunkForDtorVariant(Dtor, GD.getDtorType())) {
+      // Don't dllexport/import destructor thunks.
+      F->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
+      return;
+    }
+  }
+
+  if (FD->hasAttr<DLLImportAttr>())
+    F->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass);
+  else if (FD->hasAttr<DLLExportAttr>())
+    F->setDLLStorageClass(llvm::GlobalVariable::DLLExportStorageClass);
+  else
+    F->setDLLStorageClass(llvm::GlobalVariable::DefaultStorageClass);
+}
+
 void CodeGenModule::setFunctionDefinitionAttributes(const FunctionDecl *D,
                                                     llvm::Function *F) {
   setNonAliasAttributes(D, F);
@@ -745,23 +764,6 @@
   else if (LangOpts.getStackProtector() == LangOptions::SSPReq)
     B.addAttribute(llvm::Attribute::StackProtectReq);
 
-  // Add sanitizer attributes if function is not blacklisted.
-  if (!isInSanitizerBlacklist(F, D->getLocation())) {
-    // When AddressSanitizer is enabled, set SanitizeAddress attribute
-    // unless __attribute__((no_sanitize_address)) is used.
-    if (LangOpts.Sanitize.has(SanitizerKind::Address) &&
-        !D->hasAttr<NoSanitizeAddressAttr>())
-      B.addAttribute(llvm::Attribute::SanitizeAddress);
-    // Same for ThreadSanitizer and __attribute__((no_sanitize_thread))
-    if (LangOpts.Sanitize.has(SanitizerKind::Thread) &&
-        !D->hasAttr<NoSanitizeThreadAttr>())
-      B.addAttribute(llvm::Attribute::SanitizeThread);
-    // Same for MemorySanitizer and __attribute__((no_sanitize_memory))
-    if (LangOpts.Sanitize.has(SanitizerKind::Memory) &&
-        !D->hasAttr<NoSanitizeMemoryAttr>())
-      B.addAttribute(llvm::Attribute::SanitizeMemory);
-  }
-
   F->addAttributes(llvm::AttributeSet::FunctionIndex,
                    llvm::AttributeSet::get(
                        F->getContext(), llvm::AttributeSet::FunctionIndex, B));
@@ -827,7 +829,7 @@
   if (const SectionAttr *SA = D->getAttr<SectionAttr>())
     GO->setSection(SA->getName());
 
-  getTargetCodeGenInfo().SetTargetAttributes(D, GO, *this);
+  getTargetCodeGenInfo().setTargetAttributes(D, GO, *this);
 }
 
 void CodeGenModule::SetInternalFunctionAttributes(const Decl *D,
@@ -869,11 +871,10 @@
 void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F,
                                           bool IsIncompleteFunction,
                                           bool IsThunk) {
-  if (unsigned IID = F->getIntrinsicID()) {
+  if (llvm::Intrinsic::ID IID = F->getIntrinsicID()) {
     // If this is an intrinsic function, set the function's attributes
     // to the intrinsic's attributes.
-    F->setAttributes(llvm::Intrinsic::getAttributes(getLLVMContext(),
-                                                    (llvm::Intrinsic::ID)IID));
+    F->setAttributes(llvm::Intrinsic::getAttributes(getLLVMContext(), IID));
     return;
   }
 
@@ -900,13 +901,6 @@
 
   setLinkageAndVisibilityForGV(F, FD);
 
-  if (const auto *Dtor = dyn_cast_or_null<CXXDestructorDecl>(FD)) {
-    if (getCXXABI().useThunkForDtorVariant(Dtor, GD.getDtorType())) {
-      // Don't dllexport/import destructor thunks.
-      F->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
-    }
-  }
-
   if (const SectionAttr *SA = FD->getAttr<SectionAttr>())
     F->setSection(SA->getName());
 
@@ -920,13 +914,13 @@
 void CodeGenModule::addUsedGlobal(llvm::GlobalValue *GV) {
   assert(!GV->isDeclaration() &&
          "Only globals with definition can force usage.");
-  LLVMUsed.push_back(GV);
+  LLVMUsed.emplace_back(GV);
 }
 
 void CodeGenModule::addCompilerUsedGlobal(llvm::GlobalValue *GV) {
   assert(!GV->isDeclaration() &&
          "Only globals with definition can force usage.");
-  LLVMCompilerUsed.push_back(GV);
+  LLVMCompilerUsed.emplace_back(GV);
 }
 
 static void emitUsed(CodeGenModule &CGM, StringRef Name,
@@ -1028,12 +1022,9 @@
   SmallVector<clang::Module *, 16> Stack;
 
   // Seed the stack with imported modules.
-  for (llvm::SetVector<clang::Module *>::iterator M = ImportedModules.begin(),
-                                               MEnd = ImportedModules.end();
-       M != MEnd; ++M) {
-    if (Visited.insert(*M).second)
-      Stack.push_back(*M);
-  }
+  for (Module *M : ImportedModules)
+    if (Visited.insert(M).second)
+      Stack.push_back(M);
 
   // Find all of the modules to import, making a little effort to prune
   // non-leaf modules.
@@ -1069,12 +1060,9 @@
   // to linker options inserted by things like #pragma comment().
   SmallVector<llvm::Metadata *, 16> MetadataArgs;
   Visited.clear();
-  for (llvm::SetVector<clang::Module *>::iterator M = LinkModules.begin(),
-                                               MEnd = LinkModules.end();
-       M != MEnd; ++M) {
-    if (Visited.insert(*M).second)
-      addLinkOptionsPostorder(*this, *M, MetadataArgs, Visited);
-  }
+  for (Module *M : LinkModules)
+    if (Visited.insert(M).second)
+      addLinkOptionsPostorder(*this, M, MetadataArgs, Visited);
   std::reverse(MetadataArgs.begin(), MetadataArgs.end());
   LinkerOptionsMetadata.append(MetadataArgs.begin(), MetadataArgs.end());
 
@@ -1777,6 +1765,8 @@
     // handling.
     GV->setConstant(isTypeConstant(D->getType(), false));
 
+    GV->setAlignment(getContext().getDeclAlign(D).getQuantity());
+
     setLinkageAndVisibilityForGV(GV, D);
 
     if (D->getTLSKind()) {
@@ -2464,12 +2454,7 @@
   // declarations).
   auto *Fn = cast<llvm::Function>(GV);
   setFunctionLinkage(GD, Fn);
-  if (D->hasAttr<DLLImportAttr>())
-    GV->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass);
-  else if (D->hasAttr<DLLExportAttr>())
-    GV->setDLLStorageClass(llvm::GlobalVariable::DLLExportStorageClass);
-  else
-    GV->setDLLStorageClass(llvm::GlobalVariable::DefaultStorageClass);
+  setFunctionDLLStorageClass(GD, Fn);
 
   // FIXME: this is redundant with part of setFunctionDefinitionAttributes
   setGlobalVisibility(Fn, D);
@@ -2521,7 +2506,7 @@
 
   // Create the new alias itself, but don't set a name yet.
   auto *GA = llvm::GlobalAlias::create(
-      cast<llvm::PointerType>(Aliasee->getType())->getElementType(), 0,
+      cast<llvm::PointerType>(Aliasee->getType()),
       llvm::Function::ExternalLinkage, "", Aliasee, &getModule());
 
   if (Entry) {
@@ -2688,7 +2673,8 @@
   }
 
   // String.
-  Fields[2] = llvm::ConstantExpr::getGetElementPtr(GV->getType(), GV, Zeros);
+  Fields[2] =
+      llvm::ConstantExpr::getGetElementPtr(GV->getValueType(), GV, Zeros);
 
   if (isUTF16)
     // Cast the UTF16 string to the correct type.
@@ -3354,6 +3340,9 @@
     break;
 
   case Decl::FileScopeAsm: {
+    // File-scope asm is ignored during device-side CUDA compilation.
+    if (LangOpts.CUDA && LangOpts.CUDAIsDevice)
+      break;
     auto *AD = cast<FileScopeAsmDecl>(D);
     getModule().appendModuleInlineAsm(AD->getAsmString()->getString());
     break;
@@ -3363,7 +3352,7 @@
     auto *Import = cast<ImportDecl>(D);
 
     // Ignore import declarations that come from imported modules.
-    if (clang::Module *Owner = Import->getOwningModule()) {
+    if (clang::Module *Owner = Import->getImportedOwningModule()) {
       if (getLangOpts().CurrentModule.empty() ||
           Owner->getTopLevelModule()->Name == getLangOpts().CurrentModule)
         break;
@@ -3508,11 +3497,9 @@
 /// to such functions with an unmangled name from inline assembly within the
 /// same translation unit.
 void CodeGenModule::EmitStaticExternCAliases() {
-  for (StaticExternCMap::iterator I = StaticExternCValues.begin(),
-                                  E = StaticExternCValues.end();
-       I != E; ++I) {
-    IdentifierInfo *Name = I->first;
-    llvm::GlobalValue *Val = I->second;
+  for (auto &I : StaticExternCValues) {
+    IdentifierInfo *Name = I.first;
+    llvm::GlobalValue *Val = I.second;
     if (Val && !getModule().getNamedValue(Name->getName()))
       addUsedGlobal(llvm::GlobalAlias::create(Name->getName(), Val));
   }
@@ -3672,4 +3659,3 @@
       CXXGlobalInits.push_back(InitFunction);
   }
 }
-
diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h
index feef6c2..edde426 100644
--- a/lib/CodeGen/CodeGenModule.h
+++ b/lib/CodeGen/CodeGenModule.h
@@ -329,7 +329,7 @@
   };
   std::vector<DeferredGlobal> DeferredDeclsToEmit;
   void addDeferredDeclToEmit(llvm::GlobalValue *GV, GlobalDecl GD) {
-    DeferredDeclsToEmit.push_back(DeferredGlobal(GV, GD));
+    DeferredDeclsToEmit.emplace_back(GV, GD);
   }
 
   /// List of alias we have emitted. Used to make sure that what they point to
@@ -876,7 +876,7 @@
 
   /// Add a destructor and object to add to the C++ global destructor function.
   void AddCXXDtorEntry(llvm::Constant *DtorFn, llvm::Constant *Object) {
-    CXXGlobalDtors.push_back(std::make_pair(DtorFn, Object));
+    CXXGlobalDtors.emplace_back(DtorFn, Object);
   }
 
   /// Create a new runtime function with the specified type and name.
@@ -1018,6 +1018,9 @@
     F->setLinkage(getFunctionLinkage(GD));
   }
 
+  /// Set the DLL storage class on F.
+  void setFunctionDLLStorageClass(GlobalDecl GD, llvm::Function *F);
+
   /// Return the appropriate linkage for the vtable, VTT, and type information
   /// of the given class.
   llvm::GlobalVariable::LinkageTypes getVTableLinkage(const CXXRecordDecl *RD);
diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp
index cc6ac20..f182a46 100644
--- a/lib/CodeGen/CodeGenPGO.cpp
+++ b/lib/CodeGen/CodeGenPGO.cpp
@@ -242,6 +242,9 @@
   /// next statement, such as at the exit of a loop.
   bool RecordNextStmtCount;
 
+  /// The count at the current location in the traversal.
+  uint64_t CurrentCount;
+
   /// The map of statements to count values.
   llvm::DenseMap<const Stmt *, uint64_t> &CountMap;
 
@@ -259,11 +262,17 @@
 
   void RecordStmtCount(const Stmt *S) {
     if (RecordNextStmtCount) {
-      CountMap[S] = PGO.getCurrentRegionCount();
+      CountMap[S] = CurrentCount;
       RecordNextStmtCount = false;
     }
   }
 
+  /// Set and return the current count.
+  uint64_t setCount(uint64_t Count) {
+    CurrentCount = Count;
+    return Count;
+  }
+
   void VisitStmt(const Stmt *S) {
     RecordStmtCount(S);
     for (Stmt::const_child_range I = S->children(); I; ++I) {
@@ -274,9 +283,8 @@
 
   void VisitFunctionDecl(const FunctionDecl *D) {
     // Counter tracks entry to the function body.
-    RegionCounter Cnt(PGO, D->getBody());
-    Cnt.beginRegion();
-    CountMap[D->getBody()] = PGO.getCurrentRegionCount();
+    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
+    CountMap[D->getBody()] = BodyCount;
     Visit(D->getBody());
   }
 
@@ -287,25 +295,22 @@
 
   void VisitCapturedDecl(const CapturedDecl *D) {
     // Counter tracks entry to the capture body.
-    RegionCounter Cnt(PGO, D->getBody());
-    Cnt.beginRegion();
-    CountMap[D->getBody()] = PGO.getCurrentRegionCount();
+    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
+    CountMap[D->getBody()] = BodyCount;
     Visit(D->getBody());
   }
 
   void VisitObjCMethodDecl(const ObjCMethodDecl *D) {
     // Counter tracks entry to the method body.
-    RegionCounter Cnt(PGO, D->getBody());
-    Cnt.beginRegion();
-    CountMap[D->getBody()] = PGO.getCurrentRegionCount();
+    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
+    CountMap[D->getBody()] = BodyCount;
     Visit(D->getBody());
   }
 
   void VisitBlockDecl(const BlockDecl *D) {
     // Counter tracks entry to the block body.
-    RegionCounter Cnt(PGO, D->getBody());
-    Cnt.beginRegion();
-    CountMap[D->getBody()] = PGO.getCurrentRegionCount();
+    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
+    CountMap[D->getBody()] = BodyCount;
     Visit(D->getBody());
   }
 
@@ -313,89 +318,91 @@
     RecordStmtCount(S);
     if (S->getRetValue())
       Visit(S->getRetValue());
-    PGO.setCurrentRegionUnreachable();
+    CurrentCount = 0;
+    RecordNextStmtCount = true;
+  }
+
+  void VisitCXXThrowExpr(const CXXThrowExpr *E) {
+    RecordStmtCount(E);
+    if (E->getSubExpr())
+      Visit(E->getSubExpr());
+    CurrentCount = 0;
     RecordNextStmtCount = true;
   }
 
   void VisitGotoStmt(const GotoStmt *S) {
     RecordStmtCount(S);
-    PGO.setCurrentRegionUnreachable();
+    CurrentCount = 0;
     RecordNextStmtCount = true;
   }
 
   void VisitLabelStmt(const LabelStmt *S) {
     RecordNextStmtCount = false;
     // Counter tracks the block following the label.
-    RegionCounter Cnt(PGO, S);
-    Cnt.beginRegion();
-    CountMap[S] = PGO.getCurrentRegionCount();
+    uint64_t BlockCount = setCount(PGO.getRegionCount(S));
+    CountMap[S] = BlockCount;
     Visit(S->getSubStmt());
   }
 
   void VisitBreakStmt(const BreakStmt *S) {
     RecordStmtCount(S);
     assert(!BreakContinueStack.empty() && "break not in a loop or switch!");
-    BreakContinueStack.back().BreakCount += PGO.getCurrentRegionCount();
-    PGO.setCurrentRegionUnreachable();
+    BreakContinueStack.back().BreakCount += CurrentCount;
+    CurrentCount = 0;
     RecordNextStmtCount = true;
   }
 
   void VisitContinueStmt(const ContinueStmt *S) {
     RecordStmtCount(S);
     assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
-    BreakContinueStack.back().ContinueCount += PGO.getCurrentRegionCount();
-    PGO.setCurrentRegionUnreachable();
+    BreakContinueStack.back().ContinueCount += CurrentCount;
+    CurrentCount = 0;
     RecordNextStmtCount = true;
   }
 
   void VisitWhileStmt(const WhileStmt *S) {
     RecordStmtCount(S);
-    // Counter tracks the body of the loop.
-    RegionCounter Cnt(PGO, S);
+    uint64_t ParentCount = CurrentCount;
+
     BreakContinueStack.push_back(BreakContinue());
     // Visit the body region first so the break/continue adjustments can be
     // included when visiting the condition.
-    Cnt.beginRegion();
-    CountMap[S->getBody()] = PGO.getCurrentRegionCount();
+    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
+    CountMap[S->getBody()] = CurrentCount;
     Visit(S->getBody());
-    Cnt.adjustForControlFlow();
+    uint64_t BackedgeCount = CurrentCount;
 
     // ...then go back and propagate counts through the condition. The count
     // at the start of the condition is the sum of the incoming edges,
     // the backedge from the end of the loop body, and the edges from
     // continue statements.
     BreakContinue BC = BreakContinueStack.pop_back_val();
-    Cnt.setCurrentRegionCount(Cnt.getParentCount() + Cnt.getAdjustedCount() +
-                              BC.ContinueCount);
-    CountMap[S->getCond()] = PGO.getCurrentRegionCount();
+    uint64_t CondCount =
+        setCount(ParentCount + BackedgeCount + BC.ContinueCount);
+    CountMap[S->getCond()] = CondCount;
     Visit(S->getCond());
-    Cnt.adjustForControlFlow();
-    Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
+    setCount(BC.BreakCount + CondCount - BodyCount);
     RecordNextStmtCount = true;
   }
 
   void VisitDoStmt(const DoStmt *S) {
     RecordStmtCount(S);
-    // Counter tracks the body of the loop.
-    RegionCounter Cnt(PGO, S);
+    uint64_t LoopCount = PGO.getRegionCount(S);
+
     BreakContinueStack.push_back(BreakContinue());
-    Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
-    CountMap[S->getBody()] = PGO.getCurrentRegionCount();
+    // The count doesn't include the fallthrough from the parent scope. Add it.
+    uint64_t BodyCount = setCount(LoopCount + CurrentCount);
+    CountMap[S->getBody()] = BodyCount;
     Visit(S->getBody());
-    Cnt.adjustForControlFlow();
+    uint64_t BackedgeCount = CurrentCount;
 
     BreakContinue BC = BreakContinueStack.pop_back_val();
     // The count at the start of the condition is equal to the count at the
-    // end of the body. The adjusted count does not include either the
-    // fall-through count coming into the loop or the continue count, so add
-    // both of those separately. This is coincidentally the same equation as
-    // with while loops but for different reasons.
-    Cnt.setCurrentRegionCount(Cnt.getParentCount() + Cnt.getAdjustedCount() +
-                              BC.ContinueCount);
-    CountMap[S->getCond()] = PGO.getCurrentRegionCount();
+    // end of the body, plus any continues.
+    uint64_t CondCount = setCount(BackedgeCount + BC.ContinueCount);
+    CountMap[S->getCond()] = CondCount;
     Visit(S->getCond());
-    Cnt.adjustForControlFlow();
-    Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
+    setCount(BC.BreakCount + CondCount - LoopCount);
     RecordNextStmtCount = true;
   }
 
@@ -403,94 +410,89 @@
     RecordStmtCount(S);
     if (S->getInit())
       Visit(S->getInit());
-    // Counter tracks the body of the loop.
-    RegionCounter Cnt(PGO, S);
+
+    uint64_t ParentCount = CurrentCount;
+
     BreakContinueStack.push_back(BreakContinue());
     // Visit the body region first. (This is basically the same as a while
     // loop; see further comments in VisitWhileStmt.)
-    Cnt.beginRegion();
-    CountMap[S->getBody()] = PGO.getCurrentRegionCount();
+    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
+    CountMap[S->getBody()] = BodyCount;
     Visit(S->getBody());
-    Cnt.adjustForControlFlow();
+    uint64_t BackedgeCount = CurrentCount;
+    BreakContinue BC = BreakContinueStack.pop_back_val();
 
     // The increment is essentially part of the body but it needs to include
     // the count for all the continue statements.
     if (S->getInc()) {
-      Cnt.setCurrentRegionCount(PGO.getCurrentRegionCount() +
-                                BreakContinueStack.back().ContinueCount);
-      CountMap[S->getInc()] = PGO.getCurrentRegionCount();
+      uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
+      CountMap[S->getInc()] = IncCount;
       Visit(S->getInc());
-      Cnt.adjustForControlFlow();
     }
 
-    BreakContinue BC = BreakContinueStack.pop_back_val();
-
     // ...then go back and propagate counts through the condition.
+    uint64_t CondCount =
+        setCount(ParentCount + BackedgeCount + BC.ContinueCount);
     if (S->getCond()) {
-      Cnt.setCurrentRegionCount(Cnt.getParentCount() + Cnt.getAdjustedCount() +
-                                BC.ContinueCount);
-      CountMap[S->getCond()] = PGO.getCurrentRegionCount();
+      CountMap[S->getCond()] = CondCount;
       Visit(S->getCond());
-      Cnt.adjustForControlFlow();
     }
-    Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
+    setCount(BC.BreakCount + CondCount - BodyCount);
     RecordNextStmtCount = true;
   }
 
   void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
     RecordStmtCount(S);
+    Visit(S->getLoopVarStmt());
     Visit(S->getRangeStmt());
     Visit(S->getBeginEndStmt());
-    // Counter tracks the body of the loop.
-    RegionCounter Cnt(PGO, S);
+
+    uint64_t ParentCount = CurrentCount;
     BreakContinueStack.push_back(BreakContinue());
     // Visit the body region first. (This is basically the same as a while
     // loop; see further comments in VisitWhileStmt.)
-    Cnt.beginRegion();
-    CountMap[S->getLoopVarStmt()] = PGO.getCurrentRegionCount();
-    Visit(S->getLoopVarStmt());
+    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
+    CountMap[S->getBody()] = BodyCount;
     Visit(S->getBody());
-    Cnt.adjustForControlFlow();
+    uint64_t BackedgeCount = CurrentCount;
+    BreakContinue BC = BreakContinueStack.pop_back_val();
 
     // The increment is essentially part of the body but it needs to include
     // the count for all the continue statements.
-    Cnt.setCurrentRegionCount(PGO.getCurrentRegionCount() +
-                              BreakContinueStack.back().ContinueCount);
-    CountMap[S->getInc()] = PGO.getCurrentRegionCount();
+    uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
+    CountMap[S->getInc()] = IncCount;
     Visit(S->getInc());
-    Cnt.adjustForControlFlow();
-
-    BreakContinue BC = BreakContinueStack.pop_back_val();
 
     // ...then go back and propagate counts through the condition.
-    Cnt.setCurrentRegionCount(Cnt.getParentCount() + Cnt.getAdjustedCount() +
-                              BC.ContinueCount);
-    CountMap[S->getCond()] = PGO.getCurrentRegionCount();
+    uint64_t CondCount =
+        setCount(ParentCount + BackedgeCount + BC.ContinueCount);
+    CountMap[S->getCond()] = CondCount;
     Visit(S->getCond());
-    Cnt.adjustForControlFlow();
-    Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
+    setCount(BC.BreakCount + CondCount - BodyCount);
     RecordNextStmtCount = true;
   }
 
   void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
     RecordStmtCount(S);
     Visit(S->getElement());
-    // Counter tracks the body of the loop.
-    RegionCounter Cnt(PGO, S);
+    uint64_t ParentCount = CurrentCount;
     BreakContinueStack.push_back(BreakContinue());
-    Cnt.beginRegion();
-    CountMap[S->getBody()] = PGO.getCurrentRegionCount();
+    // Counter tracks the body of the loop.
+    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
+    CountMap[S->getBody()] = BodyCount;
     Visit(S->getBody());
+    uint64_t BackedgeCount = CurrentCount;
     BreakContinue BC = BreakContinueStack.pop_back_val();
-    Cnt.adjustForControlFlow();
-    Cnt.applyAdjustmentsToRegion(BC.BreakCount + BC.ContinueCount);
+
+    setCount(BC.BreakCount + ParentCount + BackedgeCount + BC.ContinueCount -
+             BodyCount);
     RecordNextStmtCount = true;
   }
 
   void VisitSwitchStmt(const SwitchStmt *S) {
     RecordStmtCount(S);
     Visit(S->getCond());
-    PGO.setCurrentRegionUnreachable();
+    CurrentCount = 0;
     BreakContinueStack.push_back(BreakContinue());
     Visit(S->getBody());
     // If the switch is inside a loop, add the continue counts.
@@ -498,53 +500,45 @@
     if (!BreakContinueStack.empty())
       BreakContinueStack.back().ContinueCount += BC.ContinueCount;
     // Counter tracks the exit block of the switch.
-    RegionCounter ExitCnt(PGO, S);
-    ExitCnt.beginRegion();
+    setCount(PGO.getRegionCount(S));
     RecordNextStmtCount = true;
   }
 
-  void VisitCaseStmt(const CaseStmt *S) {
+  void VisitSwitchCase(const SwitchCase *S) {
     RecordNextStmtCount = false;
     // Counter for this particular case. This counts only jumps from the
     // switch header and does not include fallthrough from the case before
     // this one.
-    RegionCounter Cnt(PGO, S);
-    Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
-    CountMap[S] = Cnt.getCount();
-    RecordNextStmtCount = true;
-    Visit(S->getSubStmt());
-  }
-
-  void VisitDefaultStmt(const DefaultStmt *S) {
-    RecordNextStmtCount = false;
-    // Counter for this default case. This does not include fallthrough from
-    // the previous case.
-    RegionCounter Cnt(PGO, S);
-    Cnt.beginRegion(/*AddIncomingFallThrough=*/true);
-    CountMap[S] = Cnt.getCount();
+    uint64_t CaseCount = PGO.getRegionCount(S);
+    setCount(CurrentCount + CaseCount);
+    // We need the count without fallthrough in the mapping, so it's more useful
+    // for branch probabilities.
+    CountMap[S] = CaseCount;
     RecordNextStmtCount = true;
     Visit(S->getSubStmt());
   }
 
   void VisitIfStmt(const IfStmt *S) {
     RecordStmtCount(S);
-    // Counter tracks the "then" part of an if statement. The count for
-    // the "else" part, if it exists, will be calculated from this counter.
-    RegionCounter Cnt(PGO, S);
+    uint64_t ParentCount = CurrentCount;
     Visit(S->getCond());
 
-    Cnt.beginRegion();
-    CountMap[S->getThen()] = PGO.getCurrentRegionCount();
+    // Counter tracks the "then" part of an if statement. The count for
+    // the "else" part, if it exists, will be calculated from this counter.
+    uint64_t ThenCount = setCount(PGO.getRegionCount(S));
+    CountMap[S->getThen()] = ThenCount;
     Visit(S->getThen());
-    Cnt.adjustForControlFlow();
+    uint64_t OutCount = CurrentCount;
 
+    uint64_t ElseCount = ParentCount - ThenCount;
     if (S->getElse()) {
-      Cnt.beginElseRegion();
-      CountMap[S->getElse()] = PGO.getCurrentRegionCount();
+      setCount(ElseCount);
+      CountMap[S->getElse()] = ElseCount;
       Visit(S->getElse());
-      Cnt.adjustForControlFlow();
-    }
-    Cnt.applyAdjustmentsToRegion(0);
+      OutCount += CurrentCount;
+    } else
+      OutCount += ElseCount;
+    setCount(OutCount);
     RecordNextStmtCount = true;
   }
 
@@ -554,64 +548,60 @@
     for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
       Visit(S->getHandler(I));
     // Counter tracks the continuation block of the try statement.
-    RegionCounter Cnt(PGO, S);
-    Cnt.beginRegion();
+    setCount(PGO.getRegionCount(S));
     RecordNextStmtCount = true;
   }
 
   void VisitCXXCatchStmt(const CXXCatchStmt *S) {
     RecordNextStmtCount = false;
     // Counter tracks the catch statement's handler block.
-    RegionCounter Cnt(PGO, S);
-    Cnt.beginRegion();
-    CountMap[S] = PGO.getCurrentRegionCount();
+    uint64_t CatchCount = setCount(PGO.getRegionCount(S));
+    CountMap[S] = CatchCount;
     Visit(S->getHandlerBlock());
   }
 
   void VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
     RecordStmtCount(E);
-    // Counter tracks the "true" part of a conditional operator. The
-    // count in the "false" part will be calculated from this counter.
-    RegionCounter Cnt(PGO, E);
+    uint64_t ParentCount = CurrentCount;
     Visit(E->getCond());
 
-    Cnt.beginRegion();
-    CountMap[E->getTrueExpr()] = PGO.getCurrentRegionCount();
+    // Counter tracks the "true" part of a conditional operator. The
+    // count in the "false" part will be calculated from this counter.
+    uint64_t TrueCount = setCount(PGO.getRegionCount(E));
+    CountMap[E->getTrueExpr()] = TrueCount;
     Visit(E->getTrueExpr());
-    Cnt.adjustForControlFlow();
+    uint64_t OutCount = CurrentCount;
 
-    Cnt.beginElseRegion();
-    CountMap[E->getFalseExpr()] = PGO.getCurrentRegionCount();
+    uint64_t FalseCount = setCount(ParentCount - TrueCount);
+    CountMap[E->getFalseExpr()] = FalseCount;
     Visit(E->getFalseExpr());
-    Cnt.adjustForControlFlow();
+    OutCount += CurrentCount;
 
-    Cnt.applyAdjustmentsToRegion(0);
+    setCount(OutCount);
     RecordNextStmtCount = true;
   }
 
   void VisitBinLAnd(const BinaryOperator *E) {
     RecordStmtCount(E);
-    // Counter tracks the right hand side of a logical and operator.
-    RegionCounter Cnt(PGO, E);
+    uint64_t ParentCount = CurrentCount;
     Visit(E->getLHS());
-    Cnt.beginRegion();
-    CountMap[E->getRHS()] = PGO.getCurrentRegionCount();
+    // Counter tracks the right hand side of a logical and operator.
+    uint64_t RHSCount = setCount(PGO.getRegionCount(E));
+    CountMap[E->getRHS()] = RHSCount;
     Visit(E->getRHS());
-    Cnt.adjustForControlFlow();
-    Cnt.applyAdjustmentsToRegion(0);
+    setCount(ParentCount + RHSCount - CurrentCount);
     RecordNextStmtCount = true;
   }
 
   void VisitBinLOr(const BinaryOperator *E) {
     RecordStmtCount(E);
-    // Counter tracks the right hand side of a logical or operator.
-    RegionCounter Cnt(PGO, E);
+    uint64_t ParentCount = CurrentCount;
     Visit(E->getLHS());
-    Cnt.beginRegion();
-    CountMap[E->getRHS()] = PGO.getCurrentRegionCount();
+    // Counter tracks the right hand side of a logical or operator.
+    uint64_t RHSCount = setCount(PGO.getRegionCount(E));
+    CountMap[E->getRHS()] = RHSCount;
     Visit(E->getRHS());
-    Cnt.adjustForControlFlow();
-    Cnt.applyAdjustmentsToRegion(0);
+    setCount(ParentCount + RHSCount - CurrentCount);
     RecordNextStmtCount = true;
   }
 };
@@ -729,7 +719,7 @@
 }
 
 void
-CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef FuncName,
+CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef Name,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
   if (SkipCoverageMapping)
     return;
@@ -749,7 +739,7 @@
   if (CoverageMapping.empty())
     return;
 
-  setFuncName(FuncName, Linkage);
+  setFuncName(Name, Linkage);
   CGM.getCoverageMapping()->addFunctionMappingRecord(
       FuncNameVar, FuncName, FunctionHash, CoverageMapping);
 }
@@ -783,19 +773,23 @@
     // Turn on Cold attribute for cold functions.
     // FIXME: 1% is from preliminary tuning on SPEC, it may not be optimal.
     Fn->addFnAttr(llvm::Attribute::Cold);
+
+  Fn->setEntryCount(FunctionCount);
 }
 
-void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter) {
+void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S) {
   if (!CGM.getCodeGenOpts().ProfileInstrGenerate || !RegionCounterMap)
     return;
   if (!Builder.GetInsertPoint())
     return;
+
+  unsigned Counter = (*RegionCounterMap)[S];
   auto *I8PtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
-  Builder.CreateCall4(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment),
-                      llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
+  Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment),
+                     {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
                       Builder.getInt64(FunctionHash),
                       Builder.getInt32(NumRegionCounters),
-                      Builder.getInt32(Counter));
+                      Builder.getInt32(Counter)});
 }
 
 void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
@@ -839,8 +833,8 @@
   return Scaled;
 }
 
-llvm::MDNode *CodeGenPGO::createBranchWeights(uint64_t TrueCount,
-                                              uint64_t FalseCount) {
+llvm::MDNode *CodeGenFunction::createProfileWeights(uint64_t TrueCount,
+                                                    uint64_t FalseCount) {
   // Check for empty weights.
   if (!TrueCount && !FalseCount)
     return nullptr;
@@ -853,7 +847,8 @@
                                       scaleBranchWeight(FalseCount, Scale));
 }
 
-llvm::MDNode *CodeGenPGO::createBranchWeights(ArrayRef<uint64_t> Weights) {
+llvm::MDNode *
+CodeGenFunction::createProfileWeights(ArrayRef<uint64_t> Weights) {
   // We need at least two elements to create meaningful weights.
   if (Weights.size() < 2)
     return nullptr;
@@ -875,15 +870,14 @@
   return MDHelper.createBranchWeights(ScaledWeights);
 }
 
-llvm::MDNode *CodeGenPGO::createLoopWeights(const Stmt *Cond,
-                                            RegionCounter &Cnt) {
-  if (!haveRegionCounts())
+llvm::MDNode *CodeGenFunction::createProfileWeightsForLoop(const Stmt *Cond,
+                                                           uint64_t LoopCount) {
+  if (!PGO.haveRegionCounts())
     return nullptr;
-  uint64_t LoopCount = Cnt.getCount();
-  Optional<uint64_t> CondCount = getStmtCount(Cond);
+  Optional<uint64_t> CondCount = PGO.getStmtCount(Cond);
   assert(CondCount.hasValue() && "missing expected loop condition count");
   if (*CondCount == 0)
     return nullptr;
-  return createBranchWeights(LoopCount,
-                             std::max(*CondCount, LoopCount) - LoopCount);
+  return createProfileWeights(LoopCount,
+                              std::max(*CondCount, LoopCount) - LoopCount);
 }
diff --git a/lib/CodeGen/CodeGenPGO.h b/lib/CodeGen/CodeGenPGO.h
index c92a057..de6f369 100644
--- a/lib/CodeGen/CodeGenPGO.h
+++ b/lib/CodeGen/CodeGenPGO.h
@@ -24,10 +24,8 @@
 
 namespace clang {
 namespace CodeGen {
-class RegionCounter;
 
-/// Per-function PGO state. This class should generally not be used directly,
-/// but instead through the CodeGenFunction and RegionCounter types.
+/// Per-function PGO state.
 class CodeGenPGO {
 private:
   CodeGenModule &CGM;
@@ -62,11 +60,6 @@
   /// exits.
   void setCurrentRegionCount(uint64_t Count) { CurrentRegionCount = Count; }
 
-  /// Indicate that the current region is never reached, and thus should have a
-  /// counter value of zero. This is important so that subsequent regions can
-  /// correctly track their parent counts.
-  void setCurrentRegionUnreachable() { setCurrentRegionCount(0); }
-
   /// Check if an execution count is known for a given statement. If so, return
   /// true and put the value in Count; else return false.
   Optional<uint64_t> getStmtCount(const Stmt *S) {
@@ -85,11 +78,6 @@
       setCurrentRegionCount(*Count);
   }
 
-  /// Calculate branch weights appropriate for PGO data
-  llvm::MDNode *createBranchWeights(uint64_t TrueCount, uint64_t FalseCount);
-  llvm::MDNode *createBranchWeights(ArrayRef<uint64_t> Weights);
-  llvm::MDNode *createLoopWeights(const Stmt *Cond, RegionCounter &Cnt);
-
   /// Check if we need to emit coverage mapping for a given declaration
   void checkGlobalDecl(GlobalDecl GD);
   /// Assign counters to regions and configure them for PGO of a given
@@ -114,110 +102,16 @@
   void emitCounterVariables();
   void emitCounterRegionMapping(const Decl *D);
 
-  /// Emit code to increment the counter at the given index
-  void emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter);
-
-  /// Return the region counter for the given statement. This should only be
-  /// called on statements that have a dedicated counter.
-  unsigned getRegionCounter(const Stmt *S) {
-    if (!RegionCounterMap)
-      return 0;
-    return (*RegionCounterMap)[S];
-  }
+public:
+  void emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S);
 
   /// Return the region count for the counter at the given index.
-  uint64_t getRegionCount(unsigned Counter) {
+  uint64_t getRegionCount(const Stmt *S) {
+    if (!RegionCounterMap)
+      return 0;
     if (!haveRegionCounts())
       return 0;
-    return RegionCounts[Counter];
-  }
-
-  friend class RegionCounter;
-};
-
-/// A counter for a particular region. This is the primary interface through
-/// which clients manage PGO counters and their values.
-class RegionCounter {
-  CodeGenPGO *PGO;
-  unsigned Counter;
-  uint64_t Count;
-  uint64_t ParentCount;
-  uint64_t RegionCount;
-  int64_t Adjust;
-
-  RegionCounter(CodeGenPGO &PGO, unsigned CounterIndex)
-    : PGO(&PGO), Counter(CounterIndex), Count(PGO.getRegionCount(Counter)),
-      ParentCount(PGO.getCurrentRegionCount()), Adjust(0) {}
-
-public:
-  RegionCounter(CodeGenPGO &PGO, const Stmt *S)
-    : PGO(&PGO), Counter(PGO.getRegionCounter(S)),
-      Count(PGO.getRegionCount(Counter)),
-      ParentCount(PGO.getCurrentRegionCount()), Adjust(0) {}
-
-  /// Get the value of the counter. In most cases this is the number of times
-  /// the region of the counter was entered, but for switch labels it's the
-  /// number of direct jumps to that label.
-  uint64_t getCount() const { return Count; }
-
-  /// Get the value of the counter with adjustments applied. Adjustments occur
-  /// when control enters or leaves the region abnormally; i.e., if there is a
-  /// jump to a label within the region, or if the function can return from
-  /// within the region. The adjusted count, then, is the value of the counter
-  /// at the end of the region.
-  uint64_t getAdjustedCount() const {
-    return Count + Adjust;
-  }
-
-  /// Get the value of the counter in this region's parent, i.e., the region
-  /// that was active when this region began. This is useful for deriving
-  /// counts in implicitly counted regions, like the false case of a condition
-  /// or the normal exits of a loop.
-  uint64_t getParentCount() const { return ParentCount; }
-
-  /// Activate the counter by emitting an increment and starting to track
-  /// adjustments. If AddIncomingFallThrough is true, the current region count
-  /// will be added to the counter for the purposes of tracking the region.
-  void beginRegion(CGBuilderTy &Builder, bool AddIncomingFallThrough=false) {
-    beginRegion(AddIncomingFallThrough);
-    PGO->emitCounterIncrement(Builder, Counter);
-  }
-  void beginRegion(bool AddIncomingFallThrough=false) {
-    RegionCount = Count;
-    if (AddIncomingFallThrough)
-      RegionCount += PGO->getCurrentRegionCount();
-    PGO->setCurrentRegionCount(RegionCount);
-  }
-
-  /// For counters on boolean branches, begins tracking adjustments for the
-  /// uncounted path.
-  void beginElseRegion() {
-    RegionCount = ParentCount - Count;
-    PGO->setCurrentRegionCount(RegionCount);
-  }
-
-  /// Reset the current region count.
-  void setCurrentRegionCount(uint64_t CurrentCount) {
-    RegionCount = CurrentCount;
-    PGO->setCurrentRegionCount(RegionCount);
-  }
-
-  /// Adjust for non-local control flow after emitting a subexpression or
-  /// substatement. This must be called to account for constructs such as gotos,
-  /// labels, and returns, so that we can ensure that our region's count is
-  /// correct in the code that follows.
-  void adjustForControlFlow() {
-    Adjust += PGO->getCurrentRegionCount() - RegionCount;
-    // Reset the region count in case this is called again later.
-    RegionCount = PGO->getCurrentRegionCount();
-  }
-
-  /// Commit all adjustments to the current region. If the region is a loop,
-  /// the LoopAdjust value should be the count of all the breaks and continues
-  /// from the loop, to compensate for those counts being deducted from the
-  /// adjustments for the body of the loop.
-  void applyAdjustmentsToRegion(uint64_t LoopAdjust) {
-    PGO->setCurrentRegionCount(ParentCount + Adjust + LoopAdjust);
+    return RegionCounts[(*RegionCounterMap)[S]];
   }
 };
 
diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp
index 67a9fbe..e0f926c 100644
--- a/lib/CodeGen/CodeGenTypes.cpp
+++ b/lib/CodeGen/CodeGenTypes.cpp
@@ -715,9 +715,16 @@
   // No need to check for member pointers when not compiling C++.
   if (!Context.getLangOpts().CPlusPlus)
     return true;
-  
-  T = Context.getBaseElementType(T);
-  
+
+  if (const auto *AT = Context.getAsArrayType(T)) {
+    if (isa<IncompleteArrayType>(AT))
+      return true;
+    if (const auto *CAT = dyn_cast<ConstantArrayType>(AT))
+      if (Context.getConstantArrayElementCount(CAT) == 0)
+        return true;
+    T = Context.getBaseElementType(T);
+  }
+
   // Records are non-zero-initializable if they contain any
   // non-zero-initializable subobjects.
   if (const RecordType *RT = T->getAs<RecordType>()) {
@@ -733,6 +740,6 @@
   return true;
 }
 
-bool CodeGenTypes::isZeroInitializable(const CXXRecordDecl *RD) {
+bool CodeGenTypes::isZeroInitializable(const RecordDecl *RD) {
   return getCGRecordLayout(RD).isZeroInitializable();
 }
diff --git a/lib/CodeGen/CodeGenTypes.h b/lib/CodeGen/CodeGenTypes.h
index 26d37f3..1580e21 100644
--- a/lib/CodeGen/CodeGenTypes.h
+++ b/lib/CodeGen/CodeGenTypes.h
@@ -115,8 +115,8 @@
   llvm_unreachable("not a CXXDtorType");
 }
 
-/// CodeGenTypes - This class organizes the cross-module state that is used
-/// while lowering AST types to LLVM types.
+/// This class organizes the cross-module state that is used while lowering
+/// AST types to LLVM types.
 class CodeGenTypes {
   CodeGenModule &CGM;
   // Some of this stuff should probably be left on the CGM.
@@ -136,34 +136,32 @@
   /// types are never refined.
   llvm::DenseMap<const ObjCInterfaceType*, llvm::Type *> InterfaceTypes;
 
-  /// CGRecordLayouts - This maps llvm struct type with corresponding
-  /// record layout info.
+  /// Maps clang struct type with corresponding record layout info.
   llvm::DenseMap<const Type*, CGRecordLayout *> CGRecordLayouts;
 
-  /// RecordDeclTypes - This contains the LLVM IR type for any converted
-  /// RecordDecl.
+  /// Contains the LLVM IR type for any converted RecordDecl.
   llvm::DenseMap<const Type*, llvm::StructType *> RecordDeclTypes;
   
-  /// FunctionInfos - Hold memoized CGFunctionInfo results.
+  /// Hold memoized CGFunctionInfo results.
   llvm::FoldingSet<CGFunctionInfo> FunctionInfos;
 
-  /// RecordsBeingLaidOut - This set keeps track of records that we're currently
-  /// converting to an IR type.  For example, when converting:
+  /// This set keeps track of records that we're currently converting
+  /// to an IR type.  For example, when converting:
   /// struct A { struct B { int x; } } when processing 'x', the 'A' and 'B'
   /// types will be in this set.
   llvm::SmallPtrSet<const Type*, 4> RecordsBeingLaidOut;
   
   llvm::SmallPtrSet<const CGFunctionInfo*, 4> FunctionsBeingProcessed;
   
-  /// SkippedLayout - True if we didn't layout a function due to a being inside
+  /// True if we didn't layout a function due to a being inside
   /// a recursive struct conversion, set this to true.
   bool SkippedLayout;
 
   SmallVector<const RecordDecl *, 8> DeferredRecords;
   
 private:
-  /// TypeCache - This map keeps cache of llvm::Types
-  /// and maps clang::Type to corresponding llvm::Type.
+  /// This map keeps cache of llvm::Types and maps clang::Type to
+  /// corresponding llvm::Type.
   llvm::DenseMap<const Type *, llvm::Type *> TypeCache;
 
 public:
@@ -310,7 +308,7 @@
 
   /// IsZeroInitializable - Return whether a record type can be
   /// zero-initialized (in the C++ sense) with an LLVM zeroinitializer.
-  bool isZeroInitializable(const CXXRecordDecl *RD);
+  bool isZeroInitializable(const RecordDecl *RD);
   
   bool isRecordLayoutComplete(const Type *Ty) const;
   bool noRecordsBeingLaidOut() const {
diff --git a/lib/CodeGen/CoverageMappingGen.cpp b/lib/CodeGen/CoverageMappingGen.cpp
index d26eced..024a45d 100644
--- a/lib/CodeGen/CoverageMappingGen.cpp
+++ b/lib/CodeGen/CoverageMappingGen.cpp
@@ -134,18 +134,23 @@
                            : SM.getIncludeLoc(SM.getFileID(Loc));
   }
 
-  /// \brief Get the start of \c S ignoring macro argument locations.
+  /// \brief Return true if \c Loc is a location in a built-in macro.
+  bool isInBuiltin(SourceLocation Loc) {
+    return strcmp(SM.getBufferName(SM.getSpellingLoc(Loc)), "<built-in>") == 0;
+  }
+
+  /// \brief Get the start of \c S ignoring macro arguments and builtin macros.
   SourceLocation getStart(const Stmt *S) {
     SourceLocation Loc = S->getLocStart();
-    while (SM.isMacroArgExpansion(Loc))
+    while (SM.isMacroArgExpansion(Loc) || isInBuiltin(Loc))
       Loc = SM.getImmediateExpansionRange(Loc).first;
     return Loc;
   }
 
-  /// \brief Get the end of \c S ignoring macro argument locations.
+  /// \brief Get the end of \c S ignoring macro arguments and builtin macros.
   SourceLocation getEnd(const Stmt *S) {
     SourceLocation Loc = S->getLocEnd();
-    while (SM.isMacroArgExpansion(Loc))
+    while (SM.isMacroArgExpansion(Loc) || isInBuiltin(Loc))
       Loc = SM.getImmediateExpansionRange(Loc).first;
     return getPreciseTokenLocEnd(Loc);
   }
@@ -447,7 +452,10 @@
   /// This should be used after visiting any statements in non-source order.
   void adjustForOutOfOrderTraversal(SourceLocation EndLoc) {
     MostRecentLocation = EndLoc;
-    if (MostRecentLocation == getEndOfFileOrMacro(MostRecentLocation))
+    // Avoid adding duplicate regions if we have a completed region on the top
+    // of the stack and are adjusting to the end of a virtual file.
+    if (getRegion().hasEndLoc() &&
+        MostRecentLocation == getEndOfFileOrMacro(MostRecentLocation))
       MostRecentLocation = getIncludeOrExpansionLoc(MostRecentLocation);
   }
 
@@ -592,6 +600,13 @@
     terminateRegion(S);
   }
 
+  void VisitCXXThrowExpr(const CXXThrowExpr *E) {
+    extendRegion(E);
+    if (E->getSubExpr())
+      Visit(E->getSubExpr());
+    terminateRegion(E);
+  }
+
   void VisitGotoStmt(const GotoStmt *S) { terminateRegion(S); }
 
   void VisitLabelStmt(const LabelStmt *S) {
@@ -707,8 +722,10 @@
     Counter BackedgeCount = propagateCounts(BodyCount, S->getBody());
     BreakContinue BC = BreakContinueStack.pop_back_val();
 
-    Counter OutCount = addCounters(ParentCount, BC.BreakCount, BC.ContinueCount,
-                                   subtractCounters(BodyCount, BackedgeCount));
+    Counter LoopCount =
+        addCounters(ParentCount, BackedgeCount, BC.ContinueCount);
+    Counter OutCount =
+        addCounters(BC.BreakCount, subtractCounters(LoopCount, BodyCount));
     if (OutCount != ParentCount)
       pushRegion(OutCount);
   }
@@ -725,8 +742,10 @@
     Counter BackedgeCount = propagateCounts(BodyCount, S->getBody());
     BreakContinue BC = BreakContinueStack.pop_back_val();
 
-    Counter OutCount = addCounters(ParentCount, BC.BreakCount, BC.ContinueCount,
-                                   subtractCounters(BodyCount, BackedgeCount));
+    Counter LoopCount =
+        addCounters(ParentCount, BackedgeCount, BC.ContinueCount);
+    Counter OutCount =
+        addCounters(BC.BreakCount, subtractCounters(LoopCount, BodyCount));
     if (OutCount != ParentCount)
       pushRegion(OutCount);
   }
@@ -830,7 +849,13 @@
     Counter ParentCount = getRegion().getCounter();
     Counter TrueCount = getRegionCounter(E);
 
-    propagateCounts(TrueCount, E->getTrueExpr());
+    Visit(E->getCond());
+
+    if (!isa<BinaryConditionalOperator>(E)) {
+      extendRegion(E->getTrueExpr());
+      propagateCounts(TrueCount, E->getTrueExpr());
+    }
+    extendRegion(E->getFalseExpr());
     propagateCounts(subtractCounters(ParentCount, TrueCount),
                     E->getFalseExpr());
   }
@@ -952,7 +977,7 @@
     llvm::sys::fs::make_absolute(Path);
 
     auto I = Entry.second;
-    FilenameStrs[I] = std::move(std::string(Path.begin(), Path.end()));
+    FilenameStrs[I] = std::string(Path.begin(), Path.end());
     FilenameRefs[I] = FilenameStrs[I];
   }
 
diff --git a/lib/CodeGen/EHScopeStack.h b/lib/CodeGen/EHScopeStack.h
index 363d8b8..a795188 100644
--- a/lib/CodeGen/EHScopeStack.h
+++ b/lib/CodeGen/EHScopeStack.h
@@ -319,6 +319,10 @@
   /// Pops a terminate handler off the stack.
   void popTerminate();
 
+  // Returns true iff the current scope is either empty or contains only
+  // lifetime markers, i.e. no real cleanup code
+  bool containsOnlyLifetimeMarkers(stable_iterator Old) const;
+
   /// Determines whether the exception-scopes stack is empty.
   bool empty() const { return StartOfData == EndOfBuffer; }
 
diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp
index eb7ab1d..0a1a4ce 100644
--- a/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/lib/CodeGen/ItaniumCXXABI.cpp
@@ -865,7 +865,7 @@
 /// The Itanium ABI requires non-zero initialization only for data
 /// member pointers, for which '0' is a valid offset.
 bool ItaniumCXXABI::isZeroInitializable(const MemberPointerType *MPT) {
-  return MPT->getPointeeType()->isFunctionType();
+  return MPT->isMemberFunctionPointer();
 }
 
 /// The Itanium ABI always places an offset to the complete object
@@ -2090,7 +2090,7 @@
     CGBuilderTy Builder(Entry);
     if (InitIsInitFunc) {
       if (Init)
-        Builder.CreateCall(Init);
+        Builder.CreateCall(Init, {});
     } else {
       // Don't know whether we have an init function. Call it if it exists.
       llvm::Value *Have = Builder.CreateIsNotNull(Init);
@@ -2099,7 +2099,7 @@
       Builder.CreateCondBr(Have, InitBB, ExitBB);
 
       Builder.SetInsertPoint(InitBB);
-      Builder.CreateCall(Init);
+      Builder.CreateCall(Init, {});
       Builder.CreateBr(ExitBB);
 
       Builder.SetInsertPoint(ExitBB);
@@ -2128,7 +2128,7 @@
   llvm::Value *Val = CGF.CGM.GetAddrOfGlobalVar(VD, Ty);
   llvm::Function *Wrapper = getOrCreateThreadLocalWrapper(VD, Val);
 
-  Val = CGF.Builder.CreateCall(Wrapper);
+  Val = CGF.Builder.CreateCall(Wrapper, {});
 
   LValue LV;
   if (VD->getType()->isReferenceType())
@@ -3220,8 +3220,8 @@
   llvm::PointerType *AliasType = Aliasee->getType();
 
   // Create the alias with no name.
-  auto *Alias = llvm::GlobalAlias::create(
-      AliasType->getElementType(), 0, Linkage, "", Aliasee, &CGM.getModule());
+  auto *Alias = llvm::GlobalAlias::create(AliasType, Linkage, "", Aliasee,
+                                          &CGM.getModule());
 
   // Switch any previous uses to the alias.
   if (Entry) {
@@ -3615,7 +3615,7 @@
     catchCall->setCallingConv(CGM.getRuntimeCC());
 
     // Call std::terminate().
-    llvm::CallInst *termCall = builder.CreateCall(CGM.getTerminateFn());
+    llvm::CallInst *termCall = builder.CreateCall(CGM.getTerminateFn(), {});
     termCall->setDoesNotThrow();
     termCall->setDoesNotReturn();
     termCall->setCallingConv(CGM.getRuntimeCC());
diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp
index f00cd9c..e19ad69 100644
--- a/lib/CodeGen/MicrosoftCXXABI.cpp
+++ b/lib/CodeGen/MicrosoftCXXABI.cpp
@@ -496,7 +496,8 @@
   llvm::Constant *EmitFullMemberPointer(llvm::Constant *FirstField,
                                         bool IsMemberFunction,
                                         const CXXRecordDecl *RD,
-                                        CharUnits NonVirtualBaseAdjustment);
+                                        CharUnits NonVirtualBaseAdjustment,
+                                        unsigned VBTableIndex);
 
   llvm::Constant *BuildMemberPointer(const CXXRecordDecl *RD,
                                      const CXXMethodDecl *MD,
@@ -687,6 +688,8 @@
   /// Map from DeclContext to the current guard variable.  We assume that the
   /// AST is visited in source code order.
   llvm::DenseMap<const DeclContext *, GuardInfo> GuardVariableMap;
+  llvm::DenseMap<const DeclContext *, GuardInfo> ThreadLocalGuardVariableMap;
+  llvm::DenseMap<const DeclContext *, unsigned> ThreadSafeGuardNumMap;
 
   llvm::DenseMap<size_t, llvm::StructType *> TypeDescriptorTypeMap;
   llvm::StructType *BaseClassDescriptorType;
@@ -814,7 +817,7 @@
   if (!CatchParam || !CatchParam->getDeclName()) {
     llvm::Value *Args[2] = {Exn, llvm::Constant::getNullValue(CGF.Int8PtrTy)};
     CGF.EmitNounwindRuntimeCall(BeginCatch, Args);
-    CGF.EHStack.pushCleanup<CallEndCatchMSVC>(NormalAndEHCleanup);
+    CGF.EHStack.pushCleanup<CallEndCatchMSVC>(NormalCleanup);
     return;
   }
 
@@ -823,8 +826,7 @@
       CGF.Builder.CreateBitCast(var.getObjectAddress(CGF), CGF.Int8PtrTy);
   llvm::Value *Args[2] = {Exn, ParamAddr};
   CGF.EmitNounwindRuntimeCall(BeginCatch, Args);
-  // FIXME: Do we really need exceptional endcatch cleanups?
-  CGF.EHStack.pushCleanup<CallEndCatchMSVC>(NormalAndEHCleanup);
+  CGF.EHStack.pushCleanup<CallEndCatchMSVC>(NormalCleanup);
   CGF.EmitAutoVarCleanups(var);
 }
 
@@ -1561,9 +1563,8 @@
         C->setSelectionKind(llvm::Comdat::Largest);
     }
     VFTable = llvm::GlobalAlias::create(
-        cast<llvm::SequentialType>(VTableGEP->getType())->getElementType(),
-        /*AddressSpace=*/0, VFTableLinkage, VFTableName.str(), VTableGEP,
-        &CGM.getModule());
+        cast<llvm::PointerType>(VTableGEP->getType()), VFTableLinkage,
+        VFTableName.str(), VTableGEP, &CGM.getModule());
     VFTable->setUnnamedAddr(true);
   } else {
     // We don't need a GlobalAlias to be a symbol for the VTable if we won't
@@ -2015,6 +2016,81 @@
   return LValue();
 }
 
+static llvm::GlobalVariable *getInitThreadEpochPtr(CodeGenModule &CGM) {
+  StringRef VarName("_Init_thread_epoch");
+  if (auto *GV = CGM.getModule().getNamedGlobal(VarName))
+    return GV;
+  auto *GV = new llvm::GlobalVariable(
+      CGM.getModule(), CGM.IntTy,
+      /*Constant=*/false, llvm::GlobalVariable::ExternalLinkage,
+      /*Initializer=*/nullptr, VarName,
+      /*InsertBefore=*/nullptr, llvm::GlobalVariable::GeneralDynamicTLSModel);
+  GV->setAlignment(CGM.getTarget().getIntAlign() / 8);
+  return GV;
+}
+
+static llvm::Constant *getInitThreadHeaderFn(CodeGenModule &CGM) {
+  llvm::FunctionType *FTy =
+      llvm::FunctionType::get(llvm::Type::getVoidTy(CGM.getLLVMContext()),
+                              CGM.IntTy->getPointerTo(), /*isVarArg=*/false);
+  return CGM.CreateRuntimeFunction(
+      FTy, "_Init_thread_header",
+      llvm::AttributeSet::get(CGM.getLLVMContext(),
+                              llvm::AttributeSet::FunctionIndex,
+                              llvm::Attribute::NoUnwind));
+}
+
+static llvm::Constant *getInitThreadFooterFn(CodeGenModule &CGM) {
+  llvm::FunctionType *FTy =
+      llvm::FunctionType::get(llvm::Type::getVoidTy(CGM.getLLVMContext()),
+                              CGM.IntTy->getPointerTo(), /*isVarArg=*/false);
+  return CGM.CreateRuntimeFunction(
+      FTy, "_Init_thread_footer",
+      llvm::AttributeSet::get(CGM.getLLVMContext(),
+                              llvm::AttributeSet::FunctionIndex,
+                              llvm::Attribute::NoUnwind));
+}
+
+static llvm::Constant *getInitThreadAbortFn(CodeGenModule &CGM) {
+  llvm::FunctionType *FTy =
+      llvm::FunctionType::get(llvm::Type::getVoidTy(CGM.getLLVMContext()),
+                              CGM.IntTy->getPointerTo(), /*isVarArg=*/false);
+  return CGM.CreateRuntimeFunction(
+      FTy, "_Init_thread_abort",
+      llvm::AttributeSet::get(CGM.getLLVMContext(),
+                              llvm::AttributeSet::FunctionIndex,
+                              llvm::Attribute::NoUnwind));
+}
+
+namespace {
+struct ResetGuardBit : EHScopeStack::Cleanup {
+  llvm::GlobalVariable *Guard;
+  unsigned GuardNum;
+  ResetGuardBit(llvm::GlobalVariable *Guard, unsigned GuardNum)
+      : Guard(Guard), GuardNum(GuardNum) {}
+
+  void Emit(CodeGenFunction &CGF, Flags flags) override {
+    // Reset the bit in the mask so that the static variable may be
+    // reinitialized.
+    CGBuilderTy &Builder = CGF.Builder;
+    llvm::LoadInst *LI = Builder.CreateLoad(Guard);
+    llvm::ConstantInt *Mask =
+        llvm::ConstantInt::get(CGF.IntTy, ~(1U << GuardNum));
+    Builder.CreateStore(Builder.CreateAnd(LI, Mask), Guard);
+  }
+};
+
+struct CallInitThreadAbort : EHScopeStack::Cleanup {
+  llvm::GlobalVariable *Guard;
+  CallInitThreadAbort(llvm::GlobalVariable *Guard) : Guard(Guard) {}
+
+  void Emit(CodeGenFunction &CGF, Flags flags) override {
+    // Calling _Init_thread_abort will reset the guard's state.
+    CGF.EmitNounwindRuntimeCall(getInitThreadAbortFn(CGF.CGM), Guard);
+  }
+};
+}
+
 void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D,
                                       llvm::GlobalVariable *GV,
                                       bool PerformInit) {
@@ -2029,89 +2105,154 @@
     return;
   }
 
-  // MSVC always uses an i32 bitfield to guard initialization, which is *not*
-  // threadsafe.  Since the user may be linking in inline functions compiled by
-  // cl.exe, there's no reason to provide a false sense of security by using
-  // critical sections here.
+  bool ThreadlocalStatic = D.getTLSKind();
+  bool ThreadsafeStatic = getContext().getLangOpts().ThreadsafeStatics;
 
-  if (D.getTLSKind())
-    CGM.ErrorUnsupported(&D, "dynamic TLS initialization");
+  // Thread-safe static variables which aren't thread-specific have a
+  // per-variable guard.
+  bool HasPerVariableGuard = ThreadsafeStatic && !ThreadlocalStatic;
 
   CGBuilderTy &Builder = CGF.Builder;
   llvm::IntegerType *GuardTy = CGF.Int32Ty;
   llvm::ConstantInt *Zero = llvm::ConstantInt::get(GuardTy, 0);
 
   // Get the guard variable for this function if we have one already.
-  GuardInfo *GI = &GuardVariableMap[D.getDeclContext()];
+  GuardInfo *GI = nullptr;
+  if (ThreadlocalStatic)
+    GI = &ThreadLocalGuardVariableMap[D.getDeclContext()];
+  else if (!ThreadsafeStatic)
+    GI = &GuardVariableMap[D.getDeclContext()];
 
-  unsigned BitIndex;
-  if (D.isStaticLocal() && D.isExternallyVisible()) {
+  llvm::GlobalVariable *GuardVar = GI ? GI->Guard : nullptr;
+  unsigned GuardNum;
+  if (D.isExternallyVisible()) {
     // Externally visible variables have to be numbered in Sema to properly
     // handle unreachable VarDecls.
-    BitIndex = getContext().getStaticLocalNumber(&D);
-    assert(BitIndex > 0);
-    BitIndex--;
+    GuardNum = getContext().getStaticLocalNumber(&D);
+    assert(GuardNum > 0);
+    GuardNum--;
+  } else if (HasPerVariableGuard) {
+    GuardNum = ThreadSafeGuardNumMap[D.getDeclContext()]++;
   } else {
     // Non-externally visible variables are numbered here in CodeGen.
-    BitIndex = GI->BitIndex++;
+    GuardNum = GI->BitIndex++;
   }
 
-  if (BitIndex >= 32) {
+  if (!HasPerVariableGuard && GuardNum >= 32) {
     if (D.isExternallyVisible())
       ErrorUnsupportedABI(CGF, "more than 32 guarded initializations");
-    BitIndex %= 32;
-    GI->Guard = nullptr;
+    GuardNum %= 32;
+    GuardVar = nullptr;
   }
 
-  // Lazily create the i32 bitfield for this function.
-  if (!GI->Guard) {
+  if (!GuardVar) {
     // Mangle the name for the guard.
     SmallString<256> GuardName;
     {
       llvm::raw_svector_ostream Out(GuardName);
-      getMangleContext().mangleStaticGuardVariable(&D, Out);
+      if (HasPerVariableGuard)
+        getMangleContext().mangleThreadSafeStaticGuardVariable(&D, GuardNum,
+                                                               Out);
+      else
+        getMangleContext().mangleStaticGuardVariable(&D, Out);
       Out.flush();
     }
 
     // Create the guard variable with a zero-initializer. Just absorb linkage,
     // visibility and dll storage class from the guarded variable.
-    GI->Guard =
-        new llvm::GlobalVariable(CGM.getModule(), GuardTy, false,
+    GuardVar =
+        new llvm::GlobalVariable(CGM.getModule(), GuardTy, /*isConstant=*/false,
                                  GV->getLinkage(), Zero, GuardName.str());
-    GI->Guard->setVisibility(GV->getVisibility());
-    GI->Guard->setDLLStorageClass(GV->getDLLStorageClass());
-    if (GI->Guard->isWeakForLinker())
-      GI->Guard->setComdat(
-          CGM.getModule().getOrInsertComdat(GI->Guard->getName()));
-  } else {
-    assert(GI->Guard->getLinkage() == GV->getLinkage() &&
-           "static local from the same function had different linkage");
+    GuardVar->setVisibility(GV->getVisibility());
+    GuardVar->setDLLStorageClass(GV->getDLLStorageClass());
+    if (GuardVar->isWeakForLinker())
+      GuardVar->setComdat(
+          CGM.getModule().getOrInsertComdat(GuardVar->getName()));
+    if (D.getTLSKind())
+      GuardVar->setThreadLocal(true);
+    if (GI && !HasPerVariableGuard)
+      GI->Guard = GuardVar;
   }
 
-  // Pseudo code for the test:
-  // if (!(GuardVar & MyGuardBit)) {
-  //   GuardVar |= MyGuardBit;
-  //   ... initialize the object ...;
-  // }
+  assert(GuardVar->getLinkage() == GV->getLinkage() &&
+         "static local from the same function had different linkage");
 
-  // Test our bit from the guard variable.
-  llvm::ConstantInt *Bit = llvm::ConstantInt::get(GuardTy, 1U << BitIndex);
-  llvm::LoadInst *LI = Builder.CreateLoad(GI->Guard);
-  llvm::Value *IsInitialized =
-      Builder.CreateICmpNE(Builder.CreateAnd(LI, Bit), Zero);
-  llvm::BasicBlock *InitBlock = CGF.createBasicBlock("init");
-  llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end");
-  Builder.CreateCondBr(IsInitialized, EndBlock, InitBlock);
+  if (!HasPerVariableGuard) {
+    // Pseudo code for the test:
+    // if (!(GuardVar & MyGuardBit)) {
+    //   GuardVar |= MyGuardBit;
+    //   ... initialize the object ...;
+    // }
 
-  // Set our bit in the guard variable and emit the initializer and add a global
-  // destructor if appropriate.
-  CGF.EmitBlock(InitBlock);
-  Builder.CreateStore(Builder.CreateOr(LI, Bit), GI->Guard);
-  CGF.EmitCXXGlobalVarDeclInit(D, GV, PerformInit);
-  Builder.CreateBr(EndBlock);
+    // Test our bit from the guard variable.
+    llvm::ConstantInt *Bit = llvm::ConstantInt::get(GuardTy, 1U << GuardNum);
+    llvm::LoadInst *LI = Builder.CreateLoad(GuardVar);
+    llvm::Value *IsInitialized =
+        Builder.CreateICmpNE(Builder.CreateAnd(LI, Bit), Zero);
+    llvm::BasicBlock *InitBlock = CGF.createBasicBlock("init");
+    llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end");
+    Builder.CreateCondBr(IsInitialized, EndBlock, InitBlock);
 
-  // Continue.
-  CGF.EmitBlock(EndBlock);
+    // Set our bit in the guard variable and emit the initializer and add a global
+    // destructor if appropriate.
+    CGF.EmitBlock(InitBlock);
+    Builder.CreateStore(Builder.CreateOr(LI, Bit), GuardVar);
+    CGF.EHStack.pushCleanup<ResetGuardBit>(EHCleanup, GuardVar, GuardNum);
+    CGF.EmitCXXGlobalVarDeclInit(D, GV, PerformInit);
+    CGF.PopCleanupBlock();
+    Builder.CreateBr(EndBlock);
+
+    // Continue.
+    CGF.EmitBlock(EndBlock);
+  } else {
+    // Pseudo code for the test:
+    // if (TSS > _Init_thread_epoch) {
+    //   _Init_thread_header(&TSS);
+    //   if (TSS == -1) {
+    //     ... initialize the object ...;
+    //     _Init_thread_footer(&TSS);
+    //   }
+    // }
+    //
+    // The algorithm is almost identical to what can be found in the appendix
+    // found in N2325.
+
+    unsigned IntAlign = CGM.getTarget().getIntAlign() / 8;
+
+    // This BasicBLock determines whether or not we have any work to do.
+    llvm::LoadInst *FirstGuardLoad =
+        Builder.CreateAlignedLoad(GuardVar, IntAlign);
+    FirstGuardLoad->setOrdering(llvm::AtomicOrdering::Unordered);
+    llvm::LoadInst *InitThreadEpoch =
+        Builder.CreateLoad(getInitThreadEpochPtr(CGM));
+    llvm::Value *IsUninitialized =
+        Builder.CreateICmpSGT(FirstGuardLoad, InitThreadEpoch);
+    llvm::BasicBlock *AttemptInitBlock = CGF.createBasicBlock("init.attempt");
+    llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end");
+    Builder.CreateCondBr(IsUninitialized, AttemptInitBlock, EndBlock);
+
+    // This BasicBlock attempts to determine whether or not this thread is
+    // responsible for doing the initialization.
+    CGF.EmitBlock(AttemptInitBlock);
+    CGF.EmitNounwindRuntimeCall(getInitThreadHeaderFn(CGM), GuardVar);
+    llvm::LoadInst *SecondGuardLoad =
+        Builder.CreateAlignedLoad(GuardVar, IntAlign);
+    SecondGuardLoad->setOrdering(llvm::AtomicOrdering::Unordered);
+    llvm::Value *ShouldDoInit =
+        Builder.CreateICmpEQ(SecondGuardLoad, getAllOnesInt());
+    llvm::BasicBlock *InitBlock = CGF.createBasicBlock("init");
+    Builder.CreateCondBr(ShouldDoInit, InitBlock, EndBlock);
+
+    // Ok, we ended up getting selected as the initializing thread.
+    CGF.EmitBlock(InitBlock);
+    CGF.EHStack.pushCleanup<CallInitThreadAbort>(EHCleanup, GuardVar);
+    CGF.EmitCXXGlobalVarDeclInit(D, GV, PerformInit);
+    CGF.PopCleanupBlock();
+    CGF.EmitNounwindRuntimeCall(getInitThreadFooterFn(CGM), GuardVar);
+    Builder.CreateBr(EndBlock);
+
+    CGF.EmitBlock(EndBlock);
+  }
 }
 
 bool MicrosoftCXXABI::isZeroInitializable(const MemberPointerType *MPT) {
@@ -2192,8 +2333,8 @@
 MicrosoftCXXABI::EmitFullMemberPointer(llvm::Constant *FirstField,
                                        bool IsMemberFunction,
                                        const CXXRecordDecl *RD,
-                                       CharUnits NonVirtualBaseAdjustment)
-{
+                                       CharUnits NonVirtualBaseAdjustment,
+                                       unsigned VBTableIndex) {
   MSInheritanceAttr::Spelling Inheritance = RD->getMSInheritanceModel();
 
   // Single inheritance class member pointer are represented as scalars instead
@@ -2217,7 +2358,7 @@
 
   // The rest of the fields are adjusted by conversions to a more derived class.
   if (MSInheritanceAttr::hasVBTableOffsetField(Inheritance))
-    fields.push_back(getZeroInt());
+    fields.push_back(llvm::ConstantInt::get(CGM.IntTy, VBTableIndex));
 
   return llvm::ConstantStruct::getAnon(fields);
 }
@@ -2229,7 +2370,7 @@
   llvm::Constant *FirstField =
     llvm::ConstantInt::get(CGM.IntTy, offset.getQuantity());
   return EmitFullMemberPointer(FirstField, /*IsMemberFunction=*/false, RD,
-                               CharUnits::Zero());
+                               CharUnits::Zero(), /*VBTableIndex=*/0);
 }
 
 llvm::Constant *MicrosoftCXXABI::EmitMemberPointer(const CXXMethodDecl *MD) {
@@ -2265,6 +2406,7 @@
   RD = RD->getMostRecentDecl();
   CodeGenTypes &Types = CGM.getTypes();
 
+  unsigned VBTableIndex = 0;
   llvm::Constant *FirstField;
   const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
   if (!MD->isVirtual()) {
@@ -2281,31 +2423,20 @@
     FirstField = CGM.GetAddrOfFunction(MD, Ty);
     FirstField = llvm::ConstantExpr::getBitCast(FirstField, CGM.VoidPtrTy);
   } else {
+    auto &VTableContext = CGM.getMicrosoftVTableContext();
     MicrosoftVTableContext::MethodVFTableLocation ML =
-        CGM.getMicrosoftVTableContext().getMethodVFTableLocation(MD);
-    if (!CGM.getTypes().isFuncTypeConvertible(
-            MD->getType()->castAs<FunctionType>())) {
-      CGM.ErrorUnsupported(MD, "pointer to virtual member function with "
-                               "incomplete return or parameter type");
-      FirstField = llvm::Constant::getNullValue(CGM.VoidPtrTy);
-    } else if (FPT->getCallConv() == CC_X86FastCall) {
-      CGM.ErrorUnsupported(MD, "pointer to fastcall virtual member function");
-      FirstField = llvm::Constant::getNullValue(CGM.VoidPtrTy);
-    } else if (ML.VBase) {
-      CGM.ErrorUnsupported(MD, "pointer to virtual member function overriding "
-                               "member function in virtual base class");
-      FirstField = llvm::Constant::getNullValue(CGM.VoidPtrTy);
-    } else {
-      llvm::Function *Thunk = EmitVirtualMemPtrThunk(MD, ML);
-      FirstField = llvm::ConstantExpr::getBitCast(Thunk, CGM.VoidPtrTy);
-      // Include the vfptr adjustment if the method is in a non-primary vftable.
-      NonVirtualBaseAdjustment += ML.VFPtrOffset;
-    }
+        VTableContext.getMethodVFTableLocation(MD);
+    llvm::Function *Thunk = EmitVirtualMemPtrThunk(MD, ML);
+    FirstField = llvm::ConstantExpr::getBitCast(Thunk, CGM.VoidPtrTy);
+    // Include the vfptr adjustment if the method is in a non-primary vftable.
+    NonVirtualBaseAdjustment += ML.VFPtrOffset;
+    if (ML.VBase)
+      VBTableIndex = VTableContext.getVBTableIndex(RD, ML.VBase) * 4;
   }
 
   // The rest of the fields are common with data member pointers.
   return EmitFullMemberPointer(FirstField, /*IsMemberFunction=*/true, RD,
-                               NonVirtualBaseAdjustment);
+                               NonVirtualBaseAdjustment, VBTableIndex);
 }
 
 /// Member pointers are the same if they're either bitwise identical *or* both
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 48c85e6..d6f009e 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -108,6 +108,10 @@
   return false;
 }
 
+bool ABIInfo::shouldSignExtUnsignedType(QualType Ty) const {
+  return false;
+}
+
 void ABIArgInfo::dump() const {
   raw_ostream &OS = llvm::errs();
   OS << "(ABIArgInfo Kind=";
@@ -406,8 +410,16 @@
 }
 
 ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const {
-  if (isAggregateTypeForABI(Ty))
+  Ty = useFirstFieldIfTransparentUnion(Ty);
+
+  if (isAggregateTypeForABI(Ty)) {
+    // Records with non-trivial destructors/copy-constructors should not be
+    // passed by value.
+    if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
+      return ABIArgInfo::getIndirect(0, RAA == CGCXXABI::RAA_DirectInMemory);
+
     return ABIArgInfo::getIndirect(0);
+  }
 
   // Treat an enum type as its underlying type.
   if (const EnumType *EnumTy = Ty->getAs<EnumType>())
@@ -637,7 +649,7 @@
   static bool isStructReturnInRegABI(
       const llvm::Triple &Triple, const CodeGenOptions &Opts);
 
-  void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &CGM) const override;
 
   int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
@@ -814,7 +826,8 @@
   return ABIArgInfo::getIndirect(/*Align=*/0, /*ByVal=*/false);
 }
 
-ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, CCState &State) const {
+ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy,
+                                             CCState &State) const {
   if (RetTy->isVoidType())
     return ABIArgInfo::getIgnore();
 
@@ -1318,7 +1331,7 @@
   }
 }
 
-void X86_32TargetCodeGenInfo::SetTargetAttributes(const Decl *D,
+void X86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D,
                                                   llvm::GlobalValue *GV,
                                             CodeGen::CodeGenModule &CGM) const {
   if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
@@ -1483,14 +1496,13 @@
     return !getTarget().getTriple().isOSDarwin();
   }
 
-  bool HasAVX;
   // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on
   // 64-bit hardware.
   bool Has64BitPointers;
 
 public:
-  X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, bool hasavx) :
-      ABIInfo(CGT), HasAVX(hasavx),
+  X86_64ABIInfo(CodeGen::CodeGenTypes &CGT) :
+      ABIInfo(CGT),
       Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) {
   }
 
@@ -1515,6 +1527,10 @@
   bool has64BitPointers() const {
     return Has64BitPointers;
   }
+
+  bool hasAVX() const {
+    return getTarget().getABI() == "avx";
+  }
 };
 
 /// WinX86_64ABIInfo - The Windows X86_64 ABI information.
@@ -1544,10 +1560,9 @@
 };
 
 class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
-  bool HasAVX;
 public:
-  X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool HasAVX)
-      : TargetCodeGenInfo(new X86_64ABIInfo(CGT, HasAVX)), HasAVX(HasAVX) {}
+  X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
+      : TargetCodeGenInfo(new X86_64ABIInfo(CGT)) {}
 
   const X86_64ABIInfo &getABIInfo() const {
     return static_cast<const X86_64ABIInfo&>(TargetCodeGenInfo::getABIInfo());
@@ -1615,14 +1630,14 @@
   }
 
   unsigned getOpenMPSimdDefaultAlignment(QualType) const override {
-    return HasAVX ? 32 : 16;
+    return getABIInfo().hasAVX() ? 32 : 16;
   }
 };
 
 class PS4TargetCodeGenInfo : public X86_64TargetCodeGenInfo {
 public:
-  PS4TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool HasAVX)
-    : X86_64TargetCodeGenInfo(CGT, HasAVX) {}
+  PS4TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
+    : X86_64TargetCodeGenInfo(CGT) {}
 
   void getDependentLibraryOption(llvm::StringRef Lib,
                                  llvm::SmallString<24> &Opt) const override {
@@ -1650,7 +1665,7 @@
         bool d, bool p, bool w, unsigned RegParms)
     : X86_32TargetCodeGenInfo(CGT, d, p, w, RegParms) {}
 
-  void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &CGM) const override;
 
   void getDependentLibraryOption(llvm::StringRef Lib,
@@ -1673,26 +1688,28 @@
     if (CGM.getCodeGenOpts().StackProbeSize != 4096) {
       llvm::Function *Fn = cast<llvm::Function>(GV);
 
-      Fn->addFnAttr("stack-probe-size", llvm::utostr(CGM.getCodeGenOpts().StackProbeSize));
+      Fn->addFnAttr("stack-probe-size",
+                    llvm::utostr(CGM.getCodeGenOpts().StackProbeSize));
     }
   }
 }
 
-void WinX86_32TargetCodeGenInfo::SetTargetAttributes(const Decl *D,
+void WinX86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D,
                                                      llvm::GlobalValue *GV,
                                             CodeGen::CodeGenModule &CGM) const {
-  X86_32TargetCodeGenInfo::SetTargetAttributes(D, GV, CGM);
+  X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
 
   addStackProbeSizeTargetAttribute(D, GV, CGM);
 }
 
 class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo {
-  bool HasAVX;
-public:
-  WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool HasAVX)
-    : TargetCodeGenInfo(new WinX86_64ABIInfo(CGT)), HasAVX(HasAVX) {}
+  bool hasAVX() const { return getABIInfo().getTarget().getABI() == "avx"; }
 
-  void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+public:
+  WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
+    : TargetCodeGenInfo(new WinX86_64ABIInfo(CGT)) {}
+
+  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &CGM) const override;
 
   int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
@@ -1722,14 +1739,14 @@
   }
 
   unsigned getOpenMPSimdDefaultAlignment(QualType) const override {
-    return HasAVX ? 32 : 16;
+    return hasAVX() ? 32 : 16;
   }
 };
 
-void WinX86_64TargetCodeGenInfo::SetTargetAttributes(const Decl *D,
+void WinX86_64TargetCodeGenInfo::setTargetAttributes(const Decl *D,
                                                      llvm::GlobalValue *GV,
                                             CodeGen::CodeGenModule &CGM) const {
-  TargetCodeGenInfo::SetTargetAttributes(D, GV, CGM);
+  TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
 
   addStackProbeSizeTargetAttribute(D, GV, CGM);
 }
@@ -1911,7 +1928,7 @@
       // split.
       if (OffsetBase && OffsetBase != 64)
         Hi = Lo;
-    } else if (Size == 128 || (HasAVX && isNamedArg && Size == 256)) {
+    } else if (Size == 128 || (hasAVX() && isNamedArg && Size == 256)) {
       // Arguments of 256-bits are split into four eightbyte chunks. The
       // least significant one belongs to class SSE and all the others to class
       // SSEUP. The original Lo and Hi design considers that types can't be
@@ -2133,7 +2150,7 @@
 bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const {
   if (const VectorType *VecTy = Ty->getAs<VectorType>()) {
     uint64_t Size = getContext().getTypeSize(VecTy);
-    unsigned LargestVector = HasAVX ? 256 : 128;
+    unsigned LargestVector = hasAVX() ? 256 : 128;
     if (Size <= 64 || Size > LargestVector)
       return true;
   }
@@ -2210,9 +2227,16 @@
     Ty = QualType(InnerTy, 0);
 
   llvm::Type *IRType = CGT.ConvertType(Ty);
-  assert(isa<llvm::VectorType>(IRType) &&
-         "Trying to return a non-vector type in a vector register!");
-  return IRType;
+  if(isa<llvm::VectorType>(IRType))
+    return IRType;
+
+  // We couldn't find the preferred IR vector type for 'Ty'.
+  uint64_t Size = getContext().getTypeSize(Ty);
+  assert((Size == 128 || Size == 256) && "Invalid type found!");
+
+  // Return a LLVM IR vector type based on the size of 'Ty'.
+  return llvm::VectorType::get(llvm::Type::getDoubleTy(getVMContext()),
+                               Size / 64);
 }
 
 /// BitsContainNoUserData - Return true if the specified [start,end) bit range
@@ -2832,7 +2856,7 @@
   unsigned neededInt, neededSSE;
 
   Ty = CGF.getContext().getCanonicalType(Ty);
-  ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE, 
+  ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE,
                                        /*isNamedArg*/false);
 
   // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
@@ -3111,7 +3135,8 @@
 
 class PPC32TargetCodeGenInfo : public TargetCodeGenInfo {
 public:
-  PPC32TargetCodeGenInfo(CodeGenTypes &CGT) : TargetCodeGenInfo(new PPC32_SVR4_ABIInfo(CGT)) {}
+  PPC32TargetCodeGenInfo(CodeGenTypes &CGT)
+      : TargetCodeGenInfo(new PPC32_SVR4_ABIInfo(CGT)) {}
 
   int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
     // This is recovered from gcc output.
@@ -3138,19 +3163,25 @@
   }
 
   bool isI64 = Ty->isIntegerType() && getContext().getTypeSize(Ty) == 64;
-  bool isInt = Ty->isIntegerType() || Ty->isPointerType() || Ty->isAggregateType();
+  bool isInt =
+      Ty->isIntegerType() || Ty->isPointerType() || Ty->isAggregateType();
   llvm::Type *CharPtr = CGF.Int8PtrTy;
   llvm::Type *CharPtrPtr = CGF.Int8PtrPtrTy;
 
   CGBuilderTy &Builder = CGF.Builder;
   llvm::Value *GPRPtr = Builder.CreateBitCast(VAListAddr, CharPtr, "gprptr");
   llvm::Value *GPRPtrAsInt = Builder.CreatePtrToInt(GPRPtr, CGF.Int32Ty);
-  llvm::Value *FPRPtrAsInt = Builder.CreateAdd(GPRPtrAsInt, Builder.getInt32(1));
+  llvm::Value *FPRPtrAsInt =
+      Builder.CreateAdd(GPRPtrAsInt, Builder.getInt32(1));
   llvm::Value *FPRPtr = Builder.CreateIntToPtr(FPRPtrAsInt, CharPtr);
-  llvm::Value *OverflowAreaPtrAsInt = Builder.CreateAdd(FPRPtrAsInt, Builder.getInt32(3));
-  llvm::Value *OverflowAreaPtr = Builder.CreateIntToPtr(OverflowAreaPtrAsInt, CharPtrPtr);
-  llvm::Value *RegsaveAreaPtrAsInt = Builder.CreateAdd(OverflowAreaPtrAsInt, Builder.getInt32(4));
-  llvm::Value *RegsaveAreaPtr = Builder.CreateIntToPtr(RegsaveAreaPtrAsInt, CharPtrPtr);
+  llvm::Value *OverflowAreaPtrAsInt =
+      Builder.CreateAdd(FPRPtrAsInt, Builder.getInt32(3));
+  llvm::Value *OverflowAreaPtr =
+      Builder.CreateIntToPtr(OverflowAreaPtrAsInt, CharPtrPtr);
+  llvm::Value *RegsaveAreaPtrAsInt =
+      Builder.CreateAdd(OverflowAreaPtrAsInt, Builder.getInt32(4));
+  llvm::Value *RegsaveAreaPtr =
+      Builder.CreateIntToPtr(RegsaveAreaPtrAsInt, CharPtrPtr);
   llvm::Value *GPR = Builder.CreateLoad(GPRPtr, false, "gpr");
   // Align GPR when TY is i64.
   if (isI64) {
@@ -3160,18 +3191,23 @@
     GPR = Builder.CreateSelect(CC64, GPRPlusOne, GPR);
   }
   llvm::Value *FPR = Builder.CreateLoad(FPRPtr, false, "fpr");
-  llvm::Value *OverflowArea = Builder.CreateLoad(OverflowAreaPtr, false, "overflow_area");
-  llvm::Value *OverflowAreaAsInt = Builder.CreatePtrToInt(OverflowArea, CGF.Int32Ty);
-  llvm::Value *RegsaveArea = Builder.CreateLoad(RegsaveAreaPtr, false, "regsave_area");
-  llvm::Value *RegsaveAreaAsInt = Builder.CreatePtrToInt(RegsaveArea, CGF.Int32Ty);
+  llvm::Value *OverflowArea =
+      Builder.CreateLoad(OverflowAreaPtr, false, "overflow_area");
+  llvm::Value *OverflowAreaAsInt =
+      Builder.CreatePtrToInt(OverflowArea, CGF.Int32Ty);
+  llvm::Value *RegsaveArea =
+      Builder.CreateLoad(RegsaveAreaPtr, false, "regsave_area");
+  llvm::Value *RegsaveAreaAsInt =
+      Builder.CreatePtrToInt(RegsaveArea, CGF.Int32Ty);
 
-  llvm::Value *CC = Builder.CreateICmpULT(isInt ? GPR : FPR,
-                                          Builder.getInt8(8), "cond");
+  llvm::Value *CC =
+      Builder.CreateICmpULT(isInt ? GPR : FPR, Builder.getInt8(8), "cond");
 
-  llvm::Value *RegConstant = Builder.CreateMul(isInt ? GPR : FPR,
-                                               Builder.getInt8(isInt ? 4 : 8));
+  llvm::Value *RegConstant =
+      Builder.CreateMul(isInt ? GPR : FPR, Builder.getInt8(isInt ? 4 : 8));
 
-  llvm::Value *OurReg = Builder.CreateAdd(RegsaveAreaAsInt, Builder.CreateSExt(RegConstant, CGF.Int32Ty));
+  llvm::Value *OurReg = Builder.CreateAdd(
+      RegsaveAreaAsInt, Builder.CreateSExt(RegConstant, CGF.Int32Ty));
 
   if (Ty->isFloatingType())
     OurReg = Builder.CreateAdd(OurReg, Builder.getInt32(32));
@@ -3200,8 +3236,10 @@
 
   // Increase the overflow area.
   llvm::Value *Result2 = Builder.CreateIntToPtr(OverflowAreaAsInt, PTy);
-  OverflowAreaAsInt = Builder.CreateAdd(OverflowAreaAsInt, Builder.getInt32(isInt ? 4 : 8));
-  Builder.CreateStore(Builder.CreateIntToPtr(OverflowAreaAsInt, CharPtr), OverflowAreaPtr);
+  OverflowAreaAsInt =
+      Builder.CreateAdd(OverflowAreaAsInt, Builder.getInt32(isInt ? 4 : 8));
+  Builder.CreateStore(Builder.CreateIntToPtr(OverflowAreaAsInt, CharPtr),
+                      OverflowAreaPtr);
   CGF.EmitBranch(Cont);
 
   CGF.EmitBlock(Cont);
@@ -3211,7 +3249,7 @@
   Result->addIncoming(Result2, UsingOverflow);
 
   if (Ty->isAggregateType()) {
-    llvm::Value *AGGPtr = Builder.CreateBitCast(Result, CharPtrPtr, "aggrptr")  ;
+    llvm::Value *AGGPtr = Builder.CreateBitCast(Result, CharPtrPtr, "aggrptr");
     return Builder.CreateLoad(AGGPtr, false, "aggr");
   }
 
@@ -3780,8 +3818,10 @@
     llvm::Value *RealAddr = Builder.CreatePtrToInt(Addr, CGF.Int64Ty);
     llvm::Value *ImagAddr = RealAddr;
     if (CGF.CGM.getDataLayout().isBigEndian()) {
-      RealAddr = Builder.CreateAdd(RealAddr, Builder.getInt64(8 - CplxBaseSize));
-      ImagAddr = Builder.CreateAdd(ImagAddr, Builder.getInt64(16 - CplxBaseSize));
+      RealAddr =
+          Builder.CreateAdd(RealAddr, Builder.getInt64(8 - CplxBaseSize));
+      ImagAddr =
+          Builder.CreateAdd(ImagAddr, Builder.getInt64(16 - CplxBaseSize));
     } else {
       ImagAddr = Builder.CreateAdd(ImagAddr, Builder.getInt64(8));
     }
@@ -4049,7 +4089,15 @@
   // Aggregates <= 16 bytes are returned directly in registers or on the stack.
   uint64_t Size = getContext().getTypeSize(RetTy);
   if (Size <= 128) {
+    unsigned Alignment = getContext().getTypeAlign(RetTy);
     Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes
+
+    // We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
+    // For aggregates with 16-byte alignment, we use i128.
+    if (Alignment < 128 && Size == 128) {
+      llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext());
+      return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64));
+    }
     return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
   }
 
@@ -4336,8 +4384,9 @@
   return ResAddr;
 }
 
-llvm::Value *AArch64ABIInfo::EmitDarwinVAArg(llvm::Value *VAListAddr, QualType Ty,
-                                           CodeGenFunction &CGF) const {
+llvm::Value *AArch64ABIInfo::EmitDarwinVAArg(llvm::Value *VAListAddr,
+                                             QualType Ty,
+                                             CodeGenFunction &CGF) const {
   // We do not support va_arg for aggregates or illegal vector types.
   // Lower VAArg here for these cases and use the LLVM va_arg instruction for
   // other cases.
@@ -4493,7 +4542,7 @@
     return TargetCodeGenInfo::getSizeOfUnwindException();
   }
 
-  void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &CGM) const override {
     const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
     if (!FD)
@@ -4540,7 +4589,7 @@
   WindowsARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIInfo::ABIKind K)
       : ARMTargetCodeGenInfo(CGT, K) {}
 
-  void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &CGM) const override;
 };
 
@@ -4556,16 +4605,17 @@
                llvm::utostr(CGM.getCodeGenOpts().StackProbeSize));
 }
 
-void WindowsARMTargetCodeGenInfo::SetTargetAttributes(
+void WindowsARMTargetCodeGenInfo::setTargetAttributes(
     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
-  ARMTargetCodeGenInfo::SetTargetAttributes(D, GV, CGM);
+  ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
   addStackProbeSizeTargetAttribute(D, GV, CGM);
 }
 }
 
 void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const {
   if (!getCXXABI().classifyReturnType(FI))
-    FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic());
+    FI.getReturnInfo() =
+        classifyReturnType(FI.getReturnType(), FI.isVariadic());
 
   for (auto &I : FI.arguments())
     I.info = classifyArgumentType(I.type, FI.isVariadic());
@@ -5010,7 +5060,7 @@
   NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
     : TargetCodeGenInfo(new NVPTXABIInfo(CGT)) {}
 
-  void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &M) const override;
 private:
   // Adds a NamedMDNode with F, Name, and Operand as operands, and adds the
@@ -5066,7 +5116,7 @@
 }
 
 void NVPTXTargetCodeGenInfo::
-SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                     CodeGen::CodeGenModule &M) const{
   const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
   if (!FD) return;
@@ -5095,18 +5145,22 @@
       // Create !{<func-ref>, metadata !"kernel", i32 1} node
       addNVVMMetadata(F, "kernel", 1);
     }
-    if (FD->hasAttr<CUDALaunchBoundsAttr>()) {
+    if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) {
       // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
-      addNVVMMetadata(F, "maxntidx",
-                      FD->getAttr<CUDALaunchBoundsAttr>()->getMaxThreads());
-      // min blocks is a default argument for CUDALaunchBoundsAttr, so getting a
-      // zero value from getMinBlocks either means it was not specified in
-      // __launch_bounds__ or the user specified a 0 value. In both cases, we
-      // don't have to add a PTX directive.
-      int MinCTASM = FD->getAttr<CUDALaunchBoundsAttr>()->getMinBlocks();
-      if (MinCTASM > 0) {
-        // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
-        addNVVMMetadata(F, "minctasm", MinCTASM);
+      llvm::APSInt MaxThreads(32);
+      MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(M.getContext());
+      if (MaxThreads > 0)
+        addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue());
+
+      // min blocks is an optional argument for CUDALaunchBoundsAttr. If it was
+      // not specified in __launch_bounds__ or if the user specified a 0 value,
+      // we don't have to add a PTX directive.
+      if (Attr->getMinBlocks()) {
+        llvm::APSInt MinBlocks(32);
+        MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(M.getContext());
+        if (MinBlocks > 0)
+          // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
+          addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue());
       }
     }
   }
@@ -5136,12 +5190,17 @@
 namespace {
 
 class SystemZABIInfo : public ABIInfo {
+  bool HasVector;
+
 public:
-  SystemZABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {}
+  SystemZABIInfo(CodeGenTypes &CGT, bool HV)
+    : ABIInfo(CGT), HasVector(HV) {}
 
   bool isPromotableIntegerType(QualType Ty) const;
   bool isCompoundType(QualType Ty) const;
+  bool isVectorArgumentType(QualType Ty) const;
   bool isFPArgumentType(QualType Ty) const;
+  QualType GetSingleElementType(QualType Ty) const;
 
   ABIArgInfo classifyReturnType(QualType RetTy) const;
   ABIArgInfo classifyArgumentType(QualType ArgTy) const;
@@ -5159,8 +5218,8 @@
 
 class SystemZTargetCodeGenInfo : public TargetCodeGenInfo {
 public:
-  SystemZTargetCodeGenInfo(CodeGenTypes &CGT)
-    : TargetCodeGenInfo(new SystemZABIInfo(CGT)) {}
+  SystemZTargetCodeGenInfo(CodeGenTypes &CGT, bool HasVector)
+    : TargetCodeGenInfo(new SystemZABIInfo(CGT, HasVector)) {}
 };
 
 }
@@ -5192,6 +5251,12 @@
           isAggregateTypeForABI(Ty));
 }
 
+bool SystemZABIInfo::isVectorArgumentType(QualType Ty) const {
+  return (HasVector &&
+          Ty->isVectorType() &&
+          getContext().getTypeSize(Ty) <= 128);
+}
+
 bool SystemZABIInfo::isFPArgumentType(QualType Ty) const {
   if (const BuiltinType *BT = Ty->getAs<BuiltinType>())
     switch (BT->getKind()) {
@@ -5202,9 +5267,13 @@
       return false;
     }
 
+  return false;
+}
+
+QualType SystemZABIInfo::GetSingleElementType(QualType Ty) const {
   if (const RecordType *RT = Ty->getAsStructureType()) {
     const RecordDecl *RD = RT->getDecl();
-    bool Found = false;
+    QualType Found;
 
     // If this is a C++ record, check the bases first.
     if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
@@ -5215,11 +5284,9 @@
         if (isEmptyRecord(getContext(), Base, true))
           continue;
 
-        if (Found)
-          return false;
-        Found = isFPArgumentType(Base);
-        if (!Found)
-          return false;
+        if (!Found.isNull())
+          return Ty;
+        Found = GetSingleElementType(Base);
       }
 
     // Check the fields.
@@ -5232,20 +5299,19 @@
         continue;
 
       // Unlike isSingleElementStruct(), arrays do not count.
-      // Nested isFPArgumentType structures still do though.
-      if (Found)
-        return false;
-      Found = isFPArgumentType(FD->getType());
-      if (!Found)
-        return false;
+      // Nested structures still do though.
+      if (!Found.isNull())
+        return Ty;
+      Found = GetSingleElementType(FD->getType());
     }
 
     // Unlike isSingleElementStruct(), trailing padding is allowed.
     // An 8-byte aligned struct s { float f; } is passed as a double.
-    return Found;
+    if (!Found.isNull())
+      return Found;
   }
 
-  return false;
+  return Ty;
 }
 
 llvm::Value *SystemZABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
@@ -5258,14 +5324,16 @@
   //   i8 *__reg_save_area;
   // };
 
-  // Every argument occupies 8 bytes and is passed by preference in either
-  // GPRs or FPRs.
+  // Every non-vector argument occupies 8 bytes and is passed by preference
+  // in either GPRs or FPRs.  Vector arguments occupy 8 or 16 bytes and are
+  // always passed on the stack.
   Ty = CGF.getContext().getCanonicalType(Ty);
   llvm::Type *ArgTy = CGF.ConvertTypeForMem(Ty);
   llvm::Type *APTy = llvm::PointerType::getUnqual(ArgTy);
   ABIArgInfo AI = classifyArgumentType(Ty);
   bool IsIndirect = AI.isIndirect();
   bool InFPRs = false;
+  bool IsVector = false;
   unsigned UnpaddedBitSize;
   if (IsIndirect) {
     APTy = llvm::PointerType::getUnqual(APTy);
@@ -5274,14 +5342,38 @@
     if (AI.getCoerceToType())
       ArgTy = AI.getCoerceToType();
     InFPRs = ArgTy->isFloatTy() || ArgTy->isDoubleTy();
+    IsVector = ArgTy->isVectorTy();
     UnpaddedBitSize = getContext().getTypeSize(Ty);
   }
-  unsigned PaddedBitSize = 64;
+  unsigned PaddedBitSize = (IsVector && UnpaddedBitSize > 64) ? 128 : 64;
   assert((UnpaddedBitSize <= PaddedBitSize) && "Invalid argument size.");
 
   unsigned PaddedSize = PaddedBitSize / 8;
   unsigned Padding = (PaddedBitSize - UnpaddedBitSize) / 8;
 
+  llvm::Type *IndexTy = CGF.Int64Ty;
+  llvm::Value *PaddedSizeV = llvm::ConstantInt::get(IndexTy, PaddedSize);
+
+  if (IsVector) {
+    // Work out the address of a vector argument on the stack.
+    // Vector arguments are always passed in the high bits of a
+    // single (8 byte) or double (16 byte) stack slot.
+    llvm::Value *OverflowArgAreaPtr =
+      CGF.Builder.CreateStructGEP(nullptr, VAListAddr, 2,
+                                  "overflow_arg_area_ptr");
+    llvm::Value *OverflowArgArea =
+      CGF.Builder.CreateLoad(OverflowArgAreaPtr, "overflow_arg_area");
+    llvm::Value *MemAddr =
+      CGF.Builder.CreateBitCast(OverflowArgArea, APTy, "mem_addr");
+
+    // Update overflow_arg_area_ptr pointer
+    llvm::Value *NewOverflowArgArea =
+      CGF.Builder.CreateGEP(OverflowArgArea, PaddedSizeV, "overflow_arg_area");
+    CGF.Builder.CreateStore(NewOverflowArgArea, OverflowArgAreaPtr);
+
+    return MemAddr;
+  }
+
   unsigned MaxRegs, RegCountField, RegSaveIndex, RegPadding;
   if (InFPRs) {
     MaxRegs = 4; // Maximum of 4 FPR arguments
@@ -5298,7 +5390,6 @@
   llvm::Value *RegCountPtr = CGF.Builder.CreateStructGEP(
       nullptr, VAListAddr, RegCountField, "reg_count_ptr");
   llvm::Value *RegCount = CGF.Builder.CreateLoad(RegCountPtr, "reg_count");
-  llvm::Type *IndexTy = RegCount->getType();
   llvm::Value *MaxRegsV = llvm::ConstantInt::get(IndexTy, MaxRegs);
   llvm::Value *InRegs = CGF.Builder.CreateICmpULT(RegCount, MaxRegsV,
                                                  "fits_in_regs");
@@ -5312,7 +5403,6 @@
   CGF.EmitBlock(InRegBlock);
 
   // Work out the address of an argument register.
-  llvm::Value *PaddedSizeV = llvm::ConstantInt::get(IndexTy, PaddedSize);
   llvm::Value *ScaledRegCount =
     CGF.Builder.CreateMul(RegCount, PaddedSizeV, "scaled_reg_count");
   llvm::Value *RegBase =
@@ -5370,6 +5460,8 @@
 ABIArgInfo SystemZABIInfo::classifyReturnType(QualType RetTy) const {
   if (RetTy->isVoidType())
     return ABIArgInfo::getIgnore();
+  if (isVectorArgumentType(RetTy))
+    return ABIArgInfo::getDirect();
   if (isCompoundType(RetTy) || getContext().getTypeSize(RetTy) > 64)
     return ABIArgInfo::getIndirect(0);
   return (isPromotableIntegerType(RetTy) ?
@@ -5385,8 +5477,16 @@
   if (isPromotableIntegerType(Ty))
     return ABIArgInfo::getExtend();
 
-  // Values that are not 1, 2, 4 or 8 bytes in size are passed indirectly.
+  // Handle vector types and vector-like structure types.  Note that
+  // as opposed to float-like structure types, we do not allow any
+  // padding for vector-like structures, so verify the sizes match.
   uint64_t Size = getContext().getTypeSize(Ty);
+  QualType SingleElementTy = GetSingleElementType(Ty);
+  if (isVectorArgumentType(SingleElementTy) &&
+      getContext().getTypeSize(SingleElementTy) == Size)
+    return ABIArgInfo::getDirect(CGT.ConvertType(SingleElementTy));
+
+  // Values that are not 1, 2, 4 or 8 bytes in size are passed indirectly.
   if (Size != 8 && Size != 16 && Size != 32 && Size != 64)
     return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
 
@@ -5400,7 +5500,7 @@
 
     // The structure is passed as an unextended integer, a float, or a double.
     llvm::Type *PassTy;
-    if (isFPArgumentType(Ty)) {
+    if (isFPArgumentType(SingleElementTy)) {
       assert(Size == 32 || Size == 64);
       if (Size == 32)
         PassTy = llvm::Type::getFloatTy(getVMContext());
@@ -5428,13 +5528,13 @@
 public:
   MSP430TargetCodeGenInfo(CodeGenTypes &CGT)
     : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
-  void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &M) const override;
 };
 
 }
 
-void MSP430TargetCodeGenInfo::SetTargetAttributes(const Decl *D,
+void MSP430TargetCodeGenInfo::setTargetAttributes(const Decl *D,
                                                   llvm::GlobalValue *GV,
                                              CodeGen::CodeGenModule &M) const {
   if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
@@ -5480,6 +5580,7 @@
   void computeInfo(CGFunctionInfo &FI) const override;
   llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
                          CodeGenFunction &CGF) const override;
+  bool shouldSignExtUnsignedType(QualType Ty) const override;
 };
 
 class MIPSTargetCodeGenInfo : public TargetCodeGenInfo {
@@ -5493,7 +5594,7 @@
     return 29;
   }
 
-  void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &CGM) const override {
     const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
     if (!FD) return;
@@ -5515,8 +5616,8 @@
 };
 }
 
-void MipsABIInfo::CoerceToIntArgs(uint64_t TySize,
-                                  SmallVectorImpl<llvm::Type *> &ArgList) const {
+void MipsABIInfo::CoerceToIntArgs(
+    uint64_t TySize, SmallVectorImpl<llvm::Type *> &ArgList) const {
   llvm::IntegerType *IntTy =
     llvm::IntegerType::get(getVMContext(), MinABIStackAlignInBytes * 8);
 
@@ -5555,7 +5656,7 @@
   const RecordDecl *RD = RT->getDecl();
   const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
   assert(!(TySize % 8) && "Size of structure must be multiple of 8.");
-  
+
   uint64_t LastOffset = 0;
   unsigned idx = 0;
   llvm::IntegerType *I64 = llvm::IntegerType::get(getVMContext(), 64);
@@ -5657,7 +5758,7 @@
     // 1. The size of the struct/class is no larger than 128-bit.
     // 2. The struct/class has one or two fields all of which are floating
     //    point types.
-    // 3. The offset of the first field is zero (this follows what gcc does). 
+    // 3. The offset of the first field is zero (this follows what gcc does).
     //
     // Any other composite results are returned in integer registers.
     //
@@ -5727,7 +5828,7 @@
   if (!getCXXABI().classifyReturnType(FI))
     RetInfo = classifyReturnType(FI.getReturnType());
 
-  // Check if a pointer to an aggregate is passed as a hidden argument.  
+  // Check if a pointer to an aggregate is passed as a hidden argument.
   uint64_t Offset = RetInfo.isIndirect() ? MinABIStackAlignInBytes : 0;
 
   for (auto &I : FI.arguments())
@@ -5749,7 +5850,7 @@
     Ty = CGF.getContext().getIntTypeForBitwidth(SlotSizeInBits,
                                                 Ty->isSignedIntegerType());
   }
- 
+
   CGBuilderTy &Builder = CGF.Builder;
   llvm::Value *VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP, "ap");
   llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur");
@@ -5768,7 +5869,7 @@
     AddrTyped = CGF.Builder.CreateIntToPtr(And, PTy);
   }
   else
-    AddrTyped = Builder.CreateBitCast(Addr, PTy);  
+    AddrTyped = Builder.CreateBitCast(Addr, PTy);
 
   llvm::Value *AlignedAddr = Builder.CreateBitCast(AddrTyped, BP);
   TypeAlign = std::max((unsigned)TypeAlign, MinABIStackAlignInBytes);
@@ -5778,10 +5879,20 @@
     Builder.CreateGEP(AlignedAddr, llvm::ConstantInt::get(IntTy, Offset),
                       "ap.next");
   Builder.CreateStore(NextAddr, VAListAddrAsBPP);
-  
+
   return AddrTyped;
 }
 
+bool MipsABIInfo::shouldSignExtUnsignedType(QualType Ty) const {
+  int TySize = getContext().getTypeSize(Ty);
+
+  // MIPS64 ABI requires unsigned 32 bit integers to be sign extended.
+  if (Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32)
+    return true;
+
+  return false;
+}
+
 bool
 MIPSTargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
                                                llvm::Value *Address) const {
@@ -5812,7 +5923,7 @@
 
 //===----------------------------------------------------------------------===//
 // TCE ABI Implementation (see http://tce.cs.tut.fi). Uses mostly the defaults.
-// Currently subclassed only to implement custom OpenCL C function attribute 
+// Currently subclassed only to implement custom OpenCL C function attribute
 // handling.
 //===----------------------------------------------------------------------===//
 
@@ -5823,18 +5934,17 @@
   TCETargetCodeGenInfo(CodeGenTypes &CGT)
     : DefaultTargetCodeGenInfo(CGT) {}
 
-  void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &M) const override;
 };
 
-void TCETargetCodeGenInfo::SetTargetAttributes(const Decl *D,
-                                               llvm::GlobalValue *GV,
-                                               CodeGen::CodeGenModule &M) const {
+void TCETargetCodeGenInfo::setTargetAttributes(
+    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
   const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
   if (!FD) return;
 
   llvm::Function *F = cast<llvm::Function>(GV);
-  
+
   if (M.getLangOpts().OpenCL) {
     if (FD->hasAttr<OpenCLKernelAttr>()) {
       // OpenCL C Kernel functions are not subject to inlining
@@ -5843,8 +5953,9 @@
       if (Attr) {
         // Convert the reqd_work_group_size() attributes to metadata.
         llvm::LLVMContext &Context = F->getContext();
-        llvm::NamedMDNode *OpenCLMetadata = 
-            M.getModule().getOrInsertNamedMetadata("opencl.kernel_wg_size_info");
+        llvm::NamedMDNode *OpenCLMetadata =
+            M.getModule().getOrInsertNamedMetadata(
+                "opencl.kernel_wg_size_info");
 
         SmallVector<llvm::Metadata *, 5> Operands;
         Operands.push_back(llvm::ConstantAsMetadata::get(F));
@@ -5859,9 +5970,9 @@
             llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue(
                 M.Int32Ty, llvm::APInt(32, Attr->getZDim()))));
 
-        // Add a boolean constant operand for "required" (true) or "hint" (false)
-        // for implementing the work_group_size_hint attr later. Currently 
-        // always true as the hint is not yet implemented.
+        // Add a boolean constant operand for "required" (true) or "hint"
+        // (false) for implementing the work_group_size_hint attr later.
+        // Currently always true as the hint is not yet implemented.
         Operands.push_back(
             llvm::ConstantAsMetadata::get(llvm::ConstantInt::getTrue(Context)));
         OpenCLMetadata->addOperand(llvm::MDNode::get(Context, Operands));
@@ -6015,13 +6126,13 @@
 public:
   AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
     : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
-  void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &M) const override;
 };
 
 }
 
-void AMDGPUTargetCodeGenInfo::SetTargetAttributes(
+void AMDGPUTargetCodeGenInfo::setTargetAttributes(
   const Decl *D,
   llvm::GlobalValue *GV,
   CodeGen::CodeGenModule &M) const {
@@ -6337,7 +6448,7 @@
   //   FSR = 70
   //   CSR = 71
   AssignToArrayRange(Builder, Address, Eight8, 64, 71);
-   
+
   // 72-87: d0-15, the 8-byte floating-point registers
   AssignToArrayRange(Builder, Address, Eight8, 72, 87);
 
@@ -6610,7 +6721,7 @@
 ///
 /// The TypeString carries type, qualifier, name, size & value details.
 /// Please see 'Tools Development Guide' section 2.16.2 for format details:
-/// <https://www.xmos.com/download/public/Tools-Development-Guide%28X9114A%29.pdf>
+/// https://www.xmos.com/download/public/Tools-Development-Guide%28X9114A%29.pdf
 /// The output is tested by test/CodeGen/xcore-stringtype.c.
 ///
 static bool getTypeString(SmallStringEnc &Enc, const Decl *D,
@@ -6636,7 +6747,8 @@
                        TypeStringCache &TSC);
 
 /// Helper function for appendRecordType().
-/// Builds a SmallVector containing the encoded field types in declaration order.
+/// Builds a SmallVector containing the encoded field types in declaration
+/// order.
 static bool extractFieldType(SmallVectorImpl<FieldEncoding> &FE,
                              const RecordDecl *RD,
                              const CodeGen::CodeGenModule &CGM,
@@ -6659,7 +6771,7 @@
     if (Field->isBitField())
       Enc += ')';
     Enc += '}';
-    FE.push_back(FieldEncoding(!Field->getName().empty(), Enc));
+    FE.emplace_back(!Field->getName().empty(), Enc);
   }
   return true;
 }
@@ -7057,8 +7169,11 @@
   case llvm::Triple::msp430:
     return *(TheTargetCodeGenInfo = new MSP430TargetCodeGenInfo(Types));
 
-  case llvm::Triple::systemz:
-    return *(TheTargetCodeGenInfo = new SystemZTargetCodeGenInfo(Types));
+  case llvm::Triple::systemz: {
+    bool HasVector = getTarget().getABI() == "vector";
+    return *(TheTargetCodeGenInfo = new SystemZTargetCodeGenInfo(Types,
+                                                                 HasVector));
+  }
 
   case llvm::Triple::tce:
     return *(TheTargetCodeGenInfo = new TCETargetCodeGenInfo(Types));
@@ -7070,32 +7185,24 @@
     bool IsWin32FloatStructABI = Triple.isOSWindows() && !Triple.isOSCygMing();
 
     if (Triple.getOS() == llvm::Triple::Win32) {
-      return *(TheTargetCodeGenInfo =
-               new WinX86_32TargetCodeGenInfo(Types,
-                                              IsDarwinVectorABI, IsSmallStructInRegABI,
-                                              IsWin32FloatStructABI,
-                                              CodeGenOpts.NumRegisterParameters));
+      return *(TheTargetCodeGenInfo = new WinX86_32TargetCodeGenInfo(
+                   Types, IsDarwinVectorABI, IsSmallStructInRegABI,
+                   IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters));
     } else {
-      return *(TheTargetCodeGenInfo =
-               new X86_32TargetCodeGenInfo(Types,
-                                           IsDarwinVectorABI, IsSmallStructInRegABI,
-                                           IsWin32FloatStructABI,
-                                           CodeGenOpts.NumRegisterParameters));
+      return *(TheTargetCodeGenInfo = new X86_32TargetCodeGenInfo(
+                   Types, IsDarwinVectorABI, IsSmallStructInRegABI,
+                   IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters));
     }
   }
 
   case llvm::Triple::x86_64: {
-    bool HasAVX = getTarget().getABI() == "avx";
-
     switch (Triple.getOS()) {
     case llvm::Triple::Win32:
-      return *(TheTargetCodeGenInfo =
-                   new WinX86_64TargetCodeGenInfo(Types, HasAVX));
+      return *(TheTargetCodeGenInfo = new WinX86_64TargetCodeGenInfo(Types));
     case llvm::Triple::PS4:
-      return *(TheTargetCodeGenInfo = new PS4TargetCodeGenInfo(Types, HasAVX));
+      return *(TheTargetCodeGenInfo = new PS4TargetCodeGenInfo(Types));
     default:
-      return *(TheTargetCodeGenInfo =
-                   new X86_64TargetCodeGenInfo(Types, HasAVX));
+      return *(TheTargetCodeGenInfo = new X86_64TargetCodeGenInfo(Types));
     }
   }
   case llvm::Triple::hexagon:
diff --git a/lib/CodeGen/TargetInfo.h b/lib/CodeGen/TargetInfo.h
index cc469d6..bf63265 100644
--- a/lib/CodeGen/TargetInfo.h
+++ b/lib/CodeGen/TargetInfo.h
@@ -53,12 +53,12 @@
   /// getABIInfo() - Returns ABI info helper for the target.
   const ABIInfo &getABIInfo() const { return *Info; }
 
-  /// SetTargetAttributes - Provides a convenient hook to handle extra
+  /// setTargetAttributes - Provides a convenient hook to handle extra
   /// target-specific attributes for the given global.
-  virtual void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+  virtual void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                                    CodeGen::CodeGenModule &M) const {}
 
-  /// EmitTargetMD - Provides a convenient hook to handle extra
+  /// emitTargetMD - Provides a convenient hook to handle extra
   /// target-specific metadata for the given global.
   virtual void emitTargetMD(const Decl *D, llvm::GlobalValue *GV,
                             CodeGen::CodeGenModule &M) const {}
diff --git a/lib/Driver/Driver.cpp b/lib/Driver/Driver.cpp
index 07a5e42..2ead48b 100644
--- a/lib/Driver/Driver.cpp
+++ b/lib/Driver/Driver.cpp
@@ -140,10 +140,8 @@
     }
   }
 
-  for (arg_iterator it = Args->filtered_begin(options::OPT_UNKNOWN),
-         ie = Args->filtered_end(); it != ie; ++it) {
-    Diags.Report(diag::err_drv_unknown_argument) << (*it) ->getAsString(*Args);
-  }
+  for (const Arg *A : Args->filtered(options::OPT_UNKNOWN))
+    Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(*Args);
 
   return Args;
 }
@@ -279,7 +277,8 @@
   // Add a default value of -mlinker-version=, if one was given and the user
   // didn't specify one.
 #if defined(HOST_LINK_VERSION)
-  if (!Args.hasArg(options::OPT_mlinker_version_EQ)) {
+  if (!Args.hasArg(options::OPT_mlinker_version_EQ) &&
+      strlen(HOST_LINK_VERSION) > 0) {
     DAL->AddJoinedArg(0, Opts->getOption(options::OPT_mlinker_version_EQ),
                       HOST_LINK_VERSION);
     DAL->getLastArg(options::OPT_mlinker_version_EQ)->claim();
@@ -347,9 +346,7 @@
     DefaultTargetTriple = A->getValue();
   if (const Arg *A = Args->getLastArg(options::OPT_ccc_install_dir))
     Dir = InstalledDir = A->getValue();
-  for (arg_iterator it = Args->filtered_begin(options::OPT_B),
-         ie = Args->filtered_end(); it != ie; ++it) {
-    const Arg *A = *it;
+  for (const Arg *A : Args->filtered(options::OPT_B)) {
     A->claim();
     PrefixDirs.push_back(A->getValue(0));
   }
@@ -818,9 +815,12 @@
   return true;
 }
 
+// Display an action graph human-readably.  Action A is the "sink" node
+// and latest-occurring action. Traversal is in pre-order, visiting the
+// inputs to each action before printing the action itself.
 static unsigned PrintActions1(const Compilation &C, Action *A,
                               std::map<Action*, unsigned> &Ids) {
-  if (Ids.count(A))
+  if (Ids.count(A)) // A was already visited.
     return Ids[A];
 
   std::string str;
@@ -851,6 +851,8 @@
   return Id;
 }
 
+// Print the action graphs in a compilation C.
+// For example "clang -c file1.c file2.c" is composed of two subgraphs.
 void Driver::PrintActions(const Compilation &C) const {
   std::map<Action*, unsigned> Ids;
   for (ActionList::const_iterator it = C.getActions().begin(),
@@ -989,7 +991,8 @@
   if (llvm::sys::fs::exists(Twine(Path)))
     return true;
 
-  if (D.IsCLMode() && llvm::sys::Process::FindInEnvPath("LIB", Value))
+  if (D.IsCLMode() && !llvm::sys::path::is_absolute(Twine(Path)) &&
+      llvm::sys::Process::FindInEnvPath("LIB", Value))
     return true;
 
   D.Diag(clang::diag::err_drv_no_such_file) << Path;
@@ -1466,9 +1469,8 @@
       if (Opt.getKind() == Option::FlagClass) {
         bool DuplicateClaimed = false;
 
-        for (arg_iterator it = C.getArgs().filtered_begin(&Opt),
-               ie = C.getArgs().filtered_end(); it != ie; ++it) {
-          if ((*it)->isClaimed()) {
+        for (const Arg *AA : C.getArgs().filtered(&Opt)) {
+          if (AA->isClaimed()) {
             DuplicateClaimed = true;
             break;
           }
@@ -1696,8 +1698,7 @@
     assert(AtTopLevel && isa<PreprocessJobAction>(JA));
     StringRef BaseName = llvm::sys::path::filename(BaseInput);
     StringRef NameArg;
-    if (Arg *A = C.getArgs().getLastArg(options::OPT__SLASH_Fi,
-                                        options::OPT__SLASH_o))
+    if (Arg *A = C.getArgs().getLastArg(options::OPT__SLASH_Fi))
       NameArg = A->getValue();
     return C.addResultFile(MakeCLOutputFilename(C.getArgs(), NameArg, BaseName,
                                                 types::TY_PP_C), &JA);
@@ -1877,8 +1878,8 @@
 Driver::generatePrefixedToolNames(const char *Tool, const ToolChain &TC,
                                   SmallVectorImpl<std::string> &Names) const {
   // FIXME: Needs a better variable than DefaultTargetTriple
-  Names.push_back(DefaultTargetTriple + "-" + Tool);
-  Names.push_back(Tool);
+  Names.emplace_back(DefaultTargetTriple + "-" + Tool);
+  Names.emplace_back(Tool);
 }
 
 static bool ScanDirForExecutable(SmallString<128> &Dir,
@@ -2024,8 +2025,8 @@
 
 const ToolChain &Driver::getToolChain(const ArgList &Args,
                                       StringRef DarwinArchName) const {
-  llvm::Triple Target = computeTargetTriple(DefaultTargetTriple, Args,
-                                            DarwinArchName);
+  llvm::Triple Target =
+      computeTargetTriple(DefaultTargetTriple, Args, DarwinArchName);
 
   ToolChain *&TC = ToolChains[Target.str()];
   if (!TC) {
@@ -2096,29 +2097,20 @@
       }
       break;
     default:
-      // TCE is an OSless target
-      if (Target.getArchName() == "tce") {
+      // Of these targets, Hexagon is the only one that might have
+      // an OS of Linux, in which case it got handled above already.
+      if (Target.getArchName() == "tce")
         TC = new toolchains::TCEToolChain(*this, Target, Args);
-        break;
-      }
-      // If Hexagon is configured as an OSless target
-      if (Target.getArch() == llvm::Triple::hexagon) {
+      else if (Target.getArch() == llvm::Triple::hexagon)
         TC = new toolchains::Hexagon_TC(*this, Target, Args);
-        break;
-      }
-      if (Target.getArch() == llvm::Triple::xcore) {
+      else if (Target.getArch() == llvm::Triple::xcore)
         TC = new toolchains::XCore(*this, Target, Args);
-        break;
-      }
-      if (Target.isOSBinFormatELF()) {
+      else if (Target.isOSBinFormatELF())
         TC = new toolchains::Generic_ELF(*this, Target, Args);
-        break;
-      }
-      if (Target.isOSBinFormatMachO()) {
+      else if (Target.isOSBinFormatMachO())
         TC = new toolchains::MachO(*this, Target, Args);
-        break;
-      }
-      TC = new toolchains::Generic_GCC(*this, Target, Args);
+      else
+        TC = new toolchains::Generic_GCC(*this, Target, Args);
       break;
     }
   }
@@ -2126,13 +2118,12 @@
 }
 
 bool Driver::ShouldUseClangCompiler(const JobAction &JA) const {
-  // Check if user requested no clang, or clang doesn't understand this type (we
-  // only handle single inputs for now).
+  // Say "no" if there is not exactly one input of a type clang understands.
   if (JA.size() != 1 ||
       !types::isAcceptedByClang((*JA.begin())->getType()))
     return false;
 
-  // Otherwise make sure this is an action clang understands.
+  // And say "no" if this is not a kind of action clang understands.
   if (!isa<PreprocessJobAction>(JA) && !isa<PrecompileJobAction>(JA) &&
       !isa<CompileJobAction>(JA) && !isa<BackendJobAction>(JA))
     return false;
@@ -2193,6 +2184,6 @@
   return std::make_pair(IncludedFlagsBitmask, ExcludedFlagsBitmask);
 }
 
-bool clang::driver::isOptimizationLevelFast(const llvm::opt::ArgList &Args) {
+bool clang::driver::isOptimizationLevelFast(const ArgList &Args) {
   return Args.hasFlag(options::OPT_Ofast, options::OPT_O_Group, false);
 }
diff --git a/lib/Driver/MSVCToolChain.cpp b/lib/Driver/MSVCToolChain.cpp
index 7739cb0..d824fe4 100644
--- a/lib/Driver/MSVCToolChain.cpp
+++ b/lib/Driver/MSVCToolChain.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "ToolChains.h"
+#include "Tools.h"
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/Version.h"
 #include "clang/Driver/Compilation.h"
@@ -495,3 +496,29 @@
                                                  ArgStringList &CC1Args) const {
   // FIXME: There should probably be logic here to find libc++ on Windows.
 }
+
+std::string
+MSVCToolChain::ComputeEffectiveClangTriple(const ArgList &Args,
+                                           types::ID InputType) const {
+  std::string TripleStr =
+      ToolChain::ComputeEffectiveClangTriple(Args, InputType);
+  llvm::Triple Triple(TripleStr);
+  VersionTuple MSVT =
+      tools::visualstudio::getMSVCVersion(/*D=*/nullptr, Triple, Args,
+                                          /*IsWindowsMSVC=*/true);
+  if (MSVT.empty())
+    return TripleStr;
+
+  MSVT = VersionTuple(MSVT.getMajor(), MSVT.getMinor().getValueOr(0),
+                      MSVT.getSubminor().getValueOr(0));
+
+  if (Triple.getEnvironment() == llvm::Triple::MSVC) {
+    StringRef ObjFmt = Triple.getEnvironmentName().split('-').second;
+    if (ObjFmt.empty())
+      Triple.setEnvironmentName((Twine("msvc") + MSVT.getAsString()).str());
+    else
+      Triple.setEnvironmentName(
+          (Twine("msvc") + MSVT.getAsString() + Twine('-') + ObjFmt).str());
+  }
+  return Triple.getTriple();
+}
diff --git a/lib/Driver/SanitizerArgs.cpp b/lib/Driver/SanitizerArgs.cpp
index cd3785c..72530b4 100644
--- a/lib/Driver/SanitizerArgs.cpp
+++ b/lib/Driver/SanitizerArgs.cpp
@@ -7,6 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 #include "clang/Driver/SanitizerArgs.h"
+#include "clang/Basic/Sanitizers.h"
 #include "clang/Driver/Driver.h"
 #include "clang/Driver/DriverDiagnostic.h"
 #include "clang/Driver/Options.h"
@@ -18,28 +19,12 @@
 #include "llvm/Support/SpecialCaseList.h"
 #include <memory>
 
+using namespace clang;
+using namespace clang::SanitizerKind;
 using namespace clang::driver;
 using namespace llvm::opt;
 
-namespace {
-/// Assign ordinals to possible values of -fsanitize= flag.
-/// We use the ordinal values as bit positions within \c SanitizeKind.
-enum SanitizeOrdinal : uint64_t {
-#define SANITIZER(NAME, ID) SO_##ID,
-#define SANITIZER_GROUP(NAME, ID, ALIAS) SO_##ID##Group,
-#include "clang/Basic/Sanitizers.def"
-  SO_Count
-};
-
-/// Represents a set of sanitizer kinds. It is also used to define:
-/// 1) set of sanitizers each sanitizer group expands into.
-/// 2) set of sanitizers sharing a specific property (e.g.
-///    all sanitizers with zero-base shadow).
-enum SanitizeKind : uint64_t {
-#define SANITIZER(NAME, ID) ID = 1ULL << SO_##ID,
-#define SANITIZER_GROUP(NAME, ID, ALIAS)                                       \
-  ID = ALIAS, ID##Group = 1ULL << SO_##ID##Group,
-#include "clang/Basic/Sanitizers.def"
+enum : SanitizerMask {
   NeedsUbsanRt = Undefined | Integer,
   NotAllowedWithTrap = Vptr,
   RequiresPIE = Memory | DataFlow,
@@ -50,43 +35,25 @@
   LegacyFsanitizeRecoverMask = Undefined | Integer,
   NeedsLTO = CFI,
 };
-}
 
-/// Returns true if set of \p Sanitizers contain at least one sanitizer from
-/// \p Kinds.
-static bool hasOneOf(const clang::SanitizerSet &Sanitizers, uint64_t Kinds) {
-#define SANITIZER(NAME, ID)                                                    \
-  if (Sanitizers.has(clang::SanitizerKind::ID) && (Kinds & ID))                \
-    return true;
-#include "clang/Basic/Sanitizers.def"
-  return false;
-}
-
-/// Adds all sanitizers from \p Kinds to \p Sanitizers.
-static void addAllOf(clang::SanitizerSet &Sanitizers, uint64_t Kinds) {
-#define SANITIZER(NAME, ID) \
-  if (Kinds & ID) \
-    Sanitizers.set(clang::SanitizerKind::ID, true);
-#include "clang/Basic/Sanitizers.def"
-}
-
-static uint64_t toSanitizeKind(clang::SanitizerKind K) {
-#define SANITIZER(NAME, ID) \
-  if (K == clang::SanitizerKind::ID) \
-    return ID;
-#include "clang/Basic/Sanitizers.def"
-  llvm_unreachable("Invalid SanitizerKind!");
-}
-
-/// Parse a single value from a -fsanitize= or -fno-sanitize= value list.
-/// Returns a member of the \c SanitizeKind enumeration, or \c 0
-/// if \p Value is not known.
-static uint64_t parseValue(const char *Value);
+enum CoverageFeature {
+  CoverageFunc = 1 << 0,
+  CoverageBB = 1 << 1,
+  CoverageEdge = 1 << 2,
+  CoverageIndirCall = 1 << 3,
+  CoverageTraceBB = 1 << 4,
+  CoverageTraceCmp = 1 << 5,
+  Coverage8bitCounters = 1 << 6,
+};
 
 /// Parse a -fsanitize= or -fno-sanitize= argument's values, diagnosing any
-/// invalid components. Returns OR of members of \c SanitizeKind enumeration.
-static uint64_t parseArgValues(const Driver &D, const llvm::opt::Arg *A,
-                               bool DiagnoseErrors);
+/// invalid components. Returns a SanitizerMask.
+static SanitizerMask parseArgValues(const Driver &D, const llvm::opt::Arg *A,
+                                    bool DiagnoseErrors);
+
+/// Parse -f(no-)?sanitize-coverage= flag values, diagnosing any invalid
+/// components. Returns OR of members of \c CoverageFeature enumeration.
+static int parseCoverageFeatures(const Driver &D, const llvm::opt::Arg *A);
 
 /// Produce an argument string from ArgList \p Args, which shows how it
 /// provides some sanitizer kind from \p Mask. For example, the argument list
@@ -94,29 +61,20 @@
 /// would produce "-fsanitize=vptr".
 static std::string lastArgumentForMask(const Driver &D,
                                        const llvm::opt::ArgList &Args,
-                                       uint64_t Mask);
-
-static std::string lastArgumentForKind(const Driver &D,
-                                       const llvm::opt::ArgList &Args,
-                                       clang::SanitizerKind K) {
-  return lastArgumentForMask(D, Args, toSanitizeKind(K));
-}
+                                       SanitizerMask Mask);
 
 /// Produce an argument string from argument \p A, which shows how it provides
 /// a value in \p Mask. For instance, the argument
 /// "-fsanitize=address,alignment" with mask \c NeedsUbsanRt would produce
 /// "-fsanitize=alignment".
-static std::string describeSanitizeArg(const llvm::opt::Arg *A, uint64_t Mask);
+static std::string describeSanitizeArg(const llvm::opt::Arg *A,
+                                       SanitizerMask Mask);
 
 /// Produce a string containing comma-separated names of sanitizers in \p
 /// Sanitizers set.
 static std::string toString(const clang::SanitizerSet &Sanitizers);
 
-/// For each sanitizer group bit set in \p Kinds, set the bits for sanitizers
-/// this group enables.
-static uint64_t expandGroups(uint64_t Kinds);
-
-static uint64_t getToolchainUnsupportedKinds(const ToolChain &TC) {
+static SanitizerMask getToolchainUnsupportedKinds(const ToolChain &TC) {
   bool IsFreeBSD = TC.getTriple().getOS() == llvm::Triple::FreeBSD;
   bool IsLinux = TC.getTriple().getOS() == llvm::Triple::Linux;
   bool IsX86 = TC.getTriple().getArch() == llvm::Triple::x86;
@@ -124,7 +82,7 @@
   bool IsMIPS64 = TC.getTriple().getArch() == llvm::Triple::mips64 ||
                   TC.getTriple().getArch() == llvm::Triple::mips64el;
 
-  uint64_t Unsupported = 0;
+  SanitizerMask Unsupported = 0;
   if (!(IsLinux && (IsX86_64 || IsMIPS64))) {
     Unsupported |= Memory | DataFlow;
   }
@@ -137,16 +95,16 @@
   return Unsupported;
 }
 
-static bool getDefaultBlacklist(const Driver &D, uint64_t Kinds,
+static bool getDefaultBlacklist(const Driver &D, SanitizerMask Kinds,
                                 std::string &BLPath) {
   const char *BlacklistFile = nullptr;
-  if (Kinds & SanitizeKind::Address)
+  if (Kinds & Address)
     BlacklistFile = "asan_blacklist.txt";
-  else if (Kinds & SanitizeKind::Memory)
+  else if (Kinds & Memory)
     BlacklistFile = "msan_blacklist.txt";
-  else if (Kinds & SanitizeKind::Thread)
+  else if (Kinds & Thread)
     BlacklistFile = "tsan_blacklist.txt";
-  else if (Kinds & SanitizeKind::DataFlow)
+  else if (Kinds & DataFlow)
     BlacklistFile = "dfsan_abilist.txt";
 
   if (BlacklistFile) {
@@ -159,27 +117,29 @@
 }
 
 bool SanitizerArgs::needsUbsanRt() const {
-  return !UbsanTrapOnError && hasOneOf(Sanitizers, NeedsUbsanRt) &&
-         !Sanitizers.has(SanitizerKind::Address);
+  return !UbsanTrapOnError && (Sanitizers.Mask & NeedsUbsanRt) &&
+         !Sanitizers.has(Address) &&
+         !Sanitizers.has(Memory) &&
+         !Sanitizers.has(Thread);
 }
 
 bool SanitizerArgs::requiresPIE() const {
-  return AsanZeroBaseShadow || hasOneOf(Sanitizers, RequiresPIE);
+  return AsanZeroBaseShadow || (Sanitizers.Mask & RequiresPIE);
 }
 
 bool SanitizerArgs::needsUnwindTables() const {
-  return hasOneOf(Sanitizers, NeedsUnwindTables);
+  return Sanitizers.Mask & NeedsUnwindTables;
 }
 
 bool SanitizerArgs::needsLTO() const {
-  return hasOneOf(Sanitizers, NeedsLTO);
+  return Sanitizers.Mask & NeedsLTO;
 }
 
 void SanitizerArgs::clear() {
   Sanitizers.clear();
   RecoverableSanitizers.clear();
   BlacklistFiles.clear();
-  SanitizeCoverage = 0;
+  CoverageFeatures = 0;
   MsanTrackOrigins = 0;
   AsanFieldPadding = 0;
   AsanZeroBaseShadow = false;
@@ -191,13 +151,18 @@
 SanitizerArgs::SanitizerArgs(const ToolChain &TC,
                              const llvm::opt::ArgList &Args) {
   clear();
-  uint64_t AllRemove = 0;  // During the loop below, the accumulated set of
-                           // sanitizers disabled by the current sanitizer
-                           // argument or any argument after it.
-  uint64_t DiagnosedKinds = 0;  // All Kinds we have diagnosed up to now.
-                                // Used to deduplicate diagnostics.
-  uint64_t Kinds = 0;
-  uint64_t NotSupported = getToolchainUnsupportedKinds(TC);
+  SanitizerMask AllRemove = 0;  // During the loop below, the accumulated set of
+                                // sanitizers disabled by the current sanitizer
+                                // argument or any argument after it.
+  SanitizerMask AllAddedKinds = 0;  // Mask of all sanitizers ever enabled by
+                                    // -fsanitize= flags (directly or via group
+                                    // expansion), some of which may be disabled
+                                    // later. Used to carefully prune
+                                    // unused-argument diagnostics.
+  SanitizerMask DiagnosedKinds = 0;  // All Kinds we have diagnosed up to now.
+                                     // Used to deduplicate diagnostics.
+  SanitizerMask Kinds = 0;
+  SanitizerMask NotSupported = getToolchainUnsupportedKinds(TC);
   ToolChain::RTTIMode RTTIMode = TC.getRTTIMode();
 
   const Driver &D = TC.getDriver();
@@ -206,14 +171,16 @@
     const auto *Arg = *I;
     if (Arg->getOption().matches(options::OPT_fsanitize_EQ)) {
       Arg->claim();
-      uint64_t Add = parseArgValues(D, Arg, true);
+      SanitizerMask Add = parseArgValues(D, Arg, true);
+      AllAddedKinds |= expandSanitizerGroups(Add);
 
       // Avoid diagnosing any sanitizer which is disabled later.
       Add &= ~AllRemove;
       // At this point we have not expanded groups, so any unsupported
       // sanitizers in Add are those which have been explicitly enabled.
       // Diagnose them.
-      if (uint64_t KindsToDiagnose = Add & NotSupported & ~DiagnosedKinds) {
+      if (SanitizerMask KindsToDiagnose =
+              Add & NotSupported & ~DiagnosedKinds) {
         // Only diagnose the new kinds.
         std::string Desc = describeSanitizeArg(*I, KindsToDiagnose);
         D.Diag(diag::err_drv_unsupported_opt_for_target)
@@ -225,7 +192,7 @@
       // Test for -fno-rtti + explicit -fsanitizer=vptr before expanding groups
       // so we don't error out if -fno-rtti and -fsanitize=undefined were
       // passed.
-      if (Add & SanitizeKind::Vptr &&
+      if (Add & Vptr &&
           (RTTIMode == ToolChain::RM_DisabledImplicitly ||
            RTTIMode == ToolChain::RM_DisabledExplicitly)) {
         if (RTTIMode == ToolChain::RM_DisabledImplicitly)
@@ -241,10 +208,10 @@
         }
 
         // Take out the Vptr sanitizer from the enabled sanitizers
-        AllRemove |= SanitizeKind::Vptr;
+        AllRemove |= Vptr;
       }
 
-      Add = expandGroups(Add);
+      Add = expandSanitizerGroups(Add);
       // Group expansion may have enabled a sanitizer which is disabled later.
       Add &= ~AllRemove;
       // Silently discard any unsupported sanitizers implicitly enabled through
@@ -254,43 +221,39 @@
       Kinds |= Add;
     } else if (Arg->getOption().matches(options::OPT_fno_sanitize_EQ)) {
       Arg->claim();
-      uint64_t Remove = parseArgValues(D, Arg, true);
-      AllRemove |= expandGroups(Remove);
+      SanitizerMask Remove = parseArgValues(D, Arg, true);
+      AllRemove |= expandSanitizerGroups(Remove);
     }
   }
 
   // We disable the vptr sanitizer if it was enabled by group expansion but RTTI
   // is disabled.
-  if ((Kinds & SanitizeKind::Vptr) &&
+  if ((Kinds & Vptr) &&
       (RTTIMode == ToolChain::RM_DisabledImplicitly ||
        RTTIMode == ToolChain::RM_DisabledExplicitly)) {
-    Kinds &= ~SanitizeKind::Vptr;
+    Kinds &= ~Vptr;
   }
 
   // Warn about undefined sanitizer options that require runtime support.
   UbsanTrapOnError =
     Args.hasFlag(options::OPT_fsanitize_undefined_trap_on_error,
                  options::OPT_fno_sanitize_undefined_trap_on_error, false);
-  if (UbsanTrapOnError && (Kinds & SanitizeKind::NotAllowedWithTrap)) {
+  if (UbsanTrapOnError && (Kinds & NotAllowedWithTrap)) {
     D.Diag(clang::diag::err_drv_argument_not_allowed_with)
         << lastArgumentForMask(D, Args, NotAllowedWithTrap)
         << "-fsanitize-undefined-trap-on-error";
-    Kinds &= ~SanitizeKind::NotAllowedWithTrap;
+    Kinds &= ~NotAllowedWithTrap;
   }
 
   // Warn about incompatible groups of sanitizers.
-  std::pair<uint64_t, uint64_t> IncompatibleGroups[] = {
-      std::make_pair(SanitizeKind::Address, SanitizeKind::Thread),
-      std::make_pair(SanitizeKind::Address, SanitizeKind::Memory),
-      std::make_pair(SanitizeKind::Thread, SanitizeKind::Memory),
-      std::make_pair(SanitizeKind::Leak, SanitizeKind::Thread),
-      std::make_pair(SanitizeKind::Leak, SanitizeKind::Memory),
-      std::make_pair(SanitizeKind::NeedsUbsanRt, SanitizeKind::Thread),
-      std::make_pair(SanitizeKind::NeedsUbsanRt, SanitizeKind::Memory)};
+  std::pair<SanitizerMask, SanitizerMask> IncompatibleGroups[] = {
+      std::make_pair(Address, Thread), std::make_pair(Address, Memory),
+      std::make_pair(Thread, Memory), std::make_pair(Leak, Thread),
+      std::make_pair(Leak, Memory)};
   for (auto G : IncompatibleGroups) {
-    uint64_t Group = G.first;
+    SanitizerMask Group = G.first;
     if (Kinds & Group) {
-      if (uint64_t Incompatible = Kinds & G.second) {
+      if (SanitizerMask Incompatible = Kinds & G.second) {
         D.Diag(clang::diag::err_drv_argument_not_allowed_with)
             << lastArgumentForMask(D, Args, Group)
             << lastArgumentForMask(D, Args, Incompatible);
@@ -304,34 +267,34 @@
   // default in ASan?
 
   // Parse -f(no-)?sanitize-recover flags.
-  uint64_t RecoverableKinds = RecoverableByDefault;
-  uint64_t DiagnosedUnrecoverableKinds = 0;
+  SanitizerMask RecoverableKinds = RecoverableByDefault;
+  SanitizerMask DiagnosedUnrecoverableKinds = 0;
   for (const auto *Arg : Args) {
     const char *DeprecatedReplacement = nullptr;
     if (Arg->getOption().matches(options::OPT_fsanitize_recover)) {
       DeprecatedReplacement = "-fsanitize-recover=undefined,integer";
-      RecoverableKinds |= expandGroups(LegacyFsanitizeRecoverMask);
+      RecoverableKinds |= expandSanitizerGroups(LegacyFsanitizeRecoverMask);
       Arg->claim();
     } else if (Arg->getOption().matches(options::OPT_fno_sanitize_recover)) {
       DeprecatedReplacement = "-fno-sanitize-recover=undefined,integer";
-      RecoverableKinds &= ~expandGroups(LegacyFsanitizeRecoverMask);
+      RecoverableKinds &= ~expandSanitizerGroups(LegacyFsanitizeRecoverMask);
       Arg->claim();
     } else if (Arg->getOption().matches(options::OPT_fsanitize_recover_EQ)) {
-      uint64_t Add = parseArgValues(D, Arg, true);
+      SanitizerMask Add = parseArgValues(D, Arg, true);
       // Report error if user explicitly tries to recover from unrecoverable
       // sanitizer.
-      if (uint64_t KindsToDiagnose =
+      if (SanitizerMask KindsToDiagnose =
               Add & Unrecoverable & ~DiagnosedUnrecoverableKinds) {
         SanitizerSet SetToDiagnose;
-        addAllOf(SetToDiagnose, KindsToDiagnose);
+        SetToDiagnose.Mask |= KindsToDiagnose;
         D.Diag(diag::err_drv_unsupported_option_argument)
             << Arg->getOption().getName() << toString(SetToDiagnose);
         DiagnosedUnrecoverableKinds |= KindsToDiagnose;
       }
-      RecoverableKinds |= expandGroups(Add);
+      RecoverableKinds |= expandSanitizerGroups(Add);
       Arg->claim();
     } else if (Arg->getOption().matches(options::OPT_fno_sanitize_recover_EQ)) {
-      RecoverableKinds &= ~expandGroups(parseArgValues(D, Arg, true));
+      RecoverableKinds &= ~expandSanitizerGroups(parseArgValues(D, Arg, true));
       Arg->claim();
     }
     if (DeprecatedReplacement) {
@@ -373,7 +336,7 @@
   }
 
   // Parse -f[no-]sanitize-memory-track-origins[=level] options.
-  if (Kinds & SanitizeKind::Memory) {
+  if (AllAddedKinds & Memory) {
     if (Arg *A =
             Args.getLastArg(options::OPT_fsanitize_memory_track_origins_EQ,
                             options::OPT_fsanitize_memory_track_origins,
@@ -393,18 +356,72 @@
     }
   }
 
-  // Parse -fsanitize-coverage=N. Currently one of asan/msan/lsan is required.
-  if (Kinds & SanitizeKind::SupportsCoverage) {
-    if (Arg *A = Args.getLastArg(options::OPT_fsanitize_coverage)) {
-      StringRef S = A->getValue();
-      // Legal values are 0..4.
-      if (S.getAsInteger(0, SanitizeCoverage) || SanitizeCoverage < 0 ||
-          SanitizeCoverage > 4)
-        D.Diag(clang::diag::err_drv_invalid_value) << A->getAsString(Args) << S;
+  // Parse -f(no-)?sanitize-coverage flags if coverage is supported by the
+  // enabled sanitizers.
+  if (AllAddedKinds & SupportsCoverage) {
+    for (const auto *Arg : Args) {
+      if (Arg->getOption().matches(options::OPT_fsanitize_coverage)) {
+        Arg->claim();
+        int LegacySanitizeCoverage;
+        if (Arg->getNumValues() == 1 &&
+            !StringRef(Arg->getValue(0))
+                 .getAsInteger(0, LegacySanitizeCoverage) &&
+            LegacySanitizeCoverage >= 0 && LegacySanitizeCoverage <= 4) {
+          // TODO: Add deprecation notice for this form.
+          switch (LegacySanitizeCoverage) {
+          case 0:
+            CoverageFeatures = 0;
+            break;
+          case 1:
+            CoverageFeatures = CoverageFunc;
+            break;
+          case 2:
+            CoverageFeatures = CoverageBB;
+            break;
+          case 3:
+            CoverageFeatures = CoverageEdge;
+            break;
+          case 4:
+            CoverageFeatures = CoverageEdge | CoverageIndirCall;
+            break;
+          }
+          continue;
+        }
+        CoverageFeatures |= parseCoverageFeatures(D, Arg);
+      } else if (Arg->getOption().matches(options::OPT_fno_sanitize_coverage)) {
+        Arg->claim();
+        CoverageFeatures &= ~parseCoverageFeatures(D, Arg);
+      }
     }
   }
+  // Choose at most one coverage type: function, bb, or edge.
+  if ((CoverageFeatures & CoverageFunc) && (CoverageFeatures & CoverageBB))
+    D.Diag(clang::diag::err_drv_argument_not_allowed_with)
+        << "-fsanitize-coverage=func"
+        << "-fsanitize-coverage=bb";
+  if ((CoverageFeatures & CoverageFunc) && (CoverageFeatures & CoverageEdge))
+    D.Diag(clang::diag::err_drv_argument_not_allowed_with)
+        << "-fsanitize-coverage=func"
+        << "-fsanitize-coverage=edge";
+  if ((CoverageFeatures & CoverageBB) && (CoverageFeatures & CoverageEdge))
+    D.Diag(clang::diag::err_drv_argument_not_allowed_with)
+        << "-fsanitize-coverage=bb"
+        << "-fsanitize-coverage=edge";
+  // Basic block tracing and 8-bit counters require some type of coverage
+  // enabled.
+  int CoverageTypes = CoverageFunc | CoverageBB | CoverageEdge;
+  if ((CoverageFeatures & CoverageTraceBB) &&
+      !(CoverageFeatures & CoverageTypes))
+    D.Diag(clang::diag::err_drv_argument_only_allowed_with)
+        << "-fsanitize-coverage=trace-bb"
+        << "-fsanitize-coverage=(func|bb|edge)";
+  if ((CoverageFeatures & Coverage8bitCounters) &&
+      !(CoverageFeatures & CoverageTypes))
+    D.Diag(clang::diag::err_drv_argument_only_allowed_with)
+        << "-fsanitize-coverage=8bit-counters"
+        << "-fsanitize-coverage=(func|bb|edge)";
 
-  if (Kinds & SanitizeKind::Address) {
+  if (AllAddedKinds & Address) {
     AsanSharedRuntime =
         Args.hasArg(options::OPT_shared_libasan) ||
         (TC.getTriple().getEnvironment() == llvm::Triple::Android);
@@ -430,7 +447,7 @@
       case options::OPT__SLASH_LDd:
         D.Diag(clang::diag::err_drv_argument_not_allowed_with)
             << WindowsDebugRTArg->getAsString(Args)
-            << lastArgumentForKind(D, Args, SanitizerKind::Address);
+            << lastArgumentForMask(D, Args, Address);
         D.Diag(clang::diag::note_drv_address_sanitizer_debug_runtime);
       }
     }
@@ -441,14 +458,14 @@
       Args.hasArg(options::OPT_fsanitize_link_cxx_runtime) || D.CCCIsCXX();
 
   // Finally, initialize the set of available and recoverable sanitizers.
-  addAllOf(Sanitizers, Kinds);
-  addAllOf(RecoverableSanitizers, RecoverableKinds);
+  Sanitizers.Mask |= Kinds;
+  RecoverableSanitizers.Mask |= RecoverableKinds;
 }
 
 static std::string toString(const clang::SanitizerSet &Sanitizers) {
   std::string Res;
 #define SANITIZER(NAME, ID)                                                    \
-  if (Sanitizers.has(clang::SanitizerKind::ID)) {                              \
+  if (Sanitizers.has(ID)) {                                                    \
     if (!Res.empty())                                                          \
       Res += ",";                                                              \
     Res += NAME;                                                               \
@@ -482,52 +499,47 @@
   if (AsanFieldPadding)
     CmdArgs.push_back(Args.MakeArgString("-fsanitize-address-field-padding=" +
                                          llvm::utostr(AsanFieldPadding)));
-  if (SanitizeCoverage)
-    CmdArgs.push_back(Args.MakeArgString("-fsanitize-coverage=" +
-                                         llvm::utostr(SanitizeCoverage)));
+  // Translate available CoverageFeatures to corresponding clang-cc1 flags.
+  std::pair<int, const char *> CoverageFlags[] = {
+    std::make_pair(CoverageFunc, "-fsanitize-coverage-type=1"),
+    std::make_pair(CoverageBB, "-fsanitize-coverage-type=2"),
+    std::make_pair(CoverageEdge, "-fsanitize-coverage-type=3"),
+    std::make_pair(CoverageIndirCall, "-fsanitize-coverage-indirect-calls"),
+    std::make_pair(CoverageTraceBB, "-fsanitize-coverage-trace-bb"),
+    std::make_pair(CoverageTraceCmp, "-fsanitize-coverage-trace-cmp"),
+    std::make_pair(Coverage8bitCounters, "-fsanitize-coverage-8bit-counters")};
+  for (auto F : CoverageFlags) {
+    if (CoverageFeatures & F.first)
+      CmdArgs.push_back(Args.MakeArgString(F.second));
+  }
+
+
   // MSan: Workaround for PR16386.
   // ASan: This is mainly to help LSan with cases such as
   // https://code.google.com/p/address-sanitizer/issues/detail?id=373
   // We can't make this conditional on -fsanitize=leak, as that flag shouldn't
   // affect compilation.
-  if (Sanitizers.has(SanitizerKind::Memory) ||
-      Sanitizers.has(SanitizerKind::Address))
+  if (Sanitizers.has(Memory) || Sanitizers.has(Address))
     CmdArgs.push_back(Args.MakeArgString("-fno-assume-sane-operator-new"));
 }
 
-uint64_t parseValue(const char *Value) {
-  uint64_t ParsedKind = llvm::StringSwitch<SanitizeKind>(Value)
-#define SANITIZER(NAME, ID) .Case(NAME, ID)
-#define SANITIZER_GROUP(NAME, ID, ALIAS) .Case(NAME, ID##Group)
-#include "clang/Basic/Sanitizers.def"
-    .Default(SanitizeKind());
-  return ParsedKind;
-}
-
-uint64_t expandGroups(uint64_t Kinds) {
-#define SANITIZER(NAME, ID)
-#define SANITIZER_GROUP(NAME, ID, ALIAS) if (Kinds & ID##Group) Kinds |= ID;
-#include "clang/Basic/Sanitizers.def"
-  return Kinds;
-}
-
-uint64_t parseArgValues(const Driver &D, const llvm::opt::Arg *A,
-                        bool DiagnoseErrors) {
+SanitizerMask parseArgValues(const Driver &D, const llvm::opt::Arg *A,
+                             bool DiagnoseErrors) {
   assert((A->getOption().matches(options::OPT_fsanitize_EQ) ||
           A->getOption().matches(options::OPT_fno_sanitize_EQ) ||
           A->getOption().matches(options::OPT_fsanitize_recover_EQ) ||
           A->getOption().matches(options::OPT_fno_sanitize_recover_EQ)) &&
          "Invalid argument in parseArgValues!");
-  uint64_t Kinds = 0;
+  SanitizerMask Kinds = 0;
   for (int i = 0, n = A->getNumValues(); i != n; ++i) {
     const char *Value = A->getValue(i);
-    uint64_t Kind;
+    SanitizerMask Kind;
     // Special case: don't accept -fsanitize=all.
     if (A->getOption().matches(options::OPT_fsanitize_EQ) &&
         0 == strcmp("all", Value))
       Kind = 0;
     else
-      Kind = parseValue(Value);
+      Kind = parseSanitizerValue(Value, /*AllowGroups=*/true);
 
     if (Kind)
       Kinds |= Kind;
@@ -538,31 +550,58 @@
   return Kinds;
 }
 
+int parseCoverageFeatures(const Driver &D, const llvm::opt::Arg *A) {
+  assert(A->getOption().matches(options::OPT_fsanitize_coverage) ||
+         A->getOption().matches(options::OPT_fno_sanitize_coverage));
+  int Features = 0;
+  for (int i = 0, n = A->getNumValues(); i != n; ++i) {
+    const char *Value = A->getValue(i);
+    int F = llvm::StringSwitch<int>(Value)
+        .Case("func", CoverageFunc)
+        .Case("bb", CoverageBB)
+        .Case("edge", CoverageEdge)
+        .Case("indirect-calls", CoverageIndirCall)
+        .Case("trace-bb", CoverageTraceBB)
+        .Case("trace-cmp", CoverageTraceCmp)
+        .Case("8bit-counters", Coverage8bitCounters)
+        .Default(0);
+    if (F == 0)
+      D.Diag(clang::diag::err_drv_unsupported_option_argument)
+          << A->getOption().getName() << Value;
+    Features |= F;
+  }
+  return Features;
+}
+
 std::string lastArgumentForMask(const Driver &D, const llvm::opt::ArgList &Args,
-                                uint64_t Mask) {
+                                SanitizerMask Mask) {
   for (llvm::opt::ArgList::const_reverse_iterator I = Args.rbegin(),
                                                   E = Args.rend();
        I != E; ++I) {
     const auto *Arg = *I;
     if (Arg->getOption().matches(options::OPT_fsanitize_EQ)) {
-      uint64_t AddKinds = expandGroups(parseArgValues(D, Arg, false));
+      SanitizerMask AddKinds =
+          expandSanitizerGroups(parseArgValues(D, Arg, false));
       if (AddKinds & Mask)
         return describeSanitizeArg(Arg, Mask);
     } else if (Arg->getOption().matches(options::OPT_fno_sanitize_EQ)) {
-      uint64_t RemoveKinds = expandGroups(parseArgValues(D, Arg, false));
+      SanitizerMask RemoveKinds =
+          expandSanitizerGroups(parseArgValues(D, Arg, false));
       Mask &= ~RemoveKinds;
     }
   }
   llvm_unreachable("arg list didn't provide expected value");
 }
 
-std::string describeSanitizeArg(const llvm::opt::Arg *A, uint64_t Mask) {
+std::string describeSanitizeArg(const llvm::opt::Arg *A, SanitizerMask Mask) {
   assert(A->getOption().matches(options::OPT_fsanitize_EQ)
          && "Invalid argument in describeSanitizerArg!");
 
   std::string Sanitizers;
   for (int i = 0, n = A->getNumValues(); i != n; ++i) {
-    if (expandGroups(parseValue(A->getValue(i))) & Mask) {
+    if (expandSanitizerGroups(
+            parseSanitizerValue(A->getValue(i), /*AllowGroups=*/true)) &
+        Mask) {
       if (!Sanitizers.empty())
         Sanitizers += ",";
       Sanitizers += A->getValue(i);
diff --git a/lib/Driver/ToolChain.cpp b/lib/Driver/ToolChain.cpp
index 52e8603..82eb854 100644
--- a/lib/Driver/ToolChain.cpp
+++ b/lib/Driver/ToolChain.cpp
@@ -303,9 +303,12 @@
     // Thumb2 is the default for V7 on Darwin.
     //
     // FIXME: Thumb should just be another -target-feaure, not in the triple.
-    StringRef Suffix = Triple.isOSBinFormatMachO()
-      ? tools::arm::getLLVMArchSuffixForARM(tools::arm::getARMCPUForMArch(Args, Triple))
-      : tools::arm::getLLVMArchSuffixForARM(tools::arm::getARMTargetCPU(Args, Triple));
+    std::string CPU = Triple.isOSBinFormatMachO()
+      ? tools::arm::getARMCPUForMArch(Args, Triple)
+      : tools::arm::getARMTargetCPU(Args, Triple);
+    StringRef Suffix =
+      tools::arm::getLLVMArchSuffixForARM(CPU,
+                                          tools::arm::getARMArch(Args, Triple));
     bool ThumbDefault = Suffix.startswith("v6m") || Suffix.startswith("v7m") ||
       Suffix.startswith("v7em") ||
       (Suffix.startswith("v7") && getTriple().isOSBinFormatMachO());
diff --git a/lib/Driver/ToolChains.cpp b/lib/Driver/ToolChains.cpp
index 434dc4d..02154dc 100644
--- a/lib/Driver/ToolChains.cpp
+++ b/lib/Driver/ToolChains.cpp
@@ -29,6 +29,7 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Program.h"
+#include "llvm/Support/TargetParser.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cstdlib> // ::getenv
 #include <system_error>
@@ -108,8 +109,12 @@
   }
 }
 
-static const char *GetArmArchForMArch(StringRef Value) {
-  return llvm::StringSwitch<const char*>(Value)
+// This is just a MachO name translation routine and there's no
+// way to join this into ARMTargetParser without breaking all
+// other assumptions. Maybe MachO should consider standardising
+// their nomenclature.
+static const char *ArmMachOArchName(StringRef Arch) {
+  return llvm::StringSwitch<const char*>(Arch)
     .Case("armv6k", "armv6")
     .Case("armv6m", "armv6m")
     .Case("armv5tej", "armv5")
@@ -125,21 +130,23 @@
     .Default(nullptr);
 }
 
-static const char *GetArmArchForMCpu(StringRef Value) {
-  return llvm::StringSwitch<const char *>(Value)
-    .Cases("arm9e", "arm946e-s", "arm966e-s", "arm968e-s", "arm926ej-s","armv5")
-    .Cases("arm10e", "arm10tdmi", "armv5")
-    .Cases("arm1020t", "arm1020e", "arm1022e", "arm1026ej-s", "armv5")
-    .Case("xscale", "xscale")
-    .Cases("arm1136j-s", "arm1136jf-s", "arm1176jz-s", "arm1176jzf-s", "armv6")
-    .Cases("sc000", "cortex-m0", "cortex-m0plus", "cortex-m1", "armv6m")
-    .Cases("cortex-a5", "cortex-a7", "cortex-a8", "armv7")
-    .Cases("cortex-a9", "cortex-a12", "cortex-a15", "cortex-a17", "krait", "armv7")
-    .Cases("cortex-r4", "cortex-r4f", "cortex-r5", "cortex-r7", "armv7r")
-    .Cases("sc300", "cortex-m3", "armv7m")
-    .Cases("cortex-m4", "cortex-m7", "armv7em")
-    .Case("swift", "armv7s")
-    .Default(nullptr);
+static const char *ArmMachOArchNameCPU(StringRef CPU) {
+  unsigned ArchKind = llvm::ARMTargetParser::parseCPUArch(CPU);
+  if (ArchKind == llvm::ARM::AK_INVALID)
+    return nullptr;
+  StringRef Arch = llvm::ARMTargetParser::getArchName(ArchKind);
+
+  // FIXME: Make sure this MachO triple mangling is really necessary.
+  // ARMv5* normalises to ARMv5.
+  if (Arch.startswith("armv5"))
+    Arch = Arch.substr(0, 5);
+  // ARMv6*, except ARMv6M, normalises to ARMv6.
+  else if (Arch.startswith("armv6") && !Arch.endswith("6m"))
+    Arch = Arch.substr(0, 5);
+  // ARMv7A normalises to ARMv7.
+  else if (Arch.endswith("v7a"))
+    Arch = Arch.substr(0, 5);
+  return Arch.data();
 }
 
 static bool isSoftFloatABI(const ArgList &Args) {
@@ -164,11 +171,11 @@
   case llvm::Triple::thumb:
   case llvm::Triple::arm: {
     if (const Arg *A = Args.getLastArg(options::OPT_march_EQ))
-      if (const char *Arch = GetArmArchForMArch(A->getValue()))
+      if (const char *Arch = ArmMachOArchName(A->getValue()))
         return Arch;
 
     if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ))
-      if (const char *Arch = GetArmArchForMCpu(A->getValue()))
+      if (const char *Arch = ArmMachOArchNameCPU(A->getValue()))
         return Arch;
 
     return "arm";
@@ -324,6 +331,26 @@
   }
 }
 
+void Darwin::addProfileRTLibs(const ArgList &Args,
+                              ArgStringList &CmdArgs) const {
+  if (!(Args.hasFlag(options::OPT_fprofile_arcs, options::OPT_fno_profile_arcs,
+                     false) ||
+        Args.hasArg(options::OPT_fprofile_generate) ||
+        Args.hasArg(options::OPT_fprofile_instr_generate) ||
+        Args.hasArg(options::OPT_fprofile_instr_generate_EQ) ||
+        Args.hasArg(options::OPT_fcreate_profile) ||
+        Args.hasArg(options::OPT_coverage)))
+    return;
+
+  // Select the appropriate runtime library for the target.
+  if (isTargetIOSBased())
+    AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.profile_ios.a",
+                      /*AlwaysLink*/ true);
+  else
+    AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.profile_osx.a",
+                      /*AlwaysLink*/ true);
+}
+
 void DarwinClang::AddLinkSanitizerLibArgs(const ArgList &Args,
                                           ArgStringList &CmdArgs,
                                           StringRef Sanitizer) const {
@@ -374,19 +401,6 @@
     return;
   }
 
-  // If we are building profile support, link that library in.
-  if (Args.hasFlag(options::OPT_fprofile_arcs, options::OPT_fno_profile_arcs,
-                   false) ||
-      Args.hasArg(options::OPT_fprofile_generate) ||
-      Args.hasArg(options::OPT_fprofile_instr_generate) ||
-      Args.hasArg(options::OPT_fcreate_profile) ||
-      Args.hasArg(options::OPT_coverage)) {
-    // Select the appropriate runtime library for the target.
-    if (isTargetIOSBased())
-      AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.profile_ios.a");
-    else
-      AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.profile_osx.a");
-  }
 
   const SanitizerArgs &Sanitize = getSanitizerArgs();
 
@@ -865,8 +879,8 @@
   return DAL;
 }
 
-void MachO::AddLinkRuntimeLibArgs(const llvm::opt::ArgList &Args,
-                                  llvm::opt::ArgStringList &CmdArgs) const {
+void MachO::AddLinkRuntimeLibArgs(const ArgList &Args,
+                                  ArgStringList &CmdArgs) const {
   // Embedded targets are simple at the moment, not supporting sanitizers and
   // with different libraries for each member of the product { static, PIC } x
   // { hard-float, soft-float }
@@ -975,8 +989,8 @@
   return getArch() == llvm::Triple::x86 || getArch() == llvm::Triple::x86_64;
 }
 
-void Darwin::addMinVersionArgs(const llvm::opt::ArgList &Args,
-                               llvm::opt::ArgStringList &CmdArgs) const {
+void Darwin::addMinVersionArgs(const ArgList &Args,
+                               ArgStringList &CmdArgs) const {
   VersionTuple TargetVersion = getTargetVersion();
 
   if (isTargetIOSSimulator())
@@ -991,8 +1005,8 @@
   CmdArgs.push_back(Args.MakeArgString(TargetVersion.getAsString()));
 }
 
-void Darwin::addStartObjectFileArgs(const llvm::opt::ArgList &Args,
-                                    llvm::opt::ArgStringList &CmdArgs) const {
+void Darwin::addStartObjectFileArgs(const ArgList &Args,
+                                    ArgStringList &CmdArgs) const {
   // Derived from startfile spec.
   if (Args.hasArg(options::OPT_dynamiclib)) {
     // Derived from darwin_dylib1 spec.
@@ -1561,7 +1575,7 @@
 }
 
 static bool findMIPSMultilibs(const llvm::Triple &TargetTriple, StringRef Path,
-                              const llvm::opt::ArgList &Args,
+                              const ArgList &Args,
                               DetectedMultilibs &Result) {
   // Some MIPS toolchains put libraries and object files compiled
   // using different options in to the sub-directoris which names
@@ -2063,20 +2077,28 @@
 }
 
 bool Generic_GCC::IsIntegratedAssemblerDefault() const {
-  return getTriple().getArch() == llvm::Triple::x86 ||
-         getTriple().getArch() == llvm::Triple::x86_64 ||
-         getTriple().getArch() == llvm::Triple::aarch64 ||
-         getTriple().getArch() == llvm::Triple::aarch64_be ||
-         getTriple().getArch() == llvm::Triple::arm ||
-         getTriple().getArch() == llvm::Triple::armeb ||
-         getTriple().getArch() == llvm::Triple::thumb ||
-         getTriple().getArch() == llvm::Triple::thumbeb ||
-         getTriple().getArch() == llvm::Triple::ppc ||
-         getTriple().getArch() == llvm::Triple::ppc64 ||
-         getTriple().getArch() == llvm::Triple::ppc64le ||
-         getTriple().getArch() == llvm::Triple::sparc ||
-         getTriple().getArch() == llvm::Triple::sparcv9 ||
-         getTriple().getArch() == llvm::Triple::systemz;
+  switch (getTriple().getArch()) {
+  case llvm::Triple::x86:
+  case llvm::Triple::x86_64:
+  case llvm::Triple::aarch64:
+  case llvm::Triple::aarch64_be:
+  case llvm::Triple::arm:
+  case llvm::Triple::armeb:
+  case llvm::Triple::bpfel:
+  case llvm::Triple::bpfeb:
+  case llvm::Triple::thumb:
+  case llvm::Triple::thumbeb:
+  case llvm::Triple::ppc:
+  case llvm::Triple::ppc64:
+  case llvm::Triple::ppc64le:
+  case llvm::Triple::sparc:
+  case llvm::Triple::sparcel:
+  case llvm::Triple::sparcv9:
+  case llvm::Triple::systemz:
+    return true;
+  default:
+    return false;
+  }
 }
 
 void Generic_ELF::addClangTargetOptions(const ArgList &DriverArgs,
@@ -2118,6 +2140,30 @@
   return InstallRelDir;
 }
 
+const char *Hexagon_TC::GetSmallDataThreshold(const ArgList &Args)
+{
+  Arg *A;
+
+  A = Args.getLastArg(options::OPT_G,
+                      options::OPT_G_EQ,
+                      options::OPT_msmall_data_threshold_EQ);
+  if (A)
+    return A->getValue();
+
+  A = Args.getLastArg(options::OPT_shared,
+                      options::OPT_fpic,
+                      options::OPT_fPIC);
+  if (A)
+    return "0";
+
+  return 0;
+}
+
+bool Hexagon_TC::UsesG0(const char* smallDataThreshold)
+{
+  return smallDataThreshold && smallDataThreshold[0] == '0';
+}
+
 static void GetHexagonLibraryPaths(
   const ArgList &Args,
   const std::string &Ver,
@@ -2130,14 +2176,9 @@
   //----------------------------------------------------------------------------
   // -L Args
   //----------------------------------------------------------------------------
-  for (arg_iterator
-         it = Args.filtered_begin(options::OPT_L),
-         ie = Args.filtered_end();
-       it != ie;
-       ++it) {
-    for (unsigned i = 0, e = (*it)->getNumValues(); i != e; ++i)
-      LibPaths->push_back((*it)->getValue(i));
-  }
+  for (const Arg *A : Args.filtered(options::OPT_L))
+    for (unsigned i = 0, e = A->getNumValues(); i != e; ++i)
+      LibPaths->push_back(A->getValue(i));
 
   //----------------------------------------------------------------------------
   // Other standard paths
@@ -3071,6 +3112,14 @@
     if (llvm::sys::fs::exists(SysRoot + "/lib/powerpc64le-linux-gnu"))
       return "powerpc64le-linux-gnu";
     return TargetTriple.str();
+  case llvm::Triple::sparc:
+    if (llvm::sys::fs::exists(SysRoot + "/lib/sparc-linux-gnu"))
+      return "sparc-linux-gnu";
+    return TargetTriple.str();
+  case llvm::Triple::sparcv9:
+    if (llvm::sys::fs::exists(SysRoot + "/lib/sparc64-linux-gnu"))
+      return "sparc64-linux-gnu";
+    return TargetTriple.str();
   }
 }
 
@@ -3423,6 +3472,12 @@
   const StringRef PPC64LEMultiarchIncludeDirs[] = {
     "/usr/include/powerpc64le-linux-gnu"
   };
+  const StringRef SparcMultiarchIncludeDirs[] = {
+    "/usr/include/sparc-linux-gnu"
+  };
+  const StringRef Sparc64MultiarchIncludeDirs[] = {
+    "/usr/include/sparc64-linux-gnu"
+  };
   ArrayRef<StringRef> MultiarchIncludeDirs;
   if (getTriple().getArch() == llvm::Triple::x86_64) {
     MultiarchIncludeDirs = X86_64MultiarchIncludeDirs;
@@ -3450,6 +3505,10 @@
     MultiarchIncludeDirs = PPC64MultiarchIncludeDirs;
   } else if (getTriple().getArch() == llvm::Triple::ppc64le) {
     MultiarchIncludeDirs = PPC64LEMultiarchIncludeDirs;
+  } else if (getTriple().getArch() == llvm::Triple::sparc) {
+    MultiarchIncludeDirs = SparcMultiarchIncludeDirs;
+  } else if (getTriple().getArch() == llvm::Triple::sparcv9) {
+    MultiarchIncludeDirs = Sparc64MultiarchIncludeDirs;
   }
   for (StringRef Dir : MultiarchIncludeDirs) {
     if (llvm::sys::fs::exists(SysRoot + Dir)) {
@@ -3661,8 +3720,8 @@
   }
 }
 
-void XCore::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
-                                     llvm::opt::ArgStringList &CC1Args) const {
+void XCore::addClangTargetOptions(const ArgList &DriverArgs,
+                                  ArgStringList &CC1Args) const {
   CC1Args.push_back("-nostdsysteminc");
 }
 
diff --git a/lib/Driver/ToolChains.h b/lib/Driver/ToolChains.h
index 456bf77..0b7073f 100644
--- a/lib/Driver/ToolChains.h
+++ b/lib/Driver/ToolChains.h
@@ -239,6 +239,13 @@
                          bool IsEmbedded = false,
                          bool AddRPath = false) const;
 
+  /// Add any profiling runtime libraries that are needed. This is essentially a
+  /// MachO specific version of addProfileRT in Tools.cpp.
+  virtual void addProfileRTLibs(const llvm::opt::ArgList &Args,
+                                llvm::opt::ArgStringList &CmdArgs) const {
+    // There aren't any profiling libs for embedded targets currently.
+  }
+
   /// }
   /// @name ToolChain Implementation
   /// {
@@ -365,6 +372,9 @@
     return !isTargetIPhoneOS() || isIPhoneOSVersionLT(6, 0);
   }
 
+  void addProfileRTLibs(const llvm::opt::ArgList &Args,
+                        llvm::opt::ArgStringList &CmdArgs) const override;
+
 protected:
   /// }
   /// @name Darwin specific Toolchain functions
@@ -714,6 +724,10 @@
                                const llvm::opt::ArgList &Args);
 
   static StringRef GetTargetCPU(const llvm::opt::ArgList &Args);
+
+  static const char *GetSmallDataThreshold(const llvm::opt::ArgList &Args);
+
+  static bool UsesG0(const char* smallDataThreshold);
 };
 
 class LLVM_LIBRARY_VISIBILITY NaCl_TC : public Generic_ELF {
@@ -793,6 +807,9 @@
   bool getVisualStudioBinariesFolder(const char *clangProgramPath,
                                      std::string &path) const;
 
+  std::string ComputeEffectiveClangTriple(const llvm::opt::ArgList &Args,
+                                          types::ID InputType) const override;
+
 protected:
   void AddSystemIncludeWithSubfolder(const llvm::opt::ArgList &DriverArgs,
                                      llvm::opt::ArgStringList &CC1Args,
diff --git a/lib/Driver/Tools.cpp b/lib/Driver/Tools.cpp
index 3053d9b..800053c 100644
--- a/lib/Driver/Tools.cpp
+++ b/lib/Driver/Tools.cpp
@@ -32,6 +32,7 @@
 #include "llvm/Option/Arg.h"
 #include "llvm/Option/ArgList.h"
 #include "llvm/Option/Option.h"
+#include "llvm/Support/TargetParser.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
@@ -338,12 +339,10 @@
   }
 
   Args.AddLastArg(CmdArgs, options::OPT_MP);
+  Args.AddLastArg(CmdArgs, options::OPT_MV);
 
   // Convert all -MQ <target> args to -MT <quoted target>
-  for (arg_iterator it = Args.filtered_begin(options::OPT_MT,
-                                             options::OPT_MQ),
-         ie = Args.filtered_end(); it != ie; ++it) {
-    const Arg *A = *it;
+  for (const Arg *A : Args.filtered(options::OPT_MT, options::OPT_MQ)) {
     A->claim();
 
     if (A->getOption().matches(options::OPT_MQ)) {
@@ -364,10 +363,7 @@
   // replacement into a build system already set up to be generating
   // .gch files.
   bool RenderedImplicitInclude = false;
-  for (arg_iterator it = Args.filtered_begin(options::OPT_clang_i_Group),
-         ie = Args.filtered_end(); it != ie; ++it) {
-    const Arg *A = it;
-
+  for (const Arg *A : Args.filtered(options::OPT_clang_i_Group)) {
     if (A->getOption().matches(options::OPT_include)) {
       bool IsFirstImplicitInclude = !RenderedImplicitInclude;
       RenderedImplicitInclude = true;
@@ -495,6 +491,7 @@
       return true;
     return false;
 
+  case llvm::Triple::hexagon:
   case llvm::Triple::ppc64le:
   case llvm::Triple::systemz:
   case llvm::Triple::xcore:
@@ -534,85 +531,26 @@
 }
 
 // Handle -mfpu=.
-//
-// FIXME: Centralize feature selection, defaulting shouldn't be also in the
-// frontend target.
 static void getARMFPUFeatures(const Driver &D, const Arg *A,
                               const ArgList &Args,
                               std::vector<const char *> &Features) {
   StringRef FPU = A->getValue();
-
-  // Set the target features based on the FPU.
-  if (FPU == "fpa" || FPU == "fpe2" || FPU == "fpe3" || FPU == "maverick") {
-    // Disable any default FPU support.
-    Features.push_back("-vfp2");
-    Features.push_back("-vfp3");
-    Features.push_back("-neon");
-  } else if (FPU == "vfp") {
-    Features.push_back("+vfp2");
-    Features.push_back("-neon");
-  } else if (FPU == "vfp3-d16" || FPU == "vfpv3-d16") {
-    Features.push_back("+vfp3");
-    Features.push_back("+d16");
-    Features.push_back("-neon");
-  } else if (FPU == "vfp3" || FPU == "vfpv3") {
-    Features.push_back("+vfp3");
-    Features.push_back("-neon");
-  } else if (FPU == "vfp4-d16" || FPU == "vfpv4-d16") {
-    Features.push_back("+vfp4");
-    Features.push_back("+d16");
-    Features.push_back("-neon");
-  } else if (FPU == "vfp4" || FPU == "vfpv4") {
-    Features.push_back("+vfp4");
-    Features.push_back("-neon");
-  } else if (FPU == "fp4-sp-d16" || FPU == "fpv4-sp-d16") {
-    Features.push_back("+vfp4");
-    Features.push_back("+d16");
-    Features.push_back("+fp-only-sp");
-    Features.push_back("-neon");
-  } else if (FPU == "fp5-sp-d16" || FPU == "fpv5-sp-d16") {
-    Features.push_back("+fp-armv8");
-    Features.push_back("+fp-only-sp");
-    Features.push_back("+d16");
-    Features.push_back("-neon");
-    Features.push_back("-crypto");
-  } else if (FPU == "fp5-dp-d16" || FPU == "fpv5-dp-d16" ||
-             FPU == "fp5-d16" || FPU == "fpv5-d16") {
-    Features.push_back("+fp-armv8");
-    Features.push_back("+d16");
-    Features.push_back("-neon");
-    Features.push_back("-crypto");
-  } else if (FPU == "fp-armv8") {
-    Features.push_back("+fp-armv8");
-    Features.push_back("-neon");
-    Features.push_back("-crypto");
-  } else if (FPU == "neon-fp-armv8") {
-    Features.push_back("+fp-armv8");
-    Features.push_back("+neon");
-    Features.push_back("-crypto");
-  } else if (FPU == "crypto-neon-fp-armv8") {
-    Features.push_back("+fp-armv8");
-    Features.push_back("+neon");
-    Features.push_back("+crypto");
-  } else if (FPU == "neon") {
-    Features.push_back("+neon");
-  } else if (FPU == "neon-vfpv3") {
-    Features.push_back("+vfp3");
-    Features.push_back("+neon");
-  } else if (FPU == "neon-vfpv4") {
-    Features.push_back("+neon");
-    Features.push_back("+vfp4");
-  } else if (FPU == "none") {
-    Features.push_back("-vfp2");
-    Features.push_back("-vfp3");
-    Features.push_back("-vfp4");
-    Features.push_back("-fp-armv8");
-    Features.push_back("-crypto");
-    Features.push_back("-neon");
-  } else
+  unsigned FPUID = llvm::ARMTargetParser::parseFPU(FPU);
+  if (!llvm::ARMTargetParser::getFPUFeatures(FPUID, Features))
     D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
 }
 
+static int getARMSubArchVersionNumber(const llvm::Triple &Triple) {
+  llvm::StringRef Arch = Triple.getArchName();
+  return llvm::ARMTargetParser::parseArchVersion(Arch);
+}
+
+static bool isARMMProfile(const llvm::Triple &Triple) {
+  llvm::StringRef Arch = Triple.getArchName();
+  unsigned Profile = llvm::ARMTargetParser::parseArchProfile(Arch);
+  return Profile == llvm::ARM::PK_M;
+}
+
 // Select the float ABI as determined by -msoft-float, -mhard-float, and
 // -mfloat-abi=.
 StringRef tools::arm::getARMFloatABI(const Driver &D, const ArgList &Args,
@@ -643,11 +581,8 @@
     case llvm::Triple::IOS: {
       // Darwin defaults to "softfp" for v6 and v7.
       //
-      // FIXME: Factor out an ARM class so we can cache the arch somewhere.
-      std::string ArchName =
-        arm::getLLVMArchSuffixForARM(arm::getARMTargetCPU(Args, Triple));
-      if (StringRef(ArchName).startswith("v6") ||
-          StringRef(ArchName).startswith("v7"))
+      if (getARMSubArchVersionNumber(Triple) == 6 ||
+          getARMSubArchVersionNumber(Triple) == 7)
         FloatABI = "softfp";
       else
         FloatABI = "soft";
@@ -687,9 +622,7 @@
         FloatABI = "softfp";
         break;
       case llvm::Triple::Android: {
-        std::string ArchName =
-          arm::getLLVMArchSuffixForARM(arm::getARMTargetCPU(Args, Triple));
-        if (StringRef(ArchName).startswith("v7"))
+        if (getARMSubArchVersionNumber(Triple) == 7)
           FloatABI = "softfp";
         else
           FloatABI = "soft";
@@ -742,6 +675,25 @@
   if (const Arg *A = Args.getLastArg(options::OPT_mhwdiv_EQ))
     getARMHWDivFeatures(D, A, Args, Features);
 
+  // Check if -march is valid by checking if it can be canonicalised and parsed.
+  // getARMArch is used here instead of just checking the -march value in order
+  // to handle -march=native correctly.
+  if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) {
+    std::string Arch = arm::getARMArch(Args, Triple);
+    if (llvm::ARMTargetParser::parseArch(Arch) == llvm::ARM::AK_INVALID)
+      D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
+  }
+
+  // We do a similar thing with -mcpu, but here things are complicated because
+  // the only way we have to check whether a CPU is valid is
+  // getLLVMArchSuffixForARM, which also needs an architecture.
+  if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
+    std::string CPU = arm::getARMTargetCPU(Args, Triple);
+    std::string Arch = arm::getARMArch(Args, Triple);
+    if (strcmp(arm::getLLVMArchSuffixForARM(CPU, Arch), "") == 0)
+      D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
+  }
+
   // Setting -msoft-float effectively disables NEON because of the GCC
   // implementation, although the same isn't true of VFP or VFP3.
   if (FloatABI == "soft") {
@@ -757,6 +709,10 @@
     else
       Features.push_back("-crc");
   }
+
+  if (Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v8_1a) {
+    Features.insert(Features.begin(), "+v8.1a");
+  }
 }
 
 void Clang::AddARMTargetArgs(const ArgList &Args,
@@ -766,7 +722,6 @@
   // Get the effective triple, which takes into account the deployment target.
   std::string TripleStr = getToolChain().ComputeEffectiveClangTriple(Args);
   llvm::Triple Triple(TripleStr);
-  std::string CPUName = arm::getARMTargetCPU(Args, Triple);
 
   // Select the ABI to use.
   //
@@ -780,7 +735,7 @@
     // the frontend matches that.
     if (Triple.getEnvironment() == llvm::Triple::EABI ||
         Triple.getOS() == llvm::Triple::UnknownOS ||
-        StringRef(CPUName).startswith("cortex-m")) {
+        isARMMProfile(Triple)) {
       ABIName = "aapcs";
     } else {
       ABIName = "apcs-gnu";
@@ -895,7 +850,7 @@
     CPU = A->getValue();
   } else if ((A = Args.getLastArg(options::OPT_mcpu_EQ))) {
     StringRef Mcpu = A->getValue();
-    CPU = Mcpu.split("+").first;
+    CPU = Mcpu.split("+").first.lower();
   }
 
   // Handle CPU name is 'native'.
@@ -1293,11 +1248,9 @@
 
 static void getPPCTargetFeatures(const ArgList &Args,
                                  std::vector<const char *> &Features) {
-  for (arg_iterator it = Args.filtered_begin(options::OPT_m_ppc_Features_Group),
-                    ie = Args.filtered_end();
-       it != ie; ++it) {
-    StringRef Name = (*it)->getOption().getName();
-    (*it)->claim();
+  for (const Arg *A : Args.filtered(options::OPT_m_ppc_Features_Group)) {
+    StringRef Name = A->getOption().getName();
+    A->claim();
 
     // Skip over "-m".
     assert(Name.startswith("m") && "Invalid feature name.");
@@ -1381,47 +1334,26 @@
   return "";
 }
 
-static void getSparcTargetFeatures(const ArgList &Args,
-                                   std::vector<const char *> &Features) {
-  bool SoftFloatABI = true;
-  if (Arg *A =
-          Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float)) {
-    if (A->getOption().matches(options::OPT_mhard_float))
-      SoftFloatABI = false;
-  }
-  if (SoftFloatABI)
-    Features.push_back("+soft-float");
-}
-
 void Clang::AddSparcTargetArgs(const ArgList &Args,
                              ArgStringList &CmdArgs) const {
   const Driver &D = getToolChain().getDriver();
+  std::string Triple = getToolChain().ComputeEffectiveClangTriple(Args);
 
-  // Select the float ABI as determined by -msoft-float and -mhard-float.
-  StringRef FloatABI;
-  if (Arg *A = Args.getLastArg(options::OPT_msoft_float,
-                               options::OPT_mhard_float)) {
+  bool SoftFloatABI = false;
+  if (Arg *A =
+          Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float)) {
     if (A->getOption().matches(options::OPT_msoft_float))
-      FloatABI = "soft";
-    else if (A->getOption().matches(options::OPT_mhard_float))
-      FloatABI = "hard";
+      SoftFloatABI = true;
   }
 
-  // If unspecified, choose the default based on the platform.
-  if (FloatABI.empty()) {
-    // Assume "soft", but warn the user we are guessing.
-    FloatABI = "soft";
-    D.Diag(diag::warn_drv_assuming_mfloat_abi_is) << "soft";
-  }
-
-  if (FloatABI == "soft") {
-    // Floating point operations and argument passing are soft.
-    //
-    // FIXME: This changes CPP defines, we need -target-soft-float.
-    CmdArgs.push_back("-msoft-float");
-  } else {
-    assert(FloatABI == "hard" && "Invalid float abi!");
-    CmdArgs.push_back("-mhard-float");
+  // Only the hard-float ABI on Sparc is standardized, and it is the
+  // default. GCC also supports a nonstandard soft-float ABI mode, and
+  // perhaps LLVM should implement that, too. However, since llvm
+  // currently does not support Sparc soft-float, at all, display an
+  // error if it's requested.
+  if (SoftFloatABI) {
+    D.Diag(diag::err_drv_unsupported_opt_for_target)
+        << "-msoft-float" << Triple;
   }
 }
 
@@ -1441,6 +1373,14 @@
     else
       Features.push_back("-transactional-execution");
   }
+  // -m(no-)vx overrides use of the vector facility.
+  if (Arg *A = Args.getLastArg(options::OPT_mvx,
+                               options::OPT_mno_vx)) {
+    if (A->getOption().matches(options::OPT_mvx))
+      Features.push_back("+vector");
+    else
+      Features.push_back("-vector");
+  }
 }
 
 static const char *getX86TargetCPU(const ArgList &Args,
@@ -1549,6 +1489,7 @@
   }
 
   case llvm::Triple::sparc:
+  case llvm::Triple::sparcel:
   case llvm::Triple::sparcv9:
     if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ))
       return A->getValue();
@@ -1588,6 +1529,137 @@
     CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=mcpu=") + CPU));
 }
 
+/// This is a helper function for validating the optional refinement step
+/// parameter in reciprocal argument strings. Return false if there is an error
+/// parsing the refinement step. Otherwise, return true and set the Position
+/// of the refinement step in the input string.
+static bool getRefinementStep(const StringRef &In, const Driver &D,
+                                const Arg &A, size_t &Position) {
+  const char RefinementStepToken = ':';
+  Position = In.find(RefinementStepToken);
+  if (Position != StringRef::npos) {
+    StringRef Option = A.getOption().getName();
+    StringRef RefStep = In.substr(Position + 1);
+    // Allow exactly one numeric character for the additional refinement
+    // step parameter. This is reasonable for all currently-supported
+    // operations and architectures because we would expect that a larger value
+    // of refinement steps would cause the estimate "optimization" to
+    // under-perform the native operation. Also, if the estimate does not
+    // converge quickly, it probably will not ever converge, so further
+    // refinement steps will not produce a better answer.
+    if (RefStep.size() != 1) {
+      D.Diag(diag::err_drv_invalid_value) << Option << RefStep;
+      return false;
+    }
+    char RefStepChar = RefStep[0];
+    if (RefStepChar < '0' || RefStepChar > '9') {
+      D.Diag(diag::err_drv_invalid_value) << Option << RefStep;
+      return false;
+    }
+  }
+  return true;
+}
+
+/// The -mrecip flag requires processing of many optional parameters.
+static void ParseMRecip(const Driver &D, const ArgList &Args,
+                        ArgStringList &OutStrings) {
+  StringRef DisabledPrefixIn = "!";
+  StringRef DisabledPrefixOut = "!";
+  StringRef EnabledPrefixOut = "";
+  StringRef Out = "-mrecip=";
+
+  Arg *A = Args.getLastArg(options::OPT_mrecip, options::OPT_mrecip_EQ);
+  if (!A)
+    return;
+
+  unsigned NumOptions = A->getNumValues();
+  if (NumOptions == 0) {
+    // No option is the same as "all".
+    OutStrings.push_back(Args.MakeArgString(Out + "all"));
+    return;
+  }
+
+  // Pass through "all", "none", or "default" with an optional refinement step.
+  if (NumOptions == 1) {
+    StringRef Val = A->getValue(0);
+    size_t RefStepLoc;
+    if (!getRefinementStep(Val, D, *A, RefStepLoc))
+      return;
+    StringRef ValBase = Val.slice(0, RefStepLoc);
+    if (ValBase == "all" || ValBase == "none" || ValBase == "default") {
+      OutStrings.push_back(Args.MakeArgString(Out + Val));
+      return;
+    }
+  }
+
+  // Each reciprocal type may be enabled or disabled individually.
+  // Check each input value for validity, concatenate them all back together,
+  // and pass through.
+
+  llvm::StringMap<bool> OptionStrings;
+  OptionStrings.insert(std::make_pair("divd",       false));
+  OptionStrings.insert(std::make_pair("divf",       false));
+  OptionStrings.insert(std::make_pair("vec-divd",   false));
+  OptionStrings.insert(std::make_pair("vec-divf",   false));
+  OptionStrings.insert(std::make_pair("sqrtd",      false));
+  OptionStrings.insert(std::make_pair("sqrtf",      false));
+  OptionStrings.insert(std::make_pair("vec-sqrtd",  false));
+  OptionStrings.insert(std::make_pair("vec-sqrtf",  false));
+
+  for (unsigned i = 0; i != NumOptions; ++i) {
+    StringRef Val = A->getValue(i);
+
+    bool IsDisabled = Val.startswith(DisabledPrefixIn);
+    // Ignore the disablement token for string matching.
+    if (IsDisabled)
+      Val = Val.substr(1);
+
+    size_t RefStep;
+    if (!getRefinementStep(Val, D, *A, RefStep))
+      return;
+
+    StringRef ValBase = Val.slice(0, RefStep);
+    llvm::StringMap<bool>::iterator OptionIter = OptionStrings.find(ValBase);
+    if (OptionIter == OptionStrings.end()) {
+      // Try again specifying float suffix.
+      OptionIter = OptionStrings.find(ValBase.str() + 'f');
+      if (OptionIter == OptionStrings.end()) {
+        // The input name did not match any known option string.
+        D.Diag(diag::err_drv_unknown_argument) << Val;
+        return;
+      }
+      // The option was specified without a float or double suffix.
+      // Make sure that the double entry was not already specified.
+      // The float entry will be checked below.
+      if (OptionStrings[ValBase.str() + 'd']) {
+        D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Val;
+        return;
+      }
+    }
+
+    if (OptionIter->second == true) {
+      // Duplicate option specified.
+      D.Diag(diag::err_drv_invalid_value) << A->getOption().getName() << Val;
+      return;
+    }
+
+    // Mark the matched option as found. Do not allow duplicate specifiers.
+    OptionIter->second = true;
+
+    // If the precision was not specified, also mark the double entry as found.
+    if (ValBase.back() != 'f' && ValBase.back() != 'd')
+      OptionStrings[ValBase.str() + 'd'] = true;
+
+    // Build the output string.
+    StringRef Prefix = IsDisabled ? DisabledPrefixOut : EnabledPrefixOut;
+    Out = Args.MakeArgString(Out + Prefix + Val);
+    if (i != NumOptions - 1)
+      Out = Args.MakeArgString(Out + ",");
+  }
+
+  OutStrings.push_back(Args.MakeArgString(Out));
+}
+
 static void getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
                                  const ArgList &Args,
                                  std::vector<const char *> &Features) {
@@ -1613,9 +1685,10 @@
     Features.push_back("-fsgsbase");
   }
 
+  const llvm::Triple::ArchType ArchType = Triple.getArch();
   // Add features to be compatible with gcc for Android.
   if (Triple.getEnvironment() == llvm::Triple::Android) {
-    if (Triple.getArch() == llvm::Triple::x86_64) {
+    if (ArchType == llvm::Triple::x86_64) {
       Features.push_back("+sse4.2");
       Features.push_back("+popcnt");
     } else
@@ -1627,15 +1700,14 @@
     StringRef Arch = A->getValue();
     bool ArchUsed = false;
     // First, look for flags that are shared in x86 and x86-64.
-    if (Triple.getArch() == llvm::Triple::x86_64 ||
-        Triple.getArch() == llvm::Triple::x86) {
+    if (ArchType == llvm::Triple::x86_64 || ArchType == llvm::Triple::x86) {
       if (Arch == "AVX" || Arch == "AVX2") {
         ArchUsed = true;
         Features.push_back(Args.MakeArgString("+" + Arch.lower()));
       }
     }
     // Then, look for x86-specific flags.
-    if (Triple.getArch() == llvm::Triple::x86) {
+    if (ArchType == llvm::Triple::x86) {
       if (Arch == "IA32") {
         ArchUsed = true;
       } else if (Arch == "SSE" || Arch == "SSE2") {
@@ -1649,11 +1721,9 @@
 
   // Now add any that the user explicitly requested on the command line,
   // which may override the defaults.
-  for (arg_iterator it = Args.filtered_begin(options::OPT_m_x86_Features_Group),
-                    ie = Args.filtered_end();
-       it != ie; ++it) {
-    StringRef Name = (*it)->getOption().getName();
-    (*it)->claim();
+  for (const Arg *A : Args.filtered(options::OPT_m_x86_Features_Group)) {
+    StringRef Name = A->getOption().getName();
+    A->claim();
 
     // Skip over "-m".
     assert(Name.startswith("m") && "Invalid feature name.");
@@ -1703,39 +1773,16 @@
   }
 }
 
-static inline bool HasPICArg(const ArgList &Args) {
-  return Args.hasArg(options::OPT_fPIC)
-    || Args.hasArg(options::OPT_fpic);
-}
-
-static Arg *GetLastSmallDataThresholdArg(const ArgList &Args) {
-  return Args.getLastArg(options::OPT_G,
-                         options::OPT_G_EQ,
-                         options::OPT_msmall_data_threshold_EQ);
-}
-
-static std::string GetHexagonSmallDataThresholdValue(const ArgList &Args) {
-  std::string value;
-  if (HasPICArg(Args))
-    value = "0";
-  else if (Arg *A = GetLastSmallDataThresholdArg(Args)) {
-    value = A->getValue();
-    A->claim();
-  }
-  return value;
-}
-
 void Clang::AddHexagonTargetArgs(const ArgList &Args,
                                  ArgStringList &CmdArgs) const {
-  CmdArgs.push_back("-fno-signed-char");
   CmdArgs.push_back("-mqdsp6-compat");
   CmdArgs.push_back("-Wreturn-type");
 
-  std::string SmallDataThreshold = GetHexagonSmallDataThresholdValue(Args);
-  if (!SmallDataThreshold.empty()) {
+  if (const char* v = toolchains::Hexagon_TC::GetSmallDataThreshold(Args)) {
+    std::string SmallDataThreshold="-hexagon-small-data-threshold=";
+    SmallDataThreshold += v;
     CmdArgs.push_back ("-mllvm");
-    CmdArgs.push_back(Args.MakeArgString(
-                        "-hexagon-small-data-threshold=" + SmallDataThreshold));
+    CmdArgs.push_back(Args.MakeArgString(SmallDataThreshold));
   }
 
   if (!Args.hasArg(options::OPT_fno_short_enums))
@@ -1825,7 +1872,8 @@
                                const ArgList &Args,
                                std::vector<const char *> &Features) {
   StringRef CPU;
-  if (!DecodeAArch64Mcpu(D, Mcpu, CPU, Features))
+  std::string McpuLowerCase = Mcpu.lower();
+  if (!DecodeAArch64Mcpu(D, McpuLowerCase, CPU, Features))
     return false;
 
   return true;
@@ -1851,7 +1899,8 @@
                                     std::vector<const char *> &Features) {
   StringRef CPU;
   std::vector<const char *> DecodedFeature;
-  if (!DecodeAArch64Mcpu(D, Mcpu, CPU, DecodedFeature))
+  std::string McpuLowerCase = Mcpu.lower();
+  if (!DecodeAArch64Mcpu(D, McpuLowerCase, CPU, DecodedFeature))
     return false;
 
   return getAArch64MicroArchFeaturesFromMtune(D, CPU, Args, Features);
@@ -1926,10 +1975,6 @@
   case llvm::Triple::ppc64le:
     getPPCTargetFeatures(Args, Features);
     break;
-  case llvm::Triple::sparc:
-  case llvm::Triple::sparcv9:
-    getSparcTargetFeatures(Args, Features);
-    break;
   case llvm::Triple::systemz:
     getSystemZTargetFeatures(Args, Features);
     break;
@@ -2138,10 +2183,8 @@
     // When using an integrated assembler, translate -Wa, and -Xassembler
     // options.
     bool CompressDebugSections = false;
-    for (arg_iterator it = Args.filtered_begin(options::OPT_Wa_COMMA,
-                                               options::OPT_Xassembler),
-           ie = Args.filtered_end(); it != ie; ++it) {
-      const Arg *A = *it;
+    for (const Arg *A :
+         Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler)) {
       A->claim();
 
       for (unsigned i = 0, e = A->getNumValues(); i != e; ++i) {
@@ -2250,6 +2293,7 @@
                      false) ||
         Args.hasArg(options::OPT_fprofile_generate) ||
         Args.hasArg(options::OPT_fprofile_instr_generate) ||
+        Args.hasArg(options::OPT_fprofile_instr_generate_EQ) ||
         Args.hasArg(options::OPT_fcreate_profile) ||
         Args.hasArg(options::OPT_coverage)))
     return;
@@ -2257,6 +2301,55 @@
   CmdArgs.push_back(Args.MakeArgString(getCompilerRT(TC, "profile")));
 }
 
+namespace {
+enum OpenMPRuntimeKind {
+  /// An unknown OpenMP runtime. We can't generate effective OpenMP code
+  /// without knowing what runtime to target.
+  OMPRT_Unknown,
+
+  /// The LLVM OpenMP runtime. When completed and integrated, this will become
+  /// the default for Clang.
+  OMPRT_OMP,
+
+  /// The GNU OpenMP runtime. Clang doesn't support generating OpenMP code for
+  /// this runtime but can swallow the pragmas, and find and link against the
+  /// runtime library itself.
+  OMPRT_GOMP,
+
+  /// The legacy name for the LLVM OpenMP runtime from when it was the Intel
+  /// OpenMP runtime. We support this mode for users with existing dependencies
+  /// on this runtime library name.
+  OMPRT_IOMP5
+};
+}
+
+/// Compute the desired OpenMP runtime from the flag provided.
+static OpenMPRuntimeKind getOpenMPRuntime(const ToolChain &TC,
+                                          const ArgList &Args) {
+  StringRef RuntimeName(CLANG_DEFAULT_OPENMP_RUNTIME);
+
+  const Arg *A = Args.getLastArg(options::OPT_fopenmp_EQ);
+  if (A)
+    RuntimeName = A->getValue();
+
+  auto RT = llvm::StringSwitch<OpenMPRuntimeKind>(RuntimeName)
+      .Case("libomp", OMPRT_OMP)
+      .Case("libgomp", OMPRT_GOMP)
+      .Case("libiomp5", OMPRT_IOMP5)
+      .Default(OMPRT_Unknown);
+
+  if (RT == OMPRT_Unknown) {
+    if (A)
+      TC.getDriver().Diag(diag::err_drv_unsupported_option_argument)
+        << A->getOption().getName() << A->getValue();
+    else
+      // FIXME: We could use a nicer diagnostic here.
+      TC.getDriver().Diag(diag::err_drv_unsupported_opt) << "-fopenmp";
+  }
+
+  return RT;
+}
+
 static void addSanitizerRuntime(const ToolChain &TC, const ArgList &Args,
                                 ArgStringList &CmdArgs, StringRef Sanitizer,
                                 bool IsShared) {
@@ -2325,15 +2418,23 @@
     StaticRuntimes.push_back("dfsan");
   if (SanArgs.needsLsanRt())
     StaticRuntimes.push_back("lsan");
-  if (SanArgs.needsMsanRt())
+  if (SanArgs.needsMsanRt()) {
     StaticRuntimes.push_back("msan");
-  if (SanArgs.needsTsanRt())
+    if (SanArgs.linkCXXRuntimes())
+      StaticRuntimes.push_back("msan_cxx");
+  }
+  if (SanArgs.needsTsanRt()) {
     StaticRuntimes.push_back("tsan");
+    if (SanArgs.linkCXXRuntimes())
+      StaticRuntimes.push_back("tsan_cxx");
+  }
   if (SanArgs.needsUbsanRt()) {
     StaticRuntimes.push_back("ubsan_standalone");
     if (SanArgs.linkCXXRuntimes())
       StaticRuntimes.push_back("ubsan_standalone_cxx");
   }
+  if (SanArgs.needsSafeStackRt())
+    StaticRuntimes.push_back("safestack");
 }
 
 // Should be called before we add system libraries (C++ ABI, libstdc++/libc++,
@@ -2436,7 +2537,7 @@
 }
 
 static const char *SplitDebugName(const ArgList &Args,
-                                  const InputInfoList &Inputs) {
+                                  const InputInfo &Input) {
   Arg *FinalOutput = Args.getLastArg(options::OPT_o);
   if (FinalOutput && Args.hasArg(options::OPT_c)) {
     SmallString<128> T(FinalOutput->getValue());
@@ -2446,7 +2547,7 @@
     // Use the compilation dir.
     SmallString<128> T(
         Args.getLastArgValue(options::OPT_fdebug_compilation_dir));
-    SmallString<128> F(llvm::sys::path::stem(Inputs[0].getBaseInput()));
+    SmallString<128> F(llvm::sys::path::stem(Input.getBaseInput()));
     llvm::sys::path::replace_extension(F, "dwo");
     T += F;
     return Args.MakeArgString(F);
@@ -2580,6 +2681,53 @@
   Result.append(UID.begin(), UID.end());
 }
 
+VersionTuple visualstudio::getMSVCVersion(const Driver *D,
+                                          const llvm::Triple &Triple,
+                                          const llvm::opt::ArgList &Args,
+                                          bool IsWindowsMSVC) {
+  if (Args.hasFlag(options::OPT_fms_extensions, options::OPT_fno_ms_extensions,
+                   IsWindowsMSVC) ||
+      Args.hasArg(options::OPT_fmsc_version) ||
+      Args.hasArg(options::OPT_fms_compatibility_version)) {
+    const Arg *MSCVersion = Args.getLastArg(options::OPT_fmsc_version);
+    const Arg *MSCompatibilityVersion =
+      Args.getLastArg(options::OPT_fms_compatibility_version);
+
+    if (MSCVersion && MSCompatibilityVersion) {
+      if (D)
+        D->Diag(diag::err_drv_argument_not_allowed_with)
+            << MSCVersion->getAsString(Args)
+            << MSCompatibilityVersion->getAsString(Args);
+      return VersionTuple();
+    }
+
+    if (MSCompatibilityVersion) {
+      VersionTuple MSVT;
+      if (MSVT.tryParse(MSCompatibilityVersion->getValue()) && D)
+        D->Diag(diag::err_drv_invalid_value)
+            << MSCompatibilityVersion->getAsString(Args)
+            << MSCompatibilityVersion->getValue();
+      return MSVT;
+    }
+
+    if (MSCVersion) {
+      unsigned Version = 0;
+      if (StringRef(MSCVersion->getValue()).getAsInteger(10, Version) && D)
+        D->Diag(diag::err_drv_invalid_value) << MSCVersion->getAsString(Args)
+                                             << MSCVersion->getValue();
+      return getMSCompatibilityVersion(Version);
+    }
+
+    unsigned Major, Minor, Micro;
+    Triple.getEnvironmentVersion(Major, Minor, Micro);
+    if (Major || Minor || Micro)
+      return VersionTuple(Major, Minor, Micro);
+
+    return VersionTuple(18);
+  }
+  return VersionTuple();
+}
+
 void Clang::ConstructJob(Compilation &C, const JobAction &JA,
                          const InputInfo &Output,
                          const InputInfoList &Inputs,
@@ -2596,6 +2744,7 @@
   bool IsWindowsMSVC = getToolChain().getTriple().isWindowsMSVCEnvironment();
 
   assert(Inputs.size() == 1 && "Unable to handle multiple inputs.");
+  const InputInfo &Input = Inputs[0];
 
   // Invoke ourselves in -cc1 mode.
   //
@@ -2710,7 +2859,7 @@
   // Set the main file name, so that debug info works even with
   // -save-temps.
   CmdArgs.push_back("-main-file-name");
-  CmdArgs.push_back(getBaseInputName(Args, Inputs));
+  CmdArgs.push_back(getBaseInputName(Args, Input));
 
   // Some flags which affect the language (via preprocessor
   // defines).
@@ -2738,7 +2887,7 @@
       
       CmdArgs.push_back("-analyzer-checker=deadcode");
       
-      if (types::isCXX(Inputs[0].getType()))
+      if (types::isCXX(Input.getType()))
         CmdArgs.push_back("-analyzer-checker=cplusplus");
 
       // Enable the following experimental checkers for testing.
@@ -2776,7 +2925,7 @@
 
   // Android-specific defaults for PIC/PIE
   if (getToolChain().getTriple().getEnvironment() == llvm::Triple::Android) {
-    switch (getToolChain().getTriple().getArch()) {
+    switch (getToolChain().getArch()) {
     case llvm::Triple::arm:
     case llvm::Triple::armeb:
     case llvm::Triple::thumb:
@@ -2802,16 +2951,17 @@
 
   // OpenBSD-specific defaults for PIE
   if (getToolChain().getTriple().getOS() == llvm::Triple::OpenBSD) {
-    switch (getToolChain().getTriple().getArch()) {
+    switch (getToolChain().getArch()) {
     case llvm::Triple::mips64:
     case llvm::Triple::mips64el:
-    case llvm::Triple::sparc:
+    case llvm::Triple::sparcel:
     case llvm::Triple::x86:
     case llvm::Triple::x86_64:
       IsPICLevelTwo = false; // "-fpie"
       break;
 
     case llvm::Triple::ppc:
+    case llvm::Triple::sparc:
     case llvm::Triple::sparcv9:
       IsPICLevelTwo = true; // "-fPIE"
       break;
@@ -2917,14 +3067,11 @@
 
   if (Args.hasArg(options::OPT_frewrite_map_file) ||
       Args.hasArg(options::OPT_frewrite_map_file_EQ)) {
-    for (arg_iterator
-             MFI = Args.filtered_begin(options::OPT_frewrite_map_file,
-                                       options::OPT_frewrite_map_file_EQ),
-             MFE = Args.filtered_end();
-         MFI != MFE; ++MFI) {
+    for (const Arg *A : Args.filtered(options::OPT_frewrite_map_file,
+                                      options::OPT_frewrite_map_file_EQ)) {
       CmdArgs.push_back("-frewrite-map-file");
-      CmdArgs.push_back((*MFI)->getValue());
-      (*MFI)->claim();
+      CmdArgs.push_back(A->getValue());
+      A->claim();
     }
   }
 
@@ -3113,6 +3260,8 @@
       CmdArgs.push_back(Args.MakeArgString("-ffp-contract=fast"));
     }
   }
+  
+  ParseMRecip(getToolChain().getDriver(), Args, CmdArgs);
 
   // We separately look for the '-ffast-math' and '-ffinite-math-only' flags,
   // and if we find them, tell the frontend to provide the appropriate
@@ -3137,10 +3286,8 @@
       Args.hasArg(options::OPT_dA))
     CmdArgs.push_back("-masm-verbose");
 
-  bool UsingIntegratedAssembler =
-      Args.hasFlag(options::OPT_fintegrated_as, options::OPT_fno_integrated_as,
-                   IsIntegratedAssemblerDefault);
-  if (!UsingIntegratedAssembler)
+  if (!Args.hasFlag(options::OPT_fintegrated_as, options::OPT_fno_integrated_as,
+                    IsIntegratedAssemblerDefault))
     CmdArgs.push_back("-no-integrated-as");
 
   if (Args.hasArg(options::OPT_fdebug_pass_structure)) {
@@ -3195,9 +3342,7 @@
   }
 
   // Add the target cpu
-  std::string ETripleStr = getToolChain().ComputeEffectiveClangTriple(Args);
-  llvm::Triple ETriple(ETripleStr);
-  std::string CPU = getCPUName(Args, ETriple);
+  std::string CPU = getCPUName(Args, Triple);
   if (!CPU.empty()) {
     CmdArgs.push_back("-target-cpu");
     CmdArgs.push_back(Args.MakeArgString(CPU));
@@ -3209,7 +3354,7 @@
   }
 
   // Add the target features
-  getTargetFeatures(D, ETriple, Args, CmdArgs, false);
+  getTargetFeatures(D, Triple, Args, CmdArgs, false);
 
   // Add target specific flags.
   switch(getToolChain().getArch()) {
@@ -3242,6 +3387,7 @@
     break;
 
   case llvm::Triple::sparc:
+  case llvm::Triple::sparcel:
   case llvm::Triple::sparcv9:
     AddSparcTargetArgs(Args, CmdArgs);
     break;
@@ -3271,7 +3417,7 @@
 
   // Explicitly error on some things we know we don't support and can't just
   // ignore.
-  types::ID InputType = Inputs[0].getType();
+  types::ID InputType = Input.getType();
   if (!Args.hasArg(options::OPT_fallow_unsupported)) {
     Arg *Unsupported;
     if (types::isCXX(InputType) &&
@@ -3384,19 +3530,22 @@
   }
 
   if (!Args.hasFlag(options::OPT_funique_section_names,
-                    options::OPT_fno_unique_section_names,
-                    !UsingIntegratedAssembler))
+                    options::OPT_fno_unique_section_names, true))
     CmdArgs.push_back("-fno-unique-section-names");
 
   Args.AddAllArgs(CmdArgs, options::OPT_finstrument_functions);
 
-  if (Args.hasArg(options::OPT_fprofile_instr_generate) &&
+  if ((Args.hasArg(options::OPT_fprofile_instr_generate) ||
+       Args.hasArg(options::OPT_fprofile_instr_generate_EQ)) &&
       (Args.hasArg(options::OPT_fprofile_instr_use) ||
        Args.hasArg(options::OPT_fprofile_instr_use_EQ)))
     D.Diag(diag::err_drv_argument_not_allowed_with)
       << "-fprofile-instr-generate" << "-fprofile-instr-use";
 
-  Args.AddAllArgs(CmdArgs, options::OPT_fprofile_instr_generate);
+  if (Arg *A = Args.getLastArg(options::OPT_fprofile_instr_generate_EQ))
+    A->render(Args, CmdArgs);
+  else
+    Args.AddAllArgs(CmdArgs, options::OPT_fprofile_instr_generate);
 
   if (Arg *A = Args.getLastArg(options::OPT_fprofile_instr_use_EQ))
     A->render(Args, CmdArgs);
@@ -3412,7 +3561,8 @@
     CmdArgs.push_back("-femit-coverage-data");
 
   if (Args.hasArg(options::OPT_fcoverage_mapping) &&
-      !Args.hasArg(options::OPT_fprofile_instr_generate))
+      !(Args.hasArg(options::OPT_fprofile_instr_generate) ||
+        Args.hasArg(options::OPT_fprofile_instr_generate_EQ)))
     D.Diag(diag::err_drv_argument_only_allowed_with)
       << "-fcoverage-mapping" << "-fprofile-instr-generate";
 
@@ -3551,10 +3701,9 @@
   }
 
   // Warn about ignored options to clang.
-  for (arg_iterator it = Args.filtered_begin(
-       options::OPT_clang_ignored_gcc_optimization_f_Group),
-       ie = Args.filtered_end(); it != ie; ++it) {
-    D.Diag(diag::warn_ignored_gcc_optimization) << (*it)->getAsString(Args);
+  for (const Arg *A :
+       Args.filtered(options::OPT_clang_ignored_gcc_optimization_f_Group)) {
+    D.Diag(diag::warn_ignored_gcc_optimization) << A->getAsString(Args);
   }
 
   claimNoWarnArgs(Args);
@@ -3571,6 +3720,7 @@
   //
   // If a std is supplied, only add -trigraphs if it follows the
   // option.
+  bool ImplyVCPPCXXVer = false;
   if (Arg *Std = Args.getLastArg(options::OPT_std_EQ, options::OPT_ansi)) {
     if (Std->getOption().matches(options::OPT_ansi))
       if (types::isCXX(InputType))
@@ -3597,7 +3747,7 @@
       Args.AddAllArgsTranslated(CmdArgs, options::OPT_std_default_EQ,
                                 "-std=", /*Joined=*/true);
     else if (IsWindowsMSVC)
-      CmdArgs.push_back("-std=c++11");
+      ImplyVCPPCXXVer = true;
 
     Args.AddLastArg(CmdArgs, options::OPT_ftrigraphs,
                     options::OPT_fno_trigraphs);
@@ -3772,16 +3922,36 @@
   Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_show_template_tree);
   Args.AddLastArg(CmdArgs, options::OPT_fno_elide_type);
 
+  // Forward flags for OpenMP
+  if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
+                   options::OPT_fno_openmp, false))
+    switch (getOpenMPRuntime(getToolChain(), Args)) {
+    case OMPRT_OMP:
+    case OMPRT_IOMP5:
+      // Clang can generate useful OpenMP code for these two runtime libraries.
+      CmdArgs.push_back("-fopenmp");
+      break;
+    default:
+      // By default, if Clang doesn't know how to generate useful OpenMP code
+      // for a specific runtime library, we just don't pass the '-fopenmp' flag
+      // down to the actual compilation.
+      // FIXME: It would be better to have a mode which *only* omits IR
+      // generation based on the OpenMP support so that we get consistent
+      // semantic analysis, etc.
+      break;
+    }
+
   const SanitizerArgs &Sanitize = getToolChain().getSanitizerArgs();
   Sanitize.addArgs(Args, CmdArgs);
 
   // Report an error for -faltivec on anything other than PowerPC.
-  if (const Arg *A = Args.getLastArg(options::OPT_faltivec))
-    if (!(getToolChain().getArch() == llvm::Triple::ppc ||
-          getToolChain().getArch() == llvm::Triple::ppc64 ||
-          getToolChain().getArch() == llvm::Triple::ppc64le))
-      D.Diag(diag::err_drv_argument_only_allowed_with)
-        << A->getAsString(Args) << "ppc/ppc64/ppc64le";
+  if (const Arg *A = Args.getLastArg(options::OPT_faltivec)) {
+    const llvm::Triple::ArchType Arch = getToolChain().getArch();
+    if (!(Arch == llvm::Triple::ppc || Arch == llvm::Triple::ppc64 ||
+          Arch == llvm::Triple::ppc64le))
+      D.Diag(diag::err_drv_argument_only_allowed_with) << A->getAsString(Args)
+                                                       << "ppc/ppc64/ppc64le";
+  }
 
   if (getToolChain().SupportsProfiling())
     Args.AddLastArg(CmdArgs, options::OPT_pg);
@@ -3833,7 +4003,12 @@
 
   // -stack-protector=0 is default.
   unsigned StackProtectorLevel = 0;
-  if (Arg *A = Args.getLastArg(options::OPT_fno_stack_protector,
+  if (getToolChain().getSanitizerArgs().needsSafeStackRt()) {
+    Args.ClaimAllArgs(options::OPT_fno_stack_protector);
+    Args.ClaimAllArgs(options::OPT_fstack_protector_all);
+    Args.ClaimAllArgs(options::OPT_fstack_protector_strong);
+    Args.ClaimAllArgs(options::OPT_fstack_protector);
+  } else if (Arg *A = Args.getLastArg(options::OPT_fno_stack_protector,
                                options::OPT_fstack_protector_all,
                                options::OPT_fstack_protector_strong,
                                options::OPT_fstack_protector)) {
@@ -3854,16 +4029,15 @@
   }
 
   // --param ssp-buffer-size=
-  for (arg_iterator it = Args.filtered_begin(options::OPT__param),
-       ie = Args.filtered_end(); it != ie; ++it) {
-    StringRef Str((*it)->getValue());
+  for (const Arg *A : Args.filtered(options::OPT__param)) {
+    StringRef Str(A->getValue());
     if (Str.startswith("ssp-buffer-size=")) {
       if (StackProtectorLevel) {
         CmdArgs.push_back("-stack-protector-buffer-size");
         // FIXME: Verify the argument is a valid integer.
         CmdArgs.push_back(Args.MakeArgString(Str.drop_front(16)));
       }
-      (*it)->claim();
+      A->claim();
     }
   }
 
@@ -3892,8 +4066,8 @@
       CmdArgs.push_back("-mstack-probe-size=0");
   }
 
-  if (getToolChain().getTriple().getArch() == llvm::Triple::aarch64 ||
-      getToolChain().getTriple().getArch() == llvm::Triple::aarch64_be)
+  if (getToolChain().getArch() == llvm::Triple::aarch64 ||
+      getToolChain().getArch() == llvm::Triple::aarch64_be)
     CmdArgs.push_back("-fallow-half-arguments-and-returns");
 
   if (Arg *A = Args.getLastArg(options::OPT_mrestrict_it,
@@ -4114,9 +4288,16 @@
     CmdArgs.push_back("-fshort-enums");
 
   // -fsigned-char is default.
-  if (!Args.hasFlag(options::OPT_fsigned_char, options::OPT_funsigned_char,
-                    isSignedCharDefault(getToolChain().getTriple())))
+  if (Arg *A = Args.getLastArg(
+          options::OPT_fsigned_char, options::OPT_fno_signed_char,
+          options::OPT_funsigned_char, options::OPT_fno_unsigned_char)) {
+    if (A->getOption().matches(options::OPT_funsigned_char) ||
+        A->getOption().matches(options::OPT_fno_signed_char)) {
+      CmdArgs.push_back("-fno-signed-char");
+    }
+  } else if (!isSignedCharDefault(getToolChain().getTriple())) {
     CmdArgs.push_back("-fno-signed-char");
+  }
 
   // -fuse-cxa-atexit is default.
   if (!Args.hasFlag(options::OPT_fuse_cxa_atexit,
@@ -4146,37 +4327,18 @@
     CmdArgs.push_back("-fms-compatibility");
 
   // -fms-compatibility-version=18.00 is default.
-  VersionTuple MSVT;
-  if (Args.hasFlag(options::OPT_fms_extensions, options::OPT_fno_ms_extensions,
-                   IsWindowsMSVC) ||
-      Args.hasArg(options::OPT_fmsc_version) ||
-      Args.hasArg(options::OPT_fms_compatibility_version)) {
-    const Arg *MSCVersion = Args.getLastArg(options::OPT_fmsc_version);
-    const Arg *MSCompatibilityVersion =
-      Args.getLastArg(options::OPT_fms_compatibility_version);
-
-    if (MSCVersion && MSCompatibilityVersion)
-      D.Diag(diag::err_drv_argument_not_allowed_with)
-          << MSCVersion->getAsString(Args)
-          << MSCompatibilityVersion->getAsString(Args);
-
-    if (MSCompatibilityVersion) {
-      if (MSVT.tryParse(MSCompatibilityVersion->getValue()))
-        D.Diag(diag::err_drv_invalid_value)
-            << MSCompatibilityVersion->getAsString(Args)
-            << MSCompatibilityVersion->getValue();
-    } else if (MSCVersion) {
-      unsigned Version = 0;
-      if (StringRef(MSCVersion->getValue()).getAsInteger(10, Version))
-        D.Diag(diag::err_drv_invalid_value) << MSCVersion->getAsString(Args)
-                                            << MSCVersion->getValue();
-      MSVT = getMSCompatibilityVersion(Version);
-    } else {
-      MSVT = VersionTuple(18);
-    }
-
+  VersionTuple MSVT = visualstudio::getMSVCVersion(
+      &D, getToolChain().getTriple(), Args, IsWindowsMSVC);
+  if (!MSVT.empty())
     CmdArgs.push_back(
         Args.MakeArgString("-fms-compatibility-version=" + MSVT.getAsString()));
+
+  bool IsMSVC2015Compatible = MSVT.getMajor() >= 19;
+  if (ImplyVCPPCXXVer) {
+    if (IsMSVC2015Compatible)
+      CmdArgs.push_back("-std=c++14");
+    else
+      CmdArgs.push_back("-std=c++11");
   }
 
   // -fno-borland-extensions is default.
@@ -4188,7 +4350,7 @@
   // than 19.
   if (!Args.hasFlag(options::OPT_fthreadsafe_statics,
                     options::OPT_fno_threadsafe_statics,
-                    !IsWindowsMSVC || MSVT.getMajor() >= 19))
+                    !IsWindowsMSVC || IsMSVC2015Compatible))
     CmdArgs.push_back("-fno-threadsafe-statics");
 
   // -fno-delayed-template-parsing is default, except for Windows where MSVC STL
@@ -4235,7 +4397,7 @@
   // When ObjectiveC legacy runtime is in effect on MacOSX,
   // turn on the option to do Array/Dictionary subscripting
   // by default.
-  if (getToolChain().getTriple().getArch() == llvm::Triple::x86 &&
+  if (getToolChain().getArch() == llvm::Triple::x86 &&
       getToolChain().getTriple().isMacOSX() &&
       !getToolChain().getTriple().isMacOSXVersionLT(10, 7) &&
       objcRuntime.getKind() == ObjCRuntime::FragileMacOSX &&
@@ -4615,17 +4777,16 @@
   // parser.
   Args.AddAllArgValues(CmdArgs, options::OPT_Xclang);
   bool OptDisabled = false;
-  for (arg_iterator it = Args.filtered_begin(options::OPT_mllvm),
-         ie = Args.filtered_end(); it != ie; ++it) {
-    (*it)->claim();
+  for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
+    A->claim();
 
     // We translate this by hand to the -cc1 argument, since nightly test uses
     // it and developers have been trained to spell it with -mllvm.
-    if (StringRef((*it)->getValue(0)) == "-disable-llvm-optzns") {
+    if (StringRef(A->getValue(0)) == "-disable-llvm-optzns") {
       CmdArgs.push_back("-disable-llvm-optzns");
       OptDisabled = true;
     } else
-      (*it)->render(Args, CmdArgs);
+      A->render(Args, CmdArgs);
   }
 
   // With -save-temps, we want to save the unoptimized bitcode output from the
@@ -4685,7 +4846,7 @@
   const char *SplitDwarfOut;
   if (SplitDwarf) {
     CmdArgs.push_back("-split-dwarf-file");
-    SplitDwarfOut = SplitDebugName(Args, Inputs);
+    SplitDwarfOut = SplitDebugName(Args, Input);
     CmdArgs.push_back(SplitDwarfOut);
   }
 
@@ -4964,8 +5125,8 @@
   }
 
   unsigned VolatileOptionID;
-  if (getToolChain().getTriple().getArch() == llvm::Triple::x86_64 ||
-      getToolChain().getTriple().getArch() == llvm::Triple::x86)
+  if (getToolChain().getArch() == llvm::Triple::x86_64 ||
+      getToolChain().getArch() == llvm::Triple::x86)
     VolatileOptionID = options::OPT__SLASH_volatile_ms;
   else
     VolatileOptionID = options::OPT__SLASH_volatile_iso;
@@ -5067,10 +5228,10 @@
   // Set the main file name, so that debug info works even with
   // -save-temps or preprocessed assembly.
   CmdArgs.push_back("-main-file-name");
-  CmdArgs.push_back(Clang::getBaseInputName(Args, Inputs));
+  CmdArgs.push_back(Clang::getBaseInputName(Args, Input));
 
   // Add the target cpu
-  const llvm::Triple &Triple = getToolChain().getTriple();
+  const llvm::Triple Triple(TripleStr);
   std::string CPU = getCPUName(Args, Triple);
   if (!CPU.empty()) {
     CmdArgs.push_back("-target-cpu");
@@ -5156,10 +5317,8 @@
   // doesn't handle that so rather than warning about unused flags that are
   // actually used, we'll lie by omission instead.
   // FIXME: Stop lying and consume only the appropriate driver flags
-  for (arg_iterator it = Args.filtered_begin(options::OPT_W_Group),
-                    ie = Args.filtered_end();
-       it != ie; ++it)
-    (*it)->claim();
+  for (const Arg *A : Args.filtered(options::OPT_W_Group))
+    A->claim();
 
   CollectArgsForIntegratedAssembler(C, Args, CmdArgs,
                                     getToolChain().getDriver());
@@ -5182,7 +5341,7 @@
   if (Args.hasArg(options::OPT_gsplit_dwarf) &&
       getToolChain().getTriple().isOSLinux())
     SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output,
-                   SplitDebugName(Args, Inputs));
+                   SplitDebugName(Args, Input));
 }
 
 void GnuTool::anchor() {}
@@ -5230,7 +5389,7 @@
   //
   // FIXME: The triple class should directly provide the information we want
   // here.
-  llvm::Triple::ArchType Arch = getToolChain().getArch();
+  const llvm::Triple::ArchType Arch = getToolChain().getArch();
   if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::ppc)
     CmdArgs.push_back("-m32");
   else if (Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::ppc64 ||
@@ -5366,10 +5525,8 @@
     CmdArgs.push_back("-fsyntax-only");
   }
 
-  std::string SmallDataThreshold = GetHexagonSmallDataThresholdValue(Args);
-  if (!SmallDataThreshold.empty())
-    CmdArgs.push_back(
-      Args.MakeArgString(std::string("-G") + SmallDataThreshold));
+  if (const char* v = toolchains::Hexagon_TC::GetSmallDataThreshold(Args))
+    CmdArgs.push_back(Args.MakeArgString(std::string("-G") + v));
 
   Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA,
                        options::OPT_Xassembler);
@@ -5412,17 +5569,16 @@
   // The types are (hopefully) good enough.
 }
 
-void hexagon::Link::ConstructJob(Compilation &C, const JobAction &JA,
-                               const InputInfo &Output,
-                               const InputInfoList &Inputs,
-                               const ArgList &Args,
-                               const char *LinkingOutput) const {
+static void constructHexagonLinkArgs(Compilation &C, const JobAction &JA,
+                              const toolchains::Hexagon_TC& ToolChain,
+                              const InputInfo &Output,
+                              const InputInfoList &Inputs,
+                              const ArgList &Args,
+                              ArgStringList &CmdArgs,
+                              const char *LinkingOutput) {
 
-  const toolchains::Hexagon_TC& ToolChain =
-    static_cast<const toolchains::Hexagon_TC&>(getToolChain());
   const Driver &D = ToolChain.getDriver();
 
-  ArgStringList CmdArgs;
 
   //----------------------------------------------------------------------------
   //
@@ -5433,6 +5589,7 @@
   bool incStdLib = !Args.hasArg(options::OPT_nostdlib);
   bool incStartFiles = !Args.hasArg(options::OPT_nostartfiles);
   bool incDefLibs = !Args.hasArg(options::OPT_nodefaultlibs);
+  bool useG0 = false;
   bool useShared = buildingLib && !hasStaticArg;
 
   //----------------------------------------------------------------------------
@@ -5466,10 +5623,9 @@
   if (buildPIE && !buildingLib)
     CmdArgs.push_back("-pie");
 
-  std::string SmallDataThreshold = GetHexagonSmallDataThresholdValue(Args);
-  if (!SmallDataThreshold.empty()) {
-    CmdArgs.push_back(
-      Args.MakeArgString(std::string("-G") + SmallDataThreshold));
+  if (const char* v = toolchains::Hexagon_TC::GetSmallDataThreshold(Args)) {
+    CmdArgs.push_back(Args.MakeArgString(std::string("-G") + v));
+    useG0 = toolchains::Hexagon_TC::UsesG0(v);
   }
 
   //----------------------------------------------------------------------------
@@ -5485,8 +5641,7 @@
       toolchains::Hexagon_TC::GetGnuDir(D.InstalledDir, Args) + "/";
   const std::string StartFilesDir = RootDir
                                     + "hexagon/lib"
-                                    + (buildingLib
-                                       ? MarchG0Suffix : MarchSuffix);
+                                    + (useG0 ? MarchG0Suffix : MarchSuffix);
 
   //----------------------------------------------------------------------------
   // moslib
@@ -5494,10 +5649,9 @@
   std::vector<std::string> oslibs;
   bool hasStandalone= false;
 
-  for (arg_iterator it = Args.filtered_begin(options::OPT_moslib_EQ),
-         ie = Args.filtered_end(); it != ie; ++it) {
-    (*it)->claim();
-    oslibs.push_back((*it)->getValue());
+  for (const Arg *A : Args.filtered(options::OPT_moslib_EQ)) {
+    A->claim();
+    oslibs.emplace_back(A->getValue());
     hasStandalone = hasStandalone || (oslibs.back() == "standalone");
   }
   if (oslibs.empty()) {
@@ -5568,6 +5722,20 @@
     std::string finiObj = useShared ? "/finiS.o" : "/fini.o";
     CmdArgs.push_back(Args.MakeArgString(StartFilesDir + finiObj));
   }
+}
+
+void hexagon::Link::ConstructJob(Compilation &C, const JobAction &JA,
+                               const InputInfo &Output,
+                               const InputInfoList &Inputs,
+                               const ArgList &Args,
+                               const char *LinkingOutput) const {
+
+  const toolchains::Hexagon_TC& ToolChain =
+    static_cast<const toolchains::Hexagon_TC&>(getToolChain());
+
+  ArgStringList CmdArgs;
+  constructHexagonLinkArgs(C, JA, ToolChain, Output, Inputs, Args, CmdArgs,
+                           LinkingOutput);
 
   std::string Linker = ToolChain.GetProgramPath("hexagon-ld");
   C.addCommand(llvm::make_unique<Command>(JA, *this, Args.MakeArgString(Linker),
@@ -5575,10 +5743,9 @@
 }
 // Hexagon tools end.
 
-/// Get the (LLVM) name of the minimum ARM CPU for the arch we are targeting.
-const char *arm::getARMCPUForMArch(const ArgList &Args,
-                                   const llvm::Triple &Triple) {
-  StringRef MArch;
+const std::string arm::getARMArch(const ArgList &Args,
+                                  const llvm::Triple &Triple) {
+  std::string MArch;
   if (Arg *A = Args.getLastArg(options::OPT_march_EQ)) {
     // Otherwise, if we have -march= choose the base CPU for that arch.
     MArch = A->getValue();
@@ -5586,27 +5753,50 @@
     // Otherwise, use the Arch from the triple.
     MArch = Triple.getArchName();
   }
+  MArch = StringRef(MArch).lower();
 
   // Handle -march=native.
   if (MArch == "native") {
     std::string CPU = llvm::sys::getHostCPUName();
     if (CPU != "generic") {
-      // Translate the native cpu into the architecture. The switch below will
-      // then chose the minimum cpu for that arch.
-      MArch = std::string("arm") + arm::getLLVMArchSuffixForARM(CPU);
+      // Translate the native CPU into the architecture suffix for that CPU.
+      const char *Suffix = arm::getLLVMArchSuffixForARM(CPU, MArch);
+      // If there is no valid architecture suffix for this CPU we don't know how
+      // to handle it, so return no architecture.
+      if (strcmp(Suffix,"") == 0)
+        MArch = "";
+      else
+        MArch = std::string("arm") + Suffix;
     }
   }
 
-  return Triple.getARMCPUForArch(MArch);
+  return MArch;
+}
+/// Get the (LLVM) name of the minimum ARM CPU for the arch we are targeting.
+const char *arm::getARMCPUForMArch(const ArgList &Args,
+                                   const llvm::Triple &Triple) {
+  std::string MArch = getARMArch(Args, Triple);
+  // getARMCPUForArch defaults to the triple if MArch is empty, but empty MArch
+  // here means an -march=native that we can't handle, so instead return no CPU.
+  if (MArch.empty())
+    return "";
+
+  // We need to return an empty string here on invalid MArch values as the
+  // various places that call this function can't cope with a null result.
+  const char *result = Triple.getARMCPUForArch(MArch);
+  if (result)
+    return result;
+  else
+    return "";
 }
 
 /// getARMTargetCPU - Get the (LLVM) name of the ARM cpu we are targeting.
-StringRef arm::getARMTargetCPU(const ArgList &Args,
+std::string arm::getARMTargetCPU(const ArgList &Args,
                                const llvm::Triple &Triple) {
   // FIXME: Warn on inconsistent use of -mcpu and -march.
   // If we have -mcpu=, use that.
   if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
-    StringRef MCPU = A->getValue();
+    std::string MCPU = StringRef(A->getValue()).lower();
     // Handle -mcpu=native.
     if (MCPU == "native")
       return llvm::sys::getHostCPUName();
@@ -5618,49 +5808,28 @@
 }
 
 /// getLLVMArchSuffixForARM - Get the LLVM arch name to use for a particular
-/// CPU.
-//
+/// CPU (or Arch, if CPU is generic).
 // FIXME: This is redundant with -mcpu, why does LLVM use this.
-// FIXME: tblgen this, or kill it!
-const char *arm::getLLVMArchSuffixForARM(StringRef CPU) {
-  return llvm::StringSwitch<const char *>(CPU)
-    .Case("strongarm", "v4")
-    .Cases("arm7tdmi", "arm7tdmi-s", "arm710t", "v4t")
-    .Cases("arm720t", "arm9", "arm9tdmi", "v4t")
-    .Cases("arm920", "arm920t", "arm922t", "v4t")
-    .Cases("arm940t", "ep9312","v4t")
-    .Cases("arm10tdmi",  "arm1020t", "v5")
-    .Cases("arm9e",  "arm926ej-s",  "arm946e-s", "v5e")
-    .Cases("arm966e-s",  "arm968e-s",  "arm10e", "v5e")
-    .Cases("arm1020e",  "arm1022e",  "xscale", "iwmmxt", "v5e")
-    .Cases("arm1136j-s",  "arm1136jf-s", "v6")
-    .Cases("arm1176jz-s", "arm1176jzf-s", "v6k")
-    .Cases("mpcorenovfp",  "mpcore", "v6k")
-    .Cases("arm1156t2-s",  "arm1156t2f-s", "v6t2")
-    .Cases("cortex-a5", "cortex-a7", "cortex-a8", "v7")
-    .Cases("cortex-a9", "cortex-a12", "cortex-a15", "cortex-a17", "krait", "v7")
-    .Cases("cortex-r4", "cortex-r4f", "cortex-r5", "cortex-r7", "v7r")
-    .Cases("sc000", "cortex-m0", "cortex-m0plus", "cortex-m1", "v6m")
-    .Cases("sc300", "cortex-m3", "v7m")
-    .Cases("cortex-m4", "cortex-m7", "v7em")
-    .Case("swift", "v7s")
-    .Case("cyclone", "v8")
-    .Cases("cortex-a53", "cortex-a57", "cortex-a72", "v8")
-    .Default("");
+const char *arm::getLLVMArchSuffixForARM(StringRef CPU, StringRef Arch) {
+  if (CPU == "generic" &&
+      llvm::ARMTargetParser::parseArch(Arch) == llvm::ARM::AK_ARMV8_1A)
+    return "v8.1a";
+
+  unsigned ArchKind = llvm::ARMTargetParser::parseCPUArch(CPU);
+  if (ArchKind == llvm::ARM::AK_INVALID)
+    return "";
+  return llvm::ARMTargetParser::getSubArch(ArchKind);
 }
 
-void arm::appendEBLinkFlags(const ArgList &Args, ArgStringList &CmdArgs, const llvm::Triple &Triple) {
+void arm::appendEBLinkFlags(const ArgList &Args, ArgStringList &CmdArgs,
+                            const llvm::Triple &Triple) {
   if (Args.hasArg(options::OPT_r))
     return;
 
-  StringRef Suffix = getLLVMArchSuffixForARM(getARMCPUForMArch(Args, Triple));
-  const char *LinkFlag = llvm::StringSwitch<const char *>(Suffix)
-    .Cases("v4", "v4t", "v5", "v5e", nullptr)
-    .Cases("v6", "v6k", "v6t2", nullptr)
-    .Default("--be8");
-
-  if (LinkFlag)
-    CmdArgs.push_back(LinkFlag);
+  // ARMv7 (and later) and ARMv6-M do not support BE-32, so instruct the linker
+  // to generate BE-8 executables.
+  if (getARMSubArchVersionNumber(Triple) >= 7 || isARMMProfile(Triple))
+    CmdArgs.push_back("--be8");
 }
 
 mips::NanEncoding mips::getSupportedNanEncoding(StringRef &CPU) {
@@ -5760,7 +5929,7 @@
 }
 
 void darwin::setTripleTypeForMachOArchName(llvm::Triple &T, StringRef Str) {
-  llvm::Triple::ArchType Arch = getArchTypeForMachOArchName(Str);
+  const llvm::Triple::ArchType Arch = getArchTypeForMachOArchName(Str);
   T.setArch(Arch);
 
   if (Str == "x86_64h")
@@ -5772,14 +5941,13 @@
 }
 
 const char *Clang::getBaseInputName(const ArgList &Args,
-                                    const InputInfoList &Inputs) {
-  return Args.MakeArgString(
-    llvm::sys::path::filename(Inputs[0].getBaseInput()));
+                                    const InputInfo &Input) {
+  return Args.MakeArgString(llvm::sys::path::filename(Input.getBaseInput()));
 }
 
 const char *Clang::getBaseInputStem(const ArgList &Args,
                                     const InputInfoList &Inputs) {
-  const char *Str = getBaseInputName(Args, Inputs);
+  const char *Str = getBaseInputName(Args, Inputs[0]);
 
   if (const char *End = strrchr(Str, '.'))
     return Args.MakeArgString(std::string(Str, End));
@@ -6148,12 +6316,6 @@
   Args.AddLastArg(CmdArgs, options::OPT_Mach);
 }
 
-enum LibOpenMP {
-  LibUnknown,
-  LibGOMP,
-  LibIOMP5
-};
-
 void darwin::Link::ConstructJob(Compilation &C, const JobAction &JA,
                                 const InputInfo &Output,
                                 const InputInfoList &Inputs,
@@ -6209,29 +6371,33 @@
       !Args.hasArg(options::OPT_nostartfiles))
     getMachOToolChain().addStartObjectFileArgs(Args, CmdArgs);
 
+  // SafeStack requires its own runtime libraries.
+  // These libraries should be linked first, to make sure the
+  // __safestack_init constructor executes before everything else.
+  if (getToolChain().getSanitizerArgs().needsSafeStackRt()) {
+    getMachOToolChain().AddLinkRuntimeLib(Args, CmdArgs,
+                                          "libclang_rt.safestack_osx.a",
+                                          /*AlwaysLink=*/true);
+  }
+
   Args.AddAllArgs(CmdArgs, options::OPT_L);
 
-  LibOpenMP UsedOpenMPLib = LibUnknown;
-  if (Args.hasArg(options::OPT_fopenmp)) {
-    UsedOpenMPLib = LibGOMP;
-  } else if (const Arg *A = Args.getLastArg(options::OPT_fopenmp_EQ)) {
-    UsedOpenMPLib = llvm::StringSwitch<LibOpenMP>(A->getValue())
-        .Case("libgomp",  LibGOMP)
-        .Case("libiomp5", LibIOMP5)
-        .Default(LibUnknown);
-    if (UsedOpenMPLib == LibUnknown)
-      getToolChain().getDriver().Diag(diag::err_drv_unsupported_option_argument)
-        << A->getOption().getName() << A->getValue();
-  }
-  switch (UsedOpenMPLib) {
-  case LibGOMP:
-    CmdArgs.push_back("-lgomp");
-    break;
-  case LibIOMP5:
-    CmdArgs.push_back("-liomp5");
-    break;
-  case LibUnknown:
-    break;
+  if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
+                   options::OPT_fno_openmp, false)) {
+    switch (getOpenMPRuntime(getToolChain(), Args)) {
+    case OMPRT_OMP:
+      CmdArgs.push_back("-lomp");
+      break;
+    case OMPRT_GOMP:
+      CmdArgs.push_back("-lgomp");
+      break;
+    case OMPRT_IOMP5:
+      CmdArgs.push_back("-liomp5");
+      break;
+    case OMPRT_Unknown:
+      // Already diagnosed.
+      break;
+    }
   }
 
   AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs);
@@ -6273,6 +6439,11 @@
   if (Args.hasArg(options::OPT_fnested_functions))
     CmdArgs.push_back("-allow_stack_execute");
 
+  // TODO: It would be nice to use addProfileRT() here, but darwin's compiler-rt
+  // paths are different enough from other toolchains that this needs a fair
+  // amount of refactoring done first.
+  getMachOToolChain().addProfileRTLibs(Args, CmdArgs);
+
   if (!Args.hasArg(options::OPT_nostdlib) &&
       !Args.hasArg(options::OPT_nodefaultlibs)) {
     if (getToolChain().getDriver().CCCIsCXX())
@@ -6293,10 +6464,8 @@
   Args.AddAllArgs(CmdArgs, options::OPT_F);
 
   // -iframework should be forwarded as -F.
-  for (auto it = Args.filtered_begin(options::OPT_iframework),
-         ie = Args.filtered_end(); it != ie; ++it)
-    CmdArgs.push_back(Args.MakeArgString(std::string("-F") +
-                                         (*it)->getValue()));
+  for (const Arg *A : Args.filtered(options::OPT_iframework))
+    CmdArgs.push_back(Args.MakeArgString(std::string("-F") + A->getValue()));
 
   if (!Args.hasArg(options::OPT_nostdlib) &&
       !Args.hasArg(options::OPT_nodefaultlibs)) {
@@ -6411,7 +6580,7 @@
   std::string GCCLibPath = "/usr/gcc/4.5/lib/gcc/";
   const llvm::Triple &T = getToolChain().getTriple();
   std::string LibPath = "/usr/lib/";
-  llvm::Triple::ArchType Arch = T.getArch();
+  const llvm::Triple::ArchType Arch = T.getArch();
   switch (Arch) {
   case llvm::Triple::x86:
     GCCLibPath +=
@@ -6529,6 +6698,7 @@
     break;
 
   case llvm::Triple::sparc:
+  case llvm::Triple::sparcel:
     CmdArgs.push_back("-32");
     NeedsKPIC = true;
     break;
@@ -6816,7 +6986,7 @@
     }
 
     StringRef MyArch;
-    switch (getToolChain().getTriple().getArch()) {
+    switch (getToolChain().getArch()) {
     case llvm::Triple::arm:
       MyArch = "arm";
       break;
@@ -6907,6 +7077,7 @@
       CmdArgs.push_back("-matpcs");
     }
   } else if (getToolChain().getArch() == llvm::Triple::sparc ||
+             getToolChain().getArch() == llvm::Triple::sparcel ||
              getToolChain().getArch() == llvm::Triple::sparcv9) {
     if (getToolChain().getArch() == llvm::Triple::sparc)
       CmdArgs.push_back("-Av8plusa");
@@ -6934,12 +7105,13 @@
                                  const InputInfoList &Inputs,
                                  const ArgList &Args,
                                  const char *LinkingOutput) const {
-  const toolchains::FreeBSD& ToolChain = 
-    static_cast<const toolchains::FreeBSD&>(getToolChain());
+  const toolchains::FreeBSD &ToolChain =
+      static_cast<const toolchains::FreeBSD &>(getToolChain());
   const Driver &D = ToolChain.getDriver();
+  const llvm::Triple::ArchType Arch = ToolChain.getArch();
   const bool IsPIE =
-    !Args.hasArg(options::OPT_shared) &&
-    (Args.hasArg(options::OPT_pie) || ToolChain.isPIEDefault());
+      !Args.hasArg(options::OPT_shared) &&
+      (Args.hasArg(options::OPT_pie) || ToolChain.isPIEDefault());
   ArgStringList CmdArgs;
 
   // Silence warning for "clang -g foo.o -o foo"
@@ -6969,7 +7141,6 @@
       CmdArgs.push_back("/libexec/ld-elf.so.1");
     }
     if (ToolChain.getTriple().getOSMajorVersion() >= 9) {
-      llvm::Triple::ArchType Arch = ToolChain.getArch();
       if (Arch == llvm::Triple::arm || Arch == llvm::Triple::sparc ||
           Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64) {
         CmdArgs.push_back("--hash-style=both");
@@ -6980,12 +7151,12 @@
 
   // When building 32-bit code on FreeBSD/amd64, we have to explicitly
   // instruct ld in the base system to link 32-bit code.
-  if (ToolChain.getArch() == llvm::Triple::x86) {
+  if (Arch == llvm::Triple::x86) {
     CmdArgs.push_back("-m");
     CmdArgs.push_back("elf_i386_fbsd");
   }
 
-  if (ToolChain.getArch() == llvm::Triple::ppc) {
+  if (Arch == llvm::Triple::ppc) {
     CmdArgs.push_back("-m");
     CmdArgs.push_back("elf32ppc_fbsd");
   }
@@ -7131,7 +7302,7 @@
   case llvm::Triple::armeb:
   case llvm::Triple::thumb:
   case llvm::Triple::thumbeb: {
-    std::string MArch(arm::getARMTargetCPU(Args, getToolChain().getTriple()));
+    std::string MArch = arm::getARMTargetCPU(Args, getToolChain().getTriple());
     CmdArgs.push_back(Args.MakeArgString("-mcpu=" + MArch));
     break;
   }
@@ -7161,6 +7332,7 @@
   }
 
   case llvm::Triple::sparc:
+  case llvm::Triple::sparcel:
     CmdArgs.push_back("-32");
     addAssemblerKPIC(Args, CmdArgs);
     break;
@@ -7239,7 +7411,8 @@
     break;
   case llvm::Triple::armeb:
   case llvm::Triple::thumbeb:
-    arm::appendEBLinkFlags(Args, CmdArgs, getToolChain().getTriple());
+    arm::appendEBLinkFlags(Args, CmdArgs,
+        llvm::Triple(getToolChain().ComputeEffectiveClangTriple(Args)));
     CmdArgs.push_back("-m");
     switch (getToolChain().getTriple().getEnvironment()) {
     case llvm::Triple::EABI:
@@ -7438,6 +7611,7 @@
     CmdArgs.push_back("-mlittle-endian");
     break;
   case llvm::Triple::sparc:
+  case llvm::Triple::sparcel:
     CmdArgs.push_back("-32");
     CmdArgs.push_back("-Av8plusa");
     NeedsKPIC = true;
@@ -7475,7 +7649,7 @@
     // march from being picked in the absence of a cpu flag.
     Arg *A;
     if ((A = Args.getLastArg(options::OPT_mcpu_EQ)) &&
-      StringRef(A->getValue()) == "krait")
+      StringRef(A->getValue()).lower() == "krait")
         CmdArgs.push_back("-march=armv7-a");
     else
       Args.AddLastArg(CmdArgs, options::OPT_mcpu_EQ);
@@ -7565,6 +7739,9 @@
     Args.AddLastArg(CmdArgs, options::OPT_mhard_float,
                     options::OPT_msoft_float);
 
+    Args.AddLastArg(CmdArgs, options::OPT_mdouble_float,
+                    options::OPT_msingle_float);
+
     Args.AddLastArg(CmdArgs, options::OPT_modd_spreg,
                     options::OPT_mno_odd_spreg);
 
@@ -7601,7 +7778,7 @@
   if (Args.hasArg(options::OPT_gsplit_dwarf) &&
       getToolChain().getTriple().isOSLinux())
     SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output,
-                   SplitDebugName(Args, Inputs));
+                   SplitDebugName(Args, Inputs[0]));
 }
 
 static void AddLibgcc(const llvm::Triple &Triple, const Driver &D,
@@ -7639,34 +7816,33 @@
 
 static std::string getLinuxDynamicLinker(const ArgList &Args,
                                          const toolchains::Linux &ToolChain) {
+  const llvm::Triple::ArchType Arch = ToolChain.getArch();
+
   if (ToolChain.getTriple().getEnvironment() == llvm::Triple::Android) {
     if (ToolChain.getTriple().isArch64Bit())
       return "/system/bin/linker64";
     else
       return "/system/bin/linker";
-  } else if (ToolChain.getArch() == llvm::Triple::x86 ||
-             ToolChain.getArch() == llvm::Triple::sparc)
+  } else if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::sparc ||
+             Arch == llvm::Triple::sparcel)
     return "/lib/ld-linux.so.2";
-  else if (ToolChain.getArch() == llvm::Triple::aarch64)
+  else if (Arch == llvm::Triple::aarch64)
     return "/lib/ld-linux-aarch64.so.1";
-  else if (ToolChain.getArch() == llvm::Triple::aarch64_be)
+  else if (Arch == llvm::Triple::aarch64_be)
     return "/lib/ld-linux-aarch64_be.so.1";
-  else if (ToolChain.getArch() == llvm::Triple::arm ||
-           ToolChain.getArch() == llvm::Triple::thumb) {
+  else if (Arch == llvm::Triple::arm || Arch == llvm::Triple::thumb) {
     if (ToolChain.getTriple().getEnvironment() == llvm::Triple::GNUEABIHF)
       return "/lib/ld-linux-armhf.so.3";
     else
       return "/lib/ld-linux.so.3";
-  } else if (ToolChain.getArch() == llvm::Triple::armeb ||
-             ToolChain.getArch() == llvm::Triple::thumbeb) {
+  } else if (Arch == llvm::Triple::armeb || Arch == llvm::Triple::thumbeb) {
+    // TODO: check which dynamic linker name to use.
     if (ToolChain.getTriple().getEnvironment() == llvm::Triple::GNUEABIHF)
-      return "/lib/ld-linux-armhf.so.3";        /* TODO: check which dynamic linker name.  */
+      return "/lib/ld-linux-armhf.so.3";
     else
-      return "/lib/ld-linux.so.3";              /* TODO: check which dynamic linker name.  */
-  } else if (ToolChain.getArch() == llvm::Triple::mips ||
-             ToolChain.getArch() == llvm::Triple::mipsel ||
-             ToolChain.getArch() == llvm::Triple::mips64 ||
-             ToolChain.getArch() == llvm::Triple::mips64el) {
+      return "/lib/ld-linux.so.3";
+  } else if (Arch == llvm::Triple::mips || Arch == llvm::Triple::mipsel ||
+             Arch == llvm::Triple::mips64 || Arch == llvm::Triple::mips64el) {
     StringRef CPUName;
     StringRef ABIName;
     mips::getMipsCPUAndABI(Args, ToolChain.getTriple(), CPUName, ABIName);
@@ -7684,21 +7860,21 @@
       LibName = IsNaN2008 ? "ld-linux-mipsn8.so.1" : "ld.so.1";
 
     return (LibDir + "/" + LibName).str();
-  } else if (ToolChain.getArch() == llvm::Triple::ppc)
+  } else if (Arch == llvm::Triple::ppc)
     return "/lib/ld.so.1";
-  else if (ToolChain.getArch() == llvm::Triple::ppc64) {
+  else if (Arch == llvm::Triple::ppc64) {
     if (ppc::hasPPCAbiArg(Args, "elfv2"))
       return "/lib64/ld64.so.2";
     return "/lib64/ld64.so.1";
-  } else if (ToolChain.getArch() == llvm::Triple::ppc64le) {
+  } else if (Arch == llvm::Triple::ppc64le) {
     if (ppc::hasPPCAbiArg(Args, "elfv1"))
       return "/lib64/ld64.so.1";
     return "/lib64/ld64.so.2";
-  } else if (ToolChain.getArch() == llvm::Triple::systemz)
+  } else if (Arch == llvm::Triple::systemz)
     return "/lib64/ld64.so.1";
-  else if (ToolChain.getArch() == llvm::Triple::sparcv9)
+  else if (Arch == llvm::Triple::sparcv9)
     return "/lib64/ld-linux.so.2";
-  else if (ToolChain.getArch() == llvm::Triple::x86_64 &&
+  else if (Arch == llvm::Triple::x86_64 &&
            ToolChain.getTriple().getEnvironment() == llvm::Triple::GNUX32)
     return "/libx32/ld-linux-x32.so.2";
   else
@@ -7747,6 +7923,7 @@
   case llvm::Triple::ppc64le:
     return "elf64lppc";
   case llvm::Triple::sparc:
+  case llvm::Triple::sparcel:
     return "elf32_sparc";
   case llvm::Triple::sparcv9:
     return "elf64_sparc";
@@ -7778,15 +7955,15 @@
                                   const InputInfoList &Inputs,
                                   const ArgList &Args,
                                   const char *LinkingOutput) const {
-  const toolchains::Linux& ToolChain =
-    static_cast<const toolchains::Linux&>(getToolChain());
+  const toolchains::Linux &ToolChain =
+      static_cast<const toolchains::Linux &>(getToolChain());
   const Driver &D = ToolChain.getDriver();
+  const llvm::Triple::ArchType Arch = ToolChain.getArch();
   const bool isAndroid =
-    ToolChain.getTriple().getEnvironment() == llvm::Triple::Android;
+      ToolChain.getTriple().getEnvironment() == llvm::Triple::Android;
   const bool IsPIE =
-    !Args.hasArg(options::OPT_shared) &&
-    !Args.hasArg(options::OPT_static) &&
-    (Args.hasArg(options::OPT_pie) || ToolChain.isPIEDefault());
+      !Args.hasArg(options::OPT_shared) && !Args.hasArg(options::OPT_static) &&
+      (Args.hasArg(options::OPT_pie) || ToolChain.isPIEDefault());
 
   ArgStringList CmdArgs;
 
@@ -7810,9 +7987,10 @@
   if (Args.hasArg(options::OPT_s))
     CmdArgs.push_back("-s");
 
-  if (ToolChain.getArch() == llvm::Triple::armeb ||
-      ToolChain.getArch() == llvm::Triple::thumbeb)
-    arm::appendEBLinkFlags(Args, CmdArgs, getToolChain().getTriple());
+  if (Arch == llvm::Triple::armeb || Arch == llvm::Triple::thumbeb)
+    arm::appendEBLinkFlags(
+        Args, CmdArgs,
+        llvm::Triple(getToolChain().ComputeEffectiveClangTriple(Args)));
 
   for (const auto &Opt : ToolChain.ExtraOpts)
     CmdArgs.push_back(Opt.c_str());
@@ -7825,10 +8003,8 @@
   CmdArgs.push_back(getLDMOption(ToolChain.getTriple(), Args));
 
   if (Args.hasArg(options::OPT_static)) {
-    if (ToolChain.getArch() == llvm::Triple::arm ||
-        ToolChain.getArch() == llvm::Triple::armeb ||
-        ToolChain.getArch() == llvm::Triple::thumb ||
-        ToolChain.getArch() == llvm::Triple::thumbeb)
+    if (Arch == llvm::Triple::arm || Arch == llvm::Triple::armeb ||
+        Arch == llvm::Triple::thumb || Arch == llvm::Triple::thumbeb)
       CmdArgs.push_back("-Bstatic");
     else
       CmdArgs.push_back("-static");
@@ -7836,10 +8012,8 @@
     CmdArgs.push_back("-shared");
   }
 
-  if (ToolChain.getArch() == llvm::Triple::arm ||
-      ToolChain.getArch() == llvm::Triple::armeb ||
-      ToolChain.getArch() == llvm::Triple::thumb ||
-      ToolChain.getArch() == llvm::Triple::thumbeb ||
+  if (Arch == llvm::Triple::arm || Arch == llvm::Triple::armeb ||
+      Arch == llvm::Triple::thumb || Arch == llvm::Triple::thumbeb ||
       (!Args.hasArg(options::OPT_static) &&
        !Args.hasArg(options::OPT_shared))) {
     CmdArgs.push_back("-dynamic-linker");
@@ -7925,37 +8099,39 @@
       if (NeedsSanitizerDeps)
         linkSanitizerRuntimeDeps(ToolChain, CmdArgs);
 
-      LibOpenMP UsedOpenMPLib = LibUnknown;
-      if (Args.hasArg(options::OPT_fopenmp)) {
-        UsedOpenMPLib = LibGOMP;
-      } else if (const Arg *A = Args.getLastArg(options::OPT_fopenmp_EQ)) {
-        UsedOpenMPLib = llvm::StringSwitch<LibOpenMP>(A->getValue())
-            .Case("libgomp",  LibGOMP)
-            .Case("libiomp5", LibIOMP5)
-            .Default(LibUnknown);
-        if (UsedOpenMPLib == LibUnknown)
-          D.Diag(diag::err_drv_unsupported_option_argument)
-            << A->getOption().getName() << A->getValue();
-      }
-      switch (UsedOpenMPLib) {
-      case LibGOMP:
-        CmdArgs.push_back("-lgomp");
+      bool WantPthread = Args.hasArg(options::OPT_pthread) ||
+                         Args.hasArg(options::OPT_pthreads);
 
-        // FIXME: Exclude this for platforms with libgomp that don't require
-        // librt. Most modern Linux platforms require it, but some may not.
-        CmdArgs.push_back("-lrt");
-        break;
-      case LibIOMP5:
-        CmdArgs.push_back("-liomp5");
-        break;
-      case LibUnknown:
-        break;
+      if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
+                       options::OPT_fno_openmp, false)) {
+        // OpenMP runtimes imply pthreads when using the GNU toolchain.
+        // FIXME: Does this really make sense for all GNU toolchains?
+        WantPthread = true;
+
+        // Also link the particular OpenMP runtimes.
+        switch (getOpenMPRuntime(ToolChain, Args)) {
+        case OMPRT_OMP:
+          CmdArgs.push_back("-lomp");
+          break;
+        case OMPRT_GOMP:
+          CmdArgs.push_back("-lgomp");
+
+          // FIXME: Exclude this for platforms with libgomp that don't require
+          // librt. Most modern Linux platforms require it, but some may not.
+          CmdArgs.push_back("-lrt");
+          break;
+        case OMPRT_IOMP5:
+          CmdArgs.push_back("-liomp5");
+          break;
+        case OMPRT_Unknown:
+          // Already diagnosed.
+          break;
+        }
       }
+
       AddRunTimeLibs(ToolChain, D, CmdArgs, Args);
 
-      if ((Args.hasArg(options::OPT_pthread) ||
-           Args.hasArg(options::OPT_pthreads) || UsedOpenMPLib != LibUnknown) &&
-          !isAndroid)
+      if (WantPthread && !isAndroid)
         CmdArgs.push_back("-lpthread");
 
       CmdArgs.push_back("-lc");
@@ -8012,17 +8188,17 @@
 // others. Eventually we can support more of that and hopefully migrate back
 // to gnutools::link.
 void nacltools::Link::ConstructJob(Compilation &C, const JobAction &JA,
-                                  const InputInfo &Output,
-                                  const InputInfoList &Inputs,
-                                  const ArgList &Args,
-                                  const char *LinkingOutput) const {
+                                   const InputInfo &Output,
+                                   const InputInfoList &Inputs,
+                                   const ArgList &Args,
+                                   const char *LinkingOutput) const {
 
-  const toolchains::NaCl_TC& ToolChain =
-    static_cast<const toolchains::NaCl_TC&>(getToolChain());
+  const toolchains::NaCl_TC &ToolChain =
+      static_cast<const toolchains::NaCl_TC &>(getToolChain());
   const Driver &D = ToolChain.getDriver();
+  const llvm::Triple::ArchType Arch = ToolChain.getArch();
   const bool IsStatic =
-    !Args.hasArg(options::OPT_dynamic) &&
-    !Args.hasArg(options::OPT_shared);
+      !Args.hasArg(options::OPT_dynamic) && !Args.hasArg(options::OPT_shared);
 
   ArgStringList CmdArgs;
 
@@ -8051,16 +8227,15 @@
     CmdArgs.push_back("--eh-frame-hdr");
 
   CmdArgs.push_back("-m");
-  if (ToolChain.getArch() == llvm::Triple::x86)
+  if (Arch == llvm::Triple::x86)
     CmdArgs.push_back("elf_i386_nacl");
-  else if (ToolChain.getArch() == llvm::Triple::arm)
+  else if (Arch == llvm::Triple::arm)
     CmdArgs.push_back("armelf_nacl");
-  else if (ToolChain.getArch() == llvm::Triple::x86_64)
+  else if (Arch == llvm::Triple::x86_64)
     CmdArgs.push_back("elf_x86_64_nacl");
   else
-    D.Diag(diag::err_target_unsupported_arch) << ToolChain.getArchName() <<
-        "Native Client";
-
+    D.Diag(diag::err_target_unsupported_arch) << ToolChain.getArchName()
+                                              << "Native Client";
 
   if (IsStatic)
     CmdArgs.push_back("-static");
diff --git a/lib/Driver/Tools.h b/lib/Driver/Tools.h
index 33fadd1..0420eea 100644
--- a/lib/Driver/Tools.h
+++ b/lib/Driver/Tools.h
@@ -10,6 +10,7 @@
 #ifndef LLVM_CLANG_LIB_DRIVER_TOOLS_H
 #define LLVM_CLANG_LIB_DRIVER_TOOLS_H
 
+#include "clang/Basic/VersionTuple.h"
 #include "clang/Driver/Tool.h"
 #include "clang/Driver/Types.h"
 #include "clang/Driver/Util.h"
@@ -40,7 +41,7 @@
   class LLVM_LIBRARY_VISIBILITY Clang : public Tool {
   public:
     static const char *getBaseInputName(const llvm::opt::ArgList &Args,
-                                        const InputInfoList &Inputs);
+                                        const InputInfo &Input);
     static const char *getBaseInputStem(const llvm::opt::ArgList &Args,
                                         const InputInfoList &Inputs);
     static const char *getDependencyFileName(const llvm::opt::ArgList &Args,
@@ -224,11 +225,13 @@
 } // end namespace hexagon.
 
 namespace arm {
-  StringRef getARMTargetCPU(const llvm::opt::ArgList &Args,
-                            const llvm::Triple &Triple);
+  std::string getARMTargetCPU(const llvm::opt::ArgList &Args,
+                              const llvm::Triple &Triple);
+  const std::string getARMArch(const llvm::opt::ArgList &Args,
+                               const llvm::Triple &Triple);
   const char* getARMCPUForMArch(const llvm::opt::ArgList &Args,
                                 const llvm::Triple &Triple);
-  const char* getLLVMArchSuffixForARM(StringRef CPU);
+  const char* getLLVMArchSuffixForARM(StringRef CPU, StringRef Arch);
 
   void appendEBLinkFlags(const llvm::opt::ArgList &Args, ArgStringList &CmdArgs, const llvm::Triple &Triple);
 }
@@ -628,6 +631,10 @@
 
 /// Visual studio tools.
 namespace visualstudio {
+  VersionTuple getMSVCVersion(const Driver *D, const llvm::Triple &Triple,
+                              const llvm::opt::ArgList &Args,
+                              bool IsWindowsMSVC);
+
   class LLVM_LIBRARY_VISIBILITY Link : public Tool {
   public:
     Link(const ToolChain &TC) : Tool("visualstudio::Link", "linker", TC,
@@ -724,7 +731,7 @@
 };
 }
 
-} // end namespace toolchains
+} // end namespace tools
 } // end namespace driver
 } // end namespace clang
 
diff --git a/lib/Format/BreakableToken.cpp b/lib/Format/BreakableToken.cpp
index c84d9af..e3e162d 100644
--- a/lib/Format/BreakableToken.cpp
+++ b/lib/Format/BreakableToken.cpp
@@ -183,7 +183,7 @@
 }
 
 static StringRef getLineCommentIndentPrefix(StringRef Comment) {
-  static const char *const KnownPrefixes[] = { "///", "//" };
+  static const char *const KnownPrefixes[] = { "///", "//", "//!" };
   StringRef LongestPrefix;
   for (StringRef KnownPrefix : KnownPrefixes) {
     if (Comment.startswith(KnownPrefix)) {
@@ -210,6 +210,8 @@
       Prefix = "// ";
     else if (Prefix == "///")
       Prefix = "/// ";
+    else if (Prefix == "//!")
+      Prefix = "//! ";
   }
 }
 
@@ -277,6 +279,8 @@
     // If the last line is empty, the closing "*/" will have a star.
     if (i + 1 == e && Lines[i].empty())
       break;
+    if (!Lines[i].empty() && i + 1 != e && Decoration.startswith(Lines[i]))
+      continue;
     while (!Lines[i].startswith(Decoration))
       Decoration = Decoration.substr(0, Decoration.size() - 1);
   }
@@ -297,14 +301,18 @@
       }
       continue;
     }
+
     // The first line already excludes the star.
     // For all other lines, adjust the line to exclude the star and
     // (optionally) the first whitespace.
-    StartOfLineColumn[i] += Decoration.size();
-    Lines[i] = Lines[i].substr(Decoration.size());
-    LeadingWhitespace[i] += Decoration.size();
-    IndentAtLineBreak =
-        std::min<int>(IndentAtLineBreak, std::max(0, StartOfLineColumn[i]));
+    unsigned DecorationSize =
+        Decoration.startswith(Lines[i]) ? Lines[i].size() : Decoration.size();
+    StartOfLineColumn[i] += DecorationSize;
+    Lines[i] = Lines[i].substr(DecorationSize);
+    LeadingWhitespace[i] += DecorationSize;
+    if (!Decoration.startswith(Lines[i]))
+      IndentAtLineBreak =
+          std::min<int>(IndentAtLineBreak, std::max(0, StartOfLineColumn[i]));
   }
   IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());
   DEBUG({
diff --git a/lib/Format/ContinuationIndenter.cpp b/lib/Format/ContinuationIndenter.cpp
index a2a68bd..dbdb548 100644
--- a/lib/Format/ContinuationIndenter.cpp
+++ b/lib/Format/ContinuationIndenter.cpp
@@ -143,11 +143,10 @@
   if (Previous.is(tok::semi) && State.LineContainsContinuedForLoopSection)
     return true;
   if ((startsNextParameter(Current, Style) || Previous.is(tok::semi) ||
-       (Style.BreakBeforeTernaryOperators &&
-        (Current.is(tok::question) ||
-         (Current.is(TT_ConditionalExpr) && Previous.isNot(tok::question)))) ||
+       (Style.BreakBeforeTernaryOperators && Current.is(TT_ConditionalExpr) &&
+        Previous.isNot(tok::question)) ||
        (!Style.BreakBeforeTernaryOperators &&
-        (Previous.is(tok::question) || Previous.is(TT_ConditionalExpr)))) &&
+        Previous.is(TT_ConditionalExpr))) &&
       State.Stack.back().BreakBeforeParameter && !Current.isTrailingComment() &&
       !Current.isOneOf(tok::r_paren, tok::r_brace))
     return true;
@@ -160,16 +159,24 @@
   if (((Previous.is(TT_DictLiteral) && Previous.is(tok::l_brace)) ||
        Previous.is(TT_ArrayInitializerLSquare)) &&
       Style.ColumnLimit > 0 &&
-      getLengthToMatchingParen(Previous) + State.Column > getColumnLimit(State))
+      getLengthToMatchingParen(Previous) + State.Column - 1 >
+          getColumnLimit(State))
     return true;
   if (Current.is(TT_CtorInitializerColon) &&
       ((Style.AllowShortFunctionsOnASingleLine != FormatStyle::SFS_All) ||
        Style.BreakConstructorInitializersBeforeComma || Style.ColumnLimit != 0))
     return true;
+  if (Current.is(TT_SelectorName) && State.Stack.back().ObjCSelectorNameFound &&
+      State.Stack.back().BreakBeforeParameter)
+    return true;
 
   if (State.Column < getNewLineColumn(State))
     return false;
-  if (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None) {
+
+  // Using CanBreakBefore here and below takes care of the decision whether the
+  // current style uses wrapping before or after operators for the given
+  // operator.
+  if (Previous.is(TT_BinaryOperator) && Current.CanBreakBefore) {
     // If we need to break somewhere inside the LHS of a binary expression, we
     // should also break after the operator. Otherwise, the formatting would
     // hide the operator precedence, e.g. in:
@@ -185,16 +192,13 @@
                         Previous.Previous->isNot(TT_BinaryOperator); // For >>.
     bool LHSIsBinaryExpr =
         Previous.Previous && Previous.Previous->EndsBinaryExpression;
-    if (Previous.is(TT_BinaryOperator) && (!IsComparison || LHSIsBinaryExpr) &&
-        Current.isNot(TT_BinaryOperator) && // For >>.
-        !Current.isTrailingComment() && !Previous.is(tok::lessless) &&
+    if ((!IsComparison || LHSIsBinaryExpr) && !Current.isTrailingComment() &&
         Previous.getPrecedence() != prec::Assignment &&
         State.Stack.back().BreakBeforeParameter)
       return true;
-  } else {
-    if (Current.is(TT_BinaryOperator) && Previous.EndsBinaryExpression &&
-        State.Stack.back().BreakBeforeParameter)
-      return true;
+  } else if (Current.is(TT_BinaryOperator) && Current.CanBreakBefore &&
+             State.Stack.back().BreakBeforeParameter) {
+    return true;
   }
 
   // Same as above, but for the first "<<" operator.
@@ -203,12 +207,14 @@
       State.Stack.back().FirstLessLess == 0)
     return true;
 
-  if (Current.is(TT_SelectorName) && State.Stack.back().ObjCSelectorNameFound &&
-      State.Stack.back().BreakBeforeParameter)
-    return true;
   if (Current.NestingLevel == 0 && !Current.isTrailingComment()) {
+    // Always break after "template <...>" and leading annotations. This is only
+    // for cases where the entire line does not fit on a single line as a
+    // different LineFormatter would be used otherwise.
     if (Previous.ClosesTemplateDeclaration)
       return true;
+    if (Previous.is(TT_FunctionAnnotationRParen))
+      return true;
     if (Previous.is(TT_LeadingJavaAnnotation) && Current.isNot(tok::l_paren) &&
         Current.isNot(TT_LeadingJavaAnnotation))
       return true;
@@ -319,10 +325,11 @@
     State.Stack.back().Indent = State.Column + Spaces;
   if (State.Stack.back().AvoidBinPacking && startsNextParameter(Current, Style))
     State.Stack.back().NoLineBreak = true;
-  if (startsSegmentOfBuilderTypeCall(Current))
+  if (startsSegmentOfBuilderTypeCall(Current) &&
+      State.Column > getNewLineColumn(State))
     State.Stack.back().ContainsUnwrappedBuilder = true;
 
-  if (Current.is(TT_LambdaArrow))
+  if (Current.is(TT_LambdaArrow) && Style.Language == FormatStyle::LK_Java)
     State.Stack.back().NoLineBreak = true;
   if (Current.isMemberAccess() && Previous.is(tok::r_paren) &&
       (Previous.MatchingParen &&
@@ -419,7 +426,11 @@
         State.Stack.back().AlignColons = false;
       } else {
         State.Stack.back().ColonPos =
-            State.Stack.back().Indent + NextNonComment->LongestObjCSelectorName;
+            (Style.IndentWrappedFunctionNames
+                 ? std::max(State.Stack.back().Indent,
+                            State.FirstIndent + Style.ContinuationIndentWidth)
+                 : State.Stack.back().Indent) +
+            NextNonComment->LongestObjCSelectorName;
       }
     } else if (State.Stack.back().AlignColons &&
                State.Stack.back().ColonPos <= NextNonComment->ColumnWidth) {
@@ -452,6 +463,8 @@
   if (NextNonComment->is(tok::question) ||
       (PreviousNonComment && PreviousNonComment->is(tok::question)))
     State.Stack.back().BreakBeforeParameter = true;
+  if (Current.is(TT_BinaryOperator) && Current.CanBreakBefore)
+    State.Stack.back().BreakBeforeParameter = false;
 
   if (!DryRun) {
     unsigned Newlines = std::max(
@@ -471,18 +484,17 @@
   bool NestedBlockSpecialCase =
       Current.is(tok::r_brace) && State.Stack.size() > 1 &&
       State.Stack[State.Stack.size() - 2].NestedBlockInlined;
-  if (!NestedBlockSpecialCase) {
-    for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) {
+  if (!NestedBlockSpecialCase)
+    for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i)
       State.Stack[i].BreakBeforeParameter = true;
-    }
-  }
 
   if (PreviousNonComment &&
       !PreviousNonComment->isOneOf(tok::comma, tok::semi) &&
       (PreviousNonComment->isNot(TT_TemplateCloser) ||
        Current.NestingLevel != 0) &&
-      !PreviousNonComment->isOneOf(TT_BinaryOperator, TT_JavaAnnotation,
-                                   TT_LeadingJavaAnnotation) &&
+      !PreviousNonComment->isOneOf(
+          TT_BinaryOperator, TT_FunctionAnnotationRParen, TT_JavaAnnotation,
+          TT_LeadingJavaAnnotation) &&
       Current.isNot(TT_BinaryOperator) && !PreviousNonComment->opensScope())
     State.Stack.back().BreakBeforeParameter = true;
 
@@ -542,6 +554,9 @@
     return State.Stack.back().Indent;
   if (NextNonComment->isStringLiteral() && State.StartOfStringLiteral != 0)
     return State.StartOfStringLiteral;
+  if (NextNonComment->is(TT_ObjCStringLiteral) &&
+      State.StartOfStringLiteral != 0)
+    return State.StartOfStringLiteral - 1;
   if (NextNonComment->is(tok::lessless) &&
       State.Stack.back().FirstLessLess != 0)
     return State.Stack.back().FirstLessLess;
@@ -559,8 +574,9 @@
     return State.Stack.back().VariablePos;
   if ((PreviousNonComment &&
        (PreviousNonComment->ClosesTemplateDeclaration ||
-        PreviousNonComment->isOneOf(TT_AttributeParen, TT_JavaAnnotation,
-                                    TT_LeadingJavaAnnotation))) ||
+        PreviousNonComment->isOneOf(
+            TT_AttributeParen, TT_FunctionAnnotationRParen, TT_JavaAnnotation,
+            TT_LeadingJavaAnnotation))) ||
       (!Style.IndentWrappedFunctionNames &&
        NextNonComment->isOneOf(tok::kw_operator, TT_FunctionDeclarationName)))
     return std::max(State.Stack.back().LastSpace, State.Stack.back().Indent);
@@ -568,7 +584,10 @@
     if (!State.Stack.back().ObjCSelectorNameFound) {
       if (NextNonComment->LongestObjCSelectorName == 0)
         return State.Stack.back().Indent;
-      return State.Stack.back().Indent +
+      return (Style.IndentWrappedFunctionNames
+                  ? std::max(State.Stack.back().Indent,
+                             State.FirstIndent + Style.ContinuationIndentWidth)
+                  : State.Stack.back().Indent) +
              NextNonComment->LongestObjCSelectorName -
              NextNonComment->ColumnWidth;
     }
@@ -583,10 +602,16 @@
       return State.Stack.back().StartOfArraySubscripts;
     return ContinuationIndent;
   }
+
+  // This ensures that we correctly format ObjC method calls without inputs,
+  // i.e. where the last element isn't selector like: [callee method];
+  if (NextNonComment->is(tok::identifier) && NextNonComment->FakeRParens == 0 &&
+      NextNonComment->Next && NextNonComment->Next->is(TT_ObjCMethodExpr))
+    return State.Stack.back().Indent;
+
   if (NextNonComment->isOneOf(TT_StartOfName, TT_PointerOrReference) ||
-      Previous.isOneOf(tok::coloncolon, tok::equal)) {
+      Previous.isOneOf(tok::coloncolon, tok::equal))
     return ContinuationIndent;
-  }
   if (PreviousNonComment && PreviousNonComment->is(tok::colon) &&
       PreviousNonComment->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral))
     return ContinuationIndent;
@@ -650,6 +675,9 @@
       State.Stack.back().AvoidBinPacking = true;
     State.Stack.back().BreakBeforeParameter = false;
   }
+  if (Current.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) && Newline)
+    State.Stack.back().NestedBlockIndent =
+        State.Column + Current.ColumnWidth + 1;
 
   // Insert scopes created by fake parenthesis.
   const FormatToken *Previous = Current.getPreviousNonComment();
@@ -660,13 +688,12 @@
   //     foo();
   //     bar();
   //   }, a, b, c);
-  if (Current.isNot(tok::comment) && Previous && Previous->is(tok::l_brace) &&
+  if (Current.isNot(tok::comment) && Previous &&
+      Previous->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) &&
       State.Stack.size() > 1) {
-    if (State.Stack[State.Stack.size() - 2].NestedBlockInlined && Newline) {
-      for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) {
+    if (State.Stack[State.Stack.size() - 2].NestedBlockInlined && Newline)
+      for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i)
         State.Stack[i].NoLineBreak = true;
-      }
-    }
     State.Stack[State.Stack.size() - 2].NestedBlockInlined = false;
   }
   if (Previous && (Previous->isOneOf(tok::l_paren, tok::comma, tok::colon) ||
@@ -682,12 +709,13 @@
   moveStatePastScopeCloser(State);
   moveStatePastFakeRParens(State);
 
-  if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) {
+  if (Current.isStringLiteral() && State.StartOfStringLiteral == 0)
     State.StartOfStringLiteral = State.Column;
-  } else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) &&
-             !Current.isStringLiteral()) {
+  if (Current.is(TT_ObjCStringLiteral) && State.StartOfStringLiteral == 0)
+    State.StartOfStringLiteral = State.Column + 1;
+  else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) &&
+             !Current.isStringLiteral())
     State.StartOfStringLiteral = 0;
-  }
 
   State.Column += Current.ColumnWidth;
   State.NextToken = State.NextToken->Next;
@@ -813,6 +841,7 @@
 
   unsigned NewIndent;
   unsigned NewIndentLevel = State.Stack.back().IndentLevel;
+  unsigned LastSpace = State.Stack.back().LastSpace;
   bool AvoidBinPacking;
   bool BreakBeforeParameter = false;
   if (Current.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) {
@@ -826,12 +855,24 @@
     const FormatToken *NextNoComment = Current.getNextNonComment();
     AvoidBinPacking =
         Current.isOneOf(TT_ArrayInitializerLSquare, TT_DictLiteral) ||
-        Style.Language == FormatStyle::LK_Proto || !Style.BinPackParameters ||
+        Style.Language == FormatStyle::LK_Proto || !Style.BinPackArguments ||
         (NextNoComment && NextNoComment->is(TT_DesignatedInitializerPeriod));
   } else {
     NewIndent = Style.ContinuationIndentWidth +
                 std::max(State.Stack.back().LastSpace,
                          State.Stack.back().StartOfFunctionCall);
+
+    // Ensure that different brackets force relative alignment, e.g.:
+    // void SomeFunction(vector<  // break
+    //                       int> v);
+    // FIXME: We likely want to do this for more combinations of brackets.
+    // Verify that it is wanted for ObjC, too.
+    if (Current.Tok.getKind() == tok::less &&
+        Current.ParentBracket == tok::l_paren) {
+      NewIndent = std::max(NewIndent, State.Stack.back().Indent);
+      LastSpace = std::max(LastSpace, State.Stack.back().Indent);
+    }
+
     AvoidBinPacking =
         (State.Line->MustBeDeclaration && !Style.BinPackParameters) ||
         (!State.Line->MustBeDeclaration && !Style.BinPackArguments) ||
@@ -839,20 +880,33 @@
          (Current.PackingKind == PPK_OnePerLine ||
           (!BinPackInconclusiveFunctions &&
            Current.PackingKind == PPK_Inconclusive)));
-    // If this '[' opens an ObjC call, determine whether all parameters fit
-    // into one line and put one per line if they don't.
-    if (Current.is(TT_ObjCMethodExpr) && Style.ColumnLimit != 0 &&
-        getLengthToMatchingParen(Current) + State.Column >
+    if (Current.is(TT_ObjCMethodExpr) && Current.MatchingParen) {
+      if (Style.ColumnLimit) {
+        // If this '[' opens an ObjC call, determine whether all parameters fit
+        // into one line and put one per line if they don't.
+        if (getLengthToMatchingParen(Current) + State.Column >
             getColumnLimit(State))
-      BreakBeforeParameter = true;
+          BreakBeforeParameter = true;
+      } else {
+        // For ColumnLimit = 0, we have to figure out whether there is or has to
+        // be a line break within this call.
+        for (const FormatToken *Tok = &Current;
+             Tok && Tok != Current.MatchingParen; Tok = Tok->Next) {
+          if (Tok->MustBreakBefore || 
+              (Tok->CanBreakBefore && Tok->NewlinesBefore > 0)) {
+            BreakBeforeParameter = true;
+            break;
+          }
+        }
+      }
+    }
   }
   bool NoLineBreak = State.Stack.back().NoLineBreak ||
                      (Current.is(TT_TemplateOpener) &&
                       State.Stack.back().ContainsUnwrappedBuilder);
   unsigned NestedBlockIndent = std::max(State.Stack.back().StartOfFunctionCall,
                                         State.Stack.back().NestedBlockIndent);
-  State.Stack.push_back(ParenState(NewIndent, NewIndentLevel,
-                                   State.Stack.back().LastSpace,
+  State.Stack.push_back(ParenState(NewIndent, NewIndentLevel, LastSpace,
                                    AvoidBinPacking, NoLineBreak));
   State.Stack.back().NestedBlockIndent = NestedBlockIndent;
   State.Stack.back().BreakBeforeParameter = BreakBeforeParameter;
diff --git a/lib/Format/ContinuationIndenter.h b/lib/Format/ContinuationIndenter.h
index 36691d9..9b9154e 100644
--- a/lib/Format/ContinuationIndenter.h
+++ b/lib/Format/ContinuationIndenter.h
@@ -148,13 +148,10 @@
   ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace,
              bool AvoidBinPacking, bool NoLineBreak)
       : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace),
-        NestedBlockIndent(Indent), FirstLessLess(0),
-        BreakBeforeClosingBrace(false), QuestionColumn(0),
+        NestedBlockIndent(Indent), BreakBeforeClosingBrace(false),
         AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
-        NoLineBreak(NoLineBreak), LastOperatorWrapped(true), ColonPos(0),
-        StartOfFunctionCall(0), StartOfArraySubscripts(0),
-        NestedNameSpecifierContinuation(0), CallContinuation(0), VariablePos(0),
-        ContainsLineBreak(false), ContainsUnwrappedBuilder(0),
+        NoLineBreak(NoLineBreak), LastOperatorWrapped(true),
+        ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
         AlignColons(true), ObjCSelectorNameFound(false),
         HasMultipleNestedBlocks(false), NestedBlockInlined(false) {}
 
@@ -180,90 +177,90 @@
   ///
   /// Used to align "<<" operators. 0 if no such operator has been encountered
   /// on a level.
-  unsigned FirstLessLess;
+  unsigned FirstLessLess = 0;
+
+  /// \brief The column of a \c ? in a conditional expression;
+  unsigned QuestionColumn = 0;
+
+  /// \brief The position of the colon in an ObjC method declaration/call.
+  unsigned ColonPos = 0;
+
+  /// \brief The start of the most recent function in a builder-type call.
+  unsigned StartOfFunctionCall = 0;
+
+  /// \brief Contains the start of array subscript expressions, so that they
+  /// can be aligned.
+  unsigned StartOfArraySubscripts = 0;
+
+  /// \brief If a nested name specifier was broken over multiple lines, this
+  /// contains the start column of the second line. Otherwise 0.
+  unsigned NestedNameSpecifierContinuation = 0;
+
+  /// \brief If a call expression was broken over multiple lines, this
+  /// contains the start column of the second line. Otherwise 0.
+  unsigned CallContinuation = 0;
+
+  /// \brief The column of the first variable name in a variable declaration.
+  ///
+  /// Used to align further variables if necessary.
+  unsigned VariablePos = 0;
 
   /// \brief Whether a newline needs to be inserted before the block's closing
   /// brace.
   ///
   /// We only want to insert a newline before the closing brace if there also
   /// was a newline after the beginning left brace.
-  bool BreakBeforeClosingBrace;
-
-  /// \brief The column of a \c ? in a conditional expression;
-  unsigned QuestionColumn;
+  bool BreakBeforeClosingBrace : 1;
 
   /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
   /// lines, in this context.
-  bool AvoidBinPacking;
+  bool AvoidBinPacking : 1;
 
   /// \brief Break after the next comma (or all the commas in this context if
   /// \c AvoidBinPacking is \c true).
-  bool BreakBeforeParameter;
+  bool BreakBeforeParameter : 1;
 
   /// \brief Line breaking in this context would break a formatting rule.
-  bool NoLineBreak;
+  bool NoLineBreak : 1;
 
   /// \brief True if the last binary operator on this level was wrapped to the
   /// next line.
-  bool LastOperatorWrapped;
-
-  /// \brief The position of the colon in an ObjC method declaration/call.
-  unsigned ColonPos;
-
-  /// \brief The start of the most recent function in a builder-type call.
-  unsigned StartOfFunctionCall;
-
-  /// \brief Contains the start of array subscript expressions, so that they
-  /// can be aligned.
-  unsigned StartOfArraySubscripts;
-
-  /// \brief If a nested name specifier was broken over multiple lines, this
-  /// contains the start column of the second line. Otherwise 0.
-  unsigned NestedNameSpecifierContinuation;
-
-  /// \brief If a call expression was broken over multiple lines, this
-  /// contains the start column of the second line. Otherwise 0.
-  unsigned CallContinuation;
-
-  /// \brief The column of the first variable name in a variable declaration.
-  ///
-  /// Used to align further variables if necessary.
-  unsigned VariablePos;
+  bool LastOperatorWrapped : 1;
 
   /// \brief \c true if this \c ParenState already contains a line-break.
   ///
   /// The first line break in a certain \c ParenState causes extra penalty so
   /// that clang-format prefers similar breaks, i.e. breaks in the same
   /// parenthesis.
-  bool ContainsLineBreak;
+  bool ContainsLineBreak : 1;
 
   /// \brief \c true if this \c ParenState contains multiple segments of a
   /// builder-type call on one line.
-  bool ContainsUnwrappedBuilder;
+  bool ContainsUnwrappedBuilder : 1;
 
   /// \brief \c true if the colons of the curren ObjC method expression should
   /// be aligned.
   ///
   /// Not considered for memoization as it will always have the same value at
   /// the same token.
-  bool AlignColons;
+  bool AlignColons : 1;
 
   /// \brief \c true if at least one selector name was found in the current
   /// ObjC method expression.
   ///
   /// Not considered for memoization as it will always have the same value at
   /// the same token.
-  bool ObjCSelectorNameFound;
+  bool ObjCSelectorNameFound : 1;
 
   /// \brief \c true if there are multiple nested blocks inside these parens.
   ///
   /// Not considered for memoization as it will always have the same value at
   /// the same token.
-  bool HasMultipleNestedBlocks;
+  bool HasMultipleNestedBlocks : 1;
 
   // \brief The start of a nested block (e.g. lambda introducer in C++ or
   // "function" in JavaScript) is not wrapped to a new line.
-  bool NestedBlockInlined;
+  bool NestedBlockInlined : 1;
 
   bool operator<(const ParenState &Other) const {
     if (Indent != Other.Indent)
@@ -297,11 +294,11 @@
     if (VariablePos != Other.VariablePos)
       return VariablePos < Other.VariablePos;
     if (ContainsLineBreak != Other.ContainsLineBreak)
-      return ContainsLineBreak < Other.ContainsLineBreak;
+      return ContainsLineBreak;
     if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
-      return ContainsUnwrappedBuilder < Other.ContainsUnwrappedBuilder;
+      return ContainsUnwrappedBuilder;
     if (NestedBlockInlined != Other.NestedBlockInlined)
-      return NestedBlockInlined < Other.NestedBlockInlined;
+      return NestedBlockInlined;
     return false;
   }
 };
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index ad9398c..aa91658 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -174,6 +174,7 @@
     IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
     IO.mapOptional("AlignOperands", Style.AlignOperands);
     IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
+    IO.mapOptional("AlignConsecutiveAssignments", Style.AlignConsecutiveAssignments);
     IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
                    Style.AllowAllParametersOfDeclarationOnNextLine);
     IO.mapOptional("AllowShortBlocksOnASingleLine",
@@ -329,6 +330,7 @@
   LLVMStyle.AlignAfterOpenBracket = true;
   LLVMStyle.AlignOperands = true;
   LLVMStyle.AlignTrailingComments = true;
+  LLVMStyle.AlignConsecutiveAssignments = false;
   LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
   LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All;
   LLVMStyle.AllowShortBlocksOnASingleLine = false;
@@ -648,15 +650,14 @@
       static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
                                                     tok::greaterequal};
       static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater};
-      // FIXME: We probably need to change token type to mimic operator with the
-      // correct priority.
-      if (tryMergeTokens(JSIdentity))
+      // FIXME: Investigate what token type gives the correct operator priority.
+      if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
         return;
-      if (tryMergeTokens(JSNotIdentity))
+      if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
         return;
-      if (tryMergeTokens(JSShiftEqual))
+      if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
         return;
-      if (tryMergeTokens(JSRightArrow))
+      if (tryMergeTokens(JSRightArrow, TT_JsFatArrow))
         return;
     }
   }
@@ -687,7 +688,7 @@
     return true;
   }
 
-  bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds) {
+  bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType) {
     if (Tokens.size() < Kinds.size())
       return false;
 
@@ -707,6 +708,7 @@
     First[0]->TokenText = StringRef(First[0]->TokenText.data(),
                                     First[0]->TokenText.size() + AddLength);
     First[0]->ColumnWidth += AddLength;
+    First[0]->Type = NewType;
     return true;
   }
 
@@ -750,7 +752,7 @@
     unsigned LastColumn = Tokens.back()->OriginalColumn;
     for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
       ++TokenCount;
-      if (I[0]->is(tok::slash) && I + 1 != E &&
+      if (I[0]->isOneOf(tok::slash, tok::slashequal) && I + 1 != E &&
           (I[1]->isOneOf(tok::l_paren, tok::semi, tok::l_brace, tok::r_brace,
                          tok::exclaim, tok::l_square, tok::colon, tok::comma,
                          tok::question, tok::kw_return) ||
@@ -780,10 +782,11 @@
       return false;
 
     FormatToken *EndBacktick = Tokens.back();
-    // Backticks get lexed as tok:unknown tokens. If a template string contains
+    // Backticks get lexed as tok::unknown tokens. If a template string contains
     // a comment start, it gets lexed as a tok::comment, or tok::unknown if
     // unterminated.
-    if (!EndBacktick->isOneOf(tok::comment, tok::unknown))
+    if (!EndBacktick->isOneOf(tok::comment, tok::string_literal,
+                              tok::char_constant, tok::unknown))
       return false;
     size_t CommentBacktickPos = EndBacktick->TokenText.find('`');
     // Unknown token that's not actually a backtick, or a comment that doesn't
@@ -793,7 +796,8 @@
 
     unsigned TokenCount = 0;
     bool IsMultiline = false;
-    unsigned EndColumnInFirstLine = 0;
+    unsigned EndColumnInFirstLine =
+        EndBacktick->OriginalColumn + EndBacktick->ColumnWidth;
     for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; I++) {
       ++TokenCount;
       if (I[0]->NewlinesBefore > 0 || I[0]->IsMultiline)
@@ -831,6 +835,15 @@
       Tokens.back()->TokenText =
           StringRef(Tokens.back()->TokenText.data(),
                     EndOffset - Tokens.back()->TokenText.data());
+
+      unsigned EndOriginalColumn = EndBacktick->OriginalColumn;
+      if (EndOriginalColumn == 0) {
+        SourceLocation Loc = EndBacktick->Tok.getLocation();
+        EndOriginalColumn = SourceMgr.getSpellingColumnNumber(Loc);
+      }
+      // If the ` is further down within the token (e.g. in a comment).
+      EndOriginalColumn += CommentBacktickPos;
+
       if (IsMultiline) {
         // ColumnWidth is from backtick to last token in line.
         // LastLineColumnWidth is 0 to backtick.
@@ -838,12 +851,12 @@
         //     until here`;
         Tokens.back()->ColumnWidth =
             EndColumnInFirstLine - Tokens.back()->OriginalColumn;
-        Tokens.back()->LastLineColumnWidth = EndBacktick->OriginalColumn;
+        Tokens.back()->LastLineColumnWidth = EndOriginalColumn;
         Tokens.back()->IsMultiline = true;
       } else {
         // Token simply spans from start to end, +1 for the ` itself.
         Tokens.back()->ColumnWidth =
-            EndBacktick->OriginalColumn - Tokens.back()->OriginalColumn + 1;
+            EndOriginalColumn - Tokens.back()->OriginalColumn + 1;
       }
       return true;
     }
@@ -988,18 +1001,25 @@
     // Consume and record whitespace until we find a significant token.
     unsigned WhitespaceLength = TrailingWhitespace;
     while (FormatTok->Tok.is(tok::unknown)) {
+      StringRef Text = FormatTok->TokenText;
+      auto EscapesNewline = [&](int pos) {
+        // A '\r' here is just part of '\r\n'. Skip it.
+        if (pos >= 0 && Text[pos] == '\r')
+          --pos;
+        // See whether there is an odd number of '\' before this.
+        unsigned count = 0;
+        for (; pos >= 0; --pos, ++count)
+          if (Text[pos] != '\\')
+            break;
+        return count & 1;
+      };
       // FIXME: This miscounts tok:unknown tokens that are not just
       // whitespace, e.g. a '`' character.
-      for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
-        switch (FormatTok->TokenText[i]) {
+      for (int i = 0, e = Text.size(); i != e; ++i) {
+        switch (Text[i]) {
         case '\n':
           ++FormatTok->NewlinesBefore;
-          // FIXME: This is technically incorrect, as it could also
-          // be a literal backslash at the end of the line.
-          if (i == 0 || (FormatTok->TokenText[i - 1] != '\\' &&
-                         (FormatTok->TokenText[i - 1] != '\r' || i == 1 ||
-                          FormatTok->TokenText[i - 2] != '\\')))
-            FormatTok->HasUnescapedNewline = true;
+          FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1);
           FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
           Column = 0;
           break;
@@ -1018,8 +1038,7 @@
           Column += Style.TabWidth - Column % Style.TabWidth;
           break;
         case '\\':
-          if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' &&
-                             FormatTok->TokenText[i + 1] != '\n'))
+          if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n'))
             FormatTok->Type = TT_ImplicitStringLiteral;
           break;
         default:
@@ -1044,6 +1063,7 @@
            FormatTok->TokenText[1] == '\n') {
       ++FormatTok->NewlinesBefore;
       WhitespaceLength += 2;
+      FormatTok->LastNewlineOffset = 2;
       Column = 0;
       FormatTok->TokenText = FormatTok->TokenText.substr(2);
     }
@@ -1103,9 +1123,12 @@
       Column = FormatTok->LastLineColumnWidth;
     }
 
-    FormatTok->IsForEachMacro =
-        std::binary_search(ForEachMacros.begin(), ForEachMacros.end(),
-                           FormatTok->Tok.getIdentifierInfo());
+    if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&
+          Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
+              tok::pp_define) &&
+        std::find(ForEachMacros.begin(), ForEachMacros.end(),
+                  FormatTok->Tok.getIdentifierInfo()) != ForEachMacros.end())
+      FormatTok->Type = TT_ForEachMacro;
 
     return FormatTok;
   }
@@ -1200,13 +1223,13 @@
                        << "\n");
   }
 
-  tooling::Replacements format() {
+  tooling::Replacements format(bool *IncompleteFormat) {
     tooling::Replacements Result;
     FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding);
 
     UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(),
                                *this);
-    bool StructuralError = Parser.parse();
+    Parser.parse();
     assert(UnwrappedLines.rbegin()->empty());
     for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
          ++Run) {
@@ -1216,7 +1239,7 @@
         AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
       }
       tooling::Replacements RunResult =
-          format(AnnotatedLines, StructuralError, Tokens);
+          format(AnnotatedLines, Tokens, IncompleteFormat);
       DEBUG({
         llvm::dbgs() << "Replacements for run " << Run << ":\n";
         for (tooling::Replacements::iterator I = RunResult.begin(),
@@ -1235,7 +1258,7 @@
   }
 
   tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
-                               bool StructuralError, FormatTokenLexer &Tokens) {
+                               FormatTokenLexer &Tokens, bool *IncompleteFormat) {
     TokenAnnotator Annotator(Style, Tokens.getKeywords());
     for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
       Annotator.annotate(*AnnotatedLines[i]);
@@ -1250,9 +1273,9 @@
     ContinuationIndenter Indenter(Style, Tokens.getKeywords(), SourceMgr,
                                   Whitespaces, Encoding,
                                   BinPackInconclusiveFunctions);
-    UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, Style,
-                                     Tokens.getKeywords());
-    Formatter.format(AnnotatedLines, /*DryRun=*/false);
+    UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(),
+                           IncompleteFormat)
+        .format(AnnotatedLines);
     return Whitespaces.generateReplacements();
   }
 
@@ -1469,27 +1492,20 @@
 
 } // end anonymous namespace
 
-tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
-                               SourceManager &SourceMgr,
-                               ArrayRef<CharSourceRange> Ranges) {
-  if (Style.DisableFormat)
-    return tooling::Replacements();
-  return reformat(Style, SourceMgr,
-                  SourceMgr.getFileID(Lex.getSourceLocation()), Ranges);
-}
-
 tooling::Replacements reformat(const FormatStyle &Style,
                                SourceManager &SourceMgr, FileID ID,
-                               ArrayRef<CharSourceRange> Ranges) {
+                               ArrayRef<CharSourceRange> Ranges,
+                               bool *IncompleteFormat) {
   if (Style.DisableFormat)
     return tooling::Replacements();
   Formatter formatter(Style, SourceMgr, ID, Ranges);
-  return formatter.format();
+  return formatter.format(IncompleteFormat);
 }
 
 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
                                ArrayRef<tooling::Range> Ranges,
-                               StringRef FileName) {
+                               StringRef FileName,
+                               bool *IncompleteFormat) {
   if (Style.DisableFormat)
     return tooling::Replacements();
 
@@ -1512,7 +1528,7 @@
     SourceLocation End = Start.getLocWithOffset(Range.getLength());
     CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
   }
-  return reformat(Style, SourceMgr, ID, CharRanges);
+  return reformat(Style, SourceMgr, ID, CharRanges, IncompleteFormat);
 }
 
 LangOptions getFormattingLangOpts(const FormatStyle &Style) {
@@ -1628,8 +1644,6 @@
       return Style;
     }
   }
-  llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle
-               << " style\n";
   if (!UnsuitableConfigFiles.empty()) {
     llvm::errs() << "Configuration file(s) do(es) not support "
                  << getLanguageName(Style.Language) << ": "
diff --git a/lib/Format/FormatToken.cpp b/lib/Format/FormatToken.cpp
index 0addbfe..316171d 100644
--- a/lib/Format/FormatToken.cpp
+++ b/lib/Format/FormatToken.cpp
@@ -18,6 +18,7 @@
 #include "clang/Format/Format.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Debug.h"
+#include <climits>
 
 namespace clang {
 namespace format {
@@ -59,13 +60,13 @@
 unsigned CommaSeparatedList::formatAfterToken(LineState &State,
                                               ContinuationIndenter *Indenter,
                                               bool DryRun) {
-  if (State.NextToken == nullptr || !State.NextToken->Previous ||
-      !State.NextToken->Previous->Previous)
+  if (State.NextToken == nullptr || !State.NextToken->Previous)
     return 0;
 
   // Ensure that we start on the opening brace.
-  const FormatToken *LBrace = State.NextToken->Previous->Previous;
-  if (LBrace->isNot(tok::l_brace) || LBrace->BlockKind == BK_Block ||
+  const FormatToken *LBrace =
+      State.NextToken->Previous->getPreviousNonComment();
+  if (!LBrace || LBrace->isNot(tok::l_brace) || LBrace->BlockKind == BK_Block ||
       LBrace->Type == TT_DictLiteral ||
       LBrace->Next->Type == TT_DesignatedInitializerPeriod)
     return 0;
@@ -133,9 +134,9 @@
     return;
 
   // In C++11 braced list style, we should not format in columns unless they
-  // have many items (20 or more) or we allow bin-packing of function
-  // parameters.
-  if (Style.Cpp11BracedListStyle && !Style.BinPackParameters &&
+  // have many items (20 or more) or we allow bin-packing of function call
+  // arguments.
+  if (Style.Cpp11BracedListStyle && !Style.BinPackArguments &&
       Commas.size() < 19)
     return;
 
@@ -144,19 +145,21 @@
     return;
 
   FormatToken *ItemBegin = Token->Next;
+  while (ItemBegin->isTrailingComment())
+    ItemBegin = ItemBegin->Next;
   SmallVector<bool, 8> MustBreakBeforeItem;
 
   // The lengths of an item if it is put at the end of the line. This includes
   // trailing comments which are otherwise ignored for column alignment.
   SmallVector<unsigned, 8> EndOfLineItemLength;
 
-  unsigned MinItemLength = Style.ColumnLimit;
-  unsigned MaxItemLength = 0;
-
+  bool HasSeparatingComment = false;
   for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {
     // Skip comments on their own line.
-    while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment())
+    while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) {
       ItemBegin = ItemBegin->Next;
+      HasSeparatingComment = i > 0;
+    }
 
     MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);
     if (ItemBegin->is(tok::l_brace))
@@ -179,8 +182,6 @@
       ItemEnd = Commas[i];
       // The comma is counted as part of the item when calculating the length.
       ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd));
-      MinItemLength = std::min(MinItemLength, ItemLengths.back());
-      MaxItemLength = std::max(MaxItemLength, ItemLengths.back());
 
       // Consume trailing comments so the are included in EndOfLineItemLength.
       if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
@@ -195,20 +196,21 @@
     ItemBegin = ItemEnd->Next;
   }
 
-  // If this doesn't have a nested list, we require at least 6 elements in order
-  // create a column layout. If it has a nested list, column layout ensures one
-  // list element per line. If the difference between the shortest and longest
-  // element is too large, column layout would create too much whitespace.
-  if (HasNestedBracedList || Commas.size() < 5 || Token->NestingLevel != 0 ||
-      MaxItemLength - MinItemLength > 10)
+  // Don't use column layout for nested lists, for lists with few elements, or
+  // in the presence of separating comments.
+  if (Token->NestingLevel != 0 || Commas.size() < 5 || HasSeparatingComment)
     return;
 
   // We can never place more than ColumnLimit / 3 items in a row (because of the
   // spaces and the comma).
-  for (unsigned Columns = 1; Columns <= Style.ColumnLimit / 3; ++Columns) {
+  unsigned MaxItems = Style.ColumnLimit / 3;
+  std::vector<unsigned> MinSizeInColumn;
+  MinSizeInColumn.reserve(MaxItems);
+  for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) {
     ColumnFormat Format;
     Format.Columns = Columns;
     Format.ColumnSizes.resize(Columns);
+    MinSizeInColumn.assign(Columns, UINT_MAX);
     Format.LineCount = 1;
     bool HasRowWithSufficientColumns = false;
     unsigned Column = 0;
@@ -220,9 +222,10 @@
       }
       if (Column == Columns - 1)
         HasRowWithSufficientColumns = true;
-      unsigned length =
+      unsigned Length =
           (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i];
-      Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], length);
+      Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length);
+      MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length);
       ++Column;
     }
     // If all rows are terminated early (e.g. by trailing comments), we don't
@@ -230,9 +233,19 @@
     if (!HasRowWithSufficientColumns)
       break;
     Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.
-    for (unsigned i = 0; i < Columns; ++i) {
+
+    for (unsigned i = 0; i < Columns; ++i)
       Format.TotalWidth += Format.ColumnSizes[i];
-    }
+
+    // Don't use this Format if the difference between the longest and shortest
+    // element in a column exceeds a threshold, to avoid excessive spaces.
+    if ([&] {
+          for (unsigned i = 0; i < Columns - 1; ++i)
+            if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10)
+              return true;
+          return false;
+        }())
+      continue;
 
     // Ignore layouts that are bound to violate the column limit.
     if (Format.TotalWidth > Style.ColumnLimit)
diff --git a/lib/Format/FormatToken.h b/lib/Format/FormatToken.h
index bc14d4c..dd12969 100644
--- a/lib/Format/FormatToken.h
+++ b/lib/Format/FormatToken.h
@@ -41,13 +41,18 @@
   TT_CtorInitializerComma,
   TT_DesignatedInitializerPeriod,
   TT_DictLiteral,
+  TT_ForEachMacro,
+  TT_FunctionAnnotationRParen,
   TT_FunctionDeclarationName,
   TT_FunctionLBrace,
   TT_FunctionTypeLParen,
   TT_ImplicitStringLiteral,
   TT_InheritanceColon,
+  TT_InlineASMBrace,
   TT_InlineASMColon,
   TT_JavaAnnotation,
+  TT_JsComputedPropertyName,
+  TT_JsFatArrow,
   TT_JsTypeColon,
   TT_JsTypeOptionalQuestion,
   TT_LambdaArrow,
@@ -61,6 +66,7 @@
   TT_ObjCMethodExpr,
   TT_ObjCMethodSpecifier,
   TT_ObjCProperty,
+  TT_ObjCStringLiteral,
   TT_OverloadedOperator,
   TT_OverloadedOperatorLParen,
   TT_PointerOrReference,
@@ -105,21 +111,7 @@
 /// \brief A wrapper around a \c Token storing information about the
 /// whitespace characters preceding it.
 struct FormatToken {
-  FormatToken()
-      : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
-        ColumnWidth(0), LastLineColumnWidth(0), IsMultiline(false),
-        IsFirst(false), MustBreakBefore(false), IsUnterminatedLiteral(false),
-        BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0),
-        CanBreakBefore(false), ClosesTemplateDeclaration(false),
-        ParameterCount(0), BlockParameterCount(0),
-        PackingKind(PPK_Inconclusive), TotalLength(0), UnbreakableTailLength(0),
-        BindingStrength(0), NestingLevel(0), SplitPenalty(0),
-        LongestObjCSelectorName(0), FakeRParens(0),
-        StartsBinaryExpression(false), EndsBinaryExpression(false),
-        OperatorIndex(0), LastOperator(false),
-        PartOfMultiVariableDeclStmt(false), IsForEachMacro(false),
-        MatchingParen(nullptr), Previous(nullptr), Next(nullptr),
-        Decision(FD_Unformatted), Finalized(false) {}
+  FormatToken() {}
 
   /// \brief The \c Token.
   Token Tok;
@@ -128,48 +120,39 @@
   ///
   /// This can be used to determine what the user wrote in the original code
   /// and thereby e.g. leave an empty line between two function definitions.
-  unsigned NewlinesBefore;
+  unsigned NewlinesBefore = 0;
 
   /// \brief Whether there is at least one unescaped newline before the \c
   /// Token.
-  bool HasUnescapedNewline;
+  bool HasUnescapedNewline = false;
 
   /// \brief The range of the whitespace immediately preceding the \c Token.
   SourceRange WhitespaceRange;
 
   /// \brief The offset just past the last '\n' in this token's leading
   /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
-  unsigned LastNewlineOffset;
+  unsigned LastNewlineOffset = 0;
 
   /// \brief The width of the non-whitespace parts of the token (or its first
   /// line for multi-line tokens) in columns.
   /// We need this to correctly measure number of columns a token spans.
-  unsigned ColumnWidth;
+  unsigned ColumnWidth = 0;
 
   /// \brief Contains the width in columns of the last line of a multi-line
   /// token.
-  unsigned LastLineColumnWidth;
+  unsigned LastLineColumnWidth = 0;
 
   /// \brief Whether the token text contains newlines (escaped or not).
-  bool IsMultiline;
+  bool IsMultiline = false;
 
   /// \brief Indicates that this is the first token.
-  bool IsFirst;
+  bool IsFirst = false;
 
   /// \brief Whether there must be a line break before this token.
   ///
   /// This happens for example when a preprocessor directive ended directly
   /// before the token.
-  bool MustBreakBefore;
-
-  /// \brief Returns actual token start location without leading escaped
-  /// newlines and whitespace.
-  ///
-  /// This can be different to Tok.getLocation(), which includes leading escaped
-  /// newlines.
-  SourceLocation getStartOfNonWhitespace() const {
-    return WhitespaceRange.getEnd();
-  }
+  bool MustBreakBefore = false;
 
   /// \brief The raw text of the token.
   ///
@@ -178,69 +161,74 @@
   StringRef TokenText;
 
   /// \brief Set to \c true if this token is an unterminated literal.
-  bool IsUnterminatedLiteral;
+  bool IsUnterminatedLiteral = 0;
 
   /// \brief Contains the kind of block if this token is a brace.
-  BraceBlockKind BlockKind;
+  BraceBlockKind BlockKind = BK_Unknown;
 
-  TokenType Type;
+  TokenType Type = TT_Unknown;
 
   /// \brief The number of spaces that should be inserted before this token.
-  unsigned SpacesRequiredBefore;
+  unsigned SpacesRequiredBefore = 0;
 
   /// \brief \c true if it is allowed to break before this token.
-  bool CanBreakBefore;
+  bool CanBreakBefore = false;
 
-  bool ClosesTemplateDeclaration;
+  /// \brief \c true if this is the ">" of "template<..>".
+  bool ClosesTemplateDeclaration = false;
 
   /// \brief Number of parameters, if this is "(", "[" or "<".
   ///
   /// This is initialized to 1 as we don't need to distinguish functions with
   /// 0 parameters from functions with 1 parameter. Thus, we can simply count
   /// the number of commas.
-  unsigned ParameterCount;
+  unsigned ParameterCount = 0;
 
   /// \brief Number of parameters that are nested blocks,
   /// if this is "(", "[" or "<".
-  unsigned BlockParameterCount;
+  unsigned BlockParameterCount = 0;
+
+  /// \brief If this is a bracket ("<", "(", "[" or "{"), contains the kind of
+  /// the surrounding bracket.
+  tok::TokenKind ParentBracket = tok::unknown;
 
   /// \brief A token can have a special role that can carry extra information
   /// about the token's formatting.
   std::unique_ptr<TokenRole> Role;
 
   /// \brief If this is an opening parenthesis, how are the parameters packed?
-  ParameterPackingKind PackingKind;
+  ParameterPackingKind PackingKind = PPK_Inconclusive;
 
   /// \brief The total length of the unwrapped line up to and including this
   /// token.
-  unsigned TotalLength;
+  unsigned TotalLength = 0;
 
   /// \brief The original 0-based column of this token, including expanded tabs.
   /// The configured TabWidth is used as tab width.
-  unsigned OriginalColumn;
+  unsigned OriginalColumn = 0;
 
   /// \brief The length of following tokens until the next natural split point,
   /// or the next token that can be broken.
-  unsigned UnbreakableTailLength;
+  unsigned UnbreakableTailLength = 0;
 
   // FIXME: Come up with a 'cleaner' concept.
   /// \brief The binding strength of a token. This is a combined value of
   /// operator precedence, parenthesis nesting, etc.
-  unsigned BindingStrength;
+  unsigned BindingStrength = 0;
 
   /// \brief The nesting level of this token, i.e. the number of surrounding (),
   /// [], {} or <>.
-  unsigned NestingLevel;
+  unsigned NestingLevel = 0;
 
   /// \brief Penalty for inserting a line break before this token.
-  unsigned SplitPenalty;
+  unsigned SplitPenalty = 0;
 
   /// \brief If this is the first ObjC selector name in an ObjC method
   /// definition or call, this contains the length of the longest name.
   ///
   /// This being set to 0 means that the selectors should not be colon-aligned,
   /// e.g. because several of them are block-type.
-  unsigned LongestObjCSelectorName;
+  unsigned LongestObjCSelectorName = 0;
 
   /// \brief Stores the number of required fake parentheses and the
   /// corresponding operator precedence.
@@ -249,29 +237,47 @@
   /// reverse order, i.e. inner fake parenthesis first.
   SmallVector<prec::Level, 4> FakeLParens;
   /// \brief Insert this many fake ) after this token for correct indentation.
-  unsigned FakeRParens;
+  unsigned FakeRParens = 0;
 
   /// \brief \c true if this token starts a binary expression, i.e. has at least
   /// one fake l_paren with a precedence greater than prec::Unknown.
-  bool StartsBinaryExpression;
+  bool StartsBinaryExpression = false;
   /// \brief \c true if this token ends a binary expression.
-  bool EndsBinaryExpression;
+  bool EndsBinaryExpression = false;
 
   /// \brief Is this is an operator (or "."/"->") in a sequence of operators
   /// with the same precedence, contains the 0-based operator index.
-  unsigned OperatorIndex;
+  unsigned OperatorIndex = 0;
 
   /// \brief Is this the last operator (or "."/"->") in a sequence of operators
   /// with the same precedence?
-  bool LastOperator;
+  bool LastOperator = false;
 
   /// \brief Is this token part of a \c DeclStmt defining multiple variables?
   ///
   /// Only set if \c Type == \c TT_StartOfName.
-  bool PartOfMultiVariableDeclStmt;
+  bool PartOfMultiVariableDeclStmt = false;
 
-  /// \brief Is this a foreach macro?
-  bool IsForEachMacro;
+  /// \brief If this is a bracket, this points to the matching one.
+  FormatToken *MatchingParen = nullptr;
+
+  /// \brief The previous token in the unwrapped line.
+  FormatToken *Previous = nullptr;
+
+  /// \brief The next token in the unwrapped line.
+  FormatToken *Next = nullptr;
+
+  /// \brief If this token starts a block, this contains all the unwrapped lines
+  /// in it.
+  SmallVector<AnnotatedLine *, 1> Children;
+
+  /// \brief Stores the formatting decision for the token once it was made.
+  FormatDecision Decision = FD_Unformatted;
+
+  /// \brief If \c true, this token has been fully formatted (indented and
+  /// potentially re-formatted inside), and we do not allow further formatting
+  /// changes.
+  bool Finalized = false;
 
   bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
   bool is(TokenType TT) const { return Type == TT; }
@@ -372,6 +378,15 @@
     }
   }
 
+  /// \brief Returns actual token start location without leading escaped
+  /// newlines and whitespace.
+  ///
+  /// This can be different to Tok.getLocation(), which includes leading escaped
+  /// newlines.
+  SourceLocation getStartOfNonWhitespace() const {
+    return WhitespaceRange.getEnd();
+  }
+
   prec::Level getPrecedence() const {
     return getBinOpPrecedence(Tok.getKind(), true, true);
   }
@@ -406,21 +421,6 @@
     return MatchingParen && MatchingParen->opensBlockTypeList(Style);
   }
 
-  FormatToken *MatchingParen;
-
-  FormatToken *Previous;
-  FormatToken *Next;
-
-  SmallVector<AnnotatedLine *, 1> Children;
-
-  /// \brief Stores the formatting decision for the token once it was made.
-  FormatDecision Decision;
-
-  /// \brief If \c true, this token has been fully formatted (indented and
-  /// potentially re-formatted inside), and we do not allow further formatting
-  /// changes.
-  bool Finalized;
-
 private:
   // Disallow copying.
   FormatToken(const FormatToken &) = delete;
@@ -545,6 +545,8 @@
     kw_throws = &IdentTable.get("throws");
     kw___except = &IdentTable.get("__except");
 
+    kw_mark = &IdentTable.get("mark");
+
     kw_option = &IdentTable.get("option");
     kw_optional = &IdentTable.get("optional");
     kw_repeated = &IdentTable.get("repeated");
@@ -582,6 +584,9 @@
   IdentifierInfo *kw_synchronized;
   IdentifierInfo *kw_throws;
 
+  // Pragma keywords.
+  IdentifierInfo *kw_mark;
+
   // Proto keywords.
   IdentifierInfo *kw_option;
   IdentifierInfo *kw_optional;
diff --git a/lib/Format/TokenAnnotator.cpp b/lib/Format/TokenAnnotator.cpp
index 5b148ea..8ffd67f 100644
--- a/lib/Format/TokenAnnotator.cpp
+++ b/lib/Format/TokenAnnotator.cpp
@@ -15,6 +15,7 @@
 
 #include "TokenAnnotator.h"
 #include "clang/Basic/SourceManager.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Support/Debug.h"
 
 #define DEBUG_TYPE "format-token-annotator"
@@ -43,8 +44,14 @@
   bool parseAngle() {
     if (!CurrentToken)
       return false;
-    ScopedContextCreator ContextCreator(*this, tok::less, 10);
     FormatToken *Left = CurrentToken->Previous;
+    Left->ParentBracket = Contexts.back().ContextKind;
+    ScopedContextCreator ContextCreator(*this, tok::less, 10);
+
+    // If this angle is in the context of an expression, we need to be more
+    // hesitant to detect it as opening template parameters.
+    bool InExprContext = Contexts.back().IsExpression;
+
     Contexts.back().IsExpression = false;
     // If there's a template keyword before the opening angle bracket, this is a
     // template parameter, not an argument.
@@ -68,8 +75,8 @@
         next();
         continue;
       }
-      if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace,
-                                tok::colon, tok::question))
+      if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) ||
+          (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext))
         return false;
       // If a && or || is found and interpreted as a binary operator, this set
       // of angles is likely part of something like "a < b && c > d". If the
@@ -92,6 +99,8 @@
   bool parseParens(bool LookForDecls = false) {
     if (!CurrentToken)
       return false;
+    FormatToken *Left = CurrentToken->Previous;
+    Left->ParentBracket = Contexts.back().ContextKind;
     ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
 
     // FIXME: This is a bit of a hack. Do better.
@@ -99,7 +108,6 @@
         Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
 
     bool StartsObjCMethodExpr = false;
-    FormatToken *Left = CurrentToken->Previous;
     if (CurrentToken->is(tok::caret)) {
       // (^ can start a block type.
       Left->Type = TT_ObjCBlockLParen;
@@ -117,22 +125,22 @@
          Left->Previous->is(TT_BinaryOperator))) {
       // static_assert, if and while usually contain expressions.
       Contexts.back().IsExpression = true;
-    } else if (Line.InPPDirective &&
-               (!Left->Previous ||
-                !Left->Previous->isOneOf(tok::identifier,
-                                         TT_OverloadedOperator))) {
-      Contexts.back().IsExpression = true;
     } else if (Left->Previous && Left->Previous->is(tok::r_square) &&
                Left->Previous->MatchingParen &&
                Left->Previous->MatchingParen->is(TT_LambdaLSquare)) {
       // This is a parameter list of a lambda expression.
       Contexts.back().IsExpression = false;
+    } else if (Line.InPPDirective &&
+               (!Left->Previous ||
+                !Left->Previous->isOneOf(tok::identifier,
+                                         TT_OverloadedOperator))) {
+      Contexts.back().IsExpression = true;
     } else if (Contexts[Contexts.size() - 2].CaretFound) {
       // This is the parameter list of an ObjC block.
       Contexts.back().IsExpression = false;
     } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) {
       Left->Type = TT_AttributeParen;
-    } else if (Left->Previous && Left->Previous->IsForEachMacro) {
+    } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) {
       // The first argument to a foreach macro is a declaration.
       Contexts.back().IsForEachMacro = true;
       Contexts.back().IsExpression = false;
@@ -226,6 +234,10 @@
         MightBeObjCForRangeLoop = false;
       if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in))
         CurrentToken->Type = TT_ObjCForIn;
+      // When we discover a 'new', we set CanBeExpression to 'false' in order to
+      // parse the type correctly. Reset that after a comma.
+      if (CurrentToken->is(tok::comma))
+        Contexts.back().CanBeExpression = true;
 
       FormatToken *Tok = CurrentToken;
       if (!consumeToken())
@@ -245,8 +257,10 @@
     // ')' or ']'), it could be the start of an Objective-C method
     // expression, or it could the the start of an Objective-C array literal.
     FormatToken *Left = CurrentToken->Previous;
+    Left->ParentBracket = Contexts.back().ContextKind;
     FormatToken *Parent = Left->getPreviousNonComment();
     bool StartsObjCMethodExpr =
+        Style.Language == FormatStyle::LK_Cpp &&
         Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) &&
         CurrentToken->isNot(tok::l_brace) &&
         (!Parent ||
@@ -255,19 +269,31 @@
          Parent->isUnaryOperator() ||
          Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) ||
          getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown);
-    ScopedContextCreator ContextCreator(*this, tok::l_square, 10);
-    Contexts.back().IsExpression = true;
     bool ColonFound = false;
 
-    if (StartsObjCMethodExpr) {
-      Contexts.back().ColonIsObjCMethodExpr = true;
-      Left->Type = TT_ObjCMethodExpr;
-    } else if (Parent && Parent->is(tok::at)) {
-      Left->Type = TT_ArrayInitializerLSquare;
-    } else if (Left->is(TT_Unknown)) {
-      Left->Type = TT_ArraySubscriptLSquare;
+    unsigned BindingIncrease = 1;
+    if (Left->is(TT_Unknown)) {
+      if (StartsObjCMethodExpr) {
+        Left->Type = TT_ObjCMethodExpr;
+      } else if (Style.Language == FormatStyle::LK_JavaScript && Parent &&
+                 Contexts.back().ContextKind == tok::l_brace &&
+                 Parent->isOneOf(tok::l_brace, tok::comma)) {
+        Left->Type = TT_JsComputedPropertyName;
+      } else if (Parent &&
+                 Parent->isOneOf(tok::at, tok::equal, tok::comma, tok::l_paren,
+                                 tok::l_square, tok::question, tok::colon,
+                                 tok::kw_return)) {
+        Left->Type = TT_ArrayInitializerLSquare;
+      } else {
+        BindingIncrease = 10;
+        Left->Type = TT_ArraySubscriptLSquare;
+      }
     }
 
+    ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease);
+    Contexts.back().IsExpression = true;
+    Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
+
     while (CurrentToken) {
       if (CurrentToken->is(tok::r_square)) {
         if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) &&
@@ -308,10 +334,8 @@
         }
         ColonFound = true;
       }
-      if (CurrentToken->is(tok::comma) &&
-          Style.Language != FormatStyle::LK_Proto &&
-          (Left->is(TT_ArraySubscriptLSquare) ||
-           (Left->is(TT_ObjCMethodExpr) && !ColonFound)))
+      if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) &&
+          !ColonFound)
         Left->Type = TT_ArrayInitializerLSquare;
       FormatToken *Tok = CurrentToken;
       if (!consumeToken())
@@ -324,6 +348,7 @@
   bool parseBrace() {
     if (CurrentToken) {
       FormatToken *Left = CurrentToken->Previous;
+      Left->ParentBracket = Contexts.back().ContextKind;
 
       if (Contexts.back().CaretFound)
         Left->Type = TT_ObjCBlockLBrace;
@@ -418,11 +443,13 @@
         return false;
       // Colons from ?: are handled in parseConditional().
       if (Style.Language == FormatStyle::LK_JavaScript) {
-        if (Contexts.back().ColonIsForRangeExpr ||
-            (Contexts.size() == 1 &&
+        if (Contexts.back().ColonIsForRangeExpr || // colon in for loop
+            (Contexts.size() == 1 &&               // switch/case labels
              !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||
-            Contexts.back().ContextKind == tok::l_paren ||
-            Contexts.back().ContextKind == tok::l_square) {
+            Contexts.back().ContextKind == tok::l_paren ||  // function params
+            Contexts.back().ContextKind == tok::l_square || // array type
+            (Contexts.size() == 1 &&
+             Line.MustBeDeclaration)) { // method/property declaration
           Tok->Type = TT_JsTypeColon;
           break;
         }
@@ -490,13 +517,15 @@
         return false;
       break;
     case tok::less:
-      if ((!Tok->Previous ||
+      if (!NonTemplateLess.count(Tok) &&
+          (!Tok->Previous ||
            (!Tok->Previous->Tok.isLiteral() &&
             !(Tok->Previous->is(tok::r_paren) && Contexts.size() > 1))) &&
           parseAngle()) {
         Tok->Type = TT_TemplateOpener;
       } else {
         Tok->Type = TT_BinaryOperator;
+        NonTemplateLess.insert(Tok);
         CurrentToken = Tok;
         next();
       }
@@ -529,26 +558,33 @@
       break;
     case tok::question:
       if (Style.Language == FormatStyle::LK_JavaScript && Tok->Next &&
-          Tok->Next->isOneOf(tok::colon, tok::semi, tok::r_paren,
+          Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren,
                              tok::r_brace)) {
-        // Question marks before semicolons, colons, commas, etc. indicate
-        // optional types (fields, parameters), e.g.
-        // `function(x?: string, y?) {...}` or `class X {y?;}`
+        // Question marks before semicolons, colons, etc. indicate optional
+        // types (fields, parameters), e.g.
+        //   function(x?: string, y?) {...}
+        //   class X { y?; }
         Tok->Type = TT_JsTypeOptionalQuestion;
         break;
       }
+      // Declarations cannot be conditional expressions, this can only be part
+      // of a type declaration.
+      if (Line.MustBeDeclaration &&
+          Style.Language == FormatStyle::LK_JavaScript)
+        break;
       parseConditional();
       break;
     case tok::kw_template:
       parseTemplateDeclaration();
       break;
     case tok::comma:
-      if (Contexts.back().FirstStartOfName && Contexts.size() == 1) {
+      if (Contexts.back().InCtorInitializer)
+        Tok->Type = TT_CtorInitializerComma;
+      else if (Contexts.back().FirstStartOfName &&
+               (Contexts.size() == 1 || Line.First->is(tok::kw_for))) {
         Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
         Line.IsMultiVariableDeclStmt = true;
       }
-      if (Contexts.back().InCtorInitializer)
-        Tok->Type = TT_CtorInitializerComma;
       if (Contexts.back().IsForEachMacro)
         Contexts.back().IsExpression = true;
       break;
@@ -582,11 +618,14 @@
 
   void parsePragma() {
     next(); // Consume "pragma".
-    if (CurrentToken && CurrentToken->TokenText == "mark") {
+    if (CurrentToken &&
+        CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option)) {
+      bool IsMark = CurrentToken->is(Keywords.kw_mark);
+      next(); // Consume "mark" or "option".
       next(); // Consume first token (so we fix leading whitespace).
       while (CurrentToken) {
-        CurrentToken->Type = TT_ImplicitStringLiteral;
+        if (IsMark || CurrentToken->Previous->is(TT_BinaryOperator))
+          CurrentToken->Type = TT_ImplicitStringLiteral;
         next();
       }
     }
@@ -607,6 +646,7 @@
       return Type;
     switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
     case tok::pp_include:
+    case tok::pp_include_next:
     case tok::pp_import:
       next();
       parseIncludeDirective();
@@ -634,9 +674,9 @@
 
 public:
   LineType parseLine() {
-    if (CurrentToken->is(tok::hash)) {
+    NonTemplateLess.clear();
+    if (CurrentToken->is(tok::hash))
       return parsePreprocessorDirective();
-    }
 
     // Directly allow to 'import <string-literal>' to support protocol buffer
     // definitions (code.google.com/p/protobuf) or missing "#" (either way we
@@ -660,6 +700,15 @@
       return LT_ImportStatement;
     }
 
+    // In .proto files, top-level options are very similar to import statements
+    // and should not be line-wrapped.
+    if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 &&
+        CurrentToken->is(Keywords.kw_option)) {
+      next();
+      if (CurrentToken && CurrentToken->is(tok::identifier))
+        return LT_ImportStatement;
+    }
+
     bool KeywordVirtualFound = false;
     bool ImportStatement = false;
     while (CurrentToken) {
@@ -703,9 +752,10 @@
 
     // Reset token type in case we have already looked at it and then
     // recovered from an error (e.g. failure to find the matching >).
-    if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_FunctionLBrace,
-                               TT_ImplicitStringLiteral, TT_RegexLiteral,
-                               TT_TrailingReturnArrow))
+    if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro,
+                               TT_FunctionLBrace, TT_ImplicitStringLiteral,
+                               TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow,
+                               TT_RegexLiteral))
       CurrentToken->Type = TT_Unknown;
     CurrentToken->Role.reset();
     CurrentToken->MatchingParen = nullptr;
@@ -787,10 +837,15 @@
             Previous->Type = TT_PointerOrReference;
         }
       }
+    } else if (Current.is(tok::lessless) &&
+               (!Current.Previous || !Current.Previous->is(tok::kw_operator))) {
+      Contexts.back().IsExpression = true;
     } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
       Contexts.back().IsExpression = true;
     } else if (Current.is(TT_TrailingReturnArrow)) {
       Contexts.back().IsExpression = false;
+    } else if (Current.is(TT_LambdaArrow)) {
+      Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java;
     } else if (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
                !Line.InPPDirective &&
                (!Current.Previous ||
@@ -857,28 +912,56 @@
     } else if (Current.isOneOf(tok::exclaim, tok::tilde)) {
       Current.Type = TT_UnaryOperator;
     } else if (Current.is(tok::question)) {
-      Current.Type = TT_ConditionalExpr;
+      if (Style.Language == FormatStyle::LK_JavaScript &&
+          Line.MustBeDeclaration) {
+        // In JavaScript, `interface X { foo?(): bar; }` is an optional method
+        // on the interface, not a ternary expression.
+        Current.Type = TT_JsTypeOptionalQuestion;
+      } else {
+        Current.Type = TT_ConditionalExpr;
+      }
     } else if (Current.isBinaryOperator() &&
                (!Current.Previous || Current.Previous->isNot(tok::l_square))) {
       Current.Type = TT_BinaryOperator;
     } else if (Current.is(tok::comment)) {
-      Current.Type =
-          Current.TokenText.startswith("/*") ? TT_BlockComment : TT_LineComment;
+      if (Current.TokenText.startswith("/*")) {
+        if (Current.TokenText.endswith("*/"))
+          Current.Type = TT_BlockComment;
+        else
+          // The lexer has for some reason determined a comment here. But we
+          // cannot really handle it, if it isn't properly terminated.
+          Current.Tok.setKind(tok::unknown);
+      } else {
+        Current.Type = TT_LineComment;
+      }
     } else if (Current.is(tok::r_paren)) {
       if (rParenEndsCast(Current))
         Current.Type = TT_CastRParen;
+      if (Current.MatchingParen && Current.Next &&
+          !Current.Next->isBinaryOperator() &&
+          !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace))
+        if (FormatToken *BeforeParen = Current.MatchingParen->Previous)
+          if (BeforeParen->is(tok::identifier) &&
+              BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
+              (!BeforeParen->Previous ||
+               BeforeParen->Previous->ClosesTemplateDeclaration))
+            Current.Type = TT_FunctionAnnotationRParen;
     } else if (Current.is(tok::at) && Current.Next) {
-      switch (Current.Next->Tok.getObjCKeywordID()) {
-      case tok::objc_interface:
-      case tok::objc_implementation:
-      case tok::objc_protocol:
-        Current.Type = TT_ObjCDecl;
-        break;
-      case tok::objc_property:
-        Current.Type = TT_ObjCProperty;
-        break;
-      default:
-        break;
+      if (Current.Next->isStringLiteral()) {
+        Current.Type = TT_ObjCStringLiteral;
+      } else {
+        switch (Current.Next->Tok.getObjCKeywordID()) {
+        case tok::objc_interface:
+        case tok::objc_implementation:
+        case tok::objc_protocol:
+          Current.Type = TT_ObjCDecl;
+          break;
+        case tok::objc_property:
+          Current.Type = TT_ObjCProperty;
+          break;
+        default:
+          break;
+        }
       }
     } else if (Current.is(tok::period)) {
       FormatToken *PreviousNoComment = Current.getPreviousNonComment();
@@ -998,7 +1081,7 @@
     // there is also an identifier before the ().
     else if (LeftOfParens && Tok.Next &&
              (LeftOfParens->Tok.getIdentifierInfo() == nullptr ||
-              LeftOfParens->is(tok::kw_return)) &&
+              LeftOfParens->isOneOf(tok::kw_return, tok::kw_case)) &&
              !LeftOfParens->isOneOf(TT_OverloadedOperator, tok::at,
                                     TT_TemplateCloser)) {
       if (Tok.Next->isOneOf(tok::identifier, tok::numeric_constant)) {
@@ -1018,7 +1101,8 @@
         }
 
         for (; Prev != Tok.MatchingParen; Prev = Prev->Previous) {
-          if (!Prev || !Prev->isOneOf(tok::kw_const, tok::identifier)) {
+          if (!Prev ||
+              !Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon)) {
             IsCast = false;
             break;
           }
@@ -1131,6 +1215,12 @@
   FormatToken *CurrentToken;
   bool AutoFound;
   const AdditionalKeywords &Keywords;
+
+  // Set of "<" tokens that do not open a template parameter list. If parseAngle
+  // determines that a specific token can't be a template opener, it will make
+  // the same decision irrespective of the decisions for tokens leading up to it.
+  // Store this information to prevent this from causing exponential runtime.
+  llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess;
 };
 
 static const int PrecedenceUnaryOperator = prec::PointerToMember + 1;
@@ -1232,25 +1322,27 @@
       const FormatToken *NextNonComment = Current->getNextNonComment();
       if (Current->is(TT_ConditionalExpr))
         return prec::Conditional;
-      else if (NextNonComment && NextNonComment->is(tok::colon) &&
-               NextNonComment->is(TT_DictLiteral))
+      if (NextNonComment && NextNonComment->is(tok::colon) &&
+          NextNonComment->is(TT_DictLiteral))
         return prec::Comma;
-      else if (Current->is(TT_LambdaArrow))
+      if (Current->is(TT_LambdaArrow))
         return prec::Comma;
-      else if (Current->isOneOf(tok::semi, TT_InlineASMColon,
-                                TT_SelectorName) ||
-               (Current->is(tok::comment) && NextNonComment &&
-                NextNonComment->is(TT_SelectorName)))
+      if (Current->is(TT_JsFatArrow))
+        return prec::Assignment;
+      if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName,
+                           TT_JsComputedPropertyName) ||
+          (Current->is(tok::comment) && NextNonComment &&
+           NextNonComment->is(TT_SelectorName)))
         return 0;
-      else if (Current->is(TT_RangeBasedForLoopColon))
+      if (Current->is(TT_RangeBasedForLoopColon))
         return prec::Comma;
-      else if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
+      if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
         return Current->getPrecedence();
-      else if (Current->isOneOf(tok::period, tok::arrow))
+      if (Current->isOneOf(tok::period, tok::arrow))
         return PrecedenceArrowAndPeriod;
-      else if (Style.Language == FormatStyle::LK_Java &&
-               Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements,
-                                Keywords.kw_throws))
+      if (Style.Language == FormatStyle::LK_Java &&
+          Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements,
+                           Keywords.kw_throws))
         return 0;
     }
     return -1;
@@ -1389,7 +1481,8 @@
     if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
         Tok->isOneOf(TT_PointerOrReference, TT_StartOfName))
       return true;
-    if (Tok->isOneOf(tok::l_brace, tok::string_literal) || Tok->Tok.isLiteral())
+    if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) ||
+        Tok->Tok.isLiteral())
       return false;
   }
   return false;
@@ -1525,7 +1618,7 @@
     if (Left.is(tok::comma) && Left.NestingLevel == 0)
       return 3;
   } else if (Style.Language == FormatStyle::LK_JavaScript) {
-    if (Right.is(Keywords.kw_function))
+    if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma))
       return 100;
   }
 
@@ -1535,7 +1628,11 @@
   if (Right.is(tok::l_square)) {
     if (Style.Language == FormatStyle::LK_Proto)
       return 1;
-    if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare))
+    // Slightly prefer formatting local lambda definitions like functions.
+    if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
+      return 50;
+    if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
+                       TT_ArrayInitializerLSquare))
       return 500;
   }
 
@@ -1544,14 +1641,14 @@
     if (Line.First->is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
       return 3;
     if (Left.is(TT_StartOfName))
-      return 20;
+      return 110;
     if (InFunctionDecl && Right.NestingLevel == 0)
       return Style.PenaltyReturnTypeOnItsOwnLine;
     return 200;
   }
   if (Right.is(TT_PointerOrReference))
     return 190;
-  if (Right.is(TT_TrailingReturnArrow))
+  if (Right.is(TT_LambdaArrow))
     return 110;
   if (Left.is(tok::equal) && Right.is(tok::l_brace))
     return 150;
@@ -1620,7 +1717,8 @@
     return 50;
 
   if (Right.is(tok::lessless)) {
-    if (Left.is(tok::string_literal)) {
+    if (Left.is(tok::string_literal) &&
+        (!Right.LastOperator || Right.OperatorIndex != 1)) {
       StringRef Content = Left.TokenText;
       if (Content.startswith("\""))
         Content = Content.drop_front(1);
@@ -1638,6 +1736,9 @@
   prec::Level Level = Left.getPrecedence();
   if (Level != prec::Unknown)
     return Level;
+  Level = Right.getPrecedence();
+  if (Level != prec::Unknown)
+    return Level;
 
   return 3;
 }
@@ -1699,7 +1800,9 @@
     return true;
   if (Left.is(TT_PointerOrReference))
     return Right.Tok.isLiteral() || Right.is(TT_BlockComment) ||
-           (!Right.isOneOf(TT_PointerOrReference, tok::l_paren) &&
+           (Right.is(tok::l_brace) && Right.BlockKind == BK_Block) ||
+           (!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
+                           tok::l_paren) &&
             (Style.PointerAlignment != FormatStyle::PAS_Right &&
              !Line.IsMultiVariableDeclStmt) &&
             Left.Previous &&
@@ -1737,13 +1840,12 @@
     return Line.Type == LT_ObjCDecl || Left.is(tok::semi) ||
            (Style.SpaceBeforeParens != FormatStyle::SBPO_Never &&
             (Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while,
-                          tok::kw_switch, tok::kw_case) ||
+                          tok::kw_switch, tok::kw_case, TT_ForEachMacro) ||
              (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch,
                            tok::kw_new, tok::kw_delete) &&
-              (!Left.Previous || Left.Previous->isNot(tok::period))) ||
-             Left.IsForEachMacro)) ||
+              (!Left.Previous || Left.Previous->isNot(tok::period))))) ||
            (Style.SpaceBeforeParens == FormatStyle::SBPO_Always &&
-            (Left.is(tok::identifier) || Left.isFunctionLikeKeyword()) &&
+            (Left.is(tok::identifier) || Left.isFunctionLikeKeyword() || Left.is(tok::r_paren)) &&
             Line.Type != LT_PreprocessorDirective);
   }
   if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
@@ -1783,15 +1885,17 @@
         Left.isOneOf(Keywords.kw_returns, Keywords.kw_option))
       return true;
   } else if (Style.Language == FormatStyle::LK_JavaScript) {
-    if (Left.is(Keywords.kw_var))
+    if (Left.isOneOf(Keywords.kw_var, TT_JsFatArrow))
       return true;
     if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion))
       return false;
     if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) &&
         Line.First->isOneOf(Keywords.kw_import, tok::kw_export))
       return false;
+    if (Left.is(tok::ellipsis))
+      return false;
     if (Left.is(TT_TemplateCloser) &&
-        !Right.isOneOf(tok::l_brace, tok::comma, tok::l_square,
+        !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square,
                        Keywords.kw_implements, Keywords.kw_extends))
       // Type assertions ('<type>expr') are not followed by whitespace. Other
       // locations that should have whitespace following are identified by the
@@ -1800,8 +1904,6 @@
   } else if (Style.Language == FormatStyle::LK_Java) {
     if (Left.is(tok::r_square) && Right.is(tok::l_brace))
       return true;
-    if (Left.is(TT_LambdaArrow) || Right.is(TT_LambdaArrow))
-      return true;
     if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren))
       return Style.SpaceBeforeParens != FormatStyle::SBPO_Never;
     if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private,
@@ -1826,7 +1928,8 @@
       (Right.is(tok::equal) || Left.is(tok::equal)))
     return false;
 
-  if (Right.is(TT_TrailingReturnArrow) || Left.is(TT_TrailingReturnArrow))
+  if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) ||
+      Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow))
     return true;
   if (Left.is(tok::comma))
     return true;
@@ -1838,20 +1941,32 @@
     return Right.is(tok::coloncolon);
   if (Right.is(TT_OverloadedOperatorLParen))
     return false;
-  if (Right.is(tok::colon))
-    return !Line.First->isOneOf(tok::kw_case, tok::kw_default) &&
-           Right.getNextNonComment() && Right.isNot(TT_ObjCMethodExpr) &&
-           !Left.is(tok::question) &&
-           !(Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon)) &&
-           (Right.isNot(TT_DictLiteral) || Style.SpacesInContainerLiterals);
+  if (Right.is(tok::colon)) {
+    if (Line.First->isOneOf(tok::kw_case, tok::kw_default) ||
+        !Right.getNextNonComment() || Right.getNextNonComment()->is(tok::semi))
+      return false;
+    if (Right.is(TT_ObjCMethodExpr))
+      return false;
+    if (Left.is(tok::question))
+      return false;
+    if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon))
+      return false;
+    if (Right.is(TT_DictLiteral))
+      return Style.SpacesInContainerLiterals;
+    return true;
+  }
   if (Left.is(TT_UnaryOperator))
     return Right.is(TT_BinaryOperator);
+
+  // If the next token is a binary operator or a selector name, we have
+  // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly.
   if (Left.is(TT_CastRParen))
-    return Style.SpaceAfterCStyleCast || Right.is(TT_BinaryOperator);
-  if (Left.is(tok::greater) && Right.is(tok::greater)) {
+    return Style.SpaceAfterCStyleCast ||
+           Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
+
+  if (Left.is(tok::greater) && Right.is(tok::greater))
     return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) &&
            (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles);
-  }
   if (Right.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
       Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar))
     return false;
@@ -1915,8 +2030,10 @@
     return Left.BlockKind != BK_BracedInit &&
            Left.isNot(TT_CtorInitializerColon) &&
            (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
-  if (Right.Previous->isTrailingComment() ||
-      (Right.isStringLiteral() && Right.Previous->isStringLiteral()))
+  if (Left.isTrailingComment())
+    return true;
+  if (Left.isStringLiteral() &&
+      (Right.isStringLiteral() || Right.is(TT_ObjCStringLiteral)))
     return true;
   if (Right.Previous->IsUnterminatedLiteral)
     return true;
@@ -1942,6 +2059,8 @@
       Style.Language == FormatStyle::LK_Proto)
     // Don't put enums onto single lines in protocol buffers.
     return true;
+  if (Right.is(TT_InlineASMBrace))
+    return Right.HasUnescapedNewline;
   if (Style.Language == FormatStyle::LK_JavaScript && Right.is(tok::r_brace) &&
       Left.is(tok::l_brace) && !Left.Children.empty())
     // Support AllowShortFunctionsOnASingleLine for JavaScript.
@@ -1970,13 +2089,14 @@
         Left.Previous->is(tok::char_constant))
       return true;
     if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) &&
-        Left.NestingLevel == 0 && Left.Previous &&
+        Line.Level == 0 && Left.Previous &&
         Left.Previous->is(tok::equal) &&
         Line.First->isOneOf(tok::identifier, Keywords.kw_import,
-                            tok::kw_export) &&
+                            tok::kw_export, tok::kw_const) &&
         // kw_var is a pseudo-token that's a tok::identifier, so matches above.
         !Line.First->is(Keywords.kw_var))
-      // Enum style object literal.
+      // Object literals on the top level of a file are treated as "enum-style".
+      // Each key/value pair is put on a separate line, instead of bin-packing.
       return true;
   } else if (Style.Language == FormatStyle::LK_Java) {
     if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
@@ -1991,6 +2111,7 @@
                                     const FormatToken &Right) {
   const FormatToken &Left = *Right.Previous;
 
+  // Language-specific stuff.
   if (Style.Language == FormatStyle::LK_Java) {
     if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
                      Keywords.kw_implements))
@@ -1998,6 +2119,9 @@
     if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
                       Keywords.kw_implements))
       return true;
+  } else if (Style.Language == FormatStyle::LK_JavaScript) {
+    if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace))
+      return false;
   }
 
   if (Left.is(tok::at))
@@ -2035,8 +2159,9 @@
     return false;
   if (Left.is(tok::colon) && (Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)))
     return true;
-  if (Right.is(TT_SelectorName))
-    return true;
+  if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next &&
+                                    Right.Next->is(TT_ObjCMethodExpr)))
+    return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls.
   if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
     return true;
   if (Left.ClosesTemplateDeclaration)
@@ -2051,7 +2176,8 @@
   if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) ||
       Left.is(tok::kw_operator))
     return false;
-  if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
+  if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) &&
+      Line.Type == LT_VirtualFunctionDecl)
     return false;
   if (Left.is(tok::l_paren) && Left.is(TT_AttributeParen))
     return false;
diff --git a/lib/Format/UnwrappedLineFormatter.cpp b/lib/Format/UnwrappedLineFormatter.cpp
index 6c92d79..191b78d 100644
--- a/lib/Format/UnwrappedLineFormatter.cpp
+++ b/lib/Format/UnwrappedLineFormatter.cpp
@@ -25,11 +25,137 @@
          NextNext && NextNext->is(tok::l_brace);
 }
 
+/// \brief Tracks the indent level of \c AnnotatedLines across levels.
+///
+/// \c nextLine must be called for each \c AnnotatedLine, after which \c
+/// getIndent() will return the indent for the last line \c nextLine was called
+/// with.
+/// If the line is not formatted (and thus the indent does not change), calling
+/// \c adjustToUnmodifiedLine after the call to \c nextLine will cause
+/// subsequent lines on the same level to be indented at the same level as the
+/// given line.
+class LevelIndentTracker {
+public:
+  LevelIndentTracker(const FormatStyle &Style,
+                     const AdditionalKeywords &Keywords, unsigned StartLevel,
+                     int AdditionalIndent)
+      : Style(Style), Keywords(Keywords), AdditionalIndent(AdditionalIndent) {
+    for (unsigned i = 0; i != StartLevel; ++i)
+      IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent);
+  }
+
+  /// \brief Returns the indent for the current line.
+  unsigned getIndent() const { return Indent; }
+
+  /// \brief Update the indent state given that \p Line is going to be formatted
+  /// next.
+  void nextLine(const AnnotatedLine &Line) {
+    Offset = getIndentOffset(*Line.First);
+    // Update the indent level cache size so that we can rely on it
+    // having the right size in adjustToUnmodifiedLine.
+    while (IndentForLevel.size() <= Line.Level)
+      IndentForLevel.push_back(-1);
+    if (Line.InPPDirective) {
+      Indent = Line.Level * Style.IndentWidth + AdditionalIndent;
+    } else {
+      IndentForLevel.resize(Line.Level + 1);
+      Indent = getIndent(IndentForLevel, Line.Level);
+    }
+    if (static_cast<int>(Indent) + Offset >= 0)
+      Indent += Offset;
+  }
+
+  /// \brief Update the level indent to adapt to the given \p Line.
+  ///
+  /// When a line is not formatted, we move the subsequent lines on the same
+  /// level to the same indent.
+  /// Note that \c nextLine must have been called before this method.
+  void adjustToUnmodifiedLine(const AnnotatedLine &Line) {
+    unsigned LevelIndent = Line.First->OriginalColumn;
+    if (static_cast<int>(LevelIndent) - Offset >= 0)
+      LevelIndent -= Offset;
+    if ((Line.First->isNot(tok::comment) || IndentForLevel[Line.Level] == -1) &&
+        !Line.InPPDirective)
+      IndentForLevel[Line.Level] = LevelIndent;
+  }
+
+private:
+  /// \brief Get the offset of the line relatively to the level.
+  ///
+  /// For example, 'public:' labels in classes are offset by 1 or 2
+  /// characters to the left from their level.
+  int getIndentOffset(const FormatToken &RootToken) {
+    if (Style.Language == FormatStyle::LK_Java ||
+        Style.Language == FormatStyle::LK_JavaScript)
+      return 0;
+    if (RootToken.isAccessSpecifier(false) ||
+        RootToken.isObjCAccessSpecifier() ||
+        (RootToken.is(Keywords.kw_signals) && RootToken.Next &&
+         RootToken.Next->is(tok::colon)))
+      return Style.AccessModifierOffset;
+    return 0;
+  }
+
+  /// \brief Get the indent of \p Level from \p IndentForLevel.
+  ///
+  /// \p IndentForLevel must contain the indent for the level \c l
+  /// at \p IndentForLevel[l], or a value < 0 if the indent for
+  /// that level is unknown.
+  unsigned getIndent(ArrayRef<int> IndentForLevel, unsigned Level) {
+    if (IndentForLevel[Level] != -1)
+      return IndentForLevel[Level];
+    if (Level == 0)
+      return 0;
+    return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
+  }
+
+  const FormatStyle &Style;
+  const AdditionalKeywords &Keywords;
+  const unsigned AdditionalIndent;
+
+  /// \brief The indent in characters for each level.
+  std::vector<int> IndentForLevel;
+
+  /// \brief Offset of the current line relative to the indent level.
+  ///
+  /// For example, the 'public' keyword is often indented with a negative
+  /// offset.
+  int Offset = 0;
+
+  /// \brief The current line's indent.
+  unsigned Indent = 0;
+};
+
 class LineJoiner {
 public:
-  LineJoiner(const FormatStyle &Style, const AdditionalKeywords &Keywords)
-      : Style(Style), Keywords(Keywords) {}
+  LineJoiner(const FormatStyle &Style, const AdditionalKeywords &Keywords,
+             const SmallVectorImpl<AnnotatedLine *> &Lines)
+      : Style(Style), Keywords(Keywords), End(Lines.end()),
+        Next(Lines.begin()) {}
 
+  /// \brief Returns the next line, merging multiple lines into one if possible.
+  const AnnotatedLine *getNextMergedLine(bool DryRun,
+                                         LevelIndentTracker &IndentTracker) {
+    if (Next == End)
+      return nullptr;
+    const AnnotatedLine *Current = *Next;
+    IndentTracker.nextLine(*Current);
+    unsigned MergedLines =
+        tryFitMultipleLinesInOne(IndentTracker.getIndent(), Next, End);
+    if (MergedLines > 0 && Style.ColumnLimit == 0)
+      // Disallow line merging if there is a break at the start of one of the
+      // input lines.
+      for (unsigned i = 0; i < MergedLines; ++i)
+        if (Next[i + 1]->First->NewlinesBefore > 0)
+          MergedLines = 0;
+    if (!DryRun)
+      for (unsigned i = 0; i < MergedLines; ++i)
+        join(*Next[i], *Next[i + 1]);
+    Next = Next + MergedLines + 1;
+    return Current;
+  }
+
+private:
   /// \brief Calculates how many lines can be merged into 1 starting at \p I.
   unsigned
   tryFitMultipleLinesInOne(unsigned Indent,
@@ -63,7 +189,7 @@
     // If necessary, change to something smarter.
     bool MergeShortFunctions =
         Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_All ||
-        (Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty &&
+        (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty &&
          I[1]->First->is(tok::r_brace)) ||
         (Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Inline &&
          TheLine->Level != 0);
@@ -119,7 +245,6 @@
     return 0;
   }
 
-private:
   unsigned
   tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
                             SmallVectorImpl<AnnotatedLine *>::const_iterator E,
@@ -151,8 +276,8 @@
       return 0;
     if (1 + I[1]->Last->TotalLength > Limit)
       return 0;
-    if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for,
-                             tok::kw_while, TT_LineComment))
+    if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, tok::kw_while,
+                             TT_LineComment))
       return 0;
     // Only inline simple if's (no nested if or else).
     if (I + 2 != E && Line.First->is(tok::kw_if) &&
@@ -161,9 +286,10 @@
     return 1;
   }
 
-  unsigned tryMergeShortCaseLabels(
-      SmallVectorImpl<AnnotatedLine *>::const_iterator I,
-      SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) {
+  unsigned
+  tryMergeShortCaseLabels(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
+                          SmallVectorImpl<AnnotatedLine *>::const_iterator E,
+                          unsigned Limit) {
     if (Limit == 0 || I + 1 == E ||
         I[1]->First->isOneOf(tok::kw_case, tok::kw_default))
       return 0;
@@ -203,7 +329,8 @@
 
     // Check that the current line allows merging. This depends on whether we
     // are in a control flow statements as well as several style flags.
-    if (Line.First->isOneOf(tok::kw_else, tok::kw_case))
+    if (Line.First->isOneOf(tok::kw_else, tok::kw_case) ||
+        (Line.First->Next && Line.First->Next->is(tok::kw_else)))
       return 0;
     if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::kw_try,
                             tok::kw___try, tok::kw_catch, tok::kw___finally,
@@ -238,7 +365,8 @@
     } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace) &&
                !startsExternCBlock(Line)) {
       // We don't merge short records.
-      if (Line.First->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct))
+      if (Line.First->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct,
+                              Keywords.kw_interface))
         return 0;
 
       // Check that we still have three lines and they fit into the limit.
@@ -264,6 +392,10 @@
       if (Tok->isNot(tok::r_brace))
         return 0;
 
+      // Don't merge "if (a) { .. } else {".
+      if (Tok->Next && Tok->Next->is(tok::kw_else))
+        return 0;
+
       return 2;
     }
     return 0;
@@ -297,31 +429,27 @@
     return false;
   }
 
-  const FormatStyle &Style;
-  const AdditionalKeywords &Keywords;
-};
-
-class NoColumnLimitFormatter {
-public:
-  NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {}
-
-  /// \brief Formats the line starting at \p State, simply keeping all of the
-  /// input's line breaking decisions.
-  void format(unsigned FirstIndent, const AnnotatedLine *Line) {
-    LineState State =
-        Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false);
-    while (State.NextToken) {
-      bool Newline =
-          Indenter->mustBreak(State) ||
-          (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0);
-      Indenter->addTokenToState(State, Newline, /*DryRun=*/false);
+  void join(AnnotatedLine &A, const AnnotatedLine &B) {
+    assert(!A.Last->Next);
+    assert(!B.First->Previous);
+    if (B.Affected)
+      A.Affected = true;
+    A.Last->Next = B.First;
+    B.First->Previous = A.Last;
+    B.First->CanBreakBefore = true;
+    unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
+    for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
+      Tok->TotalLength += LengthA;
+      A.Last = Tok;
     }
   }
 
-private:
-  ContinuationIndenter *Indenter;
-};
+  const FormatStyle &Style;
+  const AdditionalKeywords &Keywords;
+  const SmallVectorImpl<AnnotatedLine*>::const_iterator End;
 
+  SmallVectorImpl<AnnotatedLine*>::const_iterator Next;
+};
 
 static void markFinalized(FormatToken *Tok) {
   for (; Tok; Tok = Tok->Next) {
@@ -331,13 +459,346 @@
   }
 }
 
+#ifndef NDEBUG
+static void printLineState(const LineState &State) {
+  llvm::dbgs() << "State: ";
+  for (const ParenState &P : State.Stack) {
+    llvm::dbgs() << P.Indent << "|" << P.LastSpace << "|" << P.NestedBlockIndent
+                 << " ";
+  }
+  llvm::dbgs() << State.NextToken->TokenText << "\n";
+}
+#endif
+
+/// \brief Base class for classes that format one \c AnnotatedLine.
+class LineFormatter {
+public:
+  LineFormatter(ContinuationIndenter *Indenter, WhitespaceManager *Whitespaces,
+                const FormatStyle &Style,
+                UnwrappedLineFormatter *BlockFormatter)
+      : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style),
+        BlockFormatter(BlockFormatter) {}
+  virtual ~LineFormatter() {}
+
+  /// \brief Formats an \c AnnotatedLine and returns the penalty.
+  ///
+  /// If \p DryRun is \c false, directly applies the changes.
+  virtual unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
+                              bool DryRun) = 0;
+
+protected:
+  /// \brief If the \p State's next token is an r_brace closing a nested block,
+  /// format the nested block before it.
+  ///
+  /// Returns \c true if all children could be placed successfully and adapts
+  /// \p Penalty as well as \p State. If \p DryRun is false, also directly
+  /// creates changes using \c Whitespaces.
+  ///
+  /// The crucial idea here is that children always get formatted upon
+  /// encountering the closing brace right after the nested block. Now, if we
+  /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
+  /// \c false), the entire block has to be kept on the same line (which is only
+  /// possible if it fits on the line, only contains a single statement, etc.
+  ///
+  /// If \p NewLine is true, we format the nested block on separate lines, i.e.
+  /// break after the "{", format all lines with correct indentation and the put
+  /// the closing "}" on yet another new line.
+  ///
+  /// This enables us to keep the simple structure of the
+  /// \c UnwrappedLineFormatter, where we only have two options for each token:
+  /// break or don't break.
+  bool formatChildren(LineState &State, bool NewLine, bool DryRun,
+                      unsigned &Penalty) {
+    const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
+    FormatToken &Previous = *State.NextToken->Previous;
+    if (!LBrace || LBrace->isNot(tok::l_brace) ||
+        LBrace->BlockKind != BK_Block || Previous.Children.size() == 0)
+      // The previous token does not open a block. Nothing to do. We don't
+      // assert so that we can simply call this function for all tokens.
+      return true;
+
+    if (NewLine) {
+      int AdditionalIndent = State.Stack.back().Indent -
+                             Previous.Children[0]->Level * Style.IndentWidth;
+
+      Penalty +=
+          BlockFormatter->format(Previous.Children, DryRun, AdditionalIndent,
+                                 /*FixBadIndentation=*/true);
+      return true;
+    }
+
+    if (Previous.Children[0]->First->MustBreakBefore)
+      return false;
+
+    // Cannot merge multiple statements into a single line.
+    if (Previous.Children.size() > 1)
+      return false;
+
+    // Cannot merge into one line if this line ends on a comment.
+    if (Previous.is(tok::comment))
+      return false;
+
+    // We can't put the closing "}" on a line with a trailing comment.
+    if (Previous.Children[0]->Last->isTrailingComment())
+      return false;
+
+    // If the child line exceeds the column limit, we wouldn't want to merge it.
+    // We add +2 for the trailing " }".
+    if (Style.ColumnLimit > 0 &&
+        Previous.Children[0]->Last->TotalLength + State.Column + 2 >
+            Style.ColumnLimit)
+      return false;
+
+    if (!DryRun) {
+      Whitespaces->replaceWhitespace(
+          *Previous.Children[0]->First,
+          /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1,
+          /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
+    }
+    Penalty += formatLine(*Previous.Children[0], State.Column + 1, DryRun);
+
+    State.Column += 1 + Previous.Children[0]->Last->TotalLength;
+    return true;
+  }
+
+  ContinuationIndenter *Indenter;
+
+private:
+  WhitespaceManager *Whitespaces;
+  const FormatStyle &Style;
+  UnwrappedLineFormatter *BlockFormatter;
+};
+
+/// \brief Formatter that keeps the existing line breaks.
+class NoColumnLimitLineFormatter : public LineFormatter {
+public:
+  NoColumnLimitLineFormatter(ContinuationIndenter *Indenter,
+                             WhitespaceManager *Whitespaces,
+                             const FormatStyle &Style,
+                             UnwrappedLineFormatter *BlockFormatter)
+      : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
+
+  /// \brief Formats the line, simply keeping all of the input's line breaking
+  /// decisions.
+  unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
+                      bool DryRun) override {
+    assert(!DryRun);
+    LineState State =
+        Indenter->getInitialState(FirstIndent, &Line, /*DryRun=*/false);
+    while (State.NextToken) {
+      bool Newline =
+          Indenter->mustBreak(State) ||
+          (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0);
+      unsigned Penalty = 0;
+      formatChildren(State, Newline, /*DryRun=*/false, Penalty);
+      Indenter->addTokenToState(State, Newline, /*DryRun=*/false);
+    }
+    return 0;
+  }
+};
+
+/// \brief Formatter that puts all tokens into a single line without breaks.
+class NoLineBreakFormatter : public LineFormatter {
+public:
+  NoLineBreakFormatter(ContinuationIndenter *Indenter,
+                       WhitespaceManager *Whitespaces, const FormatStyle &Style,
+                       UnwrappedLineFormatter *BlockFormatter)
+      : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
+
+  /// \brief Puts all tokens into a single line.
+  unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
+                      bool DryRun) {
+    unsigned Penalty = 0;
+    LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
+    while (State.NextToken) {
+      formatChildren(State, /*Newline=*/false, DryRun, Penalty);
+      Indenter->addTokenToState(State, /*Newline=*/false, DryRun);
+    }
+    return Penalty;
+  }
+};
+
+/// \brief Finds the best way to break lines.
+class OptimizingLineFormatter : public LineFormatter {
+public:
+  OptimizingLineFormatter(ContinuationIndenter *Indenter,
+                          WhitespaceManager *Whitespaces,
+                          const FormatStyle &Style,
+                          UnwrappedLineFormatter *BlockFormatter)
+      : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
+
+  /// \brief Formats the line by finding the best line breaks with line lengths
+  /// below the column limit.
+  unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
+                      bool DryRun) {
+    LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
+
+    // If the ObjC method declaration does not fit on a line, we should format
+    // it with one arg per line.
+    if (State.Line->Type == LT_ObjCMethodDecl)
+      State.Stack.back().BreakBeforeParameter = true;
+
+    // Find best solution in solution space.
+    return analyzeSolutionSpace(State, DryRun);
+  }
+
+private:
+  struct CompareLineStatePointers {
+    bool operator()(LineState *obj1, LineState *obj2) const {
+      return *obj1 < *obj2;
+    }
+  };
+
+  /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
+  ///
+  /// In case of equal penalties, we want to prefer states that were inserted
+  /// first. During state generation we make sure that we insert states first
+  /// that break the line as late as possible.
+  typedef std::pair<unsigned, unsigned> OrderedPenalty;
+
+  /// \brief An edge in the solution space from \c Previous->State to \c State,
+  /// inserting a newline dependent on the \c NewLine.
+  struct StateNode {
+    StateNode(const LineState &State, bool NewLine, StateNode *Previous)
+        : State(State), NewLine(NewLine), Previous(Previous) {}
+    LineState State;
+    bool NewLine;
+    StateNode *Previous;
+  };
+
+  /// \brief An item in the prioritized BFS search queue. The \c StateNode's
+  /// \c State has the given \c OrderedPenalty.
+  typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
+
+  /// \brief The BFS queue type.
+  typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
+                              std::greater<QueueItem>> QueueType;
+
+  /// \brief Analyze the entire solution space starting from \p InitialState.
+  ///
+  /// This implements a variant of Dijkstra's algorithm on the graph that spans
+  /// the solution space (\c LineStates are the nodes). The algorithm tries to
+  /// find the shortest path (the one with lowest penalty) from \p InitialState
+  /// to a state where all tokens are placed. Returns the penalty.
+  ///
+  /// If \p DryRun is \c false, directly applies the changes.
+  unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun) {
+    std::set<LineState *, CompareLineStatePointers> Seen;
+
+    // Increasing count of \c StateNode items we have created. This is used to
+    // create a deterministic order independent of the container.
+    unsigned Count = 0;
+    QueueType Queue;
+
+    // Insert start element into queue.
+    StateNode *Node =
+        new (Allocator.Allocate()) StateNode(InitialState, false, nullptr);
+    Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
+    ++Count;
+
+    unsigned Penalty = 0;
+
+    // While not empty, take first element and follow edges.
+    while (!Queue.empty()) {
+      Penalty = Queue.top().first.first;
+      StateNode *Node = Queue.top().second;
+      if (!Node->State.NextToken) {
+        DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
+        break;
+      }
+      Queue.pop();
+
+      // Cut off the analysis of certain solutions if the analysis gets too
+      // complex. See description of IgnoreStackForComparison.
+      if (Count > 10000)
+        Node->State.IgnoreStackForComparison = true;
+
+      if (!Seen.insert(&Node->State).second)
+        // State already examined with lower penalty.
+        continue;
+
+      FormatDecision LastFormat = Node->State.NextToken->Decision;
+      if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
+        addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
+      if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
+        addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
+    }
+
+    if (Queue.empty()) {
+      // We were unable to find a solution, do nothing.
+      // FIXME: Add diagnostic?
+      DEBUG(llvm::dbgs() << "Could not find a solution.\n");
+      return 0;
+    }
+
+    // Reconstruct the solution.
+    if (!DryRun)
+      reconstructPath(InitialState, Queue.top().second);
+
+    DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
+    DEBUG(llvm::dbgs() << "---\n");
+
+    return Penalty;
+  }
+
+  /// \brief Add the following state to the analysis queue \c Queue.
+  ///
+  /// Assume the current state is \p PreviousNode and has been reached with a
+  /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
+  void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
+                           bool NewLine, unsigned *Count, QueueType *Queue) {
+    if (NewLine && !Indenter->canBreak(PreviousNode->State))
+      return;
+    if (!NewLine && Indenter->mustBreak(PreviousNode->State))
+      return;
+
+    StateNode *Node = new (Allocator.Allocate())
+        StateNode(PreviousNode->State, NewLine, PreviousNode);
+    if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
+      return;
+
+    Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
+
+    Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
+    ++(*Count);
+  }
+
+  /// \brief Applies the best formatting by reconstructing the path in the
+  /// solution space that leads to \c Best.
+  void reconstructPath(LineState &State, StateNode *Best) {
+    std::deque<StateNode *> Path;
+    // We do not need a break before the initial token.
+    while (Best->Previous) {
+      Path.push_front(Best);
+      Best = Best->Previous;
+    }
+    for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
+         I != E; ++I) {
+      unsigned Penalty = 0;
+      formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
+      Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
+
+      DEBUG({
+        printLineState((*I)->Previous->State);
+        if ((*I)->NewLine) {
+          llvm::dbgs() << "Penalty for placing "
+                       << (*I)->Previous->State.NextToken->Tok.getName() << ": "
+                       << Penalty << "\n";
+        }
+      });
+    }
+  }
+
+  llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
+};
+
 } // namespace
 
 unsigned
 UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,
                                bool DryRun, int AdditionalIndent,
                                bool FixBadIndentation) {
-  LineJoiner Joiner(Style, Keywords);
+  LineJoiner Joiner(Style, Keywords, Lines);
 
   // Try to look up already computed penalty in DryRun-mode.
   std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned> CacheKey(
@@ -348,148 +809,93 @@
 
   assert(!Lines.empty());
   unsigned Penalty = 0;
-  std::vector<int> IndentForLevel;
-  for (unsigned i = 0, e = Lines[0]->Level; i != e; ++i)
-    IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent);
+  LevelIndentTracker IndentTracker(Style, Keywords, Lines[0]->Level,
+                                   AdditionalIndent);
   const AnnotatedLine *PreviousLine = nullptr;
-  for (SmallVectorImpl<AnnotatedLine *>::const_iterator I = Lines.begin(),
-                                                        E = Lines.end();
-       I != E; ++I) {
-    const AnnotatedLine &TheLine = **I;
-    const FormatToken *FirstTok = TheLine.First;
-    int Offset = getIndentOffset(*FirstTok);
-
-    // Determine indent and try to merge multiple unwrapped lines.
-    unsigned Indent;
-    if (TheLine.InPPDirective) {
-      Indent = TheLine.Level * Style.IndentWidth;
-    } else {
-      while (IndentForLevel.size() <= TheLine.Level)
-        IndentForLevel.push_back(-1);
-      IndentForLevel.resize(TheLine.Level + 1);
-      Indent = getIndent(IndentForLevel, TheLine.Level);
-    }
-    unsigned LevelIndent = Indent;
-    if (static_cast<int>(Indent) + Offset >= 0)
-      Indent += Offset;
-
-    // Merge multiple lines if possible.
-    unsigned MergedLines = Joiner.tryFitMultipleLinesInOne(Indent, I, E);
-    if (MergedLines > 0 && Style.ColumnLimit == 0) {
-      // Disallow line merging if there is a break at the start of one of the
-      // input lines.
-      for (unsigned i = 0; i < MergedLines; ++i) {
-        if (I[i + 1]->First->NewlinesBefore > 0)
-          MergedLines = 0;
-      }
-    }
-    if (!DryRun) {
-      for (unsigned i = 0; i < MergedLines; ++i) {
-        join(*I[i], *I[i + 1]);
-      }
-    }
-    I += MergedLines;
-
+  const AnnotatedLine *NextLine = nullptr;
+  for (const AnnotatedLine *Line =
+           Joiner.getNextMergedLine(DryRun, IndentTracker);
+       Line; Line = NextLine) {
+    const AnnotatedLine &TheLine = *Line;
+    unsigned Indent = IndentTracker.getIndent();
     bool FixIndentation =
-        FixBadIndentation && (LevelIndent != FirstTok->OriginalColumn);
-    if (TheLine.First->is(tok::eof)) {
-      if (PreviousLine && PreviousLine->Affected && !DryRun) {
-        // Remove the file's trailing whitespace.
-        unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u);
-        Whitespaces->replaceWhitespace(*TheLine.First, Newlines,
-                                       /*IndentLevel=*/0, /*Spaces=*/0,
-                                       /*TargetColumn=*/0);
-      }
-    } else if (TheLine.Type != LT_Invalid &&
-               (TheLine.Affected || FixIndentation)) {
-      if (FirstTok->WhitespaceRange.isValid()) {
-        if (!DryRun)
-          formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, Indent,
-                           TheLine.InPPDirective);
-      } else {
-        Indent = LevelIndent = FirstTok->OriginalColumn;
-      }
+        FixBadIndentation && (Indent != TheLine.First->OriginalColumn);
+    bool ShouldFormat = TheLine.Affected || FixIndentation;
+    // We cannot format this line; if the reason is that the line had a
+    // parsing error, remember that.
+    if (ShouldFormat && TheLine.Type == LT_Invalid && IncompleteFormat)
+      *IncompleteFormat = true;
 
-      // If everything fits on a single line, just put it there.
-      unsigned ColumnLimit = Style.ColumnLimit;
-      if (I + 1 != E) {
-        AnnotatedLine *NextLine = I[1];
-        if (NextLine->InPPDirective && !NextLine->First->HasUnescapedNewline)
-          ColumnLimit = getColumnLimit(TheLine.InPPDirective);
-      }
+    if (ShouldFormat && TheLine.Type != LT_Invalid) {
+      if (!DryRun)
+        formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, Indent,
+                         TheLine.InPPDirective);
 
-      if (TheLine.Last->TotalLength + Indent <= ColumnLimit ||
-          TheLine.Type == LT_ImportStatement) {
-        LineState State = Indenter->getInitialState(Indent, &TheLine, DryRun);
-        while (State.NextToken) {
-          formatChildren(State, /*Newline=*/false, DryRun, Penalty);
-          Indenter->addTokenToState(State, /*Newline=*/false, DryRun);
-        }
-      } else if (Style.ColumnLimit == 0) {
-        // FIXME: Implement nested blocks for ColumnLimit = 0.
-        NoColumnLimitFormatter Formatter(Indenter);
-        if (!DryRun)
-          Formatter.format(Indent, &TheLine);
-      } else {
-        Penalty += format(TheLine, Indent, DryRun);
-      }
+      NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
+      unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine);
+      bool FitsIntoOneLine =
+          TheLine.Last->TotalLength + Indent <= ColumnLimit ||
+          TheLine.Type == LT_ImportStatement;
 
-      if (!TheLine.InPPDirective)
-        IndentForLevel[TheLine.Level] = LevelIndent;
-    } else if (TheLine.ChildrenAffected) {
-      format(TheLine.Children, DryRun);
+      if (Style.ColumnLimit == 0)
+        NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this)
+            .formatLine(TheLine, Indent, DryRun);
+      else if (FitsIntoOneLine)
+        Penalty += NoLineBreakFormatter(Indenter, Whitespaces, Style, this)
+                       .formatLine(TheLine, Indent, DryRun);
+      else
+        Penalty += OptimizingLineFormatter(Indenter, Whitespaces, Style, this)
+                       .formatLine(TheLine, Indent, DryRun);
     } else {
-      // Format the first token if necessary, and notify the WhitespaceManager
-      // about the unchanged whitespace.
-      for (FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) {
-        if (Tok == TheLine.First && (Tok->NewlinesBefore > 0 || Tok->IsFirst)) {
-          unsigned LevelIndent = Tok->OriginalColumn;
-          if (!DryRun) {
-            // Remove trailing whitespace of the previous line.
-            if ((PreviousLine && PreviousLine->Affected) ||
-                TheLine.LeadingEmptyLinesAffected) {
-              formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent,
-                               TheLine.InPPDirective);
-            } else {
-              Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
-            }
-          }
+      // If no token in the current line is affected, we still need to format
+      // affected children.
+      if (TheLine.ChildrenAffected)
+        format(TheLine.Children, DryRun);
 
-          if (static_cast<int>(LevelIndent) - Offset >= 0)
-            LevelIndent -= Offset;
-          if (Tok->isNot(tok::comment) && !TheLine.InPPDirective)
-            IndentForLevel[TheLine.Level] = LevelIndent;
-        } else if (!DryRun) {
+      // Adapt following lines on the current indent level to the same level
+      // unless the current \c AnnotatedLine is not at the beginning of a line.
+      bool StartsNewLine =
+          TheLine.First->NewlinesBefore > 0 || TheLine.First->IsFirst;
+      if (StartsNewLine)
+        IndentTracker.adjustToUnmodifiedLine(TheLine);
+      if (!DryRun) {
+        bool ReformatLeadingWhitespace =
+            StartsNewLine && ((PreviousLine && PreviousLine->Affected) ||
+                              TheLine.LeadingEmptyLinesAffected);
+        // Format the first token.
+        if (ReformatLeadingWhitespace)
+          formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level,
+                           TheLine.First->OriginalColumn,
+                           TheLine.InPPDirective);
+        else
+          Whitespaces->addUntouchableToken(*TheLine.First,
+                                           TheLine.InPPDirective);
+
+        // Notify the WhitespaceManager about the unchanged whitespace.
+        for (FormatToken *Tok = TheLine.First->Next; Tok; Tok = Tok->Next)
           Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
-        }
       }
+      NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
     }
     if (!DryRun)
       markFinalized(TheLine.First);
-    PreviousLine = *I;
+    PreviousLine = &TheLine;
   }
   PenaltyCache[CacheKey] = Penalty;
   return Penalty;
 }
 
-unsigned UnwrappedLineFormatter::format(const AnnotatedLine &Line,
-                                        unsigned FirstIndent, bool DryRun) {
-  LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
-
-  // If the ObjC method declaration does not fit on a line, we should format
-  // it with one arg per line.
-  if (State.Line->Type == LT_ObjCMethodDecl)
-    State.Stack.back().BreakBeforeParameter = true;
-
-  // Find best solution in solution space.
-  return analyzeSolutionSpace(State, DryRun);
-}
-
 void UnwrappedLineFormatter::formatFirstToken(FormatToken &RootToken,
                                               const AnnotatedLine *PreviousLine,
                                               unsigned IndentLevel,
                                               unsigned Indent,
                                               bool InPPDirective) {
+  if (RootToken.is(tok::eof)) {
+    unsigned Newlines = std::min(RootToken.NewlinesBefore, 1u);
+    Whitespaces->replaceWhitespace(RootToken, Newlines, /*IndentLevel=*/0,
+                                   /*Spaces=*/0, /*TargetColumn=*/0);
+    return;
+  }
   unsigned Newlines =
       std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
   // Remove empty lines before "}" where applicable.
@@ -524,202 +930,21 @@
                                              !RootToken.HasUnescapedNewline);
 }
 
-/// \brief Get the indent of \p Level from \p IndentForLevel.
-///
-/// \p IndentForLevel must contain the indent for the level \c l
-/// at \p IndentForLevel[l], or a value < 0 if the indent for
-/// that level is unknown.
-unsigned UnwrappedLineFormatter::getIndent(ArrayRef<int> IndentForLevel,
-                                           unsigned Level) {
-  if (IndentForLevel[Level] != -1)
-    return IndentForLevel[Level];
-  if (Level == 0)
-    return 0;
-  return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
-}
-
-void UnwrappedLineFormatter::join(AnnotatedLine &A, const AnnotatedLine &B) {
-  assert(!A.Last->Next);
-  assert(!B.First->Previous);
-  if (B.Affected)
-    A.Affected = true;
-  A.Last->Next = B.First;
-  B.First->Previous = A.Last;
-  B.First->CanBreakBefore = true;
-  unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
-  for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
-    Tok->TotalLength += LengthA;
-    A.Last = Tok;
-  }
-}
-
-unsigned UnwrappedLineFormatter::analyzeSolutionSpace(LineState &InitialState,
-                                                      bool DryRun) {
-  std::set<LineState *, CompareLineStatePointers> Seen;
-
-  // Increasing count of \c StateNode items we have created. This is used to
-  // create a deterministic order independent of the container.
-  unsigned Count = 0;
-  QueueType Queue;
-
-  // Insert start element into queue.
-  StateNode *Node =
-      new (Allocator.Allocate()) StateNode(InitialState, false, nullptr);
-  Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
-  ++Count;
-
-  unsigned Penalty = 0;
-
-  // While not empty, take first element and follow edges.
-  while (!Queue.empty()) {
-    Penalty = Queue.top().first.first;
-    StateNode *Node = Queue.top().second;
-    if (!Node->State.NextToken) {
-      DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
-      break;
-    }
-    Queue.pop();
-
-    // Cut off the analysis of certain solutions if the analysis gets too
-    // complex. See description of IgnoreStackForComparison.
-    if (Count > 10000)
-      Node->State.IgnoreStackForComparison = true;
-
-    if (!Seen.insert(&Node->State).second)
-      // State already examined with lower penalty.
-      continue;
-
-    FormatDecision LastFormat = Node->State.NextToken->Decision;
-    if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
-      addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
-    if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
-      addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
-  }
-
-  if (Queue.empty()) {
-    // We were unable to find a solution, do nothing.
-    // FIXME: Add diagnostic?
-    DEBUG(llvm::dbgs() << "Could not find a solution.\n");
-    return 0;
-  }
-
-  // Reconstruct the solution.
-  if (!DryRun)
-    reconstructPath(InitialState, Queue.top().second);
-
-  DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
-  DEBUG(llvm::dbgs() << "---\n");
-
-  return Penalty;
-}
-
-#ifndef NDEBUG
-static void printLineState(const LineState &State) {
-  llvm::dbgs() << "State: ";
-  for (const ParenState &P : State.Stack) {
-    llvm::dbgs() << P.Indent << "|" << P.LastSpace << "|" << P.NestedBlockIndent
-                 << " ";
-  }
-  llvm::dbgs() << State.NextToken->TokenText << "\n";
-}
-#endif
-
-void UnwrappedLineFormatter::reconstructPath(LineState &State,
-                                             StateNode *Current) {
-  std::deque<StateNode *> Path;
-  // We do not need a break before the initial token.
-  while (Current->Previous) {
-    Path.push_front(Current);
-    Current = Current->Previous;
-  }
-  for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
-       I != E; ++I) {
-    unsigned Penalty = 0;
-    formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
-    Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
-
-    DEBUG({
-      printLineState((*I)->Previous->State);
-      if ((*I)->NewLine) {
-        llvm::dbgs() << "Penalty for placing "
-                     << (*I)->Previous->State.NextToken->Tok.getName() << ": "
-                     << Penalty << "\n";
-      }
-    });
-  }
-}
-
-void UnwrappedLineFormatter::addNextStateToQueue(unsigned Penalty,
-                                                 StateNode *PreviousNode,
-                                                 bool NewLine, unsigned *Count,
-                                                 QueueType *Queue) {
-  if (NewLine && !Indenter->canBreak(PreviousNode->State))
-    return;
-  if (!NewLine && Indenter->mustBreak(PreviousNode->State))
-    return;
-
-  StateNode *Node = new (Allocator.Allocate())
-      StateNode(PreviousNode->State, NewLine, PreviousNode);
-  if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
-    return;
-
-  Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
-
-  Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
-  ++(*Count);
-}
-
-bool UnwrappedLineFormatter::formatChildren(LineState &State, bool NewLine,
-                                            bool DryRun, unsigned &Penalty) {
-  const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
-  FormatToken &Previous = *State.NextToken->Previous;
-  if (!LBrace || LBrace->isNot(tok::l_brace) || LBrace->BlockKind != BK_Block ||
-      Previous.Children.size() == 0)
-    // The previous token does not open a block. Nothing to do. We don't
-    // assert so that we can simply call this function for all tokens.
-    return true;
-
-  if (NewLine) {
-    int AdditionalIndent = State.Stack.back().Indent -
-                           Previous.Children[0]->Level * Style.IndentWidth;
-
-    Penalty += format(Previous.Children, DryRun, AdditionalIndent,
-                      /*FixBadIndentation=*/true);
-    return true;
-  }
-
-  if (Previous.Children[0]->First->MustBreakBefore)
-    return false;
-
-  // Cannot merge multiple statements into a single line.
-  if (Previous.Children.size() > 1)
-    return false;
-
-  // Cannot merge into one line if this line ends on a comment.
-  if (Previous.is(tok::comment))
-    return false;
-
-  // We can't put the closing "}" on a line with a trailing comment.
-  if (Previous.Children[0]->Last->isTrailingComment())
-    return false;
-
-  // If the child line exceeds the column limit, we wouldn't want to merge it.
-  // We add +2 for the trailing " }".
-  if (Style.ColumnLimit > 0 &&
-      Previous.Children[0]->Last->TotalLength + State.Column + 2 >
-          Style.ColumnLimit)
-    return false;
-
-  if (!DryRun) {
-    Whitespaces->replaceWhitespace(
-        *Previous.Children[0]->First,
-        /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1,
-        /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
-  }
-  Penalty += format(*Previous.Children[0], State.Column + 1, DryRun);
-
-  State.Column += 1 + Previous.Children[0]->Last->TotalLength;
-  return true;
+unsigned
+UnwrappedLineFormatter::getColumnLimit(bool InPPDirective,
+                                       const AnnotatedLine *NextLine) const {
+  // In preprocessor directives reserve two chars for trailing " \" if the
+  // next line continues the preprocessor directive.
+  bool ContinuesPPDirective =
+      InPPDirective &&
+      // If there is no next line, this is likely a child line and the parent
+      // continues the preprocessor directive.
+      (!NextLine ||
+       (NextLine->InPPDirective &&
+        // If there is an unescaped newline between this line and the next, the
+        // next line starts a new preprocessor directive.
+        !NextLine->First->HasUnescapedNewline));
+  return Style.ColumnLimit - (ContinuesPPDirective ? 2 : 0);
 }
 
 } // namespace format
diff --git a/lib/Format/UnwrappedLineFormatter.h b/lib/Format/UnwrappedLineFormatter.h
index 7d5b011..da9aa1c 100644
--- a/lib/Format/UnwrappedLineFormatter.h
+++ b/lib/Format/UnwrappedLineFormatter.h
@@ -33,139 +33,38 @@
   UnwrappedLineFormatter(ContinuationIndenter *Indenter,
                          WhitespaceManager *Whitespaces,
                          const FormatStyle &Style,
-                         const AdditionalKeywords &Keywords)
+                         const AdditionalKeywords &Keywords,
+                         bool *IncompleteFormat)
       : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style),
-        Keywords(Keywords) {}
+        Keywords(Keywords), IncompleteFormat(IncompleteFormat) {}
 
-  unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun,
-                  int AdditionalIndent = 0, bool FixBadIndentation = false);
+  /// \brief Format the current block and return the penalty.
+  unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines,
+                  bool DryRun = false, int AdditionalIndent = 0,
+                  bool FixBadIndentation = false);
 
 private:
-  /// \brief Formats an \c AnnotatedLine and returns the penalty.
-  ///
-  /// If \p DryRun is \c false, directly applies the changes.
-  unsigned format(const AnnotatedLine &Line, unsigned FirstIndent,
-                  bool DryRun);
-
-  /// \brief An edge in the solution space from \c Previous->State to \c State,
-  /// inserting a newline dependent on the \c NewLine.
-  struct StateNode {
-    StateNode(const LineState &State, bool NewLine, StateNode *Previous)
-        : State(State), NewLine(NewLine), Previous(Previous) {}
-    LineState State;
-    bool NewLine;
-    StateNode *Previous;
-  };
-
-  /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
-  ///
-  /// In case of equal penalties, we want to prefer states that were inserted
-  /// first. During state generation we make sure that we insert states first
-  /// that break the line as late as possible.
-  typedef std::pair<unsigned, unsigned> OrderedPenalty;
-
-  /// \brief An item in the prioritized BFS search queue. The \c StateNode's
-  /// \c State has the given \c OrderedPenalty.
-  typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
-
-  /// \brief The BFS queue type.
-  typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
-                              std::greater<QueueItem> > QueueType;
-
-  /// \brief Get the offset of the line relatively to the level.
-  ///
-  /// For example, 'public:' labels in classes are offset by 1 or 2
-  /// characters to the left from their level.
-  int getIndentOffset(const FormatToken &RootToken) {
-    if (Style.Language == FormatStyle::LK_Java ||
-        Style.Language == FormatStyle::LK_JavaScript)
-      return 0;
-    if (RootToken.isAccessSpecifier(false) ||
-        RootToken.isObjCAccessSpecifier() || RootToken.is(Keywords.kw_signals))
-      return Style.AccessModifierOffset;
-    return 0;
-  }
-
   /// \brief Add a new line and the required indent before the first Token
   /// of the \c UnwrappedLine if there was no structural parsing error.
   void formatFirstToken(FormatToken &RootToken,
                         const AnnotatedLine *PreviousLine, unsigned IndentLevel,
                         unsigned Indent, bool InPPDirective);
 
-  /// \brief Get the indent of \p Level from \p IndentForLevel.
-  ///
-  /// \p IndentForLevel must contain the indent for the level \c l
-  /// at \p IndentForLevel[l], or a value < 0 if the indent for
-  /// that level is unknown.
-  unsigned getIndent(ArrayRef<int> IndentForLevel, unsigned Level);
-
-  void join(AnnotatedLine &A, const AnnotatedLine &B);
-
-  unsigned getColumnLimit(bool InPPDirective) const {
-    // In preprocessor directives reserve two chars for trailing " \"
-    return Style.ColumnLimit - (InPPDirective ? 2 : 0);
-  }
-
-  struct CompareLineStatePointers {
-    bool operator()(LineState *obj1, LineState *obj2) const {
-      return *obj1 < *obj2;
-    }
-  };
-
-  /// \brief Analyze the entire solution space starting from \p InitialState.
-  ///
-  /// This implements a variant of Dijkstra's algorithm on the graph that spans
-  /// the solution space (\c LineStates are the nodes). The algorithm tries to
-  /// find the shortest path (the one with lowest penalty) from \p InitialState
-  /// to a state where all tokens are placed. Returns the penalty.
-  ///
-  /// If \p DryRun is \c false, directly applies the changes.
-  unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false);
-
-  void reconstructPath(LineState &State, StateNode *Current);
-
-  /// \brief Add the following state to the analysis queue \c Queue.
-  ///
-  /// Assume the current state is \p PreviousNode and has been reached with a
-  /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
-  void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
-                           bool NewLine, unsigned *Count, QueueType *Queue);
-
-  /// \brief If the \p State's next token is an r_brace closing a nested block,
-  /// format the nested block before it.
-  ///
-  /// Returns \c true if all children could be placed successfully and adapts
-  /// \p Penalty as well as \p State. If \p DryRun is false, also directly
-  /// creates changes using \c Whitespaces.
-  ///
-  /// The crucial idea here is that children always get formatted upon
-  /// encountering the closing brace right after the nested block. Now, if we
-  /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
-  /// \c false), the entire block has to be kept on the same line (which is only
-  /// possible if it fits on the line, only contains a single statement, etc.
-  ///
-  /// If \p NewLine is true, we format the nested block on separate lines, i.e.
-  /// break after the "{", format all lines with correct indentation and the put
-  /// the closing "}" on yet another new line.
-  ///
-  /// This enables us to keep the simple structure of the
-  /// \c UnwrappedLineFormatter, where we only have two options for each token:
-  /// break or don't break.
-  bool formatChildren(LineState &State, bool NewLine, bool DryRun,
-                      unsigned &Penalty);
-
-  ContinuationIndenter *Indenter;
-  WhitespaceManager *Whitespaces;
-  FormatStyle Style;
-  const AdditionalKeywords &Keywords;
-
-  llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
+  /// \brief Returns the column limit for a line, taking into account whether we
+  /// need an escaped newline due to a continued preprocessor directive.
+  unsigned getColumnLimit(bool InPPDirective, const AnnotatedLine *NextLine) const;
 
   // Cache to store the penalty of formatting a vector of AnnotatedLines
   // starting from a specific additional offset. Improves performance if there
   // are many nested blocks.
   std::map<std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned>,
            unsigned> PenaltyCache;
+
+  ContinuationIndenter *Indenter;
+  WhitespaceManager *Whitespaces;
+  const FormatStyle &Style;
+  const AdditionalKeywords &Keywords;
+  bool *IncompleteFormat;
 };
 } // end namespace format
 } // end namespace clang
diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp
index 905f9c1..7d9e5e9 100644
--- a/lib/Format/UnwrappedLineParser.cpp
+++ b/lib/Format/UnwrappedLineParser.cpp
@@ -58,11 +58,10 @@
 class ScopedMacroState : public FormatTokenSource {
 public:
   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
-                   FormatToken *&ResetToken, bool &StructuralError)
+                   FormatToken *&ResetToken)
       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
-        StructuralError(StructuralError),
-        PreviousStructuralError(StructuralError), Token(nullptr) {
+        Token(nullptr) {
     TokenSource = this;
     Line.Level = 0;
     Line.InPPDirective = true;
@@ -73,7 +72,6 @@
     ResetToken = Token;
     Line.InPPDirective = false;
     Line.Level = PreviousLineLevel;
-    StructuralError = PreviousStructuralError;
   }
 
   FormatToken *getNextToken() override {
@@ -112,8 +110,6 @@
   FormatToken *&ResetToken;
   unsigned PreviousLineLevel;
   FormatTokenSource *PreviousTokenSource;
-  bool &StructuralError;
-  bool PreviousStructuralError;
 
   FormatToken *Token;
 };
@@ -208,9 +204,8 @@
                                          ArrayRef<FormatToken *> Tokens,
                                          UnwrappedLineConsumer &Callback)
     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
-      CurrentLines(&Lines), StructuralError(false), Style(Style),
-      Keywords(Keywords), Tokens(nullptr), Callback(Callback),
-      AllTokens(Tokens), PPBranchLevel(-1) {}
+      CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr),
+      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
 
 void UnwrappedLineParser::reset() {
   PPBranchLevel = -1;
@@ -221,11 +216,10 @@
   PreprocessorDirectives.clear();
   CurrentLines = &Lines;
   DeclarationScopeStack.clear();
-  StructuralError = false;
   PPStack.clear();
 }
 
-bool UnwrappedLineParser::parse() {
+void UnwrappedLineParser::parse() {
   IndexedTokenSource TokenSource(AllTokens);
   do {
     DEBUG(llvm::dbgs() << "----\n");
@@ -258,13 +252,15 @@
     }
   } while (!PPLevelBranchIndex.empty());
 
-  return StructuralError;
 }
 
 void UnwrappedLineParser::parseFile() {
-  ScopedDeclarationState DeclarationState(
-      *Line, DeclarationScopeStack,
-      /*MustBeDeclaration=*/!Line->InPPDirective);
+  // The top-level context in a file always has declarations, except for pre-
+  // processor directives and JavaScript files.
+  bool MustBeDeclaration =
+      !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
+  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
+                                          MustBeDeclaration);
   parseLevel(/*HasOpeningBrace=*/false);
   // Make sure to format the remaining tokens.
   flushComments(true);
@@ -288,7 +284,6 @@
     case tok::r_brace:
       if (HasOpeningBrace)
         return;
-      StructuralError = true;
       nextToken();
       addUnwrappedLine();
       break;
@@ -307,7 +302,7 @@
   } while (!eof());
 }
 
-void UnwrappedLineParser::calculateBraceTypes() {
+void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
   // We'll parse forward through the tokens until we hit
   // a closing brace or eof - note that getNextToken() will
   // parse macros, so this will magically work inside macro
@@ -330,6 +325,7 @@
 
     switch (Tok->Tok.getKind()) {
     case tok::l_brace:
+      Tok->BlockKind = BK_Unknown;
       LBraceStack.push_back(Tok);
       break;
     case tok::r_brace:
@@ -353,9 +349,11 @@
             //
             // We exclude + and - as they can be ObjC visibility modifiers.
             ProbablyBracedList =
-                NextTok->isOneOf(tok::comma, tok::semi, tok::period, tok::colon,
+                NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                                  tok::r_paren, tok::r_square, tok::l_brace,
                                  tok::l_paren, tok::ellipsis) ||
+                (NextTok->is(tok::semi) &&
+                 (!ExpectClassBody || LBraceStack.size() != 1)) ||
                 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
           }
           if (ProbablyBracedList) {
@@ -410,7 +408,6 @@
 
   if (!FormatTok->Tok.is(tok::r_brace)) {
     Line->Level = InitialLevel;
-    StructuralError = true;
     return;
   }
 
@@ -470,7 +467,7 @@
 
 void UnwrappedLineParser::parsePPDirective() {
   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
-  ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError);
+  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
   nextToken();
 
   if (!FormatTok->Tok.getIdentifierInfo()) {
@@ -672,10 +669,13 @@
   case tok::kw_asm:
     nextToken();
     if (FormatTok->is(tok::l_brace)) {
+      FormatTok->Type = TT_InlineASMBrace;
       nextToken();
       while (FormatTok && FormatTok->isNot(tok::eof)) {
         if (FormatTok->is(tok::r_brace)) {
+          FormatTok->Type = TT_InlineASMBrace;
           nextToken();
+          addUnwrappedLine();
           break;
         }
         FormatTok->Finalized = true;
@@ -744,7 +744,7 @@
     }
     break;
   case tok::identifier:
-    if (FormatTok->IsForEachMacro) {
+    if (FormatTok->is(TT_ForEachMacro)) {
       parseForOrWhileLoop();
       return;
     }
@@ -754,7 +754,11 @@
       return;
     }
     if (FormatTok->is(Keywords.kw_signals)) {
-      parseAccessSpecifier();
+      nextToken();
+      if (FormatTok->is(tok::colon)) {
+        nextToken();
+        addUnwrappedLine();
+      }
       return;
     }
     // In all other cases, parse the declaration.
@@ -781,9 +785,15 @@
     case tok::kw_struct:
     case tok::kw_union:
     case tok::kw_class:
+      // parseRecord falls through and does not yet add an unwrapped line as a
+      // record declaration or definition can start a structural element.
       parseRecord();
-      // A record declaration or definition is always the start of a structural
-      // element.
+      // This does not apply for Java and JavaScript.
+      if (Style.Language == FormatStyle::LK_Java ||
+          Style.Language == FormatStyle::LK_JavaScript) {
+        addUnwrappedLine();
+        return;
+      }
       break;
     case tok::period:
       nextToken();
@@ -833,14 +843,22 @@
       parseTryCatch();
       return;
     case tok::identifier: {
-      StringRef Text = FormatTok->TokenText;
       // Parse function literal unless 'function' is the first token in a line
       // in which case this should be treated as a free-standing function.
-      if (Style.Language == FormatStyle::LK_JavaScript && Text == "function" &&
-          Line->Tokens.size() > 0) {
+      if (Style.Language == FormatStyle::LK_JavaScript &&
+          FormatTok->is(Keywords.kw_function) && Line->Tokens.size() > 0) {
         tryToParseJSFunction();
         break;
       }
+      if ((Style.Language == FormatStyle::LK_JavaScript ||
+           Style.Language == FormatStyle::LK_Java) &&
+          FormatTok->is(Keywords.kw_interface)) {
+        parseRecord();
+        addUnwrappedLine();
+        break;
+      }
+
+      StringRef Text = FormatTok->TokenText;
       nextToken();
       if (Line->Tokens.size() == 1 &&
           // JS doesn't have macros, and within classes colons indicate fields,
@@ -855,7 +873,13 @@
         bool FunctionLike = FormatTok->is(tok::l_paren);
         if (FunctionLike)
           parseParens();
-        if (FormatTok->NewlinesBefore > 0 &&
+
+        bool FollowedByNewline =
+            CommentsBeforeNextToken.empty()
+                ? FormatTok->NewlinesBefore > 0
+                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
+
+        if (FollowedByNewline &&
             (Text.size() >= 5 || FunctionLike) &&
             tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
           addUnwrappedLine();
@@ -865,6 +889,16 @@
       break;
     }
     case tok::equal:
+      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
+      // TT_JsFatArrow. The always start an expression or a child block if
+      // followed by a curly.
+      if (FormatTok->is(TT_JsFatArrow)) {
+        nextToken();
+        if (FormatTok->is(tok::l_brace))
+          parseChildBlock();
+        break;
+      }
+
       nextToken();
       if (FormatTok->Tok.is(tok::l_brace)) {
         parseBracedList();
@@ -884,6 +918,10 @@
 }
 
 bool UnwrappedLineParser::tryToParseLambda() {
+  if (Style.Language != FormatStyle::LK_Cpp) {
+    nextToken();
+    return false;
+  }
   // FIXME: This is a dirty way to access the previous token. Find a better
   // solution.
   if (!Line->Tokens.empty() &&
@@ -922,7 +960,7 @@
       nextToken();
       break;
     case tok::arrow:
-      FormatTok->Type = TT_TrailingReturnArrow;
+      FormatTok->Type = TT_LambdaArrow;
       nextToken();
       break;
     default:
@@ -989,15 +1027,23 @@
 
   if (FormatTok->isNot(tok::l_paren))
     return;
-  nextToken();
-  while (FormatTok->isNot(tok::l_brace)) {
-    // Err on the side of caution in order to avoid consuming the full file in
-    // case of incomplete code.
-    if (!FormatTok->isOneOf(tok::identifier, tok::comma, tok::r_paren,
-                            tok::comment))
-      return;
+
+  // Parse formal parameter list.
+  parseParens();
+
+  if (FormatTok->is(tok::colon)) {
+    // Parse a type definition.
     nextToken();
+
+    // Eat the type declaration. For braced inline object types, balance braces,
+    // otherwise just parse until finding an l_brace for the function body.
+    if (FormatTok->is(tok::l_brace))
+      tryToParseBracedList();
+    else
+      while(FormatTok->isNot(tok::l_brace) && !eof())
+        nextToken();
   }
+
   parseChildBlock();
 }
 
@@ -1018,10 +1064,20 @@
   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
   // replace this by using parseAssigmentExpression() inside.
   do {
-    if (Style.Language == FormatStyle::LK_JavaScript &&
-        FormatTok->is(Keywords.kw_function)) {
-      tryToParseJSFunction();
-      continue;
+    if (Style.Language == FormatStyle::LK_JavaScript) {
+      if (FormatTok->is(Keywords.kw_function)) {
+        tryToParseJSFunction();
+        continue;
+      }
+      if (FormatTok->is(TT_JsFatArrow)) {
+        nextToken();
+        // Fat arrows can be followed by simple expressions or by child blocks
+        // in curly braces.
+        if (FormatTok->is(tok::l_brace)){
+          parseChildBlock();
+          continue;
+        }
+      }
     }
     switch (FormatTok->Tok.getKind()) {
     case tok::caret:
@@ -1090,9 +1146,8 @@
       tryToParseLambda();
       break;
     case tok::l_brace:
-      if (!tryToParseBracedList()) {
+      if (!tryToParseBracedList())
         parseChildBlock();
-      }
       break;
     case tok::at:
       nextToken();
@@ -1132,9 +1187,8 @@
       parseSquare();
       break;
     case tok::l_brace: {
-      if (!tryToParseBracedList()) {
+      if (!tryToParseBracedList())
         parseChildBlock();
-      }
       break;
     }
     case tok::at:
@@ -1202,8 +1256,6 @@
       nextToken();
       if (FormatTok->is(tok::l_paren))
         parseParens();
-      else
-        StructuralError = true;
       if (FormatTok->is(tok::comma))
         nextToken();
     }
@@ -1226,7 +1278,6 @@
     // The C++ standard requires a compound-statement after a try.
     // If there's none, we try to assume there's a structuralElement
     // and try to continue.
-    StructuralError = true;
     addUnwrappedLine();
     ++Line->Level;
     parseStructuralElement();
@@ -1319,8 +1370,7 @@
 }
 
 void UnwrappedLineParser::parseForOrWhileLoop() {
-  assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while) ||
-          FormatTok->IsForEachMacro) &&
+  assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
          "'for', 'while' or foreach macro expected");
   nextToken();
   if (FormatTok->Tok.is(tok::l_paren))
@@ -1380,6 +1430,8 @@
     }
     addUnwrappedLine();
   } else {
+    if (FormatTok->is(tok::semi))
+      nextToken();
     addUnwrappedLine();
   }
   Line->Level = OldLineLevel;
@@ -1530,38 +1582,45 @@
 void UnwrappedLineParser::parseRecord() {
   const FormatToken &InitialToken = *FormatTok;
   nextToken();
-  if (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw___attribute,
-                         tok::kw___declspec, tok::kw_alignas)) {
+
+
+  // The actual identifier can be a nested name specifier, and in macros
+  // it is often token-pasted.
+  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
+                            tok::kw___attribute, tok::kw___declspec,
+                            tok::kw_alignas) ||
+         ((Style.Language == FormatStyle::LK_Java ||
+           Style.Language == FormatStyle::LK_JavaScript) &&
+          FormatTok->isOneOf(tok::period, tok::comma))) {
+    bool IsNonMacroIdentifier =
+        FormatTok->is(tok::identifier) &&
+        FormatTok->TokenText != FormatTok->TokenText.upper();
     nextToken();
     // We can have macros or attributes in between 'class' and the class name.
-    if (FormatTok->Tok.is(tok::l_paren)) {
+    if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
       parseParens();
-    }
-    // The actual identifier can be a nested name specifier, and in macros
-    // it is often token-pasted.
-    while (FormatTok->is(tok::identifier) || FormatTok->is(tok::coloncolon) ||
-           FormatTok->is(tok::hashhash) ||
-           ((Style.Language == FormatStyle::LK_Java ||
-             Style.Language == FormatStyle::LK_JavaScript) &&
-            FormatTok->isOneOf(tok::period, tok::comma)))
-      nextToken();
+  }
 
-    // Note that parsing away template declarations here leads to incorrectly
-    // accepting function declarations as record declarations.
-    // In general, we cannot solve this problem. Consider:
-    // class A<int> B() {}
-    // which can be a function definition or a class definition when B() is a
-    // macro. If we find enough real-world cases where this is a problem, we
-    // can parse for the 'template' keyword in the beginning of the statement,
-    // and thus rule out the record production in case there is no template
-    // (this would still leave us with an ambiguity between template function
-    // and class declarations).
-    if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) {
-      while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) {
-        if (FormatTok->Tok.is(tok::semi))
-          return;
-        nextToken();
+  // Note that parsing away template declarations here leads to incorrectly
+  // accepting function declarations as record declarations.
+  // In general, we cannot solve this problem. Consider:
+  // class A<int> B() {}
+  // which can be a function definition or a class definition when B() is a
+  // macro. If we find enough real-world cases where this is a problem, we
+  // can parse for the 'template' keyword in the beginning of the statement,
+  // and thus rule out the record production in case there is no template
+  // (this would still leave us with an ambiguity between template function
+  // and class declarations).
+  if (FormatTok->isOneOf(tok::colon, tok::less)) {
+    while (!eof()) {
+      if (FormatTok->is(tok::l_brace)) {
+        calculateBraceTypes(/*ExpectClassBody=*/true);
+        if (!tryToParseBracedList())
+          break;
       }
+      if (FormatTok->Tok.is(tok::semi))
+        return;
+      nextToken();
     }
   }
   if (FormatTok->Tok.is(tok::l_brace)) {
@@ -1574,10 +1633,6 @@
   // We fall through to parsing a structural element afterwards, so
   // class A {} n, m;
   // will end up in one unwrapped line.
-  // This does not apply for Java.
-  if (Style.Language == FormatStyle::LK_Java ||
-      Style.Language == FormatStyle::LK_JavaScript)
-    addUnwrappedLine();
 }
 
 void UnwrappedLineParser::parseObjCProtocolList() {
@@ -1659,15 +1714,21 @@
   assert(FormatTok->isOneOf(Keywords.kw_import, tok::kw_export));
   nextToken();
 
-  if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, Keywords.kw_function,
-                         Keywords.kw_var))
-    return; // Fall through to parsing the corresponding structure.
+  // Consume the "default" in "export default class/function".
+  if (FormatTok->is(tok::kw_default))
+    nextToken();
 
-  if (FormatTok->is(tok::kw_default)) {
-    nextToken(); // export default ..., fall through after eating 'default'.
+  // Consume "function" and "default function", so that these get parsed as
+  // free-standing JS functions, i.e. do not require a trailing semicolon.
+  if (FormatTok->is(Keywords.kw_function)) {
+    nextToken();
     return;
   }
 
+  if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, tok::kw_enum,
+                         Keywords.kw_var))
+    return; // Fall through to parsing the corresponding structure.
+
   if (FormatTok->is(tok::l_brace)) {
     FormatTok->BlockKind = BK_Block;
     parseBracedList();
@@ -1709,15 +1770,12 @@
     if (CurrentLines == &Lines)
       printDebugInfo(*Line);
   });
-  CurrentLines->push_back(*Line);
+  CurrentLines->push_back(std::move(*Line));
   Line->Tokens.clear();
   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
-    for (SmallVectorImpl<UnwrappedLine>::iterator
-             I = PreprocessorDirectives.begin(),
-             E = PreprocessorDirectives.end();
-         I != E; ++I) {
-      CurrentLines->push_back(*I);
-    }
+    CurrentLines->append(
+        std::make_move_iterator(PreprocessorDirectives.begin()),
+        std::make_move_iterator(PreprocessorDirectives.end()));
     PreprocessorDirectives.clear();
   }
 }
@@ -1735,14 +1793,12 @@
            I = CommentsBeforeNextToken.begin(),
            E = CommentsBeforeNextToken.end();
        I != E; ++I) {
-    if (isOnNewLine(**I) && JustComments) {
+    if (isOnNewLine(**I) && JustComments)
       addUnwrappedLine();
-    }
     pushToken(*I);
   }
-  if (NewlineBeforeNext && JustComments) {
+  if (NewlineBeforeNext && JustComments)
     addUnwrappedLine();
-  }
   CommentsBeforeNextToken.clear();
 }
 
diff --git a/lib/Format/UnwrappedLineParser.h b/lib/Format/UnwrappedLineParser.h
index 76c62cd..c2fa029 100644
--- a/lib/Format/UnwrappedLineParser.h
+++ b/lib/Format/UnwrappedLineParser.h
@@ -65,8 +65,7 @@
                       ArrayRef<FormatToken *> Tokens,
                       UnwrappedLineConsumer &Callback);
 
-  /// Returns true in case of a structural error.
-  bool parse();
+  void parse();
 
 private:
   void reset();
@@ -114,7 +113,7 @@
   void readToken();
   void flushComments(bool NewlineBeforeNext);
   void pushToken(FormatToken *Tok);
-  void calculateBraceTypes();
+  void calculateBraceTypes(bool ExpectClassBody = false);
 
   // Marks a conditional compilation edge (for example, an '#if', '#ifdef',
   // '#else' or merge conflict marker). If 'Unreachable' is true, assumes
@@ -158,10 +157,6 @@
   // whether we are in a compound statement or not.
   std::vector<bool> DeclarationScopeStack;
 
-  // Will be true if we encounter an error that leads to possibily incorrect
-  // indentation levels.
-  bool StructuralError;
-
   const FormatStyle &Style;
   const AdditionalKeywords &Keywords;
 
diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp
index 4896ad7..6539527 100644
--- a/lib/Format/WhitespaceManager.cpp
+++ b/lib/Format/WhitespaceManager.cpp
@@ -93,6 +93,7 @@
 
   std::sort(Changes.begin(), Changes.end(), Change::IsBeforeInFile(SourceMgr));
   calculateLineBreakInformation();
+  alignConsecutiveAssignments();
   alignTrailingComments();
   alignEscapedNewlines();
   generateChanges();
@@ -141,6 +142,96 @@
   }
 }
 
+// Walk through all of the changes and find sequences of "=" to align.  To do
+// so, keep track of the lines and whether or not an "=" was found on align. If
+// a "=" is found on a line, extend the current sequence. If the current line
+// cannot be part of a sequence, e.g. because there is an empty line before it
+// or it contains non-assignments, finalize the previous sequence.
+void WhitespaceManager::alignConsecutiveAssignments() {
+  if (!Style.AlignConsecutiveAssignments)
+    return;
+
+  unsigned MinColumn = 0;
+  unsigned StartOfSequence = 0;
+  unsigned EndOfSequence = 0;
+  bool FoundAssignmentOnLine = false;
+  bool FoundLeftParenOnLine = false;
+  unsigned CurrentLine = 0;
+
+  auto AlignSequence = [&] {
+    alignConsecutiveAssignments(StartOfSequence, EndOfSequence, MinColumn);
+    MinColumn = 0;
+    StartOfSequence = 0;
+    EndOfSequence = 0;
+  };
+
+  for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
+    if (Changes[i].NewlinesBefore != 0) {
+      CurrentLine += Changes[i].NewlinesBefore;
+      if (StartOfSequence > 0 &&
+          (Changes[i].NewlinesBefore > 1 || !FoundAssignmentOnLine)) {
+        EndOfSequence = i;
+        AlignSequence();
+      }
+      FoundAssignmentOnLine = false;
+      FoundLeftParenOnLine = false;
+    }
+
+    if ((Changes[i].Kind == tok::equal &&
+         (FoundAssignmentOnLine || ((Changes[i].NewlinesBefore > 0 ||
+                                     Changes[i + 1].NewlinesBefore > 0)))) ||
+        (!FoundLeftParenOnLine && Changes[i].Kind == tok::r_paren)) {
+      if (StartOfSequence > 0)
+        AlignSequence();
+    } else if (Changes[i].Kind == tok::l_paren) {
+      FoundLeftParenOnLine = true;
+      if (!FoundAssignmentOnLine && StartOfSequence > 0)
+        AlignSequence();
+    } else if (!FoundAssignmentOnLine && !FoundLeftParenOnLine &&
+               Changes[i].Kind == tok::equal) {
+      FoundAssignmentOnLine = true;
+      EndOfSequence = i;
+      if (StartOfSequence == 0)
+        StartOfSequence = i;
+
+      unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
+      MinColumn = std::max(MinColumn, ChangeMinColumn);
+    }
+  }
+
+  if (StartOfSequence > 0) {
+    EndOfSequence = Changes.size();
+    AlignSequence();
+  }
+}
+
+void WhitespaceManager::alignConsecutiveAssignments(unsigned Start,
+                                                    unsigned End,
+                                                    unsigned Column) {
+  bool AlignedAssignment = false;
+  int PreviousShift = 0;
+  for (unsigned i = Start; i != End; ++i) {
+    int Shift = 0;
+    if (Changes[i].NewlinesBefore > 0)
+      AlignedAssignment = false;
+    if (!AlignedAssignment && Changes[i].Kind == tok::equal) {
+      Shift = Column - Changes[i].StartOfTokenColumn;
+      AlignedAssignment = true;
+      PreviousShift = Shift;
+    }
+    assert(Shift >= 0);
+    Changes[i].Spaces += Shift;
+    if (i + 1 != Changes.size())
+      Changes[i + 1].PreviousEndOfTokenColumn += Shift;
+    Changes[i].StartOfTokenColumn += Shift;
+    if (AlignedAssignment) {
+      Changes[i].StartOfTokenColumn += PreviousShift;
+      if (i + 1 != Changes.size())
+        Changes[i + 1].PreviousEndOfTokenColumn += PreviousShift;
+    }
+  }
+}
+
 void WhitespaceManager::alignTrailingComments() {
   unsigned MinColumn = 0;
   unsigned MaxColumn = UINT_MAX;
@@ -311,7 +402,7 @@
     unsigned Offset =
         std::min<int>(EscapedNewlineColumn - 1, PreviousEndOfTokenColumn);
     for (unsigned i = 0; i < Newlines; ++i) {
-      Text.append(std::string(EscapedNewlineColumn - Offset - 1, ' '));
+      Text.append(EscapedNewlineColumn - Offset - 1, ' ');
       Text.append(UseCRLF ? "\\\r\n" : "\\\n");
       Offset = 0;
     }
@@ -323,7 +414,7 @@
                                          unsigned WhitespaceStartColumn) {
   switch (Style.UseTab) {
   case FormatStyle::UT_Never:
-    Text.append(std::string(Spaces, ' '));
+    Text.append(Spaces, ' ');
     break;
   case FormatStyle::UT_Always: {
     unsigned FirstTabWidth =
@@ -333,8 +424,8 @@
       Spaces -= FirstTabWidth;
       Text.append("\t");
     }
-    Text.append(std::string(Spaces / Style.TabWidth, '\t'));
-    Text.append(std::string(Spaces % Style.TabWidth, ' '));
+    Text.append(Spaces / Style.TabWidth, '\t');
+    Text.append(Spaces % Style.TabWidth, ' ');
     break;
   }
   case FormatStyle::UT_ForIndentation:
@@ -345,10 +436,10 @@
       if (Indentation > Spaces)
         Indentation = Spaces;
       unsigned Tabs = Indentation / Style.TabWidth;
-      Text.append(std::string(Tabs, '\t'));
+      Text.append(Tabs, '\t');
       Spaces -= Tabs * Style.TabWidth;
     }
-    Text.append(std::string(Spaces, ' '));
+    Text.append(Spaces, ' ');
     break;
   }
 }
diff --git a/lib/Format/WhitespaceManager.h b/lib/Format/WhitespaceManager.h
index 28730d4..4bfc813 100644
--- a/lib/Format/WhitespaceManager.h
+++ b/lib/Format/WhitespaceManager.h
@@ -164,6 +164,13 @@
   /// \c EscapedNewlineColumn for the first tokens or token parts in a line.
   void calculateLineBreakInformation();
 
+  /// \brief Align consecutive assignments over all \c Changes.
+  void alignConsecutiveAssignments();
+
+  /// \brief Align consecutive assignments from change \p Start to change \p End at
+  /// the specified \p Column.
+  void alignConsecutiveAssignments(unsigned Start, unsigned End, unsigned Column);
+
   /// \brief Align trailing comments over all \c Changes.
   void alignTrailingComments();
 
diff --git a/lib/Frontend/ASTMerge.cpp b/lib/Frontend/ASTMerge.cpp
index 216ac6a..b84df94 100644
--- a/lib/Frontend/ASTMerge.cpp
+++ b/lib/Frontend/ASTMerge.cpp
@@ -57,6 +57,7 @@
                          /*MinimalImport=*/false);
 
     TranslationUnitDecl *TU = Unit->getASTContext().getTranslationUnitDecl();
+    CI.getASTConsumer().Initialize(CI.getASTContext());
     for (auto *D : TU->decls()) {
       // Don't re-import __va_list_tag, __builtin_va_list.
       if (const auto *ND = dyn_cast<NamedDecl>(D))
@@ -64,7 +65,12 @@
           if (II->isStr("__va_list_tag") || II->isStr("__builtin_va_list"))
             continue;
       
-      Importer.Import(D);
+      Decl *ToD = Importer.Import(D);
+    
+      if (ToD) {
+        DeclGroupRef DGR(ToD);
+        CI.getASTConsumer().HandleTopLevelDecl(DGR);
+      }
     }
   }
 
diff --git a/lib/Frontend/ASTUnit.cpp b/lib/Frontend/ASTUnit.cpp
index 7226344..4fd330d 100644
--- a/lib/Frontend/ASTUnit.cpp
+++ b/lib/Frontend/ASTUnit.cpp
@@ -613,7 +613,7 @@
   // about. This effectively drops diagnostics from modules we're building.
   // FIXME: In the long run, ee don't want to drop source managers from modules.
   if (!Info.hasSourceManager() || &Info.getSourceManager() == SourceMgr)
-    StoredDiags.push_back(StoredDiagnostic(Level, Info));
+    StoredDiags.emplace_back(Level, Info);
 }
 
 ASTMutationListener *ASTUnit::getASTMutationListener() {
diff --git a/lib/Frontend/CompilerInstance.cpp b/lib/Frontend/CompilerInstance.cpp
index fdaf7e2..aef3905 100644
--- a/lib/Frontend/CompilerInstance.cpp
+++ b/lib/Frontend/CompilerInstance.cpp
@@ -946,19 +946,20 @@
   if (const FileEntry *ModuleMapFile =
           ModMap.getContainingModuleMapFile(Module)) {
     // Use the module map where this module resides.
-    FrontendOpts.Inputs.push_back(
-        FrontendInputFile(ModuleMapFile->getName(), IK));
+    FrontendOpts.Inputs.emplace_back(ModuleMapFile->getName(), IK);
   } else {
+    SmallString<128> FakeModuleMapFile(Module->Directory->getName());
+    llvm::sys::path::append(FakeModuleMapFile, "__inferred_module.map");
+    FrontendOpts.Inputs.emplace_back(FakeModuleMapFile, IK);
+
     llvm::raw_string_ostream OS(InferredModuleMapContent);
     Module->print(OS);
     OS.flush();
-    FrontendOpts.Inputs.push_back(
-        FrontendInputFile("__inferred_module.map", IK));
 
     std::unique_ptr<llvm::MemoryBuffer> ModuleMapBuffer =
         llvm::MemoryBuffer::getMemBuffer(InferredModuleMapContent);
     ModuleMapFile = Instance.getFileManager().getVirtualFile(
-        "__inferred_module.map", InferredModuleMapContent.size(), 0);
+        FakeModuleMapFile, InferredModuleMapContent.size(), 0);
     SourceMgr.overrideFileContents(ModuleMapFile, std::move(ModuleMapBuffer));
   }
 
@@ -1084,79 +1085,51 @@
   // not have changed.
   if (!Id->hadMacroDefinition())
     return;
+  auto *LatestLocalMD = PP.getLocalMacroDirectiveHistory(Id);
 
-  // If this identifier does not currently have a macro definition,
-  // check whether it had one on the command line.
-  if (!Id->hasMacroDefinition()) {
-    MacroDirective::DefInfo LatestDef =
-        PP.getMacroDirectiveHistory(Id)->getDefinition();
-    for (MacroDirective::DefInfo Def = LatestDef; Def;
-           Def = Def.getPreviousDefinition()) {
-      FileID FID = SourceMgr.getFileID(Def.getLocation());
-      if (FID.isInvalid())
-        continue;
-
-      // We only care about the predefines buffer.
-      if (FID != PP.getPredefinesFileID())
-        continue;
-
-      // This macro was defined on the command line, then #undef'd later.
-      // Complain.
-      PP.Diag(ImportLoc, diag::warn_module_config_macro_undef)
-        << true << ConfigMacro << Mod->getFullModuleName();
-      if (LatestDef.isUndefined())
-        PP.Diag(LatestDef.getUndefLocation(), diag::note_module_def_undef_here)
-          << true;
-      return;
-    }
-
-    // Okay: no definition in the predefines buffer.
-    return;
-  }
-
-  // This identifier has a macro definition. Check whether we had a definition
-  // on the command line.
-  MacroDirective::DefInfo LatestDef =
-      PP.getMacroDirectiveHistory(Id)->getDefinition();
-  MacroDirective::DefInfo PredefinedDef;
-  for (MacroDirective::DefInfo Def = LatestDef; Def;
-         Def = Def.getPreviousDefinition()) {
-    FileID FID = SourceMgr.getFileID(Def.getLocation());
-    if (FID.isInvalid())
-      continue;
-
+  // Find the macro definition from the command line.
+  MacroInfo *CmdLineDefinition = nullptr;
+  for (auto *MD = LatestLocalMD; MD; MD = MD->getPrevious()) {
     // We only care about the predefines buffer.
-    if (FID != PP.getPredefinesFileID())
+    FileID FID = SourceMgr.getFileID(MD->getLocation());
+    if (FID.isInvalid() || FID != PP.getPredefinesFileID())
       continue;
-
-    PredefinedDef = Def;
+    if (auto *DMD = dyn_cast<DefMacroDirective>(MD))
+      CmdLineDefinition = DMD->getMacroInfo();
     break;
   }
 
-  // If there was no definition for this macro in the predefines buffer,
-  // complain.
-  if (!PredefinedDef ||
-      (!PredefinedDef.getLocation().isValid() &&
-       PredefinedDef.getUndefLocation().isValid())) {
+  auto *CurrentDefinition = PP.getMacroInfo(Id);
+  if (CurrentDefinition == CmdLineDefinition) {
+    // Macro matches. Nothing to do.
+  } else if (!CurrentDefinition) {
+    // This macro was defined on the command line, then #undef'd later.
+    // Complain.
+    PP.Diag(ImportLoc, diag::warn_module_config_macro_undef)
+      << true << ConfigMacro << Mod->getFullModuleName();
+    auto LatestDef = LatestLocalMD->getDefinition();
+    assert(LatestDef.isUndefined() &&
+           "predefined macro went away with no #undef?");
+    PP.Diag(LatestDef.getUndefLocation(), diag::note_module_def_undef_here)
+      << true;
+    return;
+  } else if (!CmdLineDefinition) {
+    // There was no definition for this macro in the predefines buffer,
+    // but there was a local definition. Complain.
     PP.Diag(ImportLoc, diag::warn_module_config_macro_undef)
       << false << ConfigMacro << Mod->getFullModuleName();
-    PP.Diag(LatestDef.getLocation(), diag::note_module_def_undef_here)
+    PP.Diag(CurrentDefinition->getDefinitionLoc(),
+            diag::note_module_def_undef_here)
       << false;
-    return;
+  } else if (!CurrentDefinition->isIdenticalTo(*CmdLineDefinition, PP,
+                                               /*Syntactically=*/true)) {
+    // The macro definitions differ.
+    PP.Diag(ImportLoc, diag::warn_module_config_macro_undef)
+      << false << ConfigMacro << Mod->getFullModuleName();
+    PP.Diag(CurrentDefinition->getDefinitionLoc(),
+            diag::note_module_def_undef_here)
+      << false;
   }
-
-  // If the current macro definition is the same as the predefined macro
-  // definition, it's okay.
-  if (LatestDef.getMacroInfo() == PredefinedDef.getMacroInfo() ||
-      LatestDef.getMacroInfo()->isIdenticalTo(*PredefinedDef.getMacroInfo(),PP,
-                                              /*Syntactically=*/true))
-    return;
-
-  // The macro definitions differ.
-  PP.Diag(ImportLoc, diag::warn_module_config_macro_undef)
-    << false << ConfigMacro << Mod->getFullModuleName();
-  PP.Diag(LatestDef.getLocation(), diag::note_module_def_undef_here)
-    << false;
 }
 
 /// \brief Write a new timestamp file with the given path.
@@ -1385,7 +1358,7 @@
     if (LastModuleImportResult && ModuleName != getLangOpts().CurrentModule &&
         ModuleName != getLangOpts().ImplementationOfModule)
       ModuleManager->makeModuleVisible(LastModuleImportResult, Visibility,
-                                       ImportLoc, /*Complain=*/false);
+                                       ImportLoc);
     return LastModuleImportResult;
   }
 
@@ -1628,8 +1601,7 @@
       return ModuleLoadResult();
     }
 
-    ModuleManager->makeModuleVisible(Module, Visibility, ImportLoc,
-                                     /*Complain=*/true);
+    ModuleManager->makeModuleVisible(Module, Visibility, ImportLoc);
   }
 
   // Check for any configuration macros that have changed.
@@ -1639,25 +1611,6 @@
                      Module, ImportLoc);
   }
 
-  // Determine whether we're in the #include buffer for a module. The #includes
-  // in that buffer do not qualify as module imports; they're just an
-  // implementation detail of us building the module.
-  bool IsInModuleIncludes = !getLangOpts().CurrentModule.empty() &&
-                            getSourceManager().getFileID(ImportLoc) ==
-                                getSourceManager().getMainFileID();
-
-  // If this module import was due to an inclusion directive, create an 
-  // implicit import declaration to capture it in the AST.
-  if (IsInclusionDirective && hasASTContext() && !IsInModuleIncludes) {
-    TranslationUnitDecl *TU = getASTContext().getTranslationUnitDecl();
-    ImportDecl *ImportD = ImportDecl::CreateImplicit(getASTContext(), TU,
-                                                     ImportLoc, Module,
-                                                     Path.back().second);
-    TU->addDecl(ImportD);
-    if (Consumer)
-      Consumer->HandleImplicitImportDecl(ImportD);
-  }
-  
   LastModuleImportLoc = ImportLoc;
   LastModuleImportResult = ModuleLoadResult(Module, false);
   return LastModuleImportResult;
@@ -1665,9 +1618,13 @@
 
 void CompilerInstance::makeModuleVisible(Module *Mod,
                                          Module::NameVisibilityKind Visibility,
-                                         SourceLocation ImportLoc,
-                                         bool Complain){
-  ModuleManager->makeModuleVisible(Mod, Visibility, ImportLoc, Complain);
+                                         SourceLocation ImportLoc) {
+  if (!ModuleManager)
+    createModuleManager();
+  if (!ModuleManager)
+    return;
+
+  ModuleManager->makeModuleVisible(Mod, Visibility, ImportLoc);
 }
 
 GlobalModuleIndex *CompilerInstance::loadGlobalModuleIndex(
diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp
index d2b528c..f4480bb 100644
--- a/lib/Frontend/CompilerInvocation.cpp
+++ b/lib/Frontend/CompilerInvocation.cpp
@@ -126,7 +126,7 @@
     } else {
       // Otherwise, add its value (for OPT_W_Joined and similar).
       for (const char *Arg : A->getValues())
-        Diagnostics.push_back(Arg);
+        Diagnostics.emplace_back(Arg);
     }
   }
 }
@@ -239,10 +239,8 @@
                          Opts.InlineMaxStackDepth, Diags);
 
   Opts.CheckersControlList.clear();
-  for (arg_iterator it = Args.filtered_begin(OPT_analyzer_checker,
-                                             OPT_analyzer_disable_checker),
-         ie = Args.filtered_end(); it != ie; ++it) {
-    const Arg *A = *it;
+  for (const Arg *A :
+       Args.filtered(OPT_analyzer_checker, OPT_analyzer_disable_checker)) {
     A->claim();
     bool enable = (A->getOption().getID() == OPT_analyzer_checker);
     // We can have a list of comma separated checker names, e.g:
@@ -250,14 +248,12 @@
     StringRef checkerList = A->getValue();
     SmallVector<StringRef, 4> checkers;
     checkerList.split(checkers, ",");
-    for (unsigned i = 0, e = checkers.size(); i != e; ++i)
-      Opts.CheckersControlList.push_back(std::make_pair(checkers[i], enable));
+    for (StringRef checker : checkers)
+      Opts.CheckersControlList.emplace_back(checker, enable);
   }
 
   // Go through the analyzer configuration options.
-  for (arg_iterator it = Args.filtered_begin(OPT_analyzer_config),
-       ie = Args.filtered_end(); it != ie; ++it) {
-    const Arg *A = *it;
+  for (const Arg *A : Args.filtered(OPT_analyzer_config)) {
     A->claim();
     // We can have a list of comma separated config names, e.g:
     // '-analyzer-config key1=val1,key2=val2'
@@ -325,15 +321,35 @@
   return Pattern;
 }
 
+static bool parseDiagnosticLevelMask(StringRef FlagName,
+                                     const std::vector<std::string> &Levels,
+                                     DiagnosticsEngine *Diags,
+                                     DiagnosticLevelMask &M) {
+  bool Success = true;
+  for (const auto &Level : Levels) {
+    DiagnosticLevelMask const PM =
+      llvm::StringSwitch<DiagnosticLevelMask>(Level)
+        .Case("note",    DiagnosticLevelMask::Note)
+        .Case("remark",  DiagnosticLevelMask::Remark)
+        .Case("warning", DiagnosticLevelMask::Warning)
+        .Case("error",   DiagnosticLevelMask::Error)
+        .Default(DiagnosticLevelMask::None);
+    if (PM == DiagnosticLevelMask::None) {
+      Success = false;
+      if (Diags)
+        Diags->Report(diag::err_drv_invalid_value) << FlagName << Level;
+    }
+    M = M | PM;
+  }
+  return Success;
+}
+
 static void parseSanitizerKinds(StringRef FlagName,
                                 const std::vector<std::string> &Sanitizers,
                                 DiagnosticsEngine &Diags, SanitizerSet &S) {
   for (const auto &Sanitizer : Sanitizers) {
-    SanitizerKind K = llvm::StringSwitch<SanitizerKind>(Sanitizer)
-#define SANITIZER(NAME, ID) .Case(NAME, SanitizerKind::ID)
-#include "clang/Basic/Sanitizers.def"
-                          .Default(SanitizerKind::Unknown);
-    if (K == SanitizerKind::Unknown)
+    SanitizerMask K = parseSanitizerValue(Sanitizer, /*AllowGroups=*/false);
+    if (K == 0)
       Diags.Report(diag::err_drv_invalid_value) << FlagName << Sanitizer;
     else
       S.set(K, true);
@@ -431,7 +447,9 @@
   Opts.DisableIntegratedAS = Args.hasArg(OPT_fno_integrated_as);
   Opts.Autolink = !Args.hasArg(OPT_fno_autolink);
   Opts.SampleProfileFile = Args.getLastArgValue(OPT_fprofile_sample_use_EQ);
-  Opts.ProfileInstrGenerate = Args.hasArg(OPT_fprofile_instr_generate);
+  Opts.ProfileInstrGenerate = Args.hasArg(OPT_fprofile_instr_generate) ||
+      Args.hasArg(OPT_fprofile_instr_generate_EQ);
+  Opts.InstrProfileOutput = Args.getLastArgValue(OPT_fprofile_instr_generate_EQ);
   Opts.InstrProfileInput = Args.getLastArgValue(OPT_fprofile_instr_use_EQ);
   Opts.CoverageMapping = Args.hasArg(OPT_fcoverage_mapping);
   Opts.DumpCoverageMapping = Args.hasArg(OPT_dump_coverage_mapping);
@@ -527,8 +545,14 @@
   Opts.CompressDebugSections = Args.hasArg(OPT_compress_debug_sections);
   Opts.DebugCompilationDir = Args.getLastArgValue(OPT_fdebug_compilation_dir);
   Opts.LinkBitcodeFile = Args.getLastArgValue(OPT_mlink_bitcode_file);
-  Opts.SanitizeCoverage =
-      getLastArgIntValue(Args, OPT_fsanitize_coverage, 0, Diags);
+  Opts.SanitizeCoverageType =
+      getLastArgIntValue(Args, OPT_fsanitize_coverage_type, 0, Diags);
+  Opts.SanitizeCoverageIndirectCalls =
+      Args.hasArg(OPT_fsanitize_coverage_indirect_calls);
+  Opts.SanitizeCoverageTraceBB = Args.hasArg(OPT_fsanitize_coverage_trace_bb);
+  Opts.SanitizeCoverageTraceCmp = Args.hasArg(OPT_fsanitize_coverage_trace_cmp);
+  Opts.SanitizeCoverage8bitCounters =
+      Args.hasArg(OPT_fsanitize_coverage_8bit_counters);
   Opts.SanitizeMemoryTrackOrigins =
       getLastArgIntValue(Args, OPT_fsanitize_memory_track_origins_EQ, 0, Diags);
   Opts.SanitizeUndefinedTrapOnError =
@@ -643,6 +667,9 @@
                       Args.getAllArgValues(OPT_fsanitize_recover_EQ), Diags,
                       Opts.SanitizeRecover);
 
+  Opts.CudaGpuBinaryFileNames =
+      Args.getAllArgValues(OPT_fcuda_include_gpubinary);
+
   return Success;
 }
 
@@ -661,6 +688,8 @@
   Opts.DOTOutputFile = Args.getLastArgValue(OPT_dependency_dot);
   Opts.ModuleDependencyOutputDir =
       Args.getLastArgValue(OPT_module_dependency_dir);
+  if (Args.hasArg(OPT_MV))
+    Opts.OutputFormat = DependencyOutputFormat::NMake;
 }
 
 bool clang::ParseDiagnosticArgs(DiagnosticOptions &Opts, ArgList &Args,
@@ -742,11 +771,18 @@
       << Args.getLastArg(OPT_fdiagnostics_format)->getAsString(Args)
       << Format;
   }
-  
+
   Opts.ShowSourceRanges = Args.hasArg(OPT_fdiagnostics_print_source_range_info);
   Opts.ShowParseableFixits = Args.hasArg(OPT_fdiagnostics_parseable_fixits);
   Opts.ShowPresumedLoc = !Args.hasArg(OPT_fno_diagnostics_use_presumed_location);
   Opts.VerifyDiagnostics = Args.hasArg(OPT_verify);
+  DiagnosticLevelMask DiagMask = DiagnosticLevelMask::None;
+  Success &= parseDiagnosticLevelMask("-verify-ignore-unexpected=",
+    Args.getAllArgValues(OPT_verify_ignore_unexpected_EQ),
+    Diags, DiagMask);
+  if (Args.hasArg(OPT_verify_ignore_unexpected))
+    DiagMask = DiagnosticLevelMask::All;
+  Opts.setVerifyIgnoreUnexpected(DiagMask);
   Opts.ElideType = !Args.hasArg(OPT_fno_elide_type);
   Opts.ShowTemplateTree = Args.hasArg(OPT_fdiagnostics_show_template_tree);
   Opts.ErrorLimit = getLastArgIntValue(Args, OPT_ferror_limit, 0, Diags);
@@ -857,26 +893,21 @@
   }
 
   if (const Arg* A = Args.getLastArg(OPT_plugin)) {
-    Opts.Plugins.push_back(A->getValue(0));
+    Opts.Plugins.emplace_back(A->getValue(0));
     Opts.ProgramAction = frontend::PluginAction;
     Opts.ActionName = A->getValue();
 
-    for (arg_iterator it = Args.filtered_begin(OPT_plugin_arg),
-           end = Args.filtered_end(); it != end; ++it) {
-      if ((*it)->getValue(0) == Opts.ActionName)
-        Opts.PluginArgs.push_back((*it)->getValue(1));
-    }
+    for (const Arg *AA : Args.filtered(OPT_plugin_arg))
+      if (AA->getValue(0) == Opts.ActionName)
+        Opts.PluginArgs.emplace_back(AA->getValue(1));
   }
 
   Opts.AddPluginActions = Args.getAllArgValues(OPT_add_plugin);
   Opts.AddPluginArgs.resize(Opts.AddPluginActions.size());
-  for (int i = 0, e = Opts.AddPluginActions.size(); i != e; ++i) {
-    for (arg_iterator it = Args.filtered_begin(OPT_plugin_arg),
-           end = Args.filtered_end(); it != end; ++it) {
-      if ((*it)->getValue(0) == Opts.AddPluginActions[i])
-        Opts.AddPluginArgs[i].push_back((*it)->getValue(1));
-    }
-  }
+  for (int i = 0, e = Opts.AddPluginActions.size(); i != e; ++i)
+    for (const Arg *A : Args.filtered(OPT_plugin_arg))
+      if (A->getValue(0) == Opts.AddPluginActions[i])
+        Opts.AddPluginArgs[i].emplace_back(A->getValue(1));
 
   if (const Arg *A = Args.getLastArg(OPT_code_completion_at)) {
     Opts.CodeCompletionAt =
@@ -1025,7 +1056,7 @@
       if (i == 0)
         DashX = IK;
     }
-    Opts.Inputs.push_back(FrontendInputFile(Inputs[i], IK));
+    Opts.Inputs.emplace_back(std::move(Inputs[i]), IK);
   }
 
   return DashX;
@@ -1078,98 +1109,77 @@
   Opts.ModulesValidateSystemHeaders =
       Args.hasArg(OPT_fmodules_validate_system_headers);
 
-  for (arg_iterator it = Args.filtered_begin(OPT_fmodules_ignore_macro),
-                    ie = Args.filtered_end();
-       it != ie; ++it) {
-    StringRef MacroDef = (*it)->getValue();
+  for (const Arg *A : Args.filtered(OPT_fmodules_ignore_macro)) {
+    StringRef MacroDef = A->getValue();
     Opts.ModulesIgnoreMacros.insert(MacroDef.split('=').first);
   }
 
   // Add -I..., -F..., and -index-header-map options in order.
   bool IsIndexHeaderMap = false;
-  for (arg_iterator it = Args.filtered_begin(OPT_I, OPT_F, 
-                                             OPT_index_header_map),
-       ie = Args.filtered_end(); it != ie; ++it) {
-    if ((*it)->getOption().matches(OPT_index_header_map)) {
+  for (const Arg *A : Args.filtered(OPT_I, OPT_F, OPT_index_header_map)) {
+    if (A->getOption().matches(OPT_index_header_map)) {
       // -index-header-map applies to the next -I or -F.
       IsIndexHeaderMap = true;
       continue;
     }
-        
-    frontend::IncludeDirGroup Group 
-      = IsIndexHeaderMap? frontend::IndexHeaderMap : frontend::Angled;
-    
-    Opts.AddPath((*it)->getValue(), Group,
-                 /*IsFramework=*/ (*it)->getOption().matches(OPT_F), true);
+
+    frontend::IncludeDirGroup Group =
+        IsIndexHeaderMap ? frontend::IndexHeaderMap : frontend::Angled;
+
+    Opts.AddPath(A->getValue(), Group,
+                 /*IsFramework=*/A->getOption().matches(OPT_F), true);
     IsIndexHeaderMap = false;
   }
 
   // Add -iprefix/-iwithprefix/-iwithprefixbefore options.
   StringRef Prefix = ""; // FIXME: This isn't the correct default prefix.
-  for (arg_iterator it = Args.filtered_begin(OPT_iprefix, OPT_iwithprefix,
-                                             OPT_iwithprefixbefore),
-         ie = Args.filtered_end(); it != ie; ++it) {
-    const Arg *A = *it;
+  for (const Arg *A :
+       Args.filtered(OPT_iprefix, OPT_iwithprefix, OPT_iwithprefixbefore)) {
     if (A->getOption().matches(OPT_iprefix))
       Prefix = A->getValue();
     else if (A->getOption().matches(OPT_iwithprefix))
-      Opts.AddPath(Prefix.str() + A->getValue(),
-                   frontend::After, false, true);
+      Opts.AddPath(Prefix.str() + A->getValue(), frontend::After, false, true);
     else
-      Opts.AddPath(Prefix.str() + A->getValue(),
-                   frontend::Angled, false, true);
+      Opts.AddPath(Prefix.str() + A->getValue(), frontend::Angled, false, true);
   }
 
-  for (arg_iterator it = Args.filtered_begin(OPT_idirafter),
-         ie = Args.filtered_end(); it != ie; ++it)
-    Opts.AddPath((*it)->getValue(), frontend::After, false, true);
-  for (arg_iterator it = Args.filtered_begin(OPT_iquote),
-         ie = Args.filtered_end(); it != ie; ++it)
-    Opts.AddPath((*it)->getValue(), frontend::Quoted, false, true);
-  for (arg_iterator it = Args.filtered_begin(OPT_isystem,
-         OPT_iwithsysroot), ie = Args.filtered_end(); it != ie; ++it)
-    Opts.AddPath((*it)->getValue(), frontend::System, false,
-                 !(*it)->getOption().matches(OPT_iwithsysroot));
-  for (arg_iterator it = Args.filtered_begin(OPT_iframework),
-         ie = Args.filtered_end(); it != ie; ++it)
-    Opts.AddPath((*it)->getValue(), frontend::System, true, true);
+  for (const Arg *A : Args.filtered(OPT_idirafter))
+    Opts.AddPath(A->getValue(), frontend::After, false, true);
+  for (const Arg *A : Args.filtered(OPT_iquote))
+    Opts.AddPath(A->getValue(), frontend::Quoted, false, true);
+  for (const Arg *A : Args.filtered(OPT_isystem, OPT_iwithsysroot))
+    Opts.AddPath(A->getValue(), frontend::System, false,
+                 !A->getOption().matches(OPT_iwithsysroot));
+  for (const Arg *A : Args.filtered(OPT_iframework))
+    Opts.AddPath(A->getValue(), frontend::System, true, true);
 
   // Add the paths for the various language specific isystem flags.
-  for (arg_iterator it = Args.filtered_begin(OPT_c_isystem),
-       ie = Args.filtered_end(); it != ie; ++it)
-    Opts.AddPath((*it)->getValue(), frontend::CSystem, false, true);
-  for (arg_iterator it = Args.filtered_begin(OPT_cxx_isystem),
-       ie = Args.filtered_end(); it != ie; ++it)
-    Opts.AddPath((*it)->getValue(), frontend::CXXSystem, false, true);
-  for (arg_iterator it = Args.filtered_begin(OPT_objc_isystem),
-       ie = Args.filtered_end(); it != ie; ++it)
-    Opts.AddPath((*it)->getValue(), frontend::ObjCSystem, false,true);
-  for (arg_iterator it = Args.filtered_begin(OPT_objcxx_isystem),
-       ie = Args.filtered_end(); it != ie; ++it)
-    Opts.AddPath((*it)->getValue(), frontend::ObjCXXSystem, false, true);
+  for (const Arg *A : Args.filtered(OPT_c_isystem))
+    Opts.AddPath(A->getValue(), frontend::CSystem, false, true);
+  for (const Arg *A : Args.filtered(OPT_cxx_isystem))
+    Opts.AddPath(A->getValue(), frontend::CXXSystem, false, true);
+  for (const Arg *A : Args.filtered(OPT_objc_isystem))
+    Opts.AddPath(A->getValue(), frontend::ObjCSystem, false,true);
+  for (const Arg *A : Args.filtered(OPT_objcxx_isystem))
+    Opts.AddPath(A->getValue(), frontend::ObjCXXSystem, false, true);
 
   // Add the internal paths from a driver that detects standard include paths.
-  for (arg_iterator I = Args.filtered_begin(OPT_internal_isystem,
-                                            OPT_internal_externc_isystem),
-                    E = Args.filtered_end();
-       I != E; ++I) {
+  for (const Arg *A :
+       Args.filtered(OPT_internal_isystem, OPT_internal_externc_isystem)) {
     frontend::IncludeDirGroup Group = frontend::System;
-    if ((*I)->getOption().matches(OPT_internal_externc_isystem))
+    if (A->getOption().matches(OPT_internal_externc_isystem))
       Group = frontend::ExternCSystem;
-    Opts.AddPath((*I)->getValue(), Group, false, true);
+    Opts.AddPath(A->getValue(), Group, false, true);
   }
 
   // Add the path prefixes which are implicitly treated as being system headers.
-  for (arg_iterator I = Args.filtered_begin(OPT_system_header_prefix,
-                                            OPT_no_system_header_prefix),
-                    E = Args.filtered_end();
-       I != E; ++I)
+  for (const Arg *A :
+       Args.filtered(OPT_system_header_prefix, OPT_no_system_header_prefix))
     Opts.AddSystemHeaderPrefix(
-        (*I)->getValue(), (*I)->getOption().matches(OPT_system_header_prefix));
+        A->getValue(), A->getOption().matches(OPT_system_header_prefix));
 
-  for (arg_iterator I = Args.filtered_begin(OPT_ivfsoverlay),
-       E = Args.filtered_end(); I != E; ++I)
-    Opts.AddVFSOverlayFile((*I)->getValue());
+  for (const Arg *A : Args.filtered(OPT_ivfsoverlay))
+    Opts.AddVFSOverlayFile(A->getValue());
 }
 
 void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK,
@@ -1226,7 +1236,7 @@
   Opts.CPlusPlus1z = Std.isCPlusPlus1z();
   Opts.Digraphs = Std.hasDigraphs();
   Opts.GNUMode = Std.isGNUMode();
-  Opts.GNUInline = !Std.isC99();
+  Opts.GNUInline = Std.isC89();
   Opts.HexFloats = Std.hasHexFloats();
   Opts.ImplicitInt = Std.hasImplicitInt();
 
@@ -1409,8 +1419,13 @@
         (Opts.ObjCRuntime.getKind() == ObjCRuntime::FragileMacOSX);
   }
     
-  if (Args.hasArg(OPT_fgnu89_inline))
-    Opts.GNUInline = 1;
+  if (Args.hasArg(OPT_fgnu89_inline)) {
+    if (Opts.CPlusPlus)
+      Diags.Report(diag::err_drv_argument_not_allowed_with) << "-fgnu89-inline"
+                                                            << "C++/ObjC++";
+    else
+      Opts.GNUInline = 1;
+  }
 
   if (Args.hasArg(OPT_fapple_kext)) {
     if (!Opts.CPlusPlus)
@@ -1504,6 +1519,8 @@
   Opts.ModulesStrictDeclUse = Args.hasArg(OPT_fmodules_strict_decluse);
   Opts.ModulesDeclUse =
       Args.hasArg(OPT_fmodules_decluse) || Opts.ModulesStrictDeclUse;
+  Opts.ModulesLocalVisibility =
+      Args.hasArg(OPT_fmodules_local_submodule_visibility);
   Opts.ModulesSearchAll = Opts.Modules &&
     !Args.hasArg(OPT_fno_modules_search_all) &&
     Args.hasArg(OPT_fmodules_search_all);
@@ -1520,6 +1537,7 @@
   Opts.NoMathBuiltin = Args.hasArg(OPT_fno_math_builtin);
   Opts.AssumeSaneOperatorNew = !Args.hasArg(OPT_fno_assume_sane_operator_new);
   Opts.SizedDeallocation = Args.hasArg(OPT_fsized_deallocation);
+  Opts.ConceptsTS = Args.hasArg(OPT_fconcepts_ts);
   Opts.HeinousExtensions = Args.hasArg(OPT_fheinous_gnu_extensions);
   Opts.AccessControl = !Args.hasArg(OPT_fno_access_control);
   Opts.ElideConstructors = !Args.hasArg(OPT_fno_elide_constructors);
@@ -1580,6 +1598,12 @@
         << Opts.CurrentModule << Opts.ImplementationOfModule;
   }
 
+  // For now, we only support local submodule visibility in C++ (because we
+  // heavily depend on the ODR for merging redefinitions).
+  if (Opts.ModulesLocalVisibility && !Opts.CPlusPlus)
+    Diags.Report(diag::err_drv_argument_not_allowed_with)
+        << "-fmodules-local-submodule-visibility" << "C";
+
   if (Arg *A = Args.getLastArg(OPT_faddress_space_map_mangling_EQ)) {
     switch (llvm::StringSwitch<unsigned>(A->getValue())
       .Case("target", LangOptions::ASMM_Target)
@@ -1620,12 +1644,8 @@
     Opts.setMSPointerToMemberRepresentationMethod(InheritanceModel);
   }
 
-  // Check if -fopenmp= is specified.
-  if (const Arg *A = Args.getLastArg(options::OPT_fopenmp_EQ)) {
-    Opts.OpenMP = llvm::StringSwitch<bool>(A->getValue())
-        .Case("libiomp5", true)
-        .Default(false);
-  }
+  // Check if -fopenmp is specified.
+  Opts.OpenMP = Args.hasArg(options::OPT_fopenmp);
 
   // Record whether the __DEPRECATED define was requested.
   Opts.Deprecated = Args.hasFlag(OPT_fdeprecated_macro,
@@ -1688,11 +1708,8 @@
   Opts.DisablePCHValidation = Args.hasArg(OPT_fno_validate_pch);
 
   Opts.DumpDeserializedPCHDecls = Args.hasArg(OPT_dump_deserialized_pch_decls);
-  for (arg_iterator it = Args.filtered_begin(OPT_error_on_deserialized_pch_decl),
-         ie = Args.filtered_end(); it != ie; ++it) {
-    const Arg *A = *it;
+  for (const Arg *A : Args.filtered(OPT_error_on_deserialized_pch_decl))
     Opts.DeserializedPCHDeclsToErrorOn.insert(A->getValue());
-  }
 
   if (const Arg *A = Args.getLastArg(OPT_preamble_bytes_EQ)) {
     StringRef Value(A->getValue());
@@ -1711,38 +1728,28 @@
   }
 
   // Add macros from the command line.
-  for (arg_iterator it = Args.filtered_begin(OPT_D, OPT_U),
-         ie = Args.filtered_end(); it != ie; ++it) {
-    if ((*it)->getOption().matches(OPT_D))
-      Opts.addMacroDef((*it)->getValue());
+  for (const Arg *A : Args.filtered(OPT_D, OPT_U)) {
+    if (A->getOption().matches(OPT_D))
+      Opts.addMacroDef(A->getValue());
     else
-      Opts.addMacroUndef((*it)->getValue());
+      Opts.addMacroUndef(A->getValue());
   }
 
   Opts.MacroIncludes = Args.getAllArgValues(OPT_imacros);
 
   // Add the ordered list of -includes.
-  for (arg_iterator it = Args.filtered_begin(OPT_include),
-         ie = Args.filtered_end(); it != ie; ++it) {
-    const Arg *A = *it;
-    Opts.Includes.push_back(A->getValue());
-  }
+  for (const Arg *A : Args.filtered(OPT_include))
+    Opts.Includes.emplace_back(A->getValue());
 
-  for (arg_iterator it = Args.filtered_begin(OPT_chain_include),
-         ie = Args.filtered_end(); it != ie; ++it) {
-    const Arg *A = *it;
-    Opts.ChainedIncludes.push_back(A->getValue());
-  }
+  for (const Arg *A : Args.filtered(OPT_chain_include))
+    Opts.ChainedIncludes.emplace_back(A->getValue());
 
   // Include 'altivec.h' if -faltivec option present
   if (Args.hasArg(OPT_faltivec))
-    Opts.Includes.push_back("altivec.h");
+    Opts.Includes.emplace_back("altivec.h");
 
-  for (arg_iterator it = Args.filtered_begin(OPT_remap_file),
-         ie = Args.filtered_end(); it != ie; ++it) {
-    const Arg *A = *it;
-    std::pair<StringRef,StringRef> Split =
-      StringRef(A->getValue()).split(';');
+  for (const Arg *A : Args.filtered(OPT_remap_file)) {
+    std::pair<StringRef, StringRef> Split = StringRef(A->getValue()).split(';');
 
     if (Split.second.empty()) {
       Diags.Report(diag::err_drv_invalid_remap_file) << A->getAsString(Args);
@@ -1751,7 +1758,7 @@
 
     Opts.addRemappedFile(Split.first, Split.second);
   }
-  
+
   if (Arg *A = Args.getLastArg(OPT_fobjc_arc_cxxlib_EQ)) {
     StringRef Name = A->getValue();
     unsigned Library = llvm::StringSwitch<unsigned>(Name)
@@ -1826,7 +1833,7 @@
   Opts.FeaturesAsWritten = Args.getAllArgValues(OPT_target_feature);
   Opts.LinkerVersion = Args.getLastArgValue(OPT_target_linker_version);
   Opts.Triple = llvm::Triple::normalize(Args.getLastArgValue(OPT_triple));
-
+  Opts.Reciprocals = Args.getAllArgValues(OPT_mrecip_EQ);
   // Use the default target triple if unspecified.
   if (Opts.Triple.empty())
     Opts.Triple = llvm::sys::getDefaultTargetTriple();
@@ -1854,24 +1861,22 @@
   }
 
   // Issue errors on unknown arguments.
-  for (arg_iterator it = Args->filtered_begin(OPT_UNKNOWN),
-         ie = Args->filtered_end(); it != ie; ++it) {
-    Diags.Report(diag::err_drv_unknown_argument) << (*it)->getAsString(*Args);
+  for (const Arg *A : Args->filtered(OPT_UNKNOWN)) {
+    Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(*Args);
     Success = false;
   }
 
-  Success = ParseAnalyzerArgs(*Res.getAnalyzerOpts(), *Args, Diags) && Success;
-  Success = ParseMigratorArgs(Res.getMigratorOpts(), *Args) && Success;
+  Success &= ParseAnalyzerArgs(*Res.getAnalyzerOpts(), *Args, Diags);
+  Success &= ParseMigratorArgs(Res.getMigratorOpts(), *Args);
   ParseDependencyOutputArgs(Res.getDependencyOutputOpts(), *Args);
-  Success = ParseDiagnosticArgs(Res.getDiagnosticOpts(), *Args, &Diags)
-            && Success;
+  Success &= ParseDiagnosticArgs(Res.getDiagnosticOpts(), *Args, &Diags);
   ParseCommentArgs(Res.getLangOpts()->CommentOpts, *Args);
   ParseFileSystemArgs(Res.getFileSystemOpts(), *Args);
   // FIXME: We shouldn't have to pass the DashX option around here
   InputKind DashX = ParseFrontendArgs(Res.getFrontendOpts(), *Args, Diags);
   ParseTargetArgs(Res.getTargetOpts(), *Args);
-  Success = ParseCodeGenArgs(Res.getCodeGenOpts(), *Args, DashX, Diags,
-                             Res.getTargetOpts()) && Success;
+  Success &= ParseCodeGenArgs(Res.getCodeGenOpts(), *Args, DashX, Diags,
+                              Res.getTargetOpts());
   ParseHeaderSearchArgs(Res.getHeaderSearchOpts(), *Args);
   if (DashX != IK_AST && DashX != IK_LLVM_IR) {
     ParseLangArgs(*Res.getLangOpts(), *Args, DashX, Diags);
diff --git a/lib/Frontend/DependencyFile.cpp b/lib/Frontend/DependencyFile.cpp
index 6ea8f51..0995ab4 100644
--- a/lib/Frontend/DependencyFile.cpp
+++ b/lib/Frontend/DependencyFile.cpp
@@ -150,6 +150,8 @@
   bool AddMissingHeaderDeps;
   bool SeenMissingHeader;
   bool IncludeModuleFiles;
+  DependencyOutputFormat OutputFormat;
+
 private:
   bool FileMatchesDepCriteria(const char *Filename,
                               SrcMgr::CharacteristicKind FileType);
@@ -162,7 +164,8 @@
       PhonyTarget(Opts.UsePhonyTargets),
       AddMissingHeaderDeps(Opts.AddMissingHeaderDeps),
       SeenMissingHeader(false),
-      IncludeModuleFiles(Opts.IncludeModuleFiles) {}
+      IncludeModuleFiles(Opts.IncludeModuleFiles),
+      OutputFormat(Opts.OutputFormat) {}
 
   void FileChanged(SourceLocation Loc, FileChangeReason Reason,
                    SrcMgr::CharacteristicKind FileType,
@@ -289,13 +292,76 @@
     Files.push_back(Filename);
 }
 
-/// PrintFilename - GCC escapes spaces, # and $, but apparently not ' or " or
-/// other scary characters.
-static void PrintFilename(raw_ostream &OS, StringRef Filename) {
+/// Print the filename, with escaping or quoting that accommodates the three
+/// most likely tools that use dependency files: GNU Make, BSD Make, and
+/// NMake/Jom.
+///
+/// BSD Make is the simplest case: It does no escaping at all.  This means
+/// characters that are normally delimiters, i.e. space and # (the comment
+/// character) simply aren't supported in filenames.
+///
+/// GNU Make does allow space and # in filenames, but to avoid being treated
+/// as a delimiter or comment, these must be escaped with a backslash. Because
+/// backslash is itself the escape character, if a backslash appears in a
+/// filename, it should be escaped as well.  (As a special case, $ is escaped
+/// as $$, which is the normal Make way to handle the $ character.)
+/// For compatibility with BSD Make and historical practice, if GNU Make
+/// un-escapes characters in a filename but doesn't find a match, it will
+/// retry with the unmodified original string.
+///
+/// GCC tries to accommodate both Make formats by escaping any space or #
+/// characters in the original filename, but not escaping backslashes.  The
+/// apparent intent is so that filenames with backslashes will be handled
+/// correctly by BSD Make, and by GNU Make in its fallback mode of using the
+/// unmodified original string; filenames with # or space characters aren't
+/// supported by BSD Make at all, but will be handled correctly by GNU Make
+/// due to the escaping.
+///
+/// A corner case that GCC gets only partly right is when the original filename
+/// has a backslash immediately followed by space or #.  GNU Make would expect
+/// this backslash to be escaped; however GCC escapes the original backslash
+/// only when followed by space, not #.  It will therefore take a dependency
+/// from a directive such as
+///     #include "a\ b\#c.h"
+/// and emit it as
+///     a\\\ b\\#c.h
+/// which GNU Make will interpret as
+///     a\ b\
+/// followed by a comment. Failing to find this file, it will fall back to the
+/// original string, which probably doesn't exist either; in any case it won't
+/// find
+///     a\ b\#c.h
+/// which is the actual filename specified by the include directive.
+///
+/// Clang does what GCC does, rather than what GNU Make expects.
+///
+/// NMake/Jom has a different set of scary characters, but wraps filespecs in
+/// double-quotes to avoid misinterpreting them; see
+/// https://msdn.microsoft.com/en-us/library/dd9y37ha.aspx for NMake info,
+/// https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx
+/// for Windows file-naming info.
+static void PrintFilename(raw_ostream &OS, StringRef Filename,
+                          DependencyOutputFormat OutputFormat) {
+  if (OutputFormat == DependencyOutputFormat::NMake) {
+    // Add quotes if needed. These are the characters listed as "special" to
+    // NMake, that are legal in a Windows filespec, and that could cause
+    // misinterpretation of the dependency string.
+    if (Filename.find_first_of(" #${}^!") != StringRef::npos)
+      OS << '\"' << Filename << '\"';
+    else
+      OS << Filename;
+    return;
+  }
+  assert(OutputFormat == DependencyOutputFormat::Make);
   for (unsigned i = 0, e = Filename.size(); i != e; ++i) {
-    if (Filename[i] == ' ' || Filename[i] == '#')
+    if (Filename[i] == '#') // Handle '#' the broken gcc way.
       OS << '\\';
-    else if (Filename[i] == '$') // $ is escaped by $$.
+    else if (Filename[i] == ' ') { // Handle space correctly.
+      OS << '\\';
+      unsigned j = i;
+      while (j > 0 && Filename[--j] == '\\')
+        OS << '\\';
+    } else if (Filename[i] == '$') // $ is escaped by $$.
       OS << '$';
     OS << Filename[i];
   }
@@ -354,7 +420,7 @@
       Columns = 2;
     }
     OS << ' ';
-    PrintFilename(OS, *I);
+    PrintFilename(OS, *I, OutputFormat);
     Columns += N + 1;
   }
   OS << '\n';
@@ -365,7 +431,7 @@
     for (std::vector<std::string>::iterator I = Files.begin() + 1,
            E = Files.end(); I != E; ++I) {
       OS << '\n';
-      PrintFilename(OS, *I);
+      PrintFilename(OS, *I, OutputFormat);
       OS << ":\n";
     }
   }
diff --git a/lib/Frontend/FrontendAction.cpp b/lib/Frontend/FrontendAction.cpp
index 8390624..9bba755 100644
--- a/lib/Frontend/FrontendAction.cpp
+++ b/lib/Frontend/FrontendAction.cpp
@@ -71,7 +71,7 @@
       Previous->SelectorRead(ID, Sel);
   }
   void MacroDefinitionRead(serialization::PreprocessedEntityID PPID,
-                           MacroDefinition *MD) override {
+                           MacroDefinitionRecord *MD) override {
     if (Previous)
       Previous->MacroDefinitionRead(PPID, MD);
   }
@@ -468,16 +468,12 @@
   // FIXME: There is more per-file stuff we could just drop here?
   bool DisableFree = CI.getFrontendOpts().DisableFree;
   if (DisableFree) {
-    if (!isCurrentFileAST()) {
-      CI.resetAndLeakSema();
-      CI.resetAndLeakASTContext();
-    }
+    CI.resetAndLeakSema();
+    CI.resetAndLeakASTContext();
     BuryPointer(CI.takeASTConsumer().get());
   } else {
-    if (!isCurrentFileAST()) {
-      CI.setSema(nullptr);
-      CI.setASTContext(nullptr);
-    }
+    CI.setSema(nullptr);
+    CI.setASTContext(nullptr);
     CI.setASTConsumer(nullptr);
   }
 
@@ -494,13 +490,16 @@
   // FrontendAction.
   CI.clearOutputFiles(/*EraseFiles=*/shouldEraseOutputFiles());
 
-  // FIXME: Only do this if DisableFree is set.
   if (isCurrentFileAST()) {
-    CI.resetAndLeakSema();
-    CI.resetAndLeakASTContext();
-    CI.resetAndLeakPreprocessor();
-    CI.resetAndLeakSourceManager();
-    CI.resetAndLeakFileManager();
+    if (DisableFree) {
+      CI.resetAndLeakPreprocessor();
+      CI.resetAndLeakSourceManager();
+      CI.resetAndLeakFileManager();
+    } else {
+      CI.setPreprocessor(nullptr);
+      CI.setSourceManager(nullptr);
+      CI.setFileManager(nullptr);
+    }
   }
 
   setCompilerInstance(nullptr);
diff --git a/lib/Frontend/FrontendActions.cpp b/lib/Frontend/FrontendActions.cpp
index 0defe5c..46cdeeb 100644
--- a/lib/Frontend/FrontendActions.cpp
+++ b/lib/Frontend/FrontendActions.cpp
@@ -152,22 +152,6 @@
   return std::error_code();
 }
 
-static std::error_code addHeaderInclude(const FileEntry *Header,
-                                        SmallVectorImpl<char> &Includes,
-                                        const LangOptions &LangOpts,
-                                        bool IsExternC) {
-  // Use an absolute path if we don't have a filename as written in the module
-  // map file; this ensures that we will identify the right file independent of
-  // header search paths.
-  if (llvm::sys::path::is_absolute(Header->getName()))
-    return addHeaderInclude(Header->getName(), Includes, LangOpts, IsExternC);
-
-  SmallString<256> AbsName(Header->getName());
-  if (std::error_code Err = llvm::sys::fs::make_absolute(AbsName))
-    return Err;
-  return addHeaderInclude(AbsName, Includes, LangOpts, IsExternC);
-}
-
 /// \brief Collect the set of header includes needed to construct the given 
 /// module and update the TopHeaders file set of the module.
 ///
@@ -196,20 +180,20 @@
   }
   // Note that Module->PrivateHeaders will not be a TopHeader.
 
-  if (const FileEntry *UmbrellaHeader = Module->getUmbrellaHeader()) {
-    // FIXME: Track the name as written here.
-    Module->addTopHeader(UmbrellaHeader);
+  if (Module::Header UmbrellaHeader = Module->getUmbrellaHeader()) {
+    Module->addTopHeader(UmbrellaHeader.Entry);
     if (Module->Parent) {
       // Include the umbrella header for submodules.
-      if (std::error_code Err = addHeaderInclude(UmbrellaHeader, Includes,
-                                                 LangOpts, Module->IsExternC))
+      if (std::error_code Err = addHeaderInclude(UmbrellaHeader.NameAsWritten,
+                                                 Includes, LangOpts,
+                                                 Module->IsExternC))
         return Err;
     }
-  } else if (const DirectoryEntry *UmbrellaDir = Module->getUmbrellaDir()) {
+  } else if (Module::DirectoryName UmbrellaDir = Module->getUmbrellaDir()) {
     // Add all of the headers we find in this subdirectory.
     std::error_code EC;
     SmallString<128> DirNative;
-    llvm::sys::path::native(UmbrellaDir->getName(), DirNative);
+    llvm::sys::path::native(UmbrellaDir.Entry->getName(), DirNative);
     for (llvm::sys::fs::recursive_directory_iterator Dir(DirNative, EC), 
                                                      DirEnd;
          Dir != DirEnd && !EC; Dir.increment(EC)) {
@@ -231,11 +215,20 @@
       if (ModMap.isHeaderUnavailableInModule(Header, Module))
         continue;
 
+      // Compute the relative path from the directory to this file.
+      SmallVector<StringRef, 16> Components;
+      auto PathIt = llvm::sys::path::rbegin(Dir->path());
+      for (int I = 0; I != Dir.level() + 1; ++I, ++PathIt)
+        Components.push_back(*PathIt);
+      SmallString<128> RelativeHeader(UmbrellaDir.NameAsWritten);
+      for (auto It = Components.rbegin(), End = Components.rend(); It != End;
+           ++It)
+        llvm::sys::path::append(RelativeHeader, *It);
+
       // Include this header as part of the umbrella directory.
-      // FIXME: Track the name as written through to here.
       Module->addTopHeader(Header);
-      if (std::error_code Err =
-              addHeaderInclude(Header, Includes, LangOpts, Module->IsExternC))
+      if (std::error_code Err = addHeaderInclude(RelativeHeader, Includes,
+                                                 LangOpts, Module->IsExternC))
         return Err;
     }
 
@@ -327,10 +320,9 @@
   // Collect the set of #includes we need to build the module.
   SmallString<256> HeaderContents;
   std::error_code Err = std::error_code();
-  if (const FileEntry *UmbrellaHeader = Module->getUmbrellaHeader())
-    // FIXME: Track the file name as written.
-    Err = addHeaderInclude(UmbrellaHeader, HeaderContents, CI.getLangOpts(),
-                           Module->IsExternC);
+  if (Module::Header UmbrellaHeader = Module->getUmbrellaHeader())
+    Err = addHeaderInclude(UmbrellaHeader.NameAsWritten, HeaderContents,
+                           CI.getLangOpts(), Module->IsExternC);
   if (!Err)
     Err = collectModuleHeaderIncludes(
         CI.getLangOpts(), FileMgr,
diff --git a/lib/Frontend/InitHeaderSearch.cpp b/lib/Frontend/InitHeaderSearch.cpp
index 2bd999e..bf8470e 100644
--- a/lib/Frontend/InitHeaderSearch.cpp
+++ b/lib/Frontend/InitHeaderSearch.cpp
@@ -65,7 +65,7 @@
   /// AddSystemHeaderPrefix - Add the specified prefix to the system header
   /// prefix list.
   void AddSystemHeaderPrefix(StringRef Prefix, bool IsSystemHeader) {
-    SystemHeaderPrefixes.push_back(std::make_pair(Prefix, IsSystemHeader));
+    SystemHeaderPrefixes.emplace_back(Prefix, IsSystemHeader);
   }
 
   /// AddGnuCPlusPlusIncludePaths - Add the necessary paths to support a gnu
diff --git a/lib/Frontend/InitPreprocessor.cpp b/lib/Frontend/InitPreprocessor.cpp
index d9ae3ba..dfc46f4 100644
--- a/lib/Frontend/InitPreprocessor.cpp
+++ b/lib/Frontend/InitPreprocessor.cpp
@@ -453,6 +453,8 @@
   }
   if (LangOpts.SizedDeallocation)
     Builder.defineMacro("__cpp_sized_deallocation", "201309");
+  if (LangOpts.ConceptsTS)
+    Builder.defineMacro("__cpp_experimental_concepts", "1");
 }
 
 static void InitializePredefinedMacros(const TargetInfo &TI,
@@ -790,7 +792,7 @@
     Builder.defineMacro("__FINITE_MATH_ONLY__", "0");
 
   if (!LangOpts.MSVCCompat) {
-    if (LangOpts.GNUInline)
+    if (LangOpts.GNUInline || LangOpts.CPlusPlus)
       Builder.defineMacro("__GNUC_GNU_INLINE__");
     else
       Builder.defineMacro("__GNUC_STDC_INLINE__");
diff --git a/lib/Frontend/MultiplexConsumer.cpp b/lib/Frontend/MultiplexConsumer.cpp
index 007ddc2..219e949 100644
--- a/lib/Frontend/MultiplexConsumer.cpp
+++ b/lib/Frontend/MultiplexConsumer.cpp
@@ -37,9 +37,10 @@
   void DeclRead(serialization::DeclID ID, const Decl *D) override;
   void SelectorRead(serialization::SelectorID iD, Selector Sel) override;
   void MacroDefinitionRead(serialization::PreprocessedEntityID,
-                           MacroDefinition *MD) override;
+                           MacroDefinitionRecord *MD) override;
+
 private:
-  std::vector<ASTDeserializationListener*> Listeners;
+  std::vector<ASTDeserializationListener *> Listeners;
 };
 
 MultiplexASTDeserializationListener::MultiplexASTDeserializationListener(
@@ -78,7 +79,7 @@
 }
 
 void MultiplexASTDeserializationListener::MacroDefinitionRead(
-    serialization::PreprocessedEntityID ID, MacroDefinition *MD) {
+    serialization::PreprocessedEntityID ID, MacroDefinitionRecord *MD) {
   for (size_t i = 0, e = Listeners.size(); i != e; ++i)
     Listeners[i]->MacroDefinitionRead(ID, MD);
 }
@@ -110,8 +111,7 @@
                                     const ObjCCategoryDecl *ClassExt) override;
   void DeclarationMarkedUsed(const Decl *D) override;
   void DeclarationMarkedOpenMPThreadPrivate(const Decl *D) override;
-  void RedefinedHiddenDefinition(const NamedDecl *D,
-                                 SourceLocation Loc) override;
+  void RedefinedHiddenDefinition(const NamedDecl *D, Module *M) override;
 
 private:
   std::vector<ASTMutationListener*> Listeners;
@@ -195,10 +195,10 @@
   for (size_t i = 0, e = Listeners.size(); i != e; ++i)
     Listeners[i]->DeclarationMarkedOpenMPThreadPrivate(D);
 }
-void MultiplexASTMutationListener::RedefinedHiddenDefinition(
-    const NamedDecl *D, SourceLocation Loc) {
+void MultiplexASTMutationListener::RedefinedHiddenDefinition(const NamedDecl *D,
+                                                             Module *M) {
   for (auto *L : Listeners)
-    L->RedefinedHiddenDefinition(D, Loc);
+    L->RedefinedHiddenDefinition(D, M);
 }
 
 }  // end namespace clang
diff --git a/lib/Frontend/PrintPreprocessedOutput.cpp b/lib/Frontend/PrintPreprocessedOutput.cpp
index 6507f8e..6192554 100644
--- a/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -64,12 +64,11 @@
     OS << ' ';
 
   SmallString<128> SpellingBuffer;
-  for (MacroInfo::tokens_iterator I = MI.tokens_begin(), E = MI.tokens_end();
-       I != E; ++I) {
-    if (I->hasLeadingSpace())
+  for (const auto &T : MI.tokens()) {
+    if (T.hasLeadingSpace())
       OS << ' ';
 
-    OS << PP.getSpelling(*I, SpellingBuffer);
+    OS << PP.getSpelling(T, SpellingBuffer);
   }
 }
 
@@ -129,7 +128,7 @@
                           CharSourceRange FilenameRange, const FileEntry *File,
                           StringRef SearchPath, StringRef RelativePath,
                           const Module *Imported) override;
-  void Ident(SourceLocation Loc, const std::string &str) override;
+  void Ident(SourceLocation Loc, StringRef str) override;
   void PragmaMessage(SourceLocation Loc, StringRef Namespace,
                      PragmaMessageKind Kind, StringRef Str) override;
   void PragmaDebug(SourceLocation Loc, StringRef DebugType) override;
@@ -170,7 +169,7 @@
 
   /// MacroUndefined - This hook is called whenever a macro #undef is seen.
   void MacroUndefined(const Token &MacroNameTok,
-                      const MacroDirective *MD) override;
+                      const MacroDefinition &MD) override;
 };
 }  // end anonymous namespace
 
@@ -338,11 +337,11 @@
 
 /// Ident - Handle #ident directives when read by the preprocessor.
 ///
-void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, const std::string &S) {
+void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, StringRef S) {
   MoveToLine(Loc);
 
   OS.write("#ident ", strlen("#ident "));
-  OS.write(&S[0], S.size());
+  OS.write(S.begin(), S.size());
   EmittedTokensOnThisLine = true;
 }
 
@@ -361,7 +360,7 @@
 }
 
 void PrintPPOutputPPCallbacks::MacroUndefined(const Token &MacroNameTok,
-                                              const MacroDirective *MD) {
+                                              const MacroDefinition &MD) {
   // Only print out macro definitions in -dD mode.
   if (!DumpDefines) return;
 
@@ -686,8 +685,9 @@
   SmallVector<id_macro_pair, 128> MacrosByID;
   for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end();
        I != E; ++I) {
-    if (I->first->hasMacroDefinition())
-      MacrosByID.push_back(id_macro_pair(I->first, I->second->getMacroInfo()));
+    auto *MD = I->second.getLatest();
+    if (MD && MD->isDefined())
+      MacrosByID.push_back(id_macro_pair(I->first, MD->getMacroInfo()));
   }
   llvm::array_pod_sort(MacrosByID.begin(), MacrosByID.end(), MacroIDCompare);
 
diff --git a/lib/Frontend/Rewrite/InclusionRewriter.cpp b/lib/Frontend/Rewrite/InclusionRewriter.cpp
index 865bb29..b9ea051 100644
--- a/lib/Frontend/Rewrite/InclusionRewriter.cpp
+++ b/lib/Frontend/Rewrite/InclusionRewriter.cpp
@@ -61,7 +61,7 @@
   void FileChanged(SourceLocation Loc, FileChangeReason Reason,
                    SrcMgr::CharacteristicKind FileType,
                    FileID PrevFID) override;
-  void FileSkipped(const FileEntry &ParentFile, const Token &FilenameTok,
+  void FileSkipped(const FileEntry &SkippedFile, const Token &FilenameTok,
                    SrcMgr::CharacteristicKind FileType) override;
   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
                           StringRef FileName, bool IsAngled,
@@ -153,7 +153,7 @@
 
 /// Called whenever an inclusion is skipped due to canonical header protection
 /// macros.
-void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/,
+void InclusionRewriter::FileSkipped(const FileEntry &/*SkippedFile*/,
                                     const Token &/*FilenameTok*/,
                                     SrcMgr::CharacteristicKind /*FileType*/) {
   assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't "
diff --git a/lib/Frontend/Rewrite/RewriteObjC.cpp b/lib/Frontend/Rewrite/RewriteObjC.cpp
index 170c209..b2a45b4 100644
--- a/lib/Frontend/Rewrite/RewriteObjC.cpp
+++ b/lib/Frontend/Rewrite/RewriteObjC.cpp
@@ -3818,16 +3818,16 @@
   FieldDecl *FD = FieldDecl::Create(*Context, nullptr, SourceLocation(),
                                     SourceLocation(),
                                     &Context->Idents.get("FuncPtr"),
-                                    Context->VoidPtrTy, nullptr,
-                                    /*BitWidth=*/nullptr, /*Mutable=*/true,
-                                    ICIS_NoInit);
-  MemberExpr *ME =
-      new (Context) MemberExpr(PE, true, SourceLocation(), FD, SourceLocation(),
-                               FD->getType(), VK_LValue, OK_Ordinary);
-
-  CastExpr *FunkCast = NoTypeInfoCStyleCastExpr(Context, PtrToFuncCastType,
-                                                CK_BitCast, ME);
-  PE = new (Context) ParenExpr(SourceLocation(), SourceLocation(), FunkCast);
+                                    Context->VoidPtrTy, nullptr,
+                                    /*BitWidth=*/nullptr, /*Mutable=*/true,
+                                    ICIS_NoInit);
+  MemberExpr *ME =
+      new (Context) MemberExpr(PE, true, SourceLocation(), FD, SourceLocation(),
+                               FD->getType(), VK_LValue, OK_Ordinary);
+
+  CastExpr *FunkCast = NoTypeInfoCStyleCastExpr(Context, PtrToFuncCastType,
+                                                CK_BitCast, ME);
+  PE = new (Context) ParenExpr(SourceLocation(), SourceLocation(), FunkCast);
 
   SmallVector<Expr*, 8> BlkExprs;
   // Add the implicit argument.
@@ -3866,26 +3866,26 @@
   FieldDecl *FD = FieldDecl::Create(*Context, nullptr, SourceLocation(),
                                     SourceLocation(),
                                     &Context->Idents.get("__forwarding"), 
-                                    Context->VoidPtrTy, nullptr,
-                                    /*BitWidth=*/nullptr, /*Mutable=*/true,
-                                    ICIS_NoInit);
-  MemberExpr *ME = new (Context)
-      MemberExpr(DeclRefExp, isArrow, SourceLocation(), FD, SourceLocation(),
-                 FD->getType(), VK_LValue, OK_Ordinary);
-
-  StringRef Name = VD->getName();
-  FD = FieldDecl::Create(*Context, nullptr, SourceLocation(), SourceLocation(),
+                                    Context->VoidPtrTy, nullptr,
+                                    /*BitWidth=*/nullptr, /*Mutable=*/true,
+                                    ICIS_NoInit);
+  MemberExpr *ME = new (Context)
+      MemberExpr(DeclRefExp, isArrow, SourceLocation(), FD, SourceLocation(),
+                 FD->getType(), VK_LValue, OK_Ordinary);
+
+  StringRef Name = VD->getName();
+  FD = FieldDecl::Create(*Context, nullptr, SourceLocation(), SourceLocation(),
                          &Context->Idents.get(Name), 
-                         Context->VoidPtrTy, nullptr,
-                         /*BitWidth=*/nullptr, /*Mutable=*/true,
-                         ICIS_NoInit);
-  ME =
-      new (Context) MemberExpr(ME, true, SourceLocation(), FD, SourceLocation(),
-                               DeclRefExp->getType(), VK_LValue, OK_Ordinary);
-
-  // Need parens to enforce precedence.
-  ParenExpr *PE = new (Context) ParenExpr(DeclRefExp->getExprLoc(), 
-                                          DeclRefExp->getExprLoc(), 
+                         Context->VoidPtrTy, nullptr,
+                         /*BitWidth=*/nullptr, /*Mutable=*/true,
+                         ICIS_NoInit);
+  ME =
+      new (Context) MemberExpr(ME, true, SourceLocation(), FD, SourceLocation(),
+                               DeclRefExp->getType(), VK_LValue, OK_Ordinary);
+
+  // Need parens to enforce precedence.
+  ParenExpr *PE = new (Context) ParenExpr(DeclRefExp->getExprLoc(), 
+                                          DeclRefExp->getExprLoc(), 
                                           ME);
   ReplaceStmt(DeclRefExp, PE);
   return PE;
@@ -5874,15 +5874,15 @@
       // Don't forget the parens to enforce the proper binding.
       ParenExpr *PE = new (Context) ParenExpr(OldRange.getBegin(),
                                               OldRange.getEnd(),
-                                              castExpr);
-      if (IV->isFreeIvar() &&
-          declaresSameEntity(CurMethodDef->getClassInterface(), iFaceDecl->getDecl())) {
-        MemberExpr *ME = new (Context)
-            MemberExpr(PE, true, SourceLocation(), D, IV->getLocation(),
-                       D->getType(), VK_LValue, OK_Ordinary);
-        Replacement = ME;
-      } else {
-        IV->setBase(PE);
+                                              castExpr);
+      if (IV->isFreeIvar() &&
+          declaresSameEntity(CurMethodDef->getClassInterface(), iFaceDecl->getDecl())) {
+        MemberExpr *ME = new (Context)
+            MemberExpr(PE, true, SourceLocation(), D, IV->getLocation(),
+                       D->getType(), VK_LValue, OK_Ordinary);
+        Replacement = ME;
+      } else {
+        IV->setBase(PE);
       }
     }
   } else { // we are outside a method.
diff --git a/lib/Frontend/TextDiagnostic.cpp b/lib/Frontend/TextDiagnostic.cpp
index 17e41f6..aaf17a9 100644
--- a/lib/Frontend/TextDiagnostic.cpp
+++ b/lib/Frontend/TextDiagnostic.cpp
@@ -810,7 +810,7 @@
         OS << ',';
         // Visual Studio 2010 or earlier expects column number to be off by one
         if (LangOpts.MSCompatibilityVersion &&
-            LangOpts.MSCompatibilityVersion < 170000000)
+            !LangOpts.isCompatibleWithMSVC(LangOptions::MSVC2012))
           ColNo--;
       } else
         OS << ':';
diff --git a/lib/Frontend/TextDiagnosticBuffer.cpp b/lib/Frontend/TextDiagnosticBuffer.cpp
index 9c6bebb..d49e983 100644
--- a/lib/Frontend/TextDiagnosticBuffer.cpp
+++ b/lib/Frontend/TextDiagnosticBuffer.cpp
@@ -30,17 +30,17 @@
   default: llvm_unreachable(
                          "Diagnostic not handled during diagnostic buffering!");
   case DiagnosticsEngine::Note:
-    Notes.push_back(std::make_pair(Info.getLocation(), Buf.str()));
+    Notes.emplace_back(Info.getLocation(), Buf.str());
     break;
   case DiagnosticsEngine::Warning:
-    Warnings.push_back(std::make_pair(Info.getLocation(), Buf.str()));
+    Warnings.emplace_back(Info.getLocation(), Buf.str());
     break;
   case DiagnosticsEngine::Remark:
-    Remarks.push_back(std::make_pair(Info.getLocation(), Buf.str()));
+    Remarks.emplace_back(Info.getLocation(), Buf.str());
     break;
   case DiagnosticsEngine::Error:
   case DiagnosticsEngine::Fatal:
-    Errors.push_back(std::make_pair(Info.getLocation(), Buf.str()));
+    Errors.emplace_back(Info.getLocation(), Buf.str());
     break;
   }
 }
diff --git a/lib/Frontend/VerifyDiagnosticConsumer.cpp b/lib/Frontend/VerifyDiagnosticConsumer.cpp
index 910e394..55df936 100644
--- a/lib/Frontend/VerifyDiagnosticConsumer.cpp
+++ b/lib/Frontend/VerifyDiagnosticConsumer.cpp
@@ -691,7 +691,8 @@
                            const char *Label,
                            DirectiveList &Left,
                            const_diag_iterator d2_begin,
-                           const_diag_iterator d2_end) {
+                           const_diag_iterator d2_end,
+                           bool IgnoreUnexpected) {
   std::vector<Directive *> LeftOnly;
   DiagList Right(d2_begin, d2_end);
 
@@ -727,7 +728,8 @@
   }
   // Now all that's left in Right are those that were not matched.
   unsigned num = PrintExpected(Diags, SourceMgr, LeftOnly, Label);
-  num += PrintUnexpected(Diags, &SourceMgr, Right.begin(), Right.end(), Label);
+  if (!IgnoreUnexpected)
+    num += PrintUnexpected(Diags, &SourceMgr, Right.begin(), Right.end(), Label);
   return num;
 }
 
@@ -745,21 +747,28 @@
   //   Seen \ Expected - set seen but not expected
   unsigned NumProblems = 0;
 
+  const DiagnosticLevelMask DiagMask =
+    Diags.getDiagnosticOptions().getVerifyIgnoreUnexpected();
+
   // See if there are error mismatches.
   NumProblems += CheckLists(Diags, SourceMgr, "error", ED.Errors,
-                            Buffer.err_begin(), Buffer.err_end());
+                            Buffer.err_begin(), Buffer.err_end(),
+                            bool(DiagnosticLevelMask::Error & DiagMask));
 
   // See if there are warning mismatches.
   NumProblems += CheckLists(Diags, SourceMgr, "warning", ED.Warnings,
-                            Buffer.warn_begin(), Buffer.warn_end());
+                            Buffer.warn_begin(), Buffer.warn_end(),
+                            bool(DiagnosticLevelMask::Warning & DiagMask));
 
   // See if there are remark mismatches.
   NumProblems += CheckLists(Diags, SourceMgr, "remark", ED.Remarks,
-                            Buffer.remark_begin(), Buffer.remark_end());
+                            Buffer.remark_begin(), Buffer.remark_end(),
+                            bool(DiagnosticLevelMask::Remark & DiagMask));
 
   // See if there are note mismatches.
   NumProblems += CheckLists(Diags, SourceMgr, "note", ED.Notes,
-                            Buffer.note_begin(), Buffer.note_end());
+                            Buffer.note_begin(), Buffer.note_end(),
+                            bool(DiagnosticLevelMask::Note & DiagMask));
 
   return NumProblems;
 }
@@ -854,12 +863,20 @@
     // Check that the expected diagnostics occurred.
     NumErrors += CheckResults(Diags, *SrcManager, *Buffer, ED);
   } else {
-    NumErrors += (PrintUnexpected(Diags, nullptr, Buffer->err_begin(),
-                                  Buffer->err_end(), "error") +
-                  PrintUnexpected(Diags, nullptr, Buffer->warn_begin(),
-                                  Buffer->warn_end(), "warn") +
-                  PrintUnexpected(Diags, nullptr, Buffer->note_begin(),
-                                  Buffer->note_end(), "note"));
+    const DiagnosticLevelMask DiagMask =
+        ~Diags.getDiagnosticOptions().getVerifyIgnoreUnexpected();
+    if (bool(DiagnosticLevelMask::Error & DiagMask))
+      NumErrors += PrintUnexpected(Diags, nullptr, Buffer->err_begin(),
+                                   Buffer->err_end(), "error");
+    if (bool(DiagnosticLevelMask::Warning & DiagMask))
+      NumErrors += PrintUnexpected(Diags, nullptr, Buffer->warn_begin(),
+                                   Buffer->warn_end(), "warn");
+    if (bool(DiagnosticLevelMask::Remark & DiagMask))
+      NumErrors += PrintUnexpected(Diags, nullptr, Buffer->remark_begin(),
+                                   Buffer->remark_end(), "remark");
+    if (bool(DiagnosticLevelMask::Note & DiagMask))
+      NumErrors += PrintUnexpected(Diags, nullptr, Buffer->note_begin(),
+                                   Buffer->note_end(), "note");
   }
 
   Diags.setClient(CurClient, Owner.release() != nullptr);
diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt
index 5429092..29a738e 100644
--- a/lib/Headers/CMakeLists.txt
+++ b/lib/Headers/CMakeLists.txt
@@ -9,10 +9,13 @@
   avx512fintrin.h
   avx512vlbwintrin.h
   avx512vlintrin.h
+  avx512dqintrin.h
+  avx512vldqintrin.h
   avxintrin.h
   bmi2intrin.h
   bmiintrin.h
   cpuid.h
+  cuda_builtin_vars.h
   emmintrin.h
   f16cintrin.h
   float.h
diff --git a/lib/Headers/altivec.h b/lib/Headers/altivec.h
index 252bf36..28df890 100644
--- a/lib/Headers/altivec.h
+++ b/lib/Headers/altivec.h
@@ -29,225 +29,215 @@
 
 /* constants for mapping CR6 bits to predicate result. */
 
-#define __CR6_EQ     0
+#define __CR6_EQ 0
 #define __CR6_EQ_REV 1
-#define __CR6_LT     2
+#define __CR6_LT 2
 #define __CR6_LT_REV 3
 
 #define __ATTRS_o_ai __attribute__((__overloadable__, __always_inline__))
 
-static vector signed char __ATTRS_o_ai
-vec_perm(vector signed char __a, vector signed char __b, vector unsigned char __c);
+static vector signed char __ATTRS_o_ai vec_perm(vector signed char __a,
+                                                vector signed char __b,
+                                                vector unsigned char __c);
 
-static vector unsigned char __ATTRS_o_ai
-vec_perm(vector unsigned char __a,
-         vector unsigned char __b,
+static vector unsigned char __ATTRS_o_ai vec_perm(vector unsigned char __a,
+                                                  vector unsigned char __b,
+                                                  vector unsigned char __c);
+
+static vector bool char __ATTRS_o_ai vec_perm(vector bool char __a,
+                                              vector bool char __b,
+                                              vector unsigned char __c);
+
+static vector short __ATTRS_o_ai vec_perm(vector short __a, vector short __b,
+                                          vector unsigned char __c);
+
+static vector unsigned short __ATTRS_o_ai vec_perm(vector unsigned short __a,
+                                                   vector unsigned short __b,
+                                                   vector unsigned char __c);
+
+static vector bool short __ATTRS_o_ai vec_perm(vector bool short __a,
+                                               vector bool short __b,
+                                               vector unsigned char __c);
+
+static vector pixel __ATTRS_o_ai vec_perm(vector pixel __a, vector pixel __b,
+                                          vector unsigned char __c);
+
+static vector int __ATTRS_o_ai vec_perm(vector int __a, vector int __b,
+                                        vector unsigned char __c);
+
+static vector unsigned int __ATTRS_o_ai vec_perm(vector unsigned int __a,
+                                                 vector unsigned int __b,
+                                                 vector unsigned char __c);
+
+static vector bool int __ATTRS_o_ai vec_perm(vector bool int __a,
+                                             vector bool int __b,
+                                             vector unsigned char __c);
+
+static vector float __ATTRS_o_ai vec_perm(vector float __a, vector float __b,
+                                          vector unsigned char __c);
+
+#ifdef __VSX__
+static vector long long __ATTRS_o_ai vec_perm(vector long long __a,
+                                              vector long long __b,
+                                              vector unsigned char __c);
+
+static vector unsigned long long __ATTRS_o_ai
+vec_perm(vector unsigned long long __a, vector unsigned long long __b,
          vector unsigned char __c);
 
-static vector bool char __ATTRS_o_ai
-vec_perm(vector bool char __a, vector bool char __b, vector unsigned char __c);
+static vector double __ATTRS_o_ai vec_perm(vector double __a, vector double __b,
+                                           vector unsigned char __c);
+#endif
 
-static vector short __ATTRS_o_ai
-vec_perm(vector short __a, vector short __b, vector unsigned char __c);
-
-static vector unsigned short __ATTRS_o_ai
-vec_perm(vector unsigned short __a,
-         vector unsigned short __b,
-         vector unsigned char __c);
-
-static vector bool short __ATTRS_o_ai
-vec_perm(vector bool short __a, vector bool short __b, vector unsigned char __c);
-
-static vector pixel __ATTRS_o_ai
-vec_perm(vector pixel __a, vector pixel __b, vector unsigned char __c);
-
-static vector int __ATTRS_o_ai
-vec_perm(vector int __a, vector int __b, vector unsigned char __c);
-
-static vector unsigned int __ATTRS_o_ai
-vec_perm(vector unsigned int __a, vector unsigned int __b, vector unsigned char __c);
-
-static vector bool int __ATTRS_o_ai
-vec_perm(vector bool int __a, vector bool int __b, vector unsigned char __c);
-
-static vector float __ATTRS_o_ai
-vec_perm(vector float __a, vector float __b, vector unsigned char __c);
-
-static vector unsigned char __ATTRS_o_ai
-vec_xor(vector unsigned char __a, vector unsigned char __b);
+static vector unsigned char __ATTRS_o_ai vec_xor(vector unsigned char __a,
+                                                 vector unsigned char __b);
 
 /* vec_abs */
 
 #define __builtin_altivec_abs_v16qi vec_abs
-#define __builtin_altivec_abs_v8hi  vec_abs
-#define __builtin_altivec_abs_v4si  vec_abs
+#define __builtin_altivec_abs_v8hi vec_abs
+#define __builtin_altivec_abs_v4si vec_abs
 
-static vector signed char __ATTRS_o_ai
-vec_abs(vector signed char __a)
-{
+static vector signed char __ATTRS_o_ai vec_abs(vector signed char __a) {
   return __builtin_altivec_vmaxsb(__a, -__a);
 }
 
-static vector signed short __ATTRS_o_ai
-vec_abs(vector signed short __a)
-{
+static vector signed short __ATTRS_o_ai vec_abs(vector signed short __a) {
   return __builtin_altivec_vmaxsh(__a, -__a);
 }
 
-static vector signed int __ATTRS_o_ai
-vec_abs(vector signed int __a)
-{
+static vector signed int __ATTRS_o_ai vec_abs(vector signed int __a) {
   return __builtin_altivec_vmaxsw(__a, -__a);
 }
 
-static vector float __ATTRS_o_ai
-vec_abs(vector float __a)
-{
-  vector unsigned int __res = (vector unsigned int)__a
-                            & (vector unsigned int)(0x7FFFFFFF);
+static vector float __ATTRS_o_ai vec_abs(vector float __a) {
+  vector unsigned int __res =
+      (vector unsigned int)__a & (vector unsigned int)(0x7FFFFFFF);
   return (vector float)__res;
 }
 
 /* vec_abss */
 
 #define __builtin_altivec_abss_v16qi vec_abss
-#define __builtin_altivec_abss_v8hi  vec_abss
-#define __builtin_altivec_abss_v4si  vec_abss
+#define __builtin_altivec_abss_v8hi vec_abss
+#define __builtin_altivec_abss_v4si vec_abss
 
-static vector signed char __ATTRS_o_ai
-vec_abss(vector signed char __a)
-{
-  return __builtin_altivec_vmaxsb
-           (__a, __builtin_altivec_vsubsbs((vector signed char)(0), __a));
+static vector signed char __ATTRS_o_ai vec_abss(vector signed char __a) {
+  return __builtin_altivec_vmaxsb(
+      __a, __builtin_altivec_vsubsbs((vector signed char)(0), __a));
 }
 
-static vector signed short __ATTRS_o_ai
-vec_abss(vector signed short __a)
-{
-  return __builtin_altivec_vmaxsh
-           (__a, __builtin_altivec_vsubshs((vector signed short)(0), __a));
+static vector signed short __ATTRS_o_ai vec_abss(vector signed short __a) {
+  return __builtin_altivec_vmaxsh(
+      __a, __builtin_altivec_vsubshs((vector signed short)(0), __a));
 }
 
-static vector signed int __ATTRS_o_ai
-vec_abss(vector signed int __a)
-{
-  return __builtin_altivec_vmaxsw
-           (__a, __builtin_altivec_vsubsws((vector signed int)(0), __a));
+static vector signed int __ATTRS_o_ai vec_abss(vector signed int __a) {
+  return __builtin_altivec_vmaxsw(
+      __a, __builtin_altivec_vsubsws((vector signed int)(0), __a));
 }
 
 /* vec_add */
 
-static vector signed char __ATTRS_o_ai
-vec_add(vector signed char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_add(vector signed char __a,
+                                               vector signed char __b) {
   return __a + __b;
 }
 
-static vector signed char __ATTRS_o_ai
-vec_add(vector bool char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_add(vector bool char __a,
+                                               vector signed char __b) {
   return (vector signed char)__a + __b;
 }
 
-static vector signed char __ATTRS_o_ai
-vec_add(vector signed char __a, vector bool char __b)
-{
+static vector signed char __ATTRS_o_ai vec_add(vector signed char __a,
+                                               vector bool char __b) {
   return __a + (vector signed char)__b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_add(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_add(vector unsigned char __a,
+                                                 vector unsigned char __b) {
   return __a + __b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_add(vector bool char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_add(vector bool char __a,
+                                                 vector unsigned char __b) {
   return (vector unsigned char)__a + __b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_add(vector unsigned char __a, vector bool char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_add(vector unsigned char __a,
+                                                 vector bool char __b) {
   return __a + (vector unsigned char)__b;
 }
 
-static vector short __ATTRS_o_ai
-vec_add(vector short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_add(vector short __a, vector short __b) {
   return __a + __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_add(vector bool short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_add(vector bool short __a,
+                                         vector short __b) {
   return (vector short)__a + __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_add(vector short __a, vector bool short __b)
-{
+static vector short __ATTRS_o_ai vec_add(vector short __a,
+                                         vector bool short __b) {
   return __a + (vector short)__b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_add(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_add(vector unsigned short __a,
+                                                  vector unsigned short __b) {
   return __a + __b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_add(vector bool short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_add(vector bool short __a,
+                                                  vector unsigned short __b) {
   return (vector unsigned short)__a + __b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_add(vector unsigned short __a, vector bool short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_add(vector unsigned short __a,
+                                                  vector bool short __b) {
   return __a + (vector unsigned short)__b;
 }
 
-static vector int __ATTRS_o_ai
-vec_add(vector int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_add(vector int __a, vector int __b) {
   return __a + __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_add(vector bool int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_add(vector bool int __a, vector int __b) {
   return (vector int)__a + __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_add(vector int __a, vector bool int __b)
-{
+static vector int __ATTRS_o_ai vec_add(vector int __a, vector bool int __b) {
   return __a + (vector int)__b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_add(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_add(vector unsigned int __a,
+                                                vector unsigned int __b) {
   return __a + __b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_add(vector bool int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_add(vector bool int __a,
+                                                vector unsigned int __b) {
   return (vector unsigned int)__a + __b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_add(vector unsigned int __a, vector bool int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_add(vector unsigned int __a,
+                                                vector bool int __b) {
   return __a + (vector unsigned int)__b;
 }
 
-static vector float __ATTRS_o_ai
-vec_add(vector float __a, vector float __b)
-{
+#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
+static vector signed __int128 __ATTRS_o_ai vec_add(vector signed __int128 __a,
+                                                   vector signed __int128 __b) {
+  return __a + __b;
+}
+
+static vector unsigned __int128 __ATTRS_o_ai
+vec_add(vector unsigned __int128 __a, vector unsigned __int128 __b) {
+  return __a + __b;
+}
+#endif // defined(__POWER8_VECTOR__) && defined(__powerpc64__)
+
+static vector float __ATTRS_o_ai vec_add(vector float __a, vector float __b) {
   return __a + __b;
 }
 
@@ -255,39 +245,33 @@
 
 #define __builtin_altivec_vaddubm vec_vaddubm
 
-static vector signed char __ATTRS_o_ai
-vec_vaddubm(vector signed char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vaddubm(vector signed char __a,
+                                                   vector signed char __b) {
   return __a + __b;
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vaddubm(vector bool char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vaddubm(vector bool char __a,
+                                                   vector signed char __b) {
   return (vector signed char)__a + __b;
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vaddubm(vector signed char __a, vector bool char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vaddubm(vector signed char __a,
+                                                   vector bool char __b) {
   return __a + (vector signed char)__b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vaddubm(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vaddubm(vector unsigned char __a,
+                                                     vector unsigned char __b) {
   return __a + __b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vaddubm(vector bool char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vaddubm(vector bool char __a,
+                                                     vector unsigned char __b) {
   return (vector unsigned char)__a + __b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vaddubm(vector unsigned char __a, vector bool char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vaddubm(vector unsigned char __a,
+                                                     vector bool char __b) {
   return __a + (vector unsigned char)__b;
 }
 
@@ -295,39 +279,33 @@
 
 #define __builtin_altivec_vadduhm vec_vadduhm
 
-static vector short __ATTRS_o_ai
-vec_vadduhm(vector short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_vadduhm(vector short __a,
+                                             vector short __b) {
   return __a + __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_vadduhm(vector bool short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_vadduhm(vector bool short __a,
+                                             vector short __b) {
   return (vector short)__a + __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_vadduhm(vector short __a, vector bool short __b)
-{
+static vector short __ATTRS_o_ai vec_vadduhm(vector short __a,
+                                             vector bool short __b) {
   return __a + (vector short)__b;
 }
 
 static vector unsigned short __ATTRS_o_ai
-vec_vadduhm(vector unsigned short __a, vector unsigned short __b)
-{
+vec_vadduhm(vector unsigned short __a, vector unsigned short __b) {
   return __a + __b;
 }
 
 static vector unsigned short __ATTRS_o_ai
-vec_vadduhm(vector bool short __a, vector unsigned short __b)
-{
+vec_vadduhm(vector bool short __a, vector unsigned short __b) {
   return (vector unsigned short)__a + __b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vadduhm(vector unsigned short __a, vector bool short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vadduhm(vector unsigned short __a,
+                                                      vector bool short __b) {
   return __a + (vector unsigned short)__b;
 }
 
@@ -335,1963 +313,1818 @@
 
 #define __builtin_altivec_vadduwm vec_vadduwm
 
-static vector int __ATTRS_o_ai
-vec_vadduwm(vector int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_vadduwm(vector int __a, vector int __b) {
   return __a + __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_vadduwm(vector bool int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_vadduwm(vector bool int __a,
+                                           vector int __b) {
   return (vector int)__a + __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_vadduwm(vector int __a, vector bool int __b)
-{
+static vector int __ATTRS_o_ai vec_vadduwm(vector int __a,
+                                           vector bool int __b) {
   return __a + (vector int)__b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vadduwm(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vadduwm(vector unsigned int __a,
+                                                    vector unsigned int __b) {
   return __a + __b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vadduwm(vector bool int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vadduwm(vector bool int __a,
+                                                    vector unsigned int __b) {
   return (vector unsigned int)__a + __b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vadduwm(vector unsigned int __a, vector bool int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vadduwm(vector unsigned int __a,
+                                                    vector bool int __b) {
   return __a + (vector unsigned int)__b;
 }
 
 /* vec_vaddfp */
 
-#define __builtin_altivec_vaddfp  vec_vaddfp
+#define __builtin_altivec_vaddfp vec_vaddfp
 
 static vector float __attribute__((__always_inline__))
-vec_vaddfp(vector float __a, vector float __b)
-{
+vec_vaddfp(vector float __a, vector float __b) {
   return __a + __b;
 }
 
 /* vec_addc */
 
-static vector unsigned int __attribute__((__always_inline__))
-vec_addc(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_addc(vector unsigned int __a,
+                                                 vector unsigned int __b) {
   return __builtin_altivec_vaddcuw(__a, __b);
 }
 
+#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
+static vector signed __int128 __ATTRS_o_ai
+vec_addc(vector signed __int128 __a, vector signed __int128 __b) {
+  return __builtin_altivec_vaddcuq(__a, __b);
+}
+
+static vector unsigned __int128 __ATTRS_o_ai
+vec_addc(vector unsigned __int128 __a, vector unsigned __int128 __b) {
+  return __builtin_altivec_vaddcuq(__a, __b);
+}
+#endif // defined(__POWER8_VECTOR__) && defined(__powerpc64__)
+
 /* vec_vaddcuw */
 
 static vector unsigned int __attribute__((__always_inline__))
-vec_vaddcuw(vector unsigned int __a, vector unsigned int __b)
-{
+vec_vaddcuw(vector unsigned int __a, vector unsigned int __b) {
   return __builtin_altivec_vaddcuw(__a, __b);
 }
 
 /* vec_adds */
 
-static vector signed char __ATTRS_o_ai
-vec_adds(vector signed char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_adds(vector signed char __a,
+                                                vector signed char __b) {
   return __builtin_altivec_vaddsbs(__a, __b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_adds(vector bool char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_adds(vector bool char __a,
+                                                vector signed char __b) {
   return __builtin_altivec_vaddsbs((vector signed char)__a, __b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_adds(vector signed char __a, vector bool char __b)
-{
+static vector signed char __ATTRS_o_ai vec_adds(vector signed char __a,
+                                                vector bool char __b) {
   return __builtin_altivec_vaddsbs(__a, (vector signed char)__b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_adds(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_adds(vector unsigned char __a,
+                                                  vector unsigned char __b) {
   return __builtin_altivec_vaddubs(__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_adds(vector bool char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_adds(vector bool char __a,
+                                                  vector unsigned char __b) {
   return __builtin_altivec_vaddubs((vector unsigned char)__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_adds(vector unsigned char __a, vector bool char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_adds(vector unsigned char __a,
+                                                  vector bool char __b) {
   return __builtin_altivec_vaddubs(__a, (vector unsigned char)__b);
 }
 
-static vector short __ATTRS_o_ai
-vec_adds(vector short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_adds(vector short __a, vector short __b) {
   return __builtin_altivec_vaddshs(__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_adds(vector bool short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_adds(vector bool short __a,
+                                          vector short __b) {
   return __builtin_altivec_vaddshs((vector short)__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_adds(vector short __a, vector bool short __b)
-{
+static vector short __ATTRS_o_ai vec_adds(vector short __a,
+                                          vector bool short __b) {
   return __builtin_altivec_vaddshs(__a, (vector short)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_adds(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_adds(vector unsigned short __a,
+                                                   vector unsigned short __b) {
   return __builtin_altivec_vadduhs(__a, __b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_adds(vector bool short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_adds(vector bool short __a,
+                                                   vector unsigned short __b) {
   return __builtin_altivec_vadduhs((vector unsigned short)__a, __b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_adds(vector unsigned short __a, vector bool short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_adds(vector unsigned short __a,
+                                                   vector bool short __b) {
   return __builtin_altivec_vadduhs(__a, (vector unsigned short)__b);
 }
 
-static vector int __ATTRS_o_ai
-vec_adds(vector int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_adds(vector int __a, vector int __b) {
   return __builtin_altivec_vaddsws(__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_adds(vector bool int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_adds(vector bool int __a, vector int __b) {
   return __builtin_altivec_vaddsws((vector int)__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_adds(vector int __a, vector bool int __b)
-{
+static vector int __ATTRS_o_ai vec_adds(vector int __a, vector bool int __b) {
   return __builtin_altivec_vaddsws(__a, (vector int)__b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_adds(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_adds(vector unsigned int __a,
+                                                 vector unsigned int __b) {
   return __builtin_altivec_vadduws(__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_adds(vector bool int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_adds(vector bool int __a,
+                                                 vector unsigned int __b) {
   return __builtin_altivec_vadduws((vector unsigned int)__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_adds(vector unsigned int __a, vector bool int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_adds(vector unsigned int __a,
+                                                 vector bool int __b) {
   return __builtin_altivec_vadduws(__a, (vector unsigned int)__b);
 }
 
 /* vec_vaddsbs */
 
-static vector signed char __ATTRS_o_ai
-vec_vaddsbs(vector signed char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vaddsbs(vector signed char __a,
+                                                   vector signed char __b) {
   return __builtin_altivec_vaddsbs(__a, __b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vaddsbs(vector bool char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vaddsbs(vector bool char __a,
+                                                   vector signed char __b) {
   return __builtin_altivec_vaddsbs((vector signed char)__a, __b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vaddsbs(vector signed char __a, vector bool char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vaddsbs(vector signed char __a,
+                                                   vector bool char __b) {
   return __builtin_altivec_vaddsbs(__a, (vector signed char)__b);
 }
 
 /* vec_vaddubs */
 
-static vector unsigned char __ATTRS_o_ai
-vec_vaddubs(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vaddubs(vector unsigned char __a,
+                                                     vector unsigned char __b) {
   return __builtin_altivec_vaddubs(__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vaddubs(vector bool char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vaddubs(vector bool char __a,
+                                                     vector unsigned char __b) {
   return __builtin_altivec_vaddubs((vector unsigned char)__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vaddubs(vector unsigned char __a, vector bool char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vaddubs(vector unsigned char __a,
+                                                     vector bool char __b) {
   return __builtin_altivec_vaddubs(__a, (vector unsigned char)__b);
 }
 
 /* vec_vaddshs */
 
-static vector short __ATTRS_o_ai
-vec_vaddshs(vector short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_vaddshs(vector short __a,
+                                             vector short __b) {
   return __builtin_altivec_vaddshs(__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_vaddshs(vector bool short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_vaddshs(vector bool short __a,
+                                             vector short __b) {
   return __builtin_altivec_vaddshs((vector short)__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_vaddshs(vector short __a, vector bool short __b)
-{
+static vector short __ATTRS_o_ai vec_vaddshs(vector short __a,
+                                             vector bool short __b) {
   return __builtin_altivec_vaddshs(__a, (vector short)__b);
 }
 
 /* vec_vadduhs */
 
 static vector unsigned short __ATTRS_o_ai
-vec_vadduhs(vector unsigned short __a, vector unsigned short __b)
-{
+vec_vadduhs(vector unsigned short __a, vector unsigned short __b) {
   return __builtin_altivec_vadduhs(__a, __b);
 }
 
 static vector unsigned short __ATTRS_o_ai
-vec_vadduhs(vector bool short __a, vector unsigned short __b)
-{
+vec_vadduhs(vector bool short __a, vector unsigned short __b) {
   return __builtin_altivec_vadduhs((vector unsigned short)__a, __b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vadduhs(vector unsigned short __a, vector bool short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vadduhs(vector unsigned short __a,
+                                                      vector bool short __b) {
   return __builtin_altivec_vadduhs(__a, (vector unsigned short)__b);
 }
 
 /* vec_vaddsws */
 
-static vector int __ATTRS_o_ai
-vec_vaddsws(vector int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_vaddsws(vector int __a, vector int __b) {
   return __builtin_altivec_vaddsws(__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_vaddsws(vector bool int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_vaddsws(vector bool int __a,
+                                           vector int __b) {
   return __builtin_altivec_vaddsws((vector int)__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_vaddsws(vector int __a, vector bool int __b)
-{
+static vector int __ATTRS_o_ai vec_vaddsws(vector int __a,
+                                           vector bool int __b) {
   return __builtin_altivec_vaddsws(__a, (vector int)__b);
 }
 
 /* vec_vadduws */
 
-static vector unsigned int __ATTRS_o_ai
-vec_vadduws(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vadduws(vector unsigned int __a,
+                                                    vector unsigned int __b) {
   return __builtin_altivec_vadduws(__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vadduws(vector bool int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vadduws(vector bool int __a,
+                                                    vector unsigned int __b) {
   return __builtin_altivec_vadduws((vector unsigned int)__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vadduws(vector unsigned int __a, vector bool int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vadduws(vector unsigned int __a,
+                                                    vector bool int __b) {
   return __builtin_altivec_vadduws(__a, (vector unsigned int)__b);
 }
 
+#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
+/* vec_vadduqm */
+
+static vector signed __int128 __ATTRS_o_ai
+vec_vadduqm(vector signed __int128 __a, vector signed __int128 __b) {
+  return __a + __b;
+}
+
+static vector unsigned __int128 __ATTRS_o_ai
+vec_vadduqm(vector unsigned __int128 __a, vector unsigned __int128 __b) {
+  return __a + __b;
+}
+
+/* vec_vaddeuqm */
+
+static vector signed __int128 __ATTRS_o_ai
+vec_vaddeuqm(vector signed __int128 __a, vector signed __int128 __b,
+             vector signed __int128 __c) {
+  return __builtin_altivec_vaddeuqm(__a, __b, __c);
+}
+
+static vector unsigned __int128 __ATTRS_o_ai
+vec_vaddeuqm(vector unsigned __int128 __a, vector unsigned __int128 __b,
+             vector unsigned __int128 __c) {
+  return __builtin_altivec_vaddeuqm(__a, __b, __c);
+}
+
+/* vec_vaddcuq */
+
+static vector signed __int128 __ATTRS_o_ai
+vec_vaddcuq(vector signed __int128 __a, vector signed __int128 __b) {
+  return __builtin_altivec_vaddcuq(__a, __b);
+}
+
+static vector unsigned __int128 __ATTRS_o_ai
+vec_vaddcuq(vector unsigned __int128 __a, vector unsigned __int128 __b) {
+  return __builtin_altivec_vaddcuq(__a, __b);
+}
+
+/* vec_vaddecuq */
+
+static vector signed __int128 __ATTRS_o_ai
+vec_vaddecuq(vector signed __int128 __a, vector signed __int128 __b,
+             vector signed __int128 __c) {
+  return __builtin_altivec_vaddecuq(__a, __b, __c);
+}
+
+static vector unsigned __int128 __ATTRS_o_ai
+vec_vaddecuq(vector unsigned __int128 __a, vector unsigned __int128 __b,
+             vector unsigned __int128 __c) {
+  return __builtin_altivec_vaddecuq(__a, __b, __c);
+}
+#endif // defined(__POWER8_VECTOR__) && defined(__powerpc64__)
+
 /* vec_and */
 
 #define __builtin_altivec_vand vec_and
 
-static vector signed char __ATTRS_o_ai
-vec_and(vector signed char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_and(vector signed char __a,
+                                               vector signed char __b) {
   return __a & __b;
 }
 
-static vector signed char __ATTRS_o_ai
-vec_and(vector bool char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_and(vector bool char __a,
+                                               vector signed char __b) {
   return (vector signed char)__a & __b;
 }
 
-static vector signed char __ATTRS_o_ai
-vec_and(vector signed char __a, vector bool char __b)
-{
+static vector signed char __ATTRS_o_ai vec_and(vector signed char __a,
+                                               vector bool char __b) {
   return __a & (vector signed char)__b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_and(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_and(vector unsigned char __a,
+                                                 vector unsigned char __b) {
   return __a & __b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_and(vector bool char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_and(vector bool char __a,
+                                                 vector unsigned char __b) {
   return (vector unsigned char)__a & __b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_and(vector unsigned char __a, vector bool char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_and(vector unsigned char __a,
+                                                 vector bool char __b) {
   return __a & (vector unsigned char)__b;
 }
 
-static vector bool char __ATTRS_o_ai
-vec_and(vector bool char __a, vector bool char __b)
-{
+static vector bool char __ATTRS_o_ai vec_and(vector bool char __a,
+                                             vector bool char __b) {
   return __a & __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_and(vector short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_and(vector short __a, vector short __b) {
   return __a & __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_and(vector bool short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_and(vector bool short __a,
+                                         vector short __b) {
   return (vector short)__a & __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_and(vector short __a, vector bool short __b)
-{
+static vector short __ATTRS_o_ai vec_and(vector short __a,
+                                         vector bool short __b) {
   return __a & (vector short)__b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_and(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_and(vector unsigned short __a,
+                                                  vector unsigned short __b) {
   return __a & __b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_and(vector bool short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_and(vector bool short __a,
+                                                  vector unsigned short __b) {
   return (vector unsigned short)__a & __b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_and(vector unsigned short __a, vector bool short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_and(vector unsigned short __a,
+                                                  vector bool short __b) {
   return __a & (vector unsigned short)__b;
 }
 
-static vector bool short __ATTRS_o_ai
-vec_and(vector bool short __a, vector bool short __b)
-{
+static vector bool short __ATTRS_o_ai vec_and(vector bool short __a,
+                                              vector bool short __b) {
   return __a & __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_and(vector int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_and(vector int __a, vector int __b) {
   return __a & __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_and(vector bool int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_and(vector bool int __a, vector int __b) {
   return (vector int)__a & __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_and(vector int __a, vector bool int __b)
-{
+static vector int __ATTRS_o_ai vec_and(vector int __a, vector bool int __b) {
   return __a & (vector int)__b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_and(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_and(vector unsigned int __a,
+                                                vector unsigned int __b) {
   return __a & __b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_and(vector bool int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_and(vector bool int __a,
+                                                vector unsigned int __b) {
   return (vector unsigned int)__a & __b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_and(vector unsigned int __a, vector bool int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_and(vector unsigned int __a,
+                                                vector bool int __b) {
   return __a & (vector unsigned int)__b;
 }
 
-static vector bool int __ATTRS_o_ai
-vec_and(vector bool int __a, vector bool int __b)
-{
+static vector bool int __ATTRS_o_ai vec_and(vector bool int __a,
+                                            vector bool int __b) {
   return __a & __b;
 }
 
-static vector float __ATTRS_o_ai
-vec_and(vector float __a, vector float __b)
-{
-  vector unsigned int __res = (vector unsigned int)__a & (vector unsigned int)__b;
+static vector float __ATTRS_o_ai vec_and(vector float __a, vector float __b) {
+  vector unsigned int __res =
+      (vector unsigned int)__a & (vector unsigned int)__b;
   return (vector float)__res;
 }
 
-static vector float __ATTRS_o_ai
-vec_and(vector bool int __a, vector float __b)
-{
-  vector unsigned int __res = (vector unsigned int)__a & (vector unsigned int)__b;
+static vector float __ATTRS_o_ai vec_and(vector bool int __a,
+                                         vector float __b) {
+  vector unsigned int __res =
+      (vector unsigned int)__a & (vector unsigned int)__b;
   return (vector float)__res;
 }
 
-static vector float __ATTRS_o_ai
-vec_and(vector float __a, vector bool int __b)
-{
-  vector unsigned int __res = (vector unsigned int)__a & (vector unsigned int)__b;
+static vector float __ATTRS_o_ai vec_and(vector float __a,
+                                         vector bool int __b) {
+  vector unsigned int __res =
+      (vector unsigned int)__a & (vector unsigned int)__b;
   return (vector float)__res;
 }
 
+#ifdef __VSX__
+static vector signed long long __ATTRS_o_ai
+vec_and(vector signed long long __a, vector signed long long __b) {
+  return __a & __b;
+}
+
+static vector signed long long __ATTRS_o_ai
+vec_and(vector bool long long __a, vector signed long long __b) {
+  return (vector signed long long)__a & __b;
+}
+
+static vector signed long long __ATTRS_o_ai vec_and(vector signed long long __a,
+                                                    vector bool long long __b) {
+  return __a & (vector signed long long)__b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_and(vector unsigned long long __a, vector unsigned long long __b) {
+  return __a & __b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_and(vector bool long long __a, vector unsigned long long __b) {
+  return (vector unsigned long long)__a & __b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_and(vector unsigned long long __a, vector bool long long __b) {
+  return __a & (vector unsigned long long)__b;
+}
+
+static vector bool long long __ATTRS_o_ai vec_and(vector bool long long __a,
+                                                  vector bool long long __b) {
+  return __a & __b;
+}
+#endif
+
 /* vec_vand */
 
-static vector signed char __ATTRS_o_ai
-vec_vand(vector signed char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vand(vector signed char __a,
+                                                vector signed char __b) {
   return __a & __b;
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vand(vector bool char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vand(vector bool char __a,
+                                                vector signed char __b) {
   return (vector signed char)__a & __b;
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vand(vector signed char __a, vector bool char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vand(vector signed char __a,
+                                                vector bool char __b) {
   return __a & (vector signed char)__b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vand(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vand(vector unsigned char __a,
+                                                  vector unsigned char __b) {
   return __a & __b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vand(vector bool char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vand(vector bool char __a,
+                                                  vector unsigned char __b) {
   return (vector unsigned char)__a & __b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vand(vector unsigned char __a, vector bool char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vand(vector unsigned char __a,
+                                                  vector bool char __b) {
   return __a & (vector unsigned char)__b;
 }
 
-static vector bool char __ATTRS_o_ai
-vec_vand(vector bool char __a, vector bool char __b)
-{
+static vector bool char __ATTRS_o_ai vec_vand(vector bool char __a,
+                                              vector bool char __b) {
   return __a & __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_vand(vector short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_vand(vector short __a, vector short __b) {
   return __a & __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_vand(vector bool short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_vand(vector bool short __a,
+                                          vector short __b) {
   return (vector short)__a & __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_vand(vector short __a, vector bool short __b)
-{
+static vector short __ATTRS_o_ai vec_vand(vector short __a,
+                                          vector bool short __b) {
   return __a & (vector short)__b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vand(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vand(vector unsigned short __a,
+                                                   vector unsigned short __b) {
   return __a & __b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vand(vector bool short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vand(vector bool short __a,
+                                                   vector unsigned short __b) {
   return (vector unsigned short)__a & __b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vand(vector unsigned short __a, vector bool short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vand(vector unsigned short __a,
+                                                   vector bool short __b) {
   return __a & (vector unsigned short)__b;
 }
 
-static vector bool short __ATTRS_o_ai
-vec_vand(vector bool short __a, vector bool short __b)
-{
+static vector bool short __ATTRS_o_ai vec_vand(vector bool short __a,
+                                               vector bool short __b) {
   return __a & __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_vand(vector int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_vand(vector int __a, vector int __b) {
   return __a & __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_vand(vector bool int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_vand(vector bool int __a, vector int __b) {
   return (vector int)__a & __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_vand(vector int __a, vector bool int __b)
-{
+static vector int __ATTRS_o_ai vec_vand(vector int __a, vector bool int __b) {
   return __a & (vector int)__b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vand(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vand(vector unsigned int __a,
+                                                 vector unsigned int __b) {
   return __a & __b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vand(vector bool int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vand(vector bool int __a,
+                                                 vector unsigned int __b) {
   return (vector unsigned int)__a & __b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vand(vector unsigned int __a, vector bool int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vand(vector unsigned int __a,
+                                                 vector bool int __b) {
   return __a & (vector unsigned int)__b;
 }
 
-static vector bool int __ATTRS_o_ai
-vec_vand(vector bool int __a, vector bool int __b)
-{
+static vector bool int __ATTRS_o_ai vec_vand(vector bool int __a,
+                                             vector bool int __b) {
   return __a & __b;
 }
 
-static vector float __ATTRS_o_ai
-vec_vand(vector float __a, vector float __b)
-{
-  vector unsigned int __res = (vector unsigned int)__a & (vector unsigned int)__b;
+static vector float __ATTRS_o_ai vec_vand(vector float __a, vector float __b) {
+  vector unsigned int __res =
+      (vector unsigned int)__a & (vector unsigned int)__b;
   return (vector float)__res;
 }
 
-static vector float __ATTRS_o_ai
-vec_vand(vector bool int __a, vector float __b)
-{
-  vector unsigned int __res = (vector unsigned int)__a & (vector unsigned int)__b;
+static vector float __ATTRS_o_ai vec_vand(vector bool int __a,
+                                          vector float __b) {
+  vector unsigned int __res =
+      (vector unsigned int)__a & (vector unsigned int)__b;
   return (vector float)__res;
 }
 
-static vector float __ATTRS_o_ai
-vec_vand(vector float __a, vector bool int __b)
-{
-  vector unsigned int __res = (vector unsigned int)__a & (vector unsigned int)__b;
+static vector float __ATTRS_o_ai vec_vand(vector float __a,
+                                          vector bool int __b) {
+  vector unsigned int __res =
+      (vector unsigned int)__a & (vector unsigned int)__b;
   return (vector float)__res;
 }
 
+#ifdef __VSX__
+static vector signed long long __ATTRS_o_ai
+vec_vand(vector signed long long __a, vector signed long long __b) {
+  return __a & __b;
+}
+
+static vector signed long long __ATTRS_o_ai
+vec_vand(vector bool long long __a, vector signed long long __b) {
+  return (vector signed long long)__a & __b;
+}
+
+static vector signed long long __ATTRS_o_ai
+vec_vand(vector signed long long __a, vector bool long long __b) {
+  return __a & (vector signed long long)__b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_vand(vector unsigned long long __a, vector unsigned long long __b) {
+  return __a & __b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_vand(vector bool long long __a, vector unsigned long long __b) {
+  return (vector unsigned long long)__a & __b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_vand(vector unsigned long long __a, vector bool long long __b) {
+  return __a & (vector unsigned long long)__b;
+}
+
+static vector bool long long __ATTRS_o_ai vec_vand(vector bool long long __a,
+                                                   vector bool long long __b) {
+  return __a & __b;
+}
+#endif
+
 /* vec_andc */
 
 #define __builtin_altivec_vandc vec_andc
 
-static vector signed char __ATTRS_o_ai
-vec_andc(vector signed char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_andc(vector signed char __a,
+                                                vector signed char __b) {
   return __a & ~__b;
 }
 
-static vector signed char __ATTRS_o_ai
-vec_andc(vector bool char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_andc(vector bool char __a,
+                                                vector signed char __b) {
   return (vector signed char)__a & ~__b;
 }
 
-static vector signed char __ATTRS_o_ai
-vec_andc(vector signed char __a, vector bool char __b)
-{
+static vector signed char __ATTRS_o_ai vec_andc(vector signed char __a,
+                                                vector bool char __b) {
   return __a & ~(vector signed char)__b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_andc(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_andc(vector unsigned char __a,
+                                                  vector unsigned char __b) {
   return __a & ~__b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_andc(vector bool char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_andc(vector bool char __a,
+                                                  vector unsigned char __b) {
   return (vector unsigned char)__a & ~__b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_andc(vector unsigned char __a, vector bool char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_andc(vector unsigned char __a,
+                                                  vector bool char __b) {
   return __a & ~(vector unsigned char)__b;
 }
 
-static vector bool char __ATTRS_o_ai
-vec_andc(vector bool char __a, vector bool char __b)
-{
+static vector bool char __ATTRS_o_ai vec_andc(vector bool char __a,
+                                              vector bool char __b) {
   return __a & ~__b;
 }
 
-static vector short __ATTRS_o_ai
-vec_andc(vector short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_andc(vector short __a, vector short __b) {
   return __a & ~__b;
 }
 
-static vector short __ATTRS_o_ai
-vec_andc(vector bool short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_andc(vector bool short __a,
+                                          vector short __b) {
   return (vector short)__a & ~__b;
 }
 
-static vector short __ATTRS_o_ai
-vec_andc(vector short __a, vector bool short __b)
-{
+static vector short __ATTRS_o_ai vec_andc(vector short __a,
+                                          vector bool short __b) {
   return __a & ~(vector short)__b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_andc(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_andc(vector unsigned short __a,
+                                                   vector unsigned short __b) {
   return __a & ~__b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_andc(vector bool short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_andc(vector bool short __a,
+                                                   vector unsigned short __b) {
   return (vector unsigned short)__a & ~__b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_andc(vector unsigned short __a, vector bool short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_andc(vector unsigned short __a,
+                                                   vector bool short __b) {
   return __a & ~(vector unsigned short)__b;
 }
 
-static vector bool short __ATTRS_o_ai
-vec_andc(vector bool short __a, vector bool short __b)
-{
+static vector bool short __ATTRS_o_ai vec_andc(vector bool short __a,
+                                               vector bool short __b) {
   return __a & ~__b;
 }
 
-static vector int __ATTRS_o_ai
-vec_andc(vector int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_andc(vector int __a, vector int __b) {
   return __a & ~__b;
 }
 
-static vector int __ATTRS_o_ai
-vec_andc(vector bool int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_andc(vector bool int __a, vector int __b) {
   return (vector int)__a & ~__b;
 }
 
-static vector int __ATTRS_o_ai
-vec_andc(vector int __a, vector bool int __b)
-{
+static vector int __ATTRS_o_ai vec_andc(vector int __a, vector bool int __b) {
   return __a & ~(vector int)__b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_andc(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_andc(vector unsigned int __a,
+                                                 vector unsigned int __b) {
   return __a & ~__b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_andc(vector bool int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_andc(vector bool int __a,
+                                                 vector unsigned int __b) {
   return (vector unsigned int)__a & ~__b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_andc(vector unsigned int __a, vector bool int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_andc(vector unsigned int __a,
+                                                 vector bool int __b) {
   return __a & ~(vector unsigned int)__b;
 }
 
-static vector bool int __ATTRS_o_ai
-vec_andc(vector bool int __a, vector bool int __b)
-{
+static vector bool int __ATTRS_o_ai vec_andc(vector bool int __a,
+                                             vector bool int __b) {
   return __a & ~__b;
 }
 
-static vector float __ATTRS_o_ai
-vec_andc(vector float __a, vector float __b)
-{
-  vector unsigned int __res = (vector unsigned int)__a & ~(vector unsigned int)__b;
+static vector float __ATTRS_o_ai vec_andc(vector float __a, vector float __b) {
+  vector unsigned int __res =
+      (vector unsigned int)__a & ~(vector unsigned int)__b;
   return (vector float)__res;
 }
 
-static vector float __ATTRS_o_ai
-vec_andc(vector bool int __a, vector float __b)
-{
-  vector unsigned int __res = (vector unsigned int)__a & ~(vector unsigned int)__b;
+static vector float __ATTRS_o_ai vec_andc(vector bool int __a,
+                                          vector float __b) {
+  vector unsigned int __res =
+      (vector unsigned int)__a & ~(vector unsigned int)__b;
   return (vector float)__res;
 }
 
-static vector float __ATTRS_o_ai
-vec_andc(vector float __a, vector bool int __b)
-{
-  vector unsigned int __res = (vector unsigned int)__a & ~(vector unsigned int)__b;
+static vector float __ATTRS_o_ai vec_andc(vector float __a,
+                                          vector bool int __b) {
+  vector unsigned int __res =
+      (vector unsigned int)__a & ~(vector unsigned int)__b;
   return (vector float)__res;
 }
 
+#ifdef __VSX__
+static vector signed long long __ATTRS_o_ai
+vec_andc(vector signed long long __a, vector signed long long __b) {
+  return __a & ~__b;
+}
+
+static vector signed long long __ATTRS_o_ai
+vec_andc(vector bool long long __a, vector signed long long __b) {
+  return (vector signed long long)__a & ~__b;
+}
+
+static vector signed long long __ATTRS_o_ai
+vec_andc(vector signed long long __a, vector bool long long __b) {
+  return __a & ~(vector signed long long)__b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_andc(vector unsigned long long __a, vector unsigned long long __b) {
+  return __a & ~__b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_andc(vector bool long long __a, vector unsigned long long __b) {
+  return (vector unsigned long long)__a & ~__b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_andc(vector unsigned long long __a, vector bool long long __b) {
+  return __a & ~(vector unsigned long long)__b;
+}
+
+static vector bool long long __ATTRS_o_ai vec_andc(vector bool long long __a,
+                                                   vector bool long long __b) {
+  return __a & ~__b;
+}
+#endif
+
 /* vec_vandc */
 
-static vector signed char __ATTRS_o_ai
-vec_vandc(vector signed char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vandc(vector signed char __a,
+                                                 vector signed char __b) {
   return __a & ~__b;
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vandc(vector bool char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vandc(vector bool char __a,
+                                                 vector signed char __b) {
   return (vector signed char)__a & ~__b;
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vandc(vector signed char __a, vector bool char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vandc(vector signed char __a,
+                                                 vector bool char __b) {
   return __a & ~(vector signed char)__b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vandc(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vandc(vector unsigned char __a,
+                                                   vector unsigned char __b) {
   return __a & ~__b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vandc(vector bool char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vandc(vector bool char __a,
+                                                   vector unsigned char __b) {
   return (vector unsigned char)__a & ~__b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vandc(vector unsigned char __a, vector bool char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vandc(vector unsigned char __a,
+                                                   vector bool char __b) {
   return __a & ~(vector unsigned char)__b;
 }
 
-static vector bool char __ATTRS_o_ai
-vec_vandc(vector bool char __a, vector bool char __b)
-{
+static vector bool char __ATTRS_o_ai vec_vandc(vector bool char __a,
+                                               vector bool char __b) {
   return __a & ~__b;
 }
 
-static vector short __ATTRS_o_ai
-vec_vandc(vector short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_vandc(vector short __a, vector short __b) {
   return __a & ~__b;
 }
 
-static vector short __ATTRS_o_ai
-vec_vandc(vector bool short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_vandc(vector bool short __a,
+                                           vector short __b) {
   return (vector short)__a & ~__b;
 }
 
-static vector short __ATTRS_o_ai
-vec_vandc(vector short __a, vector bool short __b)
-{
+static vector short __ATTRS_o_ai vec_vandc(vector short __a,
+                                           vector bool short __b) {
   return __a & ~(vector short)__b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vandc(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vandc(vector unsigned short __a,
+                                                    vector unsigned short __b) {
   return __a & ~__b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vandc(vector bool short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vandc(vector bool short __a,
+                                                    vector unsigned short __b) {
   return (vector unsigned short)__a & ~__b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vandc(vector unsigned short __a, vector bool short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vandc(vector unsigned short __a,
+                                                    vector bool short __b) {
   return __a & ~(vector unsigned short)__b;
 }
 
-static vector bool short __ATTRS_o_ai
-vec_vandc(vector bool short __a, vector bool short __b)
-{
+static vector bool short __ATTRS_o_ai vec_vandc(vector bool short __a,
+                                                vector bool short __b) {
   return __a & ~__b;
 }
 
-static vector int __ATTRS_o_ai
-vec_vandc(vector int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_vandc(vector int __a, vector int __b) {
   return __a & ~__b;
 }
 
-static vector int __ATTRS_o_ai
-vec_vandc(vector bool int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_vandc(vector bool int __a, vector int __b) {
   return (vector int)__a & ~__b;
 }
 
-static vector int __ATTRS_o_ai
-vec_vandc(vector int __a, vector bool int __b)
-{
+static vector int __ATTRS_o_ai vec_vandc(vector int __a, vector bool int __b) {
   return __a & ~(vector int)__b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vandc(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vandc(vector unsigned int __a,
+                                                  vector unsigned int __b) {
   return __a & ~__b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vandc(vector bool int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vandc(vector bool int __a,
+                                                  vector unsigned int __b) {
   return (vector unsigned int)__a & ~__b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vandc(vector unsigned int __a, vector bool int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vandc(vector unsigned int __a,
+                                                  vector bool int __b) {
   return __a & ~(vector unsigned int)__b;
 }
 
-static vector bool int __ATTRS_o_ai
-vec_vandc(vector bool int __a, vector bool int __b)
-{
+static vector bool int __ATTRS_o_ai vec_vandc(vector bool int __a,
+                                              vector bool int __b) {
   return __a & ~__b;
 }
 
-static vector float __ATTRS_o_ai
-vec_vandc(vector float __a, vector float __b)
-{
-  vector unsigned int __res = (vector unsigned int)__a & ~(vector unsigned int)__b;
+static vector float __ATTRS_o_ai vec_vandc(vector float __a, vector float __b) {
+  vector unsigned int __res =
+      (vector unsigned int)__a & ~(vector unsigned int)__b;
   return (vector float)__res;
 }
 
-static vector float __ATTRS_o_ai
-vec_vandc(vector bool int __a, vector float __b)
-{
-  vector unsigned int __res = (vector unsigned int)__a & ~(vector unsigned int)__b;
+static vector float __ATTRS_o_ai vec_vandc(vector bool int __a,
+                                           vector float __b) {
+  vector unsigned int __res =
+      (vector unsigned int)__a & ~(vector unsigned int)__b;
   return (vector float)__res;
 }
 
-static vector float __ATTRS_o_ai
-vec_vandc(vector float __a, vector bool int __b)
-{
-  vector unsigned int __res = (vector unsigned int)__a & ~(vector unsigned int)__b;
+static vector float __ATTRS_o_ai vec_vandc(vector float __a,
+                                           vector bool int __b) {
+  vector unsigned int __res =
+      (vector unsigned int)__a & ~(vector unsigned int)__b;
   return (vector float)__res;
 }
 
+#ifdef __VSX__
+static vector signed long long __ATTRS_o_ai
+vec_vandc(vector signed long long __a, vector signed long long __b) {
+  return __a & ~__b;
+}
+
+static vector signed long long __ATTRS_o_ai
+vec_vandc(vector bool long long __a, vector signed long long __b) {
+  return (vector signed long long)__a & ~__b;
+}
+
+static vector signed long long __ATTRS_o_ai
+vec_vandc(vector signed long long __a, vector bool long long __b) {
+  return __a & ~(vector signed long long)__b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_vandc(vector unsigned long long __a, vector unsigned long long __b) {
+  return __a & ~__b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_vandc(vector bool long long __a, vector unsigned long long __b) {
+  return (vector unsigned long long)__a & ~__b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_vandc(vector unsigned long long __a, vector bool long long __b) {
+  return __a & ~(vector unsigned long long)__b;
+}
+
+static vector bool long long __ATTRS_o_ai vec_vandc(vector bool long long __a,
+                                                    vector bool long long __b) {
+  return __a & ~__b;
+}
+#endif
+
 /* vec_avg */
 
-static vector signed char __ATTRS_o_ai
-vec_avg(vector signed char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_avg(vector signed char __a,
+                                               vector signed char __b) {
   return __builtin_altivec_vavgsb(__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_avg(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_avg(vector unsigned char __a,
+                                                 vector unsigned char __b) {
   return __builtin_altivec_vavgub(__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_avg(vector short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_avg(vector short __a, vector short __b) {
   return __builtin_altivec_vavgsh(__a, __b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_avg(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_avg(vector unsigned short __a,
+                                                  vector unsigned short __b) {
   return __builtin_altivec_vavguh(__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_avg(vector int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_avg(vector int __a, vector int __b) {
   return __builtin_altivec_vavgsw(__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_avg(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_avg(vector unsigned int __a,
+                                                vector unsigned int __b) {
   return __builtin_altivec_vavguw(__a, __b);
 }
 
 /* vec_vavgsb */
 
 static vector signed char __attribute__((__always_inline__))
-vec_vavgsb(vector signed char __a, vector signed char __b)
-{
+vec_vavgsb(vector signed char __a, vector signed char __b) {
   return __builtin_altivec_vavgsb(__a, __b);
 }
 
 /* vec_vavgub */
 
 static vector unsigned char __attribute__((__always_inline__))
-vec_vavgub(vector unsigned char __a, vector unsigned char __b)
-{
+vec_vavgub(vector unsigned char __a, vector unsigned char __b) {
   return __builtin_altivec_vavgub(__a, __b);
 }
 
 /* vec_vavgsh */
 
 static vector short __attribute__((__always_inline__))
-vec_vavgsh(vector short __a, vector short __b)
-{
+vec_vavgsh(vector short __a, vector short __b) {
   return __builtin_altivec_vavgsh(__a, __b);
 }
 
 /* vec_vavguh */
 
 static vector unsigned short __attribute__((__always_inline__))
-vec_vavguh(vector unsigned short __a, vector unsigned short __b)
-{
+vec_vavguh(vector unsigned short __a, vector unsigned short __b) {
   return __builtin_altivec_vavguh(__a, __b);
 }
 
 /* vec_vavgsw */
 
 static vector int __attribute__((__always_inline__))
-vec_vavgsw(vector int __a, vector int __b)
-{
+vec_vavgsw(vector int __a, vector int __b) {
   return __builtin_altivec_vavgsw(__a, __b);
 }
 
 /* vec_vavguw */
 
 static vector unsigned int __attribute__((__always_inline__))
-vec_vavguw(vector unsigned int __a, vector unsigned int __b)
-{
+vec_vavguw(vector unsigned int __a, vector unsigned int __b) {
   return __builtin_altivec_vavguw(__a, __b);
 }
 
 /* vec_ceil */
 
 static vector float __attribute__((__always_inline__))
-vec_ceil(vector float __a)
-{
+vec_ceil(vector float __a) {
   return __builtin_altivec_vrfip(__a);
 }
 
 /* vec_vrfip */
 
 static vector float __attribute__((__always_inline__))
-vec_vrfip(vector float __a)
-{
+vec_vrfip(vector float __a) {
   return __builtin_altivec_vrfip(__a);
 }
 
 /* vec_cmpb */
 
 static vector int __attribute__((__always_inline__))
-vec_cmpb(vector float __a, vector float __b)
-{
+vec_cmpb(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpbfp(__a, __b);
 }
 
 /* vec_vcmpbfp */
 
 static vector int __attribute__((__always_inline__))
-vec_vcmpbfp(vector float __a, vector float __b)
-{
+vec_vcmpbfp(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpbfp(__a, __b);
 }
 
 /* vec_cmpeq */
 
-static vector bool char __ATTRS_o_ai
-vec_cmpeq(vector signed char __a, vector signed char __b)
-{
-  return (vector bool char)
-    __builtin_altivec_vcmpequb((vector char)__a, (vector char)__b);
+static vector bool char __ATTRS_o_ai vec_cmpeq(vector signed char __a,
+                                               vector signed char __b) {
+  return (vector bool char)__builtin_altivec_vcmpequb((vector char)__a,
+                                                      (vector char)__b);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_cmpeq(vector unsigned char __a, vector unsigned char __b)
-{
-  return (vector bool char)
-    __builtin_altivec_vcmpequb((vector char)__a, (vector char)__b);
+static vector bool char __ATTRS_o_ai vec_cmpeq(vector unsigned char __a,
+                                               vector unsigned char __b) {
+  return (vector bool char)__builtin_altivec_vcmpequb((vector char)__a,
+                                                      (vector char)__b);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_cmpeq(vector short __a, vector short __b)
-{
+static vector bool short __ATTRS_o_ai vec_cmpeq(vector short __a,
+                                                vector short __b) {
   return (vector bool short)__builtin_altivec_vcmpequh(__a, __b);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_cmpeq(vector unsigned short __a, vector unsigned short __b)
-{
-  return (vector bool short)
-    __builtin_altivec_vcmpequh((vector short)__a, (vector short)__b);
+static vector bool short __ATTRS_o_ai vec_cmpeq(vector unsigned short __a,
+                                                vector unsigned short __b) {
+  return (vector bool short)__builtin_altivec_vcmpequh((vector short)__a,
+                                                       (vector short)__b);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_cmpeq(vector int __a, vector int __b)
-{
+static vector bool int __ATTRS_o_ai vec_cmpeq(vector int __a, vector int __b) {
   return (vector bool int)__builtin_altivec_vcmpequw(__a, __b);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_cmpeq(vector unsigned int __a, vector unsigned int __b)
-{
-  return (vector bool int)
-    __builtin_altivec_vcmpequw((vector int)__a, (vector int)__b);
+static vector bool int __ATTRS_o_ai vec_cmpeq(vector unsigned int __a,
+                                              vector unsigned int __b) {
+  return (vector bool int)__builtin_altivec_vcmpequw((vector int)__a,
+                                                     (vector int)__b);
 }
 
 #ifdef __POWER8_VECTOR__
 static vector bool long long __ATTRS_o_ai
-vec_cmpeq(vector signed long long __a, vector signed long long __b) 
-{
-  return (vector bool long long) __builtin_altivec_vcmpequd(__a, __b);
+vec_cmpeq(vector signed long long __a, vector signed long long __b) {
+  return (vector bool long long)__builtin_altivec_vcmpequd(__a, __b);
 }
 
 static vector bool long long __ATTRS_o_ai
-vec_cmpeq(vector unsigned long long __a, vector unsigned long long __b) 
-{
-  return (vector bool long long) 
-    __builtin_altivec_vcmpequd((vector long long)__a, (vector long long) __b);
+vec_cmpeq(vector unsigned long long __a, vector unsigned long long __b) {
+  return (vector bool long long)__builtin_altivec_vcmpequd(
+      (vector long long)__a, (vector long long)__b);
 }
 #endif
 
-static vector bool int __ATTRS_o_ai
-vec_cmpeq(vector float __a, vector float __b)
-{
+static vector bool int __ATTRS_o_ai vec_cmpeq(vector float __a,
+                                              vector float __b) {
   return (vector bool int)__builtin_altivec_vcmpeqfp(__a, __b);
 }
 
 /* vec_cmpge */
 
 static vector bool int __attribute__((__always_inline__))
-vec_cmpge(vector float __a, vector float __b)
-{
+vec_cmpge(vector float __a, vector float __b) {
   return (vector bool int)__builtin_altivec_vcmpgefp(__a, __b);
 }
 
 /* vec_vcmpgefp */
 
 static vector bool int __attribute__((__always_inline__))
-vec_vcmpgefp(vector float __a, vector float __b)
-{
+vec_vcmpgefp(vector float __a, vector float __b) {
   return (vector bool int)__builtin_altivec_vcmpgefp(__a, __b);
 }
 
 /* vec_cmpgt */
 
-static vector bool char __ATTRS_o_ai
-vec_cmpgt(vector signed char __a, vector signed char __b)
-{
+static vector bool char __ATTRS_o_ai vec_cmpgt(vector signed char __a,
+                                               vector signed char __b) {
   return (vector bool char)__builtin_altivec_vcmpgtsb(__a, __b);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_cmpgt(vector unsigned char __a, vector unsigned char __b)
-{
+static vector bool char __ATTRS_o_ai vec_cmpgt(vector unsigned char __a,
+                                               vector unsigned char __b) {
   return (vector bool char)__builtin_altivec_vcmpgtub(__a, __b);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_cmpgt(vector short __a, vector short __b)
-{
+static vector bool short __ATTRS_o_ai vec_cmpgt(vector short __a,
+                                                vector short __b) {
   return (vector bool short)__builtin_altivec_vcmpgtsh(__a, __b);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_cmpgt(vector unsigned short __a, vector unsigned short __b)
-{
+static vector bool short __ATTRS_o_ai vec_cmpgt(vector unsigned short __a,
+                                                vector unsigned short __b) {
   return (vector bool short)__builtin_altivec_vcmpgtuh(__a, __b);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_cmpgt(vector int __a, vector int __b)
-{
+static vector bool int __ATTRS_o_ai vec_cmpgt(vector int __a, vector int __b) {
   return (vector bool int)__builtin_altivec_vcmpgtsw(__a, __b);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_cmpgt(vector unsigned int __a, vector unsigned int __b)
-{
+static vector bool int __ATTRS_o_ai vec_cmpgt(vector unsigned int __a,
+                                              vector unsigned int __b) {
   return (vector bool int)__builtin_altivec_vcmpgtuw(__a, __b);
 }
 
 #ifdef __POWER8_VECTOR__
 static vector bool long long __ATTRS_o_ai
-vec_cmpgt(vector signed long long __a, vector signed long long __b)
-{
+vec_cmpgt(vector signed long long __a, vector signed long long __b) {
   return (vector bool long long)__builtin_altivec_vcmpgtsd(__a, __b);
 }
 
 static vector bool long long __ATTRS_o_ai
-vec_cmpgt(vector unsigned long long __a, vector unsigned long long __b)
-{
+vec_cmpgt(vector unsigned long long __a, vector unsigned long long __b) {
   return (vector bool long long)__builtin_altivec_vcmpgtud(__a, __b);
 }
 #endif
 
-static vector bool int __ATTRS_o_ai
-vec_cmpgt(vector float __a, vector float __b)
-{
+static vector bool int __ATTRS_o_ai vec_cmpgt(vector float __a,
+                                              vector float __b) {
   return (vector bool int)__builtin_altivec_vcmpgtfp(__a, __b);
 }
 
 /* vec_vcmpgtsb */
 
 static vector bool char __attribute__((__always_inline__))
-vec_vcmpgtsb(vector signed char __a, vector signed char __b)
-{
+vec_vcmpgtsb(vector signed char __a, vector signed char __b) {
   return (vector bool char)__builtin_altivec_vcmpgtsb(__a, __b);
 }
 
 /* vec_vcmpgtub */
 
 static vector bool char __attribute__((__always_inline__))
-vec_vcmpgtub(vector unsigned char __a, vector unsigned char __b)
-{
+vec_vcmpgtub(vector unsigned char __a, vector unsigned char __b) {
   return (vector bool char)__builtin_altivec_vcmpgtub(__a, __b);
 }
 
 /* vec_vcmpgtsh */
 
 static vector bool short __attribute__((__always_inline__))
-vec_vcmpgtsh(vector short __a, vector short __b)
-{
+vec_vcmpgtsh(vector short __a, vector short __b) {
   return (vector bool short)__builtin_altivec_vcmpgtsh(__a, __b);
 }
 
 /* vec_vcmpgtuh */
 
 static vector bool short __attribute__((__always_inline__))
-vec_vcmpgtuh(vector unsigned short __a, vector unsigned short __b)
-{
+vec_vcmpgtuh(vector unsigned short __a, vector unsigned short __b) {
   return (vector bool short)__builtin_altivec_vcmpgtuh(__a, __b);
 }
 
 /* vec_vcmpgtsw */
 
 static vector bool int __attribute__((__always_inline__))
-vec_vcmpgtsw(vector int __a, vector int __b)
-{
+vec_vcmpgtsw(vector int __a, vector int __b) {
   return (vector bool int)__builtin_altivec_vcmpgtsw(__a, __b);
 }
 
 /* vec_vcmpgtuw */
 
 static vector bool int __attribute__((__always_inline__))
-vec_vcmpgtuw(vector unsigned int __a, vector unsigned int __b)
-{
+vec_vcmpgtuw(vector unsigned int __a, vector unsigned int __b) {
   return (vector bool int)__builtin_altivec_vcmpgtuw(__a, __b);
 }
 
 /* vec_vcmpgtfp */
 
 static vector bool int __attribute__((__always_inline__))
-vec_vcmpgtfp(vector float __a, vector float __b)
-{
+vec_vcmpgtfp(vector float __a, vector float __b) {
   return (vector bool int)__builtin_altivec_vcmpgtfp(__a, __b);
 }
 
 /* vec_cmple */
 
 static vector bool int __attribute__((__always_inline__))
-vec_cmple(vector float __a, vector float __b)
-{
+vec_cmple(vector float __a, vector float __b) {
   return (vector bool int)__builtin_altivec_vcmpgefp(__b, __a);
 }
 
 /* vec_cmplt */
 
-static vector bool char __ATTRS_o_ai
-vec_cmplt(vector signed char __a, vector signed char __b)
-{
+static vector bool char __ATTRS_o_ai vec_cmplt(vector signed char __a,
+                                               vector signed char __b) {
   return (vector bool char)__builtin_altivec_vcmpgtsb(__b, __a);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_cmplt(vector unsigned char __a, vector unsigned char __b)
-{
+static vector bool char __ATTRS_o_ai vec_cmplt(vector unsigned char __a,
+                                               vector unsigned char __b) {
   return (vector bool char)__builtin_altivec_vcmpgtub(__b, __a);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_cmplt(vector short __a, vector short __b)
-{
+static vector bool short __ATTRS_o_ai vec_cmplt(vector short __a,
+                                                vector short __b) {
   return (vector bool short)__builtin_altivec_vcmpgtsh(__b, __a);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_cmplt(vector unsigned short __a, vector unsigned short __b)
-{
+static vector bool short __ATTRS_o_ai vec_cmplt(vector unsigned short __a,
+                                                vector unsigned short __b) {
   return (vector bool short)__builtin_altivec_vcmpgtuh(__b, __a);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_cmplt(vector int __a, vector int __b)
-{
+static vector bool int __ATTRS_o_ai vec_cmplt(vector int __a, vector int __b) {
   return (vector bool int)__builtin_altivec_vcmpgtsw(__b, __a);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_cmplt(vector unsigned int __a, vector unsigned int __b)
-{
+static vector bool int __ATTRS_o_ai vec_cmplt(vector unsigned int __a,
+                                              vector unsigned int __b) {
   return (vector bool int)__builtin_altivec_vcmpgtuw(__b, __a);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_cmplt(vector float __a, vector float __b)
-{
+static vector bool int __ATTRS_o_ai vec_cmplt(vector float __a,
+                                              vector float __b) {
   return (vector bool int)__builtin_altivec_vcmpgtfp(__b, __a);
 }
 
 /* vec_ctf */
 
-static vector float __ATTRS_o_ai
-vec_ctf(vector int __a, int __b)
-{
+static vector float __ATTRS_o_ai vec_ctf(vector int __a, int __b) {
   return __builtin_altivec_vcfsx(__a, __b);
 }
 
-static vector float __ATTRS_o_ai
-vec_ctf(vector unsigned int __a, int __b)
-{
+static vector float __ATTRS_o_ai vec_ctf(vector unsigned int __a, int __b) {
   return __builtin_altivec_vcfux((vector int)__a, __b);
 }
 
 /* vec_vcfsx */
 
 static vector float __attribute__((__always_inline__))
-vec_vcfsx(vector int __a, int __b)
-{
+vec_vcfsx(vector int __a, int __b) {
   return __builtin_altivec_vcfsx(__a, __b);
 }
 
 /* vec_vcfux */
 
 static vector float __attribute__((__always_inline__))
-vec_vcfux(vector unsigned int __a, int __b)
-{
+vec_vcfux(vector unsigned int __a, int __b) {
   return __builtin_altivec_vcfux((vector int)__a, __b);
 }
 
 /* vec_cts */
 
 static vector int __attribute__((__always_inline__))
-vec_cts(vector float __a, int __b)
-{
+vec_cts(vector float __a, int __b) {
   return __builtin_altivec_vctsxs(__a, __b);
 }
 
 /* vec_vctsxs */
 
 static vector int __attribute__((__always_inline__))
-vec_vctsxs(vector float __a, int __b)
-{
+vec_vctsxs(vector float __a, int __b) {
   return __builtin_altivec_vctsxs(__a, __b);
 }
 
 /* vec_ctu */
 
 static vector unsigned int __attribute__((__always_inline__))
-vec_ctu(vector float __a, int __b)
-{
+vec_ctu(vector float __a, int __b) {
   return __builtin_altivec_vctuxs(__a, __b);
 }
 
 /* vec_vctuxs */
 
 static vector unsigned int __attribute__((__always_inline__))
-vec_vctuxs(vector float __a, int __b)
-{
+vec_vctuxs(vector float __a, int __b) {
   return __builtin_altivec_vctuxs(__a, __b);
 }
 
 /* vec_div */
 #ifdef __VSX__
-static vector float __ATTRS_o_ai
-vec_div(vector float __a, vector float __b)
-{
+static vector float __ATTRS_o_ai vec_div(vector float __a, vector float __b) {
   return __builtin_vsx_xvdivsp(__a, __b);
 }
 
-static vector double __ATTRS_o_ai
-vec_div(vector double __a, vector double __b)
-{
+static vector double __ATTRS_o_ai vec_div(vector double __a,
+                                          vector double __b) {
   return __builtin_vsx_xvdivdp(__a, __b);
 }
 #endif
 
 /* vec_dss */
 
-static void __attribute__((__always_inline__))
-vec_dss(int __a)
-{
+static void __attribute__((__always_inline__)) vec_dss(int __a) {
   __builtin_altivec_dss(__a);
 }
 
 /* vec_dssall */
 
-static void __attribute__((__always_inline__))
-vec_dssall(void)
-{
+static void __attribute__((__always_inline__)) vec_dssall(void) {
   __builtin_altivec_dssall();
 }
 
 /* vec_dst */
 
 static void __attribute__((__always_inline__))
-vec_dst(const void *__a, int __b, int __c)
-{
+vec_dst(const void *__a, int __b, int __c) {
   __builtin_altivec_dst(__a, __b, __c);
 }
 
 /* vec_dstst */
 
 static void __attribute__((__always_inline__))
-vec_dstst(const void *__a, int __b, int __c)
-{
+vec_dstst(const void *__a, int __b, int __c) {
   __builtin_altivec_dstst(__a, __b, __c);
 }
 
 /* vec_dststt */
 
 static void __attribute__((__always_inline__))
-vec_dststt(const void *__a, int __b, int __c)
-{
+vec_dststt(const void *__a, int __b, int __c) {
   __builtin_altivec_dststt(__a, __b, __c);
 }
 
 /* vec_dstt */
 
 static void __attribute__((__always_inline__))
-vec_dstt(const void *__a, int __b, int __c)
-{
+vec_dstt(const void *__a, int __b, int __c) {
   __builtin_altivec_dstt(__a, __b, __c);
 }
 
 /* vec_expte */
 
 static vector float __attribute__((__always_inline__))
-vec_expte(vector float __a)
-{
+vec_expte(vector float __a) {
   return __builtin_altivec_vexptefp(__a);
 }
 
 /* vec_vexptefp */
 
 static vector float __attribute__((__always_inline__))
-vec_vexptefp(vector float __a)
-{
+vec_vexptefp(vector float __a) {
   return __builtin_altivec_vexptefp(__a);
 }
 
 /* vec_floor */
 
 static vector float __attribute__((__always_inline__))
-vec_floor(vector float __a)
-{
+vec_floor(vector float __a) {
   return __builtin_altivec_vrfim(__a);
 }
 
 /* vec_vrfim */
 
 static vector float __attribute__((__always_inline__))
-vec_vrfim(vector float __a)
-{
+vec_vrfim(vector float __a) {
   return __builtin_altivec_vrfim(__a);
 }
 
 /* vec_ld */
 
-static vector signed char __ATTRS_o_ai
-vec_ld(int __a, const vector signed char *__b)
-{
+static vector signed char __ATTRS_o_ai vec_ld(int __a,
+                                              const vector signed char *__b) {
   return (vector signed char)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_ld(int __a, const signed char *__b)
-{
+static vector signed char __ATTRS_o_ai vec_ld(int __a, const signed char *__b) {
   return (vector signed char)__builtin_altivec_lvx(__a, __b);
 }
 
 static vector unsigned char __ATTRS_o_ai
-vec_ld(int __a, const vector unsigned char *__b)
-{
+vec_ld(int __a, const vector unsigned char *__b) {
   return (vector unsigned char)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_ld(int __a, const unsigned char *__b)
-{
+static vector unsigned char __ATTRS_o_ai vec_ld(int __a,
+                                                const unsigned char *__b) {
   return (vector unsigned char)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_ld(int __a, const vector bool char *__b)
-{
+static vector bool char __ATTRS_o_ai vec_ld(int __a,
+                                            const vector bool char *__b) {
   return (vector bool char)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_ld(int __a, const vector short *__b)
-{
+static vector short __ATTRS_o_ai vec_ld(int __a, const vector short *__b) {
   return (vector short)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_ld(int __a, const short *__b)
-{
+static vector short __ATTRS_o_ai vec_ld(int __a, const short *__b) {
   return (vector short)__builtin_altivec_lvx(__a, __b);
 }
 
 static vector unsigned short __ATTRS_o_ai
-vec_ld(int __a, const vector unsigned short *__b)
-{
+vec_ld(int __a, const vector unsigned short *__b) {
   return (vector unsigned short)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_ld(int __a, const unsigned short *__b)
-{
+static vector unsigned short __ATTRS_o_ai vec_ld(int __a,
+                                                 const unsigned short *__b) {
   return (vector unsigned short)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_ld(int __a, const vector bool short *__b)
-{
+static vector bool short __ATTRS_o_ai vec_ld(int __a,
+                                             const vector bool short *__b) {
   return (vector bool short)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_ld(int __a, const vector pixel *__b)
-{
+static vector pixel __ATTRS_o_ai vec_ld(int __a, const vector pixel *__b) {
   return (vector pixel)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_ld(int __a, const vector int *__b)
-{
+static vector int __ATTRS_o_ai vec_ld(int __a, const vector int *__b) {
   return (vector int)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_ld(int __a, const int *__b)
-{
+static vector int __ATTRS_o_ai vec_ld(int __a, const int *__b) {
   return (vector int)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_ld(int __a, const vector unsigned int *__b)
-{
+static vector unsigned int __ATTRS_o_ai vec_ld(int __a,
+                                               const vector unsigned int *__b) {
   return (vector unsigned int)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_ld(int __a, const unsigned int *__b)
-{
+static vector unsigned int __ATTRS_o_ai vec_ld(int __a,
+                                               const unsigned int *__b) {
   return (vector unsigned int)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_ld(int __a, const vector bool int *__b)
-{
+static vector bool int __ATTRS_o_ai vec_ld(int __a,
+                                           const vector bool int *__b) {
   return (vector bool int)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector float __ATTRS_o_ai
-vec_ld(int __a, const vector float *__b)
-{
+static vector float __ATTRS_o_ai vec_ld(int __a, const vector float *__b) {
   return (vector float)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector float __ATTRS_o_ai
-vec_ld(int __a, const float *__b)
-{
+static vector float __ATTRS_o_ai vec_ld(int __a, const float *__b) {
   return (vector float)__builtin_altivec_lvx(__a, __b);
 }
 
 /* vec_lvx */
 
-static vector signed char __ATTRS_o_ai
-vec_lvx(int __a, const vector signed char *__b)
-{
+static vector signed char __ATTRS_o_ai vec_lvx(int __a,
+                                               const vector signed char *__b) {
   return (vector signed char)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_lvx(int __a, const signed char *__b)
-{
+static vector signed char __ATTRS_o_ai vec_lvx(int __a,
+                                               const signed char *__b) {
   return (vector signed char)__builtin_altivec_lvx(__a, __b);
 }
 
 static vector unsigned char __ATTRS_o_ai
-vec_lvx(int __a, const vector unsigned char *__b)
-{
+vec_lvx(int __a, const vector unsigned char *__b) {
   return (vector unsigned char)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_lvx(int __a, const unsigned char *__b)
-{
+static vector unsigned char __ATTRS_o_ai vec_lvx(int __a,
+                                                 const unsigned char *__b) {
   return (vector unsigned char)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_lvx(int __a, const vector bool char *__b)
-{
+static vector bool char __ATTRS_o_ai vec_lvx(int __a,
+                                             const vector bool char *__b) {
   return (vector bool char)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_lvx(int __a, const vector short *__b)
-{
+static vector short __ATTRS_o_ai vec_lvx(int __a, const vector short *__b) {
   return (vector short)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_lvx(int __a, const short *__b)
-{
+static vector short __ATTRS_o_ai vec_lvx(int __a, const short *__b) {
   return (vector short)__builtin_altivec_lvx(__a, __b);
 }
 
 static vector unsigned short __ATTRS_o_ai
-vec_lvx(int __a, const vector unsigned short *__b)
-{
+vec_lvx(int __a, const vector unsigned short *__b) {
   return (vector unsigned short)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_lvx(int __a, const unsigned short *__b)
-{
+static vector unsigned short __ATTRS_o_ai vec_lvx(int __a,
+                                                  const unsigned short *__b) {
   return (vector unsigned short)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_lvx(int __a, const vector bool short *__b)
-{
+static vector bool short __ATTRS_o_ai vec_lvx(int __a,
+                                              const vector bool short *__b) {
   return (vector bool short)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_lvx(int __a, const vector pixel *__b)
-{
+static vector pixel __ATTRS_o_ai vec_lvx(int __a, const vector pixel *__b) {
   return (vector pixel)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_lvx(int __a, const vector int *__b)
-{
+static vector int __ATTRS_o_ai vec_lvx(int __a, const vector int *__b) {
   return (vector int)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_lvx(int __a, const int *__b)
-{
+static vector int __ATTRS_o_ai vec_lvx(int __a, const int *__b) {
   return (vector int)__builtin_altivec_lvx(__a, __b);
 }
 
 static vector unsigned int __ATTRS_o_ai
-vec_lvx(int __a, const vector unsigned int *__b)
-{
+vec_lvx(int __a, const vector unsigned int *__b) {
   return (vector unsigned int)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_lvx(int __a, const unsigned int *__b)
-{
+static vector unsigned int __ATTRS_o_ai vec_lvx(int __a,
+                                                const unsigned int *__b) {
   return (vector unsigned int)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_lvx(int __a, const vector bool int *__b)
-{
+static vector bool int __ATTRS_o_ai vec_lvx(int __a,
+                                            const vector bool int *__b) {
   return (vector bool int)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector float __ATTRS_o_ai
-vec_lvx(int __a, const vector float *__b)
-{
+static vector float __ATTRS_o_ai vec_lvx(int __a, const vector float *__b) {
   return (vector float)__builtin_altivec_lvx(__a, __b);
 }
 
-static vector float __ATTRS_o_ai
-vec_lvx(int __a, const float *__b)
-{
+static vector float __ATTRS_o_ai vec_lvx(int __a, const float *__b) {
   return (vector float)__builtin_altivec_lvx(__a, __b);
 }
 
 /* vec_lde */
 
-static vector signed char __ATTRS_o_ai
-vec_lde(int __a, const signed char *__b)
-{
+static vector signed char __ATTRS_o_ai vec_lde(int __a,
+                                               const signed char *__b) {
   return (vector signed char)__builtin_altivec_lvebx(__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_lde(int __a, const unsigned char *__b)
-{
+static vector unsigned char __ATTRS_o_ai vec_lde(int __a,
+                                                 const unsigned char *__b) {
   return (vector unsigned char)__builtin_altivec_lvebx(__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_lde(int __a, const short *__b)
-{
+static vector short __ATTRS_o_ai vec_lde(int __a, const short *__b) {
   return (vector short)__builtin_altivec_lvehx(__a, __b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_lde(int __a, const unsigned short *__b)
-{
+static vector unsigned short __ATTRS_o_ai vec_lde(int __a,
+                                                  const unsigned short *__b) {
   return (vector unsigned short)__builtin_altivec_lvehx(__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_lde(int __a, const int *__b)
-{
+static vector int __ATTRS_o_ai vec_lde(int __a, const int *__b) {
   return (vector int)__builtin_altivec_lvewx(__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_lde(int __a, const unsigned int *__b)
-{
+static vector unsigned int __ATTRS_o_ai vec_lde(int __a,
+                                                const unsigned int *__b) {
   return (vector unsigned int)__builtin_altivec_lvewx(__a, __b);
 }
 
-static vector float __ATTRS_o_ai
-vec_lde(int __a, const float *__b)
-{
+static vector float __ATTRS_o_ai vec_lde(int __a, const float *__b) {
   return (vector float)__builtin_altivec_lvewx(__a, __b);
 }
 
 /* vec_lvebx */
 
-static vector signed char __ATTRS_o_ai
-vec_lvebx(int __a, const signed char *__b)
-{
+static vector signed char __ATTRS_o_ai vec_lvebx(int __a,
+                                                 const signed char *__b) {
   return (vector signed char)__builtin_altivec_lvebx(__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_lvebx(int __a, const unsigned char *__b)
-{
+static vector unsigned char __ATTRS_o_ai vec_lvebx(int __a,
+                                                   const unsigned char *__b) {
   return (vector unsigned char)__builtin_altivec_lvebx(__a, __b);
 }
 
 /* vec_lvehx */
 
-static vector short __ATTRS_o_ai
-vec_lvehx(int __a, const short *__b)
-{
+static vector short __ATTRS_o_ai vec_lvehx(int __a, const short *__b) {
   return (vector short)__builtin_altivec_lvehx(__a, __b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_lvehx(int __a, const unsigned short *__b)
-{
+static vector unsigned short __ATTRS_o_ai vec_lvehx(int __a,
+                                                    const unsigned short *__b) {
   return (vector unsigned short)__builtin_altivec_lvehx(__a, __b);
 }
 
 /* vec_lvewx */
 
-static vector int __ATTRS_o_ai
-vec_lvewx(int __a, const int *__b)
-{
+static vector int __ATTRS_o_ai vec_lvewx(int __a, const int *__b) {
   return (vector int)__builtin_altivec_lvewx(__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_lvewx(int __a, const unsigned int *__b)
-{
+static vector unsigned int __ATTRS_o_ai vec_lvewx(int __a,
+                                                  const unsigned int *__b) {
   return (vector unsigned int)__builtin_altivec_lvewx(__a, __b);
 }
 
-static vector float __ATTRS_o_ai
-vec_lvewx(int __a, const float *__b)
-{
+static vector float __ATTRS_o_ai vec_lvewx(int __a, const float *__b) {
   return (vector float)__builtin_altivec_lvewx(__a, __b);
 }
 
 /* vec_ldl */
 
-static vector signed char __ATTRS_o_ai
-vec_ldl(int __a, const vector signed char *__b)
-{
+static vector signed char __ATTRS_o_ai vec_ldl(int __a,
+                                               const vector signed char *__b) {
   return (vector signed char)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_ldl(int __a, const signed char *__b)
-{
+static vector signed char __ATTRS_o_ai vec_ldl(int __a,
+                                               const signed char *__b) {
   return (vector signed char)__builtin_altivec_lvxl(__a, __b);
 }
 
 static vector unsigned char __ATTRS_o_ai
-vec_ldl(int __a, const vector unsigned char *__b)
-{
+vec_ldl(int __a, const vector unsigned char *__b) {
   return (vector unsigned char)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_ldl(int __a, const unsigned char *__b)
-{
+static vector unsigned char __ATTRS_o_ai vec_ldl(int __a,
+                                                 const unsigned char *__b) {
   return (vector unsigned char)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_ldl(int __a, const vector bool char *__b)
-{
+static vector bool char __ATTRS_o_ai vec_ldl(int __a,
+                                             const vector bool char *__b) {
   return (vector bool char)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_ldl(int __a, const vector short *__b)
-{
+static vector short __ATTRS_o_ai vec_ldl(int __a, const vector short *__b) {
   return (vector short)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_ldl(int __a, const short *__b)
-{
+static vector short __ATTRS_o_ai vec_ldl(int __a, const short *__b) {
   return (vector short)__builtin_altivec_lvxl(__a, __b);
 }
 
 static vector unsigned short __ATTRS_o_ai
-vec_ldl(int __a, const vector unsigned short *__b)
-{
+vec_ldl(int __a, const vector unsigned short *__b) {
   return (vector unsigned short)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_ldl(int __a, const unsigned short *__b)
-{
+static vector unsigned short __ATTRS_o_ai vec_ldl(int __a,
+                                                  const unsigned short *__b) {
   return (vector unsigned short)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_ldl(int __a, const vector bool short *__b)
-{
+static vector bool short __ATTRS_o_ai vec_ldl(int __a,
+                                              const vector bool short *__b) {
   return (vector bool short)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_ldl(int __a, const vector pixel *__b)
-{
+static vector pixel __ATTRS_o_ai vec_ldl(int __a, const vector pixel *__b) {
   return (vector pixel short)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_ldl(int __a, const vector int *__b)
-{
+static vector int __ATTRS_o_ai vec_ldl(int __a, const vector int *__b) {
   return (vector int)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_ldl(int __a, const int *__b)
-{
+static vector int __ATTRS_o_ai vec_ldl(int __a, const int *__b) {
   return (vector int)__builtin_altivec_lvxl(__a, __b);
 }
 
 static vector unsigned int __ATTRS_o_ai
-vec_ldl(int __a, const vector unsigned int *__b)
-{
+vec_ldl(int __a, const vector unsigned int *__b) {
   return (vector unsigned int)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_ldl(int __a, const unsigned int *__b)
-{
+static vector unsigned int __ATTRS_o_ai vec_ldl(int __a,
+                                                const unsigned int *__b) {
   return (vector unsigned int)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_ldl(int __a, const vector bool int *__b)
-{
+static vector bool int __ATTRS_o_ai vec_ldl(int __a,
+                                            const vector bool int *__b) {
   return (vector bool int)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector float __ATTRS_o_ai
-vec_ldl(int __a, const vector float *__b)
-{
+static vector float __ATTRS_o_ai vec_ldl(int __a, const vector float *__b) {
   return (vector float)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector float __ATTRS_o_ai
-vec_ldl(int __a, const float *__b)
-{
+static vector float __ATTRS_o_ai vec_ldl(int __a, const float *__b) {
   return (vector float)__builtin_altivec_lvxl(__a, __b);
 }
 
 /* vec_lvxl */
 
-static vector signed char __ATTRS_o_ai
-vec_lvxl(int __a, const vector signed char *__b)
-{
+static vector signed char __ATTRS_o_ai vec_lvxl(int __a,
+                                                const vector signed char *__b) {
   return (vector signed char)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_lvxl(int __a, const signed char *__b)
-{
+static vector signed char __ATTRS_o_ai vec_lvxl(int __a,
+                                                const signed char *__b) {
   return (vector signed char)__builtin_altivec_lvxl(__a, __b);
 }
 
 static vector unsigned char __ATTRS_o_ai
-vec_lvxl(int __a, const vector unsigned char *__b)
-{
+vec_lvxl(int __a, const vector unsigned char *__b) {
   return (vector unsigned char)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_lvxl(int __a, const unsigned char *__b)
-{
+static vector unsigned char __ATTRS_o_ai vec_lvxl(int __a,
+                                                  const unsigned char *__b) {
   return (vector unsigned char)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_lvxl(int __a, const vector bool char *__b)
-{
+static vector bool char __ATTRS_o_ai vec_lvxl(int __a,
+                                              const vector bool char *__b) {
   return (vector bool char)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_lvxl(int __a, const vector short *__b)
-{
+static vector short __ATTRS_o_ai vec_lvxl(int __a, const vector short *__b) {
   return (vector short)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_lvxl(int __a, const short *__b)
-{
+static vector short __ATTRS_o_ai vec_lvxl(int __a, const short *__b) {
   return (vector short)__builtin_altivec_lvxl(__a, __b);
 }
 
 static vector unsigned short __ATTRS_o_ai
-vec_lvxl(int __a, const vector unsigned short *__b)
-{
+vec_lvxl(int __a, const vector unsigned short *__b) {
   return (vector unsigned short)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_lvxl(int __a, const unsigned short *__b)
-{
+static vector unsigned short __ATTRS_o_ai vec_lvxl(int __a,
+                                                   const unsigned short *__b) {
   return (vector unsigned short)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_lvxl(int __a, const vector bool short *__b)
-{
+static vector bool short __ATTRS_o_ai vec_lvxl(int __a,
+                                               const vector bool short *__b) {
   return (vector bool short)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_lvxl(int __a, const vector pixel *__b)
-{
+static vector pixel __ATTRS_o_ai vec_lvxl(int __a, const vector pixel *__b) {
   return (vector pixel)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_lvxl(int __a, const vector int *__b)
-{
+static vector int __ATTRS_o_ai vec_lvxl(int __a, const vector int *__b) {
   return (vector int)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_lvxl(int __a, const int *__b)
-{
+static vector int __ATTRS_o_ai vec_lvxl(int __a, const int *__b) {
   return (vector int)__builtin_altivec_lvxl(__a, __b);
 }
 
 static vector unsigned int __ATTRS_o_ai
-vec_lvxl(int __a, const vector unsigned int *__b)
-{
+vec_lvxl(int __a, const vector unsigned int *__b) {
   return (vector unsigned int)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_lvxl(int __a, const unsigned int *__b)
-{
+static vector unsigned int __ATTRS_o_ai vec_lvxl(int __a,
+                                                 const unsigned int *__b) {
   return (vector unsigned int)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_lvxl(int __a, const vector bool int *__b)
-{
+static vector bool int __ATTRS_o_ai vec_lvxl(int __a,
+                                             const vector bool int *__b) {
   return (vector bool int)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector float __ATTRS_o_ai
-vec_lvxl(int __a, const vector float *__b)
-{
+static vector float __ATTRS_o_ai vec_lvxl(int __a, const vector float *__b) {
   return (vector float)__builtin_altivec_lvxl(__a, __b);
 }
 
-static vector float __ATTRS_o_ai
-vec_lvxl(int __a, const float *__b)
-{
+static vector float __ATTRS_o_ai vec_lvxl(int __a, const float *__b) {
   return (vector float)__builtin_altivec_lvxl(__a, __b);
 }
 
 /* vec_loge */
 
 static vector float __attribute__((__always_inline__))
-vec_loge(vector float __a)
-{
+vec_loge(vector float __a) {
   return __builtin_altivec_vlogefp(__a);
 }
 
 /* vec_vlogefp */
 
 static vector float __attribute__((__always_inline__))
-vec_vlogefp(vector float __a)
-{
+vec_vlogefp(vector float __a) {
   return __builtin_altivec_vlogefp(__a);
 }
 
@@ -2299,133 +2132,116 @@
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((__deprecated__("use assignment for unaligned little endian \
-loads/stores")))
-vec_lvsl(int __a, const signed char *__b)
-{
-  vector unsigned char mask = 
-    (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
-  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+    __attribute__((__deprecated__("use assignment for unaligned little endian \
+loads/stores"))) vec_lvsl(int __a, const signed char *__b) {
+  vector unsigned char mask =
+      (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
+  vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8,
+                                  7,  6,  5,  4,  3,  2,  1, 0};
   return vec_perm(mask, mask, reverse);
 }
 #else
-static vector unsigned char __ATTRS_o_ai
-vec_lvsl(int __a, const signed char *__b)
-{
+static vector unsigned char __ATTRS_o_ai vec_lvsl(int __a,
+                                                  const signed char *__b) {
   return (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((__deprecated__("use assignment for unaligned little endian \
-loads/stores")))
-vec_lvsl(int __a, const unsigned char *__b)
-{
+    __attribute__((__deprecated__("use assignment for unaligned little endian \
+loads/stores"))) vec_lvsl(int __a, const unsigned char *__b) {
   vector unsigned char mask =
-    (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
-  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+      (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
+  vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8,
+                                  7,  6,  5,  4,  3,  2,  1, 0};
   return vec_perm(mask, mask, reverse);
 }
 #else
-static vector unsigned char __ATTRS_o_ai
-vec_lvsl(int __a, const unsigned char *__b)
-{
+static vector unsigned char __ATTRS_o_ai vec_lvsl(int __a,
+                                                  const unsigned char *__b) {
   return (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((__deprecated__("use assignment for unaligned little endian \
-loads/stores")))
-vec_lvsl(int __a, const short *__b)
-{
+    __attribute__((__deprecated__("use assignment for unaligned little endian \
+loads/stores"))) vec_lvsl(int __a, const short *__b) {
   vector unsigned char mask =
-    (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
-  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+      (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
+  vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8,
+                                  7,  6,  5,  4,  3,  2,  1, 0};
   return vec_perm(mask, mask, reverse);
 }
 #else
-static vector unsigned char __ATTRS_o_ai
-vec_lvsl(int __a, const short *__b)
-{
+static vector unsigned char __ATTRS_o_ai vec_lvsl(int __a, const short *__b) {
   return (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((__deprecated__("use assignment for unaligned little endian \
-loads/stores")))
-vec_lvsl(int __a, const unsigned short *__b)
-{
+    __attribute__((__deprecated__("use assignment for unaligned little endian \
+loads/stores"))) vec_lvsl(int __a, const unsigned short *__b) {
   vector unsigned char mask =
-    (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
-  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+      (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
+  vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8,
+                                  7,  6,  5,  4,  3,  2,  1, 0};
   return vec_perm(mask, mask, reverse);
 }
 #else
-static vector unsigned char __ATTRS_o_ai
-vec_lvsl(int __a, const unsigned short *__b)
-{
+static vector unsigned char __ATTRS_o_ai vec_lvsl(int __a,
+                                                  const unsigned short *__b) {
   return (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((__deprecated__("use assignment for unaligned little endian \
-loads/stores")))
-vec_lvsl(int __a, const int *__b)
-{
+    __attribute__((__deprecated__("use assignment for unaligned little endian \
+loads/stores"))) vec_lvsl(int __a, const int *__b) {
   vector unsigned char mask =
-    (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
-  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+      (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
+  vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8,
+                                  7,  6,  5,  4,  3,  2,  1, 0};
   return vec_perm(mask, mask, reverse);
 }
 #else
-static vector unsigned char __ATTRS_o_ai
-vec_lvsl(int __a, const int *__b)
-{
+static vector unsigned char __ATTRS_o_ai vec_lvsl(int __a, const int *__b) {
   return (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((__deprecated__("use assignment for unaligned little endian \
-loads/stores")))
-vec_lvsl(int __a, const unsigned int *__b)
-{
+    __attribute__((__deprecated__("use assignment for unaligned little endian \
+loads/stores"))) vec_lvsl(int __a, const unsigned int *__b) {
   vector unsigned char mask =
-    (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
-  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+      (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
+  vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8,
+                                  7,  6,  5,  4,  3,  2,  1, 0};
   return vec_perm(mask, mask, reverse);
 }
 #else
-static vector unsigned char __ATTRS_o_ai
-vec_lvsl(int __a, const unsigned int *__b)
-{
+static vector unsigned char __ATTRS_o_ai vec_lvsl(int __a,
+                                                  const unsigned int *__b) {
   return (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((__deprecated__("use assignment for unaligned little endian \
-loads/stores")))
-vec_lvsl(int __a, const float *__b)
-{
+    __attribute__((__deprecated__("use assignment for unaligned little endian \
+loads/stores"))) vec_lvsl(int __a, const float *__b) {
   vector unsigned char mask =
-    (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
-  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+      (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
+  vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8,
+                                  7,  6,  5,  4,  3,  2,  1, 0};
   return vec_perm(mask, mask, reverse);
 }
 #else
-static vector unsigned char __ATTRS_o_ai
-vec_lvsl(int __a, const float *__b)
-{
+static vector unsigned char __ATTRS_o_ai vec_lvsl(int __a, const float *__b) {
   return (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
 }
 #endif
@@ -2434,133 +2250,116 @@
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((__deprecated__("use assignment for unaligned little endian \
-loads/stores")))
-vec_lvsr(int __a, const signed char *__b)
-{
+    __attribute__((__deprecated__("use assignment for unaligned little endian \
+loads/stores"))) vec_lvsr(int __a, const signed char *__b) {
   vector unsigned char mask =
-    (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
-  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+      (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
+  vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8,
+                                  7,  6,  5,  4,  3,  2,  1, 0};
   return vec_perm(mask, mask, reverse);
 }
 #else
-static vector unsigned char __ATTRS_o_ai
-vec_lvsr(int __a, const signed char *__b)
-{
+static vector unsigned char __ATTRS_o_ai vec_lvsr(int __a,
+                                                  const signed char *__b) {
   return (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((__deprecated__("use assignment for unaligned little endian \
-loads/stores")))
-vec_lvsr(int __a, const unsigned char *__b)
-{
+    __attribute__((__deprecated__("use assignment for unaligned little endian \
+loads/stores"))) vec_lvsr(int __a, const unsigned char *__b) {
   vector unsigned char mask =
-    (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
-  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+      (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
+  vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8,
+                                  7,  6,  5,  4,  3,  2,  1, 0};
   return vec_perm(mask, mask, reverse);
 }
 #else
-static vector unsigned char __ATTRS_o_ai
-vec_lvsr(int __a, const unsigned char *__b)
-{
+static vector unsigned char __ATTRS_o_ai vec_lvsr(int __a,
+                                                  const unsigned char *__b) {
   return (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((__deprecated__("use assignment for unaligned little endian \
-loads/stores")))
-vec_lvsr(int __a, const short *__b)
-{
+    __attribute__((__deprecated__("use assignment for unaligned little endian \
+loads/stores"))) vec_lvsr(int __a, const short *__b) {
   vector unsigned char mask =
-    (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
-  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+      (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
+  vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8,
+                                  7,  6,  5,  4,  3,  2,  1, 0};
   return vec_perm(mask, mask, reverse);
 }
 #else
-static vector unsigned char __ATTRS_o_ai
-vec_lvsr(int __a, const short *__b)
-{
+static vector unsigned char __ATTRS_o_ai vec_lvsr(int __a, const short *__b) {
   return (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((__deprecated__("use assignment for unaligned little endian \
-loads/stores")))
-vec_lvsr(int __a, const unsigned short *__b)
-{
+    __attribute__((__deprecated__("use assignment for unaligned little endian \
+loads/stores"))) vec_lvsr(int __a, const unsigned short *__b) {
   vector unsigned char mask =
-    (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
-  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+      (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
+  vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8,
+                                  7,  6,  5,  4,  3,  2,  1, 0};
   return vec_perm(mask, mask, reverse);
 }
 #else
-static vector unsigned char __ATTRS_o_ai
-vec_lvsr(int __a, const unsigned short *__b)
-{
+static vector unsigned char __ATTRS_o_ai vec_lvsr(int __a,
+                                                  const unsigned short *__b) {
   return (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((__deprecated__("use assignment for unaligned little endian \
-loads/stores")))
-vec_lvsr(int __a, const int *__b)
-{
+    __attribute__((__deprecated__("use assignment for unaligned little endian \
+loads/stores"))) vec_lvsr(int __a, const int *__b) {
   vector unsigned char mask =
-    (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
-  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+      (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
+  vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8,
+                                  7,  6,  5,  4,  3,  2,  1, 0};
   return vec_perm(mask, mask, reverse);
 }
 #else
-static vector unsigned char __ATTRS_o_ai
-vec_lvsr(int __a, const int *__b)
-{
+static vector unsigned char __ATTRS_o_ai vec_lvsr(int __a, const int *__b) {
   return (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((__deprecated__("use assignment for unaligned little endian \
-loads/stores")))
-vec_lvsr(int __a, const unsigned int *__b)
-{
+    __attribute__((__deprecated__("use assignment for unaligned little endian \
+loads/stores"))) vec_lvsr(int __a, const unsigned int *__b) {
   vector unsigned char mask =
-    (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
-  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+      (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
+  vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8,
+                                  7,  6,  5,  4,  3,  2,  1, 0};
   return vec_perm(mask, mask, reverse);
 }
 #else
-static vector unsigned char __ATTRS_o_ai
-vec_lvsr(int __a, const unsigned int *__b)
-{
+static vector unsigned char __ATTRS_o_ai vec_lvsr(int __a,
+                                                  const unsigned int *__b) {
   return (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
 }
 #endif
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((__deprecated__("use assignment for unaligned little endian \
-loads/stores")))
-vec_lvsr(int __a, const float *__b)
-{
+    __attribute__((__deprecated__("use assignment for unaligned little endian \
+loads/stores"))) vec_lvsr(int __a, const float *__b) {
   vector unsigned char mask =
-    (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
-  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+      (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
+  vector unsigned char reverse = {15, 14, 13, 12, 11, 10, 9, 8,
+                                  7,  6,  5,  4,  3,  2,  1, 0};
   return vec_perm(mask, mask, reverse);
 }
 #else
-static vector unsigned char __ATTRS_o_ai
-vec_lvsr(int __a, const float *__b)
-{
+static vector unsigned char __ATTRS_o_ai vec_lvsr(int __a, const float *__b) {
   return (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
 }
 #endif
@@ -2568,163 +2367,153 @@
 /* vec_madd */
 
 static vector float __attribute__((__always_inline__))
-vec_madd(vector float __a, vector float __b, vector float __c)
-{
+vec_madd(vector float __a, vector float __b, vector float __c) {
   return __builtin_altivec_vmaddfp(__a, __b, __c);
 }
 
 /* vec_vmaddfp */
 
 static vector float __attribute__((__always_inline__))
-vec_vmaddfp(vector float __a, vector float __b, vector float __c)
-{
+vec_vmaddfp(vector float __a, vector float __b, vector float __c) {
   return __builtin_altivec_vmaddfp(__a, __b, __c);
 }
 
 /* vec_madds */
 
 static vector signed short __attribute__((__always_inline__))
-vec_madds(vector signed short __a, vector signed short __b, vector signed short __c)
-{
+vec_madds(vector signed short __a, vector signed short __b,
+          vector signed short __c) {
   return __builtin_altivec_vmhaddshs(__a, __b, __c);
 }
 
 /* vec_vmhaddshs */
 static vector signed short __attribute__((__always_inline__))
-vec_vmhaddshs(vector signed short __a,
-              vector signed short __b,
-              vector signed short __c)
-{
+vec_vmhaddshs(vector signed short __a, vector signed short __b,
+              vector signed short __c) {
   return __builtin_altivec_vmhaddshs(__a, __b, __c);
 }
 
 /* vec_max */
 
-static vector signed char __ATTRS_o_ai
-vec_max(vector signed char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_max(vector signed char __a,
+                                               vector signed char __b) {
   return __builtin_altivec_vmaxsb(__a, __b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_max(vector bool char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_max(vector bool char __a,
+                                               vector signed char __b) {
   return __builtin_altivec_vmaxsb((vector signed char)__a, __b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_max(vector signed char __a, vector bool char __b)
-{
+static vector signed char __ATTRS_o_ai vec_max(vector signed char __a,
+                                               vector bool char __b) {
   return __builtin_altivec_vmaxsb(__a, (vector signed char)__b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_max(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_max(vector unsigned char __a,
+                                                 vector unsigned char __b) {
   return __builtin_altivec_vmaxub(__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_max(vector bool char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_max(vector bool char __a,
+                                                 vector unsigned char __b) {
   return __builtin_altivec_vmaxub((vector unsigned char)__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_max(vector unsigned char __a, vector bool char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_max(vector unsigned char __a,
+                                                 vector bool char __b) {
   return __builtin_altivec_vmaxub(__a, (vector unsigned char)__b);
 }
 
-static vector short __ATTRS_o_ai
-vec_max(vector short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_max(vector short __a, vector short __b) {
   return __builtin_altivec_vmaxsh(__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_max(vector bool short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_max(vector bool short __a,
+                                         vector short __b) {
   return __builtin_altivec_vmaxsh((vector short)__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_max(vector short __a, vector bool short __b)
-{
+static vector short __ATTRS_o_ai vec_max(vector short __a,
+                                         vector bool short __b) {
   return __builtin_altivec_vmaxsh(__a, (vector short)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_max(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_max(vector unsigned short __a,
+                                                  vector unsigned short __b) {
   return __builtin_altivec_vmaxuh(__a, __b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_max(vector bool short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_max(vector bool short __a,
+                                                  vector unsigned short __b) {
   return __builtin_altivec_vmaxuh((vector unsigned short)__a, __b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_max(vector unsigned short __a, vector bool short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_max(vector unsigned short __a,
+                                                  vector bool short __b) {
   return __builtin_altivec_vmaxuh(__a, (vector unsigned short)__b);
 }
 
-static vector int __ATTRS_o_ai
-vec_max(vector int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_max(vector int __a, vector int __b) {
   return __builtin_altivec_vmaxsw(__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_max(vector bool int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_max(vector bool int __a, vector int __b) {
   return __builtin_altivec_vmaxsw((vector int)__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_max(vector int __a, vector bool int __b)
-{
+static vector int __ATTRS_o_ai vec_max(vector int __a, vector bool int __b) {
   return __builtin_altivec_vmaxsw(__a, (vector int)__b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_max(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_max(vector unsigned int __a,
+                                                vector unsigned int __b) {
   return __builtin_altivec_vmaxuw(__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_max(vector bool int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_max(vector bool int __a,
+                                                vector unsigned int __b) {
   return __builtin_altivec_vmaxuw((vector unsigned int)__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_max(vector unsigned int __a, vector bool int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_max(vector unsigned int __a,
+                                                vector bool int __b) {
   return __builtin_altivec_vmaxuw(__a, (vector unsigned int)__b);
 }
 
 #ifdef __POWER8_VECTOR__
 static vector signed long long __ATTRS_o_ai
-vec_max(vector signed long long __a, vector signed long long __b) 
-{
+vec_max(vector signed long long __a, vector signed long long __b) {
   return __builtin_altivec_vmaxsd(__a, __b);
 }
 
+static vector signed long long __ATTRS_o_ai
+vec_max(vector bool long long __a, vector signed long long __b) {
+  return __builtin_altivec_vmaxsd((vector signed long long)__a, __b);
+}
+
+static vector signed long long __ATTRS_o_ai vec_max(vector signed long long __a,
+                                                    vector bool long long __b) {
+  return __builtin_altivec_vmaxsd(__a, (vector signed long long)__b);
+}
+
 static vector unsigned long long __ATTRS_o_ai
-vec_max(vector unsigned long long __a, vector unsigned long long __b)
-{
+vec_max(vector unsigned long long __a, vector unsigned long long __b) {
   return __builtin_altivec_vmaxud(__a, __b);
 }
+
+static vector unsigned long long __ATTRS_o_ai
+vec_max(vector bool long long __a, vector unsigned long long __b) {
+  return __builtin_altivec_vmaxud((vector unsigned long long)__a, __b);
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_max(vector unsigned long long __a, vector bool long long __b) {
+  return __builtin_altivec_vmaxud(__a, (vector unsigned long long)__b);
+}
 #endif
 
-static vector float __ATTRS_o_ai
-vec_max(vector float __a, vector float __b)
-{
+static vector float __ATTRS_o_ai vec_max(vector float __a, vector float __b) {
 #ifdef __VSX__
   return __builtin_vsx_xvmaxsp(__a, __b);
 #else
@@ -2733,138 +2522,115 @@
 }
 
 #ifdef __VSX__
-static vector double __ATTRS_o_ai
-vec_max(vector double __a, vector double __b)
-{
+static vector double __ATTRS_o_ai vec_max(vector double __a,
+                                          vector double __b) {
   return __builtin_vsx_xvmaxdp(__a, __b);
 }
 #endif
 
 /* vec_vmaxsb */
 
-static vector signed char __ATTRS_o_ai
-vec_vmaxsb(vector signed char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vmaxsb(vector signed char __a,
+                                                  vector signed char __b) {
   return __builtin_altivec_vmaxsb(__a, __b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vmaxsb(vector bool char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vmaxsb(vector bool char __a,
+                                                  vector signed char __b) {
   return __builtin_altivec_vmaxsb((vector signed char)__a, __b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vmaxsb(vector signed char __a, vector bool char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vmaxsb(vector signed char __a,
+                                                  vector bool char __b) {
   return __builtin_altivec_vmaxsb(__a, (vector signed char)__b);
 }
 
 /* vec_vmaxub */
 
-static vector unsigned char __ATTRS_o_ai
-vec_vmaxub(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vmaxub(vector unsigned char __a,
+                                                    vector unsigned char __b) {
   return __builtin_altivec_vmaxub(__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vmaxub(vector bool char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vmaxub(vector bool char __a,
+                                                    vector unsigned char __b) {
   return __builtin_altivec_vmaxub((vector unsigned char)__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vmaxub(vector unsigned char __a, vector bool char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vmaxub(vector unsigned char __a,
+                                                    vector bool char __b) {
   return __builtin_altivec_vmaxub(__a, (vector unsigned char)__b);
 }
 
 /* vec_vmaxsh */
 
-static vector short __ATTRS_o_ai
-vec_vmaxsh(vector short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_vmaxsh(vector short __a,
+                                            vector short __b) {
   return __builtin_altivec_vmaxsh(__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_vmaxsh(vector bool short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_vmaxsh(vector bool short __a,
+                                            vector short __b) {
   return __builtin_altivec_vmaxsh((vector short)__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_vmaxsh(vector short __a, vector bool short __b)
-{
+static vector short __ATTRS_o_ai vec_vmaxsh(vector short __a,
+                                            vector bool short __b) {
   return __builtin_altivec_vmaxsh(__a, (vector short)__b);
 }
 
 /* vec_vmaxuh */
 
 static vector unsigned short __ATTRS_o_ai
-vec_vmaxuh(vector unsigned short __a, vector unsigned short __b)
-{
+vec_vmaxuh(vector unsigned short __a, vector unsigned short __b) {
   return __builtin_altivec_vmaxuh(__a, __b);
 }
 
 static vector unsigned short __ATTRS_o_ai
-vec_vmaxuh(vector bool short __a, vector unsigned short __b)
-{
+vec_vmaxuh(vector bool short __a, vector unsigned short __b) {
   return __builtin_altivec_vmaxuh((vector unsigned short)__a, __b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vmaxuh(vector unsigned short __a, vector bool short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vmaxuh(vector unsigned short __a,
+                                                     vector bool short __b) {
   return __builtin_altivec_vmaxuh(__a, (vector unsigned short)__b);
 }
 
 /* vec_vmaxsw */
 
-static vector int __ATTRS_o_ai
-vec_vmaxsw(vector int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_vmaxsw(vector int __a, vector int __b) {
   return __builtin_altivec_vmaxsw(__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_vmaxsw(vector bool int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_vmaxsw(vector bool int __a, vector int __b) {
   return __builtin_altivec_vmaxsw((vector int)__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_vmaxsw(vector int __a, vector bool int __b)
-{
+static vector int __ATTRS_o_ai vec_vmaxsw(vector int __a, vector bool int __b) {
   return __builtin_altivec_vmaxsw(__a, (vector int)__b);
 }
 
 /* vec_vmaxuw */
 
-static vector unsigned int __ATTRS_o_ai
-vec_vmaxuw(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vmaxuw(vector unsigned int __a,
+                                                   vector unsigned int __b) {
   return __builtin_altivec_vmaxuw(__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vmaxuw(vector bool int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vmaxuw(vector bool int __a,
+                                                   vector unsigned int __b) {
   return __builtin_altivec_vmaxuw((vector unsigned int)__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vmaxuw(vector unsigned int __a, vector bool int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vmaxuw(vector unsigned int __a,
+                                                   vector bool int __b) {
   return __builtin_altivec_vmaxuw(__a, (vector unsigned int)__b);
 }
 
 /* vec_vmaxfp */
 
 static vector float __attribute__((__always_inline__))
-vec_vmaxfp(vector float __a, vector float __b)
-{
+vec_vmaxfp(vector float __a, vector float __b) {
 #ifdef __VSX__
   return __builtin_vsx_xvmaxsp(__a, __b);
 #else
@@ -2874,519 +2640,508 @@
 
 /* vec_mergeh */
 
-static vector signed char __ATTRS_o_ai
-vec_mergeh(vector signed char __a, vector signed char __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, 
-     0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17));
+static vector signed char __ATTRS_o_ai vec_mergeh(vector signed char __a,
+                                                  vector signed char __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x00, 0x10, 0x01, 0x11, 0x02, 0x12,
+                                         0x03, 0x13, 0x04, 0x14, 0x05, 0x15,
+                                         0x06, 0x16, 0x07, 0x17));
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_mergeh(vector unsigned char __a, vector unsigned char __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, 
-     0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17));
+static vector unsigned char __ATTRS_o_ai vec_mergeh(vector unsigned char __a,
+                                                    vector unsigned char __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x00, 0x10, 0x01, 0x11, 0x02, 0x12,
+                                         0x03, 0x13, 0x04, 0x14, 0x05, 0x15,
+                                         0x06, 0x16, 0x07, 0x17));
 }
 
-static vector bool char __ATTRS_o_ai
-vec_mergeh(vector bool char __a, vector bool char __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, 
-     0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17));
+static vector bool char __ATTRS_o_ai vec_mergeh(vector bool char __a,
+                                                vector bool char __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x00, 0x10, 0x01, 0x11, 0x02, 0x12,
+                                         0x03, 0x13, 0x04, 0x14, 0x05, 0x15,
+                                         0x06, 0x16, 0x07, 0x17));
 }
 
-static vector short __ATTRS_o_ai
-vec_mergeh(vector short __a, vector short __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
-     0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17));
+static vector short __ATTRS_o_ai vec_mergeh(vector short __a,
+                                            vector short __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x00, 0x01, 0x10, 0x11, 0x02, 0x03,
+                                         0x12, 0x13, 0x04, 0x05, 0x14, 0x15,
+                                         0x06, 0x07, 0x16, 0x17));
 }
 
 static vector unsigned short __ATTRS_o_ai
-vec_mergeh(vector unsigned short __a, vector unsigned short __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
-     0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17));
+vec_mergeh(vector unsigned short __a, vector unsigned short __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x00, 0x01, 0x10, 0x11, 0x02, 0x03,
+                                         0x12, 0x13, 0x04, 0x05, 0x14, 0x15,
+                                         0x06, 0x07, 0x16, 0x17));
 }
 
-static vector bool short __ATTRS_o_ai
-vec_mergeh(vector bool short __a, vector bool short __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
-     0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17));
+static vector bool short __ATTRS_o_ai vec_mergeh(vector bool short __a,
+                                                 vector bool short __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x00, 0x01, 0x10, 0x11, 0x02, 0x03,
+                                         0x12, 0x13, 0x04, 0x05, 0x14, 0x15,
+                                         0x06, 0x07, 0x16, 0x17));
 }
 
-static vector pixel __ATTRS_o_ai
-vec_mergeh(vector pixel __a, vector pixel __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
-     0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17));
+static vector pixel __ATTRS_o_ai vec_mergeh(vector pixel __a,
+                                            vector pixel __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x00, 0x01, 0x10, 0x11, 0x02, 0x03,
+                                         0x12, 0x13, 0x04, 0x05, 0x14, 0x15,
+                                         0x06, 0x07, 0x16, 0x17));
 }
 
-static vector int __ATTRS_o_ai
-vec_mergeh(vector int __a, vector int __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
-     0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17));
+static vector int __ATTRS_o_ai vec_mergeh(vector int __a, vector int __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11,
+                                         0x12, 0x13, 0x04, 0x05, 0x06, 0x07,
+                                         0x14, 0x15, 0x16, 0x17));
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_mergeh(vector unsigned int __a, vector unsigned int __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
-     0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17));
+static vector unsigned int __ATTRS_o_ai vec_mergeh(vector unsigned int __a,
+                                                   vector unsigned int __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11,
+                                         0x12, 0x13, 0x04, 0x05, 0x06, 0x07,
+                                         0x14, 0x15, 0x16, 0x17));
 }
 
-static vector bool int __ATTRS_o_ai
-vec_mergeh(vector bool int __a, vector bool int __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
-     0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17));
+static vector bool int __ATTRS_o_ai vec_mergeh(vector bool int __a,
+                                               vector bool int __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11,
+                                         0x12, 0x13, 0x04, 0x05, 0x06, 0x07,
+                                         0x14, 0x15, 0x16, 0x17));
 }
 
-static vector float __ATTRS_o_ai
-vec_mergeh(vector float __a, vector float __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
-     0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17));
+static vector float __ATTRS_o_ai vec_mergeh(vector float __a,
+                                            vector float __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11,
+                                         0x12, 0x13, 0x04, 0x05, 0x06, 0x07,
+                                         0x14, 0x15, 0x16, 0x17));
 }
 
 /* vec_vmrghb */
 
 #define __builtin_altivec_vmrghb vec_vmrghb
 
-static vector signed char __ATTRS_o_ai
-vec_vmrghb(vector signed char __a, vector signed char __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, 
-     0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17));
+static vector signed char __ATTRS_o_ai vec_vmrghb(vector signed char __a,
+                                                  vector signed char __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x00, 0x10, 0x01, 0x11, 0x02, 0x12,
+                                         0x03, 0x13, 0x04, 0x14, 0x05, 0x15,
+                                         0x06, 0x16, 0x07, 0x17));
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vmrghb(vector unsigned char __a, vector unsigned char __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, 
-     0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17));
+static vector unsigned char __ATTRS_o_ai vec_vmrghb(vector unsigned char __a,
+                                                    vector unsigned char __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x00, 0x10, 0x01, 0x11, 0x02, 0x12,
+                                         0x03, 0x13, 0x04, 0x14, 0x05, 0x15,
+                                         0x06, 0x16, 0x07, 0x17));
 }
 
-static vector bool char __ATTRS_o_ai
-vec_vmrghb(vector bool char __a, vector bool char __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, 
-     0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17));
+static vector bool char __ATTRS_o_ai vec_vmrghb(vector bool char __a,
+                                                vector bool char __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x00, 0x10, 0x01, 0x11, 0x02, 0x12,
+                                         0x03, 0x13, 0x04, 0x14, 0x05, 0x15,
+                                         0x06, 0x16, 0x07, 0x17));
 }
 
 /* vec_vmrghh */
 
 #define __builtin_altivec_vmrghh vec_vmrghh
 
-static vector short __ATTRS_o_ai
-vec_vmrghh(vector short __a, vector short __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
-     0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17));
+static vector short __ATTRS_o_ai vec_vmrghh(vector short __a,
+                                            vector short __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x00, 0x01, 0x10, 0x11, 0x02, 0x03,
+                                         0x12, 0x13, 0x04, 0x05, 0x14, 0x15,
+                                         0x06, 0x07, 0x16, 0x17));
 }
 
 static vector unsigned short __ATTRS_o_ai
-vec_vmrghh(vector unsigned short __a, vector unsigned short __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
-     0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17));
+vec_vmrghh(vector unsigned short __a, vector unsigned short __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x00, 0x01, 0x10, 0x11, 0x02, 0x03,
+                                         0x12, 0x13, 0x04, 0x05, 0x14, 0x15,
+                                         0x06, 0x07, 0x16, 0x17));
 }
 
-static vector bool short __ATTRS_o_ai
-vec_vmrghh(vector bool short __a, vector bool short __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
-     0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17));
+static vector bool short __ATTRS_o_ai vec_vmrghh(vector bool short __a,
+                                                 vector bool short __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x00, 0x01, 0x10, 0x11, 0x02, 0x03,
+                                         0x12, 0x13, 0x04, 0x05, 0x14, 0x15,
+                                         0x06, 0x07, 0x16, 0x17));
 }
 
-static vector pixel __ATTRS_o_ai
-vec_vmrghh(vector pixel __a, vector pixel __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
-     0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17));
+static vector pixel __ATTRS_o_ai vec_vmrghh(vector pixel __a,
+                                            vector pixel __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x00, 0x01, 0x10, 0x11, 0x02, 0x03,
+                                         0x12, 0x13, 0x04, 0x05, 0x14, 0x15,
+                                         0x06, 0x07, 0x16, 0x17));
 }
 
 /* vec_vmrghw */
 
 #define __builtin_altivec_vmrghw vec_vmrghw
 
-static vector int __ATTRS_o_ai
-vec_vmrghw(vector int __a, vector int __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
-     0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17));
+static vector int __ATTRS_o_ai vec_vmrghw(vector int __a, vector int __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11,
+                                         0x12, 0x13, 0x04, 0x05, 0x06, 0x07,
+                                         0x14, 0x15, 0x16, 0x17));
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vmrghw(vector unsigned int __a, vector unsigned int __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
-     0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17));
+static vector unsigned int __ATTRS_o_ai vec_vmrghw(vector unsigned int __a,
+                                                   vector unsigned int __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11,
+                                         0x12, 0x13, 0x04, 0x05, 0x06, 0x07,
+                                         0x14, 0x15, 0x16, 0x17));
 }
 
-static vector bool int __ATTRS_o_ai
-vec_vmrghw(vector bool int __a, vector bool int __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
-     0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17));
+static vector bool int __ATTRS_o_ai vec_vmrghw(vector bool int __a,
+                                               vector bool int __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11,
+                                         0x12, 0x13, 0x04, 0x05, 0x06, 0x07,
+                                         0x14, 0x15, 0x16, 0x17));
 }
 
-static vector float __ATTRS_o_ai
-vec_vmrghw(vector float __a, vector float __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
-     0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17));
+static vector float __ATTRS_o_ai vec_vmrghw(vector float __a,
+                                            vector float __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x10, 0x11,
+                                         0x12, 0x13, 0x04, 0x05, 0x06, 0x07,
+                                         0x14, 0x15, 0x16, 0x17));
 }
 
 /* vec_mergel */
 
-static vector signed char __ATTRS_o_ai
-vec_mergel(vector signed char __a, vector signed char __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, 
-     0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F));
+static vector signed char __ATTRS_o_ai vec_mergel(vector signed char __a,
+                                                  vector signed char __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A,
+                                         0x0B, 0x1B, 0x0C, 0x1C, 0x0D, 0x1D,
+                                         0x0E, 0x1E, 0x0F, 0x1F));
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_mergel(vector unsigned char __a, vector unsigned char __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, 
-     0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F));
+static vector unsigned char __ATTRS_o_ai vec_mergel(vector unsigned char __a,
+                                                    vector unsigned char __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A,
+                                         0x0B, 0x1B, 0x0C, 0x1C, 0x0D, 0x1D,
+                                         0x0E, 0x1E, 0x0F, 0x1F));
 }
 
-static vector bool char __ATTRS_o_ai
-vec_mergel(vector bool char __a, vector bool char __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, 
-     0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F));
+static vector bool char __ATTRS_o_ai vec_mergel(vector bool char __a,
+                                                vector bool char __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A,
+                                         0x0B, 0x1B, 0x0C, 0x1C, 0x0D, 0x1D,
+                                         0x0E, 0x1E, 0x0F, 0x1F));
 }
 
-static vector short __ATTRS_o_ai
-vec_mergel(vector short __a, vector short __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B,
-     0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F));
+static vector short __ATTRS_o_ai vec_mergel(vector short __a,
+                                            vector short __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B,
+                                         0x1A, 0x1B, 0x0C, 0x0D, 0x1C, 0x1D,
+                                         0x0E, 0x0F, 0x1E, 0x1F));
 }
 
 static vector unsigned short __ATTRS_o_ai
-vec_mergel(vector unsigned short __a, vector unsigned short __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B,
-     0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F));
+vec_mergel(vector unsigned short __a, vector unsigned short __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B,
+                                         0x1A, 0x1B, 0x0C, 0x0D, 0x1C, 0x1D,
+                                         0x0E, 0x0F, 0x1E, 0x1F));
 }
 
-static vector bool short __ATTRS_o_ai
-vec_mergel(vector bool short __a, vector bool short __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B,
-     0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F));
+static vector bool short __ATTRS_o_ai vec_mergel(vector bool short __a,
+                                                 vector bool short __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B,
+                                         0x1A, 0x1B, 0x0C, 0x0D, 0x1C, 0x1D,
+                                         0x0E, 0x0F, 0x1E, 0x1F));
 }
 
-static vector pixel __ATTRS_o_ai
-vec_mergel(vector pixel __a, vector pixel __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B,
-     0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F));
+static vector pixel __ATTRS_o_ai vec_mergel(vector pixel __a,
+                                            vector pixel __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B,
+                                         0x1A, 0x1B, 0x0C, 0x0D, 0x1C, 0x1D,
+                                         0x0E, 0x0F, 0x1E, 0x1F));
 }
 
-static vector int __ATTRS_o_ai
-vec_mergel(vector int __a, vector int __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
-     0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F));
+static vector int __ATTRS_o_ai vec_mergel(vector int __a, vector int __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19,
+                                         0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F,
+                                         0x1C, 0x1D, 0x1E, 0x1F));
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_mergel(vector unsigned int __a, vector unsigned int __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
-     0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F));
+static vector unsigned int __ATTRS_o_ai vec_mergel(vector unsigned int __a,
+                                                   vector unsigned int __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19,
+                                         0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F,
+                                         0x1C, 0x1D, 0x1E, 0x1F));
 }
 
-static vector bool int __ATTRS_o_ai
-vec_mergel(vector bool int __a, vector bool int __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
-     0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F));
+static vector bool int __ATTRS_o_ai vec_mergel(vector bool int __a,
+                                               vector bool int __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19,
+                                         0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F,
+                                         0x1C, 0x1D, 0x1E, 0x1F));
 }
 
-static vector float __ATTRS_o_ai
-vec_mergel(vector float __a, vector float __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
-     0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F));
+static vector float __ATTRS_o_ai vec_mergel(vector float __a,
+                                            vector float __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19,
+                                         0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F,
+                                         0x1C, 0x1D, 0x1E, 0x1F));
 }
 
 /* vec_vmrglb */
 
 #define __builtin_altivec_vmrglb vec_vmrglb
 
-static vector signed char __ATTRS_o_ai
-vec_vmrglb(vector signed char __a, vector signed char __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, 
-     0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F));
+static vector signed char __ATTRS_o_ai vec_vmrglb(vector signed char __a,
+                                                  vector signed char __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A,
+                                         0x0B, 0x1B, 0x0C, 0x1C, 0x0D, 0x1D,
+                                         0x0E, 0x1E, 0x0F, 0x1F));
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vmrglb(vector unsigned char __a, vector unsigned char __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, 
-     0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F));
+static vector unsigned char __ATTRS_o_ai vec_vmrglb(vector unsigned char __a,
+                                                    vector unsigned char __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A,
+                                         0x0B, 0x1B, 0x0C, 0x1C, 0x0D, 0x1D,
+                                         0x0E, 0x1E, 0x0F, 0x1F));
 }
 
-static vector bool char __ATTRS_o_ai
-vec_vmrglb(vector bool char __a, vector bool char __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, 
-     0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F));
+static vector bool char __ATTRS_o_ai vec_vmrglb(vector bool char __a,
+                                                vector bool char __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A,
+                                         0x0B, 0x1B, 0x0C, 0x1C, 0x0D, 0x1D,
+                                         0x0E, 0x1E, 0x0F, 0x1F));
 }
 
 /* vec_vmrglh */
 
 #define __builtin_altivec_vmrglh vec_vmrglh
 
-static vector short __ATTRS_o_ai
-vec_vmrglh(vector short __a, vector short __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B,
-     0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F));
+static vector short __ATTRS_o_ai vec_vmrglh(vector short __a,
+                                            vector short __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B,
+                                         0x1A, 0x1B, 0x0C, 0x0D, 0x1C, 0x1D,
+                                         0x0E, 0x0F, 0x1E, 0x1F));
 }
 
 static vector unsigned short __ATTRS_o_ai
-vec_vmrglh(vector unsigned short __a, vector unsigned short __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B,
-     0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F));
+vec_vmrglh(vector unsigned short __a, vector unsigned short __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B,
+                                         0x1A, 0x1B, 0x0C, 0x0D, 0x1C, 0x1D,
+                                         0x0E, 0x0F, 0x1E, 0x1F));
 }
 
-static vector bool short __ATTRS_o_ai
-vec_vmrglh(vector bool short __a, vector bool short __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B,
-     0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F));
+static vector bool short __ATTRS_o_ai vec_vmrglh(vector bool short __a,
+                                                 vector bool short __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B,
+                                         0x1A, 0x1B, 0x0C, 0x0D, 0x1C, 0x1D,
+                                         0x0E, 0x0F, 0x1E, 0x1F));
 }
 
-static vector pixel __ATTRS_o_ai
-vec_vmrglh(vector pixel __a, vector pixel __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B,
-     0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F));
+static vector pixel __ATTRS_o_ai vec_vmrglh(vector pixel __a,
+                                            vector pixel __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B,
+                                         0x1A, 0x1B, 0x0C, 0x0D, 0x1C, 0x1D,
+                                         0x0E, 0x0F, 0x1E, 0x1F));
 }
 
 /* vec_vmrglw */
 
 #define __builtin_altivec_vmrglw vec_vmrglw
 
-static vector int __ATTRS_o_ai
-vec_vmrglw(vector int __a, vector int __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
-     0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F));
+static vector int __ATTRS_o_ai vec_vmrglw(vector int __a, vector int __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19,
+                                         0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F,
+                                         0x1C, 0x1D, 0x1E, 0x1F));
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vmrglw(vector unsigned int __a, vector unsigned int __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
-     0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F));
+static vector unsigned int __ATTRS_o_ai vec_vmrglw(vector unsigned int __a,
+                                                   vector unsigned int __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19,
+                                         0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F,
+                                         0x1C, 0x1D, 0x1E, 0x1F));
 }
 
-static vector bool int __ATTRS_o_ai
-vec_vmrglw(vector bool int __a, vector bool int __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
-     0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F));
+static vector bool int __ATTRS_o_ai vec_vmrglw(vector bool int __a,
+                                               vector bool int __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19,
+                                         0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F,
+                                         0x1C, 0x1D, 0x1E, 0x1F));
 }
 
-static vector float __ATTRS_o_ai
-vec_vmrglw(vector float __a, vector float __b)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
-     0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F));
+static vector float __ATTRS_o_ai vec_vmrglw(vector float __a,
+                                            vector float __b) {
+  return vec_perm(__a, __b,
+                  (vector unsigned char)(0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19,
+                                         0x1A, 0x1B, 0x0C, 0x0D, 0x0E, 0x0F,
+                                         0x1C, 0x1D, 0x1E, 0x1F));
 }
 
 /* vec_mfvscr */
 
 static vector unsigned short __attribute__((__always_inline__))
-vec_mfvscr(void)
-{
+vec_mfvscr(void) {
   return __builtin_altivec_mfvscr();
 }
 
 /* vec_min */
 
-static vector signed char __ATTRS_o_ai
-vec_min(vector signed char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_min(vector signed char __a,
+                                               vector signed char __b) {
   return __builtin_altivec_vminsb(__a, __b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_min(vector bool char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_min(vector bool char __a,
+                                               vector signed char __b) {
   return __builtin_altivec_vminsb((vector signed char)__a, __b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_min(vector signed char __a, vector bool char __b)
-{
+static vector signed char __ATTRS_o_ai vec_min(vector signed char __a,
+                                               vector bool char __b) {
   return __builtin_altivec_vminsb(__a, (vector signed char)__b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_min(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_min(vector unsigned char __a,
+                                                 vector unsigned char __b) {
   return __builtin_altivec_vminub(__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_min(vector bool char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_min(vector bool char __a,
+                                                 vector unsigned char __b) {
   return __builtin_altivec_vminub((vector unsigned char)__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_min(vector unsigned char __a, vector bool char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_min(vector unsigned char __a,
+                                                 vector bool char __b) {
   return __builtin_altivec_vminub(__a, (vector unsigned char)__b);
 }
 
-static vector short __ATTRS_o_ai
-vec_min(vector short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_min(vector short __a, vector short __b) {
   return __builtin_altivec_vminsh(__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_min(vector bool short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_min(vector bool short __a,
+                                         vector short __b) {
   return __builtin_altivec_vminsh((vector short)__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_min(vector short __a, vector bool short __b)
-{
+static vector short __ATTRS_o_ai vec_min(vector short __a,
+                                         vector bool short __b) {
   return __builtin_altivec_vminsh(__a, (vector short)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_min(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_min(vector unsigned short __a,
+                                                  vector unsigned short __b) {
   return __builtin_altivec_vminuh(__a, __b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_min(vector bool short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_min(vector bool short __a,
+                                                  vector unsigned short __b) {
   return __builtin_altivec_vminuh((vector unsigned short)__a, __b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_min(vector unsigned short __a, vector bool short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_min(vector unsigned short __a,
+                                                  vector bool short __b) {
   return __builtin_altivec_vminuh(__a, (vector unsigned short)__b);
 }
 
-static vector int __ATTRS_o_ai
-vec_min(vector int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_min(vector int __a, vector int __b) {
   return __builtin_altivec_vminsw(__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_min(vector bool int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_min(vector bool int __a, vector int __b) {
   return __builtin_altivec_vminsw((vector int)__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_min(vector int __a, vector bool int __b)
-{
+static vector int __ATTRS_o_ai vec_min(vector int __a, vector bool int __b) {
   return __builtin_altivec_vminsw(__a, (vector int)__b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_min(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_min(vector unsigned int __a,
+                                                vector unsigned int __b) {
   return __builtin_altivec_vminuw(__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_min(vector bool int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_min(vector bool int __a,
+                                                vector unsigned int __b) {
   return __builtin_altivec_vminuw((vector unsigned int)__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_min(vector unsigned int __a, vector bool int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_min(vector unsigned int __a,
+                                                vector bool int __b) {
   return __builtin_altivec_vminuw(__a, (vector unsigned int)__b);
 }
 
 #ifdef __POWER8_VECTOR__
 static vector signed long long __ATTRS_o_ai
-vec_min(vector signed long long __a, vector signed long long __b)
-{
+vec_min(vector signed long long __a, vector signed long long __b) {
   return __builtin_altivec_vminsd(__a, __b);
 }
 
+static vector signed long long __ATTRS_o_ai
+vec_min(vector bool long long __a, vector signed long long __b) {
+  return __builtin_altivec_vminsd((vector signed long long)__a, __b);
+}
+
+static vector signed long long __ATTRS_o_ai vec_min(vector signed long long __a,
+                                                    vector bool long long __b) {
+  return __builtin_altivec_vminsd(__a, (vector signed long long)__b);
+}
+
 static vector unsigned long long __ATTRS_o_ai
-vec_min(vector unsigned long long __a, vector unsigned long long __b)
-{
+vec_min(vector unsigned long long __a, vector unsigned long long __b) {
   return __builtin_altivec_vminud(__a, __b);
 }
+
+static vector unsigned long long __ATTRS_o_ai
+vec_min(vector bool long long __a, vector unsigned long long __b) {
+  return __builtin_altivec_vminud((vector unsigned long long)__a, __b);
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_min(vector unsigned long long __a, vector bool long long __b) {
+  return __builtin_altivec_vminud(__a, (vector unsigned long long)__b);
+}
 #endif
 
-static vector float __ATTRS_o_ai
-vec_min(vector float __a, vector float __b)
-{
+static vector float __ATTRS_o_ai vec_min(vector float __a, vector float __b) {
 #ifdef __VSX__
   return __builtin_vsx_xvminsp(__a, __b);
 #else
@@ -3395,138 +3150,115 @@
 }
 
 #ifdef __VSX__
-static vector double __ATTRS_o_ai
-vec_min(vector double __a, vector double __b)
-{
+static vector double __ATTRS_o_ai vec_min(vector double __a,
+                                          vector double __b) {
   return __builtin_vsx_xvmindp(__a, __b);
 }
 #endif
 
 /* vec_vminsb */
 
-static vector signed char __ATTRS_o_ai
-vec_vminsb(vector signed char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vminsb(vector signed char __a,
+                                                  vector signed char __b) {
   return __builtin_altivec_vminsb(__a, __b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vminsb(vector bool char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vminsb(vector bool char __a,
+                                                  vector signed char __b) {
   return __builtin_altivec_vminsb((vector signed char)__a, __b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vminsb(vector signed char __a, vector bool char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vminsb(vector signed char __a,
+                                                  vector bool char __b) {
   return __builtin_altivec_vminsb(__a, (vector signed char)__b);
 }
 
 /* vec_vminub */
 
-static vector unsigned char __ATTRS_o_ai
-vec_vminub(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vminub(vector unsigned char __a,
+                                                    vector unsigned char __b) {
   return __builtin_altivec_vminub(__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vminub(vector bool char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vminub(vector bool char __a,
+                                                    vector unsigned char __b) {
   return __builtin_altivec_vminub((vector unsigned char)__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vminub(vector unsigned char __a, vector bool char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vminub(vector unsigned char __a,
+                                                    vector bool char __b) {
   return __builtin_altivec_vminub(__a, (vector unsigned char)__b);
 }
 
 /* vec_vminsh */
 
-static vector short __ATTRS_o_ai
-vec_vminsh(vector short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_vminsh(vector short __a,
+                                            vector short __b) {
   return __builtin_altivec_vminsh(__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_vminsh(vector bool short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_vminsh(vector bool short __a,
+                                            vector short __b) {
   return __builtin_altivec_vminsh((vector short)__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_vminsh(vector short __a, vector bool short __b)
-{
+static vector short __ATTRS_o_ai vec_vminsh(vector short __a,
+                                            vector bool short __b) {
   return __builtin_altivec_vminsh(__a, (vector short)__b);
 }
 
 /* vec_vminuh */
 
 static vector unsigned short __ATTRS_o_ai
-vec_vminuh(vector unsigned short __a, vector unsigned short __b)
-{
+vec_vminuh(vector unsigned short __a, vector unsigned short __b) {
   return __builtin_altivec_vminuh(__a, __b);
 }
 
 static vector unsigned short __ATTRS_o_ai
-vec_vminuh(vector bool short __a, vector unsigned short __b)
-{
+vec_vminuh(vector bool short __a, vector unsigned short __b) {
   return __builtin_altivec_vminuh((vector unsigned short)__a, __b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vminuh(vector unsigned short __a, vector bool short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vminuh(vector unsigned short __a,
+                                                     vector bool short __b) {
   return __builtin_altivec_vminuh(__a, (vector unsigned short)__b);
 }
 
 /* vec_vminsw */
 
-static vector int __ATTRS_o_ai
-vec_vminsw(vector int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_vminsw(vector int __a, vector int __b) {
   return __builtin_altivec_vminsw(__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_vminsw(vector bool int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_vminsw(vector bool int __a, vector int __b) {
   return __builtin_altivec_vminsw((vector int)__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_vminsw(vector int __a, vector bool int __b)
-{
+static vector int __ATTRS_o_ai vec_vminsw(vector int __a, vector bool int __b) {
   return __builtin_altivec_vminsw(__a, (vector int)__b);
 }
 
 /* vec_vminuw */
 
-static vector unsigned int __ATTRS_o_ai
-vec_vminuw(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vminuw(vector unsigned int __a,
+                                                   vector unsigned int __b) {
   return __builtin_altivec_vminuw(__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vminuw(vector bool int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vminuw(vector bool int __a,
+                                                   vector unsigned int __b) {
   return __builtin_altivec_vminuw((vector unsigned int)__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vminuw(vector unsigned int __a, vector bool int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vminuw(vector unsigned int __a,
+                                                   vector bool int __b) {
   return __builtin_altivec_vminuw(__a, (vector unsigned int)__b);
 }
 
 /* vec_vminfp */
 
 static vector float __attribute__((__always_inline__))
-vec_vminfp(vector float __a, vector float __b)
-{
+vec_vminfp(vector float __a, vector float __b) {
 #ifdef __VSX__
   return __builtin_vsx_xvminsp(__a, __b);
 #else
@@ -3538,239 +3270,194 @@
 
 #define __builtin_altivec_vmladduhm vec_mladd
 
-static vector short __ATTRS_o_ai
-vec_mladd(vector short __a, vector short __b, vector short __c)
-{
+static vector short __ATTRS_o_ai vec_mladd(vector short __a, vector short __b,
+                                           vector short __c) {
   return __a * __b + __c;
 }
 
-static vector short __ATTRS_o_ai
-vec_mladd(vector short __a, vector unsigned short __b, vector unsigned short __c)
-{
+static vector short __ATTRS_o_ai vec_mladd(vector short __a,
+                                           vector unsigned short __b,
+                                           vector unsigned short __c) {
   return __a * (vector short)__b + (vector short)__c;
 }
 
-static vector short __ATTRS_o_ai
-vec_mladd(vector unsigned short __a, vector short __b, vector short __c)
-{
+static vector short __ATTRS_o_ai vec_mladd(vector unsigned short __a,
+                                           vector short __b, vector short __c) {
   return (vector short)__a * __b + __c;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_mladd(vector unsigned short __a,
-          vector unsigned short __b,
-          vector unsigned short __c)
-{
+static vector unsigned short __ATTRS_o_ai vec_mladd(vector unsigned short __a,
+                                                    vector unsigned short __b,
+                                                    vector unsigned short __c) {
   return __a * __b + __c;
 }
 
 /* vec_vmladduhm */
 
-static vector short __ATTRS_o_ai
-vec_vmladduhm(vector short __a, vector short __b, vector short __c)
-{
+static vector short __ATTRS_o_ai vec_vmladduhm(vector short __a,
+                                               vector short __b,
+                                               vector short __c) {
   return __a * __b + __c;
 }
 
-static vector short __ATTRS_o_ai
-vec_vmladduhm(vector short __a, vector unsigned short __b, vector unsigned short __c)
-{
+static vector short __ATTRS_o_ai vec_vmladduhm(vector short __a,
+                                               vector unsigned short __b,
+                                               vector unsigned short __c) {
   return __a * (vector short)__b + (vector short)__c;
 }
 
-static vector short __ATTRS_o_ai
-vec_vmladduhm(vector unsigned short __a, vector short __b, vector short __c)
-{
+static vector short __ATTRS_o_ai vec_vmladduhm(vector unsigned short __a,
+                                               vector short __b,
+                                               vector short __c) {
   return (vector short)__a * __b + __c;
 }
 
 static vector unsigned short __ATTRS_o_ai
-vec_vmladduhm(vector unsigned short __a,
-              vector unsigned short __b,
-              vector unsigned short __c)
-{
+vec_vmladduhm(vector unsigned short __a, vector unsigned short __b,
+              vector unsigned short __c) {
   return __a * __b + __c;
 }
 
 /* vec_mradds */
 
 static vector short __attribute__((__always_inline__))
-vec_mradds(vector short __a, vector short __b, vector short __c)
-{
+vec_mradds(vector short __a, vector short __b, vector short __c) {
   return __builtin_altivec_vmhraddshs(__a, __b, __c);
 }
 
 /* vec_vmhraddshs */
 
 static vector short __attribute__((__always_inline__))
-vec_vmhraddshs(vector short __a, vector short __b, vector short __c)
-{
+vec_vmhraddshs(vector short __a, vector short __b, vector short __c) {
   return __builtin_altivec_vmhraddshs(__a, __b, __c);
 }
 
 /* vec_msum */
 
-static vector int __ATTRS_o_ai
-vec_msum(vector signed char __a, vector unsigned char __b, vector int __c)
-{
+static vector int __ATTRS_o_ai vec_msum(vector signed char __a,
+                                        vector unsigned char __b,
+                                        vector int __c) {
   return __builtin_altivec_vmsummbm(__a, __b, __c);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_msum(vector unsigned char __a, vector unsigned char __b, vector unsigned int __c)
-{
+static vector unsigned int __ATTRS_o_ai vec_msum(vector unsigned char __a,
+                                                 vector unsigned char __b,
+                                                 vector unsigned int __c) {
   return __builtin_altivec_vmsumubm(__a, __b, __c);
 }
 
-static vector int __ATTRS_o_ai
-vec_msum(vector short __a, vector short __b, vector int __c)
-{
+static vector int __ATTRS_o_ai vec_msum(vector short __a, vector short __b,
+                                        vector int __c) {
   return __builtin_altivec_vmsumshm(__a, __b, __c);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_msum(vector unsigned short __a,
-         vector unsigned short __b,
-         vector unsigned int __c)
-{
+static vector unsigned int __ATTRS_o_ai vec_msum(vector unsigned short __a,
+                                                 vector unsigned short __b,
+                                                 vector unsigned int __c) {
   return __builtin_altivec_vmsumuhm(__a, __b, __c);
 }
 
 /* vec_vmsummbm */
 
 static vector int __attribute__((__always_inline__))
-vec_vmsummbm(vector signed char __a, vector unsigned char __b, vector int __c)
-{
+vec_vmsummbm(vector signed char __a, vector unsigned char __b, vector int __c) {
   return __builtin_altivec_vmsummbm(__a, __b, __c);
 }
 
 /* vec_vmsumubm */
 
 static vector unsigned int __attribute__((__always_inline__))
-vec_vmsumubm(vector unsigned char __a,
-             vector unsigned char __b,
-             vector unsigned int __c)
-{
+vec_vmsumubm(vector unsigned char __a, vector unsigned char __b,
+             vector unsigned int __c) {
   return __builtin_altivec_vmsumubm(__a, __b, __c);
 }
 
 /* vec_vmsumshm */
 
 static vector int __attribute__((__always_inline__))
-vec_vmsumshm(vector short __a, vector short __b, vector int __c)
-{
+vec_vmsumshm(vector short __a, vector short __b, vector int __c) {
   return __builtin_altivec_vmsumshm(__a, __b, __c);
 }
 
 /* vec_vmsumuhm */
 
 static vector unsigned int __attribute__((__always_inline__))
-vec_vmsumuhm(vector unsigned short __a,
-             vector unsigned short __b,
-             vector unsigned int __c)
-{
+vec_vmsumuhm(vector unsigned short __a, vector unsigned short __b,
+             vector unsigned int __c) {
   return __builtin_altivec_vmsumuhm(__a, __b, __c);
 }
 
 /* vec_msums */
 
-static vector int __ATTRS_o_ai
-vec_msums(vector short __a, vector short __b, vector int __c)
-{
+static vector int __ATTRS_o_ai vec_msums(vector short __a, vector short __b,
+                                         vector int __c) {
   return __builtin_altivec_vmsumshs(__a, __b, __c);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_msums(vector unsigned short __a,
-          vector unsigned short __b,
-          vector unsigned int __c)
-{
+static vector unsigned int __ATTRS_o_ai vec_msums(vector unsigned short __a,
+                                                  vector unsigned short __b,
+                                                  vector unsigned int __c) {
   return __builtin_altivec_vmsumuhs(__a, __b, __c);
 }
 
 /* vec_vmsumshs */
 
 static vector int __attribute__((__always_inline__))
-vec_vmsumshs(vector short __a, vector short __b, vector int __c)
-{
+vec_vmsumshs(vector short __a, vector short __b, vector int __c) {
   return __builtin_altivec_vmsumshs(__a, __b, __c);
 }
 
 /* vec_vmsumuhs */
 
 static vector unsigned int __attribute__((__always_inline__))
-vec_vmsumuhs(vector unsigned short __a,
-             vector unsigned short __b,
-             vector unsigned int __c)
-{
+vec_vmsumuhs(vector unsigned short __a, vector unsigned short __b,
+             vector unsigned int __c) {
   return __builtin_altivec_vmsumuhs(__a, __b, __c);
 }
 
 /* vec_mtvscr */
 
-static void __ATTRS_o_ai
-vec_mtvscr(vector signed char __a)
-{
+static void __ATTRS_o_ai vec_mtvscr(vector signed char __a) {
   __builtin_altivec_mtvscr((vector int)__a);
 }
 
-static void __ATTRS_o_ai
-vec_mtvscr(vector unsigned char __a)
-{
+static void __ATTRS_o_ai vec_mtvscr(vector unsigned char __a) {
   __builtin_altivec_mtvscr((vector int)__a);
 }
 
-static void __ATTRS_o_ai
-vec_mtvscr(vector bool char __a)
-{
+static void __ATTRS_o_ai vec_mtvscr(vector bool char __a) {
   __builtin_altivec_mtvscr((vector int)__a);
 }
 
-static void __ATTRS_o_ai
-vec_mtvscr(vector short __a)
-{
+static void __ATTRS_o_ai vec_mtvscr(vector short __a) {
   __builtin_altivec_mtvscr((vector int)__a);
 }
 
-static void __ATTRS_o_ai
-vec_mtvscr(vector unsigned short __a)
-{
+static void __ATTRS_o_ai vec_mtvscr(vector unsigned short __a) {
   __builtin_altivec_mtvscr((vector int)__a);
 }
 
-static void __ATTRS_o_ai
-vec_mtvscr(vector bool short __a)
-{
+static void __ATTRS_o_ai vec_mtvscr(vector bool short __a) {
   __builtin_altivec_mtvscr((vector int)__a);
 }
 
-static void __ATTRS_o_ai
-vec_mtvscr(vector pixel __a)
-{
+static void __ATTRS_o_ai vec_mtvscr(vector pixel __a) {
   __builtin_altivec_mtvscr((vector int)__a);
 }
 
-static void __ATTRS_o_ai
-vec_mtvscr(vector int __a)
-{
+static void __ATTRS_o_ai vec_mtvscr(vector int __a) {
   __builtin_altivec_mtvscr((vector int)__a);
 }
 
-static void __ATTRS_o_ai
-vec_mtvscr(vector unsigned int __a)
-{
+static void __ATTRS_o_ai vec_mtvscr(vector unsigned int __a) {
   __builtin_altivec_mtvscr((vector int)__a);
 }
 
-static void __ATTRS_o_ai
-vec_mtvscr(vector bool int __a)
-{
+static void __ATTRS_o_ai vec_mtvscr(vector bool int __a) {
   __builtin_altivec_mtvscr((vector int)__a);
 }
 
-static void __ATTRS_o_ai
-vec_mtvscr(vector float __a)
-{
+static void __ATTRS_o_ai vec_mtvscr(vector float __a) {
   __builtin_altivec_mtvscr((vector int)__a);
 }
 
@@ -3779,9 +3466,8 @@
 
 /* vec_mule */
 
-static vector short __ATTRS_o_ai
-vec_mule(vector signed char __a, vector signed char __b)
-{
+static vector short __ATTRS_o_ai vec_mule(vector signed char __a,
+                                          vector signed char __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vmulosb(__a, __b);
 #else
@@ -3789,9 +3475,8 @@
 #endif
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_mule(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_mule(vector unsigned char __a,
+                                                   vector unsigned char __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vmuloub(__a, __b);
 #else
@@ -3799,9 +3484,7 @@
 #endif
 }
 
-static vector int __ATTRS_o_ai
-vec_mule(vector short __a, vector short __b)
-{
+static vector int __ATTRS_o_ai vec_mule(vector short __a, vector short __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vmulosh(__a, __b);
 #else
@@ -3809,9 +3492,8 @@
 #endif
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_mule(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_mule(vector unsigned short __a,
+                                                 vector unsigned short __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vmulouh(__a, __b);
 #else
@@ -3820,9 +3502,8 @@
 }
 
 #ifdef __POWER8_VECTOR__
-static vector signed long long __ATTRS_o_ai
-vec_mule(vector signed int __a, vector signed int __b)
-{
+static vector signed long long __ATTRS_o_ai vec_mule(vector signed int __a,
+                                                     vector signed int __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vmulosw(__a, __b);
 #else
@@ -3831,8 +3512,7 @@
 }
 
 static vector unsigned long long __ATTRS_o_ai
-vec_mule(vector unsigned int __a, vector unsigned int __b)
-{
+vec_mule(vector unsigned int __a, vector unsigned int __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vmulouw(__a, __b);
 #else
@@ -3844,8 +3524,7 @@
 /* vec_vmulesb */
 
 static vector short __attribute__((__always_inline__))
-vec_vmulesb(vector signed char __a, vector signed char __b)
-{
+vec_vmulesb(vector signed char __a, vector signed char __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vmulosb(__a, __b);
 #else
@@ -3856,8 +3535,7 @@
 /* vec_vmuleub */
 
 static vector unsigned short __attribute__((__always_inline__))
-vec_vmuleub(vector unsigned char __a, vector unsigned char __b)
-{
+vec_vmuleub(vector unsigned char __a, vector unsigned char __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vmuloub(__a, __b);
 #else
@@ -3868,8 +3546,7 @@
 /* vec_vmulesh */
 
 static vector int __attribute__((__always_inline__))
-vec_vmulesh(vector short __a, vector short __b)
-{
+vec_vmulesh(vector short __a, vector short __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vmulosh(__a, __b);
 #else
@@ -3880,8 +3557,7 @@
 /* vec_vmuleuh */
 
 static vector unsigned int __attribute__((__always_inline__))
-vec_vmuleuh(vector unsigned short __a, vector unsigned short __b)
-{
+vec_vmuleuh(vector unsigned short __a, vector unsigned short __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vmulouh(__a, __b);
 #else
@@ -3891,9 +3567,8 @@
 
 /* vec_mulo */
 
-static vector short __ATTRS_o_ai
-vec_mulo(vector signed char __a, vector signed char __b)
-{
+static vector short __ATTRS_o_ai vec_mulo(vector signed char __a,
+                                          vector signed char __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vmulesb(__a, __b);
 #else
@@ -3901,9 +3576,8 @@
 #endif
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_mulo(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_mulo(vector unsigned char __a,
+                                                   vector unsigned char __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vmuleub(__a, __b);
 #else
@@ -3911,9 +3585,7 @@
 #endif
 }
 
-static vector int __ATTRS_o_ai
-vec_mulo(vector short __a, vector short __b)
-{
+static vector int __ATTRS_o_ai vec_mulo(vector short __a, vector short __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vmulesh(__a, __b);
 #else
@@ -3921,9 +3593,8 @@
 #endif
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_mulo(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_mulo(vector unsigned short __a,
+                                                 vector unsigned short __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vmuleuh(__a, __b);
 #else
@@ -3932,9 +3603,8 @@
 }
 
 #ifdef __POWER8_VECTOR__
-static vector signed long long __ATTRS_o_ai
-vec_mulo(vector signed int __a, vector signed int __b) 
-{
+static vector signed long long __ATTRS_o_ai vec_mulo(vector signed int __a,
+                                                     vector signed int __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vmulesw(__a, __b);
 #else
@@ -3943,8 +3613,7 @@
 }
 
 static vector unsigned long long __ATTRS_o_ai
-vec_mulo(vector unsigned int __a, vector unsigned int __b) 
-{
+vec_mulo(vector unsigned int __a, vector unsigned int __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vmuleuw(__a, __b);
 #else
@@ -3956,8 +3625,7 @@
 /* vec_vmulosb */
 
 static vector short __attribute__((__always_inline__))
-vec_vmulosb(vector signed char __a, vector signed char __b)
-{
+vec_vmulosb(vector signed char __a, vector signed char __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vmulesb(__a, __b);
 #else
@@ -3968,8 +3636,7 @@
 /* vec_vmuloub */
 
 static vector unsigned short __attribute__((__always_inline__))
-vec_vmuloub(vector unsigned char __a, vector unsigned char __b)
-{
+vec_vmuloub(vector unsigned char __a, vector unsigned char __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vmuleub(__a, __b);
 #else
@@ -3980,8 +3647,7 @@
 /* vec_vmulosh */
 
 static vector int __attribute__((__always_inline__))
-vec_vmulosh(vector short __a, vector short __b)
-{
+vec_vmulosh(vector short __a, vector short __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vmulesh(__a, __b);
 #else
@@ -3992,8 +3658,7 @@
 /* vec_vmulouh */
 
 static vector unsigned int __attribute__((__always_inline__))
-vec_vmulouh(vector unsigned short __a, vector unsigned short __b)
-{
+vec_vmulouh(vector unsigned short __a, vector unsigned short __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vmuleuh(__a, __b);
 #else
@@ -4004,16 +3669,14 @@
 /* vec_nmsub */
 
 static vector float __attribute__((__always_inline__))
-vec_nmsub(vector float __a, vector float __b, vector float __c)
-{
+vec_nmsub(vector float __a, vector float __b, vector float __c) {
   return __builtin_altivec_vnmsubfp(__a, __b, __c);
 }
 
 /* vec_vnmsubfp */
 
 static vector float __attribute__((__always_inline__))
-vec_vnmsubfp(vector float __a, vector float __b, vector float __c)
-{
+vec_vnmsubfp(vector float __a, vector float __b, vector float __c) {
   return __builtin_altivec_vnmsubfp(__a, __b, __c);
 }
 
@@ -4021,516 +3684,534 @@
 
 #define __builtin_altivec_vnor vec_nor
 
-static vector signed char __ATTRS_o_ai
-vec_nor(vector signed char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_nor(vector signed char __a,
+                                               vector signed char __b) {
   return ~(__a | __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_nor(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_nor(vector unsigned char __a,
+                                                 vector unsigned char __b) {
   return ~(__a | __b);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_nor(vector bool char __a, vector bool char __b)
-{
+static vector bool char __ATTRS_o_ai vec_nor(vector bool char __a,
+                                             vector bool char __b) {
   return ~(__a | __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_nor(vector short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_nor(vector short __a, vector short __b) {
   return ~(__a | __b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_nor(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_nor(vector unsigned short __a,
+                                                  vector unsigned short __b) {
   return ~(__a | __b);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_nor(vector bool short __a, vector bool short __b)
-{
+static vector bool short __ATTRS_o_ai vec_nor(vector bool short __a,
+                                              vector bool short __b) {
   return ~(__a | __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_nor(vector int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_nor(vector int __a, vector int __b) {
   return ~(__a | __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_nor(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_nor(vector unsigned int __a,
+                                                vector unsigned int __b) {
   return ~(__a | __b);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_nor(vector bool int __a, vector bool int __b)
-{
+static vector bool int __ATTRS_o_ai vec_nor(vector bool int __a,
+                                            vector bool int __b) {
   return ~(__a | __b);
 }
 
-static vector float __ATTRS_o_ai
-vec_nor(vector float __a, vector float __b)
-{
-  vector unsigned int __res = ~((vector unsigned int)__a | (vector unsigned int)__b);
+static vector float __ATTRS_o_ai vec_nor(vector float __a, vector float __b) {
+  vector unsigned int __res =
+      ~((vector unsigned int)__a | (vector unsigned int)__b);
   return (vector float)__res;
 }
 
 /* vec_vnor */
 
-static vector signed char __ATTRS_o_ai
-vec_vnor(vector signed char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vnor(vector signed char __a,
+                                                vector signed char __b) {
   return ~(__a | __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vnor(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vnor(vector unsigned char __a,
+                                                  vector unsigned char __b) {
   return ~(__a | __b);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_vnor(vector bool char __a, vector bool char __b)
-{
+static vector bool char __ATTRS_o_ai vec_vnor(vector bool char __a,
+                                              vector bool char __b) {
   return ~(__a | __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_vnor(vector short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_vnor(vector short __a, vector short __b) {
   return ~(__a | __b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vnor(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vnor(vector unsigned short __a,
+                                                   vector unsigned short __b) {
   return ~(__a | __b);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_vnor(vector bool short __a, vector bool short __b)
-{
+static vector bool short __ATTRS_o_ai vec_vnor(vector bool short __a,
+                                               vector bool short __b) {
   return ~(__a | __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_vnor(vector int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_vnor(vector int __a, vector int __b) {
   return ~(__a | __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vnor(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vnor(vector unsigned int __a,
+                                                 vector unsigned int __b) {
   return ~(__a | __b);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_vnor(vector bool int __a, vector bool int __b)
-{
+static vector bool int __ATTRS_o_ai vec_vnor(vector bool int __a,
+                                             vector bool int __b) {
   return ~(__a | __b);
 }
 
-static vector float __ATTRS_o_ai
-vec_vnor(vector float __a, vector float __b)
-{
-  vector unsigned int __res = ~((vector unsigned int)__a | (vector unsigned int)__b);
+static vector float __ATTRS_o_ai vec_vnor(vector float __a, vector float __b) {
+  vector unsigned int __res =
+      ~((vector unsigned int)__a | (vector unsigned int)__b);
   return (vector float)__res;
 }
 
+#ifdef __VSX__
+static vector signed long long __ATTRS_o_ai
+vec_nor(vector signed long long __a, vector signed long long __b) {
+  return ~(__a | __b);
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_nor(vector unsigned long long __a, vector unsigned long long __b) {
+  return ~(__a | __b);
+}
+
+static vector bool long long __ATTRS_o_ai vec_nor(vector bool long long __a,
+                                                  vector bool long long __b) {
+  return ~(__a | __b);
+}
+#endif
+
 /* vec_or */
 
 #define __builtin_altivec_vor vec_or
 
-static vector signed char __ATTRS_o_ai
-vec_or(vector signed char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_or(vector signed char __a,
+                                              vector signed char __b) {
   return __a | __b;
 }
 
-static vector signed char __ATTRS_o_ai
-vec_or(vector bool char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_or(vector bool char __a,
+                                              vector signed char __b) {
   return (vector signed char)__a | __b;
 }
 
-static vector signed char __ATTRS_o_ai
-vec_or(vector signed char __a, vector bool char __b)
-{
+static vector signed char __ATTRS_o_ai vec_or(vector signed char __a,
+                                              vector bool char __b) {
   return __a | (vector signed char)__b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_or(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_or(vector unsigned char __a,
+                                                vector unsigned char __b) {
   return __a | __b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_or(vector bool char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_or(vector bool char __a,
+                                                vector unsigned char __b) {
   return (vector unsigned char)__a | __b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_or(vector unsigned char __a, vector bool char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_or(vector unsigned char __a,
+                                                vector bool char __b) {
   return __a | (vector unsigned char)__b;
 }
 
-static vector bool char __ATTRS_o_ai
-vec_or(vector bool char __a, vector bool char __b)
-{
+static vector bool char __ATTRS_o_ai vec_or(vector bool char __a,
+                                            vector bool char __b) {
   return __a | __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_or(vector short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_or(vector short __a, vector short __b) {
   return __a | __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_or(vector bool short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_or(vector bool short __a,
+                                        vector short __b) {
   return (vector short)__a | __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_or(vector short __a, vector bool short __b)
-{
+static vector short __ATTRS_o_ai vec_or(vector short __a,
+                                        vector bool short __b) {
   return __a | (vector short)__b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_or(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_or(vector unsigned short __a,
+                                                 vector unsigned short __b) {
   return __a | __b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_or(vector bool short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_or(vector bool short __a,
+                                                 vector unsigned short __b) {
   return (vector unsigned short)__a | __b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_or(vector unsigned short __a, vector bool short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_or(vector unsigned short __a,
+                                                 vector bool short __b) {
   return __a | (vector unsigned short)__b;
 }
 
-static vector bool short __ATTRS_o_ai
-vec_or(vector bool short __a, vector bool short __b)
-{
+static vector bool short __ATTRS_o_ai vec_or(vector bool short __a,
+                                             vector bool short __b) {
   return __a | __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_or(vector int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_or(vector int __a, vector int __b) {
   return __a | __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_or(vector bool int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_or(vector bool int __a, vector int __b) {
   return (vector int)__a | __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_or(vector int __a, vector bool int __b)
-{
+static vector int __ATTRS_o_ai vec_or(vector int __a, vector bool int __b) {
   return __a | (vector int)__b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_or(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_or(vector unsigned int __a,
+                                               vector unsigned int __b) {
   return __a | __b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_or(vector bool int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_or(vector bool int __a,
+                                               vector unsigned int __b) {
   return (vector unsigned int)__a | __b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_or(vector unsigned int __a, vector bool int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_or(vector unsigned int __a,
+                                               vector bool int __b) {
   return __a | (vector unsigned int)__b;
 }
 
-static vector bool int __ATTRS_o_ai
-vec_or(vector bool int __a, vector bool int __b)
-{
+static vector bool int __ATTRS_o_ai vec_or(vector bool int __a,
+                                           vector bool int __b) {
   return __a | __b;
 }
 
-static vector float __ATTRS_o_ai
-vec_or(vector float __a, vector float __b)
-{
-  vector unsigned int __res = (vector unsigned int)__a | (vector unsigned int)__b;
+static vector float __ATTRS_o_ai vec_or(vector float __a, vector float __b) {
+  vector unsigned int __res =
+      (vector unsigned int)__a | (vector unsigned int)__b;
   return (vector float)__res;
 }
 
-static vector float __ATTRS_o_ai
-vec_or(vector bool int __a, vector float __b)
-{
-  vector unsigned int __res = (vector unsigned int)__a | (vector unsigned int)__b;
+static vector float __ATTRS_o_ai vec_or(vector bool int __a, vector float __b) {
+  vector unsigned int __res =
+      (vector unsigned int)__a | (vector unsigned int)__b;
   return (vector float)__res;
 }
 
-static vector float __ATTRS_o_ai
-vec_or(vector float __a, vector bool int __b)
-{
-  vector unsigned int __res = (vector unsigned int)__a | (vector unsigned int)__b;
+static vector float __ATTRS_o_ai vec_or(vector float __a, vector bool int __b) {
+  vector unsigned int __res =
+      (vector unsigned int)__a | (vector unsigned int)__b;
   return (vector float)__res;
 }
 
+#ifdef __VSX__
+static vector signed long long __ATTRS_o_ai
+vec_or(vector signed long long __a, vector signed long long __b) {
+  return __a | __b;
+}
+
+static vector signed long long __ATTRS_o_ai
+vec_or(vector bool long long __a, vector signed long long __b) {
+  return (vector signed long long)__a | __b;
+}
+
+static vector signed long long __ATTRS_o_ai vec_or(vector signed long long __a,
+                                                   vector bool long long __b) {
+  return __a | (vector signed long long)__b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_or(vector unsigned long long __a, vector unsigned long long __b) {
+  return __a | __b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_or(vector bool long long __a, vector unsigned long long __b) {
+  return (vector unsigned long long)__a | __b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_or(vector unsigned long long __a, vector bool long long __b) {
+  return __a | (vector unsigned long long)__b;
+}
+
+static vector bool long long __ATTRS_o_ai vec_or(vector bool long long __a,
+                                                 vector bool long long __b) {
+  return __a | __b;
+}
+#endif
+
 /* vec_vor */
 
-static vector signed char __ATTRS_o_ai
-vec_vor(vector signed char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vor(vector signed char __a,
+                                               vector signed char __b) {
   return __a | __b;
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vor(vector bool char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vor(vector bool char __a,
+                                               vector signed char __b) {
   return (vector signed char)__a | __b;
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vor(vector signed char __a, vector bool char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vor(vector signed char __a,
+                                               vector bool char __b) {
   return __a | (vector signed char)__b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vor(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vor(vector unsigned char __a,
+                                                 vector unsigned char __b) {
   return __a | __b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vor(vector bool char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vor(vector bool char __a,
+                                                 vector unsigned char __b) {
   return (vector unsigned char)__a | __b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vor(vector unsigned char __a, vector bool char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vor(vector unsigned char __a,
+                                                 vector bool char __b) {
   return __a | (vector unsigned char)__b;
 }
 
-static vector bool char __ATTRS_o_ai
-vec_vor(vector bool char __a, vector bool char __b)
-{
+static vector bool char __ATTRS_o_ai vec_vor(vector bool char __a,
+                                             vector bool char __b) {
   return __a | __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_vor(vector short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_vor(vector short __a, vector short __b) {
   return __a | __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_vor(vector bool short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_vor(vector bool short __a,
+                                         vector short __b) {
   return (vector short)__a | __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_vor(vector short __a, vector bool short __b)
-{
+static vector short __ATTRS_o_ai vec_vor(vector short __a,
+                                         vector bool short __b) {
   return __a | (vector short)__b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vor(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vor(vector unsigned short __a,
+                                                  vector unsigned short __b) {
   return __a | __b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vor(vector bool short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vor(vector bool short __a,
+                                                  vector unsigned short __b) {
   return (vector unsigned short)__a | __b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vor(vector unsigned short __a, vector bool short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vor(vector unsigned short __a,
+                                                  vector bool short __b) {
   return __a | (vector unsigned short)__b;
 }
 
-static vector bool short __ATTRS_o_ai
-vec_vor(vector bool short __a, vector bool short __b)
-{
+static vector bool short __ATTRS_o_ai vec_vor(vector bool short __a,
+                                              vector bool short __b) {
   return __a | __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_vor(vector int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_vor(vector int __a, vector int __b) {
   return __a | __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_vor(vector bool int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_vor(vector bool int __a, vector int __b) {
   return (vector int)__a | __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_vor(vector int __a, vector bool int __b)
-{
+static vector int __ATTRS_o_ai vec_vor(vector int __a, vector bool int __b) {
   return __a | (vector int)__b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vor(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vor(vector unsigned int __a,
+                                                vector unsigned int __b) {
   return __a | __b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vor(vector bool int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vor(vector bool int __a,
+                                                vector unsigned int __b) {
   return (vector unsigned int)__a | __b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vor(vector unsigned int __a, vector bool int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vor(vector unsigned int __a,
+                                                vector bool int __b) {
   return __a | (vector unsigned int)__b;
 }
 
-static vector bool int __ATTRS_o_ai
-vec_vor(vector bool int __a, vector bool int __b)
-{
+static vector bool int __ATTRS_o_ai vec_vor(vector bool int __a,
+                                            vector bool int __b) {
   return __a | __b;
 }
 
-static vector float __ATTRS_o_ai
-vec_vor(vector float __a, vector float __b)
-{
-  vector unsigned int __res = (vector unsigned int)__a | (vector unsigned int)__b;
+static vector float __ATTRS_o_ai vec_vor(vector float __a, vector float __b) {
+  vector unsigned int __res =
+      (vector unsigned int)__a | (vector unsigned int)__b;
   return (vector float)__res;
 }
 
-static vector float __ATTRS_o_ai
-vec_vor(vector bool int __a, vector float __b)
-{
-  vector unsigned int __res = (vector unsigned int)__a | (vector unsigned int)__b;
+static vector float __ATTRS_o_ai vec_vor(vector bool int __a,
+                                         vector float __b) {
+  vector unsigned int __res =
+      (vector unsigned int)__a | (vector unsigned int)__b;
   return (vector float)__res;
 }
 
-static vector float __ATTRS_o_ai
-vec_vor(vector float __a, vector bool int __b)
-{
-  vector unsigned int __res = (vector unsigned int)__a | (vector unsigned int)__b;
+static vector float __ATTRS_o_ai vec_vor(vector float __a,
+                                         vector bool int __b) {
+  vector unsigned int __res =
+      (vector unsigned int)__a | (vector unsigned int)__b;
   return (vector float)__res;
 }
 
+#ifdef __VSX__
+static vector signed long long __ATTRS_o_ai
+vec_vor(vector signed long long __a, vector signed long long __b) {
+  return __a | __b;
+}
+
+static vector signed long long __ATTRS_o_ai
+vec_vor(vector bool long long __a, vector signed long long __b) {
+  return (vector signed long long)__a | __b;
+}
+
+static vector signed long long __ATTRS_o_ai vec_vor(vector signed long long __a,
+                                                    vector bool long long __b) {
+  return __a | (vector signed long long)__b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_vor(vector unsigned long long __a, vector unsigned long long __b) {
+  return __a | __b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_vor(vector bool long long __a, vector unsigned long long __b) {
+  return (vector unsigned long long)__a | __b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_vor(vector unsigned long long __a, vector bool long long __b) {
+  return __a | (vector unsigned long long)__b;
+}
+
+static vector bool long long __ATTRS_o_ai vec_vor(vector bool long long __a,
+                                                  vector bool long long __b) {
+  return __a | __b;
+}
+#endif
+
 /* vec_pack */
 
 /* The various vector pack instructions have a big-endian bias, so for
    little endian we must handle reversed element numbering.  */
 
-static vector signed char __ATTRS_o_ai
-vec_pack(vector signed short __a, vector signed short __b)
-{
+static vector signed char __ATTRS_o_ai vec_pack(vector signed short __a,
+                                                vector signed short __b) {
 #ifdef __LITTLE_ENDIAN__
-  return (vector signed char)vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E,
-     0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E));
+  return (vector signed char)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E,
+                             0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E));
 #else
-  return (vector signed char)vec_perm(__a, __b, (vector unsigned char)
-    (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
-     0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F));
+  return (vector signed char)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
+                             0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F));
 #endif
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_pack(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_pack(vector unsigned short __a,
+                                                  vector unsigned short __b) {
 #ifdef __LITTLE_ENDIAN__
-  return (vector unsigned char)vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E,
-     0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E));
+  return (vector unsigned char)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E,
+                             0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E));
 #else
-  return (vector unsigned char)vec_perm(__a, __b, (vector unsigned char)
-    (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
-     0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F));
+  return (vector unsigned char)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
+                             0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F));
 #endif
 }
 
-static vector bool char __ATTRS_o_ai
-vec_pack(vector bool short __a, vector bool short __b)
-{
+static vector bool char __ATTRS_o_ai vec_pack(vector bool short __a,
+                                              vector bool short __b) {
 #ifdef __LITTLE_ENDIAN__
-  return (vector bool char)vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E,
-     0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E));
+  return (vector bool char)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E,
+                             0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E));
 #else
-  return (vector bool char)vec_perm(__a, __b, (vector unsigned char)
-    (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
-     0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F));
+  return (vector bool char)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
+                             0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F));
 #endif
 }
 
-static vector short __ATTRS_o_ai
-vec_pack(vector int __a, vector int __b)
-{
+static vector short __ATTRS_o_ai vec_pack(vector int __a, vector int __b) {
 #ifdef __LITTLE_ENDIAN__
-  return (vector short)vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D,
-     0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D));
+  return (vector short)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D,
+                             0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D));
 #else
-  return (vector short)vec_perm(__a, __b, (vector unsigned char)
-    (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F,
-     0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F));
+  return (vector short)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F,
+                             0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F));
 #endif
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_pack(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_pack(vector unsigned int __a,
+                                                   vector unsigned int __b) {
 #ifdef __LITTLE_ENDIAN__
-  return (vector unsigned short)vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D,
-     0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D));
+  return (vector unsigned short)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D,
+                             0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D));
 #else
-  return (vector unsigned short)vec_perm(__a, __b, (vector unsigned char)
-    (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F,
-     0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F));
+  return (vector unsigned short)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F,
+                             0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F));
 #endif
 }
 
-static vector bool short __ATTRS_o_ai
-vec_pack(vector bool int __a, vector bool int __b)
-{
+static vector bool short __ATTRS_o_ai vec_pack(vector bool int __a,
+                                               vector bool int __b) {
 #ifdef __LITTLE_ENDIAN__
-  return (vector bool short)vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D,
-     0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D));
+  return (vector bool short)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D,
+                             0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D));
 #else
-  return (vector bool short)vec_perm(__a, __b, (vector unsigned char)
-    (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F,
-     0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F));
+  return (vector bool short)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F,
+                             0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F));
 #endif
 }
 
@@ -4538,45 +4219,48 @@
 
 #define __builtin_altivec_vpkuhum vec_vpkuhum
 
-static vector signed char __ATTRS_o_ai
-vec_vpkuhum(vector signed short __a, vector signed short __b)
-{
+static vector signed char __ATTRS_o_ai vec_vpkuhum(vector signed short __a,
+                                                   vector signed short __b) {
 #ifdef __LITTLE_ENDIAN__
-  return (vector signed char)vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E,
-     0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E));
+  return (vector signed char)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E,
+                             0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E));
 #else
-  return (vector signed char)vec_perm(__a, __b, (vector unsigned char)
-    (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
-     0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F));
+  return (vector signed char)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
+                             0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F));
 #endif
 }
 
 static vector unsigned char __ATTRS_o_ai
-vec_vpkuhum(vector unsigned short __a, vector unsigned short __b)
-{
+vec_vpkuhum(vector unsigned short __a, vector unsigned short __b) {
 #ifdef __LITTLE_ENDIAN__
-  return (vector unsigned char)vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E,
-     0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E));
+  return (vector unsigned char)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E,
+                             0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E));
 #else
-  return (vector unsigned char)vec_perm(__a, __b, (vector unsigned char)
-    (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
-     0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F));
+  return (vector unsigned char)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
+                             0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F));
 #endif
 }
 
-static vector bool char __ATTRS_o_ai
-vec_vpkuhum(vector bool short __a, vector bool short __b)
-{
+static vector bool char __ATTRS_o_ai vec_vpkuhum(vector bool short __a,
+                                                 vector bool short __b) {
 #ifdef __LITTLE_ENDIAN__
-  return (vector bool char)vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E,
-     0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E));
+  return (vector bool char)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E,
+                             0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E));
 #else
-  return (vector bool char)vec_perm(__a, __b, (vector unsigned char)
-    (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
-     0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F));
+  return (vector bool char)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F,
+                             0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F));
 #endif
 }
 
@@ -4584,53 +4268,105 @@
 
 #define __builtin_altivec_vpkuwum vec_vpkuwum
 
-static vector short __ATTRS_o_ai
-vec_vpkuwum(vector int __a, vector int __b)
-{
+static vector short __ATTRS_o_ai vec_vpkuwum(vector int __a, vector int __b) {
 #ifdef __LITTLE_ENDIAN__
-  return (vector short)vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D,
-     0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D));
+  return (vector short)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D,
+                             0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D));
 #else
-  return (vector short)vec_perm(__a, __b, (vector unsigned char)
-    (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F,
-     0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F));
+  return (vector short)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F,
+                             0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F));
 #endif
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vpkuwum(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vpkuwum(vector unsigned int __a,
+                                                      vector unsigned int __b) {
 #ifdef __LITTLE_ENDIAN__
-  return (vector unsigned short)vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D,
-     0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D));
+  return (vector unsigned short)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D,
+                             0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D));
 #else
-  return (vector unsigned short)vec_perm(__a, __b, (vector unsigned char)
-    (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F,
-     0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F));
+  return (vector unsigned short)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F,
+                             0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F));
 #endif
 }
 
-static vector bool short __ATTRS_o_ai
-vec_vpkuwum(vector bool int __a, vector bool int __b)
-{
+static vector bool short __ATTRS_o_ai vec_vpkuwum(vector bool int __a,
+                                                  vector bool int __b) {
 #ifdef __LITTLE_ENDIAN__
-  return (vector bool short)vec_perm(__a, __b, (vector unsigned char)
-    (0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D,
-     0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D));
+  return (vector bool short)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D,
+                             0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D));
 #else
-  return (vector bool short)vec_perm(__a, __b, (vector unsigned char)
-    (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F,
-     0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F));
+  return (vector bool short)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F,
+                             0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F));
 #endif
 }
 
+/* vec_vpkudum */
+
+#ifdef __POWER8_VECTOR__
+#define __builtin_altivec_vpkudum vec_vpkudum
+
+static vector int __ATTRS_o_ai vec_vpkudum(vector long long __a,
+                                           vector long long __b) {
+#ifdef __LITTLE_ENDIAN__
+  return (vector int)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B,
+                             0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B));
+#else
+  return (vector int)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F,
+                             0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F));
+#endif
+}
+
+static vector unsigned int __ATTRS_o_ai
+vec_vpkudum(vector unsigned long long __a, vector unsigned long long __b) {
+#ifdef __LITTLE_ENDIAN__
+  return (vector unsigned int)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B,
+                             0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B));
+#else
+  return (vector unsigned int)vec_perm(
+      __a, __b,
+      (vector unsigned char)(0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F,
+                             0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F));
+#endif
+}
+
+static vector bool int __ATTRS_o_ai vec_vpkudum(vector bool long long __a,
+                                                vector bool long long __b) {
+#ifdef __LITTLE_ENDIAN__
+  return (vector bool int)vec_perm(
+      (vector long long)__a, (vector long long)__b,
+      (vector unsigned char)(0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B,
+                             0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B));
+#else
+  return (vector bool int)vec_perm(
+      (vector long long)__a, (vector long long)__b,
+      (vector unsigned char)(0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F,
+                             0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F));
+#endif
+}
+#endif
+
 /* vec_packpx */
 
 static vector pixel __attribute__((__always_inline__))
-vec_packpx(vector unsigned int __a, vector unsigned int __b)
-{
+vec_packpx(vector unsigned int __a, vector unsigned int __b) {
 #ifdef __LITTLE_ENDIAN__
   return (vector pixel)__builtin_altivec_vpkpx(__b, __a);
 #else
@@ -4641,8 +4377,7 @@
 /* vec_vpkpx */
 
 static vector pixel __attribute__((__always_inline__))
-vec_vpkpx(vector unsigned int __a, vector unsigned int __b)
-{
+vec_vpkpx(vector unsigned int __a, vector unsigned int __b) {
 #ifdef __LITTLE_ENDIAN__
   return (vector pixel)__builtin_altivec_vpkpx(__b, __a);
 #else
@@ -4652,9 +4387,8 @@
 
 /* vec_packs */
 
-static vector signed char __ATTRS_o_ai
-vec_packs(vector short __a, vector short __b)
-{
+static vector signed char __ATTRS_o_ai vec_packs(vector short __a,
+                                                 vector short __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vpkshss(__b, __a);
 #else
@@ -4662,9 +4396,8 @@
 #endif
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_packs(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_packs(vector unsigned short __a,
+                                                   vector unsigned short __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vpkuhus(__b, __a);
 #else
@@ -4672,9 +4405,8 @@
 #endif
 }
 
-static vector signed short __ATTRS_o_ai
-vec_packs(vector int __a, vector int __b)
-{
+static vector signed short __ATTRS_o_ai vec_packs(vector int __a,
+                                                  vector int __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vpkswss(__b, __a);
 #else
@@ -4682,9 +4414,8 @@
 #endif
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_packs(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_packs(vector unsigned int __a,
+                                                    vector unsigned int __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vpkuwus(__b, __a);
 #else
@@ -4692,11 +4423,30 @@
 #endif
 }
 
+#ifdef __POWER8_VECTOR__
+static vector int __ATTRS_o_ai vec_packs(vector long long __a,
+                                         vector long long __b) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vpksdss(__b, __a);
+#else
+  return __builtin_altivec_vpksdss(__a, __b);
+#endif
+}
+
+static vector unsigned int __ATTRS_o_ai
+vec_packs(vector unsigned long long __a, vector unsigned long long __b) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vpkudus(__b, __a);
+#else
+  return __builtin_altivec_vpkudus(__a, __b);
+#endif
+}
+#endif
+
 /* vec_vpkshss */
 
 static vector signed char __attribute__((__always_inline__))
-vec_vpkshss(vector short __a, vector short __b)
-{
+vec_vpkshss(vector short __a, vector short __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vpkshss(__b, __a);
 #else
@@ -4704,11 +4454,23 @@
 #endif
 }
 
+/* vec_vpksdss */
+
+#ifdef __POWER8_VECTOR__
+static vector int __ATTRS_o_ai vec_vpksdss(vector long long __a,
+                                           vector long long __b) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vpksdss(__b, __a);
+#else
+  return __builtin_altivec_vpksdss(__a, __b);
+#endif
+}
+#endif
+
 /* vec_vpkuhus */
 
 static vector unsigned char __attribute__((__always_inline__))
-vec_vpkuhus(vector unsigned short __a, vector unsigned short __b)
-{
+vec_vpkuhus(vector unsigned short __a, vector unsigned short __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vpkuhus(__b, __a);
 #else
@@ -4716,11 +4478,23 @@
 #endif
 }
 
+/* vec_vpkudus */
+
+#ifdef __POWER8_VECTOR__
+static vector unsigned int __attribute__((__always_inline__))
+vec_vpkudus(vector unsigned long long __a, vector unsigned long long __b) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vpkudus(__b, __a);
+#else
+  return __builtin_altivec_vpkudus(__a, __b);
+#endif
+}
+#endif
+
 /* vec_vpkswss */
 
 static vector signed short __attribute__((__always_inline__))
-vec_vpkswss(vector int __a, vector int __b)
-{
+vec_vpkswss(vector int __a, vector int __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vpkswss(__b, __a);
 #else
@@ -4731,8 +4505,7 @@
 /* vec_vpkuwus */
 
 static vector unsigned short __attribute__((__always_inline__))
-vec_vpkuwus(vector unsigned int __a, vector unsigned int __b)
-{
+vec_vpkuwus(vector unsigned int __a, vector unsigned int __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vpkuwus(__b, __a);
 #else
@@ -4742,9 +4515,8 @@
 
 /* vec_packsu */
 
-static vector unsigned char __ATTRS_o_ai
-vec_packsu(vector short __a, vector short __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_packsu(vector short __a,
+                                                    vector short __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vpkshus(__b, __a);
 #else
@@ -4752,9 +4524,8 @@
 #endif
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_packsu(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_packsu(vector unsigned short __a,
+                                                    vector unsigned short __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vpkuhus(__b, __a);
 #else
@@ -4762,9 +4533,8 @@
 #endif
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_packsu(vector int __a, vector int __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_packsu(vector int __a,
+                                                     vector int __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vpkswus(__b, __a);
 #else
@@ -4772,9 +4542,8 @@
 #endif
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_packsu(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_packsu(vector unsigned int __a,
+                                                     vector unsigned int __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vpkuwus(__b, __a);
 #else
@@ -4782,11 +4551,30 @@
 #endif
 }
 
+#ifdef __POWER8_VECTOR__
+static vector unsigned int __ATTRS_o_ai vec_packsu(vector long long __a,
+                                                   vector long long __b) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vpksdus(__b, __a);
+#else
+  return __builtin_altivec_vpksdus(__a, __b);
+#endif
+}
+
+static vector unsigned int __ATTRS_o_ai
+vec_packsu(vector unsigned long long __a, vector unsigned long long __b) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vpkudus(__b, __a);
+#else
+  return __builtin_altivec_vpkudus(__a, __b);
+#endif
+}
+#endif
+
 /* vec_vpkshus */
 
-static vector unsigned char __ATTRS_o_ai
-vec_vpkshus(vector short __a, vector short __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vpkshus(vector short __a,
+                                                     vector short __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vpkshus(__b, __a);
 #else
@@ -4795,8 +4583,7 @@
 }
 
 static vector unsigned char __ATTRS_o_ai
-vec_vpkshus(vector unsigned short __a, vector unsigned short __b)
-{
+vec_vpkshus(vector unsigned short __a, vector unsigned short __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vpkuhus(__b, __a);
 #else
@@ -4806,9 +4593,8 @@
 
 /* vec_vpkswus */
 
-static vector unsigned short __ATTRS_o_ai
-vec_vpkswus(vector int __a, vector int __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vpkswus(vector int __a,
+                                                      vector int __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vpkswus(__b, __a);
 #else
@@ -4816,9 +4602,8 @@
 #endif
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vpkswus(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vpkswus(vector unsigned int __a,
+                                                      vector unsigned int __b) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vpkuwus(__b, __a);
 #else
@@ -4826,6 +4611,19 @@
 #endif
 }
 
+/* vec_vpksdus */
+
+#ifdef __POWER8_VECTOR__
+static vector unsigned int __ATTRS_o_ai vec_vpksdus(vector long long __a,
+                                                    vector long long __b) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vpksdus(__b, __a);
+#else
+  return __builtin_altivec_vpksdus(__a, __b);
+#endif
+}
+#endif
+
 /* vec_perm */
 
 // The vperm instruction is defined architecturally with a big-endian bias.
@@ -4836,121 +4634,114 @@
 // in that the vec_xor can be recognized as a vec_nor (and for P8 and
 // later, possibly a vec_nand).
 
-static vector signed char __ATTRS_o_ai
-vec_perm(vector signed char __a, vector signed char __b, vector unsigned char __c)
-{
+static vector signed char __ATTRS_o_ai vec_perm(vector signed char __a,
+                                                vector signed char __b,
+                                                vector unsigned char __c) {
 #ifdef __LITTLE_ENDIAN__
-  vector unsigned char __d = {255,255,255,255,255,255,255,255,
-                              255,255,255,255,255,255,255,255};
+  vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
+                              255, 255, 255, 255, 255, 255, 255, 255};
   __d = vec_xor(__c, __d);
-  return (vector signed char)
-           __builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d);
+  return (vector signed char)__builtin_altivec_vperm_4si((vector int)__b,
+                                                         (vector int)__a, __d);
 #else
-  return (vector signed char)
-           __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
+  return (vector signed char)__builtin_altivec_vperm_4si((vector int)__a,
+                                                         (vector int)__b, __c);
 #endif
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_perm(vector unsigned char __a,
-         vector unsigned char __b,
-         vector unsigned char __c)
-{
+static vector unsigned char __ATTRS_o_ai vec_perm(vector unsigned char __a,
+                                                  vector unsigned char __b,
+                                                  vector unsigned char __c) {
 #ifdef __LITTLE_ENDIAN__
-  vector unsigned char __d = {255,255,255,255,255,255,255,255,
-                              255,255,255,255,255,255,255,255};
+  vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
+                              255, 255, 255, 255, 255, 255, 255, 255};
   __d = vec_xor(__c, __d);
-  return (vector unsigned char)
-           __builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d);
+  return (vector unsigned char)__builtin_altivec_vperm_4si(
+      (vector int)__b, (vector int)__a, __d);
 #else
-  return (vector unsigned char)
-           __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
+  return (vector unsigned char)__builtin_altivec_vperm_4si(
+      (vector int)__a, (vector int)__b, __c);
 #endif
 }
 
-static vector bool char __ATTRS_o_ai
-vec_perm(vector bool char __a, vector bool char __b, vector unsigned char __c)
-{
+static vector bool char __ATTRS_o_ai vec_perm(vector bool char __a,
+                                              vector bool char __b,
+                                              vector unsigned char __c) {
 #ifdef __LITTLE_ENDIAN__
-  vector unsigned char __d = {255,255,255,255,255,255,255,255,
-                              255,255,255,255,255,255,255,255};
+  vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
+                              255, 255, 255, 255, 255, 255, 255, 255};
   __d = vec_xor(__c, __d);
-  return (vector bool char)
-           __builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d);
+  return (vector bool char)__builtin_altivec_vperm_4si((vector int)__b,
+                                                       (vector int)__a, __d);
 #else
-  return (vector bool char)
-           __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
+  return (vector bool char)__builtin_altivec_vperm_4si((vector int)__a,
+                                                       (vector int)__b, __c);
 #endif
 }
 
-static vector short __ATTRS_o_ai
-vec_perm(vector short __a, vector short __b, vector unsigned char __c)
-{
+static vector short __ATTRS_o_ai vec_perm(vector short __a, vector short __b,
+                                          vector unsigned char __c) {
 #ifdef __LITTLE_ENDIAN__
-  vector unsigned char __d = {255,255,255,255,255,255,255,255,
-                              255,255,255,255,255,255,255,255};
+  vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
+                              255, 255, 255, 255, 255, 255, 255, 255};
   __d = vec_xor(__c, __d);
-  return (vector short)
-           __builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d);
+  return (vector short)__builtin_altivec_vperm_4si((vector int)__b,
+                                                   (vector int)__a, __d);
 #else
-  return (vector short)
-           __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
+  return (vector short)__builtin_altivec_vperm_4si((vector int)__a,
+                                                   (vector int)__b, __c);
 #endif
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_perm(vector unsigned short __a,
-         vector unsigned short __b,
-         vector unsigned char __c)
-{
+static vector unsigned short __ATTRS_o_ai vec_perm(vector unsigned short __a,
+                                                   vector unsigned short __b,
+                                                   vector unsigned char __c) {
 #ifdef __LITTLE_ENDIAN__
-  vector unsigned char __d = {255,255,255,255,255,255,255,255,
-                              255,255,255,255,255,255,255,255};
+  vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
+                              255, 255, 255, 255, 255, 255, 255, 255};
   __d = vec_xor(__c, __d);
-  return (vector unsigned short)
-           __builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d);
+  return (vector unsigned short)__builtin_altivec_vperm_4si(
+      (vector int)__b, (vector int)__a, __d);
 #else
-  return (vector unsigned short)
-           __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
+  return (vector unsigned short)__builtin_altivec_vperm_4si(
+      (vector int)__a, (vector int)__b, __c);
 #endif
 }
 
-static vector bool short __ATTRS_o_ai
-vec_perm(vector bool short __a, vector bool short __b, vector unsigned char __c)
-{
+static vector bool short __ATTRS_o_ai vec_perm(vector bool short __a,
+                                               vector bool short __b,
+                                               vector unsigned char __c) {
 #ifdef __LITTLE_ENDIAN__
-  vector unsigned char __d = {255,255,255,255,255,255,255,255,
-                              255,255,255,255,255,255,255,255};
+  vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
+                              255, 255, 255, 255, 255, 255, 255, 255};
   __d = vec_xor(__c, __d);
-  return (vector bool short)
-           __builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d);
+  return (vector bool short)__builtin_altivec_vperm_4si((vector int)__b,
+                                                        (vector int)__a, __d);
 #else
-  return (vector bool short)
-           __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
+  return (vector bool short)__builtin_altivec_vperm_4si((vector int)__a,
+                                                        (vector int)__b, __c);
 #endif
 }
 
-static vector pixel __ATTRS_o_ai
-vec_perm(vector pixel __a, vector pixel __b, vector unsigned char __c)
-{
+static vector pixel __ATTRS_o_ai vec_perm(vector pixel __a, vector pixel __b,
+                                          vector unsigned char __c) {
 #ifdef __LITTLE_ENDIAN__
-  vector unsigned char __d = {255,255,255,255,255,255,255,255,
-                              255,255,255,255,255,255,255,255};
+  vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
+                              255, 255, 255, 255, 255, 255, 255, 255};
   __d = vec_xor(__c, __d);
-  return (vector pixel)
-           __builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d);
+  return (vector pixel)__builtin_altivec_vperm_4si((vector int)__b,
+                                                   (vector int)__a, __d);
 #else
-  return (vector pixel)
-           __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
+  return (vector pixel)__builtin_altivec_vperm_4si((vector int)__a,
+                                                   (vector int)__b, __c);
 #endif
 }
 
-static vector int __ATTRS_o_ai
-vec_perm(vector int __a, vector int __b, vector unsigned char __c)
-{
+static vector int __ATTRS_o_ai vec_perm(vector int __a, vector int __b,
+                                        vector unsigned char __c) {
 #ifdef __LITTLE_ENDIAN__
-  vector unsigned char __d = {255,255,255,255,255,255,255,255,
-                              255,255,255,255,255,255,255,255};
+  vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
+                              255, 255, 255, 255, 255, 255, 255, 255};
   __d = vec_xor(__c, __d);
   return (vector int)__builtin_altivec_vperm_4si(__b, __a, __d);
 #else
@@ -4958,58 +4749,57 @@
 #endif
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_perm(vector unsigned int __a, vector unsigned int __b, vector unsigned char __c)
-{
+static vector unsigned int __ATTRS_o_ai vec_perm(vector unsigned int __a,
+                                                 vector unsigned int __b,
+                                                 vector unsigned char __c) {
 #ifdef __LITTLE_ENDIAN__
-  vector unsigned char __d = {255,255,255,255,255,255,255,255,
-                              255,255,255,255,255,255,255,255};
+  vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
+                              255, 255, 255, 255, 255, 255, 255, 255};
   __d = vec_xor(__c, __d);
-  return (vector unsigned int)
-           __builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d);
+  return (vector unsigned int)__builtin_altivec_vperm_4si((vector int)__b,
+                                                          (vector int)__a, __d);
 #else
-  return (vector unsigned int)
-           __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
+  return (vector unsigned int)__builtin_altivec_vperm_4si((vector int)__a,
+                                                          (vector int)__b, __c);
 #endif
 }
 
-static vector bool int __ATTRS_o_ai
-vec_perm(vector bool int __a, vector bool int __b, vector unsigned char __c)
-{
+static vector bool int __ATTRS_o_ai vec_perm(vector bool int __a,
+                                             vector bool int __b,
+                                             vector unsigned char __c) {
 #ifdef __LITTLE_ENDIAN__
-  vector unsigned char __d = {255,255,255,255,255,255,255,255,
-                              255,255,255,255,255,255,255,255};
+  vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
+                              255, 255, 255, 255, 255, 255, 255, 255};
   __d = vec_xor(__c, __d);
-  return (vector bool int)
-           __builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d);
+  return (vector bool int)__builtin_altivec_vperm_4si((vector int)__b,
+                                                      (vector int)__a, __d);
 #else
-  return (vector bool int)
-           __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
+  return (vector bool int)__builtin_altivec_vperm_4si((vector int)__a,
+                                                      (vector int)__b, __c);
 #endif
 }
 
-static vector float __ATTRS_o_ai
-vec_perm(vector float __a, vector float __b, vector unsigned char __c)
-{
+static vector float __ATTRS_o_ai vec_perm(vector float __a, vector float __b,
+                                          vector unsigned char __c) {
 #ifdef __LITTLE_ENDIAN__
-  vector unsigned char __d = {255,255,255,255,255,255,255,255,
-                              255,255,255,255,255,255,255,255};
+  vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
+                              255, 255, 255, 255, 255, 255, 255, 255};
   __d = vec_xor(__c, __d);
-  return (vector float)
-           __builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d);
+  return (vector float)__builtin_altivec_vperm_4si((vector int)__b,
+                                                   (vector int)__a, __d);
 #else
-  return (vector float)
-           __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
+  return (vector float)__builtin_altivec_vperm_4si((vector int)__a,
+                                                   (vector int)__b, __c);
 #endif
 }
 
 #ifdef __VSX__
-static vector long long __ATTRS_o_ai
-vec_perm(vector long long __a, vector long long __b, vector unsigned char __c)
-{
+static vector long long __ATTRS_o_ai vec_perm(vector long long __a,
+                                              vector long long __b,
+                                              vector unsigned char __c) {
 #ifdef __LITTLE_ENDIAN__
-  vector unsigned char __d = {255,255,255,255,255,255,255,255,
-                              255,255,255,255,255,255,255,255};
+  vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
+                              255, 255, 255, 255, 255, 255, 255, 255};
   __d = vec_xor(__c, __d);
   return (vector long long)__builtin_altivec_vperm_4si(__b, __a, __d);
 #else
@@ -5019,125 +4809,114 @@
 
 static vector unsigned long long __ATTRS_o_ai
 vec_perm(vector unsigned long long __a, vector unsigned long long __b,
-         vector unsigned char __c)
-{
+         vector unsigned char __c) {
 #ifdef __LITTLE_ENDIAN__
-  vector unsigned char __d = {255,255,255,255,255,255,255,255,
-                              255,255,255,255,255,255,255,255};
+  vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
+                              255, 255, 255, 255, 255, 255, 255, 255};
   __d = vec_xor(__c, __d);
-  return (vector unsigned long long)
-           __builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d);
+  return (vector unsigned long long)__builtin_altivec_vperm_4si(
+      (vector int)__b, (vector int)__a, __d);
 #else
-  return (vector unsigned long long)
-           __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
+  return (vector unsigned long long)__builtin_altivec_vperm_4si(
+      (vector int)__a, (vector int)__b, __c);
 #endif
 }
 
-static vector double __ATTRS_o_ai
-vec_perm(vector double __a, vector double __b, vector unsigned char __c)
-{
+static vector double __ATTRS_o_ai vec_perm(vector double __a, vector double __b,
+                                           vector unsigned char __c) {
 #ifdef __LITTLE_ENDIAN__
-  vector unsigned char __d = {255,255,255,255,255,255,255,255,
-                              255,255,255,255,255,255,255,255};
+  vector unsigned char __d = {255, 255, 255, 255, 255, 255, 255, 255,
+                              255, 255, 255, 255, 255, 255, 255, 255};
   __d = vec_xor(__c, __d);
-  return (vector double)
-           __builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d);
+  return (vector double)__builtin_altivec_vperm_4si((vector int)__b,
+                                                    (vector int)__a, __d);
 #else
-  return (vector double)
-           __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
+  return (vector double)__builtin_altivec_vperm_4si((vector int)__a,
+                                                    (vector int)__b, __c);
 #endif
 }
 #endif
 
 /* vec_vperm */
 
-static vector signed char __ATTRS_o_ai
-vec_vperm(vector signed char __a, vector signed char __b, vector unsigned char __c)
-{
+static vector signed char __ATTRS_o_ai vec_vperm(vector signed char __a,
+                                                 vector signed char __b,
+                                                 vector unsigned char __c) {
   return vec_perm(__a, __b, __c);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vperm(vector unsigned char __a,
-          vector unsigned char __b,
-          vector unsigned char __c)
-{
+static vector unsigned char __ATTRS_o_ai vec_vperm(vector unsigned char __a,
+                                                   vector unsigned char __b,
+                                                   vector unsigned char __c) {
   return vec_perm(__a, __b, __c);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_vperm(vector bool char __a, vector bool char __b, vector unsigned char __c)
-{
+static vector bool char __ATTRS_o_ai vec_vperm(vector bool char __a,
+                                               vector bool char __b,
+                                               vector unsigned char __c) {
   return vec_perm(__a, __b, __c);
 }
 
-static vector short __ATTRS_o_ai
-vec_vperm(vector short __a, vector short __b, vector unsigned char __c)
-{
+static vector short __ATTRS_o_ai vec_vperm(vector short __a, vector short __b,
+                                           vector unsigned char __c) {
   return vec_perm(__a, __b, __c);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vperm(vector unsigned short __a,
-          vector unsigned short __b,
-          vector unsigned char __c)
-{
+static vector unsigned short __ATTRS_o_ai vec_vperm(vector unsigned short __a,
+                                                    vector unsigned short __b,
+                                                    vector unsigned char __c) {
   return vec_perm(__a, __b, __c);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_vperm(vector bool short __a, vector bool short __b, vector unsigned char __c)
-{
+static vector bool short __ATTRS_o_ai vec_vperm(vector bool short __a,
+                                                vector bool short __b,
+                                                vector unsigned char __c) {
   return vec_perm(__a, __b, __c);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_vperm(vector pixel __a, vector pixel __b, vector unsigned char __c)
-{
+static vector pixel __ATTRS_o_ai vec_vperm(vector pixel __a, vector pixel __b,
+                                           vector unsigned char __c) {
   return vec_perm(__a, __b, __c);
 }
 
-static vector int __ATTRS_o_ai
-vec_vperm(vector int __a, vector int __b, vector unsigned char __c)
-{
+static vector int __ATTRS_o_ai vec_vperm(vector int __a, vector int __b,
+                                         vector unsigned char __c) {
   return vec_perm(__a, __b, __c);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vperm(vector unsigned int __a, vector unsigned int __b, vector unsigned char __c)
-{
+static vector unsigned int __ATTRS_o_ai vec_vperm(vector unsigned int __a,
+                                                  vector unsigned int __b,
+                                                  vector unsigned char __c) {
   return vec_perm(__a, __b, __c);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_vperm(vector bool int __a, vector bool int __b, vector unsigned char __c)
-{
+static vector bool int __ATTRS_o_ai vec_vperm(vector bool int __a,
+                                              vector bool int __b,
+                                              vector unsigned char __c) {
   return vec_perm(__a, __b, __c);
 }
 
-static vector float __ATTRS_o_ai
-vec_vperm(vector float __a, vector float __b, vector unsigned char __c)
-{
+static vector float __ATTRS_o_ai vec_vperm(vector float __a, vector float __b,
+                                           vector unsigned char __c) {
   return vec_perm(__a, __b, __c);
 }
 
 #ifdef __VSX__
-static vector long long __ATTRS_o_ai
-vec_vperm(vector long long __a, vector long long __b, vector unsigned char __c)
-{
+static vector long long __ATTRS_o_ai vec_vperm(vector long long __a,
+                                               vector long long __b,
+                                               vector unsigned char __c) {
   return vec_perm(__a, __b, __c);
 }
 
 static vector unsigned long long __ATTRS_o_ai
 vec_vperm(vector unsigned long long __a, vector unsigned long long __b,
-          vector unsigned char __c)
-{
+          vector unsigned char __c) {
   return vec_perm(__a, __b, __c);
 }
 
-static vector double __ATTRS_o_ai
-vec_vperm(vector double __a, vector double __b, vector unsigned char __c)
-{
+static vector double __ATTRS_o_ai vec_vperm(vector double __a,
+                                            vector double __b,
+                                            vector unsigned char __c) {
   return vec_perm(__a, __b, __c);
 }
 #endif
@@ -5145,142 +4924,121 @@
 /* vec_re */
 
 static vector float __attribute__((__always_inline__))
-vec_re(vector float __a)
-{
+vec_re(vector float __a) {
   return __builtin_altivec_vrefp(__a);
 }
 
 /* vec_vrefp */
 
 static vector float __attribute__((__always_inline__))
-vec_vrefp(vector float __a)
-{
+vec_vrefp(vector float __a) {
   return __builtin_altivec_vrefp(__a);
 }
 
 /* vec_rl */
 
-static vector signed char __ATTRS_o_ai
-vec_rl(vector signed char __a, vector unsigned char __b)
-{
+static vector signed char __ATTRS_o_ai vec_rl(vector signed char __a,
+                                              vector unsigned char __b) {
   return (vector signed char)__builtin_altivec_vrlb((vector char)__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_rl(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_rl(vector unsigned char __a,
+                                                vector unsigned char __b) {
   return (vector unsigned char)__builtin_altivec_vrlb((vector char)__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_rl(vector short __a, vector unsigned short __b)
-{
+static vector short __ATTRS_o_ai vec_rl(vector short __a,
+                                        vector unsigned short __b) {
   return __builtin_altivec_vrlh(__a, __b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_rl(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_rl(vector unsigned short __a,
+                                                 vector unsigned short __b) {
   return (vector unsigned short)__builtin_altivec_vrlh((vector short)__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_rl(vector int __a, vector unsigned int __b)
-{
+static vector int __ATTRS_o_ai vec_rl(vector int __a, vector unsigned int __b) {
   return __builtin_altivec_vrlw(__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_rl(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_rl(vector unsigned int __a,
+                                               vector unsigned int __b) {
   return (vector unsigned int)__builtin_altivec_vrlw((vector int)__a, __b);
 }
 
 #ifdef __POWER8_VECTOR__
 static vector signed long long __ATTRS_o_ai
-vec_rl(vector signed long long __a, vector unsigned long long __b)
-{
+vec_rl(vector signed long long __a, vector unsigned long long __b) {
   return __builtin_altivec_vrld(__a, __b);
 }
 
 static vector unsigned long long __ATTRS_o_ai
-vec_rl(vector unsigned long long __a, vector unsigned long long __b)
-{
+vec_rl(vector unsigned long long __a, vector unsigned long long __b) {
   return __builtin_altivec_vrld(__a, __b);
 }
 #endif
 
 /* vec_vrlb */
 
-static vector signed char __ATTRS_o_ai
-vec_vrlb(vector signed char __a, vector unsigned char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vrlb(vector signed char __a,
+                                                vector unsigned char __b) {
   return (vector signed char)__builtin_altivec_vrlb((vector char)__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vrlb(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vrlb(vector unsigned char __a,
+                                                  vector unsigned char __b) {
   return (vector unsigned char)__builtin_altivec_vrlb((vector char)__a, __b);
 }
 
 /* vec_vrlh */
 
-static vector short __ATTRS_o_ai
-vec_vrlh(vector short __a, vector unsigned short __b)
-{
+static vector short __ATTRS_o_ai vec_vrlh(vector short __a,
+                                          vector unsigned short __b) {
   return __builtin_altivec_vrlh(__a, __b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vrlh(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vrlh(vector unsigned short __a,
+                                                   vector unsigned short __b) {
   return (vector unsigned short)__builtin_altivec_vrlh((vector short)__a, __b);
 }
 
 /* vec_vrlw */
 
-static vector int __ATTRS_o_ai
-vec_vrlw(vector int __a, vector unsigned int __b)
-{
+static vector int __ATTRS_o_ai vec_vrlw(vector int __a,
+                                        vector unsigned int __b) {
   return __builtin_altivec_vrlw(__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vrlw(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vrlw(vector unsigned int __a,
+                                                 vector unsigned int __b) {
   return (vector unsigned int)__builtin_altivec_vrlw((vector int)__a, __b);
 }
 
 /* vec_round */
 
 static vector float __attribute__((__always_inline__))
-vec_round(vector float __a)
-{
+vec_round(vector float __a) {
   return __builtin_altivec_vrfin(__a);
 }
 
 /* vec_vrfin */
 
 static vector float __attribute__((__always_inline__))
-vec_vrfin(vector float __a)
-{
+vec_vrfin(vector float __a) {
   return __builtin_altivec_vrfin(__a);
 }
 
 /* vec_rsqrte */
 
 static __vector float __attribute__((__always_inline__))
-vec_rsqrte(vector float __a)
-{
+vec_rsqrte(vector float __a) {
   return __builtin_altivec_vrsqrtefp(__a);
 }
 
 /* vec_vrsqrtefp */
 
 static __vector float __attribute__((__always_inline__))
-vec_vrsqrtefp(vector float __a)
-{
+vec_vrsqrtefp(vector float __a) {
   return __builtin_altivec_vrsqrtefp(__a);
 }
 
@@ -5288,308 +5046,285 @@
 
 #define __builtin_altivec_vsel_4si vec_sel
 
-static vector signed char __ATTRS_o_ai
-vec_sel(vector signed char __a, vector signed char __b, vector unsigned char __c)
-{
+static vector signed char __ATTRS_o_ai vec_sel(vector signed char __a,
+                                               vector signed char __b,
+                                               vector unsigned char __c) {
   return (__a & ~(vector signed char)__c) | (__b & (vector signed char)__c);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_sel(vector signed char __a, vector signed char __b, vector bool char __c)
-{
+static vector signed char __ATTRS_o_ai vec_sel(vector signed char __a,
+                                               vector signed char __b,
+                                               vector bool char __c) {
   return (__a & ~(vector signed char)__c) | (__b & (vector signed char)__c);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_sel(vector unsigned char __a, vector unsigned char __b, vector unsigned char __c)
-{
+static vector unsigned char __ATTRS_o_ai vec_sel(vector unsigned char __a,
+                                                 vector unsigned char __b,
+                                                 vector unsigned char __c) {
   return (__a & ~__c) | (__b & __c);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_sel(vector unsigned char __a, vector unsigned char __b, vector bool char __c)
-{
+static vector unsigned char __ATTRS_o_ai vec_sel(vector unsigned char __a,
+                                                 vector unsigned char __b,
+                                                 vector bool char __c) {
   return (__a & ~(vector unsigned char)__c) | (__b & (vector unsigned char)__c);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_sel(vector bool char __a, vector bool char __b, vector unsigned char __c)
-{
+static vector bool char __ATTRS_o_ai vec_sel(vector bool char __a,
+                                             vector bool char __b,
+                                             vector unsigned char __c) {
   return (__a & ~(vector bool char)__c) | (__b & (vector bool char)__c);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_sel(vector bool char __a, vector bool char __b, vector bool char __c)
-{
+static vector bool char __ATTRS_o_ai vec_sel(vector bool char __a,
+                                             vector bool char __b,
+                                             vector bool char __c) {
   return (__a & ~__c) | (__b & __c);
 }
 
-static vector short __ATTRS_o_ai
-vec_sel(vector short __a, vector short __b, vector unsigned short __c)
-{
+static vector short __ATTRS_o_ai vec_sel(vector short __a, vector short __b,
+                                         vector unsigned short __c) {
   return (__a & ~(vector short)__c) | (__b & (vector short)__c);
 }
 
-static vector short __ATTRS_o_ai
-vec_sel(vector short __a, vector short __b, vector bool short __c)
-{
+static vector short __ATTRS_o_ai vec_sel(vector short __a, vector short __b,
+                                         vector bool short __c) {
   return (__a & ~(vector short)__c) | (__b & (vector short)__c);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_sel(vector unsigned short __a,
-        vector unsigned short __b,
-        vector unsigned short __c)
-{
+static vector unsigned short __ATTRS_o_ai vec_sel(vector unsigned short __a,
+                                                  vector unsigned short __b,
+                                                  vector unsigned short __c) {
   return (__a & ~__c) | (__b & __c);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_sel(vector unsigned short __a, vector unsigned short __b, vector bool short __c)
-{
-  return (__a & ~(vector unsigned short)__c) | (__b & (vector unsigned short)__c);
+static vector unsigned short __ATTRS_o_ai vec_sel(vector unsigned short __a,
+                                                  vector unsigned short __b,
+                                                  vector bool short __c) {
+  return (__a & ~(vector unsigned short)__c) |
+         (__b & (vector unsigned short)__c);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_sel(vector bool short __a, vector bool short __b, vector unsigned short __c)
-{
+static vector bool short __ATTRS_o_ai vec_sel(vector bool short __a,
+                                              vector bool short __b,
+                                              vector unsigned short __c) {
   return (__a & ~(vector bool short)__c) | (__b & (vector bool short)__c);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_sel(vector bool short __a, vector bool short __b, vector bool short __c)
-{
+static vector bool short __ATTRS_o_ai vec_sel(vector bool short __a,
+                                              vector bool short __b,
+                                              vector bool short __c) {
   return (__a & ~__c) | (__b & __c);
 }
 
-static vector int __ATTRS_o_ai
-vec_sel(vector int __a, vector int __b, vector unsigned int __c)
-{
+static vector int __ATTRS_o_ai vec_sel(vector int __a, vector int __b,
+                                       vector unsigned int __c) {
   return (__a & ~(vector int)__c) | (__b & (vector int)__c);
 }
 
-static vector int __ATTRS_o_ai
-vec_sel(vector int __a, vector int __b, vector bool int __c)
-{
+static vector int __ATTRS_o_ai vec_sel(vector int __a, vector int __b,
+                                       vector bool int __c) {
   return (__a & ~(vector int)__c) | (__b & (vector int)__c);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_sel(vector unsigned int __a, vector unsigned int __b, vector unsigned int __c)
-{
+static vector unsigned int __ATTRS_o_ai vec_sel(vector unsigned int __a,
+                                                vector unsigned int __b,
+                                                vector unsigned int __c) {
   return (__a & ~__c) | (__b & __c);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_sel(vector unsigned int __a, vector unsigned int __b, vector bool int __c)
-{
+static vector unsigned int __ATTRS_o_ai vec_sel(vector unsigned int __a,
+                                                vector unsigned int __b,
+                                                vector bool int __c) {
   return (__a & ~(vector unsigned int)__c) | (__b & (vector unsigned int)__c);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_sel(vector bool int __a, vector bool int __b, vector unsigned int __c)
-{
+static vector bool int __ATTRS_o_ai vec_sel(vector bool int __a,
+                                            vector bool int __b,
+                                            vector unsigned int __c) {
   return (__a & ~(vector bool int)__c) | (__b & (vector bool int)__c);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_sel(vector bool int __a, vector bool int __b, vector bool int __c)
-{
+static vector bool int __ATTRS_o_ai vec_sel(vector bool int __a,
+                                            vector bool int __b,
+                                            vector bool int __c) {
   return (__a & ~__c) | (__b & __c);
 }
 
-static vector float __ATTRS_o_ai
-vec_sel(vector float __a, vector float __b, vector unsigned int __c)
-{
-  vector int __res = ((vector int)__a & ~(vector int)__c)
-                   | ((vector int)__b & (vector int)__c);
+static vector float __ATTRS_o_ai vec_sel(vector float __a, vector float __b,
+                                         vector unsigned int __c) {
+  vector int __res = ((vector int)__a & ~(vector int)__c) |
+                     ((vector int)__b & (vector int)__c);
   return (vector float)__res;
 }
 
-static vector float __ATTRS_o_ai
-vec_sel(vector float __a, vector float __b, vector bool int __c)
-{
-  vector int __res = ((vector int)__a & ~(vector int)__c)
-                   | ((vector int)__b & (vector int)__c);
+static vector float __ATTRS_o_ai vec_sel(vector float __a, vector float __b,
+                                         vector bool int __c) {
+  vector int __res = ((vector int)__a & ~(vector int)__c) |
+                     ((vector int)__b & (vector int)__c);
   return (vector float)__res;
 }
 
 /* vec_vsel */
 
-static vector signed char __ATTRS_o_ai
-vec_vsel(vector signed char __a, vector signed char __b, vector unsigned char __c)
-{
+static vector signed char __ATTRS_o_ai vec_vsel(vector signed char __a,
+                                                vector signed char __b,
+                                                vector unsigned char __c) {
   return (__a & ~(vector signed char)__c) | (__b & (vector signed char)__c);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vsel(vector signed char __a, vector signed char __b, vector bool char __c)
-{
+static vector signed char __ATTRS_o_ai vec_vsel(vector signed char __a,
+                                                vector signed char __b,
+                                                vector bool char __c) {
   return (__a & ~(vector signed char)__c) | (__b & (vector signed char)__c);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vsel(vector unsigned char __a, vector unsigned char __b, vector unsigned char __c)
-{
+static vector unsigned char __ATTRS_o_ai vec_vsel(vector unsigned char __a,
+                                                  vector unsigned char __b,
+                                                  vector unsigned char __c) {
   return (__a & ~__c) | (__b & __c);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vsel(vector unsigned char __a, vector unsigned char __b, vector bool char __c)
-{
+static vector unsigned char __ATTRS_o_ai vec_vsel(vector unsigned char __a,
+                                                  vector unsigned char __b,
+                                                  vector bool char __c) {
   return (__a & ~(vector unsigned char)__c) | (__b & (vector unsigned char)__c);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_vsel(vector bool char __a, vector bool char __b, vector unsigned char __c)
-{
+static vector bool char __ATTRS_o_ai vec_vsel(vector bool char __a,
+                                              vector bool char __b,
+                                              vector unsigned char __c) {
   return (__a & ~(vector bool char)__c) | (__b & (vector bool char)__c);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_vsel(vector bool char __a, vector bool char __b, vector bool char __c)
-{
+static vector bool char __ATTRS_o_ai vec_vsel(vector bool char __a,
+                                              vector bool char __b,
+                                              vector bool char __c) {
   return (__a & ~__c) | (__b & __c);
 }
 
-static vector short __ATTRS_o_ai
-vec_vsel(vector short __a, vector short __b, vector unsigned short __c)
-{
+static vector short __ATTRS_o_ai vec_vsel(vector short __a, vector short __b,
+                                          vector unsigned short __c) {
   return (__a & ~(vector short)__c) | (__b & (vector short)__c);
 }
 
-static vector short __ATTRS_o_ai
-vec_vsel(vector short __a, vector short __b, vector bool short __c)
-{
+static vector short __ATTRS_o_ai vec_vsel(vector short __a, vector short __b,
+                                          vector bool short __c) {
   return (__a & ~(vector short)__c) | (__b & (vector short)__c);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vsel(vector unsigned short __a,
-         vector unsigned short __b,
-         vector unsigned short __c)
-{
+static vector unsigned short __ATTRS_o_ai vec_vsel(vector unsigned short __a,
+                                                   vector unsigned short __b,
+                                                   vector unsigned short __c) {
   return (__a & ~__c) | (__b & __c);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vsel(vector unsigned short __a, vector unsigned short __b, vector bool short __c)
-{
-  return (__a & ~(vector unsigned short)__c) | (__b & (vector unsigned short)__c);
+static vector unsigned short __ATTRS_o_ai vec_vsel(vector unsigned short __a,
+                                                   vector unsigned short __b,
+                                                   vector bool short __c) {
+  return (__a & ~(vector unsigned short)__c) |
+         (__b & (vector unsigned short)__c);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_vsel(vector bool short __a, vector bool short __b, vector unsigned short __c)
-{
+static vector bool short __ATTRS_o_ai vec_vsel(vector bool short __a,
+                                               vector bool short __b,
+                                               vector unsigned short __c) {
   return (__a & ~(vector bool short)__c) | (__b & (vector bool short)__c);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_vsel(vector bool short __a, vector bool short __b, vector bool short __c)
-{
+static vector bool short __ATTRS_o_ai vec_vsel(vector bool short __a,
+                                               vector bool short __b,
+                                               vector bool short __c) {
   return (__a & ~__c) | (__b & __c);
 }
 
-static vector int __ATTRS_o_ai
-vec_vsel(vector int __a, vector int __b, vector unsigned int __c)
-{
+static vector int __ATTRS_o_ai vec_vsel(vector int __a, vector int __b,
+                                        vector unsigned int __c) {
   return (__a & ~(vector int)__c) | (__b & (vector int)__c);
 }
 
-static vector int __ATTRS_o_ai
-vec_vsel(vector int __a, vector int __b, vector bool int __c)
-{
+static vector int __ATTRS_o_ai vec_vsel(vector int __a, vector int __b,
+                                        vector bool int __c) {
   return (__a & ~(vector int)__c) | (__b & (vector int)__c);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vsel(vector unsigned int __a, vector unsigned int __b, vector unsigned int __c)
-{
+static vector unsigned int __ATTRS_o_ai vec_vsel(vector unsigned int __a,
+                                                 vector unsigned int __b,
+                                                 vector unsigned int __c) {
   return (__a & ~__c) | (__b & __c);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vsel(vector unsigned int __a, vector unsigned int __b, vector bool int __c)
-{
+static vector unsigned int __ATTRS_o_ai vec_vsel(vector unsigned int __a,
+                                                 vector unsigned int __b,
+                                                 vector bool int __c) {
   return (__a & ~(vector unsigned int)__c) | (__b & (vector unsigned int)__c);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_vsel(vector bool int __a, vector bool int __b, vector unsigned int __c)
-{
+static vector bool int __ATTRS_o_ai vec_vsel(vector bool int __a,
+                                             vector bool int __b,
+                                             vector unsigned int __c) {
   return (__a & ~(vector bool int)__c) | (__b & (vector bool int)__c);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_vsel(vector bool int __a, vector bool int __b, vector bool int __c)
-{
+static vector bool int __ATTRS_o_ai vec_vsel(vector bool int __a,
+                                             vector bool int __b,
+                                             vector bool int __c) {
   return (__a & ~__c) | (__b & __c);
 }
 
-static vector float __ATTRS_o_ai
-vec_vsel(vector float __a, vector float __b, vector unsigned int __c)
-{
-  vector int __res = ((vector int)__a & ~(vector int)__c)
-                   | ((vector int)__b & (vector int)__c);
+static vector float __ATTRS_o_ai vec_vsel(vector float __a, vector float __b,
+                                          vector unsigned int __c) {
+  vector int __res = ((vector int)__a & ~(vector int)__c) |
+                     ((vector int)__b & (vector int)__c);
   return (vector float)__res;
 }
 
-static vector float __ATTRS_o_ai
-vec_vsel(vector float __a, vector float __b, vector bool int __c)
-{
-  vector int __res = ((vector int)__a & ~(vector int)__c)
-                   | ((vector int)__b & (vector int)__c);
+static vector float __ATTRS_o_ai vec_vsel(vector float __a, vector float __b,
+                                          vector bool int __c) {
+  vector int __res = ((vector int)__a & ~(vector int)__c) |
+                     ((vector int)__b & (vector int)__c);
   return (vector float)__res;
 }
 
 /* vec_sl */
 
-static vector signed char __ATTRS_o_ai
-vec_sl(vector signed char __a, vector unsigned char __b)
-{
+static vector signed char __ATTRS_o_ai vec_sl(vector signed char __a,
+                                              vector unsigned char __b) {
   return __a << (vector signed char)__b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_sl(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_sl(vector unsigned char __a,
+                                                vector unsigned char __b) {
   return __a << __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_sl(vector short __a, vector unsigned short __b)
-{
+static vector short __ATTRS_o_ai vec_sl(vector short __a,
+                                        vector unsigned short __b) {
   return __a << (vector short)__b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_sl(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_sl(vector unsigned short __a,
+                                                 vector unsigned short __b) {
   return __a << __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_sl(vector int __a, vector unsigned int __b)
-{
+static vector int __ATTRS_o_ai vec_sl(vector int __a, vector unsigned int __b) {
   return __a << (vector int)__b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_sl(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_sl(vector unsigned int __a,
+                                               vector unsigned int __b) {
   return __a << __b;
 }
 
 #ifdef __POWER8_VECTOR__
 static vector signed long long __ATTRS_o_ai
-vec_sl(vector signed long long __a, vector unsigned long long __b)
-{
+vec_sl(vector signed long long __a, vector unsigned long long __b) {
   return __a << (vector long long)__b;
 }
 
 static vector unsigned long long __ATTRS_o_ai
-vec_sl(vector unsigned long long __a, vector unsigned long long __b)
-{
+vec_sl(vector unsigned long long __a, vector unsigned long long __b) {
   return __a << __b;
 }
 #endif
@@ -5598,15 +5333,13 @@
 
 #define __builtin_altivec_vslb vec_vslb
 
-static vector signed char __ATTRS_o_ai
-vec_vslb(vector signed char __a, vector unsigned char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vslb(vector signed char __a,
+                                                vector unsigned char __b) {
   return vec_sl(__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vslb(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vslb(vector unsigned char __a,
+                                                  vector unsigned char __b) {
   return vec_sl(__a, __b);
 }
 
@@ -5614,15 +5347,13 @@
 
 #define __builtin_altivec_vslh vec_vslh
 
-static vector short __ATTRS_o_ai
-vec_vslh(vector short __a, vector unsigned short __b)
-{
+static vector short __ATTRS_o_ai vec_vslh(vector short __a,
+                                          vector unsigned short __b) {
   return vec_sl(__a, __b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vslh(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vslh(vector unsigned short __a,
+                                                   vector unsigned short __b) {
   return vec_sl(__a, __b);
 }
 
@@ -5630,15 +5361,13 @@
 
 #define __builtin_altivec_vslw vec_vslw
 
-static vector int __ATTRS_o_ai
-vec_vslw(vector int __a, vector unsigned int __b)
-{
+static vector int __ATTRS_o_ai vec_vslw(vector int __a,
+                                        vector unsigned int __b) {
   return vec_sl(__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vslw(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vslw(vector unsigned int __a,
+                                                 vector unsigned int __b) {
   return vec_sl(__a, __b);
 }
 
@@ -5646,847 +5375,789 @@
 
 #define __builtin_altivec_vsldoi_4si vec_sld
 
-static vector signed char __ATTRS_o_ai
-vec_sld(vector signed char __a, vector signed char __b, unsigned char __c)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
-     __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+static vector signed char __ATTRS_o_ai vec_sld(vector signed char __a,
+                                               vector signed char __b,
+                                               unsigned char __c) {
+  return vec_perm(
+      __a, __b,
+      (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5,
+                             __c + 6, __c + 7, __c + 8, __c + 9, __c + 10,
+                             __c + 11, __c + 12, __c + 13, __c + 14, __c + 15));
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_sld(vector unsigned char __a, vector unsigned char __b, unsigned char __c)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
-     __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+static vector unsigned char __ATTRS_o_ai vec_sld(vector unsigned char __a,
+                                                 vector unsigned char __b,
+                                                 unsigned char __c) {
+  return vec_perm(
+      __a, __b,
+      (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5,
+                             __c + 6, __c + 7, __c + 8, __c + 9, __c + 10,
+                             __c + 11, __c + 12, __c + 13, __c + 14, __c + 15));
 }
 
-static vector short __ATTRS_o_ai
-vec_sld(vector short __a, vector short __b, unsigned char __c)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
-     __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+static vector short __ATTRS_o_ai vec_sld(vector short __a, vector short __b,
+                                         unsigned char __c) {
+  return vec_perm(
+      __a, __b,
+      (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5,
+                             __c + 6, __c + 7, __c + 8, __c + 9, __c + 10,
+                             __c + 11, __c + 12, __c + 13, __c + 14, __c + 15));
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_sld(vector unsigned short __a, vector unsigned short __b, unsigned char __c)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
-     __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+static vector unsigned short __ATTRS_o_ai vec_sld(vector unsigned short __a,
+                                                  vector unsigned short __b,
+                                                  unsigned char __c) {
+  return vec_perm(
+      __a, __b,
+      (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5,
+                             __c + 6, __c + 7, __c + 8, __c + 9, __c + 10,
+                             __c + 11, __c + 12, __c + 13, __c + 14, __c + 15));
 }
 
-static vector pixel __ATTRS_o_ai
-vec_sld(vector pixel __a, vector pixel __b, unsigned char __c)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
-     __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+static vector pixel __ATTRS_o_ai vec_sld(vector pixel __a, vector pixel __b,
+                                         unsigned char __c) {
+  return vec_perm(
+      __a, __b,
+      (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5,
+                             __c + 6, __c + 7, __c + 8, __c + 9, __c + 10,
+                             __c + 11, __c + 12, __c + 13, __c + 14, __c + 15));
 }
 
-static vector int __ATTRS_o_ai
-vec_sld(vector int __a, vector int __b, unsigned char __c)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
-     __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+static vector int __ATTRS_o_ai vec_sld(vector int __a, vector int __b,
+                                       unsigned char __c) {
+  return vec_perm(
+      __a, __b,
+      (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5,
+                             __c + 6, __c + 7, __c + 8, __c + 9, __c + 10,
+                             __c + 11, __c + 12, __c + 13, __c + 14, __c + 15));
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_sld(vector unsigned int __a, vector unsigned int __b, unsigned char __c)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
-     __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+static vector unsigned int __ATTRS_o_ai vec_sld(vector unsigned int __a,
+                                                vector unsigned int __b,
+                                                unsigned char __c) {
+  return vec_perm(
+      __a, __b,
+      (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5,
+                             __c + 6, __c + 7, __c + 8, __c + 9, __c + 10,
+                             __c + 11, __c + 12, __c + 13, __c + 14, __c + 15));
 }
 
-static vector float __ATTRS_o_ai
-vec_sld(vector float __a, vector float __b, unsigned char __c)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
-     __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+static vector float __ATTRS_o_ai vec_sld(vector float __a, vector float __b,
+                                         unsigned char __c) {
+  return vec_perm(
+      __a, __b,
+      (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5,
+                             __c + 6, __c + 7, __c + 8, __c + 9, __c + 10,
+                             __c + 11, __c + 12, __c + 13, __c + 14, __c + 15));
 }
 
 /* vec_vsldoi */
 
-static vector signed char __ATTRS_o_ai
-vec_vsldoi(vector signed char __a, vector signed char __b, unsigned char __c)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
-     __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+static vector signed char __ATTRS_o_ai vec_vsldoi(vector signed char __a,
+                                                  vector signed char __b,
+                                                  unsigned char __c) {
+  return vec_perm(
+      __a, __b,
+      (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5,
+                             __c + 6, __c + 7, __c + 8, __c + 9, __c + 10,
+                             __c + 11, __c + 12, __c + 13, __c + 14, __c + 15));
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vsldoi(vector unsigned char __a, vector unsigned char __b, unsigned char __c)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
-     __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+static vector unsigned char __ATTRS_o_ai vec_vsldoi(vector unsigned char __a,
+                                                    vector unsigned char __b,
+                                                    unsigned char __c) {
+  return vec_perm(
+      __a, __b,
+      (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5,
+                             __c + 6, __c + 7, __c + 8, __c + 9, __c + 10,
+                             __c + 11, __c + 12, __c + 13, __c + 14, __c + 15));
 }
 
-static vector short __ATTRS_o_ai
-vec_vsldoi(vector short __a, vector short __b, unsigned char __c)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
-     __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+static vector short __ATTRS_o_ai vec_vsldoi(vector short __a, vector short __b,
+                                            unsigned char __c) {
+  return vec_perm(
+      __a, __b,
+      (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5,
+                             __c + 6, __c + 7, __c + 8, __c + 9, __c + 10,
+                             __c + 11, __c + 12, __c + 13, __c + 14, __c + 15));
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vsldoi(vector unsigned short __a, vector unsigned short __b, unsigned char __c)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
-     __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+static vector unsigned short __ATTRS_o_ai vec_vsldoi(vector unsigned short __a,
+                                                     vector unsigned short __b,
+                                                     unsigned char __c) {
+  return vec_perm(
+      __a, __b,
+      (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5,
+                             __c + 6, __c + 7, __c + 8, __c + 9, __c + 10,
+                             __c + 11, __c + 12, __c + 13, __c + 14, __c + 15));
 }
 
-static vector pixel __ATTRS_o_ai
-vec_vsldoi(vector pixel __a, vector pixel __b, unsigned char __c)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
-     __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+static vector pixel __ATTRS_o_ai vec_vsldoi(vector pixel __a, vector pixel __b,
+                                            unsigned char __c) {
+  return vec_perm(
+      __a, __b,
+      (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5,
+                             __c + 6, __c + 7, __c + 8, __c + 9, __c + 10,
+                             __c + 11, __c + 12, __c + 13, __c + 14, __c + 15));
 }
 
-static vector int __ATTRS_o_ai
-vec_vsldoi(vector int __a, vector int __b, unsigned char __c)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
-     __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+static vector int __ATTRS_o_ai vec_vsldoi(vector int __a, vector int __b,
+                                          unsigned char __c) {
+  return vec_perm(
+      __a, __b,
+      (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5,
+                             __c + 6, __c + 7, __c + 8, __c + 9, __c + 10,
+                             __c + 11, __c + 12, __c + 13, __c + 14, __c + 15));
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vsldoi(vector unsigned int __a, vector unsigned int __b, unsigned char __c)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
-     __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+static vector unsigned int __ATTRS_o_ai vec_vsldoi(vector unsigned int __a,
+                                                   vector unsigned int __b,
+                                                   unsigned char __c) {
+  return vec_perm(
+      __a, __b,
+      (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5,
+                             __c + 6, __c + 7, __c + 8, __c + 9, __c + 10,
+                             __c + 11, __c + 12, __c + 13, __c + 14, __c + 15));
 }
 
-static vector float __ATTRS_o_ai
-vec_vsldoi(vector float __a, vector float __b, unsigned char __c)
-{
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
-     __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
+static vector float __ATTRS_o_ai vec_vsldoi(vector float __a, vector float __b,
+                                            unsigned char __c) {
+  return vec_perm(
+      __a, __b,
+      (vector unsigned char)(__c, __c + 1, __c + 2, __c + 3, __c + 4, __c + 5,
+                             __c + 6, __c + 7, __c + 8, __c + 9, __c + 10,
+                             __c + 11, __c + 12, __c + 13, __c + 14, __c + 15));
 }
 
 /* vec_sll */
 
-static vector signed char __ATTRS_o_ai
-vec_sll(vector signed char __a, vector unsigned char __b)
-{
-  return (vector signed char)
-           __builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector signed char __ATTRS_o_ai vec_sll(vector signed char __a,
+                                               vector unsigned char __b) {
+  return (vector signed char)__builtin_altivec_vsl((vector int)__a,
+                                                   (vector int)__b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_sll(vector signed char __a, vector unsigned short __b)
-{
-  return (vector signed char)
-           __builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector signed char __ATTRS_o_ai vec_sll(vector signed char __a,
+                                               vector unsigned short __b) {
+  return (vector signed char)__builtin_altivec_vsl((vector int)__a,
+                                                   (vector int)__b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_sll(vector signed char __a, vector unsigned int __b)
-{
-  return (vector signed char)
-           __builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector signed char __ATTRS_o_ai vec_sll(vector signed char __a,
+                                               vector unsigned int __b) {
+  return (vector signed char)__builtin_altivec_vsl((vector int)__a,
+                                                   (vector int)__b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_sll(vector unsigned char __a, vector unsigned char __b)
-{
-  return (vector unsigned char)
-           __builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector unsigned char __ATTRS_o_ai vec_sll(vector unsigned char __a,
+                                                 vector unsigned char __b) {
+  return (vector unsigned char)__builtin_altivec_vsl((vector int)__a,
+                                                     (vector int)__b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_sll(vector unsigned char __a, vector unsigned short __b)
-{
-  return (vector unsigned char)
-           __builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector unsigned char __ATTRS_o_ai vec_sll(vector unsigned char __a,
+                                                 vector unsigned short __b) {
+  return (vector unsigned char)__builtin_altivec_vsl((vector int)__a,
+                                                     (vector int)__b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_sll(vector unsigned char __a, vector unsigned int __b)
-{
-  return (vector unsigned char)
-           __builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector unsigned char __ATTRS_o_ai vec_sll(vector unsigned char __a,
+                                                 vector unsigned int __b) {
+  return (vector unsigned char)__builtin_altivec_vsl((vector int)__a,
+                                                     (vector int)__b);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_sll(vector bool char __a, vector unsigned char __b)
-{
-  return (vector bool char)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector bool char __ATTRS_o_ai vec_sll(vector bool char __a,
+                                             vector unsigned char __b) {
+  return (vector bool char)__builtin_altivec_vsl((vector int)__a,
+                                                 (vector int)__b);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_sll(vector bool char __a, vector unsigned short __b)
-{
-  return (vector bool char)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector bool char __ATTRS_o_ai vec_sll(vector bool char __a,
+                                             vector unsigned short __b) {
+  return (vector bool char)__builtin_altivec_vsl((vector int)__a,
+                                                 (vector int)__b);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_sll(vector bool char __a, vector unsigned int __b)
-{
-  return (vector bool char)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector bool char __ATTRS_o_ai vec_sll(vector bool char __a,
+                                             vector unsigned int __b) {
+  return (vector bool char)__builtin_altivec_vsl((vector int)__a,
+                                                 (vector int)__b);
 }
 
-static vector short __ATTRS_o_ai
-vec_sll(vector short __a, vector unsigned char __b)
-{
+static vector short __ATTRS_o_ai vec_sll(vector short __a,
+                                         vector unsigned char __b) {
   return (vector short)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
 }
 
-static vector short __ATTRS_o_ai
-vec_sll(vector short __a, vector unsigned short __b)
-{
+static vector short __ATTRS_o_ai vec_sll(vector short __a,
+                                         vector unsigned short __b) {
   return (vector short)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
 }
 
-static vector short __ATTRS_o_ai
-vec_sll(vector short __a, vector unsigned int __b)
-{
+static vector short __ATTRS_o_ai vec_sll(vector short __a,
+                                         vector unsigned int __b) {
   return (vector short)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_sll(vector unsigned short __a, vector unsigned char __b)
-{
-  return (vector unsigned short)
-           __builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector unsigned short __ATTRS_o_ai vec_sll(vector unsigned short __a,
+                                                  vector unsigned char __b) {
+  return (vector unsigned short)__builtin_altivec_vsl((vector int)__a,
+                                                      (vector int)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_sll(vector unsigned short __a, vector unsigned short __b)
-{
-  return (vector unsigned short)
-           __builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector unsigned short __ATTRS_o_ai vec_sll(vector unsigned short __a,
+                                                  vector unsigned short __b) {
+  return (vector unsigned short)__builtin_altivec_vsl((vector int)__a,
+                                                      (vector int)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_sll(vector unsigned short __a, vector unsigned int __b)
-{
-  return (vector unsigned short)
-           __builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector unsigned short __ATTRS_o_ai vec_sll(vector unsigned short __a,
+                                                  vector unsigned int __b) {
+  return (vector unsigned short)__builtin_altivec_vsl((vector int)__a,
+                                                      (vector int)__b);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_sll(vector bool short __a, vector unsigned char __b)
-{
-  return (vector bool short)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector bool short __ATTRS_o_ai vec_sll(vector bool short __a,
+                                              vector unsigned char __b) {
+  return (vector bool short)__builtin_altivec_vsl((vector int)__a,
+                                                  (vector int)__b);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_sll(vector bool short __a, vector unsigned short __b)
-{
-  return (vector bool short)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector bool short __ATTRS_o_ai vec_sll(vector bool short __a,
+                                              vector unsigned short __b) {
+  return (vector bool short)__builtin_altivec_vsl((vector int)__a,
+                                                  (vector int)__b);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_sll(vector bool short __a, vector unsigned int __b)
-{
-  return (vector bool short)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector bool short __ATTRS_o_ai vec_sll(vector bool short __a,
+                                              vector unsigned int __b) {
+  return (vector bool short)__builtin_altivec_vsl((vector int)__a,
+                                                  (vector int)__b);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_sll(vector pixel __a, vector unsigned char __b)
-{
+static vector pixel __ATTRS_o_ai vec_sll(vector pixel __a,
+                                         vector unsigned char __b) {
   return (vector pixel)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_sll(vector pixel __a, vector unsigned short __b)
-{
+static vector pixel __ATTRS_o_ai vec_sll(vector pixel __a,
+                                         vector unsigned short __b) {
   return (vector pixel)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_sll(vector pixel __a, vector unsigned int __b)
-{
+static vector pixel __ATTRS_o_ai vec_sll(vector pixel __a,
+                                         vector unsigned int __b) {
   return (vector pixel)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
 }
 
-static vector int __ATTRS_o_ai
-vec_sll(vector int __a, vector unsigned char __b)
-{
+static vector int __ATTRS_o_ai vec_sll(vector int __a,
+                                       vector unsigned char __b) {
   return (vector int)__builtin_altivec_vsl(__a, (vector int)__b);
 }
 
-static vector int __ATTRS_o_ai
-vec_sll(vector int __a, vector unsigned short __b)
-{
+static vector int __ATTRS_o_ai vec_sll(vector int __a,
+                                       vector unsigned short __b) {
   return (vector int)__builtin_altivec_vsl(__a, (vector int)__b);
 }
 
-static vector int __ATTRS_o_ai
-vec_sll(vector int __a, vector unsigned int __b)
-{
+static vector int __ATTRS_o_ai vec_sll(vector int __a,
+                                       vector unsigned int __b) {
   return (vector int)__builtin_altivec_vsl(__a, (vector int)__b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_sll(vector unsigned int __a, vector unsigned char __b)
-{
-  return (vector unsigned int)
-           __builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector unsigned int __ATTRS_o_ai vec_sll(vector unsigned int __a,
+                                                vector unsigned char __b) {
+  return (vector unsigned int)__builtin_altivec_vsl((vector int)__a,
+                                                    (vector int)__b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_sll(vector unsigned int __a, vector unsigned short __b)
-{
-  return (vector unsigned int)
-           __builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector unsigned int __ATTRS_o_ai vec_sll(vector unsigned int __a,
+                                                vector unsigned short __b) {
+  return (vector unsigned int)__builtin_altivec_vsl((vector int)__a,
+                                                    (vector int)__b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_sll(vector unsigned int __a, vector unsigned int __b)
-{
-  return (vector unsigned int)
-           __builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector unsigned int __ATTRS_o_ai vec_sll(vector unsigned int __a,
+                                                vector unsigned int __b) {
+  return (vector unsigned int)__builtin_altivec_vsl((vector int)__a,
+                                                    (vector int)__b);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_sll(vector bool int __a, vector unsigned char __b)
-{
-  return (vector bool int)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector bool int __ATTRS_o_ai vec_sll(vector bool int __a,
+                                            vector unsigned char __b) {
+  return (vector bool int)__builtin_altivec_vsl((vector int)__a,
+                                                (vector int)__b);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_sll(vector bool int __a, vector unsigned short __b)
-{
-  return (vector bool int)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector bool int __ATTRS_o_ai vec_sll(vector bool int __a,
+                                            vector unsigned short __b) {
+  return (vector bool int)__builtin_altivec_vsl((vector int)__a,
+                                                (vector int)__b);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_sll(vector bool int __a, vector unsigned int __b)
-{
-  return (vector bool int)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector bool int __ATTRS_o_ai vec_sll(vector bool int __a,
+                                            vector unsigned int __b) {
+  return (vector bool int)__builtin_altivec_vsl((vector int)__a,
+                                                (vector int)__b);
 }
 
 /* vec_vsl */
 
-static vector signed char __ATTRS_o_ai
-vec_vsl(vector signed char __a, vector unsigned char __b)
-{
-  return (vector signed char)
-           __builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector signed char __ATTRS_o_ai vec_vsl(vector signed char __a,
+                                               vector unsigned char __b) {
+  return (vector signed char)__builtin_altivec_vsl((vector int)__a,
+                                                   (vector int)__b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vsl(vector signed char __a, vector unsigned short __b)
-{
-  return (vector signed char)
-           __builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector signed char __ATTRS_o_ai vec_vsl(vector signed char __a,
+                                               vector unsigned short __b) {
+  return (vector signed char)__builtin_altivec_vsl((vector int)__a,
+                                                   (vector int)__b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vsl(vector signed char __a, vector unsigned int __b)
-{
-  return (vector signed char)
-           __builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector signed char __ATTRS_o_ai vec_vsl(vector signed char __a,
+                                               vector unsigned int __b) {
+  return (vector signed char)__builtin_altivec_vsl((vector int)__a,
+                                                   (vector int)__b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vsl(vector unsigned char __a, vector unsigned char __b)
-{
-  return (vector unsigned char)
-           __builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector unsigned char __ATTRS_o_ai vec_vsl(vector unsigned char __a,
+                                                 vector unsigned char __b) {
+  return (vector unsigned char)__builtin_altivec_vsl((vector int)__a,
+                                                     (vector int)__b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vsl(vector unsigned char __a, vector unsigned short __b)
-{
-  return (vector unsigned char)
-           __builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector unsigned char __ATTRS_o_ai vec_vsl(vector unsigned char __a,
+                                                 vector unsigned short __b) {
+  return (vector unsigned char)__builtin_altivec_vsl((vector int)__a,
+                                                     (vector int)__b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vsl(vector unsigned char __a, vector unsigned int __b)
-{
-  return (vector unsigned char)
-           __builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector unsigned char __ATTRS_o_ai vec_vsl(vector unsigned char __a,
+                                                 vector unsigned int __b) {
+  return (vector unsigned char)__builtin_altivec_vsl((vector int)__a,
+                                                     (vector int)__b);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_vsl(vector bool char __a, vector unsigned char __b)
-{
-  return (vector bool char)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector bool char __ATTRS_o_ai vec_vsl(vector bool char __a,
+                                             vector unsigned char __b) {
+  return (vector bool char)__builtin_altivec_vsl((vector int)__a,
+                                                 (vector int)__b);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_vsl(vector bool char __a, vector unsigned short __b)
-{
-  return (vector bool char)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector bool char __ATTRS_o_ai vec_vsl(vector bool char __a,
+                                             vector unsigned short __b) {
+  return (vector bool char)__builtin_altivec_vsl((vector int)__a,
+                                                 (vector int)__b);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_vsl(vector bool char __a, vector unsigned int __b)
-{
-  return (vector bool char)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector bool char __ATTRS_o_ai vec_vsl(vector bool char __a,
+                                             vector unsigned int __b) {
+  return (vector bool char)__builtin_altivec_vsl((vector int)__a,
+                                                 (vector int)__b);
 }
 
-static vector short __ATTRS_o_ai
-vec_vsl(vector short __a, vector unsigned char __b)
-{
+static vector short __ATTRS_o_ai vec_vsl(vector short __a,
+                                         vector unsigned char __b) {
   return (vector short)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
 }
 
-static vector short __ATTRS_o_ai
-vec_vsl(vector short __a, vector unsigned short __b)
-{
+static vector short __ATTRS_o_ai vec_vsl(vector short __a,
+                                         vector unsigned short __b) {
   return (vector short)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
 }
 
-static vector short __ATTRS_o_ai
-vec_vsl(vector short __a, vector unsigned int __b)
-{
+static vector short __ATTRS_o_ai vec_vsl(vector short __a,
+                                         vector unsigned int __b) {
   return (vector short)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vsl(vector unsigned short __a, vector unsigned char __b)
-{
-  return (vector unsigned short)
-           __builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector unsigned short __ATTRS_o_ai vec_vsl(vector unsigned short __a,
+                                                  vector unsigned char __b) {
+  return (vector unsigned short)__builtin_altivec_vsl((vector int)__a,
+                                                      (vector int)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vsl(vector unsigned short __a, vector unsigned short __b)
-{
-  return (vector unsigned short)
-           __builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector unsigned short __ATTRS_o_ai vec_vsl(vector unsigned short __a,
+                                                  vector unsigned short __b) {
+  return (vector unsigned short)__builtin_altivec_vsl((vector int)__a,
+                                                      (vector int)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vsl(vector unsigned short __a, vector unsigned int __b)
-{
-  return (vector unsigned short)
-           __builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector unsigned short __ATTRS_o_ai vec_vsl(vector unsigned short __a,
+                                                  vector unsigned int __b) {
+  return (vector unsigned short)__builtin_altivec_vsl((vector int)__a,
+                                                      (vector int)__b);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_vsl(vector bool short __a, vector unsigned char __b)
-{
-  return (vector bool short)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector bool short __ATTRS_o_ai vec_vsl(vector bool short __a,
+                                              vector unsigned char __b) {
+  return (vector bool short)__builtin_altivec_vsl((vector int)__a,
+                                                  (vector int)__b);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_vsl(vector bool short __a, vector unsigned short __b)
-{
-  return (vector bool short)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector bool short __ATTRS_o_ai vec_vsl(vector bool short __a,
+                                              vector unsigned short __b) {
+  return (vector bool short)__builtin_altivec_vsl((vector int)__a,
+                                                  (vector int)__b);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_vsl(vector bool short __a, vector unsigned int __b)
-{
-  return (vector bool short)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector bool short __ATTRS_o_ai vec_vsl(vector bool short __a,
+                                              vector unsigned int __b) {
+  return (vector bool short)__builtin_altivec_vsl((vector int)__a,
+                                                  (vector int)__b);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_vsl(vector pixel __a, vector unsigned char __b)
-{
+static vector pixel __ATTRS_o_ai vec_vsl(vector pixel __a,
+                                         vector unsigned char __b) {
   return (vector pixel)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_vsl(vector pixel __a, vector unsigned short __b)
-{
+static vector pixel __ATTRS_o_ai vec_vsl(vector pixel __a,
+                                         vector unsigned short __b) {
   return (vector pixel)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_vsl(vector pixel __a, vector unsigned int __b)
-{
+static vector pixel __ATTRS_o_ai vec_vsl(vector pixel __a,
+                                         vector unsigned int __b) {
   return (vector pixel)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
 }
 
-static vector int __ATTRS_o_ai
-vec_vsl(vector int __a, vector unsigned char __b)
-{
+static vector int __ATTRS_o_ai vec_vsl(vector int __a,
+                                       vector unsigned char __b) {
   return (vector int)__builtin_altivec_vsl(__a, (vector int)__b);
 }
 
-static vector int __ATTRS_o_ai
-vec_vsl(vector int __a, vector unsigned short __b)
-{
+static vector int __ATTRS_o_ai vec_vsl(vector int __a,
+                                       vector unsigned short __b) {
   return (vector int)__builtin_altivec_vsl(__a, (vector int)__b);
 }
 
-static vector int __ATTRS_o_ai
-vec_vsl(vector int __a, vector unsigned int __b)
-{
+static vector int __ATTRS_o_ai vec_vsl(vector int __a,
+                                       vector unsigned int __b) {
   return (vector int)__builtin_altivec_vsl(__a, (vector int)__b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vsl(vector unsigned int __a, vector unsigned char __b)
-{
-  return (vector unsigned int)
-           __builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector unsigned int __ATTRS_o_ai vec_vsl(vector unsigned int __a,
+                                                vector unsigned char __b) {
+  return (vector unsigned int)__builtin_altivec_vsl((vector int)__a,
+                                                    (vector int)__b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vsl(vector unsigned int __a, vector unsigned short __b)
-{
-  return (vector unsigned int)
-           __builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector unsigned int __ATTRS_o_ai vec_vsl(vector unsigned int __a,
+                                                vector unsigned short __b) {
+  return (vector unsigned int)__builtin_altivec_vsl((vector int)__a,
+                                                    (vector int)__b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vsl(vector unsigned int __a, vector unsigned int __b)
-{
-  return (vector unsigned int)
-           __builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector unsigned int __ATTRS_o_ai vec_vsl(vector unsigned int __a,
+                                                vector unsigned int __b) {
+  return (vector unsigned int)__builtin_altivec_vsl((vector int)__a,
+                                                    (vector int)__b);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_vsl(vector bool int __a, vector unsigned char __b)
-{
-  return (vector bool int)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector bool int __ATTRS_o_ai vec_vsl(vector bool int __a,
+                                            vector unsigned char __b) {
+  return (vector bool int)__builtin_altivec_vsl((vector int)__a,
+                                                (vector int)__b);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_vsl(vector bool int __a, vector unsigned short __b)
-{
-  return (vector bool int)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector bool int __ATTRS_o_ai vec_vsl(vector bool int __a,
+                                            vector unsigned short __b) {
+  return (vector bool int)__builtin_altivec_vsl((vector int)__a,
+                                                (vector int)__b);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_vsl(vector bool int __a, vector unsigned int __b)
-{
-  return (vector bool int)__builtin_altivec_vsl((vector int)__a, (vector int)__b);
+static vector bool int __ATTRS_o_ai vec_vsl(vector bool int __a,
+                                            vector unsigned int __b) {
+  return (vector bool int)__builtin_altivec_vsl((vector int)__a,
+                                                (vector int)__b);
 }
 
 /* vec_slo */
 
-static vector signed char __ATTRS_o_ai
-vec_slo(vector signed char __a, vector signed char __b)
-{
-  return (vector signed char)
-           __builtin_altivec_vslo((vector int)__a, (vector int)__b);
+static vector signed char __ATTRS_o_ai vec_slo(vector signed char __a,
+                                               vector signed char __b) {
+  return (vector signed char)__builtin_altivec_vslo((vector int)__a,
+                                                    (vector int)__b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_slo(vector signed char __a, vector unsigned char __b)
-{
-  return (vector signed char)
-           __builtin_altivec_vslo((vector int)__a, (vector int)__b);
+static vector signed char __ATTRS_o_ai vec_slo(vector signed char __a,
+                                               vector unsigned char __b) {
+  return (vector signed char)__builtin_altivec_vslo((vector int)__a,
+                                                    (vector int)__b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_slo(vector unsigned char __a, vector signed char __b)
-{
-  return (vector unsigned char)
-           __builtin_altivec_vslo((vector int)__a, (vector int)__b);
+static vector unsigned char __ATTRS_o_ai vec_slo(vector unsigned char __a,
+                                                 vector signed char __b) {
+  return (vector unsigned char)__builtin_altivec_vslo((vector int)__a,
+                                                      (vector int)__b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_slo(vector unsigned char __a, vector unsigned char __b)
-{
-  return (vector unsigned char)
-           __builtin_altivec_vslo((vector int)__a, (vector int)__b);
+static vector unsigned char __ATTRS_o_ai vec_slo(vector unsigned char __a,
+                                                 vector unsigned char __b) {
+  return (vector unsigned char)__builtin_altivec_vslo((vector int)__a,
+                                                      (vector int)__b);
 }
 
-static vector short __ATTRS_o_ai
-vec_slo(vector short __a, vector signed char __b)
-{
+static vector short __ATTRS_o_ai vec_slo(vector short __a,
+                                         vector signed char __b) {
   return (vector short)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
 }
 
-static vector short __ATTRS_o_ai
-vec_slo(vector short __a, vector unsigned char __b)
-{
+static vector short __ATTRS_o_ai vec_slo(vector short __a,
+                                         vector unsigned char __b) {
   return (vector short)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_slo(vector unsigned short __a, vector signed char __b)
-{
-  return (vector unsigned short)
-           __builtin_altivec_vslo((vector int)__a, (vector int)__b);
+static vector unsigned short __ATTRS_o_ai vec_slo(vector unsigned short __a,
+                                                  vector signed char __b) {
+  return (vector unsigned short)__builtin_altivec_vslo((vector int)__a,
+                                                       (vector int)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_slo(vector unsigned short __a, vector unsigned char __b)
-{
-  return (vector unsigned short)
-           __builtin_altivec_vslo((vector int)__a, (vector int)__b);
+static vector unsigned short __ATTRS_o_ai vec_slo(vector unsigned short __a,
+                                                  vector unsigned char __b) {
+  return (vector unsigned short)__builtin_altivec_vslo((vector int)__a,
+                                                       (vector int)__b);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_slo(vector pixel __a, vector signed char __b)
-{
+static vector pixel __ATTRS_o_ai vec_slo(vector pixel __a,
+                                         vector signed char __b) {
   return (vector pixel)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_slo(vector pixel __a, vector unsigned char __b)
-{
+static vector pixel __ATTRS_o_ai vec_slo(vector pixel __a,
+                                         vector unsigned char __b) {
   return (vector pixel)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
 }
 
-static vector int __ATTRS_o_ai
-vec_slo(vector int __a, vector signed char __b)
-{
+static vector int __ATTRS_o_ai vec_slo(vector int __a, vector signed char __b) {
   return (vector int)__builtin_altivec_vslo(__a, (vector int)__b);
 }
 
-static vector int __ATTRS_o_ai
-vec_slo(vector int __a, vector unsigned char __b)
-{
+static vector int __ATTRS_o_ai vec_slo(vector int __a,
+                                       vector unsigned char __b) {
   return (vector int)__builtin_altivec_vslo(__a, (vector int)__b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_slo(vector unsigned int __a, vector signed char __b)
-{
-  return (vector unsigned int)
-           __builtin_altivec_vslo((vector int)__a, (vector int)__b);
+static vector unsigned int __ATTRS_o_ai vec_slo(vector unsigned int __a,
+                                                vector signed char __b) {
+  return (vector unsigned int)__builtin_altivec_vslo((vector int)__a,
+                                                     (vector int)__b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_slo(vector unsigned int __a, vector unsigned char __b)
-{
-  return (vector unsigned int)
-           __builtin_altivec_vslo((vector int)__a, (vector int)__b);
+static vector unsigned int __ATTRS_o_ai vec_slo(vector unsigned int __a,
+                                                vector unsigned char __b) {
+  return (vector unsigned int)__builtin_altivec_vslo((vector int)__a,
+                                                     (vector int)__b);
 }
 
-static vector float __ATTRS_o_ai
-vec_slo(vector float __a, vector signed char __b)
-{
+static vector float __ATTRS_o_ai vec_slo(vector float __a,
+                                         vector signed char __b) {
   return (vector float)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
 }
 
-static vector float __ATTRS_o_ai
-vec_slo(vector float __a, vector unsigned char __b)
-{
+static vector float __ATTRS_o_ai vec_slo(vector float __a,
+                                         vector unsigned char __b) {
   return (vector float)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
 }
 
 /* vec_vslo */
 
-static vector signed char __ATTRS_o_ai
-vec_vslo(vector signed char __a, vector signed char __b)
-{
-  return (vector signed char)
-           __builtin_altivec_vslo((vector int)__a, (vector int)__b);
+static vector signed char __ATTRS_o_ai vec_vslo(vector signed char __a,
+                                                vector signed char __b) {
+  return (vector signed char)__builtin_altivec_vslo((vector int)__a,
+                                                    (vector int)__b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vslo(vector signed char __a, vector unsigned char __b)
-{
-  return (vector signed char)
-           __builtin_altivec_vslo((vector int)__a, (vector int)__b);
+static vector signed char __ATTRS_o_ai vec_vslo(vector signed char __a,
+                                                vector unsigned char __b) {
+  return (vector signed char)__builtin_altivec_vslo((vector int)__a,
+                                                    (vector int)__b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vslo(vector unsigned char __a, vector signed char __b)
-{
-  return (vector unsigned char)
-           __builtin_altivec_vslo((vector int)__a, (vector int)__b);
+static vector unsigned char __ATTRS_o_ai vec_vslo(vector unsigned char __a,
+                                                  vector signed char __b) {
+  return (vector unsigned char)__builtin_altivec_vslo((vector int)__a,
+                                                      (vector int)__b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vslo(vector unsigned char __a, vector unsigned char __b)
-{
-  return (vector unsigned char)
-           __builtin_altivec_vslo((vector int)__a, (vector int)__b);
+static vector unsigned char __ATTRS_o_ai vec_vslo(vector unsigned char __a,
+                                                  vector unsigned char __b) {
+  return (vector unsigned char)__builtin_altivec_vslo((vector int)__a,
+                                                      (vector int)__b);
 }
 
-static vector short __ATTRS_o_ai
-vec_vslo(vector short __a, vector signed char __b)
-{
+static vector short __ATTRS_o_ai vec_vslo(vector short __a,
+                                          vector signed char __b) {
   return (vector short)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
 }
 
-static vector short __ATTRS_o_ai
-vec_vslo(vector short __a, vector unsigned char __b)
-{
+static vector short __ATTRS_o_ai vec_vslo(vector short __a,
+                                          vector unsigned char __b) {
   return (vector short)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vslo(vector unsigned short __a, vector signed char __b)
-{
-  return (vector unsigned short)
-           __builtin_altivec_vslo((vector int)__a, (vector int)__b);
+static vector unsigned short __ATTRS_o_ai vec_vslo(vector unsigned short __a,
+                                                   vector signed char __b) {
+  return (vector unsigned short)__builtin_altivec_vslo((vector int)__a,
+                                                       (vector int)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vslo(vector unsigned short __a, vector unsigned char __b)
-{
-  return (vector unsigned short)
-           __builtin_altivec_vslo((vector int)__a, (vector int)__b);
+static vector unsigned short __ATTRS_o_ai vec_vslo(vector unsigned short __a,
+                                                   vector unsigned char __b) {
+  return (vector unsigned short)__builtin_altivec_vslo((vector int)__a,
+                                                       (vector int)__b);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_vslo(vector pixel __a, vector signed char __b)
-{
+static vector pixel __ATTRS_o_ai vec_vslo(vector pixel __a,
+                                          vector signed char __b) {
   return (vector pixel)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_vslo(vector pixel __a, vector unsigned char __b)
-{
+static vector pixel __ATTRS_o_ai vec_vslo(vector pixel __a,
+                                          vector unsigned char __b) {
   return (vector pixel)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
 }
 
-static vector int __ATTRS_o_ai
-vec_vslo(vector int __a, vector signed char __b)
-{
+static vector int __ATTRS_o_ai vec_vslo(vector int __a,
+                                        vector signed char __b) {
   return (vector int)__builtin_altivec_vslo(__a, (vector int)__b);
 }
 
-static vector int __ATTRS_o_ai
-vec_vslo(vector int __a, vector unsigned char __b)
-{
+static vector int __ATTRS_o_ai vec_vslo(vector int __a,
+                                        vector unsigned char __b) {
   return (vector int)__builtin_altivec_vslo(__a, (vector int)__b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vslo(vector unsigned int __a, vector signed char __b)
-{
-  return (vector unsigned int)
-           __builtin_altivec_vslo((vector int)__a, (vector int)__b);
+static vector unsigned int __ATTRS_o_ai vec_vslo(vector unsigned int __a,
+                                                 vector signed char __b) {
+  return (vector unsigned int)__builtin_altivec_vslo((vector int)__a,
+                                                     (vector int)__b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vslo(vector unsigned int __a, vector unsigned char __b)
-{
-  return (vector unsigned int)
-           __builtin_altivec_vslo((vector int)__a, (vector int)__b);
+static vector unsigned int __ATTRS_o_ai vec_vslo(vector unsigned int __a,
+                                                 vector unsigned char __b) {
+  return (vector unsigned int)__builtin_altivec_vslo((vector int)__a,
+                                                     (vector int)__b);
 }
 
-static vector float __ATTRS_o_ai
-vec_vslo(vector float __a, vector signed char __b)
-{
+static vector float __ATTRS_o_ai vec_vslo(vector float __a,
+                                          vector signed char __b) {
   return (vector float)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
 }
 
-static vector float __ATTRS_o_ai
-vec_vslo(vector float __a, vector unsigned char __b)
-{
+static vector float __ATTRS_o_ai vec_vslo(vector float __a,
+                                          vector unsigned char __b) {
   return (vector float)__builtin_altivec_vslo((vector int)__a, (vector int)__b);
 }
 
 /* vec_splat */
 
-static vector signed char __ATTRS_o_ai
-vec_splat(vector signed char __a, unsigned char __b)
-{
+static vector signed char __ATTRS_o_ai vec_splat(vector signed char __a,
+                                                 unsigned char __b) {
   return vec_perm(__a, __a, (vector unsigned char)(__b));
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_splat(vector unsigned char __a, unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_splat(vector unsigned char __a,
+                                                   unsigned char __b) {
   return vec_perm(__a, __a, (vector unsigned char)(__b));
 }
 
-static vector bool char __ATTRS_o_ai
-vec_splat(vector bool char __a, unsigned char __b)
-{
+static vector bool char __ATTRS_o_ai vec_splat(vector bool char __a,
+                                               unsigned char __b) {
   return vec_perm(__a, __a, (vector unsigned char)(__b));
 }
 
-static vector short __ATTRS_o_ai
-vec_splat(vector short __a, unsigned char __b)
-{ 
+static vector short __ATTRS_o_ai vec_splat(vector short __a,
+                                           unsigned char __b) {
   __b *= 2;
-  unsigned char b1=__b+1;
-  return vec_perm(__a, __a, (vector unsigned char)
-    (__b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1));
+  unsigned char b1 = __b + 1;
+  return vec_perm(__a, __a,
+                  (vector unsigned char)(__b, b1, __b, b1, __b, b1, __b, b1,
+                                         __b, b1, __b, b1, __b, b1, __b, b1));
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_splat(vector unsigned short __a, unsigned char __b)
-{ 
+static vector unsigned short __ATTRS_o_ai vec_splat(vector unsigned short __a,
+                                                    unsigned char __b) {
   __b *= 2;
-  unsigned char b1=__b+1;
-  return vec_perm(__a, __a, (vector unsigned char)
-    (__b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1));
+  unsigned char b1 = __b + 1;
+  return vec_perm(__a, __a,
+                  (vector unsigned char)(__b, b1, __b, b1, __b, b1, __b, b1,
+                                         __b, b1, __b, b1, __b, b1, __b, b1));
 }
 
-static vector bool short __ATTRS_o_ai
-vec_splat(vector bool short __a, unsigned char __b)
-{ 
+static vector bool short __ATTRS_o_ai vec_splat(vector bool short __a,
+                                                unsigned char __b) {
   __b *= 2;
-  unsigned char b1=__b+1;
-  return vec_perm(__a, __a, (vector unsigned char)
-    (__b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1));
+  unsigned char b1 = __b + 1;
+  return vec_perm(__a, __a,
+                  (vector unsigned char)(__b, b1, __b, b1, __b, b1, __b, b1,
+                                         __b, b1, __b, b1, __b, b1, __b, b1));
 }
 
-static vector pixel __ATTRS_o_ai
-vec_splat(vector pixel __a, unsigned char __b)
-{ 
+static vector pixel __ATTRS_o_ai vec_splat(vector pixel __a,
+                                           unsigned char __b) {
   __b *= 2;
-  unsigned char b1=__b+1;
-  return vec_perm(__a, __a, (vector unsigned char)
-    (__b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1));
+  unsigned char b1 = __b + 1;
+  return vec_perm(__a, __a,
+                  (vector unsigned char)(__b, b1, __b, b1, __b, b1, __b, b1,
+                                         __b, b1, __b, b1, __b, b1, __b, b1));
 }
 
-static vector int __ATTRS_o_ai
-vec_splat(vector int __a, unsigned char __b)
-{ 
+static vector int __ATTRS_o_ai vec_splat(vector int __a, unsigned char __b) {
   __b *= 4;
-  unsigned char b1=__b+1, b2=__b+2, b3=__b+3;
-  return vec_perm(__a, __a, (vector unsigned char)
-    (__b, b1, b2, b3, __b, b1, b2, b3, __b, b1, b2, b3, __b, b1, b2, b3));
+  unsigned char b1 = __b + 1, b2 = __b + 2, b3 = __b + 3;
+  return vec_perm(__a, __a,
+                  (vector unsigned char)(__b, b1, b2, b3, __b, b1, b2, b3, __b,
+                                         b1, b2, b3, __b, b1, b2, b3));
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_splat(vector unsigned int __a, unsigned char __b)
-{ 
+static vector unsigned int __ATTRS_o_ai vec_splat(vector unsigned int __a,
+                                                  unsigned char __b) {
   __b *= 4;
-  unsigned char b1=__b+1, b2=__b+2, b3=__b+3;
-  return vec_perm(__a, __a, (vector unsigned char)
-    (__b, b1, b2, b3, __b, b1, b2, b3, __b, b1, b2, b3, __b, b1, b2, b3));
+  unsigned char b1 = __b + 1, b2 = __b + 2, b3 = __b + 3;
+  return vec_perm(__a, __a,
+                  (vector unsigned char)(__b, b1, b2, b3, __b, b1, b2, b3, __b,
+                                         b1, b2, b3, __b, b1, b2, b3));
 }
 
-static vector bool int __ATTRS_o_ai
-vec_splat(vector bool int __a, unsigned char __b)
-{ 
+static vector bool int __ATTRS_o_ai vec_splat(vector bool int __a,
+                                              unsigned char __b) {
   __b *= 4;
-  unsigned char b1=__b+1, b2=__b+2, b3=__b+3;
-  return vec_perm(__a, __a, (vector unsigned char)
-    (__b, b1, b2, b3, __b, b1, b2, b3, __b, b1, b2, b3, __b, b1, b2, b3));
+  unsigned char b1 = __b + 1, b2 = __b + 2, b3 = __b + 3;
+  return vec_perm(__a, __a,
+                  (vector unsigned char)(__b, b1, b2, b3, __b, b1, b2, b3, __b,
+                                         b1, b2, b3, __b, b1, b2, b3));
 }
 
-static vector float __ATTRS_o_ai
-vec_splat(vector float __a, unsigned char __b)
-{ 
+static vector float __ATTRS_o_ai vec_splat(vector float __a,
+                                           unsigned char __b) {
   __b *= 4;
-  unsigned char b1=__b+1, b2=__b+2, b3=__b+3;
-  return vec_perm(__a, __a, (vector unsigned char)
-    (__b, b1, b2, b3, __b, b1, b2, b3, __b, b1, b2, b3, __b, b1, b2, b3));
+  unsigned char b1 = __b + 1, b2 = __b + 2, b3 = __b + 3;
+  return vec_perm(__a, __a,
+                  (vector unsigned char)(__b, b1, b2, b3, __b, b1, b2, b3, __b,
+                                         b1, b2, b3, __b, b1, b2, b3));
 }
 
 /* vec_vspltb */
 
 #define __builtin_altivec_vspltb vec_vspltb
 
-static vector signed char __ATTRS_o_ai
-vec_vspltb(vector signed char __a, unsigned char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vspltb(vector signed char __a,
+                                                  unsigned char __b) {
   return vec_perm(__a, __a, (vector unsigned char)(__b));
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vspltb(vector unsigned char __a, unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vspltb(vector unsigned char __a,
+                                                    unsigned char __b) {
   return vec_perm(__a, __a, (vector unsigned char)(__b));
 }
 
-static vector bool char __ATTRS_o_ai
-vec_vspltb(vector bool char __a, unsigned char __b)
-{
+static vector bool char __ATTRS_o_ai vec_vspltb(vector bool char __a,
+                                                unsigned char __b) {
   return vec_perm(__a, __a, (vector unsigned char)(__b));
 }
 
@@ -6494,80 +6165,79 @@
 
 #define __builtin_altivec_vsplth vec_vsplth
 
-static vector short __ATTRS_o_ai
-vec_vsplth(vector short __a, unsigned char __b)
-{
+static vector short __ATTRS_o_ai vec_vsplth(vector short __a,
+                                            unsigned char __b) {
   __b *= 2;
-  unsigned char b1=__b+1;
-  return vec_perm(__a, __a, (vector unsigned char)
-    (__b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1));
+  unsigned char b1 = __b + 1;
+  return vec_perm(__a, __a,
+                  (vector unsigned char)(__b, b1, __b, b1, __b, b1, __b, b1,
+                                         __b, b1, __b, b1, __b, b1, __b, b1));
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vsplth(vector unsigned short __a, unsigned char __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vsplth(vector unsigned short __a,
+                                                     unsigned char __b) {
   __b *= 2;
-  unsigned char b1=__b+1;
-  return vec_perm(__a, __a, (vector unsigned char)
-    (__b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1));
+  unsigned char b1 = __b + 1;
+  return vec_perm(__a, __a,
+                  (vector unsigned char)(__b, b1, __b, b1, __b, b1, __b, b1,
+                                         __b, b1, __b, b1, __b, b1, __b, b1));
 }
 
-static vector bool short __ATTRS_o_ai
-vec_vsplth(vector bool short __a, unsigned char __b)
-{
+static vector bool short __ATTRS_o_ai vec_vsplth(vector bool short __a,
+                                                 unsigned char __b) {
   __b *= 2;
-  unsigned char b1=__b+1;
-  return vec_perm(__a, __a, (vector unsigned char)
-    (__b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1));
+  unsigned char b1 = __b + 1;
+  return vec_perm(__a, __a,
+                  (vector unsigned char)(__b, b1, __b, b1, __b, b1, __b, b1,
+                                         __b, b1, __b, b1, __b, b1, __b, b1));
 }
 
-static vector pixel __ATTRS_o_ai
-vec_vsplth(vector pixel __a, unsigned char __b)
-{
+static vector pixel __ATTRS_o_ai vec_vsplth(vector pixel __a,
+                                            unsigned char __b) {
   __b *= 2;
-  unsigned char b1=__b+1;
-  return vec_perm(__a, __a, (vector unsigned char)
-    (__b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1, __b, b1));
+  unsigned char b1 = __b + 1;
+  return vec_perm(__a, __a,
+                  (vector unsigned char)(__b, b1, __b, b1, __b, b1, __b, b1,
+                                         __b, b1, __b, b1, __b, b1, __b, b1));
 }
 
 /* vec_vspltw */
 
 #define __builtin_altivec_vspltw vec_vspltw
 
-static vector int __ATTRS_o_ai
-vec_vspltw(vector int __a, unsigned char __b)
-{
+static vector int __ATTRS_o_ai vec_vspltw(vector int __a, unsigned char __b) {
   __b *= 4;
-  unsigned char b1=__b+1, b2=__b+2, b3=__b+3;
-  return vec_perm(__a, __a, (vector unsigned char)
-    (__b, b1, b2, b3, __b, b1, b2, b3, __b, b1, b2, b3, __b, b1, b2, b3));
+  unsigned char b1 = __b + 1, b2 = __b + 2, b3 = __b + 3;
+  return vec_perm(__a, __a,
+                  (vector unsigned char)(__b, b1, b2, b3, __b, b1, b2, b3, __b,
+                                         b1, b2, b3, __b, b1, b2, b3));
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vspltw(vector unsigned int __a, unsigned char __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vspltw(vector unsigned int __a,
+                                                   unsigned char __b) {
   __b *= 4;
-  unsigned char b1=__b+1, b2=__b+2, b3=__b+3;
-  return vec_perm(__a, __a, (vector unsigned char)
-    (__b, b1, b2, b3, __b, b1, b2, b3, __b, b1, b2, b3, __b, b1, b2, b3));
+  unsigned char b1 = __b + 1, b2 = __b + 2, b3 = __b + 3;
+  return vec_perm(__a, __a,
+                  (vector unsigned char)(__b, b1, b2, b3, __b, b1, b2, b3, __b,
+                                         b1, b2, b3, __b, b1, b2, b3));
 }
 
-static vector bool int __ATTRS_o_ai
-vec_vspltw(vector bool int __a, unsigned char __b)
-{
+static vector bool int __ATTRS_o_ai vec_vspltw(vector bool int __a,
+                                               unsigned char __b) {
   __b *= 4;
-  unsigned char b1=__b+1, b2=__b+2, b3=__b+3;
-  return vec_perm(__a, __a, (vector unsigned char)
-    (__b, b1, b2, b3, __b, b1, b2, b3, __b, b1, b2, b3, __b, b1, b2, b3));
+  unsigned char b1 = __b + 1, b2 = __b + 2, b3 = __b + 3;
+  return vec_perm(__a, __a,
+                  (vector unsigned char)(__b, b1, b2, b3, __b, b1, b2, b3, __b,
+                                         b1, b2, b3, __b, b1, b2, b3));
 }
 
-static vector float __ATTRS_o_ai
-vec_vspltw(vector float __a, unsigned char __b)
-{
+static vector float __ATTRS_o_ai vec_vspltw(vector float __a,
+                                            unsigned char __b) {
   __b *= 4;
-  unsigned char b1=__b+1, b2=__b+2, b3=__b+3;
-  return vec_perm(__a, __a, (vector unsigned char)
-    (__b, b1, b2, b3, __b, b1, b2, b3, __b, b1, b2, b3, __b, b1, b2, b3));
+  unsigned char b1 = __b + 1, b2 = __b + 2, b3 = __b + 3;
+  return vec_perm(__a, __a,
+                  (vector unsigned char)(__b, b1, b2, b3, __b, b1, b2, b3, __b,
+                                         b1, b2, b3, __b, b1, b2, b3));
 }
 
 /* vec_splat_s8 */
@@ -6575,18 +6245,14 @@
 #define __builtin_altivec_vspltisb vec_splat_s8
 
 // FIXME: parameter should be treated as 5-bit signed literal
-static vector signed char __ATTRS_o_ai
-vec_splat_s8(signed char __a)
-{
+static vector signed char __ATTRS_o_ai vec_splat_s8(signed char __a) {
   return (vector signed char)(__a);
 }
 
 /* vec_vspltisb */
 
 // FIXME: parameter should be treated as 5-bit signed literal
-static vector signed char __ATTRS_o_ai
-vec_vspltisb(signed char __a)
-{
+static vector signed char __ATTRS_o_ai vec_vspltisb(signed char __a) {
   return (vector signed char)(__a);
 }
 
@@ -6595,18 +6261,14 @@
 #define __builtin_altivec_vspltish vec_splat_s16
 
 // FIXME: parameter should be treated as 5-bit signed literal
-static vector short __ATTRS_o_ai
-vec_splat_s16(signed char __a)
-{
+static vector short __ATTRS_o_ai vec_splat_s16(signed char __a) {
   return (vector short)(__a);
 }
 
 /* vec_vspltish */
 
 // FIXME: parameter should be treated as 5-bit signed literal
-static vector short __ATTRS_o_ai
-vec_vspltish(signed char __a)
-{
+static vector short __ATTRS_o_ai vec_vspltish(signed char __a) {
   return (vector short)(__a);
 }
 
@@ -6615,96 +6277,77 @@
 #define __builtin_altivec_vspltisw vec_splat_s32
 
 // FIXME: parameter should be treated as 5-bit signed literal
-static vector int __ATTRS_o_ai
-vec_splat_s32(signed char __a)
-{
+static vector int __ATTRS_o_ai vec_splat_s32(signed char __a) {
   return (vector int)(__a);
 }
 
 /* vec_vspltisw */
 
 // FIXME: parameter should be treated as 5-bit signed literal
-static vector int __ATTRS_o_ai
-vec_vspltisw(signed char __a)
-{
+static vector int __ATTRS_o_ai vec_vspltisw(signed char __a) {
   return (vector int)(__a);
 }
 
 /* vec_splat_u8 */
 
 // FIXME: parameter should be treated as 5-bit signed literal
-static vector unsigned char __ATTRS_o_ai
-vec_splat_u8(unsigned char __a)
-{
+static vector unsigned char __ATTRS_o_ai vec_splat_u8(unsigned char __a) {
   return (vector unsigned char)(__a);
 }
 
 /* vec_splat_u16 */
 
 // FIXME: parameter should be treated as 5-bit signed literal
-static vector unsigned short __ATTRS_o_ai
-vec_splat_u16(signed char __a)
-{
+static vector unsigned short __ATTRS_o_ai vec_splat_u16(signed char __a) {
   return (vector unsigned short)(__a);
 }
 
 /* vec_splat_u32 */
 
 // FIXME: parameter should be treated as 5-bit signed literal
-static vector unsigned int __ATTRS_o_ai
-vec_splat_u32(signed char __a)
-{
+static vector unsigned int __ATTRS_o_ai vec_splat_u32(signed char __a) {
   return (vector unsigned int)(__a);
 }
 
 /* vec_sr */
 
-static vector signed char __ATTRS_o_ai
-vec_sr(vector signed char __a, vector unsigned char __b)
-{
+static vector signed char __ATTRS_o_ai vec_sr(vector signed char __a,
+                                              vector unsigned char __b) {
   return __a >> (vector signed char)__b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_sr(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_sr(vector unsigned char __a,
+                                                vector unsigned char __b) {
   return __a >> __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_sr(vector short __a, vector unsigned short __b)
-{
+static vector short __ATTRS_o_ai vec_sr(vector short __a,
+                                        vector unsigned short __b) {
   return __a >> (vector short)__b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_sr(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_sr(vector unsigned short __a,
+                                                 vector unsigned short __b) {
   return __a >> __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_sr(vector int __a, vector unsigned int __b)
-{
+static vector int __ATTRS_o_ai vec_sr(vector int __a, vector unsigned int __b) {
   return __a >> (vector int)__b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_sr(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_sr(vector unsigned int __a,
+                                               vector unsigned int __b) {
   return __a >> __b;
 }
 
 #ifdef __POWER8_VECTOR__
 static vector signed long long __ATTRS_o_ai
-vec_sr(vector signed long long __a, vector unsigned long long __b)
-{
+vec_sr(vector signed long long __a, vector unsigned long long __b) {
   return __a >> (vector long long)__b;
 }
 
 static vector unsigned long long __ATTRS_o_ai
-vec_sr(vector unsigned long long __a, vector unsigned long long __b)
-{
+vec_sr(vector unsigned long long __a, vector unsigned long long __b) {
   return __a >> __b;
 }
 #endif
@@ -6713,15 +6356,13 @@
 
 #define __builtin_altivec_vsrb vec_vsrb
 
-static vector signed char __ATTRS_o_ai
-vec_vsrb(vector signed char __a, vector unsigned char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vsrb(vector signed char __a,
+                                                vector unsigned char __b) {
   return __a >> (vector signed char)__b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vsrb(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vsrb(vector unsigned char __a,
+                                                  vector unsigned char __b) {
   return __a >> __b;
 }
 
@@ -6729,15 +6370,13 @@
 
 #define __builtin_altivec_vsrh vec_vsrh
 
-static vector short __ATTRS_o_ai
-vec_vsrh(vector short __a, vector unsigned short __b)
-{
+static vector short __ATTRS_o_ai vec_vsrh(vector short __a,
+                                          vector unsigned short __b) {
   return __a >> (vector short)__b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vsrh(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vsrh(vector unsigned short __a,
+                                                   vector unsigned short __b) {
   return __a >> __b;
 }
 
@@ -6745,1645 +6384,1363 @@
 
 #define __builtin_altivec_vsrw vec_vsrw
 
-static vector int __ATTRS_o_ai
-vec_vsrw(vector int __a, vector unsigned int __b)
-{
+static vector int __ATTRS_o_ai vec_vsrw(vector int __a,
+                                        vector unsigned int __b) {
   return __a >> (vector int)__b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vsrw(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vsrw(vector unsigned int __a,
+                                                 vector unsigned int __b) {
   return __a >> __b;
 }
 
 /* vec_sra */
 
-static vector signed char __ATTRS_o_ai
-vec_sra(vector signed char __a, vector unsigned char __b)
-{
+static vector signed char __ATTRS_o_ai vec_sra(vector signed char __a,
+                                               vector unsigned char __b) {
   return (vector signed char)__builtin_altivec_vsrab((vector char)__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_sra(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_sra(vector unsigned char __a,
+                                                 vector unsigned char __b) {
   return (vector unsigned char)__builtin_altivec_vsrab((vector char)__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_sra(vector short __a, vector unsigned short __b)
-{
+static vector short __ATTRS_o_ai vec_sra(vector short __a,
+                                         vector unsigned short __b) {
   return __builtin_altivec_vsrah(__a, (vector unsigned short)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_sra(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_sra(vector unsigned short __a,
+                                                  vector unsigned short __b) {
   return (vector unsigned short)__builtin_altivec_vsrah((vector short)__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_sra(vector int __a, vector unsigned int __b)
-{
+static vector int __ATTRS_o_ai vec_sra(vector int __a,
+                                       vector unsigned int __b) {
   return __builtin_altivec_vsraw(__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_sra(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_sra(vector unsigned int __a,
+                                                vector unsigned int __b) {
   return (vector unsigned int)__builtin_altivec_vsraw((vector int)__a, __b);
 }
 
 #ifdef __POWER8_VECTOR__
 static vector signed long long __ATTRS_o_ai
-vec_sra(vector signed long long __a, vector unsigned long long __b)
-{
+vec_sra(vector signed long long __a, vector unsigned long long __b) {
   return __a >> __b;
 }
 
 static vector unsigned long long __ATTRS_o_ai
-vec_sra(vector unsigned long long __a, vector unsigned long long __b)
-{
-  return (vector unsigned long long) ( (vector signed long long) __a >> __b);
+vec_sra(vector unsigned long long __a, vector unsigned long long __b) {
+  return (vector unsigned long long)((vector signed long long)__a >> __b);
 }
 #endif
 
 /* vec_vsrab */
 
-static vector signed char __ATTRS_o_ai
-vec_vsrab(vector signed char __a, vector unsigned char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vsrab(vector signed char __a,
+                                                 vector unsigned char __b) {
   return (vector signed char)__builtin_altivec_vsrab((vector char)__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vsrab(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vsrab(vector unsigned char __a,
+                                                   vector unsigned char __b) {
   return (vector unsigned char)__builtin_altivec_vsrab((vector char)__a, __b);
 }
 
 /* vec_vsrah */
 
-static vector short __ATTRS_o_ai
-vec_vsrah(vector short __a, vector unsigned short __b)
-{
+static vector short __ATTRS_o_ai vec_vsrah(vector short __a,
+                                           vector unsigned short __b) {
   return __builtin_altivec_vsrah(__a, (vector unsigned short)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vsrah(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vsrah(vector unsigned short __a,
+                                                    vector unsigned short __b) {
   return (vector unsigned short)__builtin_altivec_vsrah((vector short)__a, __b);
 }
 
 /* vec_vsraw */
 
-static vector int __ATTRS_o_ai
-vec_vsraw(vector int __a, vector unsigned int __b)
-{
+static vector int __ATTRS_o_ai vec_vsraw(vector int __a,
+                                         vector unsigned int __b) {
   return __builtin_altivec_vsraw(__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vsraw(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vsraw(vector unsigned int __a,
+                                                  vector unsigned int __b) {
   return (vector unsigned int)__builtin_altivec_vsraw((vector int)__a, __b);
 }
 
 /* vec_srl */
 
-static vector signed char __ATTRS_o_ai
-vec_srl(vector signed char __a, vector unsigned char __b)
-{
-  return (vector signed char)
-           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector signed char __ATTRS_o_ai vec_srl(vector signed char __a,
+                                               vector unsigned char __b) {
+  return (vector signed char)__builtin_altivec_vsr((vector int)__a,
+                                                   (vector int)__b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_srl(vector signed char __a, vector unsigned short __b)
-{
-  return (vector signed char)
-           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector signed char __ATTRS_o_ai vec_srl(vector signed char __a,
+                                               vector unsigned short __b) {
+  return (vector signed char)__builtin_altivec_vsr((vector int)__a,
+                                                   (vector int)__b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_srl(vector signed char __a, vector unsigned int __b)
-{
-  return (vector signed char)
-           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector signed char __ATTRS_o_ai vec_srl(vector signed char __a,
+                                               vector unsigned int __b) {
+  return (vector signed char)__builtin_altivec_vsr((vector int)__a,
+                                                   (vector int)__b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_srl(vector unsigned char __a, vector unsigned char __b)
-{
-  return (vector unsigned char)
-           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector unsigned char __ATTRS_o_ai vec_srl(vector unsigned char __a,
+                                                 vector unsigned char __b) {
+  return (vector unsigned char)__builtin_altivec_vsr((vector int)__a,
+                                                     (vector int)__b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_srl(vector unsigned char __a, vector unsigned short __b)
-{
-  return (vector unsigned char)
-           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector unsigned char __ATTRS_o_ai vec_srl(vector unsigned char __a,
+                                                 vector unsigned short __b) {
+  return (vector unsigned char)__builtin_altivec_vsr((vector int)__a,
+                                                     (vector int)__b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_srl(vector unsigned char __a, vector unsigned int __b)
-{
-  return (vector unsigned char)
-           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector unsigned char __ATTRS_o_ai vec_srl(vector unsigned char __a,
+                                                 vector unsigned int __b) {
+  return (vector unsigned char)__builtin_altivec_vsr((vector int)__a,
+                                                     (vector int)__b);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_srl(vector bool char __a, vector unsigned char __b)
-{
-  return (vector bool char)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector bool char __ATTRS_o_ai vec_srl(vector bool char __a,
+                                             vector unsigned char __b) {
+  return (vector bool char)__builtin_altivec_vsr((vector int)__a,
+                                                 (vector int)__b);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_srl(vector bool char __a, vector unsigned short __b)
-{
-  return (vector bool char)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector bool char __ATTRS_o_ai vec_srl(vector bool char __a,
+                                             vector unsigned short __b) {
+  return (vector bool char)__builtin_altivec_vsr((vector int)__a,
+                                                 (vector int)__b);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_srl(vector bool char __a, vector unsigned int __b)
-{
-  return (vector bool char)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector bool char __ATTRS_o_ai vec_srl(vector bool char __a,
+                                             vector unsigned int __b) {
+  return (vector bool char)__builtin_altivec_vsr((vector int)__a,
+                                                 (vector int)__b);
 }
 
-static vector short __ATTRS_o_ai
-vec_srl(vector short __a, vector unsigned char __b)
-{
+static vector short __ATTRS_o_ai vec_srl(vector short __a,
+                                         vector unsigned char __b) {
   return (vector short)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
 }
 
-static vector short __ATTRS_o_ai
-vec_srl(vector short __a, vector unsigned short __b)
-{
+static vector short __ATTRS_o_ai vec_srl(vector short __a,
+                                         vector unsigned short __b) {
   return (vector short)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
 }
 
-static vector short __ATTRS_o_ai
-vec_srl(vector short __a, vector unsigned int __b)
-{
+static vector short __ATTRS_o_ai vec_srl(vector short __a,
+                                         vector unsigned int __b) {
   return (vector short)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_srl(vector unsigned short __a, vector unsigned char __b)
-{
-  return (vector unsigned short)
-           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector unsigned short __ATTRS_o_ai vec_srl(vector unsigned short __a,
+                                                  vector unsigned char __b) {
+  return (vector unsigned short)__builtin_altivec_vsr((vector int)__a,
+                                                      (vector int)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_srl(vector unsigned short __a, vector unsigned short __b)
-{
-  return (vector unsigned short)
-           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector unsigned short __ATTRS_o_ai vec_srl(vector unsigned short __a,
+                                                  vector unsigned short __b) {
+  return (vector unsigned short)__builtin_altivec_vsr((vector int)__a,
+                                                      (vector int)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_srl(vector unsigned short __a, vector unsigned int __b)
-{
-  return (vector unsigned short)
-           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector unsigned short __ATTRS_o_ai vec_srl(vector unsigned short __a,
+                                                  vector unsigned int __b) {
+  return (vector unsigned short)__builtin_altivec_vsr((vector int)__a,
+                                                      (vector int)__b);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_srl(vector bool short __a, vector unsigned char __b)
-{
-  return (vector bool short)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector bool short __ATTRS_o_ai vec_srl(vector bool short __a,
+                                              vector unsigned char __b) {
+  return (vector bool short)__builtin_altivec_vsr((vector int)__a,
+                                                  (vector int)__b);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_srl(vector bool short __a, vector unsigned short __b)
-{
-  return (vector bool short)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector bool short __ATTRS_o_ai vec_srl(vector bool short __a,
+                                              vector unsigned short __b) {
+  return (vector bool short)__builtin_altivec_vsr((vector int)__a,
+                                                  (vector int)__b);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_srl(vector bool short __a, vector unsigned int __b)
-{
-  return (vector bool short)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector bool short __ATTRS_o_ai vec_srl(vector bool short __a,
+                                              vector unsigned int __b) {
+  return (vector bool short)__builtin_altivec_vsr((vector int)__a,
+                                                  (vector int)__b);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_srl(vector pixel __a, vector unsigned char __b)
-{
+static vector pixel __ATTRS_o_ai vec_srl(vector pixel __a,
+                                         vector unsigned char __b) {
   return (vector pixel)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_srl(vector pixel __a, vector unsigned short __b)
-{
+static vector pixel __ATTRS_o_ai vec_srl(vector pixel __a,
+                                         vector unsigned short __b) {
   return (vector pixel)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_srl(vector pixel __a, vector unsigned int __b)
-{
+static vector pixel __ATTRS_o_ai vec_srl(vector pixel __a,
+                                         vector unsigned int __b) {
   return (vector pixel)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
 }
 
-static vector int __ATTRS_o_ai
-vec_srl(vector int __a, vector unsigned char __b)
-{
+static vector int __ATTRS_o_ai vec_srl(vector int __a,
+                                       vector unsigned char __b) {
   return (vector int)__builtin_altivec_vsr(__a, (vector int)__b);
 }
 
-static vector int __ATTRS_o_ai
-vec_srl(vector int __a, vector unsigned short __b)
-{
+static vector int __ATTRS_o_ai vec_srl(vector int __a,
+                                       vector unsigned short __b) {
   return (vector int)__builtin_altivec_vsr(__a, (vector int)__b);
 }
 
-static vector int __ATTRS_o_ai
-vec_srl(vector int __a, vector unsigned int __b)
-{
+static vector int __ATTRS_o_ai vec_srl(vector int __a,
+                                       vector unsigned int __b) {
   return (vector int)__builtin_altivec_vsr(__a, (vector int)__b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_srl(vector unsigned int __a, vector unsigned char __b)
-{
-  return (vector unsigned int)
-           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector unsigned int __ATTRS_o_ai vec_srl(vector unsigned int __a,
+                                                vector unsigned char __b) {
+  return (vector unsigned int)__builtin_altivec_vsr((vector int)__a,
+                                                    (vector int)__b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_srl(vector unsigned int __a, vector unsigned short __b)
-{
-  return (vector unsigned int)
-           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector unsigned int __ATTRS_o_ai vec_srl(vector unsigned int __a,
+                                                vector unsigned short __b) {
+  return (vector unsigned int)__builtin_altivec_vsr((vector int)__a,
+                                                    (vector int)__b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_srl(vector unsigned int __a, vector unsigned int __b)
-{
-  return (vector unsigned int)
-           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector unsigned int __ATTRS_o_ai vec_srl(vector unsigned int __a,
+                                                vector unsigned int __b) {
+  return (vector unsigned int)__builtin_altivec_vsr((vector int)__a,
+                                                    (vector int)__b);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_srl(vector bool int __a, vector unsigned char __b)
-{
-  return (vector bool int)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector bool int __ATTRS_o_ai vec_srl(vector bool int __a,
+                                            vector unsigned char __b) {
+  return (vector bool int)__builtin_altivec_vsr((vector int)__a,
+                                                (vector int)__b);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_srl(vector bool int __a, vector unsigned short __b)
-{
-  return (vector bool int)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector bool int __ATTRS_o_ai vec_srl(vector bool int __a,
+                                            vector unsigned short __b) {
+  return (vector bool int)__builtin_altivec_vsr((vector int)__a,
+                                                (vector int)__b);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_srl(vector bool int __a, vector unsigned int __b)
-{
-  return (vector bool int)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector bool int __ATTRS_o_ai vec_srl(vector bool int __a,
+                                            vector unsigned int __b) {
+  return (vector bool int)__builtin_altivec_vsr((vector int)__a,
+                                                (vector int)__b);
 }
 
 /* vec_vsr */
 
-static vector signed char __ATTRS_o_ai
-vec_vsr(vector signed char __a, vector unsigned char __b)
-{
-  return (vector signed char)
-           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector signed char __ATTRS_o_ai vec_vsr(vector signed char __a,
+                                               vector unsigned char __b) {
+  return (vector signed char)__builtin_altivec_vsr((vector int)__a,
+                                                   (vector int)__b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vsr(vector signed char __a, vector unsigned short __b)
-{
-  return (vector signed char)
-           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector signed char __ATTRS_o_ai vec_vsr(vector signed char __a,
+                                               vector unsigned short __b) {
+  return (vector signed char)__builtin_altivec_vsr((vector int)__a,
+                                                   (vector int)__b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vsr(vector signed char __a, vector unsigned int __b)
-{
-  return (vector signed char)
-           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector signed char __ATTRS_o_ai vec_vsr(vector signed char __a,
+                                               vector unsigned int __b) {
+  return (vector signed char)__builtin_altivec_vsr((vector int)__a,
+                                                   (vector int)__b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vsr(vector unsigned char __a, vector unsigned char __b)
-{
-  return (vector unsigned char)
-           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector unsigned char __ATTRS_o_ai vec_vsr(vector unsigned char __a,
+                                                 vector unsigned char __b) {
+  return (vector unsigned char)__builtin_altivec_vsr((vector int)__a,
+                                                     (vector int)__b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vsr(vector unsigned char __a, vector unsigned short __b)
-{
-  return (vector unsigned char)
-           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector unsigned char __ATTRS_o_ai vec_vsr(vector unsigned char __a,
+                                                 vector unsigned short __b) {
+  return (vector unsigned char)__builtin_altivec_vsr((vector int)__a,
+                                                     (vector int)__b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vsr(vector unsigned char __a, vector unsigned int __b)
-{
-  return (vector unsigned char)
-           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector unsigned char __ATTRS_o_ai vec_vsr(vector unsigned char __a,
+                                                 vector unsigned int __b) {
+  return (vector unsigned char)__builtin_altivec_vsr((vector int)__a,
+                                                     (vector int)__b);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_vsr(vector bool char __a, vector unsigned char __b)
-{
-  return (vector bool char)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector bool char __ATTRS_o_ai vec_vsr(vector bool char __a,
+                                             vector unsigned char __b) {
+  return (vector bool char)__builtin_altivec_vsr((vector int)__a,
+                                                 (vector int)__b);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_vsr(vector bool char __a, vector unsigned short __b)
-{
-  return (vector bool char)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector bool char __ATTRS_o_ai vec_vsr(vector bool char __a,
+                                             vector unsigned short __b) {
+  return (vector bool char)__builtin_altivec_vsr((vector int)__a,
+                                                 (vector int)__b);
 }
 
-static vector bool char __ATTRS_o_ai
-vec_vsr(vector bool char __a, vector unsigned int __b)
-{
-  return (vector bool char)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector bool char __ATTRS_o_ai vec_vsr(vector bool char __a,
+                                             vector unsigned int __b) {
+  return (vector bool char)__builtin_altivec_vsr((vector int)__a,
+                                                 (vector int)__b);
 }
 
-static vector short __ATTRS_o_ai
-vec_vsr(vector short __a, vector unsigned char __b)
-{
+static vector short __ATTRS_o_ai vec_vsr(vector short __a,
+                                         vector unsigned char __b) {
   return (vector short)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
 }
 
-static vector short __ATTRS_o_ai
-vec_vsr(vector short __a, vector unsigned short __b)
-{
+static vector short __ATTRS_o_ai vec_vsr(vector short __a,
+                                         vector unsigned short __b) {
   return (vector short)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
 }
 
-static vector short __ATTRS_o_ai
-vec_vsr(vector short __a, vector unsigned int __b)
-{
+static vector short __ATTRS_o_ai vec_vsr(vector short __a,
+                                         vector unsigned int __b) {
   return (vector short)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vsr(vector unsigned short __a, vector unsigned char __b)
-{
-  return (vector unsigned short)
-           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector unsigned short __ATTRS_o_ai vec_vsr(vector unsigned short __a,
+                                                  vector unsigned char __b) {
+  return (vector unsigned short)__builtin_altivec_vsr((vector int)__a,
+                                                      (vector int)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vsr(vector unsigned short __a, vector unsigned short __b)
-{
-  return (vector unsigned short)
-           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector unsigned short __ATTRS_o_ai vec_vsr(vector unsigned short __a,
+                                                  vector unsigned short __b) {
+  return (vector unsigned short)__builtin_altivec_vsr((vector int)__a,
+                                                      (vector int)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vsr(vector unsigned short __a, vector unsigned int __b)
-{
-  return (vector unsigned short)
-           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector unsigned short __ATTRS_o_ai vec_vsr(vector unsigned short __a,
+                                                  vector unsigned int __b) {
+  return (vector unsigned short)__builtin_altivec_vsr((vector int)__a,
+                                                      (vector int)__b);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_vsr(vector bool short __a, vector unsigned char __b)
-{
-  return (vector bool short)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector bool short __ATTRS_o_ai vec_vsr(vector bool short __a,
+                                              vector unsigned char __b) {
+  return (vector bool short)__builtin_altivec_vsr((vector int)__a,
+                                                  (vector int)__b);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_vsr(vector bool short __a, vector unsigned short __b)
-{
-  return (vector bool short)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector bool short __ATTRS_o_ai vec_vsr(vector bool short __a,
+                                              vector unsigned short __b) {
+  return (vector bool short)__builtin_altivec_vsr((vector int)__a,
+                                                  (vector int)__b);
 }
 
-static vector bool short __ATTRS_o_ai
-vec_vsr(vector bool short __a, vector unsigned int __b)
-{
-  return (vector bool short)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector bool short __ATTRS_o_ai vec_vsr(vector bool short __a,
+                                              vector unsigned int __b) {
+  return (vector bool short)__builtin_altivec_vsr((vector int)__a,
+                                                  (vector int)__b);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_vsr(vector pixel __a, vector unsigned char __b)
-{
+static vector pixel __ATTRS_o_ai vec_vsr(vector pixel __a,
+                                         vector unsigned char __b) {
   return (vector pixel)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_vsr(vector pixel __a, vector unsigned short __b)
-{
+static vector pixel __ATTRS_o_ai vec_vsr(vector pixel __a,
+                                         vector unsigned short __b) {
   return (vector pixel)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_vsr(vector pixel __a, vector unsigned int __b)
-{
+static vector pixel __ATTRS_o_ai vec_vsr(vector pixel __a,
+                                         vector unsigned int __b) {
   return (vector pixel)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
 }
 
-static vector int __ATTRS_o_ai
-vec_vsr(vector int __a, vector unsigned char __b)
-{
+static vector int __ATTRS_o_ai vec_vsr(vector int __a,
+                                       vector unsigned char __b) {
   return (vector int)__builtin_altivec_vsr(__a, (vector int)__b);
 }
 
-static vector int __ATTRS_o_ai
-vec_vsr(vector int __a, vector unsigned short __b)
-{
+static vector int __ATTRS_o_ai vec_vsr(vector int __a,
+                                       vector unsigned short __b) {
   return (vector int)__builtin_altivec_vsr(__a, (vector int)__b);
 }
 
-static vector int __ATTRS_o_ai
-vec_vsr(vector int __a, vector unsigned int __b)
-{
+static vector int __ATTRS_o_ai vec_vsr(vector int __a,
+                                       vector unsigned int __b) {
   return (vector int)__builtin_altivec_vsr(__a, (vector int)__b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vsr(vector unsigned int __a, vector unsigned char __b)
-{
-  return (vector unsigned int)
-           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector unsigned int __ATTRS_o_ai vec_vsr(vector unsigned int __a,
+                                                vector unsigned char __b) {
+  return (vector unsigned int)__builtin_altivec_vsr((vector int)__a,
+                                                    (vector int)__b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vsr(vector unsigned int __a, vector unsigned short __b)
-{
-  return (vector unsigned int)
-           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector unsigned int __ATTRS_o_ai vec_vsr(vector unsigned int __a,
+                                                vector unsigned short __b) {
+  return (vector unsigned int)__builtin_altivec_vsr((vector int)__a,
+                                                    (vector int)__b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vsr(vector unsigned int __a, vector unsigned int __b)
-{
-  return (vector unsigned int)
-           __builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector unsigned int __ATTRS_o_ai vec_vsr(vector unsigned int __a,
+                                                vector unsigned int __b) {
+  return (vector unsigned int)__builtin_altivec_vsr((vector int)__a,
+                                                    (vector int)__b);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_vsr(vector bool int __a, vector unsigned char __b)
-{
-  return (vector bool int)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector bool int __ATTRS_o_ai vec_vsr(vector bool int __a,
+                                            vector unsigned char __b) {
+  return (vector bool int)__builtin_altivec_vsr((vector int)__a,
+                                                (vector int)__b);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_vsr(vector bool int __a, vector unsigned short __b)
-{
-  return (vector bool int)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector bool int __ATTRS_o_ai vec_vsr(vector bool int __a,
+                                            vector unsigned short __b) {
+  return (vector bool int)__builtin_altivec_vsr((vector int)__a,
+                                                (vector int)__b);
 }
 
-static vector bool int __ATTRS_o_ai
-vec_vsr(vector bool int __a, vector unsigned int __b)
-{
-  return (vector bool int)__builtin_altivec_vsr((vector int)__a, (vector int)__b);
+static vector bool int __ATTRS_o_ai vec_vsr(vector bool int __a,
+                                            vector unsigned int __b) {
+  return (vector bool int)__builtin_altivec_vsr((vector int)__a,
+                                                (vector int)__b);
 }
 
 /* vec_sro */
 
-static vector signed char __ATTRS_o_ai
-vec_sro(vector signed char __a, vector signed char __b)
-{
-  return (vector signed char)
-           __builtin_altivec_vsro((vector int)__a, (vector int)__b);
+static vector signed char __ATTRS_o_ai vec_sro(vector signed char __a,
+                                               vector signed char __b) {
+  return (vector signed char)__builtin_altivec_vsro((vector int)__a,
+                                                    (vector int)__b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_sro(vector signed char __a, vector unsigned char __b)
-{
-  return (vector signed char)
-           __builtin_altivec_vsro((vector int)__a, (vector int)__b);
+static vector signed char __ATTRS_o_ai vec_sro(vector signed char __a,
+                                               vector unsigned char __b) {
+  return (vector signed char)__builtin_altivec_vsro((vector int)__a,
+                                                    (vector int)__b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_sro(vector unsigned char __a, vector signed char __b)
-{
-  return (vector unsigned char)
-           __builtin_altivec_vsro((vector int)__a, (vector int)__b);
+static vector unsigned char __ATTRS_o_ai vec_sro(vector unsigned char __a,
+                                                 vector signed char __b) {
+  return (vector unsigned char)__builtin_altivec_vsro((vector int)__a,
+                                                      (vector int)__b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_sro(vector unsigned char __a, vector unsigned char __b)
-{
-  return (vector unsigned char)
-           __builtin_altivec_vsro((vector int)__a, (vector int)__b);
+static vector unsigned char __ATTRS_o_ai vec_sro(vector unsigned char __a,
+                                                 vector unsigned char __b) {
+  return (vector unsigned char)__builtin_altivec_vsro((vector int)__a,
+                                                      (vector int)__b);
 }
 
-static vector short __ATTRS_o_ai
-vec_sro(vector short __a, vector signed char __b)
-{
+static vector short __ATTRS_o_ai vec_sro(vector short __a,
+                                         vector signed char __b) {
   return (vector short)__builtin_altivec_vsro((vector int)__a, (vector int)__b);
 }
 
-static vector short __ATTRS_o_ai
-vec_sro(vector short __a, vector unsigned char __b)
-{
+static vector short __ATTRS_o_ai vec_sro(vector short __a,
+                                         vector unsigned char __b) {
   return (vector short)__builtin_altivec_vsro((vector int)__a, (vector int)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_sro(vector unsigned short __a, vector signed char __b)
-{
-  return (vector unsigned short)
-           __builtin_altivec_vsro((vector int)__a, (vector int)__b);
+static vector unsigned short __ATTRS_o_ai vec_sro(vector unsigned short __a,
+                                                  vector signed char __b) {
+  return (vector unsigned short)__builtin_altivec_vsro((vector int)__a,
+                                                       (vector int)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_sro(vector unsigned short __a, vector unsigned char __b)
-{
-  return (vector unsigned short)
-           __builtin_altivec_vsro((vector int)__a, (vector int)__b);
+static vector unsigned short __ATTRS_o_ai vec_sro(vector unsigned short __a,
+                                                  vector unsigned char __b) {
+  return (vector unsigned short)__builtin_altivec_vsro((vector int)__a,
+                                                       (vector int)__b);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_sro(vector pixel __a, vector signed char __b)
-{
+static vector pixel __ATTRS_o_ai vec_sro(vector pixel __a,
+                                         vector signed char __b) {
   return (vector pixel)__builtin_altivec_vsro((vector int)__a, (vector int)__b);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_sro(vector pixel __a, vector unsigned char __b)
-{
+static vector pixel __ATTRS_o_ai vec_sro(vector pixel __a,
+                                         vector unsigned char __b) {
   return (vector pixel)__builtin_altivec_vsro((vector int)__a, (vector int)__b);
 }
 
-static vector int __ATTRS_o_ai
-vec_sro(vector int __a, vector signed char __b)
-{
+static vector int __ATTRS_o_ai vec_sro(vector int __a, vector signed char __b) {
   return (vector int)__builtin_altivec_vsro(__a, (vector int)__b);
 }
 
-static vector int __ATTRS_o_ai
-vec_sro(vector int __a, vector unsigned char __b)
-{
+static vector int __ATTRS_o_ai vec_sro(vector int __a,
+                                       vector unsigned char __b) {
   return (vector int)__builtin_altivec_vsro(__a, (vector int)__b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_sro(vector unsigned int __a, vector signed char __b)
-{
-  return (vector unsigned int)
-           __builtin_altivec_vsro((vector int)__a, (vector int)__b);
+static vector unsigned int __ATTRS_o_ai vec_sro(vector unsigned int __a,
+                                                vector signed char __b) {
+  return (vector unsigned int)__builtin_altivec_vsro((vector int)__a,
+                                                     (vector int)__b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_sro(vector unsigned int __a, vector unsigned char __b)
-{
-  return (vector unsigned int)
-           __builtin_altivec_vsro((vector int)__a, (vector int)__b);
+static vector unsigned int __ATTRS_o_ai vec_sro(vector unsigned int __a,
+                                                vector unsigned char __b) {
+  return (vector unsigned int)__builtin_altivec_vsro((vector int)__a,
+                                                     (vector int)__b);
 }
 
-static vector float __ATTRS_o_ai
-vec_sro(vector float __a, vector signed char __b)
-{
+static vector float __ATTRS_o_ai vec_sro(vector float __a,
+                                         vector signed char __b) {
   return (vector float)__builtin_altivec_vsro((vector int)__a, (vector int)__b);
 }
 
-static vector float __ATTRS_o_ai
-vec_sro(vector float __a, vector unsigned char __b)
-{
+static vector float __ATTRS_o_ai vec_sro(vector float __a,
+                                         vector unsigned char __b) {
   return (vector float)__builtin_altivec_vsro((vector int)__a, (vector int)__b);
 }
 
 /* vec_vsro */
 
-static vector signed char __ATTRS_o_ai
-vec_vsro(vector signed char __a, vector signed char __b)
-{
-  return (vector signed char)
-           __builtin_altivec_vsro((vector int)__a, (vector int)__b);
+static vector signed char __ATTRS_o_ai vec_vsro(vector signed char __a,
+                                                vector signed char __b) {
+  return (vector signed char)__builtin_altivec_vsro((vector int)__a,
+                                                    (vector int)__b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vsro(vector signed char __a, vector unsigned char __b)
-{
-  return (vector signed char)
-           __builtin_altivec_vsro((vector int)__a, (vector int)__b);
+static vector signed char __ATTRS_o_ai vec_vsro(vector signed char __a,
+                                                vector unsigned char __b) {
+  return (vector signed char)__builtin_altivec_vsro((vector int)__a,
+                                                    (vector int)__b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vsro(vector unsigned char __a, vector signed char __b)
-{
-  return (vector unsigned char)
-           __builtin_altivec_vsro((vector int)__a, (vector int)__b);
+static vector unsigned char __ATTRS_o_ai vec_vsro(vector unsigned char __a,
+                                                  vector signed char __b) {
+  return (vector unsigned char)__builtin_altivec_vsro((vector int)__a,
+                                                      (vector int)__b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vsro(vector unsigned char __a, vector unsigned char __b)
-{
-  return (vector unsigned char)
-           __builtin_altivec_vsro((vector int)__a, (vector int)__b);
+static vector unsigned char __ATTRS_o_ai vec_vsro(vector unsigned char __a,
+                                                  vector unsigned char __b) {
+  return (vector unsigned char)__builtin_altivec_vsro((vector int)__a,
+                                                      (vector int)__b);
 }
 
-static vector short __ATTRS_o_ai
-vec_vsro(vector short __a, vector signed char __b)
-{
+static vector short __ATTRS_o_ai vec_vsro(vector short __a,
+                                          vector signed char __b) {
   return (vector short)__builtin_altivec_vsro((vector int)__a, (vector int)__b);
 }
 
-static vector short __ATTRS_o_ai
-vec_vsro(vector short __a, vector unsigned char __b)
-{
+static vector short __ATTRS_o_ai vec_vsro(vector short __a,
+                                          vector unsigned char __b) {
   return (vector short)__builtin_altivec_vsro((vector int)__a, (vector int)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vsro(vector unsigned short __a, vector signed char __b)
-{
-  return (vector unsigned short)
-           __builtin_altivec_vsro((vector int)__a, (vector int)__b);
+static vector unsigned short __ATTRS_o_ai vec_vsro(vector unsigned short __a,
+                                                   vector signed char __b) {
+  return (vector unsigned short)__builtin_altivec_vsro((vector int)__a,
+                                                       (vector int)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vsro(vector unsigned short __a, vector unsigned char __b)
-{
-  return (vector unsigned short)
-           __builtin_altivec_vsro((vector int)__a, (vector int)__b);
+static vector unsigned short __ATTRS_o_ai vec_vsro(vector unsigned short __a,
+                                                   vector unsigned char __b) {
+  return (vector unsigned short)__builtin_altivec_vsro((vector int)__a,
+                                                       (vector int)__b);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_vsro(vector pixel __a, vector signed char __b)
-{
+static vector pixel __ATTRS_o_ai vec_vsro(vector pixel __a,
+                                          vector signed char __b) {
   return (vector pixel)__builtin_altivec_vsro((vector int)__a, (vector int)__b);
 }
 
-static vector pixel __ATTRS_o_ai
-vec_vsro(vector pixel __a, vector unsigned char __b)
-{
+static vector pixel __ATTRS_o_ai vec_vsro(vector pixel __a,
+                                          vector unsigned char __b) {
   return (vector pixel)__builtin_altivec_vsro((vector int)__a, (vector int)__b);
 }
 
-static vector int __ATTRS_o_ai
-vec_vsro(vector int __a, vector signed char __b)
-{
+static vector int __ATTRS_o_ai vec_vsro(vector int __a,
+                                        vector signed char __b) {
   return (vector int)__builtin_altivec_vsro(__a, (vector int)__b);
 }
 
-static vector int __ATTRS_o_ai
-vec_vsro(vector int __a, vector unsigned char __b)
-{
+static vector int __ATTRS_o_ai vec_vsro(vector int __a,
+                                        vector unsigned char __b) {
   return (vector int)__builtin_altivec_vsro(__a, (vector int)__b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vsro(vector unsigned int __a, vector signed char __b)
-{
-  return (vector unsigned int)
-           __builtin_altivec_vsro((vector int)__a, (vector int)__b);
+static vector unsigned int __ATTRS_o_ai vec_vsro(vector unsigned int __a,
+                                                 vector signed char __b) {
+  return (vector unsigned int)__builtin_altivec_vsro((vector int)__a,
+                                                     (vector int)__b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vsro(vector unsigned int __a, vector unsigned char __b)
-{
-  return (vector unsigned int)
-           __builtin_altivec_vsro((vector int)__a, (vector int)__b);
+static vector unsigned int __ATTRS_o_ai vec_vsro(vector unsigned int __a,
+                                                 vector unsigned char __b) {
+  return (vector unsigned int)__builtin_altivec_vsro((vector int)__a,
+                                                     (vector int)__b);
 }
 
-static vector float __ATTRS_o_ai
-vec_vsro(vector float __a, vector signed char __b)
-{
+static vector float __ATTRS_o_ai vec_vsro(vector float __a,
+                                          vector signed char __b) {
   return (vector float)__builtin_altivec_vsro((vector int)__a, (vector int)__b);
 }
 
-static vector float __ATTRS_o_ai
-vec_vsro(vector float __a, vector unsigned char __b)
-{
+static vector float __ATTRS_o_ai vec_vsro(vector float __a,
+                                          vector unsigned char __b) {
   return (vector float)__builtin_altivec_vsro((vector int)__a, (vector int)__b);
 }
 
 /* vec_st */
 
-static void __ATTRS_o_ai
-vec_st(vector signed char __a, int __b, vector signed char *__c)
-{
+static void __ATTRS_o_ai vec_st(vector signed char __a, int __b,
+                                vector signed char *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector signed char __a, int __b, signed char *__c)
-{
+static void __ATTRS_o_ai vec_st(vector signed char __a, int __b,
+                                signed char *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector unsigned char __a, int __b, vector unsigned char *__c)
-{
+static void __ATTRS_o_ai vec_st(vector unsigned char __a, int __b,
+                                vector unsigned char *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector unsigned char __a, int __b, unsigned char *__c)
-{
+static void __ATTRS_o_ai vec_st(vector unsigned char __a, int __b,
+                                unsigned char *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector bool char __a, int __b, signed char *__c)
-{
+static void __ATTRS_o_ai vec_st(vector bool char __a, int __b,
+                                signed char *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector bool char __a, int __b, unsigned char *__c)
-{
+static void __ATTRS_o_ai vec_st(vector bool char __a, int __b,
+                                unsigned char *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector bool char __a, int __b, vector bool char *__c)
-{
+static void __ATTRS_o_ai vec_st(vector bool char __a, int __b,
+                                vector bool char *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector short __a, int __b, vector short *__c)
-{
+static void __ATTRS_o_ai vec_st(vector short __a, int __b, vector short *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector short __a, int __b, short *__c)
-{
+static void __ATTRS_o_ai vec_st(vector short __a, int __b, short *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector unsigned short __a, int __b, vector unsigned short *__c)
-{
+static void __ATTRS_o_ai vec_st(vector unsigned short __a, int __b,
+                                vector unsigned short *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector unsigned short __a, int __b, unsigned short *__c)
-{
+static void __ATTRS_o_ai vec_st(vector unsigned short __a, int __b,
+                                unsigned short *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector bool short __a, int __b, short *__c)
-{
+static void __ATTRS_o_ai vec_st(vector bool short __a, int __b, short *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector bool short __a, int __b, unsigned short *__c)
-{
+static void __ATTRS_o_ai vec_st(vector bool short __a, int __b,
+                                unsigned short *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector bool short __a, int __b, vector bool short *__c)
-{
+static void __ATTRS_o_ai vec_st(vector bool short __a, int __b,
+                                vector bool short *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector pixel __a, int __b, short *__c)
-{
+static void __ATTRS_o_ai vec_st(vector pixel __a, int __b, short *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector pixel __a, int __b, unsigned short *__c)
-{
+static void __ATTRS_o_ai vec_st(vector pixel __a, int __b,
+                                unsigned short *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector pixel __a, int __b, vector pixel *__c)
-{
+static void __ATTRS_o_ai vec_st(vector pixel __a, int __b, vector pixel *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector int __a, int __b, vector int *__c)
-{
+static void __ATTRS_o_ai vec_st(vector int __a, int __b, vector int *__c) {
   __builtin_altivec_stvx(__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector int __a, int __b, int *__c)
-{
+static void __ATTRS_o_ai vec_st(vector int __a, int __b, int *__c) {
   __builtin_altivec_stvx(__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector unsigned int __a, int __b, vector unsigned int *__c)
-{
+static void __ATTRS_o_ai vec_st(vector unsigned int __a, int __b,
+                                vector unsigned int *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector unsigned int __a, int __b, unsigned int *__c)
-{
+static void __ATTRS_o_ai vec_st(vector unsigned int __a, int __b,
+                                unsigned int *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector bool int __a, int __b, int *__c)
-{
+static void __ATTRS_o_ai vec_st(vector bool int __a, int __b, int *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector bool int __a, int __b, unsigned int *__c)
-{
+static void __ATTRS_o_ai vec_st(vector bool int __a, int __b,
+                                unsigned int *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector bool int __a, int __b, vector bool int *__c)
-{
+static void __ATTRS_o_ai vec_st(vector bool int __a, int __b,
+                                vector bool int *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector float __a, int __b, vector float *__c)
-{
+static void __ATTRS_o_ai vec_st(vector float __a, int __b, vector float *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_st(vector float __a, int __b, float *__c)
-{
+static void __ATTRS_o_ai vec_st(vector float __a, int __b, float *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
 /* vec_stvx */
 
-static void __ATTRS_o_ai
-vec_stvx(vector signed char __a, int __b, vector signed char *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector signed char __a, int __b,
+                                  vector signed char *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector signed char __a, int __b, signed char *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector signed char __a, int __b,
+                                  signed char *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector unsigned char __a, int __b, vector unsigned char *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector unsigned char __a, int __b,
+                                  vector unsigned char *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector unsigned char __a, int __b, unsigned char *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector unsigned char __a, int __b,
+                                  unsigned char *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector bool char __a, int __b, signed char *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector bool char __a, int __b,
+                                  signed char *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector bool char __a, int __b, unsigned char *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector bool char __a, int __b,
+                                  unsigned char *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector bool char __a, int __b, vector bool char *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector bool char __a, int __b,
+                                  vector bool char *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector short __a, int __b, vector short *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector short __a, int __b,
+                                  vector short *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector short __a, int __b, short *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector short __a, int __b, short *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector unsigned short __a, int __b, vector unsigned short *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector unsigned short __a, int __b,
+                                  vector unsigned short *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector unsigned short __a, int __b, unsigned short *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector unsigned short __a, int __b,
+                                  unsigned short *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector bool short __a, int __b, short *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector bool short __a, int __b, short *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector bool short __a, int __b, unsigned short *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector bool short __a, int __b,
+                                  unsigned short *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector bool short __a, int __b, vector bool short *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector bool short __a, int __b,
+                                  vector bool short *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector pixel __a, int __b, short *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector pixel __a, int __b, short *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector pixel __a, int __b, unsigned short *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector pixel __a, int __b,
+                                  unsigned short *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector pixel __a, int __b, vector pixel *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector pixel __a, int __b,
+                                  vector pixel *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector int __a, int __b, vector int *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector int __a, int __b, vector int *__c) {
   __builtin_altivec_stvx(__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector int __a, int __b, int *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector int __a, int __b, int *__c) {
   __builtin_altivec_stvx(__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector unsigned int __a, int __b, vector unsigned int *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector unsigned int __a, int __b,
+                                  vector unsigned int *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector unsigned int __a, int __b, unsigned int *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector unsigned int __a, int __b,
+                                  unsigned int *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector bool int __a, int __b, int *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector bool int __a, int __b, int *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector bool int __a, int __b, unsigned int *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector bool int __a, int __b,
+                                  unsigned int *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector bool int __a, int __b, vector bool int *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector bool int __a, int __b,
+                                  vector bool int *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector float __a, int __b, vector float *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector float __a, int __b,
+                                  vector float *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvx(vector float __a, int __b, float *__c)
-{
+static void __ATTRS_o_ai vec_stvx(vector float __a, int __b, float *__c) {
   __builtin_altivec_stvx((vector int)__a, __b, __c);
 }
 
 /* vec_ste */
 
-static void __ATTRS_o_ai
-vec_ste(vector signed char __a, int __b, signed char *__c)
-{
+static void __ATTRS_o_ai vec_ste(vector signed char __a, int __b,
+                                 signed char *__c) {
   __builtin_altivec_stvebx((vector char)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_ste(vector unsigned char __a, int __b, unsigned char *__c)
-{
+static void __ATTRS_o_ai vec_ste(vector unsigned char __a, int __b,
+                                 unsigned char *__c) {
   __builtin_altivec_stvebx((vector char)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_ste(vector bool char __a, int __b, signed char *__c)
-{
+static void __ATTRS_o_ai vec_ste(vector bool char __a, int __b,
+                                 signed char *__c) {
   __builtin_altivec_stvebx((vector char)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_ste(vector bool char __a, int __b, unsigned char *__c)
-{
+static void __ATTRS_o_ai vec_ste(vector bool char __a, int __b,
+                                 unsigned char *__c) {
   __builtin_altivec_stvebx((vector char)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_ste(vector short __a, int __b, short *__c)
-{
+static void __ATTRS_o_ai vec_ste(vector short __a, int __b, short *__c) {
   __builtin_altivec_stvehx(__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_ste(vector unsigned short __a, int __b, unsigned short *__c)
-{
+static void __ATTRS_o_ai vec_ste(vector unsigned short __a, int __b,
+                                 unsigned short *__c) {
   __builtin_altivec_stvehx((vector short)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_ste(vector bool short __a, int __b, short *__c)
-{
+static void __ATTRS_o_ai vec_ste(vector bool short __a, int __b, short *__c) {
   __builtin_altivec_stvehx((vector short)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_ste(vector bool short __a, int __b, unsigned short *__c)
-{
+static void __ATTRS_o_ai vec_ste(vector bool short __a, int __b,
+                                 unsigned short *__c) {
   __builtin_altivec_stvehx((vector short)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_ste(vector pixel __a, int __b, short *__c)
-{
+static void __ATTRS_o_ai vec_ste(vector pixel __a, int __b, short *__c) {
   __builtin_altivec_stvehx((vector short)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_ste(vector pixel __a, int __b, unsigned short *__c)
-{
+static void __ATTRS_o_ai vec_ste(vector pixel __a, int __b,
+                                 unsigned short *__c) {
   __builtin_altivec_stvehx((vector short)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_ste(vector int __a, int __b, int *__c)
-{
+static void __ATTRS_o_ai vec_ste(vector int __a, int __b, int *__c) {
   __builtin_altivec_stvewx(__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_ste(vector unsigned int __a, int __b, unsigned int *__c)
-{
+static void __ATTRS_o_ai vec_ste(vector unsigned int __a, int __b,
+                                 unsigned int *__c) {
   __builtin_altivec_stvewx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_ste(vector bool int __a, int __b, int *__c)
-{
+static void __ATTRS_o_ai vec_ste(vector bool int __a, int __b, int *__c) {
   __builtin_altivec_stvewx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_ste(vector bool int __a, int __b, unsigned int *__c)
-{
+static void __ATTRS_o_ai vec_ste(vector bool int __a, int __b,
+                                 unsigned int *__c) {
   __builtin_altivec_stvewx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_ste(vector float __a, int __b, float *__c)
-{
+static void __ATTRS_o_ai vec_ste(vector float __a, int __b, float *__c) {
   __builtin_altivec_stvewx((vector int)__a, __b, __c);
 }
 
 /* vec_stvebx */
 
-static void __ATTRS_o_ai
-vec_stvebx(vector signed char __a, int __b, signed char *__c)
-{
+static void __ATTRS_o_ai vec_stvebx(vector signed char __a, int __b,
+                                    signed char *__c) {
   __builtin_altivec_stvebx((vector char)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvebx(vector unsigned char __a, int __b, unsigned char *__c)
-{
+static void __ATTRS_o_ai vec_stvebx(vector unsigned char __a, int __b,
+                                    unsigned char *__c) {
   __builtin_altivec_stvebx((vector char)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvebx(vector bool char __a, int __b, signed char *__c)
-{
+static void __ATTRS_o_ai vec_stvebx(vector bool char __a, int __b,
+                                    signed char *__c) {
   __builtin_altivec_stvebx((vector char)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvebx(vector bool char __a, int __b, unsigned char *__c)
-{
+static void __ATTRS_o_ai vec_stvebx(vector bool char __a, int __b,
+                                    unsigned char *__c) {
   __builtin_altivec_stvebx((vector char)__a, __b, __c);
 }
 
 /* vec_stvehx */
 
-static void __ATTRS_o_ai
-vec_stvehx(vector short __a, int __b, short *__c)
-{
+static void __ATTRS_o_ai vec_stvehx(vector short __a, int __b, short *__c) {
   __builtin_altivec_stvehx(__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvehx(vector unsigned short __a, int __b, unsigned short *__c)
-{
+static void __ATTRS_o_ai vec_stvehx(vector unsigned short __a, int __b,
+                                    unsigned short *__c) {
   __builtin_altivec_stvehx((vector short)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvehx(vector bool short __a, int __b, short *__c)
-{
+static void __ATTRS_o_ai vec_stvehx(vector bool short __a, int __b,
+                                    short *__c) {
   __builtin_altivec_stvehx((vector short)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvehx(vector bool short __a, int __b, unsigned short *__c)
-{
+static void __ATTRS_o_ai vec_stvehx(vector bool short __a, int __b,
+                                    unsigned short *__c) {
   __builtin_altivec_stvehx((vector short)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvehx(vector pixel __a, int __b, short *__c)
-{
+static void __ATTRS_o_ai vec_stvehx(vector pixel __a, int __b, short *__c) {
   __builtin_altivec_stvehx((vector short)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvehx(vector pixel __a, int __b, unsigned short *__c)
-{
+static void __ATTRS_o_ai vec_stvehx(vector pixel __a, int __b,
+                                    unsigned short *__c) {
   __builtin_altivec_stvehx((vector short)__a, __b, __c);
 }
 
 /* vec_stvewx */
 
-static void __ATTRS_o_ai
-vec_stvewx(vector int __a, int __b, int *__c)
-{
+static void __ATTRS_o_ai vec_stvewx(vector int __a, int __b, int *__c) {
   __builtin_altivec_stvewx(__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvewx(vector unsigned int __a, int __b, unsigned int *__c)
-{
+static void __ATTRS_o_ai vec_stvewx(vector unsigned int __a, int __b,
+                                    unsigned int *__c) {
   __builtin_altivec_stvewx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvewx(vector bool int __a, int __b, int *__c)
-{
+static void __ATTRS_o_ai vec_stvewx(vector bool int __a, int __b, int *__c) {
   __builtin_altivec_stvewx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvewx(vector bool int __a, int __b, unsigned int *__c)
-{
+static void __ATTRS_o_ai vec_stvewx(vector bool int __a, int __b,
+                                    unsigned int *__c) {
   __builtin_altivec_stvewx((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvewx(vector float __a, int __b, float *__c)
-{
+static void __ATTRS_o_ai vec_stvewx(vector float __a, int __b, float *__c) {
   __builtin_altivec_stvewx((vector int)__a, __b, __c);
 }
 
 /* vec_stl */
 
-static void __ATTRS_o_ai
-vec_stl(vector signed char __a, int __b, vector signed char *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector signed char __a, int __b,
+                                 vector signed char *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector signed char __a, int __b, signed char *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector signed char __a, int __b,
+                                 signed char *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector unsigned char __a, int __b, vector unsigned char *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector unsigned char __a, int __b,
+                                 vector unsigned char *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector unsigned char __a, int __b, unsigned char *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector unsigned char __a, int __b,
+                                 unsigned char *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector bool char __a, int __b, signed char *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector bool char __a, int __b,
+                                 signed char *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector bool char __a, int __b, unsigned char *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector bool char __a, int __b,
+                                 unsigned char *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector bool char __a, int __b, vector bool char *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector bool char __a, int __b,
+                                 vector bool char *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector short __a, int __b, vector short *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector short __a, int __b, vector short *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector short __a, int __b, short *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector short __a, int __b, short *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector unsigned short __a, int __b, vector unsigned short *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector unsigned short __a, int __b,
+                                 vector unsigned short *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector unsigned short __a, int __b, unsigned short *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector unsigned short __a, int __b,
+                                 unsigned short *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector bool short __a, int __b, short *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector bool short __a, int __b, short *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector bool short __a, int __b, unsigned short *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector bool short __a, int __b,
+                                 unsigned short *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector bool short __a, int __b, vector bool short *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector bool short __a, int __b,
+                                 vector bool short *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector pixel __a, int __b, short *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector pixel __a, int __b, short *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector pixel __a, int __b, unsigned short *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector pixel __a, int __b,
+                                 unsigned short *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector pixel __a, int __b, vector pixel *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector pixel __a, int __b, vector pixel *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector int __a, int __b, vector int *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector int __a, int __b, vector int *__c) {
   __builtin_altivec_stvxl(__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector int __a, int __b, int *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector int __a, int __b, int *__c) {
   __builtin_altivec_stvxl(__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector unsigned int __a, int __b, vector unsigned int *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector unsigned int __a, int __b,
+                                 vector unsigned int *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector unsigned int __a, int __b, unsigned int *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector unsigned int __a, int __b,
+                                 unsigned int *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector bool int __a, int __b, int *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector bool int __a, int __b, int *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector bool int __a, int __b, unsigned int *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector bool int __a, int __b,
+                                 unsigned int *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector bool int __a, int __b, vector bool int *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector bool int __a, int __b,
+                                 vector bool int *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector float __a, int __b, vector float *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector float __a, int __b, vector float *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stl(vector float __a, int __b, float *__c)
-{
+static void __ATTRS_o_ai vec_stl(vector float __a, int __b, float *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
 /* vec_stvxl */
 
-static void __ATTRS_o_ai
-vec_stvxl(vector signed char __a, int __b, vector signed char *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector signed char __a, int __b,
+                                   vector signed char *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector signed char __a, int __b, signed char *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector signed char __a, int __b,
+                                   signed char *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector unsigned char __a, int __b, vector unsigned char *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector unsigned char __a, int __b,
+                                   vector unsigned char *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector unsigned char __a, int __b, unsigned char *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector unsigned char __a, int __b,
+                                   unsigned char *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector bool char __a, int __b, signed char *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector bool char __a, int __b,
+                                   signed char *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector bool char __a, int __b, unsigned char *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector bool char __a, int __b,
+                                   unsigned char *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector bool char __a, int __b, vector bool char *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector bool char __a, int __b,
+                                   vector bool char *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector short __a, int __b, vector short *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector short __a, int __b,
+                                   vector short *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector short __a, int __b, short *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector short __a, int __b, short *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector unsigned short __a, int __b, vector unsigned short *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector unsigned short __a, int __b,
+                                   vector unsigned short *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector unsigned short __a, int __b, unsigned short *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector unsigned short __a, int __b,
+                                   unsigned short *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector bool short __a, int __b, short *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector bool short __a, int __b, short *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector bool short __a, int __b, unsigned short *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector bool short __a, int __b,
+                                   unsigned short *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector bool short __a, int __b, vector bool short *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector bool short __a, int __b,
+                                   vector bool short *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector pixel __a, int __b, short *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector pixel __a, int __b, short *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector pixel __a, int __b, unsigned short *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector pixel __a, int __b,
+                                   unsigned short *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector pixel __a, int __b, vector pixel *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector pixel __a, int __b,
+                                   vector pixel *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector int __a, int __b, vector int *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector int __a, int __b, vector int *__c) {
   __builtin_altivec_stvxl(__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector int __a, int __b, int *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector int __a, int __b, int *__c) {
   __builtin_altivec_stvxl(__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector unsigned int __a, int __b, vector unsigned int *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector unsigned int __a, int __b,
+                                   vector unsigned int *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector unsigned int __a, int __b, unsigned int *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector unsigned int __a, int __b,
+                                   unsigned int *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector bool int __a, int __b, int *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector bool int __a, int __b, int *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector bool int __a, int __b, unsigned int *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector bool int __a, int __b,
+                                   unsigned int *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector bool int __a, int __b, vector bool int *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector bool int __a, int __b,
+                                   vector bool int *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector float __a, int __b, vector float *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector float __a, int __b,
+                                   vector float *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvxl(vector float __a, int __b, float *__c)
-{
+static void __ATTRS_o_ai vec_stvxl(vector float __a, int __b, float *__c) {
   __builtin_altivec_stvxl((vector int)__a, __b, __c);
 }
 
 /* vec_sub */
 
-static vector signed char __ATTRS_o_ai
-vec_sub(vector signed char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_sub(vector signed char __a,
+                                               vector signed char __b) {
   return __a - __b;
 }
 
-static vector signed char __ATTRS_o_ai
-vec_sub(vector bool char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_sub(vector bool char __a,
+                                               vector signed char __b) {
   return (vector signed char)__a - __b;
 }
 
-static vector signed char __ATTRS_o_ai
-vec_sub(vector signed char __a, vector bool char __b)
-{
+static vector signed char __ATTRS_o_ai vec_sub(vector signed char __a,
+                                               vector bool char __b) {
   return __a - (vector signed char)__b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_sub(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_sub(vector unsigned char __a,
+                                                 vector unsigned char __b) {
   return __a - __b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_sub(vector bool char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_sub(vector bool char __a,
+                                                 vector unsigned char __b) {
   return (vector unsigned char)__a - __b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_sub(vector unsigned char __a, vector bool char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_sub(vector unsigned char __a,
+                                                 vector bool char __b) {
   return __a - (vector unsigned char)__b;
 }
 
-static vector short __ATTRS_o_ai
-vec_sub(vector short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_sub(vector short __a, vector short __b) {
   return __a - __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_sub(vector bool short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_sub(vector bool short __a,
+                                         vector short __b) {
   return (vector short)__a - __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_sub(vector short __a, vector bool short __b)
-{
+static vector short __ATTRS_o_ai vec_sub(vector short __a,
+                                         vector bool short __b) {
   return __a - (vector short)__b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_sub(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_sub(vector unsigned short __a,
+                                                  vector unsigned short __b) {
   return __a - __b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_sub(vector bool short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_sub(vector bool short __a,
+                                                  vector unsigned short __b) {
   return (vector unsigned short)__a - __b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_sub(vector unsigned short __a, vector bool short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_sub(vector unsigned short __a,
+                                                  vector bool short __b) {
   return __a - (vector unsigned short)__b;
 }
 
-static vector int __ATTRS_o_ai
-vec_sub(vector int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_sub(vector int __a, vector int __b) {
   return __a - __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_sub(vector bool int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_sub(vector bool int __a, vector int __b) {
   return (vector int)__a - __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_sub(vector int __a, vector bool int __b)
-{
+static vector int __ATTRS_o_ai vec_sub(vector int __a, vector bool int __b) {
   return __a - (vector int)__b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_sub(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_sub(vector unsigned int __a,
+                                                vector unsigned int __b) {
   return __a - __b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_sub(vector bool int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_sub(vector bool int __a,
+                                                vector unsigned int __b) {
   return (vector unsigned int)__a - __b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_sub(vector unsigned int __a, vector bool int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_sub(vector unsigned int __a,
+                                                vector bool int __b) {
   return __a - (vector unsigned int)__b;
 }
 
-static vector float __ATTRS_o_ai
-vec_sub(vector float __a, vector float __b)
-{
+#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
+static vector signed __int128 __ATTRS_o_ai vec_sub(vector signed __int128 __a,
+                                                   vector signed __int128 __b) {
+  return __a - __b;
+}
+
+static vector unsigned __int128 __ATTRS_o_ai
+vec_sub(vector unsigned __int128 __a, vector unsigned __int128 __b) {
+  return __a - __b;
+}
+#endif // defined(__POWER8_VECTOR__) && defined(__powerpc64__)
+
+static vector float __ATTRS_o_ai vec_sub(vector float __a, vector float __b) {
   return __a - __b;
 }
 
@@ -8391,39 +7748,33 @@
 
 #define __builtin_altivec_vsububm vec_vsububm
 
-static vector signed char __ATTRS_o_ai
-vec_vsububm(vector signed char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vsububm(vector signed char __a,
+                                                   vector signed char __b) {
   return __a - __b;
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vsububm(vector bool char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vsububm(vector bool char __a,
+                                                   vector signed char __b) {
   return (vector signed char)__a - __b;
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vsububm(vector signed char __a, vector bool char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vsububm(vector signed char __a,
+                                                   vector bool char __b) {
   return __a - (vector signed char)__b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vsububm(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vsububm(vector unsigned char __a,
+                                                     vector unsigned char __b) {
   return __a - __b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vsububm(vector bool char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vsububm(vector bool char __a,
+                                                     vector unsigned char __b) {
   return (vector unsigned char)__a - __b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vsububm(vector unsigned char __a, vector bool char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vsububm(vector unsigned char __a,
+                                                     vector bool char __b) {
   return __a - (vector unsigned char)__b;
 }
 
@@ -8431,39 +7782,33 @@
 
 #define __builtin_altivec_vsubuhm vec_vsubuhm
 
-static vector short __ATTRS_o_ai
-vec_vsubuhm(vector short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_vsubuhm(vector short __a,
+                                             vector short __b) {
   return __a - __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_vsubuhm(vector bool short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_vsubuhm(vector bool short __a,
+                                             vector short __b) {
   return (vector short)__a - __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_vsubuhm(vector short __a, vector bool short __b)
-{
+static vector short __ATTRS_o_ai vec_vsubuhm(vector short __a,
+                                             vector bool short __b) {
   return __a - (vector short)__b;
 }
 
 static vector unsigned short __ATTRS_o_ai
-vec_vsubuhm(vector unsigned short __a, vector unsigned short __b)
-{
+vec_vsubuhm(vector unsigned short __a, vector unsigned short __b) {
   return __a - __b;
 }
 
 static vector unsigned short __ATTRS_o_ai
-vec_vsubuhm(vector bool short __a, vector unsigned short __b)
-{
+vec_vsubuhm(vector bool short __a, vector unsigned short __b) {
   return (vector unsigned short)__a - __b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vsubuhm(vector unsigned short __a, vector bool short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vsubuhm(vector unsigned short __a,
+                                                      vector bool short __b) {
   return __a - (vector unsigned short)__b;
 }
 
@@ -8471,39 +7816,32 @@
 
 #define __builtin_altivec_vsubuwm vec_vsubuwm
 
-static vector int __ATTRS_o_ai
-vec_vsubuwm(vector int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_vsubuwm(vector int __a, vector int __b) {
   return __a - __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_vsubuwm(vector bool int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_vsubuwm(vector bool int __a,
+                                           vector int __b) {
   return (vector int)__a - __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_vsubuwm(vector int __a, vector bool int __b)
-{
+static vector int __ATTRS_o_ai vec_vsubuwm(vector int __a,
+                                           vector bool int __b) {
   return __a - (vector int)__b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vsubuwm(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vsubuwm(vector unsigned int __a,
+                                                    vector unsigned int __b) {
   return __a - __b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vsubuwm(vector bool int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vsubuwm(vector bool int __a,
+                                                    vector unsigned int __b) {
   return (vector unsigned int)__a - __b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vsubuwm(vector unsigned int __a, vector bool int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vsubuwm(vector unsigned int __a,
+                                                    vector bool int __b) {
   return __a - (vector unsigned int)__b;
 }
 
@@ -8512,298 +7850,314 @@
 #define __builtin_altivec_vsubfp vec_vsubfp
 
 static vector float __attribute__((__always_inline__))
-vec_vsubfp(vector float __a, vector float __b)
-{
+vec_vsubfp(vector float __a, vector float __b) {
   return __a - __b;
 }
 
 /* vec_subc */
 
-static vector unsigned int __attribute__((__always_inline__))
-vec_subc(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_subc(vector unsigned int __a,
+                                                 vector unsigned int __b) {
   return __builtin_altivec_vsubcuw(__a, __b);
 }
 
+#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
+static vector unsigned __int128 __ATTRS_o_ai
+vec_subc(vector unsigned __int128 __a, vector unsigned __int128 __b) {
+  return __builtin_altivec_vsubcuq(__a, __b);
+}
+
+static vector signed __int128 __ATTRS_o_ai
+vec_subc(vector signed __int128 __a, vector signed __int128 __b) {
+  return __builtin_altivec_vsubcuq(__a, __b);
+}
+#endif // defined(__POWER8_VECTOR__) && defined(__powerpc64__)
+
 /* vec_vsubcuw */
 
 static vector unsigned int __attribute__((__always_inline__))
-vec_vsubcuw(vector unsigned int __a, vector unsigned int __b)
-{
+vec_vsubcuw(vector unsigned int __a, vector unsigned int __b) {
   return __builtin_altivec_vsubcuw(__a, __b);
 }
 
 /* vec_subs */
 
-static vector signed char __ATTRS_o_ai
-vec_subs(vector signed char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_subs(vector signed char __a,
+                                                vector signed char __b) {
   return __builtin_altivec_vsubsbs(__a, __b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_subs(vector bool char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_subs(vector bool char __a,
+                                                vector signed char __b) {
   return __builtin_altivec_vsubsbs((vector signed char)__a, __b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_subs(vector signed char __a, vector bool char __b)
-{
+static vector signed char __ATTRS_o_ai vec_subs(vector signed char __a,
+                                                vector bool char __b) {
   return __builtin_altivec_vsubsbs(__a, (vector signed char)__b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_subs(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_subs(vector unsigned char __a,
+                                                  vector unsigned char __b) {
   return __builtin_altivec_vsububs(__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_subs(vector bool char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_subs(vector bool char __a,
+                                                  vector unsigned char __b) {
   return __builtin_altivec_vsububs((vector unsigned char)__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_subs(vector unsigned char __a, vector bool char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_subs(vector unsigned char __a,
+                                                  vector bool char __b) {
   return __builtin_altivec_vsububs(__a, (vector unsigned char)__b);
 }
 
-static vector short __ATTRS_o_ai
-vec_subs(vector short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_subs(vector short __a, vector short __b) {
   return __builtin_altivec_vsubshs(__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_subs(vector bool short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_subs(vector bool short __a,
+                                          vector short __b) {
   return __builtin_altivec_vsubshs((vector short)__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_subs(vector short __a, vector bool short __b)
-{
+static vector short __ATTRS_o_ai vec_subs(vector short __a,
+                                          vector bool short __b) {
   return __builtin_altivec_vsubshs(__a, (vector short)__b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_subs(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_subs(vector unsigned short __a,
+                                                   vector unsigned short __b) {
   return __builtin_altivec_vsubuhs(__a, __b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_subs(vector bool short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_subs(vector bool short __a,
+                                                   vector unsigned short __b) {
   return __builtin_altivec_vsubuhs((vector unsigned short)__a, __b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_subs(vector unsigned short __a, vector bool short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_subs(vector unsigned short __a,
+                                                   vector bool short __b) {
   return __builtin_altivec_vsubuhs(__a, (vector unsigned short)__b);
 }
 
-static vector int __ATTRS_o_ai
-vec_subs(vector int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_subs(vector int __a, vector int __b) {
   return __builtin_altivec_vsubsws(__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_subs(vector bool int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_subs(vector bool int __a, vector int __b) {
   return __builtin_altivec_vsubsws((vector int)__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_subs(vector int __a, vector bool int __b)
-{
+static vector int __ATTRS_o_ai vec_subs(vector int __a, vector bool int __b) {
   return __builtin_altivec_vsubsws(__a, (vector int)__b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_subs(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_subs(vector unsigned int __a,
+                                                 vector unsigned int __b) {
   return __builtin_altivec_vsubuws(__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_subs(vector bool int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_subs(vector bool int __a,
+                                                 vector unsigned int __b) {
   return __builtin_altivec_vsubuws((vector unsigned int)__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_subs(vector unsigned int __a, vector bool int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_subs(vector unsigned int __a,
+                                                 vector bool int __b) {
   return __builtin_altivec_vsubuws(__a, (vector unsigned int)__b);
 }
 
 /* vec_vsubsbs */
 
-static vector signed char __ATTRS_o_ai
-vec_vsubsbs(vector signed char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vsubsbs(vector signed char __a,
+                                                   vector signed char __b) {
   return __builtin_altivec_vsubsbs(__a, __b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vsubsbs(vector bool char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vsubsbs(vector bool char __a,
+                                                   vector signed char __b) {
   return __builtin_altivec_vsubsbs((vector signed char)__a, __b);
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vsubsbs(vector signed char __a, vector bool char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vsubsbs(vector signed char __a,
+                                                   vector bool char __b) {
   return __builtin_altivec_vsubsbs(__a, (vector signed char)__b);
 }
 
 /* vec_vsububs */
 
-static vector unsigned char __ATTRS_o_ai
-vec_vsububs(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vsububs(vector unsigned char __a,
+                                                     vector unsigned char __b) {
   return __builtin_altivec_vsububs(__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vsububs(vector bool char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vsububs(vector bool char __a,
+                                                     vector unsigned char __b) {
   return __builtin_altivec_vsububs((vector unsigned char)__a, __b);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vsububs(vector unsigned char __a, vector bool char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vsububs(vector unsigned char __a,
+                                                     vector bool char __b) {
   return __builtin_altivec_vsububs(__a, (vector unsigned char)__b);
 }
 
 /* vec_vsubshs */
 
-static vector short __ATTRS_o_ai
-vec_vsubshs(vector short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_vsubshs(vector short __a,
+                                             vector short __b) {
   return __builtin_altivec_vsubshs(__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_vsubshs(vector bool short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_vsubshs(vector bool short __a,
+                                             vector short __b) {
   return __builtin_altivec_vsubshs((vector short)__a, __b);
 }
 
-static vector short __ATTRS_o_ai
-vec_vsubshs(vector short __a, vector bool short __b)
-{
+static vector short __ATTRS_o_ai vec_vsubshs(vector short __a,
+                                             vector bool short __b) {
   return __builtin_altivec_vsubshs(__a, (vector short)__b);
 }
 
 /* vec_vsubuhs */
 
 static vector unsigned short __ATTRS_o_ai
-vec_vsubuhs(vector unsigned short __a, vector unsigned short __b)
-{
+vec_vsubuhs(vector unsigned short __a, vector unsigned short __b) {
   return __builtin_altivec_vsubuhs(__a, __b);
 }
 
 static vector unsigned short __ATTRS_o_ai
-vec_vsubuhs(vector bool short __a, vector unsigned short __b)
-{
+vec_vsubuhs(vector bool short __a, vector unsigned short __b) {
   return __builtin_altivec_vsubuhs((vector unsigned short)__a, __b);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vsubuhs(vector unsigned short __a, vector bool short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vsubuhs(vector unsigned short __a,
+                                                      vector bool short __b) {
   return __builtin_altivec_vsubuhs(__a, (vector unsigned short)__b);
 }
 
 /* vec_vsubsws */
 
-static vector int __ATTRS_o_ai
-vec_vsubsws(vector int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_vsubsws(vector int __a, vector int __b) {
   return __builtin_altivec_vsubsws(__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_vsubsws(vector bool int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_vsubsws(vector bool int __a,
+                                           vector int __b) {
   return __builtin_altivec_vsubsws((vector int)__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_vsubsws(vector int __a, vector bool int __b)
-{
+static vector int __ATTRS_o_ai vec_vsubsws(vector int __a,
+                                           vector bool int __b) {
   return __builtin_altivec_vsubsws(__a, (vector int)__b);
 }
 
 /* vec_vsubuws */
 
-static vector unsigned int __ATTRS_o_ai
-vec_vsubuws(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vsubuws(vector unsigned int __a,
+                                                    vector unsigned int __b) {
   return __builtin_altivec_vsubuws(__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vsubuws(vector bool int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vsubuws(vector bool int __a,
+                                                    vector unsigned int __b) {
   return __builtin_altivec_vsubuws((vector unsigned int)__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vsubuws(vector unsigned int __a, vector bool int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vsubuws(vector unsigned int __a,
+                                                    vector bool int __b) {
   return __builtin_altivec_vsubuws(__a, (vector unsigned int)__b);
 }
 
+#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
+/* vec_vsubuqm */
+
+static vector signed __int128 __ATTRS_o_ai
+vec_vsubuqm(vector signed __int128 __a, vector signed __int128 __b) {
+  return __a - __b;
+}
+
+static vector unsigned __int128 __ATTRS_o_ai
+vec_vsubuqm(vector unsigned __int128 __a, vector unsigned __int128 __b) {
+  return __a - __b;
+}
+
+/* vec_vsubeuqm */
+
+static vector signed __int128 __ATTRS_o_ai
+vec_vsubeuqm(vector signed __int128 __a, vector signed __int128 __b,
+             vector signed __int128 __c) {
+  return __builtin_altivec_vsubeuqm(__a, __b, __c);
+}
+
+static vector unsigned __int128 __ATTRS_o_ai
+vec_vsubeuqm(vector unsigned __int128 __a, vector unsigned __int128 __b,
+             vector unsigned __int128 __c) {
+  return __builtin_altivec_vsubeuqm(__a, __b, __c);
+}
+
+/* vec_vsubcuq */
+
+static vector signed __int128 __ATTRS_o_ai
+vec_vsubcuq(vector signed __int128 __a, vector signed __int128 __b) {
+  return __builtin_altivec_vsubcuq(__a, __b);
+}
+
+static vector unsigned __int128 __ATTRS_o_ai
+vec_vsubcuq(vector unsigned __int128 __a, vector unsigned __int128 __b) {
+  return __builtin_altivec_vsubcuq(__a, __b);
+}
+
+/* vec_vsubecuq */
+
+static vector signed __int128 __ATTRS_o_ai
+vec_vsubecuq(vector signed __int128 __a, vector signed __int128 __b,
+             vector signed __int128 __c) {
+  return __builtin_altivec_vsubecuq(__a, __b, __c);
+}
+
+static vector unsigned __int128 __ATTRS_o_ai
+vec_vsubecuq(vector unsigned __int128 __a, vector unsigned __int128 __b,
+             vector unsigned __int128 __c) {
+  return __builtin_altivec_vsubecuq(__a, __b, __c);
+}
+#endif // defined(__POWER8_VECTOR__) && defined(__powerpc64__)
+
 /* vec_sum4s */
 
-static vector int __ATTRS_o_ai
-vec_sum4s(vector signed char __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_sum4s(vector signed char __a,
+                                         vector int __b) {
   return __builtin_altivec_vsum4sbs(__a, __b);
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_sum4s(vector unsigned char __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_sum4s(vector unsigned char __a,
+                                                  vector unsigned int __b) {
   return __builtin_altivec_vsum4ubs(__a, __b);
 }
 
-static vector int __ATTRS_o_ai
-vec_sum4s(vector signed short __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_sum4s(vector signed short __a,
+                                         vector int __b) {
   return __builtin_altivec_vsum4shs(__a, __b);
 }
 
 /* vec_vsum4sbs */
 
 static vector int __attribute__((__always_inline__))
-vec_vsum4sbs(vector signed char __a, vector int __b)
-{
+vec_vsum4sbs(vector signed char __a, vector int __b) {
   return __builtin_altivec_vsum4sbs(__a, __b);
 }
 
 /* vec_vsum4ubs */
 
 static vector unsigned int __attribute__((__always_inline__))
-vec_vsum4ubs(vector unsigned char __a, vector unsigned int __b)
-{
+vec_vsum4ubs(vector unsigned char __a, vector unsigned int __b) {
   return __builtin_altivec_vsum4ubs(__a, __b);
 }
 
 /* vec_vsum4shs */
 
 static vector int __attribute__((__always_inline__))
-vec_vsum4shs(vector signed short __a, vector int __b)
-{
+vec_vsum4shs(vector signed short __a, vector int __b) {
   return __builtin_altivec_vsum4shs(__a, __b);
 }
 
@@ -8816,16 +8170,15 @@
    endian we must perform some permutes.  */
 
 static vector signed int __attribute__((__always_inline__))
-vec_sum2s(vector int __a, vector int __b)
-{
+vec_sum2s(vector int __a, vector int __b) {
 #ifdef __LITTLE_ENDIAN__
-  vector int __c = (vector signed int)
-    vec_perm(__b, __b, (vector unsigned char)
-             (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11));
+  vector int __c = (vector signed int)vec_perm(
+      __b, __b, (vector unsigned char)(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15,
+                                       8, 9, 10, 11));
   __c = __builtin_altivec_vsum2sws(__a, __c);
-  return (vector signed int)
-    vec_perm(__c, __c, (vector unsigned char)
-             (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11));
+  return (vector signed int)vec_perm(
+      __c, __c, (vector unsigned char)(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15,
+                                       8, 9, 10, 11));
 #else
   return __builtin_altivec_vsum2sws(__a, __b);
 #endif
@@ -8834,16 +8187,15 @@
 /* vec_vsum2sws */
 
 static vector signed int __attribute__((__always_inline__))
-vec_vsum2sws(vector int __a, vector int __b)
-{
+vec_vsum2sws(vector int __a, vector int __b) {
 #ifdef __LITTLE_ENDIAN__
-  vector int __c = (vector signed int)
-    vec_perm(__b, __b, (vector unsigned char)
-             (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11));
+  vector int __c = (vector signed int)vec_perm(
+      __b, __b, (vector unsigned char)(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15,
+                                       8, 9, 10, 11));
   __c = __builtin_altivec_vsum2sws(__a, __c);
-  return (vector signed int)
-    vec_perm(__c, __c, (vector unsigned char)
-             (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11));
+  return (vector signed int)vec_perm(
+      __c, __c, (vector unsigned char)(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15,
+                                       8, 9, 10, 11));
 #else
   return __builtin_altivec_vsum2sws(__a, __b);
 #endif
@@ -8858,8 +8210,7 @@
    some permutes.  */
 
 static vector signed int __attribute__((__always_inline__))
-vec_sums(vector signed int __a, vector signed int __b)
-{
+vec_sums(vector signed int __a, vector signed int __b) {
 #ifdef __LITTLE_ENDIAN__
   __b = (vector signed int)vec_splat(__b, 3);
   __b = __builtin_altivec_vsumsws(__a, __b);
@@ -8872,8 +8223,7 @@
 /* vec_vsumsws */
 
 static vector signed int __attribute__((__always_inline__))
-vec_vsumsws(vector signed int __a, vector signed int __b)
-{
+vec_vsumsws(vector signed int __a, vector signed int __b) {
 #ifdef __LITTLE_ENDIAN__
   __b = (vector signed int)vec_splat(__b, 3);
   __b = __builtin_altivec_vsumsws(__a, __b);
@@ -8886,16 +8236,14 @@
 /* vec_trunc */
 
 static vector float __attribute__((__always_inline__))
-vec_trunc(vector float __a)
-{
+vec_trunc(vector float __a) {
   return __builtin_altivec_vrfiz(__a);
 }
 
 /* vec_vrfiz */
 
 static vector float __attribute__((__always_inline__))
-vec_vrfiz(vector float __a)
-{
+vec_vrfiz(vector float __a) {
   return __builtin_altivec_vrfiz(__a);
 }
 
@@ -8904,9 +8252,7 @@
 /* The vector unpack instructions all have a big-endian bias, so for
    little endian we must reverse the meanings of "high" and "low."  */
 
-static vector short __ATTRS_o_ai
-vec_unpackh(vector signed char __a)
-{
+static vector short __ATTRS_o_ai vec_unpackh(vector signed char __a) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vupklsb((vector char)__a);
 #else
@@ -8914,9 +8260,7 @@
 #endif
 }
 
-static vector bool short __ATTRS_o_ai
-vec_unpackh(vector bool char __a)
-{
+static vector bool short __ATTRS_o_ai vec_unpackh(vector bool char __a) {
 #ifdef __LITTLE_ENDIAN__
   return (vector bool short)__builtin_altivec_vupklsb((vector char)__a);
 #else
@@ -8924,9 +8268,7 @@
 #endif
 }
 
-static vector int __ATTRS_o_ai
-vec_unpackh(vector short __a)
-{
+static vector int __ATTRS_o_ai vec_unpackh(vector short __a) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vupklsh(__a);
 #else
@@ -8934,9 +8276,7 @@
 #endif
 }
 
-static vector bool int __ATTRS_o_ai
-vec_unpackh(vector bool short __a)
-{
+static vector bool int __ATTRS_o_ai vec_unpackh(vector bool short __a) {
 #ifdef __LITTLE_ENDIAN__
   return (vector bool int)__builtin_altivec_vupklsh((vector short)__a);
 #else
@@ -8944,9 +8284,7 @@
 #endif
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_unpackh(vector pixel __a)
-{
+static vector unsigned int __ATTRS_o_ai vec_unpackh(vector pixel __a) {
 #ifdef __LITTLE_ENDIAN__
   return (vector unsigned int)__builtin_altivec_vupklpx((vector short)__a);
 #else
@@ -8954,11 +8292,27 @@
 #endif
 }
 
+#ifdef __POWER8_VECTOR__
+static vector long long __ATTRS_o_ai vec_unpackh(vector int __a) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vupklsw(__a);
+#else
+  return __builtin_altivec_vupkhsw(__a);
+#endif
+}
+
+static vector bool long long __ATTRS_o_ai vec_unpackh(vector bool int __a) {
+#ifdef __LITTLE_ENDIAN__
+  return (vector bool long long)__builtin_altivec_vupklsw((vector int)__a);
+#else
+  return (vector bool long long)__builtin_altivec_vupkhsw((vector int)__a);
+#endif
+}
+#endif
+
 /* vec_vupkhsb */
 
-static vector short __ATTRS_o_ai
-vec_vupkhsb(vector signed char __a)
-{
+static vector short __ATTRS_o_ai vec_vupkhsb(vector signed char __a) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vupklsb((vector char)__a);
 #else
@@ -8966,9 +8320,7 @@
 #endif
 }
 
-static vector bool short __ATTRS_o_ai
-vec_vupkhsb(vector bool char __a)
-{
+static vector bool short __ATTRS_o_ai vec_vupkhsb(vector bool char __a) {
 #ifdef __LITTLE_ENDIAN__
   return (vector bool short)__builtin_altivec_vupklsb((vector char)__a);
 #else
@@ -8978,9 +8330,7 @@
 
 /* vec_vupkhsh */
 
-static vector int __ATTRS_o_ai
-vec_vupkhsh(vector short __a)
-{
+static vector int __ATTRS_o_ai vec_vupkhsh(vector short __a) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vupklsh(__a);
 #else
@@ -8988,9 +8338,7 @@
 #endif
 }
 
-static vector bool int __ATTRS_o_ai
-vec_vupkhsh(vector bool short __a)
-{
+static vector bool int __ATTRS_o_ai vec_vupkhsh(vector bool short __a) {
 #ifdef __LITTLE_ENDIAN__
   return (vector bool int)__builtin_altivec_vupklsh((vector short)__a);
 #else
@@ -8998,9 +8346,7 @@
 #endif
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vupkhsh(vector pixel __a)
-{
+static vector unsigned int __ATTRS_o_ai vec_vupkhsh(vector pixel __a) {
 #ifdef __LITTLE_ENDIAN__
   return (vector unsigned int)__builtin_altivec_vupklpx((vector short)__a);
 #else
@@ -9008,11 +8354,29 @@
 #endif
 }
 
+/* vec_vupkhsw */
+
+#ifdef __POWER8_VECTOR__
+static vector long long __ATTRS_o_ai vec_vupkhsw(vector int __a) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vupklsw(__a);
+#else
+  return __builtin_altivec_vupkhsw(__a);
+#endif
+}
+
+static vector bool long long __ATTRS_o_ai vec_vupkhsw(vector bool int __a) {
+#ifdef __LITTLE_ENDIAN__
+  return (vector bool long long)__builtin_altivec_vupklsw((vector int)__a);
+#else
+  return (vector bool long long)__builtin_altivec_vupkhsw((vector int)__a);
+#endif
+}
+#endif
+
 /* vec_unpackl */
 
-static vector short __ATTRS_o_ai
-vec_unpackl(vector signed char __a)
-{
+static vector short __ATTRS_o_ai vec_unpackl(vector signed char __a) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vupkhsb((vector char)__a);
 #else
@@ -9020,9 +8384,7 @@
 #endif
 }
 
-static vector bool short __ATTRS_o_ai
-vec_unpackl(vector bool char __a)
-{
+static vector bool short __ATTRS_o_ai vec_unpackl(vector bool char __a) {
 #ifdef __LITTLE_ENDIAN__
   return (vector bool short)__builtin_altivec_vupkhsb((vector char)__a);
 #else
@@ -9030,9 +8392,7 @@
 #endif
 }
 
-static vector int __ATTRS_o_ai
-vec_unpackl(vector short __a)
-{
+static vector int __ATTRS_o_ai vec_unpackl(vector short __a) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vupkhsh(__a);
 #else
@@ -9040,9 +8400,7 @@
 #endif
 }
 
-static vector bool int __ATTRS_o_ai
-vec_unpackl(vector bool short __a)
-{
+static vector bool int __ATTRS_o_ai vec_unpackl(vector bool short __a) {
 #ifdef __LITTLE_ENDIAN__
   return (vector bool int)__builtin_altivec_vupkhsh((vector short)__a);
 #else
@@ -9050,9 +8408,7 @@
 #endif
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_unpackl(vector pixel __a)
-{
+static vector unsigned int __ATTRS_o_ai vec_unpackl(vector pixel __a) {
 #ifdef __LITTLE_ENDIAN__
   return (vector unsigned int)__builtin_altivec_vupkhpx((vector short)__a);
 #else
@@ -9060,11 +8416,27 @@
 #endif
 }
 
+#ifdef __POWER8_VECTOR__
+static vector long long __ATTRS_o_ai vec_unpackl(vector int __a) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vupkhsw(__a);
+#else
+  return __builtin_altivec_vupklsw(__a);
+#endif
+}
+
+static vector bool long long __ATTRS_o_ai vec_unpackl(vector bool int __a) {
+#ifdef __LITTLE_ENDIAN__
+  return (vector bool long long)__builtin_altivec_vupkhsw((vector int)__a);
+#else
+  return (vector bool long long)__builtin_altivec_vupklsw((vector int)__a);
+#endif
+}
+#endif
+
 /* vec_vupklsb */
 
-static vector short __ATTRS_o_ai
-vec_vupklsb(vector signed char __a)
-{
+static vector short __ATTRS_o_ai vec_vupklsb(vector signed char __a) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vupkhsb((vector char)__a);
 #else
@@ -9072,9 +8444,7 @@
 #endif
 }
 
-static vector bool short __ATTRS_o_ai
-vec_vupklsb(vector bool char __a)
-{
+static vector bool short __ATTRS_o_ai vec_vupklsb(vector bool char __a) {
 #ifdef __LITTLE_ENDIAN__
   return (vector bool short)__builtin_altivec_vupkhsb((vector char)__a);
 #else
@@ -9084,9 +8454,7 @@
 
 /* vec_vupklsh */
 
-static vector int __ATTRS_o_ai
-vec_vupklsh(vector short __a)
-{
+static vector int __ATTRS_o_ai vec_vupklsh(vector short __a) {
 #ifdef __LITTLE_ENDIAN__
   return __builtin_altivec_vupkhsh(__a);
 #else
@@ -9094,9 +8462,7 @@
 #endif
 }
 
-static vector bool int __ATTRS_o_ai
-vec_vupklsh(vector bool short __a)
-{
+static vector bool int __ATTRS_o_ai vec_vupklsh(vector bool short __a) {
 #ifdef __LITTLE_ENDIAN__
   return (vector bool int)__builtin_altivec_vupkhsh((vector short)__a);
 #else
@@ -9104,9 +8470,7 @@
 #endif
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vupklsh(vector pixel __a)
-{
+static vector unsigned int __ATTRS_o_ai vec_vupklsh(vector pixel __a) {
 #ifdef __LITTLE_ENDIAN__
   return (vector unsigned int)__builtin_altivec_vupkhpx((vector short)__a);
 #else
@@ -9114,43 +8478,56 @@
 #endif
 }
 
+/* vec_vupklsw */
+
+#ifdef __POWER8_VECTOR__
+static vector long long __ATTRS_o_ai vec_vupklsw(vector int __a) {
+#ifdef __LITTLE_ENDIAN__
+  return __builtin_altivec_vupkhsw(__a);
+#else
+  return __builtin_altivec_vupklsw(__a);
+#endif
+}
+
+static vector bool long long __ATTRS_o_ai vec_vupklsw(vector bool int __a) {
+#ifdef __LITTLE_ENDIAN__
+  return (vector bool long long)__builtin_altivec_vupkhsw((vector int)__a);
+#else
+  return (vector bool long long)__builtin_altivec_vupklsw((vector int)__a);
+#endif
+}
+#endif
+
 /* vec_vsx_ld */
 
 #ifdef __VSX__
 
-static vector signed int __ATTRS_o_ai
-vec_vsx_ld(int __a, const vector signed int *__b)
-{
+static vector signed int __ATTRS_o_ai vec_vsx_ld(int __a,
+                                                 const vector signed int *__b) {
   return (vector signed int)__builtin_vsx_lxvw4x(__a, __b);
 }
 
 static vector unsigned int __ATTRS_o_ai
-vec_vsx_ld(int __a, const vector unsigned int *__b)
-{
+vec_vsx_ld(int __a, const vector unsigned int *__b) {
   return (vector unsigned int)__builtin_vsx_lxvw4x(__a, __b);
 }
 
-static vector float __ATTRS_o_ai
-vec_vsx_ld(int __a, const vector float *__b)
-{
+static vector float __ATTRS_o_ai vec_vsx_ld(int __a, const vector float *__b) {
   return (vector float)__builtin_vsx_lxvw4x(__a, __b);
 }
 
 static vector signed long long __ATTRS_o_ai
-vec_vsx_ld(int __a, const vector signed long long *__b)
-{
+vec_vsx_ld(int __a, const vector signed long long *__b) {
   return (vector signed long long)__builtin_vsx_lxvd2x(__a, __b);
 }
 
 static vector unsigned long long __ATTRS_o_ai
-vec_vsx_ld(int __a, const vector unsigned long long *__b)
-{
+vec_vsx_ld(int __a, const vector unsigned long long *__b) {
   return (vector unsigned long long)__builtin_vsx_lxvd2x(__a, __b);
 }
 
-static vector double __ATTRS_o_ai
-vec_vsx_ld(int __a, const vector double *__b)
-{
+static vector double __ATTRS_o_ai vec_vsx_ld(int __a,
+                                             const vector double *__b) {
   return (vector double)__builtin_vsx_lxvd2x(__a, __b);
 }
 
@@ -9160,40 +8537,33 @@
 
 #ifdef __VSX__
 
-static void __ATTRS_o_ai
-vec_vsx_st(vector signed int __a, int __b, vector signed int *__c)
-{
+static void __ATTRS_o_ai vec_vsx_st(vector signed int __a, int __b,
+                                    vector signed int *__c) {
   __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_vsx_st(vector unsigned int __a, int __b, vector unsigned int *__c)
-{
+static void __ATTRS_o_ai vec_vsx_st(vector unsigned int __a, int __b,
+                                    vector unsigned int *__c) {
   __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_vsx_st(vector float __a, int __b, vector float *__c)
-{
+static void __ATTRS_o_ai vec_vsx_st(vector float __a, int __b,
+                                    vector float *__c) {
   __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_vsx_st(vector signed long long __a, int __b, vector signed long long *__c)
-{
+static void __ATTRS_o_ai vec_vsx_st(vector signed long long __a, int __b,
+                                    vector signed long long *__c) {
   __builtin_vsx_stxvd2x((vector double)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_vsx_st(vector unsigned long long __a, int __b,
-           vector unsigned long long *__c)
-{
+static void __ATTRS_o_ai vec_vsx_st(vector unsigned long long __a, int __b,
+                                    vector unsigned long long *__c) {
   __builtin_vsx_stxvd2x((vector double)__a, __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_vsx_st(vector double __a, int __b, vector double *__c)
-{
+static void __ATTRS_o_ai vec_vsx_st(vector double __a, int __b,
+                                    vector double *__c) {
   __builtin_vsx_stxvd2x((vector double)__a, __b, __c);
 }
 
@@ -9203,1656 +8573,1294 @@
 
 #define __builtin_altivec_vxor vec_xor
 
-static vector signed char __ATTRS_o_ai
-vec_xor(vector signed char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_xor(vector signed char __a,
+                                               vector signed char __b) {
   return __a ^ __b;
 }
 
-static vector signed char __ATTRS_o_ai
-vec_xor(vector bool char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_xor(vector bool char __a,
+                                               vector signed char __b) {
   return (vector signed char)__a ^ __b;
 }
 
-static vector signed char __ATTRS_o_ai
-vec_xor(vector signed char __a, vector bool char __b)
-{
+static vector signed char __ATTRS_o_ai vec_xor(vector signed char __a,
+                                               vector bool char __b) {
   return __a ^ (vector signed char)__b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_xor(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_xor(vector unsigned char __a,
+                                                 vector unsigned char __b) {
   return __a ^ __b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_xor(vector bool char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_xor(vector bool char __a,
+                                                 vector unsigned char __b) {
   return (vector unsigned char)__a ^ __b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_xor(vector unsigned char __a, vector bool char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_xor(vector unsigned char __a,
+                                                 vector bool char __b) {
   return __a ^ (vector unsigned char)__b;
 }
 
-static vector bool char __ATTRS_o_ai
-vec_xor(vector bool char __a, vector bool char __b)
-{
+static vector bool char __ATTRS_o_ai vec_xor(vector bool char __a,
+                                             vector bool char __b) {
   return __a ^ __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_xor(vector short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_xor(vector short __a, vector short __b) {
   return __a ^ __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_xor(vector bool short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_xor(vector bool short __a,
+                                         vector short __b) {
   return (vector short)__a ^ __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_xor(vector short __a, vector bool short __b)
-{
+static vector short __ATTRS_o_ai vec_xor(vector short __a,
+                                         vector bool short __b) {
   return __a ^ (vector short)__b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_xor(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_xor(vector unsigned short __a,
+                                                  vector unsigned short __b) {
   return __a ^ __b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_xor(vector bool short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_xor(vector bool short __a,
+                                                  vector unsigned short __b) {
   return (vector unsigned short)__a ^ __b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_xor(vector unsigned short __a, vector bool short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_xor(vector unsigned short __a,
+                                                  vector bool short __b) {
   return __a ^ (vector unsigned short)__b;
 }
 
-static vector bool short __ATTRS_o_ai
-vec_xor(vector bool short __a, vector bool short __b)
-{
+static vector bool short __ATTRS_o_ai vec_xor(vector bool short __a,
+                                              vector bool short __b) {
   return __a ^ __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_xor(vector int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_xor(vector int __a, vector int __b) {
   return __a ^ __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_xor(vector bool int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_xor(vector bool int __a, vector int __b) {
   return (vector int)__a ^ __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_xor(vector int __a, vector bool int __b)
-{
+static vector int __ATTRS_o_ai vec_xor(vector int __a, vector bool int __b) {
   return __a ^ (vector int)__b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_xor(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_xor(vector unsigned int __a,
+                                                vector unsigned int __b) {
   return __a ^ __b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_xor(vector bool int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_xor(vector bool int __a,
+                                                vector unsigned int __b) {
   return (vector unsigned int)__a ^ __b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_xor(vector unsigned int __a, vector bool int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_xor(vector unsigned int __a,
+                                                vector bool int __b) {
   return __a ^ (vector unsigned int)__b;
 }
 
-static vector bool int __ATTRS_o_ai
-vec_xor(vector bool int __a, vector bool int __b)
-{
+static vector bool int __ATTRS_o_ai vec_xor(vector bool int __a,
+                                            vector bool int __b) {
   return __a ^ __b;
 }
 
-static vector float __ATTRS_o_ai
-vec_xor(vector float __a, vector float __b)
-{
-  vector unsigned int __res = (vector unsigned int)__a ^ (vector unsigned int)__b;
+static vector float __ATTRS_o_ai vec_xor(vector float __a, vector float __b) {
+  vector unsigned int __res =
+      (vector unsigned int)__a ^ (vector unsigned int)__b;
   return (vector float)__res;
 }
 
-static vector float __ATTRS_o_ai
-vec_xor(vector bool int __a, vector float __b)
-{
-  vector unsigned int __res = (vector unsigned int)__a ^ (vector unsigned int)__b;
+static vector float __ATTRS_o_ai vec_xor(vector bool int __a,
+                                         vector float __b) {
+  vector unsigned int __res =
+      (vector unsigned int)__a ^ (vector unsigned int)__b;
   return (vector float)__res;
 }
 
-static vector float __ATTRS_o_ai
-vec_xor(vector float __a, vector bool int __b)
-{
-  vector unsigned int __res = (vector unsigned int)__a ^ (vector unsigned int)__b;
+static vector float __ATTRS_o_ai vec_xor(vector float __a,
+                                         vector bool int __b) {
+  vector unsigned int __res =
+      (vector unsigned int)__a ^ (vector unsigned int)__b;
   return (vector float)__res;
 }
 
+#ifdef __VSX__
+static vector signed long long __ATTRS_o_ai
+vec_xor(vector signed long long __a, vector signed long long __b) {
+  return __a ^ __b;
+}
+
+static vector signed long long __ATTRS_o_ai
+vec_xor(vector bool long long __a, vector signed long long __b) {
+  return (vector signed long long)__a ^ __b;
+}
+
+static vector signed long long __ATTRS_o_ai vec_xor(vector signed long long __a,
+                                                    vector bool long long __b) {
+  return __a ^ (vector signed long long)__b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_xor(vector unsigned long long __a, vector unsigned long long __b) {
+  return __a ^ __b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_xor(vector bool long long __a, vector unsigned long long __b) {
+  return (vector unsigned long long)__a ^ __b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_xor(vector unsigned long long __a, vector bool long long __b) {
+  return __a ^ (vector unsigned long long)__b;
+}
+
+static vector bool long long __ATTRS_o_ai vec_xor(vector bool long long __a,
+                                                  vector bool long long __b) {
+  return __a ^ __b;
+}
+#endif
+
 /* vec_vxor */
 
-static vector signed char __ATTRS_o_ai
-vec_vxor(vector signed char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vxor(vector signed char __a,
+                                                vector signed char __b) {
   return __a ^ __b;
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vxor(vector bool char __a, vector signed char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vxor(vector bool char __a,
+                                                vector signed char __b) {
   return (vector signed char)__a ^ __b;
 }
 
-static vector signed char __ATTRS_o_ai
-vec_vxor(vector signed char __a, vector bool char __b)
-{
+static vector signed char __ATTRS_o_ai vec_vxor(vector signed char __a,
+                                                vector bool char __b) {
   return __a ^ (vector signed char)__b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vxor(vector unsigned char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vxor(vector unsigned char __a,
+                                                  vector unsigned char __b) {
   return __a ^ __b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vxor(vector bool char __a, vector unsigned char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vxor(vector bool char __a,
+                                                  vector unsigned char __b) {
   return (vector unsigned char)__a ^ __b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_vxor(vector unsigned char __a, vector bool char __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_vxor(vector unsigned char __a,
+                                                  vector bool char __b) {
   return __a ^ (vector unsigned char)__b;
 }
 
-static vector bool char __ATTRS_o_ai
-vec_vxor(vector bool char __a, vector bool char __b)
-{
+static vector bool char __ATTRS_o_ai vec_vxor(vector bool char __a,
+                                              vector bool char __b) {
   return __a ^ __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_vxor(vector short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_vxor(vector short __a, vector short __b) {
   return __a ^ __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_vxor(vector bool short __a, vector short __b)
-{
+static vector short __ATTRS_o_ai vec_vxor(vector bool short __a,
+                                          vector short __b) {
   return (vector short)__a ^ __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_vxor(vector short __a, vector bool short __b)
-{
+static vector short __ATTRS_o_ai vec_vxor(vector short __a,
+                                          vector bool short __b) {
   return __a ^ (vector short)__b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vxor(vector unsigned short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vxor(vector unsigned short __a,
+                                                   vector unsigned short __b) {
   return __a ^ __b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vxor(vector bool short __a, vector unsigned short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vxor(vector bool short __a,
+                                                   vector unsigned short __b) {
   return (vector unsigned short)__a ^ __b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_vxor(vector unsigned short __a, vector bool short __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_vxor(vector unsigned short __a,
+                                                   vector bool short __b) {
   return __a ^ (vector unsigned short)__b;
 }
 
-static vector bool short __ATTRS_o_ai
-vec_vxor(vector bool short __a, vector bool short __b)
-{
+static vector bool short __ATTRS_o_ai vec_vxor(vector bool short __a,
+                                               vector bool short __b) {
   return __a ^ __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_vxor(vector int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_vxor(vector int __a, vector int __b) {
   return __a ^ __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_vxor(vector bool int __a, vector int __b)
-{
+static vector int __ATTRS_o_ai vec_vxor(vector bool int __a, vector int __b) {
   return (vector int)__a ^ __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_vxor(vector int __a, vector bool int __b)
-{
+static vector int __ATTRS_o_ai vec_vxor(vector int __a, vector bool int __b) {
   return __a ^ (vector int)__b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vxor(vector unsigned int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vxor(vector unsigned int __a,
+                                                 vector unsigned int __b) {
   return __a ^ __b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vxor(vector bool int __a, vector unsigned int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vxor(vector bool int __a,
+                                                 vector unsigned int __b) {
   return (vector unsigned int)__a ^ __b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_vxor(vector unsigned int __a, vector bool int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_vxor(vector unsigned int __a,
+                                                 vector bool int __b) {
   return __a ^ (vector unsigned int)__b;
 }
 
-static vector bool int __ATTRS_o_ai
-vec_vxor(vector bool int __a, vector bool int __b)
-{
+static vector bool int __ATTRS_o_ai vec_vxor(vector bool int __a,
+                                             vector bool int __b) {
   return __a ^ __b;
 }
 
-static vector float __ATTRS_o_ai
-vec_vxor(vector float __a, vector float __b)
-{
-  vector unsigned int __res = (vector unsigned int)__a ^ (vector unsigned int)__b;
+static vector float __ATTRS_o_ai vec_vxor(vector float __a, vector float __b) {
+  vector unsigned int __res =
+      (vector unsigned int)__a ^ (vector unsigned int)__b;
   return (vector float)__res;
 }
 
-static vector float __ATTRS_o_ai
-vec_vxor(vector bool int __a, vector float __b)
-{
-  vector unsigned int __res = (vector unsigned int)__a ^ (vector unsigned int)__b;
+static vector float __ATTRS_o_ai vec_vxor(vector bool int __a,
+                                          vector float __b) {
+  vector unsigned int __res =
+      (vector unsigned int)__a ^ (vector unsigned int)__b;
   return (vector float)__res;
 }
 
-static vector float __ATTRS_o_ai
-vec_vxor(vector float __a, vector bool int __b)
-{
-  vector unsigned int __res = (vector unsigned int)__a ^ (vector unsigned int)__b;
+static vector float __ATTRS_o_ai vec_vxor(vector float __a,
+                                          vector bool int __b) {
+  vector unsigned int __res =
+      (vector unsigned int)__a ^ (vector unsigned int)__b;
   return (vector float)__res;
 }
 
+#ifdef __VSX__
+static vector signed long long __ATTRS_o_ai
+vec_vxor(vector signed long long __a, vector signed long long __b) {
+  return __a ^ __b;
+}
+
+static vector signed long long __ATTRS_o_ai
+vec_vxor(vector bool long long __a, vector signed long long __b) {
+  return (vector signed long long)__a ^ __b;
+}
+
+static vector signed long long __ATTRS_o_ai
+vec_vxor(vector signed long long __a, vector bool long long __b) {
+  return __a ^ (vector signed long long)__b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_vxor(vector unsigned long long __a, vector unsigned long long __b) {
+  return __a ^ __b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_vxor(vector bool long long __a, vector unsigned long long __b) {
+  return (vector unsigned long long)__a ^ __b;
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_vxor(vector unsigned long long __a, vector bool long long __b) {
+  return __a ^ (vector unsigned long long)__b;
+}
+
+static vector bool long long __ATTRS_o_ai vec_vxor(vector bool long long __a,
+                                                   vector bool long long __b) {
+  return __a ^ __b;
+}
+#endif
+
 /* ------------------------ extensions for CBEA ----------------------------- */
 
 /* vec_extract */
 
-static signed char __ATTRS_o_ai
-vec_extract(vector signed char __a, int __b)
-{
+static signed char __ATTRS_o_ai vec_extract(vector signed char __a, int __b) {
   return __a[__b];
 }
 
-static unsigned char __ATTRS_o_ai
-vec_extract(vector unsigned char __a, int __b)
-{
+static unsigned char __ATTRS_o_ai vec_extract(vector unsigned char __a,
+                                              int __b) {
   return __a[__b];
 }
 
-static short __ATTRS_o_ai
-vec_extract(vector short __a, int __b)
-{
+static short __ATTRS_o_ai vec_extract(vector short __a, int __b) {
   return __a[__b];
 }
 
-static unsigned short __ATTRS_o_ai
-vec_extract(vector unsigned short __a, int __b)
-{
+static unsigned short __ATTRS_o_ai vec_extract(vector unsigned short __a,
+                                               int __b) {
   return __a[__b];
 }
 
-static int __ATTRS_o_ai
-vec_extract(vector int __a, int __b)
-{
+static int __ATTRS_o_ai vec_extract(vector int __a, int __b) {
   return __a[__b];
 }
 
-static unsigned int __ATTRS_o_ai
-vec_extract(vector unsigned int __a, int __b)
-{
+static unsigned int __ATTRS_o_ai vec_extract(vector unsigned int __a, int __b) {
   return __a[__b];
 }
 
-static float __ATTRS_o_ai
-vec_extract(vector float __a, int __b)
-{
+static float __ATTRS_o_ai vec_extract(vector float __a, int __b) {
   return __a[__b];
 }
 
 /* vec_insert */
 
-static vector signed char __ATTRS_o_ai
-vec_insert(signed char __a, vector signed char __b, int __c)
-{
+static vector signed char __ATTRS_o_ai vec_insert(signed char __a,
+                                                  vector signed char __b,
+                                                  int __c) {
   __b[__c] = __a;
   return __b;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_insert(unsigned char __a, vector unsigned char __b, int __c)
-{
+static vector unsigned char __ATTRS_o_ai vec_insert(unsigned char __a,
+                                                    vector unsigned char __b,
+                                                    int __c) {
   __b[__c] = __a;
   return __b;
 }
 
-static vector short __ATTRS_o_ai
-vec_insert(short __a, vector short __b, int __c)
-{
+static vector short __ATTRS_o_ai vec_insert(short __a, vector short __b,
+                                            int __c) {
   __b[__c] = __a;
   return __b;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_insert(unsigned short __a, vector unsigned short __b, int __c)
-{
+static vector unsigned short __ATTRS_o_ai vec_insert(unsigned short __a,
+                                                     vector unsigned short __b,
+                                                     int __c) {
   __b[__c] = __a;
   return __b;
 }
 
-static vector int __ATTRS_o_ai
-vec_insert(int __a, vector int __b, int __c)
-{
+static vector int __ATTRS_o_ai vec_insert(int __a, vector int __b, int __c) {
   __b[__c] = __a;
   return __b;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_insert(unsigned int __a, vector unsigned int __b, int __c)
-{
+static vector unsigned int __ATTRS_o_ai vec_insert(unsigned int __a,
+                                                   vector unsigned int __b,
+                                                   int __c) {
   __b[__c] = __a;
   return __b;
 }
 
-static vector float __ATTRS_o_ai
-vec_insert(float __a, vector float __b, int __c)
-{
+static vector float __ATTRS_o_ai vec_insert(float __a, vector float __b,
+                                            int __c) {
   __b[__c] = __a;
   return __b;
 }
 
 /* vec_lvlx */
 
-static vector signed char __ATTRS_o_ai
-vec_lvlx(int __a, const signed char *__b)
-{
-  return vec_perm(vec_ld(__a, __b),
-                  (vector signed char)(0),
+static vector signed char __ATTRS_o_ai vec_lvlx(int __a,
+                                                const signed char *__b) {
+  return vec_perm(vec_ld(__a, __b), (vector signed char)(0),
                   vec_lvsl(__a, __b));
 }
 
-static vector signed char __ATTRS_o_ai
-vec_lvlx(int __a, const vector signed char *__b)
-{
-  return vec_perm(vec_ld(__a, __b),
-                  (vector signed char)(0),
+static vector signed char __ATTRS_o_ai vec_lvlx(int __a,
+                                                const vector signed char *__b) {
+  return vec_perm(vec_ld(__a, __b), (vector signed char)(0),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_lvlx(int __a, const unsigned char *__b)
-{
-  return vec_perm(vec_ld(__a, __b),
-                  (vector unsigned char)(0),
+static vector unsigned char __ATTRS_o_ai vec_lvlx(int __a,
+                                                  const unsigned char *__b) {
+  return vec_perm(vec_ld(__a, __b), (vector unsigned char)(0),
                   vec_lvsl(__a, __b));
 }
 
 static vector unsigned char __ATTRS_o_ai
-vec_lvlx(int __a, const vector unsigned char *__b)
-{
-  return vec_perm(vec_ld(__a, __b),
-                  (vector unsigned char)(0),
+vec_lvlx(int __a, const vector unsigned char *__b) {
+  return vec_perm(vec_ld(__a, __b), (vector unsigned char)(0),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector bool char __ATTRS_o_ai
-vec_lvlx(int __a, const vector bool char *__b)
-{
-  return vec_perm(vec_ld(__a, __b),
-                  (vector bool char)(0),
+static vector bool char __ATTRS_o_ai vec_lvlx(int __a,
+                                              const vector bool char *__b) {
+  return vec_perm(vec_ld(__a, __b), (vector bool char)(0),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector short __ATTRS_o_ai
-vec_lvlx(int __a, const short *__b)
-{
-  return vec_perm(vec_ld(__a, __b),
-                  (vector short)(0),
-                  vec_lvsl(__a, __b));
+static vector short __ATTRS_o_ai vec_lvlx(int __a, const short *__b) {
+  return vec_perm(vec_ld(__a, __b), (vector short)(0), vec_lvsl(__a, __b));
 }
 
-static vector short __ATTRS_o_ai
-vec_lvlx(int __a, const vector short *__b)
-{
-  return vec_perm(vec_ld(__a, __b),
-                  (vector short)(0),
+static vector short __ATTRS_o_ai vec_lvlx(int __a, const vector short *__b) {
+  return vec_perm(vec_ld(__a, __b), (vector short)(0),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_lvlx(int __a, const unsigned short *__b)
-{
-  return vec_perm(vec_ld(__a, __b),
-                  (vector unsigned short)(0),
+static vector unsigned short __ATTRS_o_ai vec_lvlx(int __a,
+                                                   const unsigned short *__b) {
+  return vec_perm(vec_ld(__a, __b), (vector unsigned short)(0),
                   vec_lvsl(__a, __b));
 }
 
 static vector unsigned short __ATTRS_o_ai
-vec_lvlx(int __a, const vector unsigned short *__b)
-{
-  return vec_perm(vec_ld(__a, __b),
-                  (vector unsigned short)(0),
+vec_lvlx(int __a, const vector unsigned short *__b) {
+  return vec_perm(vec_ld(__a, __b), (vector unsigned short)(0),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector bool short __ATTRS_o_ai
-vec_lvlx(int __a, const vector bool short *__b)
-{
-  return vec_perm(vec_ld(__a, __b),
-                  (vector bool short)(0),
+static vector bool short __ATTRS_o_ai vec_lvlx(int __a,
+                                               const vector bool short *__b) {
+  return vec_perm(vec_ld(__a, __b), (vector bool short)(0),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector pixel __ATTRS_o_ai
-vec_lvlx(int __a, const vector pixel *__b)
-{
-  return vec_perm(vec_ld(__a, __b),
-                  (vector pixel)(0),
+static vector pixel __ATTRS_o_ai vec_lvlx(int __a, const vector pixel *__b) {
+  return vec_perm(vec_ld(__a, __b), (vector pixel)(0),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector int __ATTRS_o_ai
-vec_lvlx(int __a, const int *__b)
-{
-  return vec_perm(vec_ld(__a, __b),
-                  (vector int)(0),
-                  vec_lvsl(__a, __b));
+static vector int __ATTRS_o_ai vec_lvlx(int __a, const int *__b) {
+  return vec_perm(vec_ld(__a, __b), (vector int)(0), vec_lvsl(__a, __b));
 }
 
-static vector int __ATTRS_o_ai
-vec_lvlx(int __a, const vector int *__b)
-{
-  return vec_perm(vec_ld(__a, __b),
-                  (vector int)(0),
+static vector int __ATTRS_o_ai vec_lvlx(int __a, const vector int *__b) {
+  return vec_perm(vec_ld(__a, __b), (vector int)(0),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_lvlx(int __a, const unsigned int *__b)
-{
-  return vec_perm(vec_ld(__a, __b),
-                  (vector unsigned int)(0),
+static vector unsigned int __ATTRS_o_ai vec_lvlx(int __a,
+                                                 const unsigned int *__b) {
+  return vec_perm(vec_ld(__a, __b), (vector unsigned int)(0),
                   vec_lvsl(__a, __b));
 }
 
 static vector unsigned int __ATTRS_o_ai
-vec_lvlx(int __a, const vector unsigned int *__b)
-{
-  return vec_perm(vec_ld(__a, __b),
-                  (vector unsigned int)(0),
+vec_lvlx(int __a, const vector unsigned int *__b) {
+  return vec_perm(vec_ld(__a, __b), (vector unsigned int)(0),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector bool int __ATTRS_o_ai
-vec_lvlx(int __a, const vector bool int *__b)
-{
-  return vec_perm(vec_ld(__a, __b),
-                  (vector bool int)(0),
+static vector bool int __ATTRS_o_ai vec_lvlx(int __a,
+                                             const vector bool int *__b) {
+  return vec_perm(vec_ld(__a, __b), (vector bool int)(0),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector float __ATTRS_o_ai
-vec_lvlx(int __a, const float *__b)
-{
-  return vec_perm(vec_ld(__a, __b),
-                  (vector float)(0),
-                  vec_lvsl(__a, __b));
+static vector float __ATTRS_o_ai vec_lvlx(int __a, const float *__b) {
+  return vec_perm(vec_ld(__a, __b), (vector float)(0), vec_lvsl(__a, __b));
 }
 
-static vector float __ATTRS_o_ai
-vec_lvlx(int __a, const vector float *__b)
-{
-  return vec_perm(vec_ld(__a, __b),
-                  (vector float)(0),
+static vector float __ATTRS_o_ai vec_lvlx(int __a, const vector float *__b) {
+  return vec_perm(vec_ld(__a, __b), (vector float)(0),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
 /* vec_lvlxl */
 
-static vector signed char __ATTRS_o_ai
-vec_lvlxl(int __a, const signed char *__b)
-{
-  return vec_perm(vec_ldl(__a, __b),
-                  (vector signed char)(0),
+static vector signed char __ATTRS_o_ai vec_lvlxl(int __a,
+                                                 const signed char *__b) {
+  return vec_perm(vec_ldl(__a, __b), (vector signed char)(0),
                   vec_lvsl(__a, __b));
 }
 
 static vector signed char __ATTRS_o_ai
-vec_lvlxl(int __a, const vector signed char *__b)
-{
-  return vec_perm(vec_ldl(__a, __b),
-                  (vector signed char)(0),
+vec_lvlxl(int __a, const vector signed char *__b) {
+  return vec_perm(vec_ldl(__a, __b), (vector signed char)(0),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_lvlxl(int __a, const unsigned char *__b)
-{
-  return vec_perm(vec_ldl(__a, __b),
-                  (vector unsigned char)(0),
+static vector unsigned char __ATTRS_o_ai vec_lvlxl(int __a,
+                                                   const unsigned char *__b) {
+  return vec_perm(vec_ldl(__a, __b), (vector unsigned char)(0),
                   vec_lvsl(__a, __b));
 }
 
 static vector unsigned char __ATTRS_o_ai
-vec_lvlxl(int __a, const vector unsigned char *__b)
-{
-  return vec_perm(vec_ldl(__a, __b),
-                  (vector unsigned char)(0),
+vec_lvlxl(int __a, const vector unsigned char *__b) {
+  return vec_perm(vec_ldl(__a, __b), (vector unsigned char)(0),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector bool char __ATTRS_o_ai
-vec_lvlxl(int __a, const vector bool char *__b)
-{
-  return vec_perm(vec_ldl(__a, __b),
-                  (vector bool char)(0),
+static vector bool char __ATTRS_o_ai vec_lvlxl(int __a,
+                                               const vector bool char *__b) {
+  return vec_perm(vec_ldl(__a, __b), (vector bool char)(0),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector short __ATTRS_o_ai
-vec_lvlxl(int __a, const short *__b)
-{
-  return vec_perm(vec_ldl(__a, __b),
-                  (vector short)(0),
-                  vec_lvsl(__a, __b));
+static vector short __ATTRS_o_ai vec_lvlxl(int __a, const short *__b) {
+  return vec_perm(vec_ldl(__a, __b), (vector short)(0), vec_lvsl(__a, __b));
 }
 
-static vector short __ATTRS_o_ai
-vec_lvlxl(int __a, const vector short *__b)
-{
-  return vec_perm(vec_ldl(__a, __b),
-                  (vector short)(0),
+static vector short __ATTRS_o_ai vec_lvlxl(int __a, const vector short *__b) {
+  return vec_perm(vec_ldl(__a, __b), (vector short)(0),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_lvlxl(int __a, const unsigned short *__b)
-{
-  return vec_perm(vec_ldl(__a, __b),
-                  (vector unsigned short)(0),
+static vector unsigned short __ATTRS_o_ai vec_lvlxl(int __a,
+                                                    const unsigned short *__b) {
+  return vec_perm(vec_ldl(__a, __b), (vector unsigned short)(0),
                   vec_lvsl(__a, __b));
 }
 
 static vector unsigned short __ATTRS_o_ai
-vec_lvlxl(int __a, const vector unsigned short *__b)
-{
-  return vec_perm(vec_ldl(__a, __b),
-                  (vector unsigned short)(0),
+vec_lvlxl(int __a, const vector unsigned short *__b) {
+  return vec_perm(vec_ldl(__a, __b), (vector unsigned short)(0),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector bool short __ATTRS_o_ai
-vec_lvlxl(int __a, const vector bool short *__b)
-{
-  return vec_perm(vec_ldl(__a, __b),
-                  (vector bool short)(0),
+static vector bool short __ATTRS_o_ai vec_lvlxl(int __a,
+                                                const vector bool short *__b) {
+  return vec_perm(vec_ldl(__a, __b), (vector bool short)(0),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector pixel __ATTRS_o_ai
-vec_lvlxl(int __a, const vector pixel *__b)
-{
-  return vec_perm(vec_ldl(__a, __b),
-                  (vector pixel)(0),
+static vector pixel __ATTRS_o_ai vec_lvlxl(int __a, const vector pixel *__b) {
+  return vec_perm(vec_ldl(__a, __b), (vector pixel)(0),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector int __ATTRS_o_ai
-vec_lvlxl(int __a, const int *__b)
-{
-  return vec_perm(vec_ldl(__a, __b),
-                  (vector int)(0),
-                  vec_lvsl(__a, __b));
+static vector int __ATTRS_o_ai vec_lvlxl(int __a, const int *__b) {
+  return vec_perm(vec_ldl(__a, __b), (vector int)(0), vec_lvsl(__a, __b));
 }
 
-static vector int __ATTRS_o_ai
-vec_lvlxl(int __a, const vector int *__b)
-{
-  return vec_perm(vec_ldl(__a, __b),
-                  (vector int)(0),
+static vector int __ATTRS_o_ai vec_lvlxl(int __a, const vector int *__b) {
+  return vec_perm(vec_ldl(__a, __b), (vector int)(0),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_lvlxl(int __a, const unsigned int *__b)
-{
-  return vec_perm(vec_ldl(__a, __b),
-                  (vector unsigned int)(0),
+static vector unsigned int __ATTRS_o_ai vec_lvlxl(int __a,
+                                                  const unsigned int *__b) {
+  return vec_perm(vec_ldl(__a, __b), (vector unsigned int)(0),
                   vec_lvsl(__a, __b));
 }
 
 static vector unsigned int __ATTRS_o_ai
-vec_lvlxl(int __a, const vector unsigned int *__b)
-{
-  return vec_perm(vec_ldl(__a, __b),
-                  (vector unsigned int)(0),
+vec_lvlxl(int __a, const vector unsigned int *__b) {
+  return vec_perm(vec_ldl(__a, __b), (vector unsigned int)(0),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector bool int __ATTRS_o_ai
-vec_lvlxl(int __a, const vector bool int *__b)
-{
-  return vec_perm(vec_ldl(__a, __b),
-                  (vector bool int)(0),
+static vector bool int __ATTRS_o_ai vec_lvlxl(int __a,
+                                              const vector bool int *__b) {
+  return vec_perm(vec_ldl(__a, __b), (vector bool int)(0),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector float __ATTRS_o_ai
-vec_lvlxl(int __a, const float *__b)
-{
-  return vec_perm(vec_ldl(__a, __b),
-                  (vector float)(0),
-                  vec_lvsl(__a, __b));
+static vector float __ATTRS_o_ai vec_lvlxl(int __a, const float *__b) {
+  return vec_perm(vec_ldl(__a, __b), (vector float)(0), vec_lvsl(__a, __b));
 }
 
-static vector float __ATTRS_o_ai
-vec_lvlxl(int __a, vector float *__b)
-{
-  return vec_perm(vec_ldl(__a, __b),
-                  (vector float)(0),
+static vector float __ATTRS_o_ai vec_lvlxl(int __a, vector float *__b) {
+  return vec_perm(vec_ldl(__a, __b), (vector float)(0),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
 /* vec_lvrx */
 
-static vector signed char __ATTRS_o_ai
-vec_lvrx(int __a, const signed char *__b)
-{
-  return vec_perm((vector signed char)(0),
-                  vec_ld(__a, __b),
+static vector signed char __ATTRS_o_ai vec_lvrx(int __a,
+                                                const signed char *__b) {
+  return vec_perm((vector signed char)(0), vec_ld(__a, __b),
                   vec_lvsl(__a, __b));
 }
 
-static vector signed char __ATTRS_o_ai
-vec_lvrx(int __a, const vector signed char *__b)
-{
-  return vec_perm((vector signed char)(0),
-                  vec_ld(__a, __b),
+static vector signed char __ATTRS_o_ai vec_lvrx(int __a,
+                                                const vector signed char *__b) {
+  return vec_perm((vector signed char)(0), vec_ld(__a, __b),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_lvrx(int __a, const unsigned char *__b)
-{
-  return vec_perm((vector unsigned char)(0),
-                  vec_ld(__a, __b),
+static vector unsigned char __ATTRS_o_ai vec_lvrx(int __a,
+                                                  const unsigned char *__b) {
+  return vec_perm((vector unsigned char)(0), vec_ld(__a, __b),
                   vec_lvsl(__a, __b));
 }
 
 static vector unsigned char __ATTRS_o_ai
-vec_lvrx(int __a, const vector unsigned char *__b)
-{
-  return vec_perm((vector unsigned char)(0),
-                  vec_ld(__a, __b),
+vec_lvrx(int __a, const vector unsigned char *__b) {
+  return vec_perm((vector unsigned char)(0), vec_ld(__a, __b),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector bool char __ATTRS_o_ai
-vec_lvrx(int __a, const vector bool char *__b)
-{
-  return vec_perm((vector bool char)(0),
-                  vec_ld(__a, __b),
+static vector bool char __ATTRS_o_ai vec_lvrx(int __a,
+                                              const vector bool char *__b) {
+  return vec_perm((vector bool char)(0), vec_ld(__a, __b),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector short __ATTRS_o_ai
-vec_lvrx(int __a, const short *__b)
-{
-  return vec_perm((vector short)(0),
-                  vec_ld(__a, __b),
-                  vec_lvsl(__a, __b));
+static vector short __ATTRS_o_ai vec_lvrx(int __a, const short *__b) {
+  return vec_perm((vector short)(0), vec_ld(__a, __b), vec_lvsl(__a, __b));
 }
 
-static vector short __ATTRS_o_ai
-vec_lvrx(int __a, const vector short *__b)
-{
-  return vec_perm((vector short)(0),
-                  vec_ld(__a, __b),
+static vector short __ATTRS_o_ai vec_lvrx(int __a, const vector short *__b) {
+  return vec_perm((vector short)(0), vec_ld(__a, __b),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_lvrx(int __a, const unsigned short *__b)
-{
-  return vec_perm((vector unsigned short)(0),
-                  vec_ld(__a, __b),
+static vector unsigned short __ATTRS_o_ai vec_lvrx(int __a,
+                                                   const unsigned short *__b) {
+  return vec_perm((vector unsigned short)(0), vec_ld(__a, __b),
                   vec_lvsl(__a, __b));
 }
 
 static vector unsigned short __ATTRS_o_ai
-vec_lvrx(int __a, const vector unsigned short *__b)
-{
-  return vec_perm((vector unsigned short)(0),
-                  vec_ld(__a, __b),
+vec_lvrx(int __a, const vector unsigned short *__b) {
+  return vec_perm((vector unsigned short)(0), vec_ld(__a, __b),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector bool short __ATTRS_o_ai
-vec_lvrx(int __a, const vector bool short *__b)
-{
-  return vec_perm((vector bool short)(0),
-                  vec_ld(__a, __b),
+static vector bool short __ATTRS_o_ai vec_lvrx(int __a,
+                                               const vector bool short *__b) {
+  return vec_perm((vector bool short)(0), vec_ld(__a, __b),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector pixel __ATTRS_o_ai
-vec_lvrx(int __a, const vector pixel *__b)
-{
-  return vec_perm((vector pixel)(0),
-                  vec_ld(__a, __b),
+static vector pixel __ATTRS_o_ai vec_lvrx(int __a, const vector pixel *__b) {
+  return vec_perm((vector pixel)(0), vec_ld(__a, __b),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector int __ATTRS_o_ai
-vec_lvrx(int __a, const int *__b)
-{
-  return vec_perm((vector int)(0),
-                  vec_ld(__a, __b),
-                  vec_lvsl(__a, __b));
+static vector int __ATTRS_o_ai vec_lvrx(int __a, const int *__b) {
+  return vec_perm((vector int)(0), vec_ld(__a, __b), vec_lvsl(__a, __b));
 }
 
-static vector int __ATTRS_o_ai
-vec_lvrx(int __a, const vector int *__b)
-{
-  return vec_perm((vector int)(0),
-                  vec_ld(__a, __b),
+static vector int __ATTRS_o_ai vec_lvrx(int __a, const vector int *__b) {
+  return vec_perm((vector int)(0), vec_ld(__a, __b),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_lvrx(int __a, const unsigned int *__b)
-{
-  return vec_perm((vector unsigned int)(0),
-                  vec_ld(__a, __b),
+static vector unsigned int __ATTRS_o_ai vec_lvrx(int __a,
+                                                 const unsigned int *__b) {
+  return vec_perm((vector unsigned int)(0), vec_ld(__a, __b),
                   vec_lvsl(__a, __b));
 }
 
 static vector unsigned int __ATTRS_o_ai
-vec_lvrx(int __a, const vector unsigned int *__b)
-{
-  return vec_perm((vector unsigned int)(0),
-                  vec_ld(__a, __b),
+vec_lvrx(int __a, const vector unsigned int *__b) {
+  return vec_perm((vector unsigned int)(0), vec_ld(__a, __b),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector bool int __ATTRS_o_ai
-vec_lvrx(int __a, const vector bool int *__b)
-{
-  return vec_perm((vector bool int)(0),
-                  vec_ld(__a, __b),
+static vector bool int __ATTRS_o_ai vec_lvrx(int __a,
+                                             const vector bool int *__b) {
+  return vec_perm((vector bool int)(0), vec_ld(__a, __b),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector float __ATTRS_o_ai
-vec_lvrx(int __a, const float *__b)
-{
-  return vec_perm((vector float)(0),
-                  vec_ld(__a, __b),
-                  vec_lvsl(__a, __b));
+static vector float __ATTRS_o_ai vec_lvrx(int __a, const float *__b) {
+  return vec_perm((vector float)(0), vec_ld(__a, __b), vec_lvsl(__a, __b));
 }
 
-static vector float __ATTRS_o_ai
-vec_lvrx(int __a, const vector float *__b)
-{
-  return vec_perm((vector float)(0),
-                  vec_ld(__a, __b),
+static vector float __ATTRS_o_ai vec_lvrx(int __a, const vector float *__b) {
+  return vec_perm((vector float)(0), vec_ld(__a, __b),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
 /* vec_lvrxl */
 
-static vector signed char __ATTRS_o_ai
-vec_lvrxl(int __a, const signed char *__b)
-{
-  return vec_perm((vector signed char)(0),
-                  vec_ldl(__a, __b),
+static vector signed char __ATTRS_o_ai vec_lvrxl(int __a,
+                                                 const signed char *__b) {
+  return vec_perm((vector signed char)(0), vec_ldl(__a, __b),
                   vec_lvsl(__a, __b));
 }
 
 static vector signed char __ATTRS_o_ai
-vec_lvrxl(int __a, const vector signed char *__b)
-{
-  return vec_perm((vector signed char)(0),
-                  vec_ldl(__a, __b),
+vec_lvrxl(int __a, const vector signed char *__b) {
+  return vec_perm((vector signed char)(0), vec_ldl(__a, __b),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_lvrxl(int __a, const unsigned char *__b)
-{
-  return vec_perm((vector unsigned char)(0),
-                  vec_ldl(__a, __b),
+static vector unsigned char __ATTRS_o_ai vec_lvrxl(int __a,
+                                                   const unsigned char *__b) {
+  return vec_perm((vector unsigned char)(0), vec_ldl(__a, __b),
                   vec_lvsl(__a, __b));
 }
 
 static vector unsigned char __ATTRS_o_ai
-vec_lvrxl(int __a, const vector unsigned char *__b)
-{
-  return vec_perm((vector unsigned char)(0),
-                  vec_ldl(__a, __b),
+vec_lvrxl(int __a, const vector unsigned char *__b) {
+  return vec_perm((vector unsigned char)(0), vec_ldl(__a, __b),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector bool char __ATTRS_o_ai
-vec_lvrxl(int __a, const vector bool char *__b)
-{
-  return vec_perm((vector bool char)(0),
-                  vec_ldl(__a, __b),
+static vector bool char __ATTRS_o_ai vec_lvrxl(int __a,
+                                               const vector bool char *__b) {
+  return vec_perm((vector bool char)(0), vec_ldl(__a, __b),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector short __ATTRS_o_ai
-vec_lvrxl(int __a, const short *__b)
-{
-  return vec_perm((vector short)(0),
-                  vec_ldl(__a, __b),
-                  vec_lvsl(__a, __b));
+static vector short __ATTRS_o_ai vec_lvrxl(int __a, const short *__b) {
+  return vec_perm((vector short)(0), vec_ldl(__a, __b), vec_lvsl(__a, __b));
 }
 
-static vector short __ATTRS_o_ai
-vec_lvrxl(int __a, const vector short *__b)
-{
-  return vec_perm((vector short)(0),
-                  vec_ldl(__a, __b),
+static vector short __ATTRS_o_ai vec_lvrxl(int __a, const vector short *__b) {
+  return vec_perm((vector short)(0), vec_ldl(__a, __b),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_lvrxl(int __a, const unsigned short *__b)
-{
-  return vec_perm((vector unsigned short)(0),
-                  vec_ldl(__a, __b),
+static vector unsigned short __ATTRS_o_ai vec_lvrxl(int __a,
+                                                    const unsigned short *__b) {
+  return vec_perm((vector unsigned short)(0), vec_ldl(__a, __b),
                   vec_lvsl(__a, __b));
 }
 
 static vector unsigned short __ATTRS_o_ai
-vec_lvrxl(int __a, const vector unsigned short *__b)
-{
-  return vec_perm((vector unsigned short)(0),
-                  vec_ldl(__a, __b),
+vec_lvrxl(int __a, const vector unsigned short *__b) {
+  return vec_perm((vector unsigned short)(0), vec_ldl(__a, __b),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector bool short __ATTRS_o_ai
-vec_lvrxl(int __a, const vector bool short *__b)
-{
-  return vec_perm((vector bool short)(0),
-                  vec_ldl(__a, __b),
+static vector bool short __ATTRS_o_ai vec_lvrxl(int __a,
+                                                const vector bool short *__b) {
+  return vec_perm((vector bool short)(0), vec_ldl(__a, __b),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector pixel __ATTRS_o_ai
-vec_lvrxl(int __a, const vector pixel *__b)
-{
-  return vec_perm((vector pixel)(0),
-                  vec_ldl(__a, __b),
+static vector pixel __ATTRS_o_ai vec_lvrxl(int __a, const vector pixel *__b) {
+  return vec_perm((vector pixel)(0), vec_ldl(__a, __b),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector int __ATTRS_o_ai
-vec_lvrxl(int __a, const int *__b)
-{
-  return vec_perm((vector int)(0),
-                  vec_ldl(__a, __b),
-                  vec_lvsl(__a, __b));
+static vector int __ATTRS_o_ai vec_lvrxl(int __a, const int *__b) {
+  return vec_perm((vector int)(0), vec_ldl(__a, __b), vec_lvsl(__a, __b));
 }
 
-static vector int __ATTRS_o_ai
-vec_lvrxl(int __a, const vector int *__b)
-{
-  return vec_perm((vector int)(0),
-                  vec_ldl(__a, __b),
+static vector int __ATTRS_o_ai vec_lvrxl(int __a, const vector int *__b) {
+  return vec_perm((vector int)(0), vec_ldl(__a, __b),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_lvrxl(int __a, const unsigned int *__b)
-{
-  return vec_perm((vector unsigned int)(0),
-                  vec_ldl(__a, __b),
+static vector unsigned int __ATTRS_o_ai vec_lvrxl(int __a,
+                                                  const unsigned int *__b) {
+  return vec_perm((vector unsigned int)(0), vec_ldl(__a, __b),
                   vec_lvsl(__a, __b));
 }
 
 static vector unsigned int __ATTRS_o_ai
-vec_lvrxl(int __a, const vector unsigned int *__b)
-{
-  return vec_perm((vector unsigned int)(0),
-                  vec_ldl(__a, __b),
+vec_lvrxl(int __a, const vector unsigned int *__b) {
+  return vec_perm((vector unsigned int)(0), vec_ldl(__a, __b),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector bool int __ATTRS_o_ai
-vec_lvrxl(int __a, const vector bool int *__b)
-{
-  return vec_perm((vector bool int)(0),
-                  vec_ldl(__a, __b),
+static vector bool int __ATTRS_o_ai vec_lvrxl(int __a,
+                                              const vector bool int *__b) {
+  return vec_perm((vector bool int)(0), vec_ldl(__a, __b),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
-static vector float __ATTRS_o_ai
-vec_lvrxl(int __a, const float *__b)
-{
-  return vec_perm((vector float)(0),
-                  vec_ldl(__a, __b),
-                  vec_lvsl(__a, __b));
+static vector float __ATTRS_o_ai vec_lvrxl(int __a, const float *__b) {
+  return vec_perm((vector float)(0), vec_ldl(__a, __b), vec_lvsl(__a, __b));
 }
 
-static vector float __ATTRS_o_ai
-vec_lvrxl(int __a, const vector float *__b)
-{
-  return vec_perm((vector float)(0),
-                  vec_ldl(__a, __b),
+static vector float __ATTRS_o_ai vec_lvrxl(int __a, const vector float *__b) {
+  return vec_perm((vector float)(0), vec_ldl(__a, __b),
                   vec_lvsl(__a, (unsigned char *)__b));
 }
 
 /* vec_stvlx */
 
-static void __ATTRS_o_ai
-vec_stvlx(vector signed char __a, int __b, signed char *__c)
-{
-  return vec_st(vec_perm(vec_lvrx(__b, __c),
-                         __a,
-                         vec_lvsr(__b, __c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvlx(vector signed char __a, int __b,
+                                   signed char *__c) {
+  return vec_st(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b,
+                __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlx(vector signed char __a, int __b, vector signed char *__c)
-{
-  return vec_st(vec_perm(vec_lvrx(__b, __c),
-                         __a,
-                         vec_lvsr(__b, (unsigned char *)__c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvlx(vector signed char __a, int __b,
+                                   vector signed char *__c) {
+  return vec_st(
+      vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlx(vector unsigned char __a, int __b, unsigned char *__c)
-{
-  return vec_st(vec_perm(vec_lvrx(__b, __c),
-                         __a,
-                         vec_lvsr(__b, __c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvlx(vector unsigned char __a, int __b,
+                                   unsigned char *__c) {
+  return vec_st(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b,
+                __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlx(vector unsigned char __a, int __b, vector unsigned char *__c)
-{
-  return vec_st(vec_perm(vec_lvrx(__b, __c),
-                         __a,
-                         vec_lvsr(__b, (unsigned char *)__c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvlx(vector unsigned char __a, int __b,
+                                   vector unsigned char *__c) {
+  return vec_st(
+      vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlx(vector bool char __a, int __b, vector bool char *__c)
-{
-  return vec_st(vec_perm(vec_lvrx(__b, __c),
-                         __a,
-                         vec_lvsr(__b, (unsigned char *)__c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvlx(vector bool char __a, int __b,
+                                   vector bool char *__c) {
+  return vec_st(
+      vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlx(vector short __a, int __b, short *__c)
-{
-  return vec_st(vec_perm(vec_lvrx(__b, __c),
-                         __a,
-                         vec_lvsr(__b, __c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvlx(vector short __a, int __b, short *__c) {
+  return vec_st(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b,
+                __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlx(vector short __a, int __b, vector short *__c)
-{
-  return vec_st(vec_perm(vec_lvrx(__b, __c),
-                         __a,
-                         vec_lvsr(__b, (unsigned char *)__c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvlx(vector short __a, int __b,
+                                   vector short *__c) {
+  return vec_st(
+      vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlx(vector unsigned short __a, int __b, unsigned short *__c)
-{
-  return vec_st(vec_perm(vec_lvrx(__b, __c),
-                         __a,
-                         vec_lvsr(__b, __c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvlx(vector unsigned short __a, int __b,
+                                   unsigned short *__c) {
+  return vec_st(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b,
+                __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlx(vector unsigned short __a, int __b, vector unsigned short *__c)
-{
-  return vec_st(vec_perm(vec_lvrx(__b, __c),
-                         __a,
-                         vec_lvsr(__b, (unsigned char *)__c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvlx(vector unsigned short __a, int __b,
+                                   vector unsigned short *__c) {
+  return vec_st(
+      vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlx(vector bool short __a, int __b, vector bool short *__c)
-{
-  return vec_st(vec_perm(vec_lvrx(__b, __c),
-                         __a,
-                         vec_lvsr(__b, (unsigned char *)__c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvlx(vector bool short __a, int __b,
+                                   vector bool short *__c) {
+  return vec_st(
+      vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlx(vector pixel __a, int __b, vector pixel *__c)
-{
-  return vec_st(vec_perm(vec_lvrx(__b, __c),
-                         __a,
-                         vec_lvsr(__b, (unsigned char *)__c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvlx(vector pixel __a, int __b,
+                                   vector pixel *__c) {
+  return vec_st(
+      vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlx(vector int __a, int __b, int *__c)
-{
-  return vec_st(vec_perm(vec_lvrx(__b, __c),
-                         __a,
-                         vec_lvsr(__b, __c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvlx(vector int __a, int __b, int *__c) {
+  return vec_st(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b,
+                __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlx(vector int __a, int __b, vector int *__c)
-{
-  return vec_st(vec_perm(vec_lvrx(__b, __c),
-                         __a,
-                         vec_lvsr(__b, (unsigned char *)__c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvlx(vector int __a, int __b, vector int *__c) {
+  return vec_st(
+      vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlx(vector unsigned int __a, int __b, unsigned int *__c)
-{
-  return vec_st(vec_perm(vec_lvrx(__b, __c),
-                         __a,
-                         vec_lvsr(__b, __c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvlx(vector unsigned int __a, int __b,
+                                   unsigned int *__c) {
+  return vec_st(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b,
+                __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlx(vector unsigned int __a, int __b, vector unsigned int *__c)
-{
-  return vec_st(vec_perm(vec_lvrx(__b, __c),
-                         __a,
-                         vec_lvsr(__b, (unsigned char *)__c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvlx(vector unsigned int __a, int __b,
+                                   vector unsigned int *__c) {
+  return vec_st(
+      vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlx(vector bool int __a, int __b, vector bool int *__c)
-{
-  return vec_st(vec_perm(vec_lvrx(__b, __c),
-                         __a,
-                         vec_lvsr(__b, (unsigned char *)__c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvlx(vector bool int __a, int __b,
+                                   vector bool int *__c) {
+  return vec_st(
+      vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlx(vector float __a, int __b, vector float *__c)
-{
-  return vec_st(vec_perm(vec_lvrx(__b, __c),
-                         __a,
-                         vec_lvsr(__b, (unsigned char *)__c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvlx(vector float __a, int __b,
+                                   vector float *__c) {
+  return vec_st(
+      vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
 /* vec_stvlxl */
 
-static void __ATTRS_o_ai
-vec_stvlxl(vector signed char __a, int __b, signed char *__c)
-{
-  return vec_stl(vec_perm(vec_lvrx(__b, __c),
-                          __a,
-                          vec_lvsr(__b, __c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvlxl(vector signed char __a, int __b,
+                                    signed char *__c) {
+  return vec_stl(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b,
+                 __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlxl(vector signed char __a, int __b, vector signed char *__c)
-{
-  return vec_stl(vec_perm(vec_lvrx(__b, __c),
-                          __a,
-                          vec_lvsr(__b, (unsigned char *)__c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvlxl(vector signed char __a, int __b,
+                                    vector signed char *__c) {
+  return vec_stl(
+      vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlxl(vector unsigned char __a, int __b, unsigned char *__c)
-{
-  return vec_stl(vec_perm(vec_lvrx(__b, __c),
-                          __a,
-                          vec_lvsr(__b, __c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvlxl(vector unsigned char __a, int __b,
+                                    unsigned char *__c) {
+  return vec_stl(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b,
+                 __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlxl(vector unsigned char __a, int __b, vector unsigned char *__c)
-{
-  return vec_stl(vec_perm(vec_lvrx(__b, __c),
-                          __a,
-                          vec_lvsr(__b, (unsigned char *)__c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvlxl(vector unsigned char __a, int __b,
+                                    vector unsigned char *__c) {
+  return vec_stl(
+      vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlxl(vector bool char __a, int __b, vector bool char *__c)
-{
-  return vec_stl(vec_perm(vec_lvrx(__b, __c),
-                          __a,
-                          vec_lvsr(__b, (unsigned char *)__c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvlxl(vector bool char __a, int __b,
+                                    vector bool char *__c) {
+  return vec_stl(
+      vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlxl(vector short __a, int __b, short *__c)
-{
-  return vec_stl(vec_perm(vec_lvrx(__b, __c),
-                          __a,
-                          vec_lvsr(__b, __c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvlxl(vector short __a, int __b, short *__c) {
+  return vec_stl(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b,
+                 __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlxl(vector short __a, int __b, vector short *__c)
-{
-  return vec_stl(vec_perm(vec_lvrx(__b, __c),
-                          __a,
-                          vec_lvsr(__b, (unsigned char *)__c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvlxl(vector short __a, int __b,
+                                    vector short *__c) {
+  return vec_stl(
+      vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlxl(vector unsigned short __a, int __b, unsigned short *__c)
-{
-  return vec_stl(vec_perm(vec_lvrx(__b, __c),
-                          __a,
-                          vec_lvsr(__b, __c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvlxl(vector unsigned short __a, int __b,
+                                    unsigned short *__c) {
+  return vec_stl(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b,
+                 __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlxl(vector unsigned short __a, int __b, vector unsigned short *__c)
-{
-  return vec_stl(vec_perm(vec_lvrx(__b, __c),
-                          __a,
-                          vec_lvsr(__b, (unsigned char *)__c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvlxl(vector unsigned short __a, int __b,
+                                    vector unsigned short *__c) {
+  return vec_stl(
+      vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlxl(vector bool short __a, int __b, vector bool short *__c)
-{
-  return vec_stl(vec_perm(vec_lvrx(__b, __c),
-                          __a,
-                          vec_lvsr(__b, (unsigned char *)__c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvlxl(vector bool short __a, int __b,
+                                    vector bool short *__c) {
+  return vec_stl(
+      vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlxl(vector pixel __a, int __b, vector pixel *__c)
-{
-  return vec_stl(vec_perm(vec_lvrx(__b, __c),
-                          __a,
-                          vec_lvsr(__b, (unsigned char *)__c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvlxl(vector pixel __a, int __b,
+                                    vector pixel *__c) {
+  return vec_stl(
+      vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlxl(vector int __a, int __b, int *__c)
-{
-  return vec_stl(vec_perm(vec_lvrx(__b, __c),
-                          __a,
-                          vec_lvsr(__b, __c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvlxl(vector int __a, int __b, int *__c) {
+  return vec_stl(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b,
+                 __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlxl(vector int __a, int __b, vector int *__c)
-{
-  return vec_stl(vec_perm(vec_lvrx(__b, __c),
-                          __a,
-                          vec_lvsr(__b, (unsigned char *)__c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvlxl(vector int __a, int __b, vector int *__c) {
+  return vec_stl(
+      vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlxl(vector unsigned int __a, int __b, unsigned int *__c)
-{
-  return vec_stl(vec_perm(vec_lvrx(__b, __c),
-                          __a,
-                          vec_lvsr(__b, __c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvlxl(vector unsigned int __a, int __b,
+                                    unsigned int *__c) {
+  return vec_stl(vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, __c)), __b,
+                 __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlxl(vector unsigned int __a, int __b, vector unsigned int *__c)
-{
-  return vec_stl(vec_perm(vec_lvrx(__b, __c),
-                          __a,
-                          vec_lvsr(__b, (unsigned char *)__c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvlxl(vector unsigned int __a, int __b,
+                                    vector unsigned int *__c) {
+  return vec_stl(
+      vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlxl(vector bool int __a, int __b, vector bool int *__c)
-{
-  return vec_stl(vec_perm(vec_lvrx(__b, __c),
-                          __a,
-                          vec_lvsr(__b, (unsigned char *)__c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvlxl(vector bool int __a, int __b,
+                                    vector bool int *__c) {
+  return vec_stl(
+      vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvlxl(vector float __a, int __b, vector float *__c)
-{
-  return vec_stl(vec_perm(vec_lvrx(__b, __c),
-                          __a,
-                          vec_lvsr(__b, (unsigned char *)__c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvlxl(vector float __a, int __b,
+                                    vector float *__c) {
+  return vec_stl(
+      vec_perm(vec_lvrx(__b, __c), __a, vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
 /* vec_stvrx */
 
-static void __ATTRS_o_ai
-vec_stvrx(vector signed char __a, int __b, signed char *__c)
-{
-  return vec_st(vec_perm(__a,
-                         vec_lvlx(__b, __c),
-                         vec_lvsr(__b, __c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvrx(vector signed char __a, int __b,
+                                   signed char *__c) {
+  return vec_st(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b,
+                __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrx(vector signed char __a, int __b, vector signed char *__c)
-{
-  return vec_st(vec_perm(__a,
-                         vec_lvlx(__b, __c),
-                         vec_lvsr(__b, (unsigned char *)__c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvrx(vector signed char __a, int __b,
+                                   vector signed char *__c) {
+  return vec_st(
+      vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrx(vector unsigned char __a, int __b, unsigned char *__c)
-{
-  return vec_st(vec_perm(__a,
-                         vec_lvlx(__b, __c),
-                         vec_lvsr(__b, __c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvrx(vector unsigned char __a, int __b,
+                                   unsigned char *__c) {
+  return vec_st(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b,
+                __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrx(vector unsigned char __a, int __b, vector unsigned char *__c)
-{
-  return vec_st(vec_perm(__a,
-                         vec_lvlx(__b, __c),
-                         vec_lvsr(__b, (unsigned char *)__c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvrx(vector unsigned char __a, int __b,
+                                   vector unsigned char *__c) {
+  return vec_st(
+      vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrx(vector bool char __a, int __b, vector bool char *__c)
-{
-  return vec_st(vec_perm(__a,
-                         vec_lvlx(__b, __c),
-                         vec_lvsr(__b, (unsigned char *)__c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvrx(vector bool char __a, int __b,
+                                   vector bool char *__c) {
+  return vec_st(
+      vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrx(vector short __a, int __b, short *__c)
-{
-  return vec_st(vec_perm(__a,
-                         vec_lvlx(__b, __c),
-                         vec_lvsr(__b, __c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvrx(vector short __a, int __b, short *__c) {
+  return vec_st(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b,
+                __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrx(vector short __a, int __b, vector short *__c)
-{
-  return vec_st(vec_perm(__a,
-                         vec_lvlx(__b, __c),
-                         vec_lvsr(__b, (unsigned char *)__c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvrx(vector short __a, int __b,
+                                   vector short *__c) {
+  return vec_st(
+      vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrx(vector unsigned short __a, int __b, unsigned short *__c)
-{
-  return vec_st(vec_perm(__a,
-                         vec_lvlx(__b, __c),
-                         vec_lvsr(__b, __c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvrx(vector unsigned short __a, int __b,
+                                   unsigned short *__c) {
+  return vec_st(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b,
+                __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrx(vector unsigned short __a, int __b, vector unsigned short *__c)
-{
-  return vec_st(vec_perm(__a,
-                         vec_lvlx(__b, __c),
-                         vec_lvsr(__b, (unsigned char *)__c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvrx(vector unsigned short __a, int __b,
+                                   vector unsigned short *__c) {
+  return vec_st(
+      vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrx(vector bool short __a, int __b, vector bool short *__c)
-{
-  return vec_st(vec_perm(__a,
-                         vec_lvlx(__b, __c),
-                         vec_lvsr(__b, (unsigned char *)__c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvrx(vector bool short __a, int __b,
+                                   vector bool short *__c) {
+  return vec_st(
+      vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrx(vector pixel __a, int __b, vector pixel *__c)
-{
-  return vec_st(vec_perm(__a,
-                         vec_lvlx(__b, __c),
-                         vec_lvsr(__b, (unsigned char *)__c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvrx(vector pixel __a, int __b,
+                                   vector pixel *__c) {
+  return vec_st(
+      vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrx(vector int __a, int __b, int *__c)
-{
-  return vec_st(vec_perm(__a,
-                         vec_lvlx(__b, __c),
-                         vec_lvsr(__b, __c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvrx(vector int __a, int __b, int *__c) {
+  return vec_st(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b,
+                __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrx(vector int __a, int __b, vector int *__c)
-{
-  return vec_st(vec_perm(__a,
-                         vec_lvlx(__b, __c),
-                         vec_lvsr(__b, (unsigned char *)__c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvrx(vector int __a, int __b, vector int *__c) {
+  return vec_st(
+      vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrx(vector unsigned int __a, int __b, unsigned int *__c)
-{
-  return vec_st(vec_perm(__a,
-                         vec_lvlx(__b, __c),
-                         vec_lvsr(__b, __c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvrx(vector unsigned int __a, int __b,
+                                   unsigned int *__c) {
+  return vec_st(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b,
+                __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrx(vector unsigned int __a, int __b, vector unsigned int *__c)
-{
-  return vec_st(vec_perm(__a,
-                         vec_lvlx(__b, __c),
-                         vec_lvsr(__b, (unsigned char *)__c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvrx(vector unsigned int __a, int __b,
+                                   vector unsigned int *__c) {
+  return vec_st(
+      vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrx(vector bool int __a, int __b, vector bool int *__c)
-{
-  return vec_st(vec_perm(__a,
-                         vec_lvlx(__b, __c),
-                         vec_lvsr(__b, (unsigned char *)__c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvrx(vector bool int __a, int __b,
+                                   vector bool int *__c) {
+  return vec_st(
+      vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrx(vector float __a, int __b, vector float *__c)
-{
-  return vec_st(vec_perm(__a,
-                         vec_lvlx(__b, __c),
-                         vec_lvsr(__b, (unsigned char *)__c)),
-                __b, __c);
+static void __ATTRS_o_ai vec_stvrx(vector float __a, int __b,
+                                   vector float *__c) {
+  return vec_st(
+      vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
 /* vec_stvrxl */
 
-static void __ATTRS_o_ai
-vec_stvrxl(vector signed char __a, int __b, signed char *__c)
-{
-  return vec_stl(vec_perm(__a,
-                          vec_lvlx(__b, __c),
-                          vec_lvsr(__b, __c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvrxl(vector signed char __a, int __b,
+                                    signed char *__c) {
+  return vec_stl(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b,
+                 __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrxl(vector signed char __a, int __b, vector signed char *__c)
-{
-  return vec_stl(vec_perm(__a,
-                          vec_lvlx(__b, __c),
-                          vec_lvsr(__b, (unsigned char *)__c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvrxl(vector signed char __a, int __b,
+                                    vector signed char *__c) {
+  return vec_stl(
+      vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrxl(vector unsigned char __a, int __b, unsigned char *__c)
-{
-  return vec_stl(vec_perm(__a,
-                          vec_lvlx(__b, __c),
-                          vec_lvsr(__b, __c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvrxl(vector unsigned char __a, int __b,
+                                    unsigned char *__c) {
+  return vec_stl(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b,
+                 __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrxl(vector unsigned char __a, int __b, vector unsigned char *__c)
-{
-  return vec_stl(vec_perm(__a,
-                          vec_lvlx(__b, __c),
-                          vec_lvsr(__b, (unsigned char *)__c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvrxl(vector unsigned char __a, int __b,
+                                    vector unsigned char *__c) {
+  return vec_stl(
+      vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrxl(vector bool char __a, int __b, vector bool char *__c)
-{
-  return vec_stl(vec_perm(__a,
-                          vec_lvlx(__b, __c),
-                          vec_lvsr(__b, (unsigned char *)__c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvrxl(vector bool char __a, int __b,
+                                    vector bool char *__c) {
+  return vec_stl(
+      vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrxl(vector short __a, int __b, short *__c)
-{
-  return vec_stl(vec_perm(__a,
-                          vec_lvlx(__b, __c),
-                          vec_lvsr(__b, __c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvrxl(vector short __a, int __b, short *__c) {
+  return vec_stl(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b,
+                 __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrxl(vector short __a, int __b, vector short *__c)
-{
-  return vec_stl(vec_perm(__a,
-                          vec_lvlx(__b, __c),
-                          vec_lvsr(__b, (unsigned char *)__c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvrxl(vector short __a, int __b,
+                                    vector short *__c) {
+  return vec_stl(
+      vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrxl(vector unsigned short __a, int __b, unsigned short *__c)
-{
-  return vec_stl(vec_perm(__a,
-                          vec_lvlx(__b, __c),
-                          vec_lvsr(__b, __c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvrxl(vector unsigned short __a, int __b,
+                                    unsigned short *__c) {
+  return vec_stl(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b,
+                 __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrxl(vector unsigned short __a, int __b, vector unsigned short *__c)
-{
-  return vec_stl(vec_perm(__a,
-                          vec_lvlx(__b, __c),
-                          vec_lvsr(__b, (unsigned char *)__c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvrxl(vector unsigned short __a, int __b,
+                                    vector unsigned short *__c) {
+  return vec_stl(
+      vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrxl(vector bool short __a, int __b, vector bool short *__c)
-{
-  return vec_stl(vec_perm(__a,
-                          vec_lvlx(__b, __c),
-                          vec_lvsr(__b, (unsigned char *)__c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvrxl(vector bool short __a, int __b,
+                                    vector bool short *__c) {
+  return vec_stl(
+      vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrxl(vector pixel __a, int __b, vector pixel *__c)
-{
-  return vec_stl(vec_perm(__a,
-                          vec_lvlx(__b, __c),
-                          vec_lvsr(__b, (unsigned char *)__c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvrxl(vector pixel __a, int __b,
+                                    vector pixel *__c) {
+  return vec_stl(
+      vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrxl(vector int __a, int __b, int *__c)
-{
-  return vec_stl(vec_perm(__a,
-                          vec_lvlx(__b, __c),
-                          vec_lvsr(__b, __c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvrxl(vector int __a, int __b, int *__c) {
+  return vec_stl(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b,
+                 __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrxl(vector int __a, int __b, vector int *__c)
-{
-  return vec_stl(vec_perm(__a,
-                          vec_lvlx(__b, __c),
-                          vec_lvsr(__b, (unsigned char *)__c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvrxl(vector int __a, int __b, vector int *__c) {
+  return vec_stl(
+      vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrxl(vector unsigned int __a, int __b, unsigned int *__c)
-{
-  return vec_stl(vec_perm(__a,
-                          vec_lvlx(__b, __c),
-                          vec_lvsr(__b, __c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvrxl(vector unsigned int __a, int __b,
+                                    unsigned int *__c) {
+  return vec_stl(vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, __c)), __b,
+                 __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrxl(vector unsigned int __a, int __b, vector unsigned int *__c)
-{
-  return vec_stl(vec_perm(__a,
-                          vec_lvlx(__b, __c),
-                          vec_lvsr(__b, (unsigned char *)__c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvrxl(vector unsigned int __a, int __b,
+                                    vector unsigned int *__c) {
+  return vec_stl(
+      vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrxl(vector bool int __a, int __b, vector bool int *__c)
-{
-  return vec_stl(vec_perm(__a,
-                          vec_lvlx(__b, __c),
-                          vec_lvsr(__b, (unsigned char *)__c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvrxl(vector bool int __a, int __b,
+                                    vector bool int *__c) {
+  return vec_stl(
+      vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
-static void __ATTRS_o_ai
-vec_stvrxl(vector float __a, int __b, vector float *__c)
-{
-  return vec_stl(vec_perm(__a,
-                          vec_lvlx(__b, __c),
-                          vec_lvsr(__b, (unsigned char *)__c)),
-                 __b, __c);
+static void __ATTRS_o_ai vec_stvrxl(vector float __a, int __b,
+                                    vector float *__c) {
+  return vec_stl(
+      vec_perm(__a, vec_lvlx(__b, __c), vec_lvsr(__b, (unsigned char *)__c)),
+      __b, __c);
 }
 
 /* vec_promote */
 
-static vector signed char __ATTRS_o_ai
-vec_promote(signed char __a, int __b)
-{
+static vector signed char __ATTRS_o_ai vec_promote(signed char __a, int __b) {
   vector signed char __res = (vector signed char)(0);
   __res[__b] = __a;
   return __res;
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_promote(unsigned char __a, int __b)
-{
+static vector unsigned char __ATTRS_o_ai vec_promote(unsigned char __a,
+                                                     int __b) {
   vector unsigned char __res = (vector unsigned char)(0);
   __res[__b] = __a;
   return __res;
 }
 
-static vector short __ATTRS_o_ai
-vec_promote(short __a, int __b)
-{
+static vector short __ATTRS_o_ai vec_promote(short __a, int __b) {
   vector short __res = (vector short)(0);
   __res[__b] = __a;
   return __res;
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_promote(unsigned short __a, int __b)
-{
+static vector unsigned short __ATTRS_o_ai vec_promote(unsigned short __a,
+                                                      int __b) {
   vector unsigned short __res = (vector unsigned short)(0);
   __res[__b] = __a;
   return __res;
 }
 
-static vector int __ATTRS_o_ai
-vec_promote(int __a, int __b)
-{
+static vector int __ATTRS_o_ai vec_promote(int __a, int __b) {
   vector int __res = (vector int)(0);
   __res[__b] = __a;
   return __res;
 }
 
-static vector unsigned int __ATTRS_o_ai
-vec_promote(unsigned int __a, int __b)
-{
+static vector unsigned int __ATTRS_o_ai vec_promote(unsigned int __a, int __b) {
   vector unsigned int __res = (vector unsigned int)(0);
   __res[__b] = __a;
   return __res;
 }
 
-static vector float __ATTRS_o_ai
-vec_promote(float __a, int __b)
-{
+static vector float __ATTRS_o_ai vec_promote(float __a, int __b) {
   vector float __res = (vector float)(0);
   __res[__b] = __a;
   return __res;
@@ -10860,45 +9868,29 @@
 
 /* vec_splats */
 
-static vector signed char __ATTRS_o_ai
-vec_splats(signed char __a)
-{
+static vector signed char __ATTRS_o_ai vec_splats(signed char __a) {
   return (vector signed char)(__a);
 }
 
-static vector unsigned char __ATTRS_o_ai
-vec_splats(unsigned char __a)
-{
+static vector unsigned char __ATTRS_o_ai vec_splats(unsigned char __a) {
   return (vector unsigned char)(__a);
 }
 
-static vector short __ATTRS_o_ai
-vec_splats(short __a)
-{
+static vector short __ATTRS_o_ai vec_splats(short __a) {
   return (vector short)(__a);
 }
 
-static vector unsigned short __ATTRS_o_ai
-vec_splats(unsigned short __a)
-{
+static vector unsigned short __ATTRS_o_ai vec_splats(unsigned short __a) {
   return (vector unsigned short)(__a);
 }
 
-static vector int __ATTRS_o_ai
-vec_splats(int __a)
-{
-  return (vector int)(__a);
-}
+static vector int __ATTRS_o_ai vec_splats(int __a) { return (vector int)(__a); }
 
-static vector unsigned int __ATTRS_o_ai
-vec_splats(unsigned int __a)
-{
+static vector unsigned int __ATTRS_o_ai vec_splats(unsigned int __a) {
   return (vector unsigned int)(__a);
 }
 
-static vector float __ATTRS_o_ai
-vec_splats(float __a)
-{
+static vector float __ATTRS_o_ai vec_splats(float __a) {
   return (vector float)(__a);
 }
 
@@ -10906,2531 +9898,2018 @@
 
 /* vec_all_eq */
 
-static int __ATTRS_o_ai
-vec_all_eq(vector signed char __a, vector signed char __b)
-{
-  return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_all_eq(vector signed char __a,
+                                   vector signed char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector signed char __a, vector bool char __b)
-{
-  return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_all_eq(vector signed char __a,
+                                   vector bool char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector unsigned char __a, vector unsigned char __b)
-{
-  return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_all_eq(vector unsigned char __a,
+                                   vector unsigned char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector unsigned char __a, vector bool char __b)
-{
-  return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_all_eq(vector unsigned char __a,
+                                   vector bool char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector bool char __a, vector signed char __b)
-{
-  return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_all_eq(vector bool char __a,
+                                   vector signed char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector bool char __a, vector unsigned char __b)
-{
-  return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_all_eq(vector bool char __a,
+                                   vector unsigned char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector bool char __a, vector bool char __b)
-{
-  return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_all_eq(vector bool char __a, vector bool char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_LT, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector short __a, vector short __b)
-{
+static int __ATTRS_o_ai vec_all_eq(vector short __a, vector short __b) {
   return __builtin_altivec_vcmpequh_p(__CR6_LT, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector short __a, vector bool short __b)
-{
+static int __ATTRS_o_ai vec_all_eq(vector short __a, vector bool short __b) {
   return __builtin_altivec_vcmpequh_p(__CR6_LT, __a, (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector unsigned short __a, vector unsigned short __b)
-{
-  return
-    __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a, (vector short)__b);
+static int __ATTRS_o_ai vec_all_eq(vector unsigned short __a,
+                                   vector unsigned short __b) {
+  return __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a,
+                                      (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector unsigned short __a, vector bool short __b)
-{
-  return
-    __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a, (vector short)__b);
+static int __ATTRS_o_ai vec_all_eq(vector unsigned short __a,
+                                   vector bool short __b) {
+  return __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a,
+                                      (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector bool short __a, vector short __b)
-{
-  return
-    __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a, (vector short)__b);
+static int __ATTRS_o_ai vec_all_eq(vector bool short __a, vector short __b) {
+  return __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a,
+                                      (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector bool short __a, vector unsigned short __b)
-{
-  return
-    __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a, (vector short)__b);
+static int __ATTRS_o_ai vec_all_eq(vector bool short __a,
+                                   vector unsigned short __b) {
+  return __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a,
+                                      (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector bool short __a, vector bool short __b)
-{
-  return
-    __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a, (vector short)__b);
+static int __ATTRS_o_ai vec_all_eq(vector bool short __a,
+                                   vector bool short __b) {
+  return __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a,
+                                      (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector pixel __a, vector pixel __b)
-{
-  return
-    __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a, (vector short)__b);
+static int __ATTRS_o_ai vec_all_eq(vector pixel __a, vector pixel __b) {
+  return __builtin_altivec_vcmpequh_p(__CR6_LT, (vector short)__a,
+                                      (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector int __a, vector int __b)
-{
+static int __ATTRS_o_ai vec_all_eq(vector int __a, vector int __b) {
   return __builtin_altivec_vcmpequw_p(__CR6_LT, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector int __a, vector bool int __b)
-{
+static int __ATTRS_o_ai vec_all_eq(vector int __a, vector bool int __b) {
   return __builtin_altivec_vcmpequw_p(__CR6_LT, __a, (vector int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector unsigned int __a, vector unsigned int __b)
-{
-  return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a, (vector int)__b);
+static int __ATTRS_o_ai vec_all_eq(vector unsigned int __a,
+                                   vector unsigned int __b) {
+  return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a,
+                                      (vector int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector unsigned int __a, vector bool int __b)
-{
-  return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a, (vector int)__b);
+static int __ATTRS_o_ai vec_all_eq(vector unsigned int __a,
+                                   vector bool int __b) {
+  return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a,
+                                      (vector int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector bool int __a, vector int __b)
-{
-  return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a, (vector int)__b);
+static int __ATTRS_o_ai vec_all_eq(vector bool int __a, vector int __b) {
+  return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a,
+                                      (vector int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector bool int __a, vector unsigned int __b)
-{
-  return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a, (vector int)__b);
+static int __ATTRS_o_ai vec_all_eq(vector bool int __a,
+                                   vector unsigned int __b) {
+  return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a,
+                                      (vector int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector bool int __a, vector bool int __b)
-{
-  return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a, (vector int)__b);
+static int __ATTRS_o_ai vec_all_eq(vector bool int __a, vector bool int __b) {
+  return __builtin_altivec_vcmpequw_p(__CR6_LT, (vector int)__a,
+                                      (vector int)__b);
 }
 
 #ifdef __POWER8_VECTOR__
-static int __ATTRS_o_ai
-vec_all_eq(vector signed long long __a, vector signed long long __b)
-{
+static int __ATTRS_o_ai vec_all_eq(vector signed long long __a,
+                                   vector signed long long __b) {
   return __builtin_altivec_vcmpequd_p(__CR6_LT, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector long long __a, vector bool long long __b)
-{
+static int __ATTRS_o_ai vec_all_eq(vector long long __a,
+                                   vector bool long long __b) {
   return __builtin_altivec_vcmpequd_p(__CR6_LT, __a, (vector long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector unsigned long long __a, vector unsigned long long __b)
-{
-  return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a, 
+static int __ATTRS_o_ai vec_all_eq(vector unsigned long long __a,
+                                   vector unsigned long long __b) {
+  return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
                                       (vector long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector unsigned long long __a, vector bool long long __b)
-{
-  return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a, 
+static int __ATTRS_o_ai vec_all_eq(vector unsigned long long __a,
+                                   vector bool long long __b) {
+  return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
                                       (vector long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector bool long long __a, vector long long __b)
-{
-  return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a, 
+static int __ATTRS_o_ai vec_all_eq(vector bool long long __a,
+                                   vector long long __b) {
+  return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
                                       (vector long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector bool long long __a, vector unsigned long long __b)
-{
-  return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a, 
+static int __ATTRS_o_ai vec_all_eq(vector bool long long __a,
+                                   vector unsigned long long __b) {
+  return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
                                       (vector long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_eq(vector bool long long __a, vector bool long long __b)
-{
-  return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a, 
+static int __ATTRS_o_ai vec_all_eq(vector bool long long __a,
+                                   vector bool long long __b) {
+  return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a,
                                       (vector long long)__b);
 }
 #endif
 
-static int __ATTRS_o_ai
-vec_all_eq(vector float __a, vector float __b)
-{
+static int __ATTRS_o_ai vec_all_eq(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpeqfp_p(__CR6_LT, __a, __b);
 }
 
 /* vec_all_ge */
 
-static int __ATTRS_o_ai
-vec_all_ge(vector signed char __a, vector signed char __b)
-{
+static int __ATTRS_o_ai vec_all_ge(vector signed char __a,
+                                   vector signed char __b) {
   return __builtin_altivec_vcmpgtsb_p(__CR6_EQ, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_ge(vector signed char __a, vector bool char __b)
-{
+static int __ATTRS_o_ai vec_all_ge(vector signed char __a,
+                                   vector bool char __b) {
   return __builtin_altivec_vcmpgtsb_p(__CR6_EQ, (vector signed char)__b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_ge(vector unsigned char __a, vector unsigned char __b)
-{
+static int __ATTRS_o_ai vec_all_ge(vector unsigned char __a,
+                                   vector unsigned char __b) {
   return __builtin_altivec_vcmpgtub_p(__CR6_EQ, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_ge(vector unsigned char __a, vector bool char __b)
-{
+static int __ATTRS_o_ai vec_all_ge(vector unsigned char __a,
+                                   vector bool char __b) {
   return __builtin_altivec_vcmpgtub_p(__CR6_EQ, (vector unsigned char)__b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_ge(vector bool char __a, vector signed char __b)
-{
-  return __builtin_altivec_vcmpgtub_p(__CR6_EQ,
-                                      (vector unsigned char)__b,
+static int __ATTRS_o_ai vec_all_ge(vector bool char __a,
+                                   vector signed char __b) {
+  return __builtin_altivec_vcmpgtub_p(__CR6_EQ, (vector unsigned char)__b,
                                       (vector unsigned char)__a);
 }
 
-static int __ATTRS_o_ai
-vec_all_ge(vector bool char __a, vector unsigned char __b)
-{
+static int __ATTRS_o_ai vec_all_ge(vector bool char __a,
+                                   vector unsigned char __b) {
   return __builtin_altivec_vcmpgtub_p(__CR6_EQ, __b, (vector unsigned char)__a);
 }
 
-static int __ATTRS_o_ai
-vec_all_ge(vector bool char __a, vector bool char __b)
-{
-  return __builtin_altivec_vcmpgtub_p(__CR6_EQ,
-                                      (vector unsigned char)__b,
+static int __ATTRS_o_ai vec_all_ge(vector bool char __a, vector bool char __b) {
+  return __builtin_altivec_vcmpgtub_p(__CR6_EQ, (vector unsigned char)__b,
                                       (vector unsigned char)__a);
 }
 
-static int __ATTRS_o_ai
-vec_all_ge(vector short __a, vector short __b)
-{
+static int __ATTRS_o_ai vec_all_ge(vector short __a, vector short __b) {
   return __builtin_altivec_vcmpgtsh_p(__CR6_EQ, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_ge(vector short __a, vector bool short __b)
-{
+static int __ATTRS_o_ai vec_all_ge(vector short __a, vector bool short __b) {
   return __builtin_altivec_vcmpgtsh_p(__CR6_EQ, (vector short)__b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_ge(vector unsigned short __a, vector unsigned short __b)
-{
+static int __ATTRS_o_ai vec_all_ge(vector unsigned short __a,
+                                   vector unsigned short __b) {
   return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_ge(vector unsigned short __a, vector bool short __b)
-{
-  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)__b, __a);
+static int __ATTRS_o_ai vec_all_ge(vector unsigned short __a,
+                                   vector bool short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)__b,
+                                      __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_ge(vector bool short __a, vector short __b)
-{
-  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ,
-                                      (vector unsigned short)__b,
+static int __ATTRS_o_ai vec_all_ge(vector bool short __a, vector short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)__b,
                                       (vector unsigned short)__a);
 }
 
-static int __ATTRS_o_ai
-vec_all_ge(vector bool short __a, vector unsigned short __b)
-{
-  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, __b, (vector unsigned short)__a);
-}
-
-static int __ATTRS_o_ai
-vec_all_ge(vector bool short __a, vector bool short __b)
-{
-  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ,
-                                      (vector unsigned short)__b,
+static int __ATTRS_o_ai vec_all_ge(vector bool short __a,
+                                   vector unsigned short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, __b,
                                       (vector unsigned short)__a);
 }
 
-static int __ATTRS_o_ai
-vec_all_ge(vector int __a, vector int __b)
-{
+static int __ATTRS_o_ai vec_all_ge(vector bool short __a,
+                                   vector bool short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)__b,
+                                      (vector unsigned short)__a);
+}
+
+static int __ATTRS_o_ai vec_all_ge(vector int __a, vector int __b) {
   return __builtin_altivec_vcmpgtsw_p(__CR6_EQ, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_ge(vector int __a, vector bool int __b)
-{
+static int __ATTRS_o_ai vec_all_ge(vector int __a, vector bool int __b) {
   return __builtin_altivec_vcmpgtsw_p(__CR6_EQ, (vector int)__b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_ge(vector unsigned int __a, vector unsigned int __b)
-{
+static int __ATTRS_o_ai vec_all_ge(vector unsigned int __a,
+                                   vector unsigned int __b) {
   return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_ge(vector unsigned int __a, vector bool int __b)
-{
+static int __ATTRS_o_ai vec_all_ge(vector unsigned int __a,
+                                   vector bool int __b) {
   return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, (vector unsigned int)__b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_ge(vector bool int __a, vector int __b)
-{
-  return __builtin_altivec_vcmpgtuw_p(__CR6_EQ,
-                                      (vector unsigned int)__b,
+static int __ATTRS_o_ai vec_all_ge(vector bool int __a, vector int __b) {
+  return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, (vector unsigned int)__b,
                                       (vector unsigned int)__a);
 }
 
-static int __ATTRS_o_ai
-vec_all_ge(vector bool int __a, vector unsigned int __b)
-{
+static int __ATTRS_o_ai vec_all_ge(vector bool int __a,
+                                   vector unsigned int __b) {
   return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, __b, (vector unsigned int)__a);
 }
 
-static int __ATTRS_o_ai
-vec_all_ge(vector bool int __a, vector bool int __b)
-{
-  return __builtin_altivec_vcmpgtuw_p(__CR6_EQ,
-                                      (vector unsigned int)__b,
+static int __ATTRS_o_ai vec_all_ge(vector bool int __a, vector bool int __b) {
+  return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, (vector unsigned int)__b,
                                       (vector unsigned int)__a);
 }
 
 #ifdef __POWER8_VECTOR__
-static int __ATTRS_o_ai
-vec_all_ge(vector signed long long __a, vector signed long long __b)
-{
+static int __ATTRS_o_ai vec_all_ge(vector signed long long __a,
+                                   vector signed long long __b) {
   return __builtin_altivec_vcmpgtsd_p(__CR6_EQ, __b, __a);
 }
-static int __ATTRS_o_ai
-vec_all_ge(vector signed long long __a, vector bool long long __b)
-{
+static int __ATTRS_o_ai vec_all_ge(vector signed long long __a,
+                                   vector bool long long __b) {
   return __builtin_altivec_vcmpgtsd_p(__CR6_EQ, (vector signed long long)__b,
                                       __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_ge(vector unsigned long long __a, vector unsigned long long __b)
-{
+static int __ATTRS_o_ai vec_all_ge(vector unsigned long long __a,
+                                   vector unsigned long long __b) {
   return __builtin_altivec_vcmpgtud_p(__CR6_EQ, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_ge(vector unsigned long long __a, vector bool long long __b)
-{
+static int __ATTRS_o_ai vec_all_ge(vector unsigned long long __a,
+                                   vector bool long long __b) {
   return __builtin_altivec_vcmpgtud_p(__CR6_EQ, (vector unsigned long long)__b,
                                       __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_ge(vector bool long long __a, vector signed long long __b)
-{
-  return __builtin_altivec_vcmpgtud_p(__CR6_EQ,
-                                      (vector unsigned long long)__b,
+static int __ATTRS_o_ai vec_all_ge(vector bool long long __a,
+                                   vector signed long long __b) {
+  return __builtin_altivec_vcmpgtud_p(__CR6_EQ, (vector unsigned long long)__b,
                                       (vector unsigned long long)__a);
 }
 
-static int __ATTRS_o_ai
-vec_all_ge(vector bool long long __a, vector unsigned long long __b)
-{
+static int __ATTRS_o_ai vec_all_ge(vector bool long long __a,
+                                   vector unsigned long long __b) {
   return __builtin_altivec_vcmpgtud_p(__CR6_EQ, __b,
                                       (vector unsigned long long)__a);
 }
 
-static int __ATTRS_o_ai
-vec_all_ge(vector bool long long __a, vector bool long long __b)
-{
-  return __builtin_altivec_vcmpgtud_p(__CR6_EQ,
-                                      (vector unsigned long long)__b,
+static int __ATTRS_o_ai vec_all_ge(vector bool long long __a,
+                                   vector bool long long __b) {
+  return __builtin_altivec_vcmpgtud_p(__CR6_EQ, (vector unsigned long long)__b,
                                       (vector unsigned long long)__a);
 }
 #endif
 
-static int __ATTRS_o_ai
-vec_all_ge(vector float __a, vector float __b)
-{
+static int __ATTRS_o_ai vec_all_ge(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpgefp_p(__CR6_LT, __a, __b);
 }
 
 /* vec_all_gt */
 
-static int __ATTRS_o_ai
-vec_all_gt(vector signed char __a, vector signed char __b)
-{
+static int __ATTRS_o_ai vec_all_gt(vector signed char __a,
+                                   vector signed char __b) {
   return __builtin_altivec_vcmpgtsb_p(__CR6_LT, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_gt(vector signed char __a, vector bool char __b)
-{
+static int __ATTRS_o_ai vec_all_gt(vector signed char __a,
+                                   vector bool char __b) {
   return __builtin_altivec_vcmpgtsb_p(__CR6_LT, __a, (vector signed char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_gt(vector unsigned char __a, vector unsigned char __b)
-{
+static int __ATTRS_o_ai vec_all_gt(vector unsigned char __a,
+                                   vector unsigned char __b) {
   return __builtin_altivec_vcmpgtub_p(__CR6_LT, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_gt(vector unsigned char __a, vector bool char __b)
-{
+static int __ATTRS_o_ai vec_all_gt(vector unsigned char __a,
+                                   vector bool char __b) {
   return __builtin_altivec_vcmpgtub_p(__CR6_LT, __a, (vector unsigned char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_gt(vector bool char __a, vector signed char __b)
-{
-  return __builtin_altivec_vcmpgtub_p(__CR6_LT,
-                                      (vector unsigned char)__a,
+static int __ATTRS_o_ai vec_all_gt(vector bool char __a,
+                                   vector signed char __b) {
+  return __builtin_altivec_vcmpgtub_p(__CR6_LT, (vector unsigned char)__a,
                                       (vector unsigned char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_gt(vector bool char __a, vector unsigned char __b)
-{
+static int __ATTRS_o_ai vec_all_gt(vector bool char __a,
+                                   vector unsigned char __b) {
   return __builtin_altivec_vcmpgtub_p(__CR6_LT, (vector unsigned char)__a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_gt(vector bool char __a, vector bool char __b)
-{
-  return __builtin_altivec_vcmpgtub_p(__CR6_LT,
-                                      (vector unsigned char)__a,
+static int __ATTRS_o_ai vec_all_gt(vector bool char __a, vector bool char __b) {
+  return __builtin_altivec_vcmpgtub_p(__CR6_LT, (vector unsigned char)__a,
                                       (vector unsigned char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_gt(vector short __a, vector short __b)
-{
+static int __ATTRS_o_ai vec_all_gt(vector short __a, vector short __b) {
   return __builtin_altivec_vcmpgtsh_p(__CR6_LT, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_gt(vector short __a, vector bool short __b)
-{
+static int __ATTRS_o_ai vec_all_gt(vector short __a, vector bool short __b) {
   return __builtin_altivec_vcmpgtsh_p(__CR6_LT, __a, (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_gt(vector unsigned short __a, vector unsigned short __b)
-{
+static int __ATTRS_o_ai vec_all_gt(vector unsigned short __a,
+                                   vector unsigned short __b) {
   return __builtin_altivec_vcmpgtuh_p(__CR6_LT, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_gt(vector unsigned short __a, vector bool short __b)
-{
-  return __builtin_altivec_vcmpgtuh_p(__CR6_LT, __a, (vector unsigned short)__b);
-}
-
-static int __ATTRS_o_ai
-vec_all_gt(vector bool short __a, vector short __b)
-{
-  return __builtin_altivec_vcmpgtuh_p(__CR6_LT,
-                                      (vector unsigned short)__a,
+static int __ATTRS_o_ai vec_all_gt(vector unsigned short __a,
+                                   vector bool short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_LT, __a,
                                       (vector unsigned short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_gt(vector bool short __a, vector unsigned short __b)
-{
-  return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)__a, __b);
-}
-
-static int __ATTRS_o_ai
-vec_all_gt(vector bool short __a, vector bool short __b)
-{
-  return __builtin_altivec_vcmpgtuh_p(__CR6_LT,
-                                      (vector unsigned short)__a,
+static int __ATTRS_o_ai vec_all_gt(vector bool short __a, vector short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)__a,
                                       (vector unsigned short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_gt(vector int __a, vector int __b)
-{
+static int __ATTRS_o_ai vec_all_gt(vector bool short __a,
+                                   vector unsigned short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)__a,
+                                      __b);
+}
+
+static int __ATTRS_o_ai vec_all_gt(vector bool short __a,
+                                   vector bool short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)__a,
+                                      (vector unsigned short)__b);
+}
+
+static int __ATTRS_o_ai vec_all_gt(vector int __a, vector int __b) {
   return __builtin_altivec_vcmpgtsw_p(__CR6_LT, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_gt(vector int __a, vector bool int __b)
-{
+static int __ATTRS_o_ai vec_all_gt(vector int __a, vector bool int __b) {
   return __builtin_altivec_vcmpgtsw_p(__CR6_LT, __a, (vector int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_gt(vector unsigned int __a, vector unsigned int __b)
-{
+static int __ATTRS_o_ai vec_all_gt(vector unsigned int __a,
+                                   vector unsigned int __b) {
   return __builtin_altivec_vcmpgtuw_p(__CR6_LT, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_gt(vector unsigned int __a, vector bool int __b)
-{
+static int __ATTRS_o_ai vec_all_gt(vector unsigned int __a,
+                                   vector bool int __b) {
   return __builtin_altivec_vcmpgtuw_p(__CR6_LT, __a, (vector unsigned int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_gt(vector bool int __a, vector int __b)
-{
-  return __builtin_altivec_vcmpgtuw_p(__CR6_LT,
-                                      (vector unsigned int)__a,
+static int __ATTRS_o_ai vec_all_gt(vector bool int __a, vector int __b) {
+  return __builtin_altivec_vcmpgtuw_p(__CR6_LT, (vector unsigned int)__a,
                                       (vector unsigned int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_gt(vector bool int __a, vector unsigned int __b)
-{
+static int __ATTRS_o_ai vec_all_gt(vector bool int __a,
+                                   vector unsigned int __b) {
   return __builtin_altivec_vcmpgtuw_p(__CR6_LT, (vector unsigned int)__a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_gt(vector bool int __a, vector bool int __b)
-{
-  return __builtin_altivec_vcmpgtuw_p(__CR6_LT,
-                                      (vector unsigned int)__a,
+static int __ATTRS_o_ai vec_all_gt(vector bool int __a, vector bool int __b) {
+  return __builtin_altivec_vcmpgtuw_p(__CR6_LT, (vector unsigned int)__a,
                                       (vector unsigned int)__b);
 }
 
 #ifdef __POWER8_VECTOR__
-static int __ATTRS_o_ai
-vec_all_gt(vector signed long long __a, vector signed long long __b)
-{
+static int __ATTRS_o_ai vec_all_gt(vector signed long long __a,
+                                   vector signed long long __b) {
   return __builtin_altivec_vcmpgtsd_p(__CR6_LT, __a, __b);
 }
-static int __ATTRS_o_ai
-vec_all_gt(vector signed long long __a, vector bool long long __b)
-{
+static int __ATTRS_o_ai vec_all_gt(vector signed long long __a,
+                                   vector bool long long __b) {
   return __builtin_altivec_vcmpgtsd_p(__CR6_LT, __a,
                                       (vector signed long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_gt(vector unsigned long long __a, vector unsigned long long __b)
-{
+static int __ATTRS_o_ai vec_all_gt(vector unsigned long long __a,
+                                   vector unsigned long long __b) {
   return __builtin_altivec_vcmpgtud_p(__CR6_LT, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_gt(vector unsigned long long __a, vector bool long long __b)
-{
-  return __builtin_altivec_vcmpgtud_p(__CR6_LT, __a, 
+static int __ATTRS_o_ai vec_all_gt(vector unsigned long long __a,
+                                   vector bool long long __b) {
+  return __builtin_altivec_vcmpgtud_p(__CR6_LT, __a,
                                       (vector unsigned long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_gt(vector bool long long __a, vector signed long long __b)
-{
-  return __builtin_altivec_vcmpgtud_p(__CR6_LT,
-                                      (vector unsigned long long)__a,
+static int __ATTRS_o_ai vec_all_gt(vector bool long long __a,
+                                   vector signed long long __b) {
+  return __builtin_altivec_vcmpgtud_p(__CR6_LT, (vector unsigned long long)__a,
                                       (vector unsigned long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_gt(vector bool long long __a, vector unsigned long long __b)
-{
+static int __ATTRS_o_ai vec_all_gt(vector bool long long __a,
+                                   vector unsigned long long __b) {
   return __builtin_altivec_vcmpgtud_p(__CR6_LT, (vector unsigned long long)__a,
                                       __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_gt(vector bool long long __a, vector bool long long __b)
-{
-  return __builtin_altivec_vcmpgtud_p(__CR6_LT,
-                                      (vector unsigned long long)__a,
+static int __ATTRS_o_ai vec_all_gt(vector bool long long __a,
+                                   vector bool long long __b) {
+  return __builtin_altivec_vcmpgtud_p(__CR6_LT, (vector unsigned long long)__a,
                                       (vector unsigned long long)__b);
 }
 #endif
 
-static int __ATTRS_o_ai
-vec_all_gt(vector float __a, vector float __b)
-{
+static int __ATTRS_o_ai vec_all_gt(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpgtfp_p(__CR6_LT, __a, __b);
 }
 
 /* vec_all_in */
 
 static int __attribute__((__always_inline__))
-vec_all_in(vector float __a, vector float __b)
-{
+vec_all_in(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpbfp_p(__CR6_EQ, __a, __b);
 }
 
 /* vec_all_le */
 
-static int __ATTRS_o_ai
-vec_all_le(vector signed char __a, vector signed char __b)
-{
+static int __ATTRS_o_ai vec_all_le(vector signed char __a,
+                                   vector signed char __b) {
   return __builtin_altivec_vcmpgtsb_p(__CR6_EQ, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_le(vector signed char __a, vector bool char __b)
-{
+static int __ATTRS_o_ai vec_all_le(vector signed char __a,
+                                   vector bool char __b) {
   return __builtin_altivec_vcmpgtsb_p(__CR6_EQ, __a, (vector signed char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_le(vector unsigned char __a, vector unsigned char __b)
-{
+static int __ATTRS_o_ai vec_all_le(vector unsigned char __a,
+                                   vector unsigned char __b) {
   return __builtin_altivec_vcmpgtub_p(__CR6_EQ, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_le(vector unsigned char __a, vector bool char __b)
-{
+static int __ATTRS_o_ai vec_all_le(vector unsigned char __a,
+                                   vector bool char __b) {
   return __builtin_altivec_vcmpgtub_p(__CR6_EQ, __a, (vector unsigned char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_le(vector bool char __a, vector signed char __b)
-{
-  return __builtin_altivec_vcmpgtub_p(__CR6_EQ,
-                                      (vector unsigned char)__a,
+static int __ATTRS_o_ai vec_all_le(vector bool char __a,
+                                   vector signed char __b) {
+  return __builtin_altivec_vcmpgtub_p(__CR6_EQ, (vector unsigned char)__a,
                                       (vector unsigned char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_le(vector bool char __a, vector unsigned char __b)
-{
+static int __ATTRS_o_ai vec_all_le(vector bool char __a,
+                                   vector unsigned char __b) {
   return __builtin_altivec_vcmpgtub_p(__CR6_EQ, (vector unsigned char)__a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_le(vector bool char __a, vector bool char __b)
-{
-  return __builtin_altivec_vcmpgtub_p(__CR6_EQ,
-                                      (vector unsigned char)__a,
+static int __ATTRS_o_ai vec_all_le(vector bool char __a, vector bool char __b) {
+  return __builtin_altivec_vcmpgtub_p(__CR6_EQ, (vector unsigned char)__a,
                                       (vector unsigned char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_le(vector short __a, vector short __b)
-{
+static int __ATTRS_o_ai vec_all_le(vector short __a, vector short __b) {
   return __builtin_altivec_vcmpgtsh_p(__CR6_EQ, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_le(vector short __a, vector bool short __b)
-{
+static int __ATTRS_o_ai vec_all_le(vector short __a, vector bool short __b) {
   return __builtin_altivec_vcmpgtsh_p(__CR6_EQ, __a, (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_le(vector unsigned short __a, vector unsigned short __b)
-{
+static int __ATTRS_o_ai vec_all_le(vector unsigned short __a,
+                                   vector unsigned short __b) {
   return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_le(vector unsigned short __a, vector bool short __b)
-{
-  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, __a, (vector unsigned short)__b);
-}
-
-static int __ATTRS_o_ai
-vec_all_le(vector bool short __a, vector short __b)
-{
-  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ,
-                                      (vector unsigned short)__a,
+static int __ATTRS_o_ai vec_all_le(vector unsigned short __a,
+                                   vector bool short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, __a,
                                       (vector unsigned short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_le(vector bool short __a, vector unsigned short __b)
-{
-  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)__a, __b);
-}
-
-static int __ATTRS_o_ai
-vec_all_le(vector bool short __a, vector bool short __b)
-{
-  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ,
-                                      (vector unsigned short)__a,
+static int __ATTRS_o_ai vec_all_le(vector bool short __a, vector short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)__a,
                                       (vector unsigned short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_le(vector int __a, vector int __b)
-{
+static int __ATTRS_o_ai vec_all_le(vector bool short __a,
+                                   vector unsigned short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)__a,
+                                      __b);
+}
+
+static int __ATTRS_o_ai vec_all_le(vector bool short __a,
+                                   vector bool short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ, (vector unsigned short)__a,
+                                      (vector unsigned short)__b);
+}
+
+static int __ATTRS_o_ai vec_all_le(vector int __a, vector int __b) {
   return __builtin_altivec_vcmpgtsw_p(__CR6_EQ, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_le(vector int __a, vector bool int __b)
-{
+static int __ATTRS_o_ai vec_all_le(vector int __a, vector bool int __b) {
   return __builtin_altivec_vcmpgtsw_p(__CR6_EQ, __a, (vector int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_le(vector unsigned int __a, vector unsigned int __b)
-{
+static int __ATTRS_o_ai vec_all_le(vector unsigned int __a,
+                                   vector unsigned int __b) {
   return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_le(vector unsigned int __a, vector bool int __b)
-{
+static int __ATTRS_o_ai vec_all_le(vector unsigned int __a,
+                                   vector bool int __b) {
   return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, __a, (vector unsigned int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_le(vector bool int __a, vector int __b)
-{
-  return __builtin_altivec_vcmpgtuw_p(__CR6_EQ,
-                                      (vector unsigned int)__a,
+static int __ATTRS_o_ai vec_all_le(vector bool int __a, vector int __b) {
+  return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, (vector unsigned int)__a,
                                       (vector unsigned int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_le(vector bool int __a, vector unsigned int __b)
-{
+static int __ATTRS_o_ai vec_all_le(vector bool int __a,
+                                   vector unsigned int __b) {
   return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, (vector unsigned int)__a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_le(vector bool int __a, vector bool int __b)
-{
-  return __builtin_altivec_vcmpgtuw_p(__CR6_EQ,
-                                      (vector unsigned int)__a,
+static int __ATTRS_o_ai vec_all_le(vector bool int __a, vector bool int __b) {
+  return __builtin_altivec_vcmpgtuw_p(__CR6_EQ, (vector unsigned int)__a,
                                       (vector unsigned int)__b);
 }
 
 #ifdef __POWER8_VECTOR__
-static int __ATTRS_o_ai
-vec_all_le(vector signed long long __a, vector signed long long __b)
-{
+static int __ATTRS_o_ai vec_all_le(vector signed long long __a,
+                                   vector signed long long __b) {
   return __builtin_altivec_vcmpgtsd_p(__CR6_EQ, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_le(vector unsigned long long __a, vector unsigned long long __b)
-{
+static int __ATTRS_o_ai vec_all_le(vector unsigned long long __a,
+                                   vector unsigned long long __b) {
   return __builtin_altivec_vcmpgtud_p(__CR6_EQ, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_le(vector signed long long __a, vector bool long long __b)
-{
+static int __ATTRS_o_ai vec_all_le(vector signed long long __a,
+                                   vector bool long long __b) {
   return __builtin_altivec_vcmpgtsd_p(__CR6_EQ, __a,
                                       (vector signed long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_le(vector unsigned long long __a, vector bool long long __b)
-{
-  return __builtin_altivec_vcmpgtud_p(__CR6_EQ, __a, 
+static int __ATTRS_o_ai vec_all_le(vector unsigned long long __a,
+                                   vector bool long long __b) {
+  return __builtin_altivec_vcmpgtud_p(__CR6_EQ, __a,
                                       (vector unsigned long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_le(vector bool long long __a, vector signed long long __b)
-{
-  return __builtin_altivec_vcmpgtud_p(__CR6_EQ,
-                                      (vector unsigned long long)__a,
+static int __ATTRS_o_ai vec_all_le(vector bool long long __a,
+                                   vector signed long long __b) {
+  return __builtin_altivec_vcmpgtud_p(__CR6_EQ, (vector unsigned long long)__a,
                                       (vector unsigned long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_le(vector bool long long __a, vector unsigned long long __b)
-{
+static int __ATTRS_o_ai vec_all_le(vector bool long long __a,
+                                   vector unsigned long long __b) {
   return __builtin_altivec_vcmpgtud_p(__CR6_EQ, (vector unsigned long long)__a,
                                       __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_le(vector bool long long __a, vector bool long long __b)
-{
-  return __builtin_altivec_vcmpgtud_p(__CR6_EQ,
-                                      (vector unsigned long long)__a,
+static int __ATTRS_o_ai vec_all_le(vector bool long long __a,
+                                   vector bool long long __b) {
+  return __builtin_altivec_vcmpgtud_p(__CR6_EQ, (vector unsigned long long)__a,
                                       (vector unsigned long long)__b);
 }
 #endif
 
-static int __ATTRS_o_ai
-vec_all_le(vector float __a, vector float __b)
-{
+static int __ATTRS_o_ai vec_all_le(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpgefp_p(__CR6_LT, __b, __a);
 }
 
 /* vec_all_lt */
 
-static int __ATTRS_o_ai
-vec_all_lt(vector signed char __a, vector signed char __b)
-{
+static int __ATTRS_o_ai vec_all_lt(vector signed char __a,
+                                   vector signed char __b) {
   return __builtin_altivec_vcmpgtsb_p(__CR6_LT, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector signed char __a, vector bool char __b)
-{
+static int __ATTRS_o_ai vec_all_lt(vector signed char __a,
+                                   vector bool char __b) {
   return __builtin_altivec_vcmpgtsb_p(__CR6_LT, (vector signed char)__b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector unsigned char __a, vector unsigned char __b)
-{
+static int __ATTRS_o_ai vec_all_lt(vector unsigned char __a,
+                                   vector unsigned char __b) {
   return __builtin_altivec_vcmpgtub_p(__CR6_LT, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector unsigned char __a, vector bool char __b)
-{
+static int __ATTRS_o_ai vec_all_lt(vector unsigned char __a,
+                                   vector bool char __b) {
   return __builtin_altivec_vcmpgtub_p(__CR6_LT, (vector unsigned char)__b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector bool char __a, vector signed char __b)
-{
-  return __builtin_altivec_vcmpgtub_p(__CR6_LT,
-                                      (vector unsigned char)__b,
+static int __ATTRS_o_ai vec_all_lt(vector bool char __a,
+                                   vector signed char __b) {
+  return __builtin_altivec_vcmpgtub_p(__CR6_LT, (vector unsigned char)__b,
                                       (vector unsigned char)__a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector bool char __a, vector unsigned char __b)
-{
+static int __ATTRS_o_ai vec_all_lt(vector bool char __a,
+                                   vector unsigned char __b) {
   return __builtin_altivec_vcmpgtub_p(__CR6_LT, __b, (vector unsigned char)__a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector bool char __a, vector bool char __b)
-{
-  return __builtin_altivec_vcmpgtub_p(__CR6_LT,
-                                      (vector unsigned char)__b,
+static int __ATTRS_o_ai vec_all_lt(vector bool char __a, vector bool char __b) {
+  return __builtin_altivec_vcmpgtub_p(__CR6_LT, (vector unsigned char)__b,
                                       (vector unsigned char)__a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector short __a, vector short __b)
-{
+static int __ATTRS_o_ai vec_all_lt(vector short __a, vector short __b) {
   return __builtin_altivec_vcmpgtsh_p(__CR6_LT, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector short __a, vector bool short __b)
-{
+static int __ATTRS_o_ai vec_all_lt(vector short __a, vector bool short __b) {
   return __builtin_altivec_vcmpgtsh_p(__CR6_LT, (vector short)__b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector unsigned short __a, vector unsigned short __b)
-{
+static int __ATTRS_o_ai vec_all_lt(vector unsigned short __a,
+                                   vector unsigned short __b) {
   return __builtin_altivec_vcmpgtuh_p(__CR6_LT, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector unsigned short __a, vector bool short __b)
-{
-  return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)__b, __a);
+static int __ATTRS_o_ai vec_all_lt(vector unsigned short __a,
+                                   vector bool short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)__b,
+                                      __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector bool short __a, vector short __b)
-{
-  return __builtin_altivec_vcmpgtuh_p(__CR6_LT,
-                                      (vector unsigned short)__b,
+static int __ATTRS_o_ai vec_all_lt(vector bool short __a, vector short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)__b,
                                       (vector unsigned short)__a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector bool short __a, vector unsigned short __b)
-{
-  return __builtin_altivec_vcmpgtuh_p(__CR6_LT, __b, (vector unsigned short)__a);
-}
-
-static int __ATTRS_o_ai
-vec_all_lt(vector bool short __a, vector bool short __b)
-{
-  return __builtin_altivec_vcmpgtuh_p(__CR6_LT,
-                                      (vector unsigned short)__b,
+static int __ATTRS_o_ai vec_all_lt(vector bool short __a,
+                                   vector unsigned short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_LT, __b,
                                       (vector unsigned short)__a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector int __a, vector int __b)
-{
+static int __ATTRS_o_ai vec_all_lt(vector bool short __a,
+                                   vector bool short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_LT, (vector unsigned short)__b,
+                                      (vector unsigned short)__a);
+}
+
+static int __ATTRS_o_ai vec_all_lt(vector int __a, vector int __b) {
   return __builtin_altivec_vcmpgtsw_p(__CR6_LT, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector int __a, vector bool int __b)
-{
+static int __ATTRS_o_ai vec_all_lt(vector int __a, vector bool int __b) {
   return __builtin_altivec_vcmpgtsw_p(__CR6_LT, (vector int)__b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector unsigned int __a, vector unsigned int __b)
-{
+static int __ATTRS_o_ai vec_all_lt(vector unsigned int __a,
+                                   vector unsigned int __b) {
   return __builtin_altivec_vcmpgtuw_p(__CR6_LT, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector unsigned int __a, vector bool int __b)
-{
+static int __ATTRS_o_ai vec_all_lt(vector unsigned int __a,
+                                   vector bool int __b) {
   return __builtin_altivec_vcmpgtuw_p(__CR6_LT, (vector unsigned int)__b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector bool int __a, vector int __b)
-{
-  return __builtin_altivec_vcmpgtuw_p(__CR6_LT,
-                                      (vector unsigned int)__b,
+static int __ATTRS_o_ai vec_all_lt(vector bool int __a, vector int __b) {
+  return __builtin_altivec_vcmpgtuw_p(__CR6_LT, (vector unsigned int)__b,
                                       (vector unsigned int)__a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector bool int __a, vector unsigned int __b)
-{
+static int __ATTRS_o_ai vec_all_lt(vector bool int __a,
+                                   vector unsigned int __b) {
   return __builtin_altivec_vcmpgtuw_p(__CR6_LT, __b, (vector unsigned int)__a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector bool int __a, vector bool int __b)
-{
-  return __builtin_altivec_vcmpgtuw_p(__CR6_LT,
-                                      (vector unsigned int)__b,
+static int __ATTRS_o_ai vec_all_lt(vector bool int __a, vector bool int __b) {
+  return __builtin_altivec_vcmpgtuw_p(__CR6_LT, (vector unsigned int)__b,
                                       (vector unsigned int)__a);
 }
 
 #ifdef __POWER8_VECTOR__
-static int __ATTRS_o_ai
-vec_all_lt(vector signed long long __a, vector signed long long __b)
-{
+static int __ATTRS_o_ai vec_all_lt(vector signed long long __a,
+                                   vector signed long long __b) {
   return __builtin_altivec_vcmpgtsd_p(__CR6_LT, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector unsigned long long __a, vector unsigned long long __b)
-{
+static int __ATTRS_o_ai vec_all_lt(vector unsigned long long __a,
+                                   vector unsigned long long __b) {
   return __builtin_altivec_vcmpgtud_p(__CR6_LT, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector signed long long __a, vector bool long long __b)
-{
+static int __ATTRS_o_ai vec_all_lt(vector signed long long __a,
+                                   vector bool long long __b) {
   return __builtin_altivec_vcmpgtsd_p(__CR6_LT, (vector signed long long)__b,
                                       __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector unsigned long long __a, vector bool long long __b)
-{
+static int __ATTRS_o_ai vec_all_lt(vector unsigned long long __a,
+                                   vector bool long long __b) {
   return __builtin_altivec_vcmpgtud_p(__CR6_LT, (vector unsigned long long)__b,
                                       __a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector bool long long __a, vector signed long long __b)
-{
-  return __builtin_altivec_vcmpgtud_p(__CR6_LT,
-                                      (vector unsigned long long)__b,
+static int __ATTRS_o_ai vec_all_lt(vector bool long long __a,
+                                   vector signed long long __b) {
+  return __builtin_altivec_vcmpgtud_p(__CR6_LT, (vector unsigned long long)__b,
                                       (vector unsigned long long)__a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector bool long long __a, vector unsigned long long __b)
-{
+static int __ATTRS_o_ai vec_all_lt(vector bool long long __a,
+                                   vector unsigned long long __b) {
   return __builtin_altivec_vcmpgtud_p(__CR6_LT, __b,
                                       (vector unsigned long long)__a);
 }
 
-static int __ATTRS_o_ai
-vec_all_lt(vector bool long long __a, vector bool long long __b)
-{
-  return __builtin_altivec_vcmpgtud_p(__CR6_LT,
-                                      (vector unsigned long long)__b,
+static int __ATTRS_o_ai vec_all_lt(vector bool long long __a,
+                                   vector bool long long __b) {
+  return __builtin_altivec_vcmpgtud_p(__CR6_LT, (vector unsigned long long)__b,
                                       (vector unsigned long long)__a);
 }
 #endif
 
-static int __ATTRS_o_ai
-vec_all_lt(vector float __a, vector float __b)
-{
+static int __ATTRS_o_ai vec_all_lt(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpgtfp_p(__CR6_LT, __b, __a);
 }
 
 /* vec_all_nan */
 
-static int __attribute__((__always_inline__))
-vec_all_nan(vector float __a)
-{
+static int __attribute__((__always_inline__)) vec_all_nan(vector float __a) {
   return __builtin_altivec_vcmpeqfp_p(__CR6_EQ, __a, __a);
 }
 
 /* vec_all_ne */
 
-static int __ATTRS_o_ai
-vec_all_ne(vector signed char __a, vector signed char __b)
-{
-  return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_all_ne(vector signed char __a,
+                                   vector signed char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector signed char __a, vector bool char __b)
-{
-  return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_all_ne(vector signed char __a,
+                                   vector bool char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector unsigned char __a, vector unsigned char __b)
-{
-  return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_all_ne(vector unsigned char __a,
+                                   vector unsigned char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector unsigned char __a, vector bool char __b)
-{
-  return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_all_ne(vector unsigned char __a,
+                                   vector bool char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector bool char __a, vector signed char __b)
-{
-  return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_all_ne(vector bool char __a,
+                                   vector signed char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector bool char __a, vector unsigned char __b)
-{
-  return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_all_ne(vector bool char __a,
+                                   vector unsigned char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector bool char __a, vector bool char __b)
-{
-  return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_all_ne(vector bool char __a, vector bool char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_EQ, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector short __a, vector short __b)
-{
+static int __ATTRS_o_ai vec_all_ne(vector short __a, vector short __b) {
   return __builtin_altivec_vcmpequh_p(__CR6_EQ, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector short __a, vector bool short __b)
-{
+static int __ATTRS_o_ai vec_all_ne(vector short __a, vector bool short __b) {
   return __builtin_altivec_vcmpequh_p(__CR6_EQ, __a, (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector unsigned short __a, vector unsigned short __b)
-{
-  return
-    __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a, (vector short)__b);
+static int __ATTRS_o_ai vec_all_ne(vector unsigned short __a,
+                                   vector unsigned short __b) {
+  return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a,
+                                      (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector unsigned short __a, vector bool short __b)
-{
-  return
-    __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a, (vector short)__b);
+static int __ATTRS_o_ai vec_all_ne(vector unsigned short __a,
+                                   vector bool short __b) {
+  return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a,
+                                      (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector bool short __a, vector short __b)
-{
-  return
-    __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a, (vector short)__b);
+static int __ATTRS_o_ai vec_all_ne(vector bool short __a, vector short __b) {
+  return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a,
+                                      (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector bool short __a, vector unsigned short __b)
-{
-  return
-    __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a, (vector short)__b);
+static int __ATTRS_o_ai vec_all_ne(vector bool short __a,
+                                   vector unsigned short __b) {
+  return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a,
+                                      (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector bool short __a, vector bool short __b)
-{
-  return
-    __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a, (vector short)__b);
+static int __ATTRS_o_ai vec_all_ne(vector bool short __a,
+                                   vector bool short __b) {
+  return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a,
+                                      (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector pixel __a, vector pixel __b)
-{
-  return
-    __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a, (vector short)__b);
+static int __ATTRS_o_ai vec_all_ne(vector pixel __a, vector pixel __b) {
+  return __builtin_altivec_vcmpequh_p(__CR6_EQ, (vector short)__a,
+                                      (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector int __a, vector int __b)
-{
+static int __ATTRS_o_ai vec_all_ne(vector int __a, vector int __b) {
   return __builtin_altivec_vcmpequw_p(__CR6_EQ, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector int __a, vector bool int __b)
-{
+static int __ATTRS_o_ai vec_all_ne(vector int __a, vector bool int __b) {
   return __builtin_altivec_vcmpequw_p(__CR6_EQ, __a, (vector int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector unsigned int __a, vector unsigned int __b)
-{
-  return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a, (vector int)__b);
+static int __ATTRS_o_ai vec_all_ne(vector unsigned int __a,
+                                   vector unsigned int __b) {
+  return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a,
+                                      (vector int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector unsigned int __a, vector bool int __b)
-{
-  return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a, (vector int)__b);
+static int __ATTRS_o_ai vec_all_ne(vector unsigned int __a,
+                                   vector bool int __b) {
+  return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a,
+                                      (vector int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector bool int __a, vector int __b)
-{
-  return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a, (vector int)__b);
+static int __ATTRS_o_ai vec_all_ne(vector bool int __a, vector int __b) {
+  return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a,
+                                      (vector int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector bool int __a, vector unsigned int __b)
-{
-  return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a, (vector int)__b);
+static int __ATTRS_o_ai vec_all_ne(vector bool int __a,
+                                   vector unsigned int __b) {
+  return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a,
+                                      (vector int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector bool int __a, vector bool int __b)
-{
-  return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a, (vector int)__b);
+static int __ATTRS_o_ai vec_all_ne(vector bool int __a, vector bool int __b) {
+  return __builtin_altivec_vcmpequw_p(__CR6_EQ, (vector int)__a,
+                                      (vector int)__b);
 }
 
 #ifdef __POWER8_VECTOR__
-static int __ATTRS_o_ai
-vec_all_ne(vector signed long long __a, vector signed long long __b)
-{
+static int __ATTRS_o_ai vec_all_ne(vector signed long long __a,
+                                   vector signed long long __b) {
   return __builtin_altivec_vcmpequd_p(__CR6_EQ, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector unsigned long long __a, vector unsigned long long __b)
-{
-  return __builtin_altivec_vcmpequd_p(__CR6_EQ, (vector long long)__a, 
+static int __ATTRS_o_ai vec_all_ne(vector unsigned long long __a,
+                                   vector unsigned long long __b) {
+  return __builtin_altivec_vcmpequd_p(__CR6_EQ, (vector long long)__a,
                                       (vector long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector signed long long __a, vector bool long long __b)
-{
+static int __ATTRS_o_ai vec_all_ne(vector signed long long __a,
+                                   vector bool long long __b) {
   return __builtin_altivec_vcmpequd_p(__CR6_EQ, __a,
                                       (vector signed long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector unsigned long long __a, vector bool long long __b)
-{
-  return __builtin_altivec_vcmpequd_p(__CR6_EQ, (vector signed long long)__a, 
-                                      (vector signed long long)__b);
-}
-
-static int __ATTRS_o_ai
-vec_all_ne(vector bool long long __a, vector signed long long __b)
-{
+static int __ATTRS_o_ai vec_all_ne(vector unsigned long long __a,
+                                   vector bool long long __b) {
   return __builtin_altivec_vcmpequd_p(__CR6_EQ, (vector signed long long)__a,
                                       (vector signed long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector bool long long __a, vector unsigned long long __b)
-{
-  return __builtin_altivec_vcmpequd_p(__CR6_EQ, (vector signed long long)__a, 
+static int __ATTRS_o_ai vec_all_ne(vector bool long long __a,
+                                   vector signed long long __b) {
+  return __builtin_altivec_vcmpequd_p(__CR6_EQ, (vector signed long long)__a,
                                       (vector signed long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_all_ne(vector bool long long __a, vector bool long long __b)
-{
-  return __builtin_altivec_vcmpequd_p(__CR6_EQ, (vector signed long long)__a, 
+static int __ATTRS_o_ai vec_all_ne(vector bool long long __a,
+                                   vector unsigned long long __b) {
+  return __builtin_altivec_vcmpequd_p(__CR6_EQ, (vector signed long long)__a,
+                                      (vector signed long long)__b);
+}
+
+static int __ATTRS_o_ai vec_all_ne(vector bool long long __a,
+                                   vector bool long long __b) {
+  return __builtin_altivec_vcmpequd_p(__CR6_EQ, (vector signed long long)__a,
                                       (vector signed long long)__b);
 }
 #endif
 
-static int __ATTRS_o_ai
-vec_all_ne(vector float __a, vector float __b)
-{
+static int __ATTRS_o_ai vec_all_ne(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpeqfp_p(__CR6_EQ, __a, __b);
 }
 
 /* vec_all_nge */
 
 static int __attribute__((__always_inline__))
-vec_all_nge(vector float __a, vector float __b)
-{
+vec_all_nge(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpgefp_p(__CR6_EQ, __a, __b);
 }
 
 /* vec_all_ngt */
 
 static int __attribute__((__always_inline__))
-vec_all_ngt(vector float __a, vector float __b)
-{
+vec_all_ngt(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpgtfp_p(__CR6_EQ, __a, __b);
 }
 
 /* vec_all_nle */
 
 static int __attribute__((__always_inline__))
-vec_all_nle(vector float __a, vector float __b)
-{
+vec_all_nle(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpgefp_p(__CR6_EQ, __b, __a);
 }
 
 /* vec_all_nlt */
 
 static int __attribute__((__always_inline__))
-vec_all_nlt(vector float __a, vector float __b)
-{
+vec_all_nlt(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpgtfp_p(__CR6_EQ, __b, __a);
 }
 
 /* vec_all_numeric */
 
 static int __attribute__((__always_inline__))
-vec_all_numeric(vector float __a)
-{
+vec_all_numeric(vector float __a) {
   return __builtin_altivec_vcmpeqfp_p(__CR6_LT, __a, __a);
 }
 
 /* vec_any_eq */
 
-static int __ATTRS_o_ai
-vec_any_eq(vector signed char __a, vector signed char __b)
-{
-  return
-    __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_any_eq(vector signed char __a,
+                                   vector signed char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector signed char __a, vector bool char __b)
-{
-  return
-    __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_any_eq(vector signed char __a,
+                                   vector bool char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector unsigned char __a, vector unsigned char __b)
-{
-  return
-    __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_any_eq(vector unsigned char __a,
+                                   vector unsigned char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector unsigned char __a, vector bool char __b)
-{
-  return
-    __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_any_eq(vector unsigned char __a,
+                                   vector bool char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector bool char __a, vector signed char __b)
-{
-  return
-    __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_any_eq(vector bool char __a,
+                                   vector signed char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector bool char __a, vector unsigned char __b)
-{
-  return
-    __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_any_eq(vector bool char __a,
+                                   vector unsigned char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector bool char __a, vector bool char __b)
-{
-  return
-    __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_any_eq(vector bool char __a, vector bool char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_EQ_REV, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector short __a, vector short __b)
-{
+static int __ATTRS_o_ai vec_any_eq(vector short __a, vector short __b) {
   return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector short __a, vector bool short __b)
-{
+static int __ATTRS_o_ai vec_any_eq(vector short __a, vector bool short __b) {
   return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, __a, (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector unsigned short __a, vector unsigned short __b)
-{
-  return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, 
-                                      (vector short)__a,
+static int __ATTRS_o_ai vec_any_eq(vector unsigned short __a,
+                                   vector unsigned short __b) {
+  return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)__a,
                                       (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector unsigned short __a, vector bool short __b)
-{
-  return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, 
-                                      (vector short)__a,
+static int __ATTRS_o_ai vec_any_eq(vector unsigned short __a,
+                                   vector bool short __b) {
+  return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)__a,
                                       (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector bool short __a, vector short __b)
-{
-  return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV,
-                                      (vector short)__a,
+static int __ATTRS_o_ai vec_any_eq(vector bool short __a, vector short __b) {
+  return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)__a,
                                       (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector bool short __a, vector unsigned short __b)
-{
-  return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV,
-                                      (vector short)__a,
+static int __ATTRS_o_ai vec_any_eq(vector bool short __a,
+                                   vector unsigned short __b) {
+  return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)__a,
                                       (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector bool short __a, vector bool short __b)
-{
-  return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV,
-                                      (vector short)__a,
+static int __ATTRS_o_ai vec_any_eq(vector bool short __a,
+                                   vector bool short __b) {
+  return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)__a,
                                       (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector pixel __a, vector pixel __b)
-{
-  return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, 
-                                      (vector short)__a,
+static int __ATTRS_o_ai vec_any_eq(vector pixel __a, vector pixel __b) {
+  return __builtin_altivec_vcmpequh_p(__CR6_EQ_REV, (vector short)__a,
                                       (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector int __a, vector int __b)
-{
+static int __ATTRS_o_ai vec_any_eq(vector int __a, vector int __b) {
   return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector int __a, vector bool int __b)
-{
+static int __ATTRS_o_ai vec_any_eq(vector int __a, vector bool int __b) {
   return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, __a, (vector int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector unsigned int __a, vector unsigned int __b)
-{
-  return
-    __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a, (vector int)__b);
+static int __ATTRS_o_ai vec_any_eq(vector unsigned int __a,
+                                   vector unsigned int __b) {
+  return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a,
+                                      (vector int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector unsigned int __a, vector bool int __b)
-{
-  return
-    __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a, (vector int)__b);
+static int __ATTRS_o_ai vec_any_eq(vector unsigned int __a,
+                                   vector bool int __b) {
+  return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a,
+                                      (vector int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector bool int __a, vector int __b)
-{
-  return
-    __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a, (vector int)__b);
+static int __ATTRS_o_ai vec_any_eq(vector bool int __a, vector int __b) {
+  return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a,
+                                      (vector int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector bool int __a, vector unsigned int __b)
-{
-  return
-    __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a, (vector int)__b);
+static int __ATTRS_o_ai vec_any_eq(vector bool int __a,
+                                   vector unsigned int __b) {
+  return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a,
+                                      (vector int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector bool int __a, vector bool int __b)
-{
-  return
-    __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a, (vector int)__b);
+static int __ATTRS_o_ai vec_any_eq(vector bool int __a, vector bool int __b) {
+  return __builtin_altivec_vcmpequw_p(__CR6_EQ_REV, (vector int)__a,
+                                      (vector int)__b);
 }
 
 #ifdef __POWER8_VECTOR__
-static int __ATTRS_o_ai
-vec_any_eq(vector signed long long __a, vector signed long long __b)
-{
+static int __ATTRS_o_ai vec_any_eq(vector signed long long __a,
+                                   vector signed long long __b) {
   return __builtin_altivec_vcmpequd_p(__CR6_EQ_REV, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector unsigned long long __a, vector unsigned long long __b)
-{
-  return
-    __builtin_altivec_vcmpequd_p(__CR6_EQ_REV, (vector long long)__a, 
-                                 (vector long long)__b);
+static int __ATTRS_o_ai vec_any_eq(vector unsigned long long __a,
+                                   vector unsigned long long __b) {
+  return __builtin_altivec_vcmpequd_p(__CR6_EQ_REV, (vector long long)__a,
+                                      (vector long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector signed long long __a, vector bool long long __b)
-{
+static int __ATTRS_o_ai vec_any_eq(vector signed long long __a,
+                                   vector bool long long __b) {
   return __builtin_altivec_vcmpequd_p(__CR6_EQ_REV, __a,
                                       (vector signed long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector unsigned long long __a, vector bool long long __b)
-{
-  return
-    __builtin_altivec_vcmpequd_p(__CR6_EQ_REV, (vector signed long long)__a, 
-                                 (vector signed long long)__b);
+static int __ATTRS_o_ai vec_any_eq(vector unsigned long long __a,
+                                   vector bool long long __b) {
+  return __builtin_altivec_vcmpequd_p(
+      __CR6_EQ_REV, (vector signed long long)__a, (vector signed long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector bool long long __a, vector signed long long __b)
-{
-  return
-    __builtin_altivec_vcmpequd_p(__CR6_EQ_REV, (vector signed long long)__a, 
-                                 (vector signed long long)__b);
+static int __ATTRS_o_ai vec_any_eq(vector bool long long __a,
+                                   vector signed long long __b) {
+  return __builtin_altivec_vcmpequd_p(
+      __CR6_EQ_REV, (vector signed long long)__a, (vector signed long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector bool long long __a, vector unsigned long long __b)
-{
-  return
-    __builtin_altivec_vcmpequd_p(__CR6_EQ_REV, (vector signed long long)__a, 
-                                 (vector signed long long)__b);
+static int __ATTRS_o_ai vec_any_eq(vector bool long long __a,
+                                   vector unsigned long long __b) {
+  return __builtin_altivec_vcmpequd_p(
+      __CR6_EQ_REV, (vector signed long long)__a, (vector signed long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_eq(vector bool long long __a, vector bool long long __b)
-{
-  return
-    __builtin_altivec_vcmpequd_p(__CR6_EQ_REV, (vector signed long long)__a, 
-                                 (vector signed long long)__b);
+static int __ATTRS_o_ai vec_any_eq(vector bool long long __a,
+                                   vector bool long long __b) {
+  return __builtin_altivec_vcmpequd_p(
+      __CR6_EQ_REV, (vector signed long long)__a, (vector signed long long)__b);
 }
 #endif
 
-static int __ATTRS_o_ai
-vec_any_eq(vector float __a, vector float __b)
-{
+static int __ATTRS_o_ai vec_any_eq(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpeqfp_p(__CR6_EQ_REV, __a, __b);
 }
 
 /* vec_any_ge */
 
-static int __ATTRS_o_ai
-vec_any_ge(vector signed char __a, vector signed char __b)
-{
+static int __ATTRS_o_ai vec_any_ge(vector signed char __a,
+                                   vector signed char __b) {
   return __builtin_altivec_vcmpgtsb_p(__CR6_LT_REV, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_ge(vector signed char __a, vector bool char __b)
-{
-  return __builtin_altivec_vcmpgtsb_p(__CR6_LT_REV, (vector signed char)__b, __a);
+static int __ATTRS_o_ai vec_any_ge(vector signed char __a,
+                                   vector bool char __b) {
+  return __builtin_altivec_vcmpgtsb_p(__CR6_LT_REV, (vector signed char)__b,
+                                      __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_ge(vector unsigned char __a, vector unsigned char __b)
-{
+static int __ATTRS_o_ai vec_any_ge(vector unsigned char __a,
+                                   vector unsigned char __b) {
   return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_ge(vector unsigned char __a, vector bool char __b)
-{
-  return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)__b, __a);
+static int __ATTRS_o_ai vec_any_ge(vector unsigned char __a,
+                                   vector bool char __b) {
+  return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)__b,
+                                      __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_ge(vector bool char __a, vector signed char __b)
-{
-  return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV,
-                                      (vector unsigned char)__b,
+static int __ATTRS_o_ai vec_any_ge(vector bool char __a,
+                                   vector signed char __b) {
+  return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)__b,
                                       (vector unsigned char)__a);
 }
 
-static int __ATTRS_o_ai
-vec_any_ge(vector bool char __a, vector unsigned char __b)
-{
-  return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, __b, (vector unsigned char)__a);
-}
-
-static int __ATTRS_o_ai
-vec_any_ge(vector bool char __a, vector bool char __b)
-{
-  return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV,
-                                      (vector unsigned char)__b,
+static int __ATTRS_o_ai vec_any_ge(vector bool char __a,
+                                   vector unsigned char __b) {
+  return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, __b,
                                       (vector unsigned char)__a);
 }
 
-static int __ATTRS_o_ai
-vec_any_ge(vector short __a, vector short __b)
-{
+static int __ATTRS_o_ai vec_any_ge(vector bool char __a, vector bool char __b) {
+  return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)__b,
+                                      (vector unsigned char)__a);
+}
+
+static int __ATTRS_o_ai vec_any_ge(vector short __a, vector short __b) {
   return __builtin_altivec_vcmpgtsh_p(__CR6_LT_REV, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_ge(vector short __a, vector bool short __b)
-{
+static int __ATTRS_o_ai vec_any_ge(vector short __a, vector bool short __b) {
   return __builtin_altivec_vcmpgtsh_p(__CR6_LT_REV, (vector short)__b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_ge(vector unsigned short __a, vector unsigned short __b)
-{
+static int __ATTRS_o_ai vec_any_ge(vector unsigned short __a,
+                                   vector unsigned short __b) {
   return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_ge(vector unsigned short __a, vector bool short __b)
-{
-  return
-    __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)__b, __a);
+static int __ATTRS_o_ai vec_any_ge(vector unsigned short __a,
+                                   vector bool short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)__b,
+                                      __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_ge(vector bool short __a, vector short __b)
-{
-  return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV,
-                                      (vector unsigned short)__b,
+static int __ATTRS_o_ai vec_any_ge(vector bool short __a, vector short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)__b,
                                       (vector unsigned short)__a);
 }
 
-static int __ATTRS_o_ai
-vec_any_ge(vector bool short __a, vector unsigned short __b)
-{
-  return 
-    __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, __b, (vector unsigned short)__a);
-}
-
-static int __ATTRS_o_ai
-vec_any_ge(vector bool short __a, vector bool short __b)
-{
-  return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV,
-                                      (vector unsigned short)__b,
+static int __ATTRS_o_ai vec_any_ge(vector bool short __a,
+                                   vector unsigned short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, __b,
                                       (vector unsigned short)__a);
 }
 
-static int __ATTRS_o_ai
-vec_any_ge(vector int __a, vector int __b)
-{
+static int __ATTRS_o_ai vec_any_ge(vector bool short __a,
+                                   vector bool short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)__b,
+                                      (vector unsigned short)__a);
+}
+
+static int __ATTRS_o_ai vec_any_ge(vector int __a, vector int __b) {
   return __builtin_altivec_vcmpgtsw_p(__CR6_LT_REV, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_ge(vector int __a, vector bool int __b)
-{
+static int __ATTRS_o_ai vec_any_ge(vector int __a, vector bool int __b) {
   return __builtin_altivec_vcmpgtsw_p(__CR6_LT_REV, (vector int)__b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_ge(vector unsigned int __a, vector unsigned int __b)
-{
+static int __ATTRS_o_ai vec_any_ge(vector unsigned int __a,
+                                   vector unsigned int __b) {
   return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_ge(vector unsigned int __a, vector bool int __b)
-{
-  return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)__b, __a);
+static int __ATTRS_o_ai vec_any_ge(vector unsigned int __a,
+                                   vector bool int __b) {
+  return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)__b,
+                                      __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_ge(vector bool int __a, vector int __b)
-{
-  return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV,
-                                      (vector unsigned int)__b,
+static int __ATTRS_o_ai vec_any_ge(vector bool int __a, vector int __b) {
+  return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)__b,
                                       (vector unsigned int)__a);
 }
 
-static int __ATTRS_o_ai
-vec_any_ge(vector bool int __a, vector unsigned int __b)
-{
-  return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, __b, (vector unsigned int)__a);
+static int __ATTRS_o_ai vec_any_ge(vector bool int __a,
+                                   vector unsigned int __b) {
+  return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, __b,
+                                      (vector unsigned int)__a);
 }
 
-static int __ATTRS_o_ai
-vec_any_ge(vector bool int __a, vector bool int __b)
-{
-  return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV,
-                                      (vector unsigned int)__b,
+static int __ATTRS_o_ai vec_any_ge(vector bool int __a, vector bool int __b) {
+  return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)__b,
                                       (vector unsigned int)__a);
 }
 
 #ifdef __POWER8_VECTOR__
-static int __ATTRS_o_ai
-vec_any_ge(vector signed long long __a, vector signed long long __b)
-{
+static int __ATTRS_o_ai vec_any_ge(vector signed long long __a,
+                                   vector signed long long __b) {
   return __builtin_altivec_vcmpgtsd_p(__CR6_LT_REV, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_ge(vector unsigned long long __a, vector unsigned long long __b)
-{
+static int __ATTRS_o_ai vec_any_ge(vector unsigned long long __a,
+                                   vector unsigned long long __b) {
   return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_ge(vector signed long long __a, vector bool long long __b)
-{
+static int __ATTRS_o_ai vec_any_ge(vector signed long long __a,
+                                   vector bool long long __b) {
   return __builtin_altivec_vcmpgtsd_p(__CR6_LT_REV,
                                       (vector signed long long)__b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_ge(vector unsigned long long __a, vector bool long long __b)
-{
-  return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV, 
+static int __ATTRS_o_ai vec_any_ge(vector unsigned long long __a,
+                                   vector bool long long __b) {
+  return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV,
                                       (vector unsigned long long)__b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_ge(vector bool long long __a, vector signed long long __b)
-{
+static int __ATTRS_o_ai vec_any_ge(vector bool long long __a,
+                                   vector signed long long __b) {
   return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV,
                                       (vector unsigned long long)__b,
                                       (vector unsigned long long)__a);
 }
 
-static int __ATTRS_o_ai
-vec_any_ge(vector bool long long __a, vector unsigned long long __b)
-{
+static int __ATTRS_o_ai vec_any_ge(vector bool long long __a,
+                                   vector unsigned long long __b) {
   return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV, __b,
                                       (vector unsigned long long)__a);
 }
 
-static int __ATTRS_o_ai
-vec_any_ge(vector bool long long __a, vector bool long long __b)
-{
+static int __ATTRS_o_ai vec_any_ge(vector bool long long __a,
+                                   vector bool long long __b) {
   return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV,
                                       (vector unsigned long long)__b,
                                       (vector unsigned long long)__a);
 }
 #endif
 
-static int __ATTRS_o_ai
-vec_any_ge(vector float __a, vector float __b)
-{
+static int __ATTRS_o_ai vec_any_ge(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpgefp_p(__CR6_EQ_REV, __a, __b);
 }
 
 /* vec_any_gt */
 
-static int __ATTRS_o_ai
-vec_any_gt(vector signed char __a, vector signed char __b)
-{
+static int __ATTRS_o_ai vec_any_gt(vector signed char __a,
+                                   vector signed char __b) {
   return __builtin_altivec_vcmpgtsb_p(__CR6_EQ_REV, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_any_gt(vector signed char __a, vector bool char __b)
-{
-  return __builtin_altivec_vcmpgtsb_p(__CR6_EQ_REV, __a, (vector signed char)__b);
+static int __ATTRS_o_ai vec_any_gt(vector signed char __a,
+                                   vector bool char __b) {
+  return __builtin_altivec_vcmpgtsb_p(__CR6_EQ_REV, __a,
+                                      (vector signed char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_gt(vector unsigned char __a, vector unsigned char __b)
-{
+static int __ATTRS_o_ai vec_any_gt(vector unsigned char __a,
+                                   vector unsigned char __b) {
   return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_any_gt(vector unsigned char __a, vector bool char __b)
-{
-  return 
-    __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, __a, (vector unsigned char)__b);
-}
-
-static int __ATTRS_o_ai
-vec_any_gt(vector bool char __a, vector signed char __b)
-{
-  return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV,
-                                      (vector unsigned char)__a,
+static int __ATTRS_o_ai vec_any_gt(vector unsigned char __a,
+                                   vector bool char __b) {
+  return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, __a,
                                       (vector unsigned char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_gt(vector bool char __a, vector unsigned char __b)
-{
-  return 
-    __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)__a, __b);
-}
-
-static int __ATTRS_o_ai
-vec_any_gt(vector bool char __a, vector bool char __b)
-{
-  return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV,
-                                      (vector unsigned char)__a,
+static int __ATTRS_o_ai vec_any_gt(vector bool char __a,
+                                   vector signed char __b) {
+  return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)__a,
                                       (vector unsigned char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_gt(vector short __a, vector short __b)
-{
+static int __ATTRS_o_ai vec_any_gt(vector bool char __a,
+                                   vector unsigned char __b) {
+  return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)__a,
+                                      __b);
+}
+
+static int __ATTRS_o_ai vec_any_gt(vector bool char __a, vector bool char __b) {
+  return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)__a,
+                                      (vector unsigned char)__b);
+}
+
+static int __ATTRS_o_ai vec_any_gt(vector short __a, vector short __b) {
   return __builtin_altivec_vcmpgtsh_p(__CR6_EQ_REV, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_any_gt(vector short __a, vector bool short __b)
-{
+static int __ATTRS_o_ai vec_any_gt(vector short __a, vector bool short __b) {
   return __builtin_altivec_vcmpgtsh_p(__CR6_EQ_REV, __a, (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_gt(vector unsigned short __a, vector unsigned short __b)
-{
+static int __ATTRS_o_ai vec_any_gt(vector unsigned short __a,
+                                   vector unsigned short __b) {
   return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_any_gt(vector unsigned short __a, vector bool short __b)
-{
-  return 
-    __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, __a, (vector unsigned short)__b);
-}
-
-static int __ATTRS_o_ai
-vec_any_gt(vector bool short __a, vector short __b)
-{
-  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV,
-                                      (vector unsigned short)__a,
+static int __ATTRS_o_ai vec_any_gt(vector unsigned short __a,
+                                   vector bool short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, __a,
                                       (vector unsigned short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_gt(vector bool short __a, vector unsigned short __b)
-{
-  return
-    __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)__a, __b);
-}
-
-static int __ATTRS_o_ai
-vec_any_gt(vector bool short __a, vector bool short __b)
-{
-  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV,
-                                      (vector unsigned short)__a,
+static int __ATTRS_o_ai vec_any_gt(vector bool short __a, vector short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)__a,
                                       (vector unsigned short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_gt(vector int __a, vector int __b)
-{
+static int __ATTRS_o_ai vec_any_gt(vector bool short __a,
+                                   vector unsigned short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)__a,
+                                      __b);
+}
+
+static int __ATTRS_o_ai vec_any_gt(vector bool short __a,
+                                   vector bool short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)__a,
+                                      (vector unsigned short)__b);
+}
+
+static int __ATTRS_o_ai vec_any_gt(vector int __a, vector int __b) {
   return __builtin_altivec_vcmpgtsw_p(__CR6_EQ_REV, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_any_gt(vector int __a, vector bool int __b)
-{
+static int __ATTRS_o_ai vec_any_gt(vector int __a, vector bool int __b) {
   return __builtin_altivec_vcmpgtsw_p(__CR6_EQ_REV, __a, (vector int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_gt(vector unsigned int __a, vector unsigned int __b)
-{
+static int __ATTRS_o_ai vec_any_gt(vector unsigned int __a,
+                                   vector unsigned int __b) {
   return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_any_gt(vector unsigned int __a, vector bool int __b)
-{
-  return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, __a, (vector unsigned int)__b);
-}
-
-static int __ATTRS_o_ai
-vec_any_gt(vector bool int __a, vector int __b)
-{
-  return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV,
-                                      (vector unsigned int)__a,
+static int __ATTRS_o_ai vec_any_gt(vector unsigned int __a,
+                                   vector bool int __b) {
+  return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, __a,
                                       (vector unsigned int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_gt(vector bool int __a, vector unsigned int __b)
-{
-  return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)__a, __b);
+static int __ATTRS_o_ai vec_any_gt(vector bool int __a, vector int __b) {
+  return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)__a,
+                                      (vector unsigned int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_gt(vector bool int __a, vector bool int __b)
-{
-  return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV,
-                                      (vector unsigned int)__a,
+static int __ATTRS_o_ai vec_any_gt(vector bool int __a,
+                                   vector unsigned int __b) {
+  return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)__a,
+                                      __b);
+}
+
+static int __ATTRS_o_ai vec_any_gt(vector bool int __a, vector bool int __b) {
+  return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)__a,
                                       (vector unsigned int)__b);
 }
 
 #ifdef __POWER8_VECTOR__
-static int __ATTRS_o_ai
-vec_any_gt(vector signed long long __a, vector signed long long __b)
-{
+static int __ATTRS_o_ai vec_any_gt(vector signed long long __a,
+                                   vector signed long long __b) {
   return __builtin_altivec_vcmpgtsd_p(__CR6_EQ_REV, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_any_gt(vector unsigned long long __a, vector unsigned long long __b)
-{
+static int __ATTRS_o_ai vec_any_gt(vector unsigned long long __a,
+                                   vector unsigned long long __b) {
   return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_any_gt(vector signed long long __a, vector bool long long __b)
-{
+static int __ATTRS_o_ai vec_any_gt(vector signed long long __a,
+                                   vector bool long long __b) {
   return __builtin_altivec_vcmpgtsd_p(__CR6_EQ_REV, __a,
                                       (vector signed long long)__b);
 }
 
-
-static int __ATTRS_o_ai
-vec_any_gt(vector unsigned long long __a, vector bool long long __b)
-{
-  return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV, __a, 
+static int __ATTRS_o_ai vec_any_gt(vector unsigned long long __a,
+                                   vector bool long long __b) {
+  return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV, __a,
                                       (vector unsigned long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_gt(vector bool long long __a, vector signed long long __b)
-{
+static int __ATTRS_o_ai vec_any_gt(vector bool long long __a,
+                                   vector signed long long __b) {
   return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV,
                                       (vector unsigned long long)__a,
                                       (vector unsigned long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_gt(vector bool long long __a, vector unsigned long long __b)
-{
-  return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV, 
+static int __ATTRS_o_ai vec_any_gt(vector bool long long __a,
+                                   vector unsigned long long __b) {
+  return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV,
                                       (vector unsigned long long)__a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_any_gt(vector bool long long __a, vector bool long long __b)
-{
+static int __ATTRS_o_ai vec_any_gt(vector bool long long __a,
+                                   vector bool long long __b) {
   return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV,
                                       (vector unsigned long long)__a,
                                       (vector unsigned long long)__b);
 }
 #endif
 
-static int __ATTRS_o_ai
-vec_any_gt(vector float __a, vector float __b)
-{
+static int __ATTRS_o_ai vec_any_gt(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpgtfp_p(__CR6_EQ_REV, __a, __b);
 }
 
 /* vec_any_le */
 
-static int __ATTRS_o_ai
-vec_any_le(vector signed char __a, vector signed char __b)
-{
+static int __ATTRS_o_ai vec_any_le(vector signed char __a,
+                                   vector signed char __b) {
   return __builtin_altivec_vcmpgtsb_p(__CR6_LT_REV, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_any_le(vector signed char __a, vector bool char __b)
-{
-  return __builtin_altivec_vcmpgtsb_p(__CR6_LT_REV, __a, (vector signed char)__b);
+static int __ATTRS_o_ai vec_any_le(vector signed char __a,
+                                   vector bool char __b) {
+  return __builtin_altivec_vcmpgtsb_p(__CR6_LT_REV, __a,
+                                      (vector signed char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_le(vector unsigned char __a, vector unsigned char __b)
-{
+static int __ATTRS_o_ai vec_any_le(vector unsigned char __a,
+                                   vector unsigned char __b) {
   return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_any_le(vector unsigned char __a, vector bool char __b)
-{
-  return 
-    __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, __a, (vector unsigned char)__b);
-}
-
-static int __ATTRS_o_ai
-vec_any_le(vector bool char __a, vector signed char __b)
-{
-  return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV,
-                                      (vector unsigned char)__a,
+static int __ATTRS_o_ai vec_any_le(vector unsigned char __a,
+                                   vector bool char __b) {
+  return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, __a,
                                       (vector unsigned char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_le(vector bool char __a, vector unsigned char __b)
-{
-  return 
-    __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)__a, __b);
-}
-
-static int __ATTRS_o_ai
-vec_any_le(vector bool char __a, vector bool char __b)
-{
-  return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV,
-                                      (vector unsigned char)__a,
+static int __ATTRS_o_ai vec_any_le(vector bool char __a,
+                                   vector signed char __b) {
+  return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)__a,
                                       (vector unsigned char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_le(vector short __a, vector short __b)
-{
+static int __ATTRS_o_ai vec_any_le(vector bool char __a,
+                                   vector unsigned char __b) {
+  return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)__a,
+                                      __b);
+}
+
+static int __ATTRS_o_ai vec_any_le(vector bool char __a, vector bool char __b) {
+  return __builtin_altivec_vcmpgtub_p(__CR6_LT_REV, (vector unsigned char)__a,
+                                      (vector unsigned char)__b);
+}
+
+static int __ATTRS_o_ai vec_any_le(vector short __a, vector short __b) {
   return __builtin_altivec_vcmpgtsh_p(__CR6_LT_REV, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_any_le(vector short __a, vector bool short __b)
-{
+static int __ATTRS_o_ai vec_any_le(vector short __a, vector bool short __b) {
   return __builtin_altivec_vcmpgtsh_p(__CR6_LT_REV, __a, (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_le(vector unsigned short __a, vector unsigned short __b)
-{
+static int __ATTRS_o_ai vec_any_le(vector unsigned short __a,
+                                   vector unsigned short __b) {
   return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_any_le(vector unsigned short __a, vector bool short __b)
-{
-  return 
-    __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, __a, (vector unsigned short)__b);
-}
-
-static int __ATTRS_o_ai
-vec_any_le(vector bool short __a, vector short __b)
-{
-  return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV,
-                                      (vector unsigned short)__a,
+static int __ATTRS_o_ai vec_any_le(vector unsigned short __a,
+                                   vector bool short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, __a,
                                       (vector unsigned short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_le(vector bool short __a, vector unsigned short __b)
-{
-  return 
-    __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)__a, __b);
-}
-
-static int __ATTRS_o_ai
-vec_any_le(vector bool short __a, vector bool short __b)
-{
-  return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV,
-                                      (vector unsigned short)__a,
+static int __ATTRS_o_ai vec_any_le(vector bool short __a, vector short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)__a,
                                       (vector unsigned short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_le(vector int __a, vector int __b)
-{
+static int __ATTRS_o_ai vec_any_le(vector bool short __a,
+                                   vector unsigned short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)__a,
+                                      __b);
+}
+
+static int __ATTRS_o_ai vec_any_le(vector bool short __a,
+                                   vector bool short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_LT_REV, (vector unsigned short)__a,
+                                      (vector unsigned short)__b);
+}
+
+static int __ATTRS_o_ai vec_any_le(vector int __a, vector int __b) {
   return __builtin_altivec_vcmpgtsw_p(__CR6_LT_REV, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_any_le(vector int __a, vector bool int __b)
-{
+static int __ATTRS_o_ai vec_any_le(vector int __a, vector bool int __b) {
   return __builtin_altivec_vcmpgtsw_p(__CR6_LT_REV, __a, (vector int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_le(vector unsigned int __a, vector unsigned int __b)
-{
+static int __ATTRS_o_ai vec_any_le(vector unsigned int __a,
+                                   vector unsigned int __b) {
   return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_any_le(vector unsigned int __a, vector bool int __b)
-{
-  return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, __a, (vector unsigned int)__b);
-}
-
-static int __ATTRS_o_ai
-vec_any_le(vector bool int __a, vector int __b)
-{
-  return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV,
-                                      (vector unsigned int)__a,
+static int __ATTRS_o_ai vec_any_le(vector unsigned int __a,
+                                   vector bool int __b) {
+  return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, __a,
                                       (vector unsigned int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_le(vector bool int __a, vector unsigned int __b)
-{
-  return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)__a, __b);
+static int __ATTRS_o_ai vec_any_le(vector bool int __a, vector int __b) {
+  return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)__a,
+                                      (vector unsigned int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_le(vector bool int __a, vector bool int __b)
-{
-  return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV,
-                                      (vector unsigned int)__a,
+static int __ATTRS_o_ai vec_any_le(vector bool int __a,
+                                   vector unsigned int __b) {
+  return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)__a,
+                                      __b);
+}
+
+static int __ATTRS_o_ai vec_any_le(vector bool int __a, vector bool int __b) {
+  return __builtin_altivec_vcmpgtuw_p(__CR6_LT_REV, (vector unsigned int)__a,
                                       (vector unsigned int)__b);
 }
 
 #ifdef __POWER8_VECTOR__
-static int __ATTRS_o_ai
-vec_any_le(vector signed long long __a, vector signed long long __b)
-{
+static int __ATTRS_o_ai vec_any_le(vector signed long long __a,
+                                   vector signed long long __b) {
   return __builtin_altivec_vcmpgtsd_p(__CR6_LT_REV, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_any_le(vector unsigned long long __a, vector unsigned long long __b)
-{
+static int __ATTRS_o_ai vec_any_le(vector unsigned long long __a,
+                                   vector unsigned long long __b) {
   return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_any_le(vector signed long long __a, vector bool long long __b)
-{
+static int __ATTRS_o_ai vec_any_le(vector signed long long __a,
+                                   vector bool long long __b) {
   return __builtin_altivec_vcmpgtsd_p(__CR6_LT_REV, __a,
                                       (vector signed long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_le(vector unsigned long long __a, vector bool long long __b)
-{
-  return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV, __a, 
+static int __ATTRS_o_ai vec_any_le(vector unsigned long long __a,
+                                   vector bool long long __b) {
+  return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV, __a,
                                       (vector unsigned long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_le(vector bool long long __a, vector signed long long __b)
-{
+static int __ATTRS_o_ai vec_any_le(vector bool long long __a,
+                                   vector signed long long __b) {
   return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV,
                                       (vector unsigned long long)__a,
                                       (vector unsigned long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_le(vector bool long long __a, vector unsigned long long __b)
-{
-  return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV, 
+static int __ATTRS_o_ai vec_any_le(vector bool long long __a,
+                                   vector unsigned long long __b) {
+  return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV,
                                       (vector unsigned long long)__a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_any_le(vector bool long long __a, vector bool long long __b)
-{
+static int __ATTRS_o_ai vec_any_le(vector bool long long __a,
+                                   vector bool long long __b) {
   return __builtin_altivec_vcmpgtud_p(__CR6_LT_REV,
                                       (vector unsigned long long)__a,
                                       (vector unsigned long long)__b);
 }
 #endif
 
-static int __ATTRS_o_ai
-vec_any_le(vector float __a, vector float __b)
-{
+static int __ATTRS_o_ai vec_any_le(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpgefp_p(__CR6_EQ_REV, __b, __a);
 }
 
 /* vec_any_lt */
 
-static int __ATTRS_o_ai
-vec_any_lt(vector signed char __a, vector signed char __b)
-{
+static int __ATTRS_o_ai vec_any_lt(vector signed char __a,
+                                   vector signed char __b) {
   return __builtin_altivec_vcmpgtsb_p(__CR6_EQ_REV, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_lt(vector signed char __a, vector bool char __b)
-{
-  return __builtin_altivec_vcmpgtsb_p(__CR6_EQ_REV, (vector signed char)__b, __a);
+static int __ATTRS_o_ai vec_any_lt(vector signed char __a,
+                                   vector bool char __b) {
+  return __builtin_altivec_vcmpgtsb_p(__CR6_EQ_REV, (vector signed char)__b,
+                                      __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_lt(vector unsigned char __a, vector unsigned char __b)
-{
+static int __ATTRS_o_ai vec_any_lt(vector unsigned char __a,
+                                   vector unsigned char __b) {
   return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_lt(vector unsigned char __a, vector bool char __b)
-{
-  return 
-    __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)__b, __a);
+static int __ATTRS_o_ai vec_any_lt(vector unsigned char __a,
+                                   vector bool char __b) {
+  return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)__b,
+                                      __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_lt(vector bool char __a, vector signed char __b)
-{
-  return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV,
-                                      (vector unsigned char)__b,
+static int __ATTRS_o_ai vec_any_lt(vector bool char __a,
+                                   vector signed char __b) {
+  return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)__b,
                                       (vector unsigned char)__a);
 }
 
-static int __ATTRS_o_ai
-vec_any_lt(vector bool char __a, vector unsigned char __b)
-{
-  return 
-    __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, __b, (vector unsigned char)__a);
-}
-
-static int __ATTRS_o_ai
-vec_any_lt(vector bool char __a, vector bool char __b)
-{
-  return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV,
-                                      (vector unsigned char)__b,
+static int __ATTRS_o_ai vec_any_lt(vector bool char __a,
+                                   vector unsigned char __b) {
+  return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, __b,
                                       (vector unsigned char)__a);
 }
 
-static int __ATTRS_o_ai
-vec_any_lt(vector short __a, vector short __b)
-{
+static int __ATTRS_o_ai vec_any_lt(vector bool char __a, vector bool char __b) {
+  return __builtin_altivec_vcmpgtub_p(__CR6_EQ_REV, (vector unsigned char)__b,
+                                      (vector unsigned char)__a);
+}
+
+static int __ATTRS_o_ai vec_any_lt(vector short __a, vector short __b) {
   return __builtin_altivec_vcmpgtsh_p(__CR6_EQ_REV, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_lt(vector short __a, vector bool short __b)
-{
+static int __ATTRS_o_ai vec_any_lt(vector short __a, vector bool short __b) {
   return __builtin_altivec_vcmpgtsh_p(__CR6_EQ_REV, (vector short)__b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_lt(vector unsigned short __a, vector unsigned short __b)
-{
+static int __ATTRS_o_ai vec_any_lt(vector unsigned short __a,
+                                   vector unsigned short __b) {
   return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_lt(vector unsigned short __a, vector bool short __b)
-{
-  return 
-    __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)__b, __a);
+static int __ATTRS_o_ai vec_any_lt(vector unsigned short __a,
+                                   vector bool short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)__b,
+                                      __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_lt(vector bool short __a, vector short __b)
-{
-  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV,
-                                      (vector unsigned short)__b,
+static int __ATTRS_o_ai vec_any_lt(vector bool short __a, vector short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)__b,
                                       (vector unsigned short)__a);
 }
 
-static int __ATTRS_o_ai
-vec_any_lt(vector bool short __a, vector unsigned short __b)
-{
-  return 
-    __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, __b, (vector unsigned short)__a);
-}
-
-static int __ATTRS_o_ai
-vec_any_lt(vector bool short __a, vector bool short __b)
-{
-  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV,
-                                      (vector unsigned short)__b,
+static int __ATTRS_o_ai vec_any_lt(vector bool short __a,
+                                   vector unsigned short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, __b,
                                       (vector unsigned short)__a);
 }
 
-static int __ATTRS_o_ai
-vec_any_lt(vector int __a, vector int __b)
-{
+static int __ATTRS_o_ai vec_any_lt(vector bool short __a,
+                                   vector bool short __b) {
+  return __builtin_altivec_vcmpgtuh_p(__CR6_EQ_REV, (vector unsigned short)__b,
+                                      (vector unsigned short)__a);
+}
+
+static int __ATTRS_o_ai vec_any_lt(vector int __a, vector int __b) {
   return __builtin_altivec_vcmpgtsw_p(__CR6_EQ_REV, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_lt(vector int __a, vector bool int __b)
-{
+static int __ATTRS_o_ai vec_any_lt(vector int __a, vector bool int __b) {
   return __builtin_altivec_vcmpgtsw_p(__CR6_EQ_REV, (vector int)__b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_lt(vector unsigned int __a, vector unsigned int __b)
-{
+static int __ATTRS_o_ai vec_any_lt(vector unsigned int __a,
+                                   vector unsigned int __b) {
   return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_lt(vector unsigned int __a, vector bool int __b)
-{
-  return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)__b, __a);
+static int __ATTRS_o_ai vec_any_lt(vector unsigned int __a,
+                                   vector bool int __b) {
+  return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)__b,
+                                      __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_lt(vector bool int __a, vector int __b)
-{
-  return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV,
-                                      (vector unsigned int)__b,
+static int __ATTRS_o_ai vec_any_lt(vector bool int __a, vector int __b) {
+  return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)__b,
                                       (vector unsigned int)__a);
 }
 
-static int __ATTRS_o_ai
-vec_any_lt(vector bool int __a, vector unsigned int __b)
-{
-  return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, __b, (vector unsigned int)__a);
+static int __ATTRS_o_ai vec_any_lt(vector bool int __a,
+                                   vector unsigned int __b) {
+  return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, __b,
+                                      (vector unsigned int)__a);
 }
 
-static int __ATTRS_o_ai
-vec_any_lt(vector bool int __a, vector bool int __b)
-{
-  return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV,
-                                      (vector unsigned int)__b,
+static int __ATTRS_o_ai vec_any_lt(vector bool int __a, vector bool int __b) {
+  return __builtin_altivec_vcmpgtuw_p(__CR6_EQ_REV, (vector unsigned int)__b,
                                       (vector unsigned int)__a);
 }
 
 #ifdef __POWER8_VECTOR__
-static int __ATTRS_o_ai
-vec_any_lt(vector signed long long __a, vector signed long long __b)
-{
+static int __ATTRS_o_ai vec_any_lt(vector signed long long __a,
+                                   vector signed long long __b) {
   return __builtin_altivec_vcmpgtsd_p(__CR6_EQ_REV, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_lt(vector unsigned long long __a, vector unsigned long long __b)
-{
+static int __ATTRS_o_ai vec_any_lt(vector unsigned long long __a,
+                                   vector unsigned long long __b) {
   return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV, __b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_lt(vector signed long long __a, vector bool long long __b)
-{
+static int __ATTRS_o_ai vec_any_lt(vector signed long long __a,
+                                   vector bool long long __b) {
   return __builtin_altivec_vcmpgtsd_p(__CR6_EQ_REV,
                                       (vector signed long long)__b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_lt(vector unsigned long long __a, vector bool long long __b)
-{
-  return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV, 
+static int __ATTRS_o_ai vec_any_lt(vector unsigned long long __a,
+                                   vector bool long long __b) {
+  return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV,
                                       (vector unsigned long long)__b, __a);
 }
 
-static int __ATTRS_o_ai
-vec_any_lt(vector bool long long __a, vector signed long long __b)
-{
+static int __ATTRS_o_ai vec_any_lt(vector bool long long __a,
+                                   vector signed long long __b) {
   return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV,
                                       (vector unsigned long long)__b,
                                       (vector unsigned long long)__a);
 }
 
-static int __ATTRS_o_ai
-vec_any_lt(vector bool long long __a, vector unsigned long long __b)
-{
-  return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV, __b, 
+static int __ATTRS_o_ai vec_any_lt(vector bool long long __a,
+                                   vector unsigned long long __b) {
+  return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV, __b,
                                       (vector unsigned long long)__a);
 }
 
-static int __ATTRS_o_ai
-vec_any_lt(vector bool long long __a, vector bool long long __b)
-{
+static int __ATTRS_o_ai vec_any_lt(vector bool long long __a,
+                                   vector bool long long __b) {
   return __builtin_altivec_vcmpgtud_p(__CR6_EQ_REV,
                                       (vector unsigned long long)__b,
                                       (vector unsigned long long)__a);
 }
 #endif
 
-static int __ATTRS_o_ai
-vec_any_lt(vector float __a, vector float __b)
-{
+static int __ATTRS_o_ai vec_any_lt(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpgtfp_p(__CR6_EQ_REV, __b, __a);
 }
 
 /* vec_any_nan */
 
-static int __attribute__((__always_inline__))
-vec_any_nan(vector float __a)
-{
+static int __attribute__((__always_inline__)) vec_any_nan(vector float __a) {
   return __builtin_altivec_vcmpeqfp_p(__CR6_LT_REV, __a, __a);
 }
 
 /* vec_any_ne */
 
-static int __ATTRS_o_ai
-vec_any_ne(vector signed char __a, vector signed char __b)
-{
-  return
-    __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_any_ne(vector signed char __a,
+                                   vector signed char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector signed char __a, vector bool char __b)
-{
-  return
-    __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_any_ne(vector signed char __a,
+                                   vector bool char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector unsigned char __a, vector unsigned char __b)
-{
-  return
-    __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_any_ne(vector unsigned char __a,
+                                   vector unsigned char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector unsigned char __a, vector bool char __b)
-{
-  return
-    __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_any_ne(vector unsigned char __a,
+                                   vector bool char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector bool char __a, vector signed char __b)
-{
-  return
-    __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_any_ne(vector bool char __a,
+                                   vector signed char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector bool char __a, vector unsigned char __b)
-{
-  return
-    __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_any_ne(vector bool char __a,
+                                   vector unsigned char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector bool char __a, vector bool char __b)
-{
-  return
-    __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a, (vector char)__b);
+static int __ATTRS_o_ai vec_any_ne(vector bool char __a, vector bool char __b) {
+  return __builtin_altivec_vcmpequb_p(__CR6_LT_REV, (vector char)__a,
+                                      (vector char)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector short __a, vector short __b)
-{
+static int __ATTRS_o_ai vec_any_ne(vector short __a, vector short __b) {
   return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector short __a, vector bool short __b)
-{
+static int __ATTRS_o_ai vec_any_ne(vector short __a, vector bool short __b) {
   return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, __a, (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector unsigned short __a, vector unsigned short __b)
-{
-  return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, 
-                                      (vector short)__a,
+static int __ATTRS_o_ai vec_any_ne(vector unsigned short __a,
+                                   vector unsigned short __b) {
+  return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector short)__a,
                                       (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector unsigned short __a, vector bool short __b)
-{
-  return __builtin_altivec_vcmpequh_p(__CR6_LT_REV,
-                                      (vector short)__a,
+static int __ATTRS_o_ai vec_any_ne(vector unsigned short __a,
+                                   vector bool short __b) {
+  return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector short)__a,
                                       (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector bool short __a, vector short __b)
-{
-  return __builtin_altivec_vcmpequh_p(__CR6_LT_REV,
-                                      (vector short)__a,
+static int __ATTRS_o_ai vec_any_ne(vector bool short __a, vector short __b) {
+  return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector short)__a,
                                       (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector bool short __a, vector unsigned short __b)
-{
-  return __builtin_altivec_vcmpequh_p(__CR6_LT_REV,
-                                      (vector short)__a,
+static int __ATTRS_o_ai vec_any_ne(vector bool short __a,
+                                   vector unsigned short __b) {
+  return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector short)__a,
                                       (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector bool short __a, vector bool short __b)
-{
-  return __builtin_altivec_vcmpequh_p(__CR6_LT_REV,
-                                      (vector short)__a,
+static int __ATTRS_o_ai vec_any_ne(vector bool short __a,
+                                   vector bool short __b) {
+  return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector short)__a,
                                       (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector pixel __a, vector pixel __b)
-{
-  return __builtin_altivec_vcmpequh_p(__CR6_LT_REV,
-                                      (vector short)__a,
+static int __ATTRS_o_ai vec_any_ne(vector pixel __a, vector pixel __b) {
+  return __builtin_altivec_vcmpequh_p(__CR6_LT_REV, (vector short)__a,
                                       (vector short)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector int __a, vector int __b)
-{
+static int __ATTRS_o_ai vec_any_ne(vector int __a, vector int __b) {
   return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector int __a, vector bool int __b)
-{
+static int __ATTRS_o_ai vec_any_ne(vector int __a, vector bool int __b) {
   return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, __a, (vector int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector unsigned int __a, vector unsigned int __b)
-{
-  return
-    __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a, (vector int)__b);
+static int __ATTRS_o_ai vec_any_ne(vector unsigned int __a,
+                                   vector unsigned int __b) {
+  return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a,
+                                      (vector int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector unsigned int __a, vector bool int __b)
-{
-  return
-    __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a, (vector int)__b);
+static int __ATTRS_o_ai vec_any_ne(vector unsigned int __a,
+                                   vector bool int __b) {
+  return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a,
+                                      (vector int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector bool int __a, vector int __b)
-{
-  return
-    __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a, (vector int)__b);
+static int __ATTRS_o_ai vec_any_ne(vector bool int __a, vector int __b) {
+  return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a,
+                                      (vector int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector bool int __a, vector unsigned int __b)
-{
-  return
-    __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a, (vector int)__b);
+static int __ATTRS_o_ai vec_any_ne(vector bool int __a,
+                                   vector unsigned int __b) {
+  return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a,
+                                      (vector int)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector bool int __a, vector bool int __b)
-{
-  return
-    __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a, (vector int)__b);
+static int __ATTRS_o_ai vec_any_ne(vector bool int __a, vector bool int __b) {
+  return __builtin_altivec_vcmpequw_p(__CR6_LT_REV, (vector int)__a,
+                                      (vector int)__b);
 }
 
 #ifdef __POWER8_VECTOR__
-static int __ATTRS_o_ai
-vec_any_ne(vector signed long long __a, vector signed long long __b)
-{
+static int __ATTRS_o_ai vec_any_ne(vector signed long long __a,
+                                   vector signed long long __b) {
   return __builtin_altivec_vcmpequd_p(__CR6_LT_REV, __a, __b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector unsigned long long __a, vector unsigned long long __b)
-{
-  return
-    __builtin_altivec_vcmpequd_p(__CR6_LT_REV, (vector long long)__a, 
-                                 (vector long long)__b);
+static int __ATTRS_o_ai vec_any_ne(vector unsigned long long __a,
+                                   vector unsigned long long __b) {
+  return __builtin_altivec_vcmpequd_p(__CR6_LT_REV, (vector long long)__a,
+                                      (vector long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector signed long long __a, vector bool long long __b)
-{
+static int __ATTRS_o_ai vec_any_ne(vector signed long long __a,
+                                   vector bool long long __b) {
   return __builtin_altivec_vcmpequd_p(__CR6_LT_REV, __a,
                                       (vector signed long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector unsigned long long __a, vector bool long long __b)
-{
-  return
-    __builtin_altivec_vcmpequd_p(__CR6_LT_REV, (vector signed long long)__a, 
-                                 (vector signed long long)__b);
+static int __ATTRS_o_ai vec_any_ne(vector unsigned long long __a,
+                                   vector bool long long __b) {
+  return __builtin_altivec_vcmpequd_p(
+      __CR6_LT_REV, (vector signed long long)__a, (vector signed long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector bool long long __a, vector signed long long __b)
-{
-  return
-    __builtin_altivec_vcmpequd_p(__CR6_LT_REV, (vector signed long long)__a, 
-                                 (vector signed long long)__b);
+static int __ATTRS_o_ai vec_any_ne(vector bool long long __a,
+                                   vector signed long long __b) {
+  return __builtin_altivec_vcmpequd_p(
+      __CR6_LT_REV, (vector signed long long)__a, (vector signed long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector bool long long __a, vector unsigned long long __b)
-{
-  return
-    __builtin_altivec_vcmpequd_p(__CR6_LT_REV, (vector signed long long)__a, 
-                                 (vector signed long long)__b);
+static int __ATTRS_o_ai vec_any_ne(vector bool long long __a,
+                                   vector unsigned long long __b) {
+  return __builtin_altivec_vcmpequd_p(
+      __CR6_LT_REV, (vector signed long long)__a, (vector signed long long)__b);
 }
 
-static int __ATTRS_o_ai
-vec_any_ne(vector bool long long __a, vector bool long long __b)
-{
-  return
-    __builtin_altivec_vcmpequd_p(__CR6_LT_REV, (vector signed long long)__a, 
-                                 (vector signed long long)__b);
+static int __ATTRS_o_ai vec_any_ne(vector bool long long __a,
+                                   vector bool long long __b) {
+  return __builtin_altivec_vcmpequd_p(
+      __CR6_LT_REV, (vector signed long long)__a, (vector signed long long)__b);
 }
 #endif
 
-static int __ATTRS_o_ai
-vec_any_ne(vector float __a, vector float __b)
-{
+static int __ATTRS_o_ai vec_any_ne(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpeqfp_p(__CR6_LT_REV, __a, __b);
 }
 
 /* vec_any_nge */
 
 static int __attribute__((__always_inline__))
-vec_any_nge(vector float __a, vector float __b)
-{
+vec_any_nge(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpgefp_p(__CR6_LT_REV, __a, __b);
 }
 
 /* vec_any_ngt */
 
 static int __attribute__((__always_inline__))
-vec_any_ngt(vector float __a, vector float __b)
-{
+vec_any_ngt(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpgtfp_p(__CR6_LT_REV, __a, __b);
 }
 
 /* vec_any_nle */
 
 static int __attribute__((__always_inline__))
-vec_any_nle(vector float __a, vector float __b)
-{
+vec_any_nle(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpgefp_p(__CR6_LT_REV, __b, __a);
 }
 
 /* vec_any_nlt */
 
 static int __attribute__((__always_inline__))
-vec_any_nlt(vector float __a, vector float __b)
-{
+vec_any_nlt(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpgtfp_p(__CR6_LT_REV, __b, __a);
 }
 
 /* vec_any_numeric */
 
 static int __attribute__((__always_inline__))
-vec_any_numeric(vector float __a)
-{
+vec_any_numeric(vector float __a) {
   return __builtin_altivec_vcmpeqfp_p(__CR6_EQ_REV, __a, __a);
 }
 
 /* vec_any_out */
 
 static int __attribute__((__always_inline__))
-vec_any_out(vector float __a, vector float __b)
-{
+vec_any_out(vector float __a, vector float __b) {
   return __builtin_altivec_vcmpbfp_p(__CR6_EQ_REV, __a, __b);
 }
 
@@ -13452,113 +11931,110 @@
 */
 #ifdef __CRYPTO__
 static vector unsigned long long __attribute__((__always_inline__))
-__builtin_crypto_vsbox (vector unsigned long long __a)
-{
+__builtin_crypto_vsbox(vector unsigned long long __a) {
   return __builtin_altivec_crypto_vsbox(__a);
 }
 
 static vector unsigned long long __attribute__((__always_inline__))
-__builtin_crypto_vcipher (vector unsigned long long __a,
-                          vector unsigned long long __b)
-{
+__builtin_crypto_vcipher(vector unsigned long long __a,
+                         vector unsigned long long __b) {
   return __builtin_altivec_crypto_vcipher(__a, __b);
 }
 
 static vector unsigned long long __attribute__((__always_inline__))
-__builtin_crypto_vcipherlast (vector unsigned long long __a,
-                              vector unsigned long long __b)
-{
+__builtin_crypto_vcipherlast(vector unsigned long long __a,
+                             vector unsigned long long __b) {
   return __builtin_altivec_crypto_vcipherlast(__a, __b);
 }
 
 static vector unsigned long long __attribute__((__always_inline__))
-__builtin_crypto_vncipher (vector unsigned long long __a,
-                           vector unsigned long long __b)
-{
+__builtin_crypto_vncipher(vector unsigned long long __a,
+                          vector unsigned long long __b) {
   return __builtin_altivec_crypto_vncipher(__a, __b);
 }
 
 static vector unsigned long long __attribute__((__always_inline__))
-__builtin_crypto_vncipherlast (vector unsigned long long __a,
-                               vector unsigned long long __b)
-{
+__builtin_crypto_vncipherlast(vector unsigned long long __a,
+                              vector unsigned long long __b) {
   return __builtin_altivec_crypto_vncipherlast(__a, __b);
 }
 
-
 #define __builtin_crypto_vshasigmad __builtin_altivec_crypto_vshasigmad
 #define __builtin_crypto_vshasigmaw __builtin_altivec_crypto_vshasigmaw
 #endif
 
 #ifdef __POWER8_VECTOR__
 static vector unsigned char __ATTRS_o_ai
-__builtin_crypto_vpermxor (vector unsigned char __a,
-                           vector unsigned char __b,
-                           vector unsigned char __c)
-{
+__builtin_crypto_vpermxor(vector unsigned char __a, vector unsigned char __b,
+                          vector unsigned char __c) {
   return __builtin_altivec_crypto_vpermxor(__a, __b, __c);
 }
 
 static vector unsigned short __ATTRS_o_ai
-__builtin_crypto_vpermxor (vector unsigned short __a,
-                           vector unsigned short __b,
-                           vector unsigned short __c)
-{
-  return (vector unsigned short)
-          __builtin_altivec_crypto_vpermxor((vector unsigned char) __a,
-                                             (vector unsigned char) __b,
-                                             (vector unsigned char) __c);
+__builtin_crypto_vpermxor(vector unsigned short __a, vector unsigned short __b,
+                          vector unsigned short __c) {
+  return (vector unsigned short)__builtin_altivec_crypto_vpermxor(
+      (vector unsigned char)__a, (vector unsigned char)__b,
+      (vector unsigned char)__c);
 }
 
-static vector unsigned int __ATTRS_o_ai
-__builtin_crypto_vpermxor (vector unsigned int __a,
-                           vector unsigned int __b,
-                           vector unsigned int __c)
-{
-  return (vector unsigned int)
-          __builtin_altivec_crypto_vpermxor((vector unsigned char) __a,
-                                              (vector unsigned char) __b,
-                                              (vector unsigned char) __c);
+static vector unsigned int __ATTRS_o_ai __builtin_crypto_vpermxor(
+    vector unsigned int __a, vector unsigned int __b, vector unsigned int __c) {
+  return (vector unsigned int)__builtin_altivec_crypto_vpermxor(
+      (vector unsigned char)__a, (vector unsigned char)__b,
+      (vector unsigned char)__c);
 }
 
-static vector unsigned long long __ATTRS_o_ai
-__builtin_crypto_vpermxor (vector unsigned long long __a,
-                           vector unsigned long long __b,
-                           vector unsigned long long __c)
-{
-  return (vector unsigned long long)
-          __builtin_altivec_crypto_vpermxor((vector unsigned char) __a,
-                                              (vector unsigned char) __b,
-                                              (vector unsigned char) __c);
+static vector unsigned long long __ATTRS_o_ai __builtin_crypto_vpermxor(
+    vector unsigned long long __a, vector unsigned long long __b,
+    vector unsigned long long __c) {
+  return (vector unsigned long long)__builtin_altivec_crypto_vpermxor(
+      (vector unsigned char)__a, (vector unsigned char)__b,
+      (vector unsigned char)__c);
 }
 
 static vector unsigned char __ATTRS_o_ai
-__builtin_crypto_vpmsumb (vector unsigned char __a,
-                          vector unsigned char __b)
-{
+__builtin_crypto_vpmsumb(vector unsigned char __a, vector unsigned char __b) {
   return __builtin_altivec_crypto_vpmsumb(__a, __b);
 }
 
 static vector unsigned short __ATTRS_o_ai
-__builtin_crypto_vpmsumb (vector unsigned short __a,
-                          vector unsigned short __b)
-{
+__builtin_crypto_vpmsumb(vector unsigned short __a, vector unsigned short __b) {
   return __builtin_altivec_crypto_vpmsumh(__a, __b);
 }
 
 static vector unsigned int __ATTRS_o_ai
-__builtin_crypto_vpmsumb (vector unsigned int __a,
-                          vector unsigned int __b)
-{
+__builtin_crypto_vpmsumb(vector unsigned int __a, vector unsigned int __b) {
   return __builtin_altivec_crypto_vpmsumw(__a, __b);
 }
 
-static vector unsigned long long __ATTRS_o_ai
-__builtin_crypto_vpmsumb (vector unsigned long long __a,
-                          vector unsigned long long __b)
-{
+static vector unsigned long long __ATTRS_o_ai __builtin_crypto_vpmsumb(
+    vector unsigned long long __a, vector unsigned long long __b) {
   return __builtin_altivec_crypto_vpmsumd(__a, __b);
 }
+
+static vector signed char __ATTRS_o_ai vec_vgbbd (vector signed char __a)
+{
+  return __builtin_altivec_vgbbd((vector unsigned char) __a);
+}
+
+static vector unsigned char __ATTRS_o_ai vec_vgbbd (vector unsigned char __a)
+{
+  return __builtin_altivec_vgbbd(__a);
+}
+
+static vector long long __ATTRS_o_ai
+vec_vbpermq (vector signed char __a, vector signed char __b)
+{
+  return __builtin_altivec_vbpermq((vector unsigned char) __a,
+                                   (vector unsigned char) __b);
+}
+
+static vector long long __ATTRS_o_ai
+vec_vbpermq (vector unsigned char __a, vector unsigned char __b)
+{
+  return __builtin_altivec_vbpermq(__a, __b);
+}
 #endif
 
 #undef __ATTRS_o_ai
diff --git a/lib/Headers/ammintrin.h b/lib/Headers/ammintrin.h
index d87b9cd..17f5ab1 100644
--- a/lib/Headers/ammintrin.h
+++ b/lib/Headers/ammintrin.h
@@ -30,33 +30,175 @@
 
 #include <pmmintrin.h>
 
+/// \brief Extracts the specified bits from the lower 64 bits of the 128-bit
+///    integer vector operand at the index idx and of the length len.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code 
+/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx);
+/// \endcode 
+///
+///
+/// This intrinsic corresponds to the \c EXTRQ instruction.
+///
+///
+/// \param x
+///    The value from which bits are extracted.
+/// \param len
+///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
+///    are zero, the length is interpreted as 64.
+/// \param idx
+///    Bits [5:0] specify the index of the least significant bit; the other 
+///    bits are ignored. If the sum of the index and length is greater than 
+///    64, the result is undefined. If the length and index are both zero, 
+///    bits [63:0] of parameter x are extracted. If the length is zero 
+///    but the index is non-zero, the result is undefined.
+/// \returns A 128-bit integer vector whose lower 64 bits contain the bits
+///    extracted from the source operand.
 #define _mm_extracti_si64(x, len, idx) \
   ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \
                                   (char)(len), (char)(idx)))
 
+/// \brief Extracts the specified bits from the lower 64 bits of the 128-bit
+///    integer vector operand at the index and of the length specified by __y.
+///
+/// \headerfile <x86intrin.h>
+///
+///
+/// This intrinsic corresponds to the \c EXTRQ instruction.
+///
+///
+/// \param __x
+///    The value from which bits are extracted.
+/// \param __y
+///    Specifies the index of the least significant bit at [13:8] 
+///    and the length at [5:0]; all other bits are ignored. 
+///    If bits [5:0] are zero, the length is interpreted as 64.
+///    If the sum of the index and length is greater than 64, the result is 
+///    undefined. If the length and index are both zero, bits [63:0] of 
+///    parameter __x are extracted. If the length is zero but the index is 
+///    non-zero, the result is undefined. 
+/// \returns A 128-bit vector whose lower 64 bits contain the bits extracted 
+///    from the source operand.
 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_extract_si64(__m128i __x, __m128i __y)
 {
   return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
 }
 
+/// \brief Inserts bits of a specified length from the source integer vector 
+///    y into the lower 64 bits of the destination integer vector x at the 
+///    index idx and of the length len.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code 
+/// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len,
+/// const int idx);
+/// \endcode 
+///
+///
+/// This intrinsic corresponds to the \c INSERTQ instruction.
+///
+///
+/// \param x
+///    The destination operand where bits will be inserted. The inserted bits 
+///    are defined by the length len and by the index idx specifying the least 
+///    significant bit.
+/// \param y
+///    The source operand containing the bits to be extracted. The extracted 
+///    bits are the least significant bits of operand y of length len.
+/// \param len
+///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
+///    are zero, the length is interpreted as 64.
+/// \param idx
+///    Bits [5:0] specify the index of the least significant bit; the other 
+///    bits are ignored. If the sum of the index and length is greater than 
+///    64, the result is undefined. If the length and index are both zero, 
+///    bits [63:0] of parameter y are inserted into parameter x. If the 
+///    length is zero but the index is non-zero, the result is undefined.
+/// \returns A 128-bit integer vector containing the original lower 64-bits 
+///    of destination operand x with the specified bitfields replaced by the
+///    lower bits of source operand y. The upper 64 bits of the return value 
+///    are undefined.
 #define _mm_inserti_si64(x, y, len, idx) \
   ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \
                                     (__v2di)(__m128i)(y), \
                                     (char)(len), (char)(idx)))
 
+/// \brief Inserts bits of a specified length from the source integer vector 
+///    __y into the lower 64 bits of the destination integer vector __x at 
+///    the index and of the length specified by __y.
+///
+/// \headerfile <x86intrin.h>
+///
+///
+/// This intrinsic corresponds to the \c INSERTQ instruction.
+///
+///
+/// \param __x
+///    The destination operand where bits will be inserted. The inserted bits 
+///    are defined by the length and by the index of the least significant bit 
+///    specified by operand __y.
+/// \param __y
+///    The source operand containing the bits to be extracted. The extracted 
+///    bits are the least significant bits of operand __y with length specified
+///    by bits [69:64]. These are inserted into the destination at the index 
+///    specified by bits [77:72]; all other bits are ignored.
+///    If bits [69:64] are zero, the length is interpreted as 64.
+///    If the sum of the index and length is greater than 64, the result is 
+///    undefined. If the length and index are both zero, bits [63:0] of 
+///    parameter __y are inserted into parameter __x. If the length
+///    is zero but the index is non-zero, the result is undefined. 
+/// \returns A 128-bit integer vector containing the original lower 64-bits 
+///    of destination operand __x with the specified bitfields replaced by the
+///    lower bits of source operand __y. The upper 64 bits of the return value 
+///    are undefined.
 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_insert_si64(__m128i __x, __m128i __y)
 {
   return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);
 }
 
+/// \brief Stores a 64-bit double-precision value in a 64-bit memory location. 
+///    To minimize caching, the data is flagged as non-temporal (unlikely to be
+///    used again soon).
+///
+/// \headerfile <x86intrin.h>
+///
+///
+/// This intrinsic corresponds to the \c MOVNTSD instruction.
+///
+///
+/// \param __p
+///    The 64-bit memory location used to store the register value.
+/// \param __a
+///    The 64-bit double-precision floating-point register value to
+///    be stored.
 static __inline__ void __attribute__((__always_inline__, __nodebug__))
 _mm_stream_sd(double *__p, __m128d __a)
 {
   __builtin_ia32_movntsd(__p, (__v2df)__a);
 }
 
+/// \brief Stores a 32-bit single-precision floating-point value in a 32-bit
+///    memory location. To minimize caching, the data is flagged as
+///    non-temporal (unlikely to be used again soon).
+///
+/// \headerfile <x86intrin.h>
+///
+///
+/// This intrinsic corresponds to the \c MOVNTSS instruction.
+///
+///
+/// \param __p
+///    The 32-bit memory location used to store the register value.
+/// \param __a
+///    The 32-bit single-precision floating-point register value to
+///    be stored.
 static __inline__ void __attribute__((__always_inline__, __nodebug__))
 _mm_stream_ss(float *__p, __m128 __a)
 {
diff --git a/lib/Headers/arm_acle.h b/lib/Headers/arm_acle.h
index 6c56f3b..73a7e76 100644
--- a/lib/Headers/arm_acle.h
+++ b/lib/Headers/arm_acle.h
@@ -289,6 +289,14 @@
 }
 #endif
 
+/* 10.1 Special register intrinsics */
+#define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg)
+#define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg)
+#define __arm_rsrp(sysreg) __builtin_arm_rsrp(sysreg)
+#define __arm_wsr(sysreg, v) __builtin_arm_wsr(sysreg, v)
+#define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v)
+#define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v)
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/lib/Headers/avx2intrin.h b/lib/Headers/avx2intrin.h
index 949195b..e1e639d 100644
--- a/lib/Headers/avx2intrin.h
+++ b/lib/Headers/avx2intrin.h
@@ -542,6 +542,8 @@
   __m256i __a = (a); \
   (__m256i)__builtin_ia32_pslldqi256(__a, (count)*8); })
 
+#define _mm256_bslli_epi128(a, count) _mm256_slli_si256((a), (count))
+
 static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
 _mm256_slli_epi16(__m256i __a, int __count)
 {
@@ -606,6 +608,8 @@
   __m256i __a = (a); \
   (__m256i)__builtin_ia32_psrldqi256(__a, (count)*8); })
 
+#define _mm256_bsrli_epi128(a, count) _mm256_srli_si256((a), (count))
+
 static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
 _mm256_srli_epi16(__m256i __a, int __count)
 {
@@ -756,6 +760,12 @@
   return (__m128)__builtin_ia32_vbroadcastss_ps((__v4sf)__X);
 }
 
+static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_broadcastsd_pd(__m128d __a)
+{
+  return __builtin_shufflevector(__a, __a, 0, 0);
+}
+
 static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
 _mm256_broadcastss_ps(__m128 __X)
 {
diff --git a/lib/Headers/avx512bwintrin.h b/lib/Headers/avx512bwintrin.h
index acc3da2..d0591e4 100644
--- a/lib/Headers/avx512bwintrin.h
+++ b/lib/Headers/avx512bwintrin.h
@@ -33,6 +33,25 @@
 typedef char __v64qi __attribute__ ((__vector_size__ (64)));
 typedef short __v32hi __attribute__ ((__vector_size__ (64)));
 
+/* All-zero 512-bit value viewed as 64 signed chars.  Used as the zero
+   pass-through source by the maskz_* byte intrinsics in this file.  */
+static  __inline __v64qi __attribute__ ((__always_inline__, __nodebug__))
+_mm512_setzero_qi (void) {
+  return (__v64qi){ 0, 0, 0, 0, 0, 0, 0, 0,
+                       0, 0, 0, 0, 0, 0, 0, 0,
+                       0, 0, 0, 0, 0, 0, 0, 0,
+                       0, 0, 0, 0, 0, 0, 0, 0,
+                       0, 0, 0, 0, 0, 0, 0, 0,
+                       0, 0, 0, 0, 0, 0, 0, 0,
+                       0, 0, 0, 0, 0, 0, 0, 0,
+                       0, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+/* All-zero 512-bit value viewed as 32 shorts.  Used as the zero pass-through
+   source by the maskz_* word intrinsics in this file.  */
+static  __inline __v32hi __attribute__ ((__always_inline__, __nodebug__))
+_mm512_setzero_hi (void) {
+  return (__v32hi){ 0, 0, 0, 0, 0, 0, 0, 0,
+                       0, 0, 0, 0, 0, 0, 0, 0,
+                       0, 0, 0, 0, 0, 0, 0, 0,
+                       0, 0, 0, 0, 0, 0, 0, 0 };
+}
 
 /* Integer compare */
 
@@ -324,6 +343,116 @@
                                                  __u);
 }
 
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_add_epi8 (__m512i __A, __m512i __B) {
+  return (__m512i) ((__v64qi) __A + (__v64qi) __B);
+}
+
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_add_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
+  return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A,
+             (__v64qi) __B,
+             (__v64qi) __W,
+             (__mmask64) __U);
+}
+
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_maskz_add_epi8 (__mmask64 __U, __m512i __A, __m512i __B) {
+  return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A,
+             (__v64qi) __B,
+             (__v64qi)
+             _mm512_setzero_qi (),
+             (__mmask64) __U);
+}
+
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_sub_epi8 (__m512i __A, __m512i __B) {
+  return (__m512i) ((__v64qi) __A - (__v64qi) __B);
+}
+
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_sub_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
+  return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A,
+             (__v64qi) __B,
+             (__v64qi) __W,
+             (__mmask64) __U);
+}
+
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_maskz_sub_epi8 (__mmask64 __U, __m512i __A, __m512i __B) {
+  return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A,
+             (__v64qi) __B,
+             (__v64qi)
+             _mm512_setzero_qi (),
+             (__mmask64) __U);
+}
+
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_add_epi16 (__m512i __A, __m512i __B) {
+  return (__m512i) ((__v32hi) __A + (__v32hi) __B);
+}
+
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_add_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+  return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A,
+             (__v32hi) __B,
+             (__v32hi) __W,
+             (__mmask32) __U);
+}
+
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_maskz_add_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
+  return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A,
+             (__v32hi) __B,
+             (__v32hi)
+             _mm512_setzero_hi (),
+             (__mmask32) __U);
+}
+
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_sub_epi16 (__m512i __A, __m512i __B) {
+  return (__m512i) ((__v32hi) __A - (__v32hi) __B);
+}
+
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_sub_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+  return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A,
+             (__v32hi) __B,
+             (__v32hi) __W,
+             (__mmask32) __U);
+}
+
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_maskz_sub_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
+  return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A,
+             (__v32hi) __B,
+             (__v32hi)
+             _mm512_setzero_hi (),
+             (__mmask32) __U);
+}
+
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_mullo_epi16 (__m512i __A, __m512i __B) {
+  return (__m512i) ((__v32hi) __A * (__v32hi) __B);
+}
+
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_mullo_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+  return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi) __W,
+              (__mmask32) __U);
+}
+
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
+  return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A,
+              (__v32hi) __B,
+              (__v32hi)
+              _mm512_setzero_hi (),
+              (__mmask32) __U);
+}
+
 #define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \
   (__mmask16)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), \
diff --git a/lib/Headers/avx512dqintrin.h b/lib/Headers/avx512dqintrin.h
new file mode 100644
index 0000000..fd33be2
--- /dev/null
+++ b/lib/Headers/avx512dqintrin.h
@@ -0,0 +1,237 @@
+/*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512DQINTRIN_H
+#define __AVX512DQINTRIN_H
+
+static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
+  return (__m512i) ((__v8di) __A * (__v8di) __B);
+}
+
+static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
+  return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
+              (__v8di) __B,
+              (__v8di) __W,
+              (__mmask8) __U);
+}
+
+static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) {
+  return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
+              (__v8di) __B,
+              (__v8di)
+              _mm512_setzero_si512 (),
+              (__mmask8) __U);
+}
+
+static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__))
+_mm512_xor_pd (__m512d __A, __m512d __B) {
+  return (__m512d) ((__v8di) __A ^ (__v8di) __B);
+}
+
+static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+  return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
+             (__v8df) __B,
+             (__v8df) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) {
+  return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
+             (__v8df) __B,
+             (__v8df)
+             _mm512_setzero_pd (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_xor_ps (__m512 __A, __m512 __B) {
+  return (__m512) ((__v16si) __A ^ (__v16si) __B);
+}
+
+static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+  return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
+            (__v16sf) __B,
+            (__v16sf) __W,
+            (__mmask16) __U);
+}
+
+static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) {
+  return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
+            (__v16sf) __B,
+            (__v16sf)
+            _mm512_setzero_ps (),
+            (__mmask16) __U);
+}
+
+static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__))
+_mm512_or_pd (__m512d __A, __m512d __B) {
+  return (__m512d) ((__v8di) __A | (__v8di) __B);
+}
+
+static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+  return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
+            (__v8df) __B,
+            (__v8df) __W,
+            (__mmask8) __U);
+}
+
+static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) {
+  return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
+            (__v8df) __B,
+            (__v8df)
+            _mm512_setzero_pd (),
+            (__mmask8) __U);
+}
+
+static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_or_ps (__m512 __A, __m512 __B) {
+  return (__m512) ((__v16si) __A | (__v16si) __B);
+}
+
+static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+  return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
+                 (__v16sf) __B,
+                 (__v16sf) __W,
+                 (__mmask16) __U);
+}
+
+static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) {
+  return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
+                 (__v16sf) __B,
+                 (__v16sf)
+                 _mm512_setzero_ps (),
+                 (__mmask16) __U);
+}
+
+static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__))
+_mm512_and_pd (__m512d __A, __m512d __B) {
+  return (__m512d) ((__v8di) __A & (__v8di) __B);
+}
+
+static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+  return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
+             (__v8df) __B,
+             (__v8df) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) {
+  return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
+             (__v8df) __B,
+             (__v8df)
+             _mm512_setzero_pd (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_and_ps (__m512 __A, __m512 __B) {
+  return (__m512) ((__v16si) __A & (__v16si) __B);
+}
+
+static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+  return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
+            (__v16sf) __B,
+            (__v16sf) __W,
+            (__mmask16) __U);
+}
+
+static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) {
+  return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
+            (__v16sf) __B,
+            (__v16sf)
+            _mm512_setzero_ps (),
+            (__mmask16) __U);
+}
+
+static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__))
+_mm512_andnot_pd (__m512d __A, __m512d __B) {
+  return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
+              (__v8df) __B,
+              (__v8df)
+              _mm512_setzero_pd (),
+              (__mmask8) -1);
+}
+
+static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+  return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
+              (__v8df) __B,
+              (__v8df) __W,
+              (__mmask8) __U);
+}
+
+static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) {
+  return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
+              (__v8df) __B,
+              (__v8df)
+              _mm512_setzero_pd (),
+              (__mmask8) __U);
+}
+
+static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_andnot_ps (__m512 __A, __m512 __B) {
+  return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
+             (__v16sf) __B,
+             (__v16sf)
+             _mm512_setzero_ps (),
+             (__mmask16) -1);
+}
+
+static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+  return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
+             (__v16sf) __B,
+             (__v16sf) __W,
+             (__mmask16) __U);
+}
+
+static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) {
+  return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
+             (__v16sf) __B,
+             (__v16sf)
+             _mm512_setzero_ps (),
+             (__mmask16) __U);
+}
+
+#endif
diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h
index 72af281..d299704 100644
--- a/lib/Headers/avx512fintrin.h
+++ b/lib/Headers/avx512fintrin.h
@@ -212,6 +212,62 @@
 }
 
 static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_andnot_epi32 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
+              (__v16si) __B,
+              (__v16si)
+              _mm512_setzero_si512 (),
+              (__mmask16) -1);
+}
+
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
+              (__v16si) __B,
+              (__v16si) __W,
+              (__mmask16) __U);
+}
+
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
+              (__v16si) __B,
+              (__v16si)
+              _mm512_setzero_si512 (),
+              (__mmask16) __U);
+}
+
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_andnot_epi64 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
+              (__v8di) __B,
+              (__v8di)
+              _mm512_setzero_si512 (),
+              (__mmask8) -1);
+}
+
+/* Masked 64-bit and-not: lanes with a clear bit in __U are taken from __W
+   (same merge convention as the other *_mask_* intrinsics in this header).
+   Consistency fix: cast the mask to (__mmask8) like every sibling intrinsic
+   (cf. _mm512_mask_add_epi64 / _mm512_mask_sub_epi64) instead of passing
+   __U uncast.  */
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
+              (__v8di) __B,
+              (__v8di) __W,
+              (__mmask8) __U);
+}
+
+/* Zero-masked 64-bit and-not: lanes with a clear bit in __U are zeroed.
+   Bug fix: the zero source was _mm512_setzero_pd() -- a __m512d (double)
+   vector bit-cast to __v8di -- unlike every other integer maskz_* intrinsic
+   here (e.g. _mm512_maskz_andnot_epi32, _mm512_maskz_add_epi64), which use
+   _mm512_setzero_si512().  Use the integer zero and cast the mask for
+   consistency.  */
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
+              (__v8di) __B,
+              (__v8di)
+              _mm512_setzero_si512 (),
+              (__mmask8) __U);
+}
+static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
 _mm512_or_epi32(__m512i __a, __m512i __b)
 {
   return __a | __b;
@@ -362,6 +418,106 @@
   return __a - __b;
 }
 
+static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_add_epi64 (__m512i __A, __m512i __B)
+{
+  return (__m512i) ((__v8di) __A + (__v8di) __B);
+}
+
+static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
+             (__v8di) __B,
+             (__v8di) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
+             (__v8di) __B,
+             (__v8di)
+             _mm512_setzero_si512 (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_sub_epi64 (__m512i __A, __m512i __B)
+{
+  return (__m512i) ((__v8di) __A - (__v8di) __B);
+}
+
+static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
+             (__v8di) __B,
+             (__v8di) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
+             (__v8di) __B,
+             (__v8di)
+             _mm512_setzero_si512 (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_add_epi32 (__m512i __A, __m512i __B)
+{
+  return (__m512i) ((__v16si) __A + (__v16si) __B);
+}
+
+static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
+             (__v16si) __B,
+             (__v16si) __W,
+             (__mmask16) __U);
+}
+
+static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
+             (__v16si) __B,
+             (__v16si)
+             _mm512_setzero_si512 (),
+             (__mmask16) __U);
+}
+
+static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_sub_epi32 (__m512i __A, __m512i __B)
+{
+  return (__m512i) ((__v16si) __A - (__v16si) __B);
+}
+
+static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
+             (__v16si) __B,
+             (__v16si) __W,
+             (__mmask16) __U);
+}
+
+static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
+             (__v16si) __B,
+             (__v16si)
+             _mm512_setzero_si512 (),
+             (__mmask16) __U);
+}
+
 static  __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
 _mm512_max_pd(__m512d __A, __m512d __B)
 {
@@ -499,6 +655,24 @@
 }
 
 static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
+{
+  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
+              (__v16si) __Y,
+              (__v8di) __W, __M);
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
+{
+  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
+              (__v16si) __Y,
+              (__v8di)
+              _mm512_setzero_si512 (),
+              __M);
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
 _mm512_mul_epu32(__m512i __X, __m512i __Y)
 {
   return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
@@ -508,6 +682,48 @@
                (__mmask8) -1);
 }
 
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
+{
+  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
+               (__v16si) __Y,
+               (__v8di) __W, __M);
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
+{
+  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
+               (__v16si) __Y,
+               (__v8di)
+               _mm512_setzero_si512 (),
+               __M);
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mullo_epi32 (__m512i __A, __m512i __B)
+{
+  return (__m512i) ((__v16si) __A * (__v16si) __B);
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
+              (__v16si) __B,
+              (__v16si)
+              _mm512_setzero_si512 (),
+              __M);
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
+              (__v16si) __B,
+              (__v16si) __W, __M);
+}
+
 static  __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
 _mm512_sqrt_pd(__m512d a)
 {
diff --git a/lib/Headers/avx512vlbwintrin.h b/lib/Headers/avx512vlbwintrin.h
index 0746f43..c3b087e 100644
--- a/lib/Headers/avx512vlbwintrin.h
+++ b/lib/Headers/avx512vlbwintrin.h
@@ -606,6 +606,174 @@
                                                  __u);
 }
 
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_add_epi8 (__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
+  return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A,
+             (__v32qi) __B,
+             (__v32qi) __W,
+             (__mmask32) __U);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_maskz_add_epi8 (__mmask32 __U, __m256i __A, __m256i __B) {
+  return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A,
+             (__v32qi) __B,
+             (__v32qi)
+             _mm256_setzero_si256 (),
+             (__mmask32) __U);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_add_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
+  return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A,
+             (__v16hi) __B,
+             (__v16hi) __W,
+             (__mmask16) __U);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_maskz_add_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
+  return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A,
+             (__v16hi) __B,
+             (__v16hi)
+             _mm256_setzero_si256 (),
+             (__mmask16) __U);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_sub_epi8 (__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
+  return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A,
+             (__v32qi) __B,
+             (__v32qi) __W,
+             (__mmask32) __U);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_maskz_sub_epi8 (__mmask32 __U, __m256i __A, __m256i __B) {
+  return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A,
+             (__v32qi) __B,
+             (__v32qi)
+             _mm256_setzero_si256 (),
+             (__mmask32) __U);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_sub_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
+  return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A,
+             (__v16hi) __B,
+             (__v16hi) __W,
+             (__mmask16) __U);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_maskz_sub_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
+  return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A,
+             (__v16hi) __B,
+             (__v16hi)
+             _mm256_setzero_si256 (),
+             (__mmask16) __U);
+}
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_mask_add_epi8 (__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
+  return (__m128i) __builtin_ia32_paddb128_mask ((__v16qi) __A,
+             (__v16qi) __B,
+             (__v16qi) __W,
+             (__mmask16) __U);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_maskz_add_epi8 (__mmask16 __U, __m128i __A, __m128i __B) {
+  return (__m128i) __builtin_ia32_paddb128_mask ((__v16qi) __A,
+             (__v16qi) __B,
+             (__v16qi)
+             _mm_setzero_si128 (),
+             (__mmask16) __U);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_mask_add_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
+  return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A,
+             (__v8hi) __B,
+             (__v8hi) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_maskz_add_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
+  return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A,
+             (__v8hi) __B,
+             (__v8hi)
+             _mm_setzero_si128 (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_mask_sub_epi8 (__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
+  return (__m128i) __builtin_ia32_psubb128_mask ((__v16qi) __A,
+             (__v16qi) __B,
+             (__v16qi) __W,
+             (__mmask16) __U);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_maskz_sub_epi8 (__mmask16 __U, __m128i __A, __m128i __B) {
+  return (__m128i) __builtin_ia32_psubb128_mask ((__v16qi) __A,
+             (__v16qi) __B,
+             (__v16qi)
+             _mm_setzero_si128 (),
+             (__mmask16) __U);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_mask_sub_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
+  return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A,
+             (__v8hi) __B,
+             (__v8hi) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_maskz_sub_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
+  return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A,
+             (__v8hi) __B,
+             (__v8hi)
+             _mm_setzero_si128 (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm256_mask_mullo_epi16 (__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
+  return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A,
+              (__v16hi) __B,
+              (__v16hi) __W,
+              (__mmask16) __U);
+}
+
+static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm256_maskz_mullo_epi16 (__mmask16 __U, __m256i __A, __m256i __B) {
+  return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A,
+              (__v16hi) __B,
+              (__v16hi)
+              _mm256_setzero_si256 (),
+              (__mmask16) __U);
+}
+
+static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+_mm_mask_mullo_epi16 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
+  return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A,
+              (__v8hi) __B,
+              (__v8hi) __W,
+              (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+_mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B) {
+  return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A,
+              (__v8hi) __B,
+              (__v8hi)
+              _mm_setzero_si128 (),
+              (__mmask8) __U);
+}
 #define _mm_cmp_epi8_mask(a, b, p) __extension__ ({ \
   (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
                                          (__v16qi)(__m128i)(b), \
diff --git a/lib/Headers/avx512vldqintrin.h b/lib/Headers/avx512vldqintrin.h
new file mode 100644
index 0000000..4024446
--- /dev/null
+++ b/lib/Headers/avx512vldqintrin.h
@@ -0,0 +1,349 @@
+/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ----------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512VLDQINTRIN_H
+#define __AVX512VLDQINTRIN_H
+
+
+static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
+  return (__m256i) ((__v4di) __A * (__v4di) __B);
+}
+
+static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
+  return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
+              (__v4di) __B,
+              (__v4di) __W,
+              (__mmask8) __U);
+}
+
+static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) {
+  return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
+              (__v4di) __B,
+              (__v4di)
+              _mm256_setzero_si256 (),
+              (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+_mm_mullo_epi64 (__m128i __A, __m128i __B) {
+  return (__m128i) ((__v2di) __A * (__v2di) __B);
+}
+
+static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
+  return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
+              (__v2di) __B,
+              (__v2di) __W,
+              (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {
+  return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
+              (__v2di) __B,
+              (__v2di)
+              _mm_setzero_si128 (),
+              (__mmask8) __U);
+}
+
+static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
+_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
+              (__v4df) __B,
+              (__v4df) __W,
+              (__mmask8) __U);
+}
+
+static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
+_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
+              (__v4df) __B,
+              (__v4df)
+              _mm256_setzero_pd (),
+              (__mmask8) __U);
+}
+
+static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
+_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
+              (__v2df) __B,
+              (__v2df) __W,
+              (__mmask8) __U);
+}
+
+static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
+_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
+              (__v2df) __B,
+              (__v2df)
+              _mm_setzero_pd (),
+              (__mmask8) __U);
+}
+
+static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
+_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
+             (__v8sf) __B,
+             (__v8sf) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
+_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
+             (__v8sf) __B,
+             (__v8sf)
+             _mm256_setzero_ps (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
+_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
+             (__v4sf) __B,
+             (__v4sf) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
+_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
+             (__v4sf) __B,
+             (__v4sf)
+             _mm_setzero_ps (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
+_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
+             (__v4df) __B,
+             (__v4df) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
+_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
+             (__v4df) __B,
+             (__v4df)
+             _mm256_setzero_pd (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
+_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
+             (__v2df) __B,
+             (__v2df) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
+_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
+             (__v2df) __B,
+             (__v2df)
+             _mm_setzero_pd (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
+_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
+            (__v8sf) __B,
+            (__v8sf) __W,
+            (__mmask8) __U);
+}
+
+static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
+_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
+            (__v8sf) __B,
+            (__v8sf)
+            _mm256_setzero_ps (),
+            (__mmask8) __U);
+}
+
+static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
+_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
+            (__v4sf) __B,
+            (__v4sf) __W,
+            (__mmask8) __U);
+}
+
+static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
+_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
+            (__v4sf) __B,
+            (__v4sf)
+            _mm_setzero_ps (),
+            (__mmask8) __U);
+}
+
+static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
+_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
+        __m256d __B) {
+  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
+             (__v4df) __B,
+             (__v4df) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
+_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
+             (__v4df) __B,
+             (__v4df)
+             _mm256_setzero_pd (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
+_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
+             (__v2df) __B,
+             (__v2df) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
+_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
+             (__v2df) __B,
+             (__v2df)
+             _mm_setzero_pd (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
+_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
+            (__v8sf) __B,
+            (__v8sf) __W,
+            (__mmask8) __U);
+}
+
+static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
+_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
+            (__v8sf) __B,
+            (__v8sf)
+            _mm256_setzero_ps (),
+            (__mmask8) __U);
+}
+
+static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
+_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
+            (__v4sf) __B,
+            (__v4sf) __W,
+            (__mmask8) __U);
+}
+
+static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
+_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
+            (__v4sf) __B,
+            (__v4sf)
+            _mm_setzero_ps (),
+            (__mmask8) __U);
+}
+
+static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
+_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
+            (__v4df) __B,
+            (__v4df) __W,
+            (__mmask8) __U);
+}
+
+static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
+_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
+            (__v4df) __B,
+            (__v4df)
+            _mm256_setzero_pd (),
+            (__mmask8) __U);
+}
+
+static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
+_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
+            (__v2df) __B,
+            (__v2df) __W,
+            (__mmask8) __U);
+}
+
+static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
+_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
+            (__v2df) __B,
+            (__v2df)
+            _mm_setzero_pd (),
+            (__mmask8) __U);
+}
+
+static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
+_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
+                 (__v8sf) __B,
+                 (__v8sf) __W,
+                 (__mmask8) __U);
+}
+
+static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
+_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
+                 (__v8sf) __B,
+                 (__v8sf)
+                 _mm256_setzero_ps (),
+                 (__mmask8) __U);
+}
+
+static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
+_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
+                 (__v4sf) __B,
+                 (__v4sf) __W,
+                 (__mmask8) __U);
+}
+
+static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
+_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
+                 (__v4sf) __B,
+                 (__v4sf)
+                 _mm_setzero_ps (),
+                 (__mmask8) __U);
+}
+
+#endif
diff --git a/lib/Headers/avx512vlintrin.h b/lib/Headers/avx512vlintrin.h
index b460992..9de0cf4 100644
--- a/lib/Headers/avx512vlintrin.h
+++ b/lib/Headers/avx512vlintrin.h
@@ -610,6 +610,593 @@
                                                 __u);
 }
 
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+           __m256i __B)
+{
+  return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
+             (__v8si) __B,
+             (__v8si) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
+             (__v8si) __B,
+             (__v8si)
+             _mm256_setzero_si256 (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+           __m256i __B)
+{
+  return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
+             (__v4di) __B,
+             (__v4di) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
+             (__v4di) __B,
+             (__v4di)
+             _mm256_setzero_si256 (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+           __m256i __B)
+{
+  return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
+             (__v8si) __B,
+             (__v8si) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
+             (__v8si) __B,
+             (__v8si)
+             _mm256_setzero_si256 (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+           __m256i __B)
+{
+  return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
+             (__v4di) __B,
+             (__v4di) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
+             (__v4di) __B,
+             (__v4di)
+             _mm256_setzero_si256 (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+        __m128i __B)
+{
+  return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
+             (__v4si) __B,
+             (__v4si) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
+             (__v4si) __B,
+             (__v4si)
+             _mm_setzero_si128 (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+        __m128i __B)
+{
+  return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
+             (__v2di) __B,
+             (__v2di) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
+             (__v2di) __B,
+             (__v2di)
+             _mm_setzero_si128 (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+        __m128i __B)
+{
+  return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
+             (__v4si) __B,
+             (__v4si) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
+             (__v4si) __B,
+             (__v4si)
+             _mm_setzero_si128 (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+        __m128i __B)
+{
+  return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
+             (__v2di) __B,
+             (__v2di) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
+             (__v2di) __B,
+             (__v2di)
+             _mm_setzero_si128 (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
+           __m256i __Y)
+{
+  return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
+              (__v8si) __Y,
+              (__v4di) __W, __M);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
+{
+  return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
+              (__v8si) __Y,
+              (__v4di)
+              _mm256_setzero_si256 (),
+              __M);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
+        __m128i __Y)
+{
+  return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
+              (__v4si) __Y,
+              (__v2di) __W, __M);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
+{
+  return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
+              (__v4si) __Y,
+              (__v2di)
+              _mm_setzero_si128 (),
+              __M);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
+           __m256i __Y)
+{
+  return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
+               (__v8si) __Y,
+               (__v4di) __W, __M);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
+{
+  return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
+               (__v8si) __Y,
+               (__v4di)
+               _mm256_setzero_si256 (),
+               __M);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
+        __m128i __Y)
+{
+  return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
+               (__v4si) __Y,
+               (__v2di) __W, __M);
+}
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
+{
+  return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
+               (__v4si) __Y,
+               (__v2di)
+               _mm_setzero_si128 (),
+               __M);
+}
+
+static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
+              (__v8si) __B,
+              (__v8si)
+              _mm256_setzero_si256 (),
+              __M);
+}
+
+static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
+       __m256i __B)
+{
+  return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
+              (__v8si) __B,
+              (__v8si) __W, __M);
+}
+
+static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+_mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
+              (__v4si) __B,
+              (__v4si)
+              _mm_setzero_si128 (),
+              __M);
+}
+
+static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+_mm_mask_mullo_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
+          __m128i __B)
+{
+  return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
+              (__v4si) __B,
+              (__v4si) __W, __M);
+}
+
+static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+           __m256i __B)
+{
+  return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
+             (__v8si) __B,
+             (__v8si) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
+             (__v8si) __B,
+             (__v8si)
+             _mm256_setzero_si256 (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+_mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
+             (__v4si) __B,
+             (__v4si) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+_mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
+             (__v4si) __B,
+             (__v4si)
+             _mm_setzero_si128 (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+        __m256i __B)
+{
+  return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
+              (__v8si) __B,
+              (__v8si) __W,
+              (__mmask8) __U);
+}
+
+static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
+              (__v8si) __B,
+              (__v8si)
+              _mm256_setzero_si256 (),
+              (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+_mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+           __m128i __B)
+{
+  return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
+              (__v4si) __B,
+              (__v4si) __W,
+              (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
+              (__v4si) __B,
+              (__v4si)
+              _mm_setzero_si128 (),
+              (__mmask8) __U);
+}
+
+static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+          __m256i __B)
+{
+  return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
+            (__v8si) __B,
+            (__v8si) __W,
+            (__mmask8) __U);
+}
+
+static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
+            (__v8si) __B,
+            (__v8si)
+            _mm256_setzero_si256 (),
+            (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+_mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
+            (__v4si) __B,
+            (__v4si) __W,
+            (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+_mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
+            (__v4si) __B,
+            (__v4si)
+            _mm_setzero_si128 (),
+            (__mmask8) __U);
+}
+
+static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+           __m256i __B)
+{
+  return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
+             (__v8si) __B,
+             (__v8si) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
+             (__v8si) __B,
+             (__v8si)
+             _mm256_setzero_si256 (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+_mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+        __m128i __B)
+{
+  return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
+             (__v4si) __B,
+             (__v4si) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+_mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
+             (__v4si) __B,
+             (__v4si)
+             _mm_setzero_si128 (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+           __m256i __B)
+{
+  return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
+             (__v4di) __B,
+             (__v4di) __W, __U);
+}
+
+static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
+             (__v4di) __B,
+             (__v4di)
+             _mm256_setzero_pd (),
+             __U);
+}
+
+static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+_mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+        __m128i __B)
+{
+  return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
+             (__v2di) __B,
+             (__v2di) __W, __U);
+}
+
+static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+_mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
+             (__v2di) __B,
+             (__v2di)
+             _mm_setzero_pd (),
+             __U);
+}
+
+static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+        __m256i __B)
+{
+  return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
+              (__v4di) __B,
+              (__v4di) __W, __U);
+}
+
+static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
+              (__v4di) __B,
+              (__v4di)
+              _mm256_setzero_pd (),
+              __U);
+}
+
+static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+_mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+           __m128i __B)
+{
+  return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
+              (__v2di) __B,
+              (__v2di) __W, __U);
+}
+
+static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+_mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
+              (__v2di) __B,
+              (__v2di)
+              _mm_setzero_pd (),
+              __U);
+}
+
+static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+          __m256i __B)
+{
+  return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
+            (__v4di) __B,
+            (__v4di) __W,
+            (__mmask8) __U);
+}
+
+static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
+            (__v4di) __B,
+            (__v4di)
+            _mm256_setzero_si256 (),
+            (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+_mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
+            (__v2di) __B,
+            (__v2di) __W,
+            (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+_mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
+            (__v2di) __B,
+            (__v2di)
+            _mm_setzero_si128 (),
+            (__mmask8) __U);
+}
+
+static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+           __m256i __B)
+{
+  return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
+             (__v4di) __B,
+             (__v4di) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
+             (__v4di) __B,
+             (__v4di)
+             _mm256_setzero_si256 (),
+             (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+_mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+        __m128i __B)
+{
+  return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
+             (__v2di) __B,
+             (__v2di) __W,
+             (__mmask8) __U);
+}
+
+static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
+_mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
+             (__v2di) __B,
+             (__v2di)
+             _mm_setzero_si128 (),
+             (__mmask8) __U);
+}
+
 #define _mm_cmp_epi32_mask(a, b, p) __extension__ ({ \
   (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), \
@@ -690,4 +1277,43 @@
                                          (__v4di)(__m256i)(b), \
                                          (p), (__mmask8)(m)); })
 
+#define _mm256_cmp_ps_mask(a, b, p)  __extension__ ({ \
+  (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
+                                         (__v8sf)(__m256)(b), \
+                                         (p), (__mmask8)-1); })
+
+#define _mm256_mask_cmp_ps_mask(m, a, b, p)  __extension__ ({ \
+  (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
+                                         (__v8sf)(__m256)(b), \
+                                         (p), (__mmask8)(m)); })
+
+#define _mm256_cmp_pd_mask(a, b, p)  __extension__ ({ \
+  (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256)(a), \
+                                         (__v4df)(__m256)(b), \
+                                         (p), (__mmask8)-1); })
+
+#define _mm256_mask_cmp_pd_mask(m, a, b, p)  __extension__ ({ \
+  (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256)(a), \
+                                         (__v4df)(__m256)(b), \
+                                         (p), (__mmask8)(m)); })
+
+#define _mm128_cmp_ps_mask(a, b, p)  __extension__ ({ \
+  (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
+                                         (__v4sf)(__m128)(b), \
+                                         (p), (__mmask8)-1); })
+
+#define _mm128_mask_cmp_ps_mask(m, a, b, p)  __extension__ ({ \
+  (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
+                                         (__v4sf)(__m128)(b), \
+                                         (p), (__mmask8)(m)); })
+
+#define _mm128_cmp_pd_mask(a, b, p)  __extension__ ({ \
+  (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128)(a), \
+                                         (__v2df)(__m128)(b), \
+                                         (p), (__mmask8)-1); })
+
+#define _mm128_mask_cmp_pd_mask(m, a, b, p)  __extension__ ({ \
+  (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128)(a), \
+                                         (__v2df)(__m128)(b), \
+                                         (p), (__mmask8)(m)); })
 #endif /* __AVX512VLINTRIN_H */
diff --git a/lib/Headers/avxintrin.h b/lib/Headers/avxintrin.h
index f30a5ad..4907965 100644
--- a/lib/Headers/avxintrin.h
+++ b/lib/Headers/avxintrin.h
@@ -1270,4 +1270,34 @@
   __builtin_ia32_storedqu((char *)__addr_hi, (__v16qi)__v128);
 }
 
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_set_m128 (__m128 __hi, __m128 __lo) {
+  return (__m256) __builtin_shufflevector(__lo, __hi, 0, 1, 2, 3, 4, 5, 6, 7);
+}
+
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_set_m128d (__m128d __hi, __m128d __lo) {
+  return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo);
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_set_m128i (__m128i __hi, __m128i __lo) {
+  return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo);
+}
+
+static __inline __m256 __attribute__((__always_inline__, __nodebug__))
+_mm256_setr_m128 (__m128 __lo, __m128 __hi) {
+  return _mm256_set_m128(__hi, __lo);
+}
+
+static __inline __m256d __attribute__((__always_inline__, __nodebug__))
+_mm256_setr_m128d (__m128d __lo, __m128d __hi) {
+  return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo);
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_setr_m128i (__m128i __lo, __m128i __hi) {
+  return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo);
+}
+
 #endif /* __AVXINTRIN_H */
diff --git a/lib/Headers/cuda_builtin_vars.h b/lib/Headers/cuda_builtin_vars.h
new file mode 100644
index 0000000..901356b
--- /dev/null
+++ b/lib/Headers/cuda_builtin_vars.h
@@ -0,0 +1,110 @@
+/*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __CUDA_BUILTIN_VARS_H
+#define __CUDA_BUILTIN_VARS_H
+
+// The file implements built-in CUDA variables using __declspec(property).
+// https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx
+// All read accesses of built-in variable fields get converted into calls to a
+// getter function which in turn would call appropriate builtin to fetch the
+// value.
+//
+// Example:
+//    int x = threadIdx.x;
+// IR output:
+//  %0 = call i32 @llvm.ptx.read.tid.x() #3
+// PTX output:
+//  mov.u32     %r2, %tid.x;
+
+#define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC)                                \
+  __declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD;      \
+  static inline __attribute__((always_inline))                                 \
+      __attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) {     \
+    return INTRINSIC;                                                          \
+  }
+
+#if __cplusplus >= 201103L
+#define __DELETE =delete
+#else
+#define __DELETE
+#endif
+
+// Make sure nobody can create instances of the special varible types.  nvcc
+// also disallows taking address of special variables, so we disable address-of
+// operator as well.
+#define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName)                            \
+  __attribute__((device)) TypeName() __DELETE;                                 \
+  __attribute__((device)) TypeName(const TypeName &) __DELETE;                 \
+  __attribute__((device)) void operator=(const TypeName &) const __DELETE;     \
+  __attribute__((device)) TypeName *operator&() const __DELETE
+
+struct __cuda_builtin_threadIdx_t {
+  __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_tid_x());
+  __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_tid_y());
+  __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_tid_z());
+private:
+  __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);
+};
+
+struct __cuda_builtin_blockIdx_t {
+  __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ctaid_x());
+  __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ctaid_y());
+  __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ctaid_z());
+private:
+  __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);
+};
+
+struct __cuda_builtin_blockDim_t {
+  __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ntid_x());
+  __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ntid_y());
+  __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ntid_z());
+private:
+  __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);
+};
+
+struct __cuda_builtin_gridDim_t {
+  __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_nctaid_x());
+  __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_nctaid_y());
+  __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_nctaid_z());
+private:
+  __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);
+};
+
+#define __CUDA_BUILTIN_VAR                                                     \
+  extern const __attribute__((device)) __attribute__((weak))
+__CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
+__CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx;
+__CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim;
+__CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim;
+
+// warpSize should translate to read of %WARP_SZ but there's currently no
+// builtin to do so. According to PTX v4.2 docs 'to date, all target
+// architectures have a WARP_SZ value of 32'.
+__attribute__((device)) const int warpSize = 32;
+
+#undef __CUDA_DEVICE_BUILTIN
+#undef __CUDA_BUILTIN_VAR
+#undef __CUDA_DISALLOW_BUILTINVAR_ACCESS
+
+#endif /* __CUDA_BUILTIN_VARS_H */
diff --git a/lib/Headers/immintrin.h b/lib/Headers/immintrin.h
index 2400fea..ac7d54a 100644
--- a/lib/Headers/immintrin.h
+++ b/lib/Headers/immintrin.h
@@ -88,10 +88,18 @@
 #include <avx512bwintrin.h>
 #endif
 
+#ifdef __AVX512DQ__
+#include <avx512dqintrin.h>
+#endif
+
 #if defined (__AVX512VL__) && defined (__AVX512BW__)
 #include <avx512vlbwintrin.h>
 #endif
 
+#if defined (__AVX512VL__) && defined (__AVX512DQ__)
+#include <avx512vldqintrin.h>
+#endif
+
 #ifdef __AVX512ER__
 #include <avx512erintrin.h>
 #endif
diff --git a/lib/Headers/module.modulemap b/lib/Headers/module.modulemap
index bb2ca95..8fcb5bc 100644
--- a/lib/Headers/module.modulemap
+++ b/lib/Headers/module.modulemap
@@ -49,7 +49,7 @@
     explicit module sse {
       requires sse
       export mmx
-      export * // note: for hackish <emmintrin.h> dependency
+      export sse2 // note: for hackish <emmintrin.h> dependency
       header "xmmintrin.h"
     }
 
@@ -157,6 +157,8 @@
     explicit module aes_pclmul {
       requires aes, pclmul
       header "wmmintrin.h"
+      export aes
+      export pclmul
     }
 
     explicit module aes {
diff --git a/lib/Headers/xmmintrin.h b/lib/Headers/xmmintrin.h
index d1afe81..3a6b95e 100644
--- a/lib/Headers/xmmintrin.h
+++ b/lib/Headers/xmmintrin.h
@@ -994,7 +994,7 @@
 #define _m_ _mm_
 
 /* Ugly hack for backwards-compatibility (compatible with gcc) */
-#ifdef __SSE2__
+#if defined(__SSE2__) && !__has_feature(modules)
 #include <emmintrin.h>
 #endif
 
diff --git a/lib/Index/USRGeneration.cpp b/lib/Index/USRGeneration.cpp
index baa166e..8cdd283 100644
--- a/lib/Index/USRGeneration.cpp
+++ b/lib/Index/USRGeneration.cpp
@@ -847,7 +847,7 @@
   return UG.ignoreResults();
 }
 
-bool clang::index::generateUSRForMacro(const MacroDefinition *MD,
+bool clang::index::generateUSRForMacro(const MacroDefinitionRecord *MD,
                                        const SourceManager &SM,
                                        SmallVectorImpl<char> &Buf) {
   // Don't generate USRs for things with invalid locations.
diff --git a/lib/Lex/HeaderSearch.cpp b/lib/Lex/HeaderSearch.cpp
index 3e59cdb..ad7d344 100644
--- a/lib/Lex/HeaderSearch.cpp
+++ b/lib/Lex/HeaderSearch.cpp
@@ -18,6 +18,7 @@
 #include "clang/Lex/HeaderSearchOptions.h"
 #include "clang/Lex/LexDiagnostic.h"
 #include "clang/Lex/Lexer.h"
+#include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/SmallString.h"
@@ -594,7 +595,13 @@
       RelativePath->append(Filename.begin(), Filename.end());
     }
     // Otherwise, just return the file.
-    return FileMgr.getFile(Filename, /*openFile=*/true);
+    const FileEntry *File = FileMgr.getFile(Filename, /*openFile=*/true);
+    if (File && SuggestedModule) {
+      // If there is a module that corresponds to this header, suggest it.
+      hasModuleMap(Filename, File->getDir(), /*SystemHeaderDir*/false);
+      *SuggestedModule = findModuleForHeader(File);
+    }
+    return File;
   }
 
   // This is the header that MSVC's header search would have found.
@@ -1016,7 +1023,9 @@
   HFI.setHeaderRole(Role);
 }
 
-bool HeaderSearch::ShouldEnterIncludeFile(const FileEntry *File, bool isImport){
+bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor &PP,
+                                          const FileEntry *File,
+                                          bool isImport) {
   ++NumIncluded; // Count # of attempted #includes.
 
   // Get information about this file.
@@ -1041,7 +1050,7 @@
   // if the macro that guards it is defined, we know the #include has no effect.
   if (const IdentifierInfo *ControllingMacro
       = FileInfo.getControllingMacro(ExternalLookup))
-    if (ControllingMacro->hasMacroDefinition()) {
+    if (PP.isMacroDefined(ControllingMacro)) {
       ++NumMultiIncludeFileOptzn;
       return false;
     }
@@ -1067,7 +1076,7 @@
 bool HeaderSearch::hasModuleMap(StringRef FileName, 
                                 const DirectoryEntry *Root,
                                 bool IsSystem) {
-  if (!enabledModules() || !LangOpts.ModulesImplicitMaps)
+  if (!HSOpts->ModuleMaps || !LangOpts.ModulesImplicitMaps)
     return false;
 
   SmallVector<const DirectoryEntry *, 2> FixUpDirectories;
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index a3b520b..4007914 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -199,7 +199,7 @@
 
 /// Stringify - Convert the specified string into a C string, with surrounding
 /// ""'s, and with escaped \ and " characters.
-std::string Lexer::Stringify(const std::string &Str, bool Charify) {
+std::string Lexer::Stringify(StringRef Str, bool Charify) {
   std::string Result = Str;
   char Quote = Charify ? '\'' : '"';
   for (unsigned i = 0, e = Result.size(); i != e; ++i) {
@@ -1854,7 +1854,7 @@
   char C = getAndAdvanceChar(CurPtr, Result);
   while (C != '>') {
     // Skip escaped characters.
-    if (C == '\\') {
+    if (C == '\\' && CurPtr < BufferEnd) {
       // Skip the escaped character.
       getAndAdvanceChar(CurPtr, Result);
     } else if (C == '\n' || C == '\r' ||             // Newline.
diff --git a/lib/Lex/MacroArgs.cpp b/lib/Lex/MacroArgs.cpp
index 9967f3f..1c1979d 100644
--- a/lib/Lex/MacroArgs.cpp
+++ b/lib/Lex/MacroArgs.cpp
@@ -133,12 +133,11 @@
   // If there are no identifiers in the argument list, or if the identifiers are
   // known to not be macros, pre-expansion won't modify it.
   for (; ArgTok->isNot(tok::eof); ++ArgTok)
-    if (IdentifierInfo *II = ArgTok->getIdentifierInfo()) {
-      if (II->hasMacroDefinition() && PP.getMacroInfo(II)->isEnabled())
+    if (IdentifierInfo *II = ArgTok->getIdentifierInfo())
+      if (II->hasMacroDefinition())
         // Return true even though the macro could be a function-like macro
-        // without a following '(' token.
+        // without a following '(' token, or could be disabled, or not visible.
         return true;
-    }
   return false;
 }
 
diff --git a/lib/Lex/MacroInfo.cpp b/lib/Lex/MacroInfo.cpp
index 5416886..109b6c1 100644
--- a/lib/Lex/MacroInfo.cpp
+++ b/lib/Lex/MacroInfo.cpp
@@ -218,13 +218,9 @@
   if (auto *Prev = getPrevious())
     Out << " prev " << Prev;
   if (IsFromPCH) Out << " from_pch";
-  if (IsImported) Out << " imported";
-  if (IsAmbiguous) Out << " ambiguous";
 
-  if (IsPublic)
-    Out << " public";
-  else if (isa<VisibilityMacroDirective>(this))
-    Out << " private";
+  if (isa<VisibilityMacroDirective>(this))
+    Out << (IsPublic ? " public" : " private");
 
   if (auto *DMD = dyn_cast<DefMacroDirective>(this)) {
     if (auto *Info = DMD->getInfo()) {
@@ -234,3 +230,12 @@
   }
   Out << "\n";
 }
+
+ModuleMacro *ModuleMacro::create(Preprocessor &PP, Module *OwningModule,
+                                 IdentifierInfo *II, MacroInfo *Macro,
+                                 ArrayRef<ModuleMacro *> Overrides) {
+  void *Mem = PP.getPreprocessorAllocator().Allocate(
+      sizeof(ModuleMacro) + sizeof(ModuleMacro *) * Overrides.size(),
+      llvm::alignOf<ModuleMacro>());
+  return new (Mem) ModuleMacro(OwningModule, II, Macro, Overrides);
+}
diff --git a/lib/Lex/ModuleMap.cpp b/lib/Lex/ModuleMap.cpp
index a4f1c05..c67ce24 100644
--- a/lib/Lex/ModuleMap.cpp
+++ b/lib/Lex/ModuleMap.cpp
@@ -89,7 +89,7 @@
                      HeaderSearch &HeaderInfo)
     : SourceMgr(SourceMgr), Diags(Diags), LangOpts(LangOpts), Target(Target),
       HeaderInfo(HeaderInfo), BuiltinIncludeDir(nullptr),
-      CompilingModule(nullptr), SourceModule(nullptr) {
+      CompilingModule(nullptr), SourceModule(nullptr), NumCreatedModules(0) {
   MMapLangOpts.LineComment = true;
 }
 
@@ -330,42 +330,23 @@
   return false;
 }
 
-ModuleMap::KnownHeader
-ModuleMap::findModuleForHeader(const FileEntry *File,
-                               Module *RequestingModule,
-                               bool IncludeTextualHeaders) {
-  HeadersMap::iterator Known = findKnownHeader(File);
-
+ModuleMap::KnownHeader ModuleMap::findModuleForHeader(const FileEntry *File) {
   auto MakeResult = [&](ModuleMap::KnownHeader R) -> ModuleMap::KnownHeader {
-    if (!IncludeTextualHeaders && (R.getRole() & ModuleMap::TextualHeader))
+    if (R.getRole() & ModuleMap::TextualHeader)
       return ModuleMap::KnownHeader();
     return R;
   };
 
+  HeadersMap::iterator Known = findKnownHeader(File);
   if (Known != Headers.end()) {
     ModuleMap::KnownHeader Result;
-
     // Iterate over all modules that 'File' is part of to find the best fit.
-    for (SmallVectorImpl<KnownHeader>::iterator I = Known->second.begin(),
-                                                E = Known->second.end();
-         I != E; ++I) {
+    for (KnownHeader &H : Known->second) {
       // Cannot use a module if it is unavailable.
-      if (!I->getModule()->isAvailable())
+      if (!H.getModule()->isAvailable())
         continue;
-
-      // If 'File' is part of 'RequestingModule', 'RequestingModule' is the
-      // module we are looking for.
-      if (I->getModule() == RequestingModule)
-        return MakeResult(*I);
-
-      // If uses need to be specified explicitly, we are only allowed to return
-      // modules that are explicitly used by the requesting module.
-      if (RequestingModule && LangOpts.ModulesDeclUse &&
-          !RequestingModule->directlyUses(I->getModule()))
-        continue;
-
-      if (!Result || isBetterKnownHeader(*I, Result))
-        Result = *I;
+      if (!Result || isBetterKnownHeader(H, Result))
+        Result = H;
     }
     return MakeResult(Result);
   }
@@ -563,7 +544,7 @@
   
   // Create a new module with this name.
   Module *Result = new Module(Name, SourceLocation(), Parent,
-                              IsFramework, IsExplicit);
+                              IsFramework, IsExplicit, NumCreatedModules++);
   if (LangOpts.CurrentModule == Name) {
     SourceModule = Result;
     SourceModuleName = Name;
@@ -693,7 +674,8 @@
     return nullptr;
 
   Module *Result = new Module(ModuleName, SourceLocation(), Parent,
-                              /*IsFramework=*/true, /*IsExplicit=*/false);
+                              /*IsFramework=*/true, /*IsExplicit=*/false,
+                              NumCreatedModules++);
   InferredModuleAllowedBy[Result] = ModuleMapFile;
   Result->IsInferred = true;
   if (LangOpts.CurrentModule == ModuleName) {
@@ -704,14 +686,16 @@
   Result->IsSystem |= Attrs.IsSystem;
   Result->IsExternC |= Attrs.IsExternC;
   Result->ConfigMacrosExhaustive |= Attrs.IsExhaustive;
+  Result->Directory = FrameworkDir;
 
   if (!Parent)
     Modules[ModuleName] = Result;
   
   // umbrella header "umbrella-header-name"
-  Result->Umbrella = UmbrellaHeader;
-  Headers[UmbrellaHeader].push_back(KnownHeader(Result, NormalHeader));
-  UmbrellaDirs[UmbrellaHeader->getDir()] = Result;
+  //
+  // The "Headers/" component of the name is implied because this is
+  // a framework module.
+  setUmbrellaHeader(Result, UmbrellaHeader, ModuleName + ".h");
   
   // export *
   Result->Exports.push_back(Module::ExportDecl(nullptr, true));
@@ -772,14 +756,18 @@
   return Result;
 }
 
-void ModuleMap::setUmbrellaHeader(Module *Mod, const FileEntry *UmbrellaHeader){
+void ModuleMap::setUmbrellaHeader(Module *Mod, const FileEntry *UmbrellaHeader,
+                                  Twine NameAsWritten) {
   Headers[UmbrellaHeader].push_back(KnownHeader(Mod, NormalHeader));
   Mod->Umbrella = UmbrellaHeader;
+  Mod->UmbrellaAsWritten = NameAsWritten.str();
   UmbrellaDirs[UmbrellaHeader->getDir()] = Mod;
 }
 
-void ModuleMap::setUmbrellaDir(Module *Mod, const DirectoryEntry *UmbrellaDir) {
+void ModuleMap::setUmbrellaDir(Module *Mod, const DirectoryEntry *UmbrellaDir,
+                               Twine NameAsWritten) {
   Mod->Umbrella = UmbrellaDir;
+  Mod->UmbrellaAsWritten = NameAsWritten.str();
   UmbrellaDirs[UmbrellaDir] = Mod;
 }
 
@@ -864,50 +852,44 @@
 }
 
 bool ModuleMap::resolveExports(Module *Mod, bool Complain) {
-  bool HadError = false;
-  for (unsigned I = 0, N = Mod->UnresolvedExports.size(); I != N; ++I) {
-    Module::ExportDecl Export = resolveExport(Mod, Mod->UnresolvedExports[I], 
-                                              Complain);
+  auto Unresolved = std::move(Mod->UnresolvedExports);
+  Mod->UnresolvedExports.clear();
+  for (auto &UE : Unresolved) {
+    Module::ExportDecl Export = resolveExport(Mod, UE, Complain);
     if (Export.getPointer() || Export.getInt())
       Mod->Exports.push_back(Export);
     else
-      HadError = true;
+      Mod->UnresolvedExports.push_back(UE);
   }
-  Mod->UnresolvedExports.clear();
-  return HadError;
+  return !Mod->UnresolvedExports.empty();
 }
 
 bool ModuleMap::resolveUses(Module *Mod, bool Complain) {
-  bool HadError = false;
-  for (unsigned I = 0, N = Mod->UnresolvedDirectUses.size(); I != N; ++I) {
-    Module *DirectUse =
-        resolveModuleId(Mod->UnresolvedDirectUses[I], Mod, Complain);
+  auto Unresolved = std::move(Mod->UnresolvedDirectUses);
+  Mod->UnresolvedDirectUses.clear();
+  for (auto &UDU : Unresolved) {
+    Module *DirectUse = resolveModuleId(UDU, Mod, Complain);
     if (DirectUse)
       Mod->DirectUses.push_back(DirectUse);
     else
-      HadError = true;
+      Mod->UnresolvedDirectUses.push_back(UDU);
   }
-  Mod->UnresolvedDirectUses.clear();
-  return HadError;
+  return !Mod->UnresolvedDirectUses.empty();
 }
 
 bool ModuleMap::resolveConflicts(Module *Mod, bool Complain) {
-  bool HadError = false;
-  for (unsigned I = 0, N = Mod->UnresolvedConflicts.size(); I != N; ++I) {
-    Module *OtherMod = resolveModuleId(Mod->UnresolvedConflicts[I].Id,
-                                       Mod, Complain);
-    if (!OtherMod) {
-      HadError = true;
-      continue;
-    }
-
-    Module::Conflict Conflict;
-    Conflict.Other = OtherMod;
-    Conflict.Message = Mod->UnresolvedConflicts[I].Message;
-    Mod->Conflicts.push_back(Conflict);
-  }
+  auto Unresolved = std::move(Mod->UnresolvedConflicts);
   Mod->UnresolvedConflicts.clear();
-  return HadError;
+  for (auto &UC : Unresolved) {
+    if (Module *OtherMod = resolveModuleId(UC.Id, Mod, Complain)) {
+      Module::Conflict Conflict;
+      Conflict.Other = OtherMod;
+      Conflict.Message = UC.Message;
+      Mod->Conflicts.push_back(Conflict);
+    } else
+      Mod->UnresolvedConflicts.push_back(UC);
+  }
+  return !Mod->UnresolvedConflicts.empty();
 }
 
 Module *ModuleMap::inferModuleFromLocation(FullSourceLoc Loc) {
@@ -1758,7 +1740,13 @@
         // If Clang supplies this header but the underlying system does not,
         // just silently swap in our builtin version. Otherwise, we'll end
         // up adding both (later).
-        if (!File && BuiltinFile) {
+        //
+        // For local visibility, entirely replace the system file with our
+        // one and textually include the system one. We need to pass macros
+        // from our header to the system one if we #include_next it.
+        //
+        // FIXME: Can we do this in all cases?
+        if (BuiltinFile && (!File || Map.LangOpts.ModulesLocalVisibility)) {
           File = BuiltinFile;
           RelativePathName = BuiltinPathName;
           BuiltinFile = nullptr;
@@ -1778,7 +1766,7 @@
         HadError = true;
       } else {
         // Record this umbrella header.
-        Map.setUmbrellaHeader(ActiveModule, File);
+        Map.setUmbrellaHeader(ActiveModule, File, RelativePathName.str());
       }
     } else if (LeadingToken == MMToken::ExcludeKeyword) {
       Module::Header H = {RelativePathName.str(), File};
@@ -1860,7 +1848,7 @@
   } 
   
   // Record this umbrella directory.
-  Map.setUmbrellaDir(ActiveModule, Dir);
+  Map.setUmbrellaDir(ActiveModule, Dir, DirName);
 }
 
 /// \brief Parse a module export declaration.
diff --git a/lib/Lex/PPConditionalDirectiveRecord.cpp b/lib/Lex/PPConditionalDirectiveRecord.cpp
index 99b87a0..12a7784 100644
--- a/lib/Lex/PPConditionalDirectiveRecord.cpp
+++ b/lib/Lex/PPConditionalDirectiveRecord.cpp
@@ -84,14 +84,14 @@
 
 void PPConditionalDirectiveRecord::Ifdef(SourceLocation Loc,
                                          const Token &MacroNameTok,
-                                         const MacroDirective *MD) {
+                                         const MacroDefinition &MD) {
   addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
   CondDirectiveStack.push_back(Loc);
 }
 
 void PPConditionalDirectiveRecord::Ifndef(SourceLocation Loc,
                                           const Token &MacroNameTok,
-                                          const MacroDirective *MD) {
+                                          const MacroDefinition &MD) {
   addCondDirectiveLoc(CondDirectiveLoc(Loc, CondDirectiveStack.back()));
   CondDirectiveStack.push_back(Loc);
 }
diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp
index a50c8a8..ec06e79 100644
--- a/lib/Lex/PPDirectives.cpp
+++ b/lib/Lex/PPDirectives.cpp
@@ -62,26 +62,14 @@
   return MI;
 }
 
-DefMacroDirective *
-Preprocessor::AllocateDefMacroDirective(MacroInfo *MI, SourceLocation Loc,
-                                        unsigned ImportedFromModuleID,
-                                        ArrayRef<unsigned> Overrides) {
-  unsigned NumExtra = (ImportedFromModuleID ? 1 : 0) + Overrides.size();
-  return new (BP.Allocate(sizeof(DefMacroDirective) +
-                              sizeof(unsigned) * NumExtra,
-                          llvm::alignOf<DefMacroDirective>()))
-      DefMacroDirective(MI, Loc, ImportedFromModuleID, Overrides);
+DefMacroDirective *Preprocessor::AllocateDefMacroDirective(MacroInfo *MI,
+                                                           SourceLocation Loc) {
+  return new (BP) DefMacroDirective(MI, Loc);
 }
 
 UndefMacroDirective *
-Preprocessor::AllocateUndefMacroDirective(SourceLocation UndefLoc,
-                                          unsigned ImportedFromModuleID,
-                                          ArrayRef<unsigned> Overrides) {
-  unsigned NumExtra = (ImportedFromModuleID ? 1 : 0) + Overrides.size();
-  return new (BP.Allocate(sizeof(UndefMacroDirective) +
-                              sizeof(unsigned) * NumExtra,
-                          llvm::alignOf<UndefMacroDirective>()))
-      UndefMacroDirective(UndefLoc, ImportedFromModuleID, Overrides);
+Preprocessor::AllocateUndefMacroDirective(SourceLocation UndefLoc) {
+  return new (BP) UndefMacroDirective(UndefLoc);
 }
 
 VisibilityMacroDirective *
@@ -182,11 +170,13 @@
     return Diag(MacroNameTok, diag::err_defined_macro_name);
   }
 
-  if (isDefineUndef == MU_Undef && II->hasMacroDefinition() &&
-      getMacroInfo(II)->isBuiltinMacro()) {
-    // Warn if undefining "__LINE__" and other builtins, per C99 6.10.8/4
-    // and C++ [cpp.predefined]p4], but allow it as an extension.
-    Diag(MacroNameTok, diag::ext_pp_undef_builtin_macro);
+  if (isDefineUndef == MU_Undef) {
+    auto *MI = getMacroInfo(II);
+    if (MI && MI->isBuiltinMacro()) {
+      // Warn if undefining "__LINE__" and other builtins, per C99 6.10.8/4
+      // and C++ [cpp.predefined]p4], but allow it as an extension.
+      Diag(MacroNameTok, diag::ext_pp_undef_builtin_macro);
+    }
   }
 
   // If defining/undefining reserved identifier or a keyword, we need to issue
@@ -585,16 +575,16 @@
   }
 }
 
-Module *Preprocessor::getModuleForLocation(SourceLocation FilenameLoc) {
+Module *Preprocessor::getModuleForLocation(SourceLocation Loc) {
   ModuleMap &ModMap = HeaderInfo.getModuleMap();
-  if (SourceMgr.isInMainFile(FilenameLoc)) {
+  if (SourceMgr.isInMainFile(Loc)) {
     if (Module *CurMod = getCurrentModule())
       return CurMod;                               // Compiling a module.
     return HeaderInfo.getModuleMap().SourceModule; // Compiling a source.
   }
   // Try to determine the module of the include directive.
   // FIXME: Look into directly passing the FileEntry from LookupFile instead.
-  FileID IDOfIncl = SourceMgr.getFileID(SourceMgr.getExpansionLoc(FilenameLoc));
+  FileID IDOfIncl = SourceMgr.getFileID(SourceMgr.getExpansionLoc(Loc));
   if (const FileEntry *EntryOfIncl = SourceMgr.getFileEntryForID(IDOfIncl)) {
     // The include comes from a file.
     return ModMap.findModuleForHeader(EntryOfIncl).getModule();
@@ -605,6 +595,11 @@
   }
 }
 
+Module *Preprocessor::getModuleContainingLocation(SourceLocation Loc) {
+  return HeaderInfo.getModuleMap().inferModuleFromLocation(
+      FullSourceLoc(Loc, SourceMgr));
+}
+
 const FileEntry *Preprocessor::LookupFile(
     SourceLocation FilenameLoc,
     StringRef Filename,
@@ -1290,7 +1285,7 @@
 
   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
   // Okay, we finally have a valid identifier to undef.
-  MacroDirective *MD = getMacroDirective(II);
+  MacroDirective *MD = getLocalMacroDirective(II);
   
   // If the macro is not defined, this is an error.
   if (!MD) {
@@ -1317,7 +1312,7 @@
   
   IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
   // Okay, we finally have a valid identifier to undef.
-  MacroDirective *MD = getMacroDirective(II);
+  MacroDirective *MD = getLocalMacroDirective(II);
   
   // If the macro is not defined, this is an error.
   if (!MD) {
@@ -1444,6 +1439,8 @@
 static void EnterAnnotationToken(Preprocessor &PP,
                                  SourceLocation Begin, SourceLocation End,
                                  tok::TokenKind Kind, void *AnnotationVal) {
+  // FIXME: Produce this as the current token directly, rather than
+  // allocating a new token for it.
   Token *Tok = new Token[1];
   Tok[0].startToken();
   Tok[0].setKind(Kind);
@@ -1453,6 +1450,51 @@
   PP.EnterTokenStream(Tok, 1, true, true);
 }
 
+/// \brief Produce a diagnostic informing the user that a #include or similar
+/// was implicitly treated as a module import.
+static void diagnoseAutoModuleImport(
+    Preprocessor &PP, SourceLocation HashLoc, Token &IncludeTok,
+    ArrayRef<std::pair<IdentifierInfo *, SourceLocation>> Path,
+    SourceLocation PathEnd) {
+  assert(PP.getLangOpts().ObjC2 && "no import syntax available");
+
+  SmallString<128> PathString;
+  for (unsigned I = 0, N = Path.size(); I != N; ++I) {
+    if (I)
+      PathString += '.';
+    PathString += Path[I].first->getName();
+  }
+  int IncludeKind = 0;
+  
+  switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
+  case tok::pp_include:
+    IncludeKind = 0;
+    break;
+    
+  case tok::pp_import:
+    IncludeKind = 1;
+    break;        
+      
+  case tok::pp_include_next:
+    IncludeKind = 2;
+    break;
+      
+  case tok::pp___include_macros:
+    IncludeKind = 3;
+    break;
+      
+  default:
+    llvm_unreachable("unknown include directive kind");
+  }
+
+  CharSourceRange ReplaceRange(SourceRange(HashLoc, PathEnd),
+                               /*IsTokenRange=*/false);
+  PP.Diag(HashLoc, diag::warn_auto_module_import)
+      << IncludeKind << PathString
+      << FixItHint::CreateReplacement(ReplaceRange,
+                                      ("@import " + PathString + ";").str());
+}
+
 /// HandleIncludeDirective - The "\#include" tokens have just been read, read
 /// the file to be included from the lexer, then include it!  This is a common
 /// routine with functionality shared between \#include, \#include_next and
@@ -1563,8 +1605,8 @@
       Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,
       HeaderInfo.getHeaderSearchOpts().ModuleMaps ? &SuggestedModule : nullptr);
 
-  if (Callbacks) {
-    if (!File) {
+  if (!File) {
+    if (Callbacks) {
       // Give the clients a chance to recover.
       SmallString<128> RecoveryPath;
       if (Callbacks->FileNotFound(Filename, RecoveryPath)) {
@@ -1584,18 +1626,7 @@
         }
       }
     }
-    
-    if (!SuggestedModule || !getLangOpts().Modules) {
-      // Notify the callback object that we've seen an inclusion directive.
-      Callbacks->InclusionDirective(HashLoc, IncludeTok,
-                                    LangOpts.MSVCCompat ? NormalizedPath.c_str()
-                                                        : Filename,
-                                    isAngled, FilenameRange, File, SearchPath,
-                                    RelativePath, /*ImportedModule=*/nullptr);
-    }
-  }
 
-  if (!File) {
     if (!SuppressIncludeNotFoundError) {
       // If the file could not be located and it was included via angle 
       // brackets, we can attempt a lookup as though it were a quoted path to
@@ -1616,19 +1647,27 @@
             FixItHint::CreateReplacement(Range, "\"" + Filename.str() + "\"");
         }
       }
+
       // If the file is still not found, just go with the vanilla diagnostic
       if (!File)
         Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
     }
-    if (!File)
-      return;
   }
 
-  // If we are supposed to import a module rather than including the header,
-  // do so now.
-  if (SuggestedModule && getLangOpts().Modules &&
+  // Should we enter the source file? Set to false if either the source file is
+  // known to have no effect beyond its effect on module visibility -- that is,
+  // if it's got an include guard that is already defined or is a modular header
+  // we've imported or already built.
+  bool ShouldEnter = true;
+
+  // Determine whether we should try to import the module for this #include, if
+  // there is one. Don't do so if precompiled module support is disabled or we
+  // are processing this module textually (because we're building the module).
+  if (File && SuggestedModule && getLangOpts().Modules &&
       SuggestedModule.getModule()->getTopLevelModuleName() !=
-      getLangOpts().ImplementationOfModule) {
+          getLangOpts().CurrentModule &&
+      SuggestedModule.getModule()->getTopLevelModuleName() !=
+          getLangOpts().ImplementationOfModule) {
     // Compute the module access path corresponding to this module.
     // FIXME: Should we have a second loadModule() overload to avoid this
     // extra lookup step?
@@ -1639,111 +1678,57 @@
     std::reverse(Path.begin(), Path.end());
 
     // Warn that we're replacing the include/import with a module import.
-    SmallString<128> PathString;
-    for (unsigned I = 0, N = Path.size(); I != N; ++I) {
-      if (I)
-        PathString += '.';
-      PathString += Path[I].first->getName();
-    }
-    int IncludeKind = 0;
+    // We only do this in Objective-C, where we have a module-import syntax.
+    if (getLangOpts().ObjC2)
+      diagnoseAutoModuleImport(*this, HashLoc, IncludeTok, Path, CharEnd);
     
-    switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
-    case tok::pp_include:
-      IncludeKind = 0;
-      break;
-      
-    case tok::pp_import:
-      IncludeKind = 1;
-      break;        
-        
-    case tok::pp_include_next:
-      IncludeKind = 2;
-      break;
-        
-    case tok::pp___include_macros:
-      IncludeKind = 3;
-      break;
-        
-    default:
-      llvm_unreachable("unknown include directive kind");
-    }
-
-    // Determine whether we are actually building the module that this
-    // include directive maps to.
-    bool BuildingImportedModule
-      = Path[0].first->getName() == getLangOpts().CurrentModule;
-
-    if (!BuildingImportedModule && getLangOpts().ObjC2) {
-      // If we're not building the imported module, warn that we're going
-      // to automatically turn this inclusion directive into a module import.
-      // We only do this in Objective-C, where we have a module-import syntax.
-      CharSourceRange ReplaceRange(SourceRange(HashLoc, CharEnd), 
-                                   /*IsTokenRange=*/false);
-      Diag(HashLoc, diag::warn_auto_module_import)
-          << IncludeKind << PathString
-          << FixItHint::CreateReplacement(
-                 ReplaceRange, ("@import " + PathString + ";").str());
-    }
-    
-    // Load the module. Only make macros visible. We'll make the declarations
+    // Load the module to import its macros. We'll make the declarations
     // visible when the parser gets here.
-    Module::NameVisibilityKind Visibility = Module::MacrosVisible;
-    ModuleLoadResult Imported
-      = TheModuleLoader.loadModule(IncludeTok.getLocation(), Path, Visibility,
-                                   /*IsIncludeDirective=*/true);
+    // FIXME: Pass SuggestedModule in here rather than converting it to a path
+    // and making the module loader convert it back again.
+    ModuleLoadResult Imported = TheModuleLoader.loadModule(
+        IncludeTok.getLocation(), Path, Module::Hidden,
+        /*IsIncludeDirective=*/true);
     assert((Imported == nullptr || Imported == SuggestedModule.getModule()) &&
            "the imported module is different than the suggested one");
 
-    if (!Imported && hadModuleLoaderFatalFailure()) {
-      // With a fatal failure in the module loader, we abort parsing.
-      Token &Result = IncludeTok;
-      if (CurLexer) {
-        Result.startToken();
-        CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof);
-        CurLexer->cutOffLexing();
-      } else {
-        assert(CurPTHLexer && "#include but no current lexer set!");
-        CurPTHLexer->getEOF(Result);
+    if (Imported)
+      ShouldEnter = false;
+    else if (Imported.isMissingExpected()) {
+      // We failed to find a submodule that we assumed would exist (because it
+      // was in the directory of an umbrella header, for instance), but no
+      // actual module exists for it (because the umbrella header is
+      // incomplete).  Treat this as a textual inclusion.
+      SuggestedModule = ModuleMap::KnownHeader();
+    } else {
+      // We hit an error processing the import. Bail out.
+      if (hadModuleLoaderFatalFailure()) {
+        // With a fatal failure in the module loader, we abort parsing.
+        Token &Result = IncludeTok;
+        if (CurLexer) {
+          Result.startToken();
+          CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof);
+          CurLexer->cutOffLexing();
+        } else {
+          assert(CurPTHLexer && "#include but no current lexer set!");
+          CurPTHLexer->getEOF(Result);
+        }
       }
       return;
     }
-
-    // If this header isn't part of the module we're building, we're done.
-    if (!BuildingImportedModule && Imported) {
-      if (Callbacks) {
-        Callbacks->InclusionDirective(HashLoc, IncludeTok, Filename, isAngled,
-                                      FilenameRange, File,
-                                      SearchPath, RelativePath, Imported);
-      }
-
-      if (IncludeKind != 3) {
-        // Let the parser know that we hit a module import, and it should
-        // make the module visible.
-        // FIXME: Produce this as the current token directly, rather than
-        // allocating a new token for it.
-        EnterAnnotationToken(*this, HashLoc, End, tok::annot_module_include,
-                             Imported);
-      }
-      return;
-    }
-
-    // If we failed to find a submodule that we expected to find, we can
-    // continue. Otherwise, there's an error in the included file, so we
-    // don't want to include it.
-    if (!BuildingImportedModule && !Imported.isMissingExpected()) {
-      return;
-    }
   }
 
-  if (Callbacks && SuggestedModule) {
-    // We didn't notify the callback object that we've seen an inclusion
-    // directive before. Now that we are parsing the include normally and not
-    // turning it to a module import, notify the callback object.
-    Callbacks->InclusionDirective(HashLoc, IncludeTok, Filename, isAngled,
-                                  FilenameRange, File,
-                                  SearchPath, RelativePath,
-                                  /*ImportedModule=*/nullptr);
+  if (Callbacks) {
+    // Notify the callback object that we've seen an inclusion directive.
+    Callbacks->InclusionDirective(
+        HashLoc, IncludeTok,
+        LangOpts.MSVCCompat ? NormalizedPath.c_str() : Filename, isAngled,
+        FilenameRange, File, SearchPath, RelativePath,
+        ShouldEnter ? nullptr : SuggestedModule.getModule());
   }
+
+  if (!File)
+    return;
   
   // The #included file will be considered to be a system header if either it is
   // in a system include directory, or if the #includer is a system include
@@ -1752,11 +1737,28 @@
     std::max(HeaderInfo.getFileDirFlavor(File),
              SourceMgr.getFileCharacteristic(FilenameTok.getLocation()));
 
+  // FIXME: If we have a suggested module, and we've already visited this file,
+  // don't bother entering it again. We know it has no further effect.
+
   // Ask HeaderInfo if we should enter this #include file.  If not, #including
   // this file will have no effect.
-  if (!HeaderInfo.ShouldEnterIncludeFile(File, isImport)) {
+  if (ShouldEnter &&
+      !HeaderInfo.ShouldEnterIncludeFile(*this, File, isImport)) {
+    ShouldEnter = false;
     if (Callbacks)
       Callbacks->FileSkipped(*File, FilenameTok, FileCharacter);
+  }
+
+  // If we don't need to enter the file, stop now.
+  if (!ShouldEnter) {
+    // If this is a module import, make it visible if needed.
+    if (auto *M = SuggestedModule.getModule()) {
+      makeModuleVisible(M, HashLoc);
+
+      if (IncludeTok.getIdentifierInfo()->getPPKeywordID() !=
+          tok::pp___include_macros)
+        EnterAnnotationToken(*this, HashLoc, End, tok::annot_module_include, M);
+    }
     return;
   }
 
@@ -1769,26 +1771,24 @@
   FileID FID = SourceMgr.createFileID(File, IncludePos, FileCharacter);
   assert(!FID.isInvalid() && "Expected valid file ID");
 
-  // Determine if we're switching to building a new submodule, and which one.
-  ModuleMap::KnownHeader BuildingModule;
-  if (getLangOpts().Modules && !getLangOpts().CurrentModule.empty()) {
-    Module *RequestingModule = getModuleForLocation(FilenameLoc);
-    BuildingModule =
-        HeaderInfo.getModuleMap().findModuleForHeader(File, RequestingModule);
-  }
-
   // If all is good, enter the new file!
   if (EnterSourceFile(FID, CurDir, FilenameTok.getLocation()))
     return;
 
-  // If we're walking into another part of the same module, let the parser
-  // know that any future declarations are within that other submodule.
-  if (BuildingModule) {
+  // Determine if we're switching to building a new submodule, and which one.
+  if (auto *M = SuggestedModule.getModule()) {
     assert(!CurSubmodule && "should not have marked this as a module yet");
-    CurSubmodule = BuildingModule.getModule();
+    CurSubmodule = M;
 
-    EnterAnnotationToken(*this, HashLoc, End, tok::annot_module_begin,
-                         CurSubmodule);
+    // Let the macro handling code know that any future macros are within
+    // the new submodule.
+    EnterSubmodule(M, HashLoc);
+
+    // Let the parser know that any future declarations are within the new
+    // submodule.
+    // FIXME: There's no point doing this if we're handling a #__include_macros
+    // directive.
+    EnterAnnotationToken(*this, HashLoc, End, tok::annot_module_begin, M);
   }
 }
 
@@ -2290,9 +2290,9 @@
   // Check to see if this is the last token on the #undef line.
   CheckEndOfDirective("undef");
 
-  // Okay, we finally have a valid identifier to undef.
-  MacroDirective *MD = getMacroDirective(MacroNameTok.getIdentifierInfo());
-  const MacroInfo *MI = MD ? MD->getMacroInfo() : nullptr;
+  // Okay, we have a valid identifier to undef.
+  auto *II = MacroNameTok.getIdentifierInfo();
+  auto MD = getMacroDefinition(II);
 
   // If the callbacks want to know, tell them about the macro #undef.
   // Note: no matter if the macro was defined or not.
@@ -2300,6 +2300,7 @@
     Callbacks->MacroUndefined(MacroNameTok, MD);
 
   // If the macro is not defined, this is a noop undef, just return.
+  const MacroInfo *MI = MD.getMacroInfo();
   if (!MI)
     return;
 
@@ -2344,8 +2345,8 @@
   CheckEndOfDirective(isIfndef ? "ifndef" : "ifdef");
 
   IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
-  MacroDirective *MD = getMacroDirective(MII);
-  MacroInfo *MI = MD ? MD->getMacroInfo() : nullptr;
+  auto MD = getMacroDefinition(MII);
+  MacroInfo *MI = MD.getMacroInfo();
 
   if (CurPPLexer->getConditionalStackDepth() == 0) {
     // If the start of a top-level #ifdef and if the macro is not defined,
diff --git a/lib/Lex/PPExpressions.cpp b/lib/Lex/PPExpressions.cpp
index a6f16f8..44513023 100644
--- a/lib/Lex/PPExpressions.cpp
+++ b/lib/Lex/PPExpressions.cpp
@@ -108,15 +108,13 @@
 
   // Otherwise, we got an identifier, is it defined to something?
   IdentifierInfo *II = PeekTok.getIdentifierInfo();
-  Result.Val = II->hasMacroDefinition();
-  Result.Val.setIsUnsigned(false);  // Result is signed intmax_t.
+  MacroDefinition Macro = PP.getMacroDefinition(II);
+  Result.Val = !!Macro;
+  Result.Val.setIsUnsigned(false); // Result is signed intmax_t.
 
-  MacroDirective *Macro = nullptr;
   // If there is a macro, mark it used.
-  if (Result.Val != 0 && ValueLive) {
-    Macro = PP.getMacroDirective(II);
-    PP.markMacroAsUsed(Macro->getMacroInfo());
-  }
+  if (Result.Val != 0 && ValueLive)
+    PP.markMacroAsUsed(Macro.getMacroInfo());
 
   // Save macro token for callback.
   Token macroToken(PeekTok);
@@ -144,11 +142,7 @@
 
   // Invoke the 'defined' callback.
   if (PPCallbacks *Callbacks = PP.getPPCallbacks()) {
-    MacroDirective *MD = Macro;
-    // Pass the MacroInfo for the macro name even if the value is dead.
-    if (!MD && Result.Val != 0)
-      MD = PP.getMacroDirective(II);
-    Callbacks->Defined(macroToken, MD,
+    Callbacks->Defined(macroToken, Macro,
                        SourceRange(beginLoc, PeekTok.getLocation()));
   }
 
@@ -734,8 +728,7 @@
 /// EvaluateDirectiveExpression - Evaluate an integer constant expression that
 /// may occur after a #if or #elif directive.  If the expression is equivalent
 /// to "!defined(X)" return X in IfNDefMacro.
-bool Preprocessor::
-EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
+bool Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) {
   SaveAndRestore<bool> PPDir(ParsingIfOrElifDirective, true);
   // Save the current state of 'DisableMacroExpansion' and reset it to false. If
   // 'DisableMacroExpansion' is true, then we must be in a macro argument list
diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp
index fb5e2b0..e68fb7d 100644
--- a/lib/Lex/PPLexerChange.cpp
+++ b/lib/Lex/PPLexerChange.cpp
@@ -309,7 +309,7 @@
         }
         if (const IdentifierInfo *DefinedMacro =
               CurPPLexer->MIOpt.GetDefinedMacro()) {
-          if (!ControllingMacro->hasMacroDefinition() &&
+          if (!isMacroDefined(ControllingMacro) &&
               DefinedMacro != ControllingMacro &&
               HeaderInfo.FirstTimeLexingFile(FE)) {
 
@@ -400,6 +400,9 @@
       CurLexer->FormTokenWithChars(Result, EndPos, tok::annot_module_end);
       Result.setAnnotationEndLoc(Result.getLocation());
       Result.setAnnotationValue(CurSubmodule);
+
+      // We're done with this submodule.
+      LeaveSubmodule();
     }
 
     // We're done with the #included file.
@@ -471,7 +474,7 @@
       if (!getDiagnostics().isIgnored(diag::warn_uncovered_module_header,
                                       StartLoc)) {
         ModuleMap &ModMap = getHeaderSearchInfo().getModuleMap();
-        const DirectoryEntry *Dir = Mod->getUmbrellaDir();
+        const DirectoryEntry *Dir = Mod->getUmbrellaDir().Entry;
         vfs::FileSystem &FS = *FileMgr.getVirtualFileSystem();
         std::error_code EC;
         for (vfs::recursive_directory_iterator Entry(FS, Dir->getName(), EC), End;
@@ -605,3 +608,126 @@
   // preprocessor directive mode), so just return EOF as our token.
   assert(!FoundLexer && "Lexer should return EOD before EOF in PP mode");
 }
+
+void Preprocessor::EnterSubmodule(Module *M, SourceLocation ImportLoc) {
+  if (!getLangOpts().ModulesLocalVisibility) {
+    // Just track that we entered this submodule.
+    BuildingSubmoduleStack.push_back(
+        BuildingSubmoduleInfo(M, ImportLoc, CurSubmoduleState));
+    return;
+  }
+
+  // Resolve as much of the module definition as we can now, before we enter
+  // one of its headers.
+  // FIXME: Can we enable Complain here?
+  // FIXME: Can we do this when local visibility is disabled?
+  ModuleMap &ModMap = getHeaderSearchInfo().getModuleMap();
+  ModMap.resolveExports(M, /*Complain=*/false);
+  ModMap.resolveUses(M, /*Complain=*/false);
+  ModMap.resolveConflicts(M, /*Complain=*/false);
+
+  // If this is the first time we've entered this module, set up its state.
+  auto R = Submodules.insert(std::make_pair(M, SubmoduleState()));
+  auto &State = R.first->second;
+  bool FirstTime = R.second;
+  if (FirstTime) {
+    // Determine the set of starting macros for this submodule; take these
+    // from the "null" module (the predefines buffer).
+    auto &StartingMacros = NullSubmoduleState.Macros;
+
+    // Restore to the starting state.
+    // FIXME: Do this lazily, when each macro name is first referenced.
+    for (auto &Macro : StartingMacros) {
+      MacroState MS(Macro.second.getLatest());
+      MS.setOverriddenMacros(*this, Macro.second.getOverriddenMacros());
+      State.Macros.insert(std::make_pair(Macro.first, std::move(MS)));
+    }
+  }
+
+  // Track that we entered this module.
+  BuildingSubmoduleStack.push_back(
+      BuildingSubmoduleInfo(M, ImportLoc, CurSubmoduleState));
+
+  // Switch to this submodule as the current submodule.
+  CurSubmoduleState = &State;
+
+  // This module is visible to itself.
+  if (FirstTime)
+    makeModuleVisible(M, ImportLoc);
+}
+
+void Preprocessor::LeaveSubmodule() {
+  auto &Info = BuildingSubmoduleStack.back();
+
+  Module *LeavingMod = Info.M;
+  SourceLocation ImportLoc = Info.ImportLoc;
+
+  // Create ModuleMacros for any macros defined in this submodule.
+  for (auto &Macro : CurSubmoduleState->Macros) {
+    auto *II = const_cast<IdentifierInfo*>(Macro.first);
+
+    // Find the starting point for the MacroDirective chain in this submodule.
+    MacroDirective *OldMD = nullptr;
+    if (getLangOpts().ModulesLocalVisibility) {
+      // FIXME: It'd be better to start at the state from when we most recently
+      // entered this submodule, but it doesn't really matter.
+      auto &PredefMacros = NullSubmoduleState.Macros;
+      auto PredefMacroIt = PredefMacros.find(Macro.first);
+      if (PredefMacroIt == PredefMacros.end())
+        OldMD = nullptr;
+      else
+        OldMD = PredefMacroIt->second.getLatest();
+    }
+
+    // This module may have exported a new macro. If so, create a ModuleMacro
+    // representing that fact.
+    bool ExplicitlyPublic = false;
+    for (auto *MD = Macro.second.getLatest(); MD != OldMD;
+         MD = MD->getPrevious()) {
+      assert(MD && "broken macro directive chain");
+
+      // Stop on macros defined in other submodules we #included along the way.
+      // There's no point doing this if we're tracking local submodule
+      // visibility, since there can be no such directives in our list.
+      if (!getLangOpts().ModulesLocalVisibility) {
+        Module *Mod = getModuleContainingLocation(MD->getLocation());
+        if (Mod != LeavingMod)
+          break;
+      }
+
+      if (auto *VisMD = dyn_cast<VisibilityMacroDirective>(MD)) {
+        // The latest visibility directive for a name in a submodule affects
+        // all the directives that come before it.
+        if (VisMD->isPublic())
+          ExplicitlyPublic = true;
+        else if (!ExplicitlyPublic)
+          // Private with no following public directive: not exported.
+          break;
+      } else {
+        MacroInfo *Def = nullptr;
+        if (DefMacroDirective *DefMD = dyn_cast<DefMacroDirective>(MD))
+          Def = DefMD->getInfo();
+
+        // FIXME: Issue a warning if multiple headers for the same submodule
+        // define a macro, rather than silently ignoring all but the first.
+        bool IsNew;
+        // Don't bother creating a module macro if it would represent a #undef
+        // that doesn't override anything.
+        if (Def || !Macro.second.getOverriddenMacros().empty())
+          addModuleMacro(LeavingMod, II, Def,
+                         Macro.second.getOverriddenMacros(), IsNew);
+        break;
+      }
+    }
+  }
+
+  // Put back the outer module's state, if we're tracking it.
+  if (getLangOpts().ModulesLocalVisibility)
+    CurSubmoduleState = Info.OuterSubmoduleState;
+
+  BuildingSubmoduleStack.pop_back();
+
+  // A nested #include makes the included submodule visible.
+  if (!BuildingSubmoduleStack.empty() || !getLangOpts().ModulesLocalVisibility)
+    makeModuleVisible(LeavingMod, ImportLoc);
+}
diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp
index 3ceba05..0aaf3dd 100644
--- a/lib/Lex/PPMacroExpansion.cpp
+++ b/lib/Lex/PPMacroExpansion.cpp
@@ -34,44 +34,234 @@
 using namespace clang;
 
 MacroDirective *
-Preprocessor::getMacroDirectiveHistory(const IdentifierInfo *II) const {
-  assert(II->hadMacroDefinition() && "Identifier has not been not a macro!");
-
-  macro_iterator Pos = Macros.find(II);
-  assert(Pos != Macros.end() && "Identifier macro info is missing!");
-  return Pos->second;
+Preprocessor::getLocalMacroDirectiveHistory(const IdentifierInfo *II) const {
+  if (!II->hadMacroDefinition())
+    return nullptr;
+  auto Pos = CurSubmoduleState->Macros.find(II);
+  return Pos == CurSubmoduleState->Macros.end() ? nullptr
+                                                : Pos->second.getLatest();
 }
 
 void Preprocessor::appendMacroDirective(IdentifierInfo *II, MacroDirective *MD){
   assert(MD && "MacroDirective should be non-zero!");
   assert(!MD->getPrevious() && "Already attached to a MacroDirective history.");
 
-  MacroDirective *&StoredMD = Macros[II];
-  MD->setPrevious(StoredMD);
-  StoredMD = MD;
-  // Setup the identifier as having associated macro history.
+  MacroState &StoredMD = CurSubmoduleState->Macros[II];
+  auto *OldMD = StoredMD.getLatest();
+  MD->setPrevious(OldMD);
+  StoredMD.setLatest(MD);
+  StoredMD.overrideActiveModuleMacros(*this, II);
+
+  // Set up the identifier as having associated macro history.
   II->setHasMacroDefinition(true);
-  if (!MD->isDefined())
+  if (!MD->isDefined() && LeafModuleMacros.find(II) == LeafModuleMacros.end())
     II->setHasMacroDefinition(false);
-  bool isImportedMacro = isa<DefMacroDirective>(MD) &&
-                         cast<DefMacroDirective>(MD)->isImported();
-  if (II->isFromAST() && !isImportedMacro)
+  if (II->isFromAST())
     II->setChangedSinceDeserialization();
 }
 
 void Preprocessor::setLoadedMacroDirective(IdentifierInfo *II,
                                            MacroDirective *MD) {
   assert(II && MD);
-  MacroDirective *&StoredMD = Macros[II];
-  assert(!StoredMD &&
+  MacroState &StoredMD = CurSubmoduleState->Macros[II];
+  assert(!StoredMD.getLatest() &&
          "the macro history was modified before initializing it from a pch");
   StoredMD = MD;
   // Setup the identifier as having associated macro history.
   II->setHasMacroDefinition(true);
-  if (!MD->isDefined())
+  if (!MD->isDefined() && LeafModuleMacros.find(II) == LeafModuleMacros.end())
     II->setHasMacroDefinition(false);
 }
 
+ModuleMacro *Preprocessor::addModuleMacro(Module *Mod, IdentifierInfo *II,
+                                          MacroInfo *Macro,
+                                          ArrayRef<ModuleMacro *> Overrides,
+                                          bool &New) {
+  llvm::FoldingSetNodeID ID;
+  ModuleMacro::Profile(ID, Mod, II);
+
+  void *InsertPos;
+  if (auto *MM = ModuleMacros.FindNodeOrInsertPos(ID, InsertPos)) {
+    New = false;
+    return MM;
+  }
+
+  auto *MM = ModuleMacro::create(*this, Mod, II, Macro, Overrides);
+  ModuleMacros.InsertNode(MM, InsertPos);
+
+  // Each overridden macro is now overridden by one more macro.
+  bool HidAny = false;
+  for (auto *O : Overrides) {
+    HidAny |= (O->NumOverriddenBy == 0);
+    ++O->NumOverriddenBy;
+  }
+
+  // If we were the first overrider for any macro, it's no longer a leaf.
+  auto &LeafMacros = LeafModuleMacros[II];
+  if (HidAny) {
+    LeafMacros.erase(std::remove_if(LeafMacros.begin(), LeafMacros.end(),
+                                    [](ModuleMacro *MM) {
+                                      return MM->NumOverriddenBy != 0;
+                                    }),
+                     LeafMacros.end());
+  }
+
+  // The new macro is always a leaf macro.
+  LeafMacros.push_back(MM);
+  // The identifier now has defined macros (that may or may not be visible).
+  II->setHasMacroDefinition(true);
+
+  New = true;
+  return MM;
+}
+
+ModuleMacro *Preprocessor::getModuleMacro(Module *Mod, IdentifierInfo *II) {
+  llvm::FoldingSetNodeID ID;
+  ModuleMacro::Profile(ID, Mod, II);
+
+  void *InsertPos;
+  return ModuleMacros.FindNodeOrInsertPos(ID, InsertPos);
+}
+
+void Preprocessor::updateModuleMacroInfo(const IdentifierInfo *II,
+                                         ModuleMacroInfo &Info) {
+  assert(Info.ActiveModuleMacrosGeneration !=
+             CurSubmoduleState->VisibleModules.getGeneration() &&
+         "don't need to update this macro name info");
+  Info.ActiveModuleMacrosGeneration =
+      CurSubmoduleState->VisibleModules.getGeneration();
+
+  auto Leaf = LeafModuleMacros.find(II);
+  if (Leaf == LeafModuleMacros.end()) {
+    // No imported macros at all: nothing to do.
+    return;
+  }
+
+  Info.ActiveModuleMacros.clear();
+
+  // Every macro that's locally overridden is overridden by a visible macro.
+  llvm::DenseMap<ModuleMacro *, int> NumHiddenOverrides;
+  for (auto *O : Info.OverriddenMacros)
+    NumHiddenOverrides[O] = -1;
+
+  // Collect all macros that are not overridden by a visible macro.
+  llvm::SmallVector<ModuleMacro *, 16> Worklist(Leaf->second.begin(),
+                                                Leaf->second.end());
+  while (!Worklist.empty()) {
+    auto *MM = Worklist.pop_back_val();
+    if (CurSubmoduleState->VisibleModules.isVisible(MM->getOwningModule())) {
+      // We only care about collecting definitions; undefinitions only act
+      // to override other definitions.
+      if (MM->getMacroInfo())
+        Info.ActiveModuleMacros.push_back(MM);
+    } else {
+      for (auto *O : MM->overrides())
+        if ((unsigned)++NumHiddenOverrides[O] == O->getNumOverridingMacros())
+          Worklist.push_back(O);
+    }
+  }
+  // Our reverse postorder walk found the macros in reverse order.
+  std::reverse(Info.ActiveModuleMacros.begin(), Info.ActiveModuleMacros.end());
+
+  // Determine whether the macro name is ambiguous.
+  MacroInfo *MI = nullptr;
+  bool IsSystemMacro = true;
+  bool IsAmbiguous = false;
+  if (auto *MD = Info.MD) {
+    while (MD && isa<VisibilityMacroDirective>(MD))
+      MD = MD->getPrevious();
+    if (auto *DMD = dyn_cast_or_null<DefMacroDirective>(MD)) {
+      MI = DMD->getInfo();
+      IsSystemMacro &= SourceMgr.isInSystemHeader(DMD->getLocation());
+    }
+  }
+  for (auto *Active : Info.ActiveModuleMacros) {
+    auto *NewMI = Active->getMacroInfo();
+
+    // Before marking the macro as ambiguous, check if this is a case where
+    // both macros are in system headers. If so, we trust that the system
+    // did not get it wrong. This also handles cases where Clang's own
+    // headers have a different spelling of certain system macros:
+    //   #define LONG_MAX __LONG_MAX__ (clang's limits.h)
+    //   #define LONG_MAX 0x7fffffffffffffffL (system's limits.h)
+    //
+    // FIXME: Remove the defined-in-system-headers check. clang's limits.h
+    // overrides the system limits.h's macros, so there's no conflict here.
+    if (MI && NewMI != MI &&
+        !MI->isIdenticalTo(*NewMI, *this, /*Syntactically=*/true))
+      IsAmbiguous = true;
+    IsSystemMacro &= Active->getOwningModule()->IsSystem ||
+                     SourceMgr.isInSystemHeader(NewMI->getDefinitionLoc());
+    MI = NewMI;
+  }
+  Info.IsAmbiguous = IsAmbiguous && !IsSystemMacro;
+}
+
+void Preprocessor::dumpMacroInfo(const IdentifierInfo *II) {
+  ArrayRef<ModuleMacro*> Leaf;
+  auto LeafIt = LeafModuleMacros.find(II);
+  if (LeafIt != LeafModuleMacros.end())
+    Leaf = LeafIt->second;
+  const MacroState *State = nullptr;
+  auto Pos = CurSubmoduleState->Macros.find(II);
+  if (Pos != CurSubmoduleState->Macros.end())
+    State = &Pos->second;
+
+  llvm::errs() << "MacroState " << State << " " << II->getNameStart();
+  if (State && State->isAmbiguous(*this, II))
+    llvm::errs() << " ambiguous";
+  if (State && !State->getOverriddenMacros().empty()) {
+    llvm::errs() << " overrides";
+    for (auto *O : State->getOverriddenMacros())
+      llvm::errs() << " " << O->getOwningModule()->getFullModuleName();
+  }
+  llvm::errs() << "\n";
+
+  // Dump local macro directives.
+  for (auto *MD = State ? State->getLatest() : nullptr; MD;
+       MD = MD->getPrevious()) {
+    llvm::errs() << " ";
+    MD->dump();
+  }
+
+  // Dump module macros.
+  llvm::DenseSet<ModuleMacro*> Active;
+  for (auto *MM : State ? State->getActiveModuleMacros(*this, II) : None)
+    Active.insert(MM);
+  llvm::DenseSet<ModuleMacro*> Visited;
+  llvm::SmallVector<ModuleMacro *, 16> Worklist(Leaf.begin(), Leaf.end());
+  while (!Worklist.empty()) {
+    auto *MM = Worklist.pop_back_val();
+    llvm::errs() << " ModuleMacro " << MM << " "
+                 << MM->getOwningModule()->getFullModuleName();
+    if (!MM->getMacroInfo())
+      llvm::errs() << " undef";
+
+    if (Active.count(MM))
+      llvm::errs() << " active";
+    else if (!CurSubmoduleState->VisibleModules.isVisible(
+                 MM->getOwningModule()))
+      llvm::errs() << " hidden";
+    else if (MM->getMacroInfo())
+      llvm::errs() << " overridden";
+
+    if (!MM->overrides().empty()) {
+      llvm::errs() << " overrides";
+      for (auto *O : MM->overrides()) {
+        llvm::errs() << " " << O->getOwningModule()->getFullModuleName();
+        if (Visited.insert(O).second)
+          Worklist.push_back(O);
+      }
+    }
+    llvm::errs() << "\n";
+    if (auto *MI = MM->getMacroInfo()) {
+      llvm::errs() << "  ";
+      MI->dump();
+      llvm::errs() << "\n";
+    }
+  }
+}
+
 /// RegisterBuiltinMacro - Register the specified identifier in the identifier
 /// table and mark it as a builtin macro to be expanded.
 static IdentifierInfo *RegisterBuiltinMacro(Preprocessor &PP, const char *Name){
@@ -97,7 +287,11 @@
   Ident_Pragma  = RegisterBuiltinMacro(*this, "_Pragma");
 
   // C++ Standing Document Extensions.
-  Ident__has_cpp_attribute = RegisterBuiltinMacro(*this, "__has_cpp_attribute");
+  if (LangOpts.CPlusPlus)
+    Ident__has_cpp_attribute =
+        RegisterBuiltinMacro(*this, "__has_cpp_attribute");
+  else
+    Ident__has_cpp_attribute = nullptr;
 
   // GCC Extensions.
   Ident__BASE_FILE__     = RegisterBuiltinMacro(*this, "__BASE_FILE__");
@@ -156,10 +350,11 @@
 
   // If the identifier is a macro, and if that macro is enabled, it may be
   // expanded so it's not a trivial expansion.
-  if (II->hasMacroDefinition() && PP.getMacroInfo(II)->isEnabled() &&
-      // Fast expanding "#define X X" is ok, because X would be disabled.
-      II != MacroIdent)
-    return false;
+  if (auto *ExpansionMI = PP.getMacroInfo(II))
+    if (ExpansionMI->isEnabled() &&
+        // Fast expanding "#define X X" is ok, because X would be disabled.
+        II != MacroIdent)
+      return false;
 
   // If this is an object-like macro invocation, it is safe to trivially expand
   // it.
@@ -167,12 +362,8 @@
 
   // If this is a function-like macro invocation, it's safe to trivially expand
   // as long as the identifier is not a macro argument.
-  for (MacroInfo::arg_iterator I = MI->arg_begin(), E = MI->arg_end();
-       I != E; ++I)
-    if (*I == II)
-      return false;   // Identifier is a macro argument.
+  return std::find(MI->arg_begin(), MI->arg_end(), II) == MI->arg_end();
 
-  return true;
 }
 
 
@@ -222,10 +413,8 @@
 /// HandleMacroExpandedIdentifier - If an identifier token is read that is to be
 /// expanded as a macro, handle it and return the next token as 'Identifier'.
 bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
-                                                 MacroDirective *MD) {
-  MacroDirective::DefInfo Def = MD->getDefinition();
-  assert(Def.isValid());
-  MacroInfo *MI = Def.getMacroInfo();
+                                                 const MacroDefinition &M) {
+  MacroInfo *MI = M.getMacroInfo();
 
   // If this is a macro expansion in the "#if !defined(x)" line for the file,
   // then the macro could expand to different things in other contexts, we need
@@ -234,9 +423,9 @@
 
   // If this is a builtin macro, like __LINE__ or _Pragma, handle it specially.
   if (MI->isBuiltinMacro()) {
-    if (Callbacks) Callbacks->MacroExpands(Identifier, MD,
-                                           Identifier.getLocation(),
-                                           /*Args=*/nullptr);
+    if (Callbacks)
+      Callbacks->MacroExpands(Identifier, M, Identifier.getLocation(),
+                              /*Args=*/nullptr);
     ExpandBuiltinMacro(Identifier);
     return true;
   }
@@ -283,9 +472,9 @@
       // MacroExpands callbacks still happen in source order, queue this
       // callback to have it happen after the function macro callback.
       DelayedMacroExpandsCallbacks.push_back(
-                              MacroExpandsInfo(Identifier, MD, ExpansionRange));
+          MacroExpandsInfo(Identifier, M, ExpansionRange));
     } else {
-      Callbacks->MacroExpands(Identifier, MD, ExpansionRange, Args);
+      Callbacks->MacroExpands(Identifier, M, ExpansionRange, Args);
       if (!DelayedMacroExpandsCallbacks.empty()) {
         for (unsigned i=0, e = DelayedMacroExpandsCallbacks.size(); i!=e; ++i) {
           MacroExpandsInfo &Info = DelayedMacroExpandsCallbacks[i];
@@ -299,20 +488,16 @@
   }
 
   // If the macro definition is ambiguous, complain.
-  if (Def.getDirective()->isAmbiguous()) {
+  if (M.isAmbiguous()) {
     Diag(Identifier, diag::warn_pp_ambiguous_macro)
       << Identifier.getIdentifierInfo();
     Diag(MI->getDefinitionLoc(), diag::note_pp_ambiguous_macro_chosen)
       << Identifier.getIdentifierInfo();
-    for (MacroDirective::DefInfo PrevDef = Def.getPreviousDefinition();
-         PrevDef && !PrevDef.isUndefined();
-         PrevDef = PrevDef.getPreviousDefinition()) {
-      Diag(PrevDef.getMacroInfo()->getDefinitionLoc(),
-           diag::note_pp_ambiguous_macro_other)
-        << Identifier.getIdentifierInfo();
-      if (!PrevDef.getDirective()->isAmbiguous())
-        break;
-    }
+    M.forAllDefinitions([&](const MacroInfo *OtherMI) {
+      if (OtherMI != MI)
+        Diag(OtherMI->getDefinitionLoc(), diag::note_pp_ambiguous_macro_other)
+          << Identifier.getIdentifierInfo();
+    });
   }
 
   // If we started lexing a macro, enter the macro expansion body.
@@ -1005,6 +1190,7 @@
       .Case("is_trivially_copyable", LangOpts.CPlusPlus)
       .Case("is_union", LangOpts.CPlusPlus)
       .Case("modules", LangOpts.Modules)
+      .Case("safe_stack", LangOpts.Sanitize.has(SanitizerKind::SafeStack))
       .Case("tls", PP.getTargetInfo().isTLSSupported())
       .Case("underlying_type", LangOpts.CPlusPlus)
       .Default(false);
@@ -1052,6 +1238,7 @@
            .Case("cxx_range_for", LangOpts.CPlusPlus)
            .Case("cxx_reference_qualified_functions", LangOpts.CPlusPlus)
            .Case("cxx_rvalue_references", LangOpts.CPlusPlus)
+           .Case("cxx_variadic_templates", LangOpts.CPlusPlus)
            // C++1y features supported by other languages as extensions.
            .Case("cxx_binary_literals", true)
            .Case("cxx_init_captures", LangOpts.CPlusPlus11)
diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp
index af7a153..5f63d35 100644
--- a/lib/Lex/PTHLexer.cpp
+++ b/lib/Lex/PTHLexer.cpp
@@ -431,8 +431,7 @@
   Diags.Report(Diags.getCustomDiagID(DiagnosticsEngine::Error, "%0")) << Msg;
 }
 
-PTHManager *PTHManager::Create(const std::string &file,
-                               DiagnosticsEngine &Diags) {
+PTHManager *PTHManager::Create(StringRef file, DiagnosticsEngine &Diags) {
   // Memory map the PTH file.
   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
       llvm::MemoryBuffer::getFile(file);
diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp
index bfac3fd..26ed674 100644
--- a/lib/Lex/Pragma.cpp
+++ b/lib/Lex/Pragma.cpp
@@ -22,6 +22,7 @@
 #include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/CrashRecoveryContext.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <algorithm>
@@ -400,7 +401,7 @@
     if (II->isPoisoned()) continue;
 
     // If this is a macro identifier, emit a warning.
-    if (II->hasMacroDefinition())
+    if (isMacroDefined(II))
       Diag(Tok, diag::pp_poisoning_existing_macro);
 
     // Finally, poison it!
@@ -590,8 +591,7 @@
     PragmaPushMacroInfo.find(IdentInfo);
   if (iter != PragmaPushMacroInfo.end()) {
     // Forget the MacroInfo currently associated with IdentInfo.
-    if (MacroDirective *CurrentMD = getMacroDirective(IdentInfo)) {
-      MacroInfo *MI = CurrentMD->getMacroInfo();
+    if (MacroInfo *MI = getMacroInfo(IdentInfo)) {
       if (MI->isWarnIfUnused())
         WarnUnusedMacroLocs.erase(MI->getDefinitionLoc());
       appendMacroDirective(IdentInfo, AllocateUndefMacroDirective(MessageLoc));
@@ -600,11 +600,9 @@
     // Get the MacroInfo we want to reinstall.
     MacroInfo *MacroToReInstall = iter->second.back();
 
-    if (MacroToReInstall) {
+    if (MacroToReInstall)
       // Reinstall the previously pushed macro.
-      appendDefMacroDirective(IdentInfo, MacroToReInstall, MessageLoc,
-                              /*isImported=*/false, /*Overrides*/None);
-    }
+      appendDefMacroDirective(IdentInfo, MacroToReInstall, MessageLoc);
 
     // Pop PragmaPushMacroInfo stack.
     iter->second.pop_back();
@@ -878,6 +876,14 @@
       llvm::report_fatal_error("#pragma clang __debug llvm_fatal_error");
     } else if (II->isStr("llvm_unreachable")) {
       llvm_unreachable("#pragma clang __debug llvm_unreachable");
+    } else if (II->isStr("macro")) {
+      Token MacroName;
+      PP.LexUnexpandedToken(MacroName);
+      auto *MacroII = MacroName.getIdentifierInfo();
+      if (MacroII)
+        PP.dumpMacroInfo(MacroII);
+      else
+        PP.Diag(MacroName, diag::warn_pragma_diagnostic_invalid);
     } else if (II->isStr("overflow_stack")) {
       DebugOverflowStack();
     } else if (II->isStr("handle_crash")) {
@@ -1031,12 +1037,8 @@
 
     PP.Lex(Tok);
     IdentifierInfo *II = Tok.getIdentifierInfo();
-    if (!II) {
-      PP.Diag(Tok, diag::warn_pragma_warning_spec_invalid);
-      return;
-    }
 
-    if (II->isStr("push")) {
+    if (II && II->isStr("push")) {
       // #pragma warning( push[ ,n ] )
       int Level = -1;
       PP.Lex(Tok);
@@ -1053,7 +1055,7 @@
       }
       if (Callbacks)
         Callbacks->PragmaWarningPush(DiagLoc, Level);
-    } else if (II->isStr("pop")) {
+    } else if (II && II->isStr("pop")) {
       // #pragma warning( pop )
       PP.Lex(Tok);
       if (Callbacks)
@@ -1063,23 +1065,40 @@
       //                  [; warning-specifier : warning-number-list...] )
       while (true) {
         II = Tok.getIdentifierInfo();
-        if (!II) {
+        if (!II && !Tok.is(tok::numeric_constant)) {
           PP.Diag(Tok, diag::warn_pragma_warning_spec_invalid);
           return;
         }
 
         // Figure out which warning specifier this is.
-        StringRef Specifier = II->getName();
-        bool SpecifierValid =
-            llvm::StringSwitch<bool>(Specifier)
-                .Cases("1", "2", "3", "4", true)
-                .Cases("default", "disable", "error", "once", "suppress", true)
-                .Default(false);
+        bool SpecifierValid;
+        StringRef Specifier;
+        llvm::SmallString<1> SpecifierBuf;
+        if (II) {
+          Specifier = II->getName();
+          SpecifierValid = llvm::StringSwitch<bool>(Specifier)
+                               .Cases("default", "disable", "error", "once",
+                                      "suppress", true)
+                               .Default(false);
+          // If we read a correct specifier, snatch next token (that should be
+          // ":", checked later).
+          if (SpecifierValid)
+            PP.Lex(Tok);
+        } else {
+          // Token is a numeric constant. It should be either 1, 2, 3 or 4.
+          uint64_t Value;
+          Specifier = PP.getSpelling(Tok, SpecifierBuf);
+          if (PP.parseSimpleIntegerLiteral(Tok, Value)) {
+            SpecifierValid = (Value >= 1) && (Value <= 4);
+          } else
+            SpecifierValid = false;
+          // Next token already snatched by parseSimpleIntegerLiteral.
+        }
+
         if (!SpecifierValid) {
           PP.Diag(Tok, diag::warn_pragma_warning_spec_invalid);
           return;
         }
-        PP.Lex(Tok);
         if (Tok.isNot(tok::colon)) {
           PP.Diag(Tok, diag::warn_pragma_warning_expected) << ":";
           return;
diff --git a/lib/Lex/PreprocessingRecord.cpp b/lib/Lex/PreprocessingRecord.cpp
index dafcbbe..a423041 100644
--- a/lib/Lex/PreprocessingRecord.cpp
+++ b/lib/Lex/PreprocessingRecord.cpp
@@ -246,10 +246,11 @@
   assert(Entity);
   SourceLocation BeginLoc = Entity->getSourceRange().getBegin();
 
-  if (isa<MacroDefinition>(Entity)) {
+  if (isa<MacroDefinitionRecord>(Entity)) {
     assert((PreprocessedEntities.empty() ||
-            !SourceMgr.isBeforeInTranslationUnit(BeginLoc,
-                   PreprocessedEntities.back()->getSourceRange().getBegin())) &&
+            !SourceMgr.isBeforeInTranslationUnit(
+                BeginLoc,
+                PreprocessedEntities.back()->getSourceRange().getBegin())) &&
            "a macro definition was encountered out-of-order");
     PreprocessedEntities.push_back(Entity);
     return getPPEntityID(PreprocessedEntities.size()-1, /*isLoaded=*/false);
@@ -318,7 +319,7 @@
 }
 
 void PreprocessingRecord::RegisterMacroDefinition(MacroInfo *Macro,
-                                                  MacroDefinition *Def) {
+                                                  MacroDefinitionRecord *Def) {
   MacroDefinitions[Macro] = Def;
 }
 
@@ -355,9 +356,10 @@
   return Entity;
 }
 
-MacroDefinition *PreprocessingRecord::findMacroDefinition(const MacroInfo *MI) {
-  llvm::DenseMap<const MacroInfo *, MacroDefinition *>::iterator Pos
-    = MacroDefinitions.find(MI);
+MacroDefinitionRecord *
+PreprocessingRecord::findMacroDefinition(const MacroInfo *MI) {
+  llvm::DenseMap<const MacroInfo *, MacroDefinitionRecord *>::iterator Pos =
+      MacroDefinitions.find(MI);
   if (Pos == MacroDefinitions.end())
     return nullptr;
 
@@ -372,35 +374,34 @@
     return;
 
   if (MI->isBuiltinMacro())
-    addPreprocessedEntity(
-                      new (*this) MacroExpansion(Id.getIdentifierInfo(),Range));
-  else if (MacroDefinition *Def = findMacroDefinition(MI))
-    addPreprocessedEntity(
-                       new (*this) MacroExpansion(Def, Range));
+    addPreprocessedEntity(new (*this)
+                              MacroExpansion(Id.getIdentifierInfo(), Range));
+  else if (MacroDefinitionRecord *Def = findMacroDefinition(MI))
+    addPreprocessedEntity(new (*this) MacroExpansion(Def, Range));
 }
 
 void PreprocessingRecord::Ifdef(SourceLocation Loc, const Token &MacroNameTok,
-                                const MacroDirective *MD) {
+                                const MacroDefinition &MD) {
   // This is not actually a macro expansion but record it as a macro reference.
   if (MD)
-    addMacroExpansion(MacroNameTok, MD->getMacroInfo(),
+    addMacroExpansion(MacroNameTok, MD.getMacroInfo(),
                       MacroNameTok.getLocation());
 }
 
 void PreprocessingRecord::Ifndef(SourceLocation Loc, const Token &MacroNameTok,
-                                 const MacroDirective *MD) {
+                                 const MacroDefinition &MD) {
   // This is not actually a macro expansion but record it as a macro reference.
   if (MD)
-    addMacroExpansion(MacroNameTok, MD->getMacroInfo(),
+    addMacroExpansion(MacroNameTok, MD.getMacroInfo(),
                       MacroNameTok.getLocation());
 }
 
 void PreprocessingRecord::Defined(const Token &MacroNameTok,
-                                  const MacroDirective *MD,
+                                  const MacroDefinition &MD,
                                   SourceRange Range) {
   // This is not actually a macro expansion but record it as a macro reference.
   if (MD)
-    addMacroExpansion(MacroNameTok, MD->getMacroInfo(),
+    addMacroExpansion(MacroNameTok, MD.getMacroInfo(),
                       MacroNameTok.getLocation());
 }
 
@@ -408,27 +409,26 @@
   SkippedRanges.push_back(Range);
 }
 
-void PreprocessingRecord::MacroExpands(const Token &Id,const MacroDirective *MD,
+void PreprocessingRecord::MacroExpands(const Token &Id,
+                                       const MacroDefinition &MD,
                                        SourceRange Range,
                                        const MacroArgs *Args) {
-  addMacroExpansion(Id, MD->getMacroInfo(), Range);
+  addMacroExpansion(Id, MD.getMacroInfo(), Range);
 }
 
 void PreprocessingRecord::MacroDefined(const Token &Id,
                                        const MacroDirective *MD) {
   const MacroInfo *MI = MD->getMacroInfo();
   SourceRange R(MI->getDefinitionLoc(), MI->getDefinitionEndLoc());
-  MacroDefinition *Def
-      = new (*this) MacroDefinition(Id.getIdentifierInfo(), R);
+  MacroDefinitionRecord *Def =
+      new (*this) MacroDefinitionRecord(Id.getIdentifierInfo(), R);
   addPreprocessedEntity(Def);
   MacroDefinitions[MI] = Def;
 }
 
 void PreprocessingRecord::MacroUndefined(const Token &Id,
-                                         const MacroDirective *MD) {
-  // Note: MI may be null (when #undef'ining an undefined macro).
-  if (MD)
-    MacroDefinitions.erase(MD->getMacroInfo());
+                                         const MacroDefinition &MD) {
+  MD.forAllDefinitions([&](MacroInfo *MI) { MacroDefinitions.erase(MI); });
 }
 
 void PreprocessingRecord::InclusionDirective(
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index 51a038a..7e33f1c 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -73,7 +73,8 @@
       ModuleImportExpectsIdentifier(false), CodeCompletionReached(0),
       MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr),
       CurDirLookup(nullptr), CurLexerKind(CLK_Lexer), CurSubmodule(nullptr),
-      Callbacks(nullptr), MacroArgCache(nullptr), Record(nullptr),
+      Callbacks(nullptr), CurSubmoduleState(&NullSubmoduleState),
+      MacroArgCache(nullptr), Record(nullptr),
       MIChainHead(nullptr), DeserialMIChainHead(nullptr) {
   OwnsHeaderSearch = OwnsHeaders;
   
@@ -266,7 +267,9 @@
   llvm::errs() << "\n  Macro Expanded Tokens: "
                << llvm::capacity_in_bytes(MacroExpandedTokens);
   llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
-  llvm::errs() << "\n  Macros: " << llvm::capacity_in_bytes(Macros);
+  // FIXME: List information for all submodules.
+  llvm::errs() << "\n  Macros: "
+               << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
   llvm::errs() << "\n  #pragma push_macro Info: "
                << llvm::capacity_in_bytes(PragmaPushMacroInfo);
   llvm::errs() << "\n  Poison Reasons: "
@@ -283,14 +286,16 @@
     ExternalSource->ReadDefinedMacros();
   }
 
-  return Macros.begin();
+  return CurSubmoduleState->Macros.begin();
 }
 
 size_t Preprocessor::getTotalMemory() const {
   return BP.getTotalMemory()
     + llvm::capacity_in_bytes(MacroExpandedTokens)
     + Predefines.capacity() /* Predefines buffer. */
-    + llvm::capacity_in_bytes(Macros)
+    // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
+    // and ModuleMacros.
+    + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
     + llvm::capacity_in_bytes(PragmaPushMacroInfo)
     + llvm::capacity_in_bytes(PoisonReasons)
     + llvm::capacity_in_bytes(CommentHandlers);
@@ -304,7 +309,7 @@
     ExternalSource->ReadDefinedMacros();
   }
 
-  return Macros.end();
+  return CurSubmoduleState->Macros.end();
 }
 
 /// \brief Compares macro tokens with a specified token value sequence.
@@ -322,7 +327,7 @@
   for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
        I != E; ++I) {
     const MacroDirective::DefInfo
-      Def = I->second->findDirectiveAtLoc(Loc, SourceMgr);
+      Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
     if (!Def || !Def.getMacroInfo())
       continue;
     if (!Def.getMacroInfo()->isObjectLike())
@@ -584,6 +589,23 @@
     Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
 }
 
+/// \brief Returns a diagnostic message kind for reporting a future keyword as
+/// appropriate for the identifier and specified language.
+static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
+                                          const LangOptions &LangOpts) {
+  assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
+
+  if (LangOpts.CPlusPlus)
+    return llvm::StringSwitch<diag::kind>(II.getName())
+#define CXX11_KEYWORD(NAME, FLAGS)                                             \
+        .Case(#NAME, diag::warn_cxx11_keyword)
+#include "clang/Basic/TokenKinds.def"
+        ;
+
+  llvm_unreachable(
+      "Keyword not known to come from a newer Standard or proposed Standard");
+}
+
 /// HandleIdentifier - This callback is invoked when the lexer reads an
 /// identifier.  This callback looks up the identifier in the map and/or
 /// potentially macro expands it or turns it into a named token (like 'for').
@@ -622,8 +644,9 @@
   }
 
   // If this is a macro to be expanded, do it.
-  if (MacroDirective *MD = getMacroDirective(&II)) {
-    MacroInfo *MI = MD->getMacroInfo();
+  if (MacroDefinition MD = getMacroDefinition(&II)) {
+    auto *MI = MD.getMacroInfo();
+    assert(MI && "macro definition with no macro info?");
     if (!DisableMacroExpansion) {
       if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
         // C99 6.10.3p10: If the preprocessing token immediately after the
@@ -641,15 +664,16 @@
     }
   }
 
-  // If this identifier is a keyword in C++11, produce a warning. Don't warn if
-  // we're not considering macro expansion, since this identifier might be the
-  // name of a macro.
+  // If this identifier is a keyword in a newer Standard or proposed Standard,
+  // produce a warning. Don't warn if we're not considering macro expansion,
+  // since this identifier might be the name of a macro.
   // FIXME: This warning is disabled in cases where it shouldn't be, like
   //   "#define constexpr constexpr", "int constexpr;"
-  if (II.isCXX11CompatKeyword() && !DisableMacroExpansion) {
-    Diag(Identifier, diag::warn_cxx11_keyword) << II.getName();
+  if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
+    Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
+        << II.getName();
     // Don't diagnose this keyword again in this translation unit.
-    II.setIsCXX11CompatKeyword(false);
+    II.setIsFutureCompatKeyword(false);
   }
 
   // C++ 2.11p2: If this is an alternative representation of a C++ operator,
@@ -748,16 +772,36 @@
   // If we have a non-empty module path, load the named module.
   if (!ModuleImportPath.empty()) {
     Module *Imported = nullptr;
-    if (getLangOpts().Modules)
+    if (getLangOpts().Modules) {
       Imported = TheModuleLoader.loadModule(ModuleImportLoc,
                                             ModuleImportPath,
-                                            Module::MacrosVisible,
+                                            Module::Hidden,
                                             /*IsIncludeDirective=*/false);
+      if (Imported)
+        makeModuleVisible(Imported, ModuleImportLoc);
+    }
     if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
       Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
   }
 }
 
+void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
+  CurSubmoduleState->VisibleModules.setVisible(
+      M, Loc, [](Module *) {},
+      [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
+        // FIXME: Include the path in the diagnostic.
+        // FIXME: Include the import location for the conflicting module.
+        Diag(ModuleImportLoc, diag::warn_module_conflict)
+            << Path[0]->getFullModuleName()
+            << Conflict->getFullModuleName()
+            << Message;
+      });
+
+  // Add this module to the imports list of the currently-built submodule.
+  if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
+    BuildingSubmoduleStack.back().M->Imports.insert(M);
+}
+
 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
                                           const char *DiagnosticTag,
                                           bool AllowMacroExpansion) {
diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp
index 23d7281..83efbab 100644
--- a/lib/Lex/TokenLexer.cpp
+++ b/lib/Lex/TokenLexer.cpp
@@ -521,6 +521,13 @@
 /// are more ## after it, chomp them iteratively.  Return the result as Tok.
 /// If this returns true, the caller should immediately return the token.
 bool TokenLexer::PasteTokens(Token &Tok) {
+  // MSVC: If previous token was pasted, this must be a recovery from an invalid
+  // paste operation. Ignore spaces before this token to mimic MSVC output.
+  // Required for generating valid UUID strings in some MS headers.
+  if (PP.getLangOpts().MicrosoftExt && (CurToken >= 2) &&
+      Tokens[CurToken - 2].is(tok::hashhash))
+    Tok.clearFlag(Token::LeadingSpace);
+
   SmallString<128> Buffer;
   const char *ResultTokStrPtr = nullptr;
   SourceLocation StartLoc = Tok.getLocation();
diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp
index 8f4afdf..bd114d7 100644
--- a/lib/Parse/ParseDecl.cpp
+++ b/lib/Parse/ParseDecl.cpp
@@ -529,64 +529,72 @@
 /// [MS] extended-decl-modifier-seq:
 ///             extended-decl-modifier[opt]
 ///             extended-decl-modifier extended-decl-modifier-seq
-void Parser::ParseMicrosoftDeclSpec(ParsedAttributes &Attrs) {
+void Parser::ParseMicrosoftDeclSpecs(ParsedAttributes &Attrs,
+                                     SourceLocation *End) {
+  assert((getLangOpts().MicrosoftExt || getLangOpts().Borland ||
+          getLangOpts().CUDA) &&
+         "Incorrect language options for parsing __declspec");
   assert(Tok.is(tok::kw___declspec) && "Not a declspec!");
 
-  ConsumeToken();
-  BalancedDelimiterTracker T(*this, tok::l_paren);
-  if (T.expectAndConsume(diag::err_expected_lparen_after, "__declspec",
-                         tok::r_paren))
-    return;
-
-  // An empty declspec is perfectly legal and should not warn.  Additionally,
-  // you can specify multiple attributes per declspec.
-  while (Tok.isNot(tok::r_paren)) {
-    // Attribute not present.
-    if (TryConsumeToken(tok::comma))
-      continue;
-
-    // We expect either a well-known identifier or a generic string.  Anything
-    // else is a malformed declspec.
-    bool IsString = Tok.getKind() == tok::string_literal;
-    if (!IsString && Tok.getKind() != tok::identifier &&
-        Tok.getKind() != tok::kw_restrict) {
-      Diag(Tok, diag::err_ms_declspec_type);
-      T.skipToEnd();
+  while (Tok.is(tok::kw___declspec)) {
+    ConsumeToken();
+    BalancedDelimiterTracker T(*this, tok::l_paren);
+    if (T.expectAndConsume(diag::err_expected_lparen_after, "__declspec",
+                           tok::r_paren))
       return;
-    }
 
-    IdentifierInfo *AttrName;
-    SourceLocation AttrNameLoc;
-    if (IsString) {
-      SmallString<8> StrBuffer;
-      bool Invalid = false;
-      StringRef Str = PP.getSpelling(Tok, StrBuffer, &Invalid);
-      if (Invalid) {
+    // An empty declspec is perfectly legal and should not warn.  Additionally,
+    // you can specify multiple attributes per declspec.
+    while (Tok.isNot(tok::r_paren)) {
+      // Attribute not present.
+      if (TryConsumeToken(tok::comma))
+        continue;
+
+      // We expect either a well-known identifier or a generic string.  Anything
+      // else is a malformed declspec.
+      bool IsString = Tok.getKind() == tok::string_literal;
+      if (!IsString && Tok.getKind() != tok::identifier &&
+          Tok.getKind() != tok::kw_restrict) {
+        Diag(Tok, diag::err_ms_declspec_type);
         T.skipToEnd();
         return;
       }
-      AttrName = PP.getIdentifierInfo(Str);
-      AttrNameLoc = ConsumeStringToken();
-    } else {
-      AttrName = Tok.getIdentifierInfo();
-      AttrNameLoc = ConsumeToken();
+
+      IdentifierInfo *AttrName;
+      SourceLocation AttrNameLoc;
+      if (IsString) {
+        SmallString<8> StrBuffer;
+        bool Invalid = false;
+        StringRef Str = PP.getSpelling(Tok, StrBuffer, &Invalid);
+        if (Invalid) {
+          T.skipToEnd();
+          return;
+        }
+        AttrName = PP.getIdentifierInfo(Str);
+        AttrNameLoc = ConsumeStringToken();
+      } else {
+        AttrName = Tok.getIdentifierInfo();
+        AttrNameLoc = ConsumeToken();
+      }
+
+      bool AttrHandled = false;
+
+      // Parse attribute arguments.
+      if (Tok.is(tok::l_paren))
+        AttrHandled = ParseMicrosoftDeclSpecArgs(AttrName, AttrNameLoc, Attrs);
+      else if (AttrName->getName() == "property")
+        // The property attribute must have an argument list.
+        Diag(Tok.getLocation(), diag::err_expected_lparen_after)
+            << AttrName->getName();
+
+      if (!AttrHandled)
+        Attrs.addNew(AttrName, AttrNameLoc, nullptr, AttrNameLoc, nullptr, 0,
+                     AttributeList::AS_Declspec);
     }
-
-    bool AttrHandled = false;
-
-    // Parse attribute arguments.
-    if (Tok.is(tok::l_paren))
-      AttrHandled = ParseMicrosoftDeclSpecArgs(AttrName, AttrNameLoc, Attrs);
-    else if (AttrName->getName() == "property")
-      // The property attribute must have an argument list.
-      Diag(Tok.getLocation(), diag::err_expected_lparen_after)
-          << AttrName->getName();
-
-    if (!AttrHandled)
-      Attrs.addNew(AttrName, AttrNameLoc, nullptr, AttrNameLoc, nullptr, 0,
-                   AttributeList::AS_Declspec);
+    T.consumeClose();
+    if (End)
+      *End = T.getCloseLocation();
   }
-  T.consumeClose();
 }
 
 void Parser::ParseMicrosoftTypeAttributes(ParsedAttributes &attrs) {
@@ -1360,6 +1368,46 @@
   }
 }
 
+// As an exception to the rule, __declspec(align(...)) before the
+// class-key affects the type instead of the variable.
+void Parser::handleDeclspecAlignBeforeClassKey(ParsedAttributesWithRange &Attrs,
+                                               DeclSpec &DS,
+                                               Sema::TagUseKind TUK) {
+  if (TUK == Sema::TUK_Reference)
+    return;
+
+  ParsedAttributes &PA = DS.getAttributes();
+  AttributeList *AL = PA.getList();
+  AttributeList *Prev = nullptr;
+  while (AL) {
+    AttributeList *Next = AL->getNext();
+
+    // We only consider attributes using the appropriate '__declspec' spelling,
+    // this behavior doesn't extend to any other spellings.
+    if (AL->getKind() == AttributeList::AT_Aligned &&
+        AL->isDeclspecAttribute()) {
+      // Stitch the attribute into the tag's attribute list.
+      AL->setNext(nullptr);
+      Attrs.add(AL);
+
+      // Remove the attribute from the variable's attribute list.
+      if (Prev) {
+        // Set the last variable attribute's next attribute to be the attribute
+        // after the current one.
+        Prev->setNext(Next);
+      } else {
+        // Removing the head of the list requires us to reset the head to the
+        // next attribute.
+        PA.set(Next);
+      }
+    } else {
+      Prev = AL;
+    }
+
+    AL = Next;
+  }
+}
+
 /// ParseDeclaration - Parse a full 'declaration', which consists of
 /// declaration-specifiers, some number of declarators, and a semicolon.
 /// 'Context' should be a Declarator::TheContext value.  This returns the
@@ -2956,7 +3004,7 @@
 
     // Microsoft declspec support.
     case tok::kw___declspec:
-      ParseMicrosoftDeclSpec(DS.getAttributes());
+      ParseMicrosoftDeclSpecs(DS.getAttributes());
       continue;
 
     // Microsoft single token adornments.
@@ -3600,10 +3648,7 @@
   ParsedAttributesWithRange attrs(AttrFactory);
   MaybeParseGNUAttributes(attrs);
   MaybeParseCXX11Attributes(attrs);
-
-  // If declspecs exist after tag, parse them.
-  while (Tok.is(tok::kw___declspec))
-    ParseMicrosoftDeclSpec(attrs);
+  MaybeParseMicrosoftDeclSpecs(attrs);
 
   SourceLocation ScopedEnumKWLoc;
   bool IsScopedUsingClassTag = false;
@@ -3622,8 +3667,7 @@
     // They are allowed afterwards, though.
     MaybeParseGNUAttributes(attrs);
     MaybeParseCXX11Attributes(attrs);
-    while (Tok.is(tok::kw___declspec))
-      ParseMicrosoftDeclSpec(attrs);
+    MaybeParseMicrosoftDeclSpecs(attrs);
   }
 
   // C++11 [temp.explicit]p12:
@@ -3851,6 +3895,15 @@
     return;
   }
 
+  handleDeclspecAlignBeforeClassKey(attrs, DS, TUK);
+
+  Sema::SkipBodyInfo SkipBody;
+  if (!Name && TUK == Sema::TUK_Definition && Tok.is(tok::l_brace) &&
+      NextToken().is(tok::identifier))
+    SkipBody = Actions.shouldSkipAnonEnumBody(getCurScope(),
+                                              NextToken().getIdentifierInfo(),
+                                              NextToken().getLocation());
+
   bool Owned = false;
   bool IsDependent = false;
   const char *PrevSpec = nullptr;
@@ -3860,7 +3913,22 @@
                                    AS, DS.getModulePrivateSpecLoc(), TParams,
                                    Owned, IsDependent, ScopedEnumKWLoc,
                                    IsScopedUsingClassTag, BaseType,
-                                   DSC == DSC_type_specifier);
+                                   DSC == DSC_type_specifier, &SkipBody);
+
+  if (SkipBody.ShouldSkip) {
+    assert(TUK == Sema::TUK_Definition && "can only skip a definition");
+
+    BalancedDelimiterTracker T(*this, tok::l_brace);
+    T.consumeOpen();
+    T.skipToEnd();
+
+    if (DS.SetTypeSpecType(DeclSpec::TST_enum, StartLoc,
+                           NameLoc.isValid() ? NameLoc : StartLoc,
+                           PrevSpec, DiagID, TagDecl, Owned,
+                           Actions.getASTContext().getPrintingPolicy()))
+      Diag(StartLoc, DiagID) << PrevSpec;
+    return;
+  }
 
   if (IsDependent) {
     // This enum has a dependent nested-name-specifier. Handle it as a
@@ -3932,6 +4000,7 @@
     Diag(Tok, diag::error_empty_enum);
 
   SmallVector<Decl *, 32> EnumConstantDecls;
+  SmallVector<SuppressAccessChecks, 32> EnumAvailabilityDiags;
 
   Decl *LastEnumConstDecl = nullptr;
 
@@ -3962,7 +4031,7 @@
 
     SourceLocation EqualLoc;
     ExprResult AssignedVal;
-    ParsingDeclRAIIObject PD(*this, ParsingDeclRAIIObject::NoParent);
+    EnumAvailabilityDiags.emplace_back(*this);
 
     if (TryConsumeToken(tok::equal, EqualLoc)) {
       AssignedVal = ParseConstantExpression();
@@ -3976,7 +4045,7 @@
                                                     IdentLoc, Ident,
                                                     attrs.getList(), EqualLoc,
                                                     AssignedVal.get());
-    PD.complete(EnumConstDecl);
+    EnumAvailabilityDiags.back().done();
 
     EnumConstantDecls.push_back(EnumConstDecl);
     LastEnumConstDecl = EnumConstDecl;
@@ -4032,6 +4101,14 @@
                         getCurScope(),
                         attrs.getList());
 
+  // Now handle enum constant availability diagnostics.
+  assert(EnumConstantDecls.size() == EnumAvailabilityDiags.size());
+  for (size_t i = 0, e = EnumConstantDecls.size(); i != e; ++i) {
+    ParsingDeclRAIIObject PD(*this, ParsingDeclRAIIObject::NoParent);
+    EnumAvailabilityDiags[i].redelay();
+    PD.complete(EnumConstantDecls[i]);
+  }
+
   EnumScope.Exit();
   Actions.ActOnTagFinishDefinition(getCurScope(), EnumDecl,
                                    T.getCloseLocation());
diff --git a/lib/Parse/ParseDeclCXX.cpp b/lib/Parse/ParseDeclCXX.cpp
index c74b028..9ed797f 100644
--- a/lib/Parse/ParseDeclCXX.cpp
+++ b/lib/Parse/ParseDeclCXX.cpp
@@ -1229,10 +1229,7 @@
   ParsedAttributesWithRange attrs(AttrFactory);
   // If attributes exist after tag, parse them.
   MaybeParseGNUAttributes(attrs);
-
-  // If declspecs exist after tag, parse them.
-  while (Tok.is(tok::kw___declspec))
-    ParseMicrosoftDeclSpec(attrs);
+  MaybeParseMicrosoftDeclSpecs(attrs);
 
   // Parse inheritance specifiers.
   if (Tok.is(tok::kw___single_inheritance) ||
@@ -1553,7 +1550,7 @@
   TypeResult TypeResult = true; // invalid
 
   bool Owned = false;
-  bool SkipBody = false;
+  Sema::SkipBodyInfo SkipBody;
   if (TemplateId) {
     // Explicit specialization, class template partial specialization,
     // or explicit instantiation.
@@ -1640,7 +1637,8 @@
           *TemplateId, attrs.getList(),
           MultiTemplateParamsArg(TemplateParams ? &(*TemplateParams)[0]
                                                 : nullptr,
-                                 TemplateParams ? TemplateParams->size() : 0));
+                                 TemplateParams ? TemplateParams->size() : 0),
+          &SkipBody);
     }
   } else if (TemplateInfo.Kind == ParsedTemplateInfo::ExplicitInstantiation &&
              TUK == Sema::TUK_Declaration) {
@@ -1692,6 +1690,8 @@
       TParams =
         MultiTemplateParamsArg(&(*TemplateParams)[0], TemplateParams->size());
 
+    handleDeclspecAlignBeforeClassKey(attrs, DS, TUK);
+
     // Declaration or definition of a class type
     TagOrTempResult = Actions.ActOnTag(getCurScope(), TagType, TUK, StartLoc,
                                        SS, Name, NameLoc, attrs.getList(), AS,
@@ -1716,7 +1716,7 @@
     assert(Tok.is(tok::l_brace) ||
            (getLangOpts().CPlusPlus && Tok.is(tok::colon)) ||
            isCXX11FinalKeyword());
-    if (SkipBody)
+    if (SkipBody.ShouldSkip)
       SkipCXXMemberSpecification(StartLoc, AttrFixitLoc, TagType,
                                  TagOrTempResult.get());
     else if (getLangOpts().CPlusPlus)
@@ -2724,12 +2724,13 @@
     ParseScope ClassScope(this, Scope::ClassScope|Scope::DeclScope);
     ParsingClassDefinition ParsingDef(*this, TagDecl, /*NonNestedClass*/ true,
                                       TagType == DeclSpec::TST_interface);
-    Actions.ActOnTagStartSkippedDefinition(getCurScope(), TagDecl);
+    auto OldContext =
+        Actions.ActOnTagStartSkippedDefinition(getCurScope(), TagDecl);
 
     // Parse the bases but don't attach them to the class.
     ParseBaseClause(nullptr);
 
-    Actions.ActOnTagFinishSkippedDefinition();
+    Actions.ActOnTagFinishSkippedDefinition(OldContext);
 
     if (!Tok.is(tok::l_brace)) {
       Diag(PP.getLocForEndOfToken(PrevTokLocation),
@@ -3779,7 +3780,7 @@
   return EndLoc;
 }
 
-/// ParseMicrosoftAttributes - Parse a Microsoft attribute [Attr]
+/// ParseMicrosoftAttributes - Parse Microsoft attributes [Attr]
 ///
 /// [MS] ms-attribute:
 ///             '[' token-seq ']'
@@ -3791,13 +3792,15 @@
                                       SourceLocation *endLoc) {
   assert(Tok.is(tok::l_square) && "Not a Microsoft attribute list");
 
-  while (Tok.is(tok::l_square)) {
+  do {
     // FIXME: If this is actually a C++11 attribute, parse it as one.
-    ConsumeBracket();
+    BalancedDelimiterTracker T(*this, tok::l_square);
+    T.consumeOpen();
     SkipUntil(tok::r_square, StopAtSemi | StopBeforeMatch);
-    if (endLoc) *endLoc = Tok.getLocation();
-    ExpectAndConsume(tok::r_square);
-  }
+    T.consumeClose();
+    if (endLoc)
+      *endLoc = T.getCloseLocation();
+  } while (Tok.is(tok::l_square));
 }
 
 void Parser::ParseMicrosoftIfExistsClassDeclaration(DeclSpec::TST TagType,
diff --git a/lib/Parse/ParseExpr.cpp b/lib/Parse/ParseExpr.cpp
index 315c957..95a28a8 100644
--- a/lib/Parse/ParseExpr.cpp
+++ b/lib/Parse/ParseExpr.cpp
@@ -347,7 +347,11 @@
       RHS = ParseCastExpression(false);
 
     if (RHS.isInvalid()) {
+      // FIXME: Errors generated by the delayed typo correction should be
+      // printed before errors from parsing the RHS, not after.
       Actions.CorrectDelayedTyposInExpr(LHS);
+      if (TernaryMiddle.isUsable())
+        TernaryMiddle = Actions.CorrectDelayedTyposInExpr(TernaryMiddle);
       LHS = ExprError();
     }
 
@@ -380,7 +384,11 @@
       RHSIsInitList = false;
 
       if (RHS.isInvalid()) {
+        // FIXME: Errors generated by the delayed typo correction should be
+        // printed before errors from ParseRHSOfBinaryExpression, not after.
         Actions.CorrectDelayedTyposInExpr(LHS);
+        if (TernaryMiddle.isUsable())
+          TernaryMiddle = Actions.CorrectDelayedTyposInExpr(TernaryMiddle);
         LHS = ExprError();
       }
 
@@ -923,7 +931,12 @@
     auto Validator = llvm::make_unique<CastExpressionIdValidator>(
         Tok, isTypeCast != NotTypeCast, isTypeCast != IsTypeCast);
     Validator->IsAddressOfOperand = isAddressOfOperand;
-    Validator->WantRemainingKeywords = Tok.isNot(tok::r_paren);
+    if (Tok.is(tok::periodstar) || Tok.is(tok::arrowstar)) {
+      Validator->WantExpressionKeywords = false;
+      Validator->WantRemainingKeywords = false;
+    } else {
+      Validator->WantRemainingKeywords = Tok.isNot(tok::r_paren);
+    }
     Name.setIdentifier(&II, ILoc);
     Res = Actions.ActOnIdExpression(
         getCurScope(), ScopeSpec, TemplateKWLoc, Name, Tok.is(tok::l_paren),
@@ -1471,7 +1484,19 @@
       if (LHS.isInvalid()) {
         SkipUntil(tok::r_paren, StopAtSemi);
       } else if (Tok.isNot(tok::r_paren)) {
-        PT.consumeClose();
+        bool HadDelayedTypo = false;
+        if (Actions.CorrectDelayedTyposInExpr(LHS).get() != LHS.get())
+          HadDelayedTypo = true;
+        for (auto &E : ArgExprs)
+          if (Actions.CorrectDelayedTyposInExpr(E).get() != E)
+            HadDelayedTypo = true;
+        // If there were delayed typos in the LHS or ArgExprs, call SkipUntil
+        // instead of PT.consumeClose() to avoid emitting extra diagnostics for
+        // the unmatched l_paren.
+        if (HadDelayedTypo)
+          SkipUntil(tok::r_paren, StopAtSemi);
+        else
+          PT.consumeClose();
         LHS = ExprError();
       } else {
         assert((ArgExprs.size() == 0 || 
@@ -2106,6 +2131,17 @@
     if (!getCurScope()->getFnParent() && !getCurScope()->getBlockParent()) {
       Result = ExprError(Diag(OpenLoc, diag::err_stmtexpr_file_scope));
     } else {
+      // Find the nearest non-record decl context. Variables declared in a
+      // statement expression behave as if they were declared in the enclosing
+      // function, block, or other code construct.
+      DeclContext *CodeDC = Actions.CurContext;
+      while (CodeDC->isRecord() || isa<EnumDecl>(CodeDC)) {
+        CodeDC = CodeDC->getParent();
+        assert(CodeDC && !CodeDC->isFileContext() &&
+               "statement expr not in code context");
+      }
+      Sema::ContextRAII SavedContext(Actions, CodeDC, /*NewThisContext=*/false);
+
       Actions.ActOnStartStmtExpr();
 
       StmtResult Stmt(ParseCompoundStatement(true));
@@ -2274,6 +2310,11 @@
     InMessageExpressionRAIIObject InMessage(*this, false);
 
     Result = ParseExpression(MaybeTypeCast);
+    if (!getLangOpts().CPlusPlus && MaybeTypeCast && Result.isUsable()) {
+      // Correct typos in non-C++ code earlier so that implicit-cast-like
+      // expressions are parsed correctly.
+      Result = Actions.CorrectDelayedTyposInExpr(Result);
+    }
     ExprType = SimpleExpr;
 
     if (isFoldOperator(Tok.getKind()) && NextToken().is(tok::ellipsis))
diff --git a/lib/Parse/ParseExprCXX.cpp b/lib/Parse/ParseExprCXX.cpp
index 08606d0..ed9f75d 100644
--- a/lib/Parse/ParseExprCXX.cpp
+++ b/lib/Parse/ParseExprCXX.cpp
@@ -1096,8 +1096,7 @@
 
     // MSVC-style attributes must be parsed before the mutable specifier to be
     // compatible with MSVC.
-    while (Tok.is(tok::kw___declspec))
-      ParseMicrosoftDeclSpec(Attr);
+    MaybeParseMicrosoftDeclSpecs(Attr, &DeclEndLoc);
 
     // Parse 'mutable'[opt].
     SourceLocation MutableLoc;
diff --git a/lib/Parse/ParseObjc.cpp b/lib/Parse/ParseObjc.cpp
index a597a16..691f53f 100644
--- a/lib/Parse/ParseObjc.cpp
+++ b/lib/Parse/ParseObjc.cpp
@@ -240,7 +240,7 @@
     SmallVector<Decl *, 8> ProtocolRefs;
     SmallVector<SourceLocation, 8> ProtocolLocs;
     if (Tok.is(tok::less) &&
-        ParseObjCProtocolReferences(ProtocolRefs, ProtocolLocs, true,
+        ParseObjCProtocolReferences(ProtocolRefs, ProtocolLocs, true, true,
                                     LAngleLoc, EndProtoLoc))
       return nullptr;
 
@@ -286,7 +286,7 @@
   SmallVector<SourceLocation, 8> ProtocolLocs;
   SourceLocation LAngleLoc, EndProtoLoc;
   if (Tok.is(tok::less) &&
-      ParseObjCProtocolReferences(ProtocolRefs, ProtocolLocs, true,
+      ParseObjCProtocolReferences(ProtocolRefs, ProtocolLocs, true, true,
                                   LAngleLoc, EndProtoLoc))
     return nullptr;
 
@@ -1151,7 +1151,7 @@
 bool Parser::
 ParseObjCProtocolReferences(SmallVectorImpl<Decl *> &Protocols,
                             SmallVectorImpl<SourceLocation> &ProtocolLocs,
-                            bool WarnOnDeclarations,
+                            bool WarnOnDeclarations, bool ForObjCContainer,
                             SourceLocation &LAngleLoc, SourceLocation &EndLoc) {
   assert(Tok.is(tok::less) && "expected <");
 
@@ -1186,7 +1186,7 @@
     return true;
 
   // Convert the list of protocols identifiers into a list of protocol decls.
-  Actions.FindProtocolDeclaration(WarnOnDeclarations,
+  Actions.FindProtocolDeclaration(WarnOnDeclarations, ForObjCContainer,
                                   &ProtocolIdents[0], ProtocolIdents.size(),
                                   Protocols);
   return false;
@@ -1201,6 +1201,7 @@
   SmallVector<Decl *, 8> ProtocolDecl;
   SmallVector<SourceLocation, 8> ProtocolLocs;
   bool Result = ParseObjCProtocolReferences(ProtocolDecl, ProtocolLocs, false,
+                                            false,
                                             LAngleLoc, EndProtoLoc);
   DS.setProtocolQualifiers(ProtocolDecl.data(), ProtocolDecl.size(),
                            ProtocolLocs.data(), LAngleLoc);
@@ -1416,7 +1417,7 @@
   SmallVector<Decl *, 8> ProtocolRefs;
   SmallVector<SourceLocation, 8> ProtocolLocs;
   if (Tok.is(tok::less) &&
-      ParseObjCProtocolReferences(ProtocolRefs, ProtocolLocs, false,
+      ParseObjCProtocolReferences(ProtocolRefs, ProtocolLocs, false, true,
                                   LAngleLoc, EndProtoLoc))
     return DeclGroupPtrTy();
 
diff --git a/lib/Parse/ParseOpenMP.cpp b/lib/Parse/ParseOpenMP.cpp
index 143ef70..187289e 100644
--- a/lib/Parse/ParseOpenMP.cpp
+++ b/lib/Parse/ParseOpenMP.cpp
@@ -223,6 +223,7 @@
     ParseScope OMPDirectiveScope(this, ScopeFlags);
     Actions.StartOpenMPDSABlock(DKind, DirName, Actions.getCurScope(), Loc);
 
+    Actions.StartOpenMPClauses();
     while (Tok.isNot(tok::annot_pragma_openmp_end)) {
       OpenMPClauseKind CKind =
           Tok.isAnnotation()
@@ -242,6 +243,7 @@
       if (Tok.is(tok::comma))
         ConsumeToken();
     }
+    Actions.EndOpenMPClauses();
     // End location of the directive.
     EndLoc = Tok.getLocation();
     // Consume final annot_pragma_openmp_end.
diff --git a/lib/Parse/ParsePragma.cpp b/lib/Parse/ParsePragma.cpp
index 96484b4..892d3c6 100644
--- a/lib/Parse/ParsePragma.cpp
+++ b/lib/Parse/ParsePragma.cpp
@@ -799,8 +799,10 @@
          "PragmaLoopHintInfo::Toks must contain at least one token.");
 
   // If no option is specified the argument is assumed to be a constant expr.
+  bool OptionUnroll = false;
   bool StateOption = false;
-  if (OptionInfo) { // Pragma unroll does not specify an option.
+  if (OptionInfo) { // Pragma Unroll does not specify an option.
+    OptionUnroll = OptionInfo->isStr("unroll");
     StateOption = llvm::StringSwitch<bool>(OptionInfo->getName())
                       .Case("vectorize", true)
                       .Case("interleave", true)
@@ -812,19 +814,20 @@
   if (Toks[0].is(tok::eof)) {
     ConsumeToken(); // The annotation token.
     Diag(Toks[0].getLocation(), diag::err_pragma_loop_missing_argument)
-        << /*StateArgument=*/StateOption << /*FullKeyword=*/PragmaUnroll;
+        << /*StateArgument=*/StateOption << /*FullKeyword=*/OptionUnroll;
     return false;
   }
 
   // Validate the argument.
   if (StateOption) {
     ConsumeToken(); // The annotation token.
-    bool OptionUnroll = OptionInfo->isStr("unroll");
     SourceLocation StateLoc = Toks[0].getLocation();
     IdentifierInfo *StateInfo = Toks[0].getIdentifierInfo();
-    if (!StateInfo || ((OptionUnroll ? !StateInfo->isStr("full")
-                                     : !StateInfo->isStr("enable")) &&
-                       !StateInfo->isStr("disable"))) {
+    if (!StateInfo ||
+        ((OptionUnroll ? !StateInfo->isStr("full")
+                       : !StateInfo->isStr("enable") &&
+                             !StateInfo->isStr("assume_safety")) &&
+         !StateInfo->isStr("disable"))) {
       Diag(Toks[0].getLocation(), diag::err_pragma_invalid_keyword)
           << /*FullKeyword=*/OptionUnroll;
       return false;
@@ -1489,7 +1492,7 @@
   Token *Toks = new Token[Pragma.size()];
   std::copy(Pragma.begin(), Pragma.end(), Toks);
   PP.EnterTokenStream(Toks, Pragma.size(),
-                      /*DisableMacroExpansion=*/true, /*OwnsTokens=*/true);
+                      /*DisableMacroExpansion=*/false, /*OwnsTokens=*/true);
 }
 
 /// \brief Handle '#pragma pointers_to_members'
@@ -1953,6 +1956,7 @@
 ///  loop-hint-keyword:
 ///    'enable'
 ///    'disable'
+///    'assume_safety'
 ///
 ///  unroll-hint-keyword:
 ///    'full'
diff --git a/lib/Parse/ParseStmt.cpp b/lib/Parse/ParseStmt.cpp
index c31216d..055bdea 100644
--- a/lib/Parse/ParseStmt.cpp
+++ b/lib/Parse/ParseStmt.cpp
@@ -1689,6 +1689,12 @@
                                                      FirstPart.get(),
                                                      Collection.get(),
                                                      T.getCloseLocation());
+  } else {
+    // In OpenMP loop region loop control variable must be captured and be
+    // private. Perform analysis of first part (if any).
+    if (getLangOpts().OpenMP && FirstPart.isUsable()) {
+      Actions.ActOnOpenMPLoopInitialization(ForLoc, FirstPart.get());
+    }
   }
 
   // C99 6.8.5p5 - In C99, the body of the for statement is a scope, even if
diff --git a/lib/Parse/ParseStmtAsm.cpp b/lib/Parse/ParseStmtAsm.cpp
index 85eff17..8ba9f15 100644
--- a/lib/Parse/ParseStmtAsm.cpp
+++ b/lib/Parse/ParseStmtAsm.cpp
@@ -616,10 +616,6 @@
     return ParseMicrosoftAsmStatement(AsmLoc);
   }
 
-  // Check if GNU-style inline Asm is disabled.
-  if (!getLangOpts().GNUAsm)
-    Diag(AsmLoc, diag::err_gnu_inline_asm_disabled);
-
   DeclSpec DS(AttrFactory);
   SourceLocation Loc = Tok.getLocation();
   ParseTypeQualifierListOpt(DS, AR_VendorAttributesParsed);
@@ -644,6 +640,15 @@
   T.consumeOpen();
 
   ExprResult AsmString(ParseAsmStringLiteral());
+
+  // Check if GNU-style InlineAsm is disabled.
+  // Error on anything other than empty string.
+  if (!(getLangOpts().GNUAsm || AsmString.isInvalid())) {
+    const auto *SL = cast<StringLiteral>(AsmString.get());
+    if (!SL->getString().trim().empty())
+      Diag(Loc, diag::err_gnu_inline_asm_disabled);
+  }
+
   if (AsmString.isInvalid()) {
     // Consume up to and including the closing paren.
     T.skipToEnd();
diff --git a/lib/Parse/ParseTemplate.cpp b/lib/Parse/ParseTemplate.cpp
index 53de72c..f1467fe 100644
--- a/lib/Parse/ParseTemplate.cpp
+++ b/lib/Parse/ParseTemplate.cpp
@@ -14,6 +14,7 @@
 #include "clang/Parse/Parser.h"
 #include "RAIIObjectsForParser.h"
 #include "clang/AST/ASTConsumer.h"
+#include "clang/AST/ASTContext.h"
 #include "clang/AST/DeclTemplate.h"
 #include "clang/Parse/ParseDiagnostic.h"
 #include "clang/Sema/DeclSpec.h"
@@ -1301,7 +1302,8 @@
   TemplateParameterDepthRAII CurTemplateDepthTracker(TemplateParameterDepth);
 
   // To restore the context after late parsing.
-  Sema::ContextRAII GlobalSavedContext(Actions, Actions.CurContext);
+  Sema::ContextRAII GlobalSavedContext(
+      Actions, Actions.Context.getTranslationUnitDecl());
 
   SmallVector<ParseScope*, 4> TemplateParamScopeStack;
 
diff --git a/lib/Parse/Parser.cpp b/lib/Parse/Parser.cpp
index 3b56102..dea7a69 100644
--- a/lib/Parse/Parser.cpp
+++ b/lib/Parse/Parser.cpp
@@ -38,6 +38,26 @@
     return false;
   }
 };
+
+/// \brief RAIIObject to destroy the contents of a SmallVector of
+/// TemplateIdAnnotation pointers and clear the vector.
+class DestroyTemplateIdAnnotationsRAIIObj {
+  SmallVectorImpl<TemplateIdAnnotation *> &Container;
+
+public:
+  DestroyTemplateIdAnnotationsRAIIObj(
+      SmallVectorImpl<TemplateIdAnnotation *> &Container)
+      : Container(Container) {}
+
+  ~DestroyTemplateIdAnnotationsRAIIObj() {
+    for (SmallVectorImpl<TemplateIdAnnotation *>::iterator I =
+             Container.begin(),
+                                                           E = Container.end();
+         I != E; ++I)
+      (*I)->Destroy();
+    Container.clear();
+  }
+};
 } // end anonymous namespace
 
 IdentifierInfo *Parser::getSEHExceptKeyword() {
@@ -414,6 +434,15 @@
 
   PP.clearCodeCompletionHandler();
 
+  if (getLangOpts().DelayedTemplateParsing &&
+      !PP.isIncrementalProcessingEnabled() && !TemplateIds.empty()) {
+    // If an ASTConsumer parsed delay-parsed templates in their
+    // HandleTranslationUnit() method, TemplateIds created there were not
+    // guarded by a DestroyTemplateIdAnnotationsRAIIObj object in
+    // ParseTopLevelDecl(). Destroy them here.
+    DestroyTemplateIdAnnotationsRAIIObj CleanupRAII(TemplateIds);
+  }
+
   assert(TemplateIds.empty() && "Still alive TemplateIdAnnotations around?");
 }
 
@@ -490,26 +519,6 @@
   ConsumeToken();
 }
 
-namespace {
-  /// \brief RAIIObject to destroy the contents of a SmallVector of
-  /// TemplateIdAnnotation pointers and clear the vector.
-  class DestroyTemplateIdAnnotationsRAIIObj {
-    SmallVectorImpl<TemplateIdAnnotation *> &Container;
-  public:
-    DestroyTemplateIdAnnotationsRAIIObj(SmallVectorImpl<TemplateIdAnnotation *>
-                                       &Container)
-      : Container(Container) {}
-
-    ~DestroyTemplateIdAnnotationsRAIIObj() {
-      for (SmallVectorImpl<TemplateIdAnnotation *>::iterator I =
-           Container.begin(), E = Container.end();
-           I != E; ++I)
-        (*I)->Destroy();
-      Container.clear();
-    }
-  };
-}
-
 void Parser::LateTemplateParserCleanupCallback(void *P) {
   // While this RAII helper doesn't bracket any actual work, the destructor will
   // clean up annotations that were created during ActOnEndOfTranslationUnit
@@ -541,8 +550,14 @@
     return false;
 
   case tok::annot_module_begin:
+    Actions.ActOnModuleBegin(Tok.getLocation(), reinterpret_cast<Module *>(
+                                                    Tok.getAnnotationValue()));
+    ConsumeToken();
+    return false;
+
   case tok::annot_module_end:
-    // FIXME: Update visibility based on the submodule we're in.
+    Actions.ActOnModuleEnd(Tok.getLocation(), reinterpret_cast<Module *>(
+                                                  Tok.getAnnotationValue()));
     ConsumeToken();
     return false;
 
@@ -669,8 +684,18 @@
 
     SourceLocation StartLoc = Tok.getLocation();
     SourceLocation EndLoc;
+
     ExprResult Result(ParseSimpleAsm(&EndLoc));
 
+    // Check if GNU-style InlineAsm is disabled.
+    // Empty asm string is allowed because it will not introduce
+    // any assembly code.
+    if (!(getLangOpts().GNUAsm || Result.isInvalid())) {
+      const auto *SL = cast<StringLiteral>(Result.get());
+      if (!SL->getString().trim().empty())
+        Diag(StartLoc, diag::err_gnu_inline_asm_disabled);
+    }
+
     ExpectAndConsume(tok::semi, diag::err_expected_after,
                      "top-level asm block");
 
diff --git a/lib/Parse/RAIIObjectsForParser.h b/lib/Parse/RAIIObjectsForParser.h
index 71cfec4..36d87eb 100644
--- a/lib/Parse/RAIIObjectsForParser.h
+++ b/lib/Parse/RAIIObjectsForParser.h
@@ -58,6 +58,12 @@
         Active = false;
       }
     }
+    SuppressAccessChecks(SuppressAccessChecks &&Other)
+      : S(Other.S), DiagnosticPool(std::move(Other.DiagnosticPool)),
+        State(Other.State), Active(Other.Active) {
+      Other.Active = false;
+    }
+    void operator=(SuppressAccessChecks &&Other) = delete;
 
     void done() {
       assert(Active && "trying to end an inactive suppression");
@@ -423,7 +429,13 @@
       if (P.Tok.is(Close)) {
         LClose = (P.*Consumer)();
         return false;
-      } 
+      } else if (P.Tok.is(tok::semi) && P.NextToken().is(Close)) {
+        SourceLocation SemiLoc = P.ConsumeToken();
+        P.Diag(SemiLoc, diag::err_unexpected_semi)
+            << Close << FixItHint::CreateRemoval(SourceRange(SemiLoc, SemiLoc));
+        LClose = (P.*Consumer)();
+        return false;
+      }
       
       return diagnoseMissingClose();
     }
diff --git a/lib/Sema/AnalysisBasedWarnings.cpp b/lib/Sema/AnalysisBasedWarnings.cpp
index d697ecb..36030b9 100644
--- a/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/lib/Sema/AnalysisBasedWarnings.cpp
@@ -574,28 +574,29 @@
 /// ContainsReference - A visitor class to search for references to
 /// a particular declaration (the needle) within any evaluated component of an
 /// expression (recursively).
-class ContainsReference : public EvaluatedExprVisitor<ContainsReference> {
+class ContainsReference : public ConstEvaluatedExprVisitor<ContainsReference> {
   bool FoundReference;
   const DeclRefExpr *Needle;
 
 public:
-  ContainsReference(ASTContext &Context, const DeclRefExpr *Needle)
-    : EvaluatedExprVisitor<ContainsReference>(Context),
-      FoundReference(false), Needle(Needle) {}
+  typedef ConstEvaluatedExprVisitor<ContainsReference> Inherited;
 
-  void VisitExpr(Expr *E) {
+  ContainsReference(ASTContext &Context, const DeclRefExpr *Needle)
+    : Inherited(Context), FoundReference(false), Needle(Needle) {}
+
+  void VisitExpr(const Expr *E) {
     // Stop evaluating if we already have a reference.
     if (FoundReference)
       return;
 
-    EvaluatedExprVisitor<ContainsReference>::VisitExpr(E);
+    Inherited::VisitExpr(E);
   }
 
-  void VisitDeclRefExpr(DeclRefExpr *E) {
+  void VisitDeclRefExpr(const DeclRefExpr *E) {
     if (E == Needle)
       FoundReference = true;
     else
-      EvaluatedExprVisitor<ContainsReference>::VisitDeclRefExpr(E);
+      Inherited::VisitDeclRefExpr(E);
   }
 
   bool doesContainReference() const { return FoundReference; }
@@ -854,7 +855,7 @@
         return false;
 
       ContainsReference CR(S.Context, DRE);
-      CR.Visit(const_cast<Expr*>(Initializer));
+      CR.Visit(Initializer);
       if (CR.doesContainReference()) {
         S.Diag(DRE->getLocStart(),
                diag::warn_uninit_self_reference_in_init)
@@ -1463,7 +1464,7 @@
       PartialDiagnosticAt FNote(CurrentFunction->getBody()->getLocStart(),
                                 S.PDiag(diag::note_thread_warning_in_fun)
                                     << CurrentFunction->getNameAsString());
-      ONS.push_back(FNote);
+      ONS.push_back(std::move(FNote));
     }
     return ONS;
   }
@@ -1477,7 +1478,7 @@
       PartialDiagnosticAt FNote(CurrentFunction->getBody()->getLocStart(),
                                 S.PDiag(diag::note_thread_warning_in_fun)
                                     << CurrentFunction->getNameAsString());
-      ONS.push_back(FNote);
+      ONS.push_back(std::move(FNote));
     }
     return ONS;
   }
@@ -1490,7 +1491,7 @@
     if (!Loc.isValid())
       Loc = FunLocation;
     PartialDiagnosticAt Warning(Loc, S.PDiag(DiagID) << Kind << LockName);
-    Warnings.push_back(DelayedDiag(Warning, getNotes()));
+    Warnings.emplace_back(std::move(Warning), getNotes());
   }
 
  public:
@@ -1516,7 +1517,7 @@
   void handleInvalidLockExp(StringRef Kind, SourceLocation Loc) override {
     PartialDiagnosticAt Warning(Loc, S.PDiag(diag::warn_cannot_resolve_lock)
                                          << Loc);
-    Warnings.push_back(DelayedDiag(Warning, getNotes()));
+    Warnings.emplace_back(std::move(Warning), getNotes());
   }
 
   void handleUnmatchedUnlock(StringRef Kind, Name LockName,
@@ -1532,7 +1533,7 @@
     PartialDiagnosticAt Warning(Loc, S.PDiag(diag::warn_unlock_kind_mismatch)
                                          << Kind << LockName << Received
                                          << Expected);
-    Warnings.push_back(DelayedDiag(Warning, getNotes()));
+    Warnings.emplace_back(std::move(Warning), getNotes());
   }
 
   void handleDoubleLock(StringRef Kind, Name LockName, SourceLocation Loc) override {
@@ -1566,10 +1567,10 @@
     if (LocLocked.isValid()) {
       PartialDiagnosticAt Note(LocLocked, S.PDiag(diag::note_locked_here)
                                               << Kind);
-      Warnings.push_back(DelayedDiag(Warning, getNotes(Note)));
+      Warnings.emplace_back(std::move(Warning), getNotes(Note));
       return;
     }
-    Warnings.push_back(DelayedDiag(Warning, getNotes()));
+    Warnings.emplace_back(std::move(Warning), getNotes());
   }
 
   void handleExclusiveAndShared(StringRef Kind, Name LockName,
@@ -1580,7 +1581,7 @@
                                     << Kind << LockName);
     PartialDiagnosticAt Note(Loc2, S.PDiag(diag::note_lock_exclusive_and_shared)
                                        << Kind << LockName);
-    Warnings.push_back(DelayedDiag(Warning, getNotes(Note)));
+    Warnings.emplace_back(std::move(Warning), getNotes(Note));
   }
 
   void handleNoMutexHeld(StringRef Kind, const NamedDecl *D,
@@ -1593,7 +1594,7 @@
                         diag::warn_var_deref_requires_any_lock;
     PartialDiagnosticAt Warning(Loc, S.PDiag(DiagID)
       << D->getNameAsString() << getLockKindFromAccessKind(AK));
-    Warnings.push_back(DelayedDiag(Warning, getNotes()));
+    Warnings.emplace_back(std::move(Warning), getNotes());
   }
 
   void handleMutexNotHeld(StringRef Kind, const NamedDecl *D,
@@ -1628,9 +1629,9 @@
         PartialDiagnosticAt VNote(D->getLocation(),
                                  S.PDiag(diag::note_guarded_by_declared_here)
                                      << D->getNameAsString());
-        Warnings.push_back(DelayedDiag(Warning, getNotes(Note, VNote)));
+        Warnings.emplace_back(std::move(Warning), getNotes(Note, VNote));
       } else
-        Warnings.push_back(DelayedDiag(Warning, getNotes(Note)));
+        Warnings.emplace_back(std::move(Warning), getNotes(Note));
     } else {
       switch (POK) {
         case POK_VarAccess:
@@ -1656,9 +1657,9 @@
         PartialDiagnosticAt Note(D->getLocation(),
                                  S.PDiag(diag::note_guarded_by_declared_here)
                                      << D->getNameAsString());
-        Warnings.push_back(DelayedDiag(Warning, getNotes(Note)));
+        Warnings.emplace_back(std::move(Warning), getNotes(Note));
       } else
-        Warnings.push_back(DelayedDiag(Warning, getNotes()));
+        Warnings.emplace_back(std::move(Warning), getNotes());
     }
   }
 
@@ -1667,7 +1668,7 @@
     PartialDiagnosticAt Warning(Loc,
         S.PDiag(diag::warn_acquire_requires_negative_cap)
         << Kind << LockName << Neg);
-    Warnings.push_back(DelayedDiag(Warning, getNotes()));
+    Warnings.emplace_back(std::move(Warning), getNotes());
   }
 
 
@@ -1675,20 +1676,20 @@
                              SourceLocation Loc) override {
     PartialDiagnosticAt Warning(Loc, S.PDiag(diag::warn_fun_excludes_mutex)
                                          << Kind << FunName << LockName);
-    Warnings.push_back(DelayedDiag(Warning, getNotes()));
+    Warnings.emplace_back(std::move(Warning), getNotes());
   }
 
   void handleLockAcquiredBefore(StringRef Kind, Name L1Name, Name L2Name,
                                 SourceLocation Loc) override {
     PartialDiagnosticAt Warning(Loc,
       S.PDiag(diag::warn_acquired_before) << Kind << L1Name << L2Name);
-    Warnings.push_back(DelayedDiag(Warning, getNotes()));
+    Warnings.emplace_back(std::move(Warning), getNotes());
   }
 
   void handleBeforeAfterCycle(Name L1Name, SourceLocation Loc) override {
     PartialDiagnosticAt Warning(Loc,
       S.PDiag(diag::warn_acquired_before_after_cycle) << L1Name);
-    Warnings.push_back(DelayedDiag(Warning, getNotes()));
+    Warnings.emplace_back(std::move(Warning), getNotes());
   }
 
   void enterFunction(const FunctionDecl* FD) override {
@@ -1732,8 +1733,8 @@
                              StringRef VariableName) override {
     PartialDiagnosticAt Warning(Loc, S.PDiag(diag::warn_loop_state_mismatch) <<
       VariableName);
-    
-    Warnings.push_back(DelayedDiag(Warning, OptionalNotes()));
+
+    Warnings.emplace_back(std::move(Warning), OptionalNotes());
   }
   
   void warnParamReturnTypestateMismatch(SourceLocation Loc,
@@ -1744,8 +1745,8 @@
     PartialDiagnosticAt Warning(Loc, S.PDiag(
       diag::warn_param_return_typestate_mismatch) << VariableName <<
         ExpectedState << ObservedState);
-    
-    Warnings.push_back(DelayedDiag(Warning, OptionalNotes()));
+
+    Warnings.emplace_back(std::move(Warning), OptionalNotes());
   }
   
   void warnParamTypestateMismatch(SourceLocation Loc, StringRef ExpectedState,
@@ -1753,16 +1754,16 @@
     
     PartialDiagnosticAt Warning(Loc, S.PDiag(
       diag::warn_param_typestate_mismatch) << ExpectedState << ObservedState);
-    
-    Warnings.push_back(DelayedDiag(Warning, OptionalNotes()));
+
+    Warnings.emplace_back(std::move(Warning), OptionalNotes());
   }
   
   void warnReturnTypestateForUnconsumableType(SourceLocation Loc,
                                               StringRef TypeName) override {
     PartialDiagnosticAt Warning(Loc, S.PDiag(
       diag::warn_return_typestate_for_unconsumable_type) << TypeName);
-    
-    Warnings.push_back(DelayedDiag(Warning, OptionalNotes()));
+
+    Warnings.emplace_back(std::move(Warning), OptionalNotes());
   }
   
   void warnReturnTypestateMismatch(SourceLocation Loc, StringRef ExpectedState,
@@ -1770,8 +1771,8 @@
                                     
     PartialDiagnosticAt Warning(Loc, S.PDiag(
       diag::warn_return_typestate_mismatch) << ExpectedState << ObservedState);
-    
-    Warnings.push_back(DelayedDiag(Warning, OptionalNotes()));
+
+    Warnings.emplace_back(std::move(Warning), OptionalNotes());
   }
   
   void warnUseOfTempInInvalidState(StringRef MethodName, StringRef State,
@@ -1779,8 +1780,8 @@
                                                     
     PartialDiagnosticAt Warning(Loc, S.PDiag(
       diag::warn_use_of_temp_in_invalid_state) << MethodName << State);
-    
-    Warnings.push_back(DelayedDiag(Warning, OptionalNotes()));
+
+    Warnings.emplace_back(std::move(Warning), OptionalNotes());
   }
   
   void warnUseInInvalidState(StringRef MethodName, StringRef VariableName,
@@ -1788,8 +1789,8 @@
   
     PartialDiagnosticAt Warning(Loc, S.PDiag(diag::warn_use_in_invalid_state) <<
                                 MethodName << VariableName << State);
-    
-    Warnings.push_back(DelayedDiag(Warning, OptionalNotes()));
+
+    Warnings.emplace_back(std::move(Warning), OptionalNotes());
   }
 };
 }}}
@@ -1886,6 +1887,7 @@
   AC.getCFGBuildOptions().AddImplicitDtors = true;
   AC.getCFGBuildOptions().AddTemporaryDtors = true;
   AC.getCFGBuildOptions().AddCXXNewAllocator = false;
+  AC.getCFGBuildOptions().AddCXXDefaultInitExprInCtors = true;
 
   // Force that certain expressions appear as CFGElements in the CFG.  This
   // is used to speed up various analyses.
diff --git a/lib/Sema/JumpDiagnostics.cpp b/lib/Sema/JumpDiagnostics.cpp
index aac28be..6b9eb2a 100644
--- a/lib/Sema/JumpDiagnostics.cpp
+++ b/lib/Sema/JumpDiagnostics.cpp
@@ -72,10 +72,10 @@
   JumpScopeChecker(Stmt *Body, Sema &S);
 private:
   void BuildScopeInformation(Decl *D, unsigned &ParentScope);
-  void BuildScopeInformation(VarDecl *D, const BlockDecl *BDecl, 
+  void BuildScopeInformation(VarDecl *D, const BlockDecl *BDecl,
                              unsigned &ParentScope);
   void BuildScopeInformation(Stmt *S, unsigned &origParentScope);
-  
+
   void VerifyJumps();
   void VerifyIndirectJumps();
   void NoteJumpIntoScopes(ArrayRef<unsigned> ToScopes);
@@ -166,7 +166,7 @@
       //   A program that jumps from a point where a variable with automatic
       //   storage duration is not in scope to a point where it is in scope
       //   is ill-formed unless the variable has scalar type, class type with
-      //   a trivial default constructor and a trivial destructor, a 
+      //   a trivial default constructor and a trivial destructor, a
       //   cv-qualified version of one of these types, or an array of one of
       //   the preceding types and is declared without an initializer.
 
@@ -218,7 +218,7 @@
                                D->getLocation()));
     ParentScope = Scopes.size()-1;
   }
-  
+
   // If the decl has an initializer, walk it with the potentially new
   // scope we just installed.
   if (VarDecl *VD = dyn_cast<VarDecl>(D))
@@ -227,8 +227,8 @@
 }
 
 /// \brief Build scope information for a captured block literal variables.
-void JumpScopeChecker::BuildScopeInformation(VarDecl *D, 
-                                             const BlockDecl *BDecl, 
+void JumpScopeChecker::BuildScopeInformation(VarDecl *D,
+                                             const BlockDecl *BDecl,
                                              unsigned &ParentScope) {
   // exclude captured __block variables; there's no destructor
   // associated with the block literal for them.
@@ -257,7 +257,7 @@
     SourceLocation Loc = D->getLocation();
     if (Loc.isInvalid())
       Loc = BDecl->getLocation();
-    Scopes.push_back(GotoScope(ParentScope, 
+    Scopes.push_back(GotoScope(ParentScope,
                                Diags.first, Diags.second, Loc));
     ParentScope = Scopes.size()-1;
   }
@@ -272,11 +272,11 @@
   // propagate out into the enclosing scope.  Otherwise we have to worry
   // about block literals, which have the lifetime of their enclosing statement.
   unsigned independentParentScope = origParentScope;
-  unsigned &ParentScope = ((isa<Expr>(S) && !isa<StmtExpr>(S)) 
+  unsigned &ParentScope = ((isa<Expr>(S) && !isa<StmtExpr>(S))
                             ? origParentScope : independentParentScope);
 
   bool SkipFirstSubStmt = false;
-  
+
   // If we found a label, remember that it is in ParentScope scope.
   switch (S->getStmtClass()) {
   case Stmt::AddrLabelExprClass:
@@ -307,7 +307,7 @@
       SkipFirstSubStmt = true;
     }
     // Fall through
-      
+
   case Stmt::GotoStmtClass:
     // Remember both what scope a goto is in as well as the fact that we have
     // it.  This makes the second scan not have to walk the AST again.
@@ -332,7 +332,7 @@
                                  diag::note_protected_by_cxx_catch,
                                  diag::note_exits_cxx_catch,
                                  CS->getSourceRange().getBegin()));
-      BuildScopeInformation(CS->getHandlerBlock(), 
+      BuildScopeInformation(CS->getHandlerBlock(),
                             (newParentScope = Scopes.size()-1));
     }
     return;
@@ -354,14 +354,14 @@
                                  diag::note_protected_by_seh_except,
                                  diag::note_exits_seh_except,
                                  Except->getSourceRange().getBegin()));
-      BuildScopeInformation(Except->getBlock(), 
+      BuildScopeInformation(Except->getBlock(),
                             (newParentScope = Scopes.size()-1));
     } else if (SEHFinallyStmt *Finally = TS->getFinallyHandler()) {
       Scopes.push_back(GotoScope(ParentScope,
                                  diag::note_protected_by_seh_finally,
                                  diag::note_exits_seh_finally,
                                  Finally->getSourceRange().getBegin()));
-      BuildScopeInformation(Finally->getBlock(), 
+      BuildScopeInformation(Finally->getBlock(),
                             (newParentScope = Scopes.size()-1));
     }
 
@@ -377,7 +377,7 @@
       SkipFirstSubStmt = false;
       continue;
     }
-    
+
     Stmt *SubStmt = *CI;
     if (!SubStmt) continue;
 
@@ -428,7 +428,7 @@
                                    diag::note_exits_objc_catch,
                                    AC->getAtCatchLoc()));
         // @catches are nested and it isn't
-        BuildScopeInformation(AC->getCatchBody(), 
+        BuildScopeInformation(AC->getCatchBody(),
                               (newParentScope = Scopes.size()-1));
       }
 
@@ -443,7 +443,7 @@
 
       continue;
     }
-    
+
     unsigned newParentScope;
     // Disallow jumps into the protected statement of an @synchronized, but
     // allow jumps into the object expression it protects.
@@ -459,7 +459,7 @@
                                  diag::note_protected_by_objc_synchronized,
                                  diag::note_exits_objc_synchronized,
                                  AS->getAtSynchronizedLoc()));
-      BuildScopeInformation(AS->getSynchBody(), 
+      BuildScopeInformation(AS->getSynchBody(),
                             (newParentScope = Scopes.size()-1));
       continue;
     }
diff --git a/lib/Sema/MultiplexExternalSemaSource.cpp b/lib/Sema/MultiplexExternalSemaSource.cpp
index 51a1274..9ecb5a7 100644
--- a/lib/Sema/MultiplexExternalSemaSource.cpp
+++ b/lib/Sema/MultiplexExternalSemaSource.cpp
@@ -212,7 +212,15 @@
   for(size_t i = 0; i < Sources.size(); ++i)
     Sources[i]->ReadUndefinedButUsed(Undefined);
 }
-  
+
+void MultiplexExternalSemaSource::ReadMismatchingDeleteExpressions(
+    llvm::MapVector<FieldDecl *,
+                    llvm::SmallVector<std::pair<SourceLocation, bool>, 4>> &
+        Exprs) {
+  for (auto &Source : Sources)
+    Source->ReadMismatchingDeleteExpressions(Exprs);
+}
+
 bool MultiplexExternalSemaSource::LookupUnqualified(LookupResult &R, Scope *S){ 
   for(size_t i = 0; i < Sources.size(); ++i)
     Sources[i]->LookupUnqualified(R, S);
diff --git a/lib/Sema/Sema.cpp b/lib/Sema/Sema.cpp
index 6825dfa..3e0b4a5 100644
--- a/lib/Sema/Sema.cpp
+++ b/lib/Sema/Sema.cpp
@@ -99,6 +99,7 @@
     GlobalNewDeleteDeclared(false),
     TUKind(TUKind),
     NumSFINAEErrors(0),
+    CachedFakeTopLevelModule(nullptr),
     AccessCheckingSFINAE(false), InNonInstantiationSFINAEContext(false),
     NonInstantiationEntries(0), ArgumentPackSubstitutionIndex(-1),
     CurrentInstantiationScope(nullptr), DisableTypoCorrection(false),
@@ -721,11 +722,7 @@
         ModMap.resolveConflicts(Mod, /*Complain=*/false);
 
         // Queue the submodules, so their exports will also be resolved.
-        for (Module::submodule_iterator Sub = Mod->submodule_begin(),
-                                     SubEnd = Mod->submodule_end();
-             Sub != SubEnd; ++Sub) {
-          Stack.push_back(*Sub);
-        }
+        Stack.append(Mod->submodule_begin(), Mod->submodule_end());
       }
     }
 
@@ -860,6 +857,17 @@
     }
   }
 
+  if (!Diags.isIgnored(diag::warn_mismatched_delete_new, SourceLocation())) {
+    if (ExternalSource)
+      ExternalSource->ReadMismatchingDeleteExpressions(DeleteExprs);
+    for (const auto &DeletedFieldInfo : DeleteExprs) {
+      for (const auto &DeleteExprLoc : DeletedFieldInfo.second) {
+        AnalyzeDeleteExprMismatch(DeletedFieldInfo.first, DeleteExprLoc.first,
+                                  DeleteExprLoc.second);
+      }
+    }
+  }
+
   // Check we've noticed that we're no longer parsing the initializer for every
   // variable. If we miss cases, then at best we have a performance issue and
   // at worst a rejects-valid bug.
@@ -1219,6 +1227,9 @@
                        llvm::DenseMap<NamedDecl *, SourceLocation> &Undefined) {
 }
 
+void ExternalSemaSource::ReadMismatchingDeleteExpressions(llvm::MapVector<
+    FieldDecl *, llvm::SmallVector<std::pair<SourceLocation, bool>, 4>> &) {}
+
 void PrettyDeclStackTraceEntry::print(raw_ostream &OS) const {
   SourceLocation Loc = this->Loc;
   if (!Loc.isValid() && TheDecl) Loc = TheDecl->getLocation();
@@ -1467,3 +1478,8 @@
 
   return dyn_cast<CapturedRegionScopeInfo>(FunctionScopes.back());
 }
+
+const llvm::MapVector<FieldDecl *, Sema::DeleteLocs> &
+Sema::getMismatchingDeleteExpressions() const {
+  return DeleteExprs;
+}
diff --git a/lib/Sema/SemaAccess.cpp b/lib/Sema/SemaAccess.cpp
index 37240c2..0e973cc 100644
--- a/lib/Sema/SemaAccess.cpp
+++ b/lib/Sema/SemaAccess.cpp
@@ -1462,7 +1462,7 @@
   case AR_inaccessible: return Sema::AR_inaccessible;
   case AR_dependent: return Sema::AR_dependent;
   }
-  llvm_unreachable("falling off end");
+  llvm_unreachable("invalid access result");
 }
 
 void Sema::HandleDelayedAccessCheck(DelayedDiagnostic &DD, Decl *D) {
diff --git a/lib/Sema/SemaCast.cpp b/lib/Sema/SemaCast.cpp
index d28a244..d9dc4df 100644
--- a/lib/Sema/SemaCast.cpp
+++ b/lib/Sema/SemaCast.cpp
@@ -1081,6 +1081,15 @@
           Kind = CK_BitCast;
           return TC_Success;
         }
+
+        // Microsoft permits static_cast from 'pointer-to-void' to
+        // 'pointer-to-function'.
+        if (!CStyle && Self.getLangOpts().MSVCCompat &&
+            DestPointee->isFunctionType()) {
+          Self.Diag(OpRange.getBegin(), diag::ext_ms_cast_fn_obj) << OpRange;
+          Kind = CK_BitCast;
+          return TC_Success;
+        }
       }
       else if (DestType->isObjCObjectPointerType()) {
         // allow both c-style cast and static_cast of objective-c pointers as 
@@ -1817,8 +1826,8 @@
     //   can be explicitly converted to an rvalue of type "pointer to member
     //   of Y of type T2" if T1 and T2 are both function types or both object
     //   types.
-    if (DestMemPtr->getPointeeType()->isFunctionType() !=
-        SrcMemPtr->getPointeeType()->isFunctionType())
+    if (DestMemPtr->isMemberFunctionPointer() !=
+        SrcMemPtr->isMemberFunctionPointer())
       return TC_NotApplicable;
 
     // C++ 5.2.10p2: The reinterpret_cast operator shall not cast away
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp
index 05eaaec..2014052 100644
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -623,7 +623,10 @@
   case NeonTypeFlags::Poly16:
     return IsPolyUnsigned ? Context.UnsignedShortTy : Context.ShortTy;
   case NeonTypeFlags::Poly64:
-    return Context.UnsignedLongTy;
+    if (IsInt64Long)
+      return Context.UnsignedLongTy;
+    else
+      return Context.UnsignedLongLongTy;
   case NeonTypeFlags::Poly128:
     break;
   case NeonTypeFlags::Float16:
@@ -833,6 +836,16 @@
       SemaBuiltinConstantArgRange(TheCall, 2, 0, 1);
   }
 
+  if (BuiltinID == ARM::BI__builtin_arm_rsr64 ||
+      BuiltinID == ARM::BI__builtin_arm_wsr64)
+    return SemaBuiltinARMSpecialReg(BuiltinID, TheCall, 0, 3, false);
+
+  if (BuiltinID == ARM::BI__builtin_arm_rsr ||
+      BuiltinID == ARM::BI__builtin_arm_rsrp ||
+      BuiltinID == ARM::BI__builtin_arm_wsr ||
+      BuiltinID == ARM::BI__builtin_arm_wsrp)
+    return SemaBuiltinARMSpecialReg(BuiltinID, TheCall, 0, 5, true);
+
   if (CheckNeonBuiltinFunctionCall(BuiltinID, TheCall))
     return true;
 
@@ -873,6 +886,16 @@
       SemaBuiltinConstantArgRange(TheCall, 4, 0, 1);
   }
 
+  if (BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
+      BuiltinID == AArch64::BI__builtin_arm_wsr64)
+    return SemaBuiltinARMSpecialReg(BuiltinID, TheCall, 0, 5, false);
+
+  if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
+      BuiltinID == AArch64::BI__builtin_arm_rsrp ||
+      BuiltinID == AArch64::BI__builtin_arm_wsr ||
+      BuiltinID == AArch64::BI__builtin_arm_wsrp)
+    return SemaBuiltinARMSpecialReg(BuiltinID, TheCall, 0, 5, true);
+
   if (CheckNeonBuiltinFunctionCall(BuiltinID, TheCall))
     return true;
 
@@ -959,7 +982,49 @@
              << Arg->getSourceRange();
   }
 
-  return false;
+  // For intrinsics which take an immediate value as part of the instruction,
+  // range check them here.
+  unsigned i = 0, l = 0, u = 0;
+  switch (BuiltinID) {
+  default: return false;
+  case SystemZ::BI__builtin_s390_lcbb: i = 1; l = 0; u = 15; break;
+  case SystemZ::BI__builtin_s390_verimb:
+  case SystemZ::BI__builtin_s390_verimh:
+  case SystemZ::BI__builtin_s390_verimf:
+  case SystemZ::BI__builtin_s390_verimg: i = 3; l = 0; u = 255; break;
+  case SystemZ::BI__builtin_s390_vfaeb:
+  case SystemZ::BI__builtin_s390_vfaeh:
+  case SystemZ::BI__builtin_s390_vfaef:
+  case SystemZ::BI__builtin_s390_vfaebs:
+  case SystemZ::BI__builtin_s390_vfaehs:
+  case SystemZ::BI__builtin_s390_vfaefs:
+  case SystemZ::BI__builtin_s390_vfaezb:
+  case SystemZ::BI__builtin_s390_vfaezh:
+  case SystemZ::BI__builtin_s390_vfaezf:
+  case SystemZ::BI__builtin_s390_vfaezbs:
+  case SystemZ::BI__builtin_s390_vfaezhs:
+  case SystemZ::BI__builtin_s390_vfaezfs: i = 2; l = 0; u = 15; break;
+  case SystemZ::BI__builtin_s390_vfidb:
+    return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15) ||
+           SemaBuiltinConstantArgRange(TheCall, 2, 0, 15);
+  case SystemZ::BI__builtin_s390_vftcidb: i = 1; l = 0; u = 4095; break;
+  case SystemZ::BI__builtin_s390_vlbb: i = 1; l = 0; u = 15; break;
+  case SystemZ::BI__builtin_s390_vpdi: i = 2; l = 0; u = 15; break;
+  case SystemZ::BI__builtin_s390_vsldb: i = 2; l = 0; u = 15; break;
+  case SystemZ::BI__builtin_s390_vstrcb:
+  case SystemZ::BI__builtin_s390_vstrch:
+  case SystemZ::BI__builtin_s390_vstrcf:
+  case SystemZ::BI__builtin_s390_vstrczb:
+  case SystemZ::BI__builtin_s390_vstrczh:
+  case SystemZ::BI__builtin_s390_vstrczf:
+  case SystemZ::BI__builtin_s390_vstrcbs:
+  case SystemZ::BI__builtin_s390_vstrchs:
+  case SystemZ::BI__builtin_s390_vstrcfs:
+  case SystemZ::BI__builtin_s390_vstrczbs:
+  case SystemZ::BI__builtin_s390_vstrczhs:
+  case SystemZ::BI__builtin_s390_vstrczfs: i = 3; l = 0; u = 15; break;
+  }
+  return SemaBuiltinConstantArgRange(TheCall, i, l, u);
 }
 
 bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
@@ -1289,11 +1354,14 @@
 
 bool Sema::CheckPointerCall(NamedDecl *NDecl, CallExpr *TheCall,
                             const FunctionProtoType *Proto) {
-  const VarDecl *V = dyn_cast<VarDecl>(NDecl);
-  if (!V)
+  QualType Ty;
+  if (const auto *V = dyn_cast<VarDecl>(NDecl))
+    Ty = V->getType();
+  else if (const auto *F = dyn_cast<FieldDecl>(NDecl))
+    Ty = F->getType();
+  else
     return false;
 
-  QualType Ty = V->getType();
   if (!Ty->isBlockPointerType() && !Ty->isFunctionPointerType())
     return false;
 
@@ -1556,6 +1624,10 @@
     return ExprError();
   }
 
+  // atomic_fetch_or takes a pointer to a volatile 'A'.  We shouldn't let the
+  // volatile-ness of the pointee-type inject itself into the result or the
+  // other operands.
+  ValType.removeLocalVolatile();
   QualType ResultType = ValType;
   if (Form == Copy || Form == GNUXchg || Form == Init)
     ResultType = Context.VoidTy;
@@ -2541,6 +2613,107 @@
   return false;
 }
 
+/// SemaBuiltinARMSpecialReg - Handle a check if argument ArgNum of CallExpr
+/// TheCall is an ARM/AArch64 special register string literal.
+bool Sema::SemaBuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall,
+                                    int ArgNum, unsigned ExpectedFieldNum,
+                                    bool AllowName) {
+  bool IsARMBuiltin = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
+                      BuiltinID == ARM::BI__builtin_arm_wsr64 ||
+                      BuiltinID == ARM::BI__builtin_arm_rsr ||
+                      BuiltinID == ARM::BI__builtin_arm_rsrp ||
+                      BuiltinID == ARM::BI__builtin_arm_wsr ||
+                      BuiltinID == ARM::BI__builtin_arm_wsrp;
+  bool IsAArch64Builtin = BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
+                          BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
+                          BuiltinID == AArch64::BI__builtin_arm_rsr ||
+                          BuiltinID == AArch64::BI__builtin_arm_rsrp ||
+                          BuiltinID == AArch64::BI__builtin_arm_wsr ||
+                          BuiltinID == AArch64::BI__builtin_arm_wsrp;
+  assert((IsARMBuiltin || IsAArch64Builtin) && "Unexpected ARM builtin.");
+
+  // We can't check the value of a dependent argument.
+  Expr *Arg = TheCall->getArg(ArgNum);
+  if (Arg->isTypeDependent() || Arg->isValueDependent())
+    return false;
+
+  // Check if the argument is a string literal.
+  if (!isa<StringLiteral>(Arg->IgnoreParenImpCasts()))
+    return Diag(TheCall->getLocStart(), diag::err_expr_not_string_literal)
+           << Arg->getSourceRange();
+
+  // Check the type of special register given.
+  StringRef Reg = cast<StringLiteral>(Arg->IgnoreParenImpCasts())->getString();
+  SmallVector<StringRef, 6> Fields;
+  Reg.split(Fields, ":");
+
+  if (Fields.size() != ExpectedFieldNum && !(AllowName && Fields.size() == 1))
+    return Diag(TheCall->getLocStart(), diag::err_arm_invalid_specialreg)
+           << Arg->getSourceRange();
+
+  // If the string is the name of a register then we cannot check that it is
+  // valid here but if the string is of one the forms described in ACLE then we
+  // can check that the supplied fields are integers and within the valid
+  // ranges.
+  if (Fields.size() > 1) {
+    bool FiveFields = Fields.size() == 5;
+
+    bool ValidString = true;
+    if (IsARMBuiltin) {
+      ValidString &= Fields[0].startswith_lower("cp") ||
+                     Fields[0].startswith_lower("p");
+      if (ValidString)
+        Fields[0] =
+          Fields[0].drop_front(Fields[0].startswith_lower("cp") ? 2 : 1);
+
+      ValidString &= Fields[2].startswith_lower("c");
+      if (ValidString)
+        Fields[2] = Fields[2].drop_front(1);
+
+      if (FiveFields) {
+        ValidString &= Fields[3].startswith_lower("c");
+        if (ValidString)
+          Fields[3] = Fields[3].drop_front(1);
+      }
+    }
+
+    SmallVector<int, 5> Ranges;
+    if (FiveFields)
+      Ranges.append({IsAArch64Builtin ? 1 : 15, 7, 7, 15, 15});
+    else
+      Ranges.append({15, 7, 15});
+
+    for (unsigned i=0; i<Fields.size(); ++i) {
+      int IntField;
+      ValidString &= !Fields[i].getAsInteger(10, IntField);
+      ValidString &= (IntField >= 0 && IntField <= Ranges[i]);
+    }
+
+    if (!ValidString)
+      return Diag(TheCall->getLocStart(), diag::err_arm_invalid_specialreg)
+             << Arg->getSourceRange();
+
+  } else if (IsAArch64Builtin && Fields.size() == 1) {
+    // If the register name is one of those that appear in the condition below
+    // and the special register builtin being used is one of the write builtins,
+    // then we require that the argument provided for writing to the register
+    // is an integer constant expression. This is because it will be lowered to
+    // an MSR (immediate) instruction, so we need to know the immediate at
+    // compile time.
+    if (TheCall->getNumArgs() != 2)
+      return false;
+
+    std::string RegLower = Reg.lower();
+    if (RegLower != "spsel" && RegLower != "daifset" && RegLower != "daifclr" &&
+        RegLower != "pan" && RegLower != "uao")
+      return false;
+
+    return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15);
+  }
+
+  return false;
+}
+
 /// SemaBuiltinLongjmp - Handle __builtin_longjmp(void *env[5], int val).
 /// This checks that the target supports __builtin_longjmp and
 /// that val is a constant 1.
@@ -6052,7 +6225,7 @@
   // TODO: Investigate using GetExprRange() to get tighter bounds
   // on the bit ranges.
   QualType OtherT = Other->getType();
-  if (const AtomicType *AT = dyn_cast<AtomicType>(OtherT))
+  if (const auto *AT = OtherT->getAs<AtomicType>())
     OtherT = AT->getValueType();
   IntRange OtherRange = IntRange::forValueOfType(S.Context, OtherT);
   unsigned OtherWidth = OtherRange.Width;
diff --git a/lib/Sema/SemaCodeComplete.cpp b/lib/Sema/SemaCodeComplete.cpp
index eeeb851..fd97809 100644
--- a/lib/Sema/SemaCodeComplete.cpp
+++ b/lib/Sema/SemaCodeComplete.cpp
@@ -1018,9 +1018,7 @@
 }
 
 /// \brief Enter into a new scope.
-void ResultBuilder::EnterNewScope() {
-  ShadowMaps.push_back(ShadowMap());
-}
+void ResultBuilder::EnterNewScope() { ShadowMaps.emplace_back(); }
 
 /// \brief Exit from the current scope.
 void ResultBuilder::ExitScope() {
@@ -2017,7 +2015,7 @@
     if (SemaRef.getLangOpts().C11) {
       // _Alignof
       Builder.AddResultTypeChunk("size_t");
-      if (SemaRef.getASTContext().Idents.get("alignof").hasMacroDefinition())
+      if (SemaRef.PP.isMacroDefined("alignof"))
         Builder.AddTypedTextChunk("alignof");
       else
         Builder.AddTypedTextChunk("_Alignof");
@@ -2085,15 +2083,14 @@
                                                     Result.getAllocator()));
 }
 
-static void MaybeAddSentinel(ASTContext &Context,
+static void MaybeAddSentinel(Preprocessor &PP,
                              const NamedDecl *FunctionOrMethod,
                              CodeCompletionBuilder &Result) {
   if (SentinelAttr *Sentinel = FunctionOrMethod->getAttr<SentinelAttr>())
     if (Sentinel->getSentinel() == 0) {
-      if (Context.getLangOpts().ObjC1 &&
-          Context.Idents.get("nil").hasMacroDefinition())
+      if (PP.getLangOpts().ObjC1 && PP.isMacroDefined("nil"))
         Result.AddTextChunk(", nil");
-      else if (Context.Idents.get("NULL").hasMacroDefinition())
+      else if (PP.isMacroDefined("NULL"))
         Result.AddTextChunk(", NULL");
       else
         Result.AddTextChunk(", (void*)0");
@@ -2117,8 +2114,7 @@
   return Result;
 }
 
-static std::string FormatFunctionParameter(ASTContext &Context,
-                                           const PrintingPolicy &Policy,
+static std::string FormatFunctionParameter(const PrintingPolicy &Policy,
                                            const ParmVarDecl *Param,
                                            bool SuppressName = false,
                                            bool SuppressBlock = false) {
@@ -2217,7 +2213,7 @@
     for (unsigned I = 0, N = Block.getNumParams(); I != N; ++I) {
       if (I)
         Params += ", ";
-      Params += FormatFunctionParameter(Context, Policy, Block.getParam(I),
+      Params += FormatFunctionParameter(Policy, Block.getParam(I),
                                         /*SuppressName=*/false,
                                         /*SuppressBlock=*/true);
 
@@ -2247,7 +2243,7 @@
 }
 
 /// \brief Add function parameter chunks to the given code completion string.
-static void AddFunctionParameterChunks(ASTContext &Context,
+static void AddFunctionParameterChunks(Preprocessor &PP,
                                        const PrintingPolicy &Policy,
                                        const FunctionDecl *Function,
                                        CodeCompletionBuilder &Result,
@@ -2265,7 +2261,7 @@
                                 Result.getCodeCompletionTUInfo());
       if (!FirstParameter)
         Opt.AddChunk(CodeCompletionString::CK_Comma);
-      AddFunctionParameterChunks(Context, Policy, Function, Opt, P, true);
+      AddFunctionParameterChunks(PP, Policy, Function, Opt, P, true);
       Result.AddOptionalChunk(Opt.TakeString());
       break;
     }
@@ -2278,9 +2274,8 @@
     InOptional = false;
     
     // Format the placeholder string.
-    std::string PlaceholderStr = FormatFunctionParameter(Context, Policy, 
-                                                         Param);
-        
+    std::string PlaceholderStr = FormatFunctionParameter(Policy, Param);
+
     if (Function->isVariadic() && P == N - 1)
       PlaceholderStr += ", ...";
 
@@ -2295,7 +2290,7 @@
       if (Proto->getNumParams() == 0)
         Result.AddPlaceholderChunk("...");
 
-      MaybeAddSentinel(Context, Function, Result);
+      MaybeAddSentinel(PP, Function, Result);
     }
 }
 
@@ -2575,11 +2570,7 @@
   }
   
   if (Kind == RK_Macro) {
-    const MacroDirective *MD = PP.getMacroDirectiveHistory(Macro);
-    assert(MD && "Not a macro?");
-    const MacroInfo *MI = MD->getMacroInfo();
-    assert((!MD->isDefined() || MI) && "missing MacroInfo for define");
-
+    const MacroInfo *MI = PP.getMacroInfo(Macro);
     Result.AddTypedTextChunk(
                             Result.getAllocator().CopyString(Macro->getName()));
 
@@ -2654,7 +2645,7 @@
                                    Ctx, Policy);
     AddTypedNameChunk(Ctx, Policy, ND, Result);
     Result.AddChunk(CodeCompletionString::CK_LeftParen);
-    AddFunctionParameterChunks(Ctx, Policy, Function, Result);
+    AddFunctionParameterChunks(PP, Policy, Function, Result);
     Result.AddChunk(CodeCompletionString::CK_RightParen);
     AddFunctionTypeQualsToCompletionString(Result, Function);
     return Result.TakeString();
@@ -2708,7 +2699,7 @@
     
     // Add the function parameters
     Result.AddChunk(CodeCompletionString::CK_LeftParen);
-    AddFunctionParameterChunks(Ctx, Policy, Function, Result);
+    AddFunctionParameterChunks(PP, Policy, Function, Result);
     Result.AddChunk(CodeCompletionString::CK_RightParen);
     AddFunctionTypeQualsToCompletionString(Result, Function);
     return Result.TakeString();
@@ -2769,7 +2760,7 @@
       std::string Arg;
       
       if ((*P)->getType()->isBlockPointerType() && !DeclaringEntity)
-        Arg = FormatFunctionParameter(Ctx, Policy, *P, true);
+        Arg = FormatFunctionParameter(Policy, *P, true);
       else {
         (*P)->getType().getAsStringInternal(Arg, Policy);
         Arg = "(" + formatObjCParamQualifiers((*P)->getObjCDeclQualifier()) 
@@ -2800,7 +2791,7 @@
           Result.AddPlaceholderChunk(", ...");
       }
       
-      MaybeAddSentinel(Ctx, Method, Result);
+      MaybeAddSentinel(PP, Method, Result);
     }
     
     return Result.TakeString();
@@ -2854,8 +2845,7 @@
     // Format the placeholder string.
     std::string Placeholder;
     if (Function)
-      Placeholder = FormatFunctionParameter(Context, Policy,
-                                            Function->getParamDecl(P));
+      Placeholder = FormatFunctionParameter(Policy, Function->getParamDecl(P));
     else
       Placeholder = Prototype->getParamType(P).getAsString(Policy);
 
@@ -3036,8 +3026,9 @@
   for (Preprocessor::macro_iterator M = PP.macro_begin(), 
                                  MEnd = PP.macro_end();
        M != MEnd; ++M) {
-    if (IncludeUndefined || M->first->hasMacroDefinition()) {
-      if (MacroInfo *MI = M->second->getMacroInfo())
+    auto MD = PP.getMacroDefinition(M->first);
+    if (IncludeUndefined || MD) {
+      if (MacroInfo *MI = MD.getMacroInfo())
         if (MI->isUsedForHeaderGuard())
           continue;
 
@@ -5122,7 +5113,7 @@
   // an action, e.g.,
   //   IBAction)<#selector#>:(id)sender
   if (DS.getObjCDeclQualifier() == 0 && !IsParameter &&
-      Context.Idents.get("IBAction").hasMacroDefinition()) {
+      PP.isMacroDefined("IBAction")) {
     CodeCompletionBuilder Builder(Results.getAllocator(),
                                   Results.getCodeCompletionTUInfo(),
                                   CCP_CodePattern, CXAvailability_Available);
diff --git a/lib/Sema/SemaDecl.cpp b/lib/Sema/SemaDecl.cpp
index 1bad38f..347d807 100644
--- a/lib/Sema/SemaDecl.cpp
+++ b/lib/Sema/SemaDecl.cpp
@@ -16,7 +16,6 @@
 #include "clang/AST/ASTConsumer.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTLambda.h"
-#include "clang/AST/ASTMutationListener.h"
 #include "clang/AST/CXXInheritance.h"
 #include "clang/AST/CharUnits.h"
 #include "clang/AST/CommentDiagnostic.h"
@@ -1082,6 +1081,22 @@
   assert(CurContext && "Popped translation unit!");
 }
 
+Sema::SkippedDefinitionContext Sema::ActOnTagStartSkippedDefinition(Scope *S,
+                                                                    Decl *D) {
+  // Unlike PushDeclContext, the context to which we return is not necessarily
+  // the containing DC of TD, because the new context will be some pre-existing
+  // TagDecl definition instead of a fresh one.
+  auto Result = static_cast<SkippedDefinitionContext>(CurContext);
+  CurContext = cast<TagDecl>(D)->getDefinition();
+  assert(CurContext && "skipping definition of undefined tag");
+  S->setEntity(CurContext);
+  return Result;
+}
+
+void Sema::ActOnTagFinishSkippedDefinition(SkippedDefinitionContext Context) {
+  CurContext = static_cast<decltype(CurContext)>(Context);
+}
+
 /// EnterDeclaratorContext - Used when we must lookup names in the context
 /// of a declarator's nested name specifier.
 ///
@@ -1749,7 +1764,7 @@
                                            Loc, Loc, II, R, /*TInfo=*/nullptr,
                                            SC_Extern,
                                            false,
-                                           /*hasPrototype=*/true);
+                                           R->isFunctionProtoType());
   New->setImplicit();
 
   // Create Decl objects for each parameter, adding them to the
@@ -1785,11 +1800,11 @@
 /// should not consider because they are not permitted to conflict, e.g.,
 /// because they come from hidden sub-modules and do not refer to the same
 /// entity.
-static void filterNonConflictingPreviousDecls(ASTContext &context,
+static void filterNonConflictingPreviousDecls(Sema &S,
                                               NamedDecl *decl,
                                               LookupResult &previous){
   // This is only interesting when modules are enabled.
-  if (!context.getLangOpts().Modules)
+  if (!S.getLangOpts().Modules && !S.getLangOpts().ModulesLocalVisibility)
     return;
 
   // Empty sets are uninteresting.
@@ -1801,7 +1816,7 @@
     NamedDecl *old = filter.next();
 
     // Non-hidden declarations are never ignored.
-    if (!old->isHidden())
+    if (S.isVisible(old))
       continue;
 
     if (!old->isExternallyVisible())
@@ -1815,11 +1830,11 @@
 /// entity if their types are the same.
 /// FIXME: This is notionally doing the same thing as ASTReaderDecl's
 /// isSameEntity.
-static void filterNonConflictingPreviousTypedefDecls(ASTContext &Context,
+static void filterNonConflictingPreviousTypedefDecls(Sema &S,
                                                      TypedefNameDecl *Decl,
                                                      LookupResult &Previous) {
   // This is only interesting when modules are enabled.
-  if (!Context.getLangOpts().Modules)
+  if (!S.getLangOpts().Modules && !S.getLangOpts().ModulesLocalVisibility)
     return;
 
   // Empty sets are uninteresting.
@@ -1831,19 +1846,19 @@
     NamedDecl *Old = Filter.next();
 
     // Non-hidden declarations are never ignored.
-    if (!Old->isHidden())
+    if (S.isVisible(Old))
       continue;
 
     // Declarations of the same entity are not ignored, even if they have
     // different linkages.
     if (auto *OldTD = dyn_cast<TypedefNameDecl>(Old)) {
-      if (Context.hasSameType(OldTD->getUnderlyingType(),
-                              Decl->getUnderlyingType()))
+      if (S.Context.hasSameType(OldTD->getUnderlyingType(),
+                                Decl->getUnderlyingType()))
         continue;
 
       // If both declarations give a tag declaration a typedef name for linkage
       // purposes, then they declare the same entity.
-      if (OldTD->getAnonDeclWithTypedefName() &&
+      if (OldTD->getAnonDeclWithTypedefName(/*AnyRedecl*/true) &&
           Decl->getAnonDeclWithTypedefName())
         continue;
     }
@@ -1958,7 +1973,7 @@
     return New->setInvalidDecl();
 
   if (auto *OldTD = dyn_cast<TypedefNameDecl>(Old)) {
-    auto *OldTag = OldTD->getAnonDeclWithTypedefName();
+    auto *OldTag = OldTD->getAnonDeclWithTypedefName(/*AnyRedecl*/true);
     auto *NewTag = New->getAnonDeclWithTypedefName();
     NamedDecl *Hidden = nullptr;
     if (getLangOpts().CPlusPlus && OldTag && NewTag &&
@@ -1974,9 +1989,7 @@
         New->setTypeSourceInfo(OldTD->getTypeSourceInfo());
 
       // Make the old tag definition visible.
-      if (auto *Listener = getASTMutationListener())
-        Listener->RedefinedHiddenDefinition(Hidden, NewTag->getLocation());
-      Hidden->setHidden(false);
+      makeMergedDefinitionVisible(Hidden, NewTag->getLocation());
     }
   }
 
@@ -2713,7 +2726,7 @@
   // UndefinedButUsed.
   if (!Old->isInlined() && New->isInlined() &&
       !New->hasAttr<GNUInlineAttr>() &&
-      (getLangOpts().CPlusPlus || !getLangOpts().GNUInline) &&
+      !getLangOpts().GNUInline &&
       Old->isUsed(false) &&
       !Old->isDefined() && !New->isThisDeclarationADefinition())
     UndefinedButUsed.insert(std::make_pair(Old->getCanonicalDecl(),
@@ -3407,14 +3420,23 @@
   }
 
   // C++ doesn't have tentative definitions, so go right ahead and check here.
-  const VarDecl *Def;
+  VarDecl *Def;
   if (getLangOpts().CPlusPlus &&
       New->isThisDeclarationADefinition() == VarDecl::Definition &&
       (Def = Old->getDefinition())) {
-    Diag(New->getLocation(), diag::err_redefinition) << New;
-    Diag(Def->getLocation(), diag::note_previous_definition);
-    New->setInvalidDecl();
-    return;
+    NamedDecl *Hidden = nullptr;
+    if (!hasVisibleDefinition(Def, &Hidden) && 
+        (New->getDescribedVarTemplate() ||
+         New->getNumTemplateParameterLists() ||
+         New->getDeclContext()->isDependentContext())) {
+      // The previous definition is hidden, and multiple definitions are
+      // permitted (in separate TUs). Form another definition of it.
+    } else {
+      Diag(New->getLocation(), diag::err_redefinition) << New;
+      Diag(Def->getLocation(), diag::note_previous_definition);
+      New->setInvalidDecl();
+      return;
+    }
   }
 
   if (haveIncompatibleLanguageLinkages(Old, New)) {
@@ -3453,8 +3475,9 @@
 // We will pick our mangling number depending on which version of MSVC is being
 // targeted.
 static unsigned getMSManglingNumber(const LangOptions &LO, Scope *S) {
-  return LO.isCompatibleWithMSVC(19) ? S->getMSCurManglingNumber()
-                                     : S->getMSLastManglingNumber();
+  return LO.isCompatibleWithMSVC(LangOptions::MSVC2015)
+             ? S->getMSCurManglingNumber()
+             : S->getMSLastManglingNumber();
 }
 
 void Sema::handleTagNumbering(const TagDecl *Tag, Scope *TagScope) {
@@ -4656,12 +4679,14 @@
         RequireCompleteDeclContext(D.getCXXScopeSpec(), DC))
       return nullptr;
 
+    // If a class is incomplete, do not parse entities inside it.
     if (isa<CXXRecordDecl>(DC) && !cast<CXXRecordDecl>(DC)->hasDefinition()) {
       Diag(D.getIdentifierLoc(),
            diag::err_member_def_undefined_record)
         << Name << DC << D.getCXXScopeSpec().getRange();
-      D.setInvalidType();
-    } else if (!D.getDeclSpec().isFriendSpecified()) {
+      return nullptr;
+    }
+    if (!D.getDeclSpec().isFriendSpecified()) {
       if (diagnoseQualifiedDeclaration(D.getCXXScopeSpec(), DC,
                                       Name, D.getIdentifierLoc())) {
         if (DC->isRecord())
@@ -4891,6 +4916,8 @@
 
 static void
 FixInvalidVariablyModifiedTypeLoc(TypeLoc SrcTL, TypeLoc DstTL) {
+  SrcTL = SrcTL.getUnqualifiedLoc();
+  DstTL = DstTL.getUnqualifiedLoc();
   if (PointerTypeLoc SrcPTL = SrcTL.getAs<PointerTypeLoc>()) {
     PointerTypeLoc DstPTL = DstTL.castAs<PointerTypeLoc>();
     FixInvalidVariablyModifiedTypeLoc(SrcPTL.getPointeeLoc(),
@@ -5065,7 +5092,7 @@
   // in an outer scope, it isn't the same thing.
   FilterLookupForScope(Previous, DC, S, /*ConsiderLinkage*/false,
                        /*AllowInlineNamespace*/false);
-  filterNonConflictingPreviousTypedefDecls(Context, NewTD, Previous);
+  filterNonConflictingPreviousTypedefDecls(*this, NewTD, Previous);
   if (!Previous.empty()) {
     Redeclaration = true;
     MergeTypedefNameDecl(NewTD, Previous);
@@ -5227,11 +5254,12 @@
     }
   }
 
-  // 'selectany' only applies to externally visible varable declarations.
+  // 'selectany' only applies to externally visible variable declarations.
   // It does not apply to functions.
   if (SelectAnyAttr *Attr = ND.getAttr<SelectAnyAttr>()) {
     if (isa<FunctionDecl>(ND) || !ND.isExternallyVisible()) {
-      S.Diag(Attr->getLocation(), diag::err_attribute_selectany_non_extern_data);
+      S.Diag(Attr->getLocation(),
+             diag::err_attribute_selectany_non_extern_data);
       ND.dropAttr<SelectAnyAttr>();
     }
   }
@@ -5750,6 +5778,7 @@
   if (IsLocalExternDecl)
     NewVD->setLocalExternDecl();
 
+  bool EmitTLSUnsupportedError = false;
   if (DeclSpec::TSCS TSCS = D.getDeclSpec().getThreadStorageClassSpec()) {
     // C++11 [dcl.stc]p4:
     //   When thread_local is applied to a variable of block scope the
@@ -5764,10 +5793,20 @@
       Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(),
            diag::err_thread_non_global)
         << DeclSpec::getSpecifierName(TSCS);
-    else if (!Context.getTargetInfo().isTLSSupported())
-      Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(),
-           diag::err_thread_unsupported);
-    else
+    else if (!Context.getTargetInfo().isTLSSupported()) {
+      if (getLangOpts().CUDA) {
+        // Postpone error emission until we've collected attributes required to
+        // figure out whether it's a host or device variable and whether the
+        // error should be ignored.
+        EmitTLSUnsupportedError = true;
+        // We still need to mark the variable as TLS so it shows up in AST with
+        // proper storage class for other tools to use even if we're not going
+        // to emit any code for it.
+        NewVD->setTSCSpec(TSCS);
+      } else
+        Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(),
+             diag::err_thread_unsupported);
+    } else
       NewVD->setTSCSpec(TSCS);
   }
 
@@ -5816,6 +5855,9 @@
   ProcessDeclAttributes(S, NewVD, D);
 
   if (getLangOpts().CUDA) {
+    if (EmitTLSUnsupportedError && DeclAttrsMatchCUDAMode(getLangOpts(), NewVD))
+      Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(),
+           diag::err_thread_unsupported);
     // CUDA B.2.5: "__shared__ and __constant__ variables have implied static
     // storage [duration]."
     if (SC == SC_None && S->getFnParent() != nullptr &&
@@ -6366,7 +6408,7 @@
     Previous.setShadowed();
 
   // Filter out any non-conflicting previous declarations.
-  filterNonConflictingPreviousDecls(Context, NewVD, Previous);
+  filterNonConflictingPreviousDecls(*this, NewVD, Previous);
 
   if (!Previous.empty()) {
     MergeVarDecl(NewVD, Previous);
@@ -7915,7 +7957,7 @@
                                !Previous.isShadowed();
 
   // Filter out any non-conflicting previous declarations.
-  filterNonConflictingPreviousDecls(Context, NewFD, Previous);
+  filterNonConflictingPreviousDecls(*this, NewFD, Previous);
 
   bool Redeclaration = false;
   NamedDecl *OldDecl = nullptr;
@@ -7970,7 +8012,7 @@
   // Check for a previous extern "C" declaration with this name.
   if (!Redeclaration &&
       checkForConflictWithNonVisibleExternC(*this, NewFD, Previous)) {
-    filterNonConflictingPreviousDecls(Context, NewFD, Previous);
+    filterNonConflictingPreviousDecls(*this, NewFD, Previous);
     if (!Previous.empty()) {
       // This is an extern "C" declaration with the same name as a previous
       // declaration, and thus redeclares that entity...
@@ -8249,6 +8291,12 @@
 
   bool HasExtraParameters = (nparams > 3);
 
+  if (FTP->isVariadic()) {
+    Diag(FD->getLocation(), diag::ext_variadic_main);
+    // FIXME: if we had information about the location of the ellipsis, we
+    // could add a FixIt hint to remove it as a parameter.
+  }
+
   // Darwin passes an undocumented fourth argument of type char**.  If
   // other platforms start sprouting these, the logic below will start
   // getting shifty.
@@ -8687,7 +8735,7 @@
   // If there is no declaration, there was an error parsing it.  Just ignore
   // the initializer.
   if (!RealDecl || RealDecl->isInvalidDecl()) {
-    CorrectDelayedTyposInExpr(Init);
+    CorrectDelayedTyposInExpr(Init, dyn_cast_or_null<VarDecl>(RealDecl));
     return;
   }
 
@@ -8721,11 +8769,12 @@
     // Attempt typo correction early so that the type of the init expression can
     // be deduced based on the chosen correction:if the original init contains a
     // TypoExpr.
-    ExprResult Res = CorrectDelayedTyposInExpr(Init);
+    ExprResult Res = CorrectDelayedTyposInExpr(Init, VDecl);
     if (!Res.isUsable()) {
       RealDecl->setInvalidDecl();
       return;
     }
+
     if (Res.get() != Init) {
       Init = Res.get();
       if (CXXDirectInit)
@@ -8861,16 +8910,24 @@
       VDecl->setInvalidDecl();
   }
 
-  const VarDecl *Def;
+  VarDecl *Def;
   if ((Def = VDecl->getDefinition()) && Def != VDecl) {
-    Diag(VDecl->getLocation(), diag::err_redefinition)
-      << VDecl->getDeclName();
-    Diag(Def->getLocation(), diag::note_previous_definition);
-    VDecl->setInvalidDecl();
-    return;
+    NamedDecl *Hidden = nullptr;
+    if (!hasVisibleDefinition(Def, &Hidden) && 
+        (VDecl->getDescribedVarTemplate() ||
+         VDecl->getNumTemplateParameterLists() ||
+         VDecl->getDeclContext()->isDependentContext())) {
+      // The previous definition is hidden, and multiple definitions are
+      // permitted (in separate TUs). Form another definition of it.
+    } else {
+      Diag(VDecl->getLocation(), diag::err_redefinition)
+        << VDecl->getDeclName();
+      Diag(Def->getLocation(), diag::note_previous_definition);
+      VDecl->setInvalidDecl();
+      return;
+    }
   }
 
-  const VarDecl *PrevInit = nullptr;
   if (getLangOpts().CPlusPlus) {
     // C++ [class.static.data]p4
     //   If a static data member is of const integral or const
@@ -8884,10 +8941,12 @@
     // We already performed a redefinition check above, but for static
     // data members we also need to check whether there was an in-class
     // declaration with an initializer.
-    if (VDecl->isStaticDataMember() && VDecl->getAnyInitializer(PrevInit)) {
+    if (VDecl->isStaticDataMember() && VDecl->getCanonicalDecl()->hasInit()) {
       Diag(Init->getExprLoc(), diag::err_static_data_member_reinitialization)
           << VDecl->getDeclName();
-      Diag(PrevInit->getInit()->getExprLoc(), diag::note_previous_initializer) << 0;
+      Diag(VDecl->getCanonicalDecl()->getInit()->getExprLoc(),
+           diag::note_previous_initializer)
+          << 0;
       return;
     }  
 
@@ -8944,8 +9003,8 @@
 
     // Try to correct any TypoExprs in the initialization arguments.
     for (size_t Idx = 0; Idx < Args.size(); ++Idx) {
-      ExprResult Res =
-          CorrectDelayedTyposInExpr(Args[Idx], [this, Entity, Kind](Expr *E) {
+      ExprResult Res = CorrectDelayedTyposInExpr(
+          Args[Idx], VDecl, [this, Entity, Kind](Expr *E) {
             InitializationSequence Init(*this, Entity, Kind, MultiExprArg(E));
             return Init.Failed() ? ExprError() : E;
           });
@@ -10568,6 +10627,23 @@
         Context.adjustDeducedFunctionResultType(
             FD, SubstAutoType(ResultType.getType(), Context.VoidTy));
       }
+    } else if (getLangOpts().CPlusPlus11 && isLambdaCallOperator(FD)) {
+      auto *LSI = getCurLambda();
+      if (LSI->HasImplicitReturnType) {
+        deduceClosureReturnType(*LSI);
+
+        // C++11 [expr.prim.lambda]p4:
+        //   [...] if there are no return statements in the compound-statement
+        //   [the deduced type is] the type void
+        QualType RetType =
+            LSI->ReturnType.isNull() ? Context.VoidTy : LSI->ReturnType;
+
+        // Update the return type to the deduced type.
+        const FunctionProtoType *Proto =
+            FD->getType()->getAs<FunctionProtoType>();
+        FD->setType(Context.getFunctionType(RetType, Proto->getParamTypes(),
+                                            Proto->getExtProtoInfo()));
+      }
     }
 
     // The only way to be included in UndefinedButUsed is if there is an
@@ -10577,7 +10653,7 @@
       if (!FD->isExternallyVisible())
         UndefinedButUsed.erase(FD);
       else if (FD->isInlined() &&
-               (LangOpts.CPlusPlus || !LangOpts.GNUInline) &&
+               !LangOpts.GNUInline &&
                (!FD->getPreviousDecl()->hasAttr<GNUInlineAttr>()))
         UndefinedButUsed.erase(FD);
     }
@@ -11270,8 +11346,8 @@
 /// \param IsTypeSpecifier \c true if this is a type-specifier (or
 /// trailing-type-specifier) other than one in an alias-declaration.
 ///
-/// \param SkipBody If non-null, will be set to true if the caller should skip
-/// the definition of this tag, and treat it as if it were a declaration.
+/// \param SkipBody If non-null, will be set to indicate if the caller should
+/// skip the definition of this tag and treat it as if it were a declaration.
 Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK,
                      SourceLocation KWLoc, CXXScopeSpec &SS,
                      IdentifierInfo *Name, SourceLocation NameLoc,
@@ -11282,7 +11358,7 @@
                      SourceLocation ScopedEnumKWLoc,
                      bool ScopedEnumUsesClassTag,
                      TypeResult UnderlyingType,
-                     bool IsTypeSpecifier, bool *SkipBody) {
+                     bool IsTypeSpecifier, SkipBodyInfo *SkipBody) {
   // If this is not a definition, it must have a name.
   IdentifierInfo *OrigName = Name;
   assert((Name != nullptr || TUK == TUK_Definition) &&
@@ -11592,6 +11668,10 @@
     }
   }
 
+  // If we have a known previous declaration to use, then use it.
+  if (Previous.empty() && SkipBody && SkipBody->Previous)
+    Previous.addDecl(SkipBody->Previous);
+
   if (!Previous.empty()) {
     NamedDecl *PrevDecl = Previous.getFoundDecl();
     NamedDecl *DirectPrevDecl =
@@ -11733,10 +11813,8 @@
                 // assume that this definition is identical to the hidden one
                 // we already have. Make the existing definition visible and
                 // use it in place of this one.
-                *SkipBody = true;
-                if (auto *Listener = getASTMutationListener())
-                  Listener->RedefinedHiddenDefinition(Hidden, KWLoc);
-                Hidden->setHidden(false);
+                SkipBody->ShouldSkip = true;
+                makeMergedDefinitionVisible(Hidden, KWLoc);
                 return Def;
               } else if (!IsExplicitSpecializationAfterInstantiation) {
                 // A redeclaration in function prototype scope in C isn't
@@ -12472,8 +12550,10 @@
     InvalidDecl = true;
 
   bool ZeroWidth = false;
+  if (InvalidDecl)
+    BitWidth = nullptr;
   // If this is declared as a bit-field, check the bit-field.
-  if (!InvalidDecl && BitWidth) {
+  if (BitWidth) {
     BitWidth = VerifyBitField(Loc, II, T, Record->isMsStruct(Context), BitWidth,
                               &ZeroWidth).get();
     if (!BitWidth) {
@@ -13422,6 +13502,30 @@
                                   Val, EnumVal);
 }
 
+Sema::SkipBodyInfo Sema::shouldSkipAnonEnumBody(Scope *S, IdentifierInfo *II,
+                                                SourceLocation IILoc) {
+  if (!(getLangOpts().Modules || getLangOpts().ModulesLocalVisibility) ||
+      !getLangOpts().CPlusPlus)
+    return SkipBodyInfo();
+
+  // We have an anonymous enum definition. Look up the first enumerator to
+  // determine if we should merge the definition with an existing one and
+  // skip the body.
+  NamedDecl *PrevDecl = LookupSingleName(S, II, IILoc, LookupOrdinaryName,
+                                         ForRedeclaration);
+  auto *PrevECD = dyn_cast_or_null<EnumConstantDecl>(PrevDecl);
+  NamedDecl *Hidden;
+  if (PrevECD &&
+      !hasVisibleDefinition(cast<NamedDecl>(PrevECD->getDeclContext()),
+                            &Hidden)) {
+    SkipBodyInfo Skip;
+    Skip.ShouldSkip = true;
+    Skip.Previous = Hidden;
+    return Skip;
+  }
+
+  return SkipBodyInfo();
+}
 
 Decl *Sema::ActOnEnumConstant(Scope *S, Decl *theEnumDecl, Decl *lastEnumConst,
                               SourceLocation IdLoc, IdentifierInfo *Id,
@@ -14010,6 +14114,8 @@
   if (!Mod)
     return true;
 
+  VisibleModules.setVisible(Mod, ImportLoc);
+
   checkModuleImportContext(*this, Mod, ImportLoc, CurContext);
 
   // FIXME: we should support importing a submodule within a different submodule
@@ -14045,9 +14151,46 @@
 void Sema::ActOnModuleInclude(SourceLocation DirectiveLoc, Module *Mod) {
   checkModuleImportContext(*this, Mod, DirectiveLoc, CurContext);
 
-  // FIXME: Should we synthesize an ImportDecl here?
-  getModuleLoader().makeModuleVisible(Mod, Module::AllVisible, DirectiveLoc,
-                                      /*Complain=*/true);
+  // Determine whether we're in the #include buffer for a module. The #includes
+  // in that buffer do not qualify as module imports; they're just an
+  // implementation detail of us building the module.
+  //
+  // FIXME: Should we even get ActOnModuleInclude calls for those?
+  bool IsInModuleIncludes =
+      TUKind == TU_Module &&
+      getSourceManager().isWrittenInMainFile(DirectiveLoc);
+
+  // If this module import was due to an inclusion directive, create an 
+  // implicit import declaration to capture it in the AST.
+  if (!IsInModuleIncludes) {
+    TranslationUnitDecl *TU = getASTContext().getTranslationUnitDecl();
+    ImportDecl *ImportD = ImportDecl::CreateImplicit(getASTContext(), TU,
+                                                     DirectiveLoc, Mod,
+                                                     DirectiveLoc);
+    TU->addDecl(ImportD);
+    Consumer.HandleImplicitImportDecl(ImportD);
+  }
+  
+  getModuleLoader().makeModuleVisible(Mod, Module::AllVisible, DirectiveLoc);
+  VisibleModules.setVisible(Mod, DirectiveLoc);
+}
+
+void Sema::ActOnModuleBegin(SourceLocation DirectiveLoc, Module *Mod) {
+  checkModuleImportContext(*this, Mod, DirectiveLoc, CurContext);
+
+  if (getLangOpts().ModulesLocalVisibility)
+    VisibleModulesStack.push_back(std::move(VisibleModules));
+  VisibleModules.setVisible(Mod, DirectiveLoc);
+}
+
+void Sema::ActOnModuleEnd(SourceLocation DirectiveLoc, Module *Mod) {
+  checkModuleImportContext(*this, Mod, DirectiveLoc, CurContext);
+
+  if (getLangOpts().ModulesLocalVisibility) {
+    VisibleModules = std::move(VisibleModulesStack.back());
+    VisibleModulesStack.pop_back();
+    VisibleModules.setVisible(Mod, DirectiveLoc);
+  }
 }
 
 void Sema::createImplicitModuleImportForErrorRecovery(SourceLocation Loc,
@@ -14064,8 +14207,8 @@
   Consumer.HandleImplicitImportDecl(ImportD);
 
   // Make the module visible.
-  getModuleLoader().makeModuleVisible(Mod, Module::AllVisible, Loc,
-                                      /*Complain=*/false);
+  getModuleLoader().makeModuleVisible(Mod, Module::AllVisible, Loc);
+  VisibleModules.setVisible(Mod, Loc);
 }
 
 void Sema::ActOnPragmaRedefineExtname(IdentifierInfo* Name,
@@ -14073,16 +14216,22 @@
                                       SourceLocation PragmaLoc,
                                       SourceLocation NameLoc,
                                       SourceLocation AliasNameLoc) {
-  Decl *PrevDecl = LookupSingleName(TUScope, Name, NameLoc,
-                                    LookupOrdinaryName);
-  AsmLabelAttr *Attr = ::new (Context) AsmLabelAttr(AliasNameLoc, Context,
-                                                    AliasName->getName(), 0);
+  NamedDecl *PrevDecl = LookupSingleName(TUScope, Name, NameLoc,
+                                         LookupOrdinaryName);
+  AsmLabelAttr *Attr =
+      AsmLabelAttr::CreateImplicit(Context, AliasName->getName(), AliasNameLoc);
 
-  if (PrevDecl) 
+  // If a declaration that:
+  // 1) declares a function or a variable
+  // 2) has external linkage
+  // already exists, add a label attribute to it.
+  if (PrevDecl &&
+      (isa<FunctionDecl>(PrevDecl) || isa<VarDecl>(PrevDecl)) &&
+      PrevDecl->hasExternalFormalLinkage())
     PrevDecl->addAttr(Attr);
-  else 
-    (void)ExtnameUndeclaredIdentifiers.insert(
-      std::pair<IdentifierInfo*,AsmLabelAttr*>(Name, Attr));
+  // Otherwise, add a label attribute to ExtnameUndeclaredIdentifiers.
+  else
+    (void)ExtnameUndeclaredIdentifiers.insert(std::make_pair(Name, Attr));
 }
 
 void Sema::ActOnPragmaWeakID(IdentifierInfo* Name,
diff --git a/lib/Sema/SemaDeclAttr.cpp b/lib/Sema/SemaDeclAttr.cpp
index 4f3fed5..a3c77a8 100644
--- a/lib/Sema/SemaDeclAttr.cpp
+++ b/lib/Sema/SemaDeclAttr.cpp
@@ -2407,6 +2407,28 @@
     D->addAttr(NewAttr);
 }
 
+// Check for things we'd like to warn about, no errors or validation for now.
+// TODO: Validation should use a backend target library that specifies
+// the allowable subtarget features and cpus. We could use something like a
+// TargetCodeGenInfo hook here to do validation.
+void Sema::checkTargetAttr(SourceLocation LiteralLoc, StringRef AttrStr) {
+  for (auto Str : {"tune=", "fpmath="})
+    if (AttrStr.find(Str) != StringRef::npos)
+      Diag(LiteralLoc, diag::warn_unsupported_target_attribute) << Str;
+}
+
+static void handleTargetAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+  StringRef Str;
+  SourceLocation LiteralLoc;
+  if (!S.checkStringLiteralArgumentAttr(Attr, 0, Str, &LiteralLoc))
+    return;
+  S.checkTargetAttr(LiteralLoc, Str);
+  unsigned Index = Attr.getAttributeSpellingListIndex();
+  TargetAttr *NewAttr =
+      ::new (S.Context) TargetAttr(Attr.getRange(), S.Context, Str, Index);
+  D->addAttr(NewAttr);
+}
+
 
 static void handleCleanupAttr(Sema &S, Decl *D, const AttributeList &Attr) {
   VarDecl *VD = cast<VarDecl>(D);
@@ -3312,11 +3334,10 @@
 static void handleCallConvAttr(Sema &S, Decl *D, const AttributeList &Attr) {
   if (hasDeclarator(D)) return;
 
-  const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
   // Diagnostic is emitted elsewhere: here we store the (valid) Attr
   // in the Decl node for syntactic reasoning, e.g., pretty-printing.
   CallingConv CC;
-  if (S.CheckCallingConvAttr(Attr, CC, FD))
+  if (S.CheckCallingConvAttr(Attr, CC, /*FD*/nullptr))
     return;
 
   if (!isa<ObjCMethodDecl>(D)) {
@@ -3498,20 +3519,63 @@
   return false;
 }
 
-static void handleLaunchBoundsAttr(Sema &S, Decl *D,
-                                   const AttributeList &Attr) {
-  uint32_t MaxThreads, MinBlocks = 0;
-  if (!checkUInt32Argument(S, Attr, Attr.getArgAsExpr(0), MaxThreads, 1))
-    return;
-  if (Attr.getNumArgs() > 1 && !checkUInt32Argument(S, Attr,
-                                                    Attr.getArgAsExpr(1),
-                                                    MinBlocks, 2))
+// Checks whether an argument of launch_bounds attribute is acceptable
+// May output an error.
+static bool checkLaunchBoundsArgument(Sema &S, Expr *E,
+                                      const CUDALaunchBoundsAttr &Attr,
+                                      const unsigned Idx) {
+
+  if (S.DiagnoseUnexpandedParameterPack(E))
+    return false;
+
+  // Accept template arguments for now as they depend on something else.
+  // We'll get to check them when they eventually get instantiated.
+  if (E->isValueDependent())
+    return true;
+
+  llvm::APSInt I(64);
+  if (!E->isIntegerConstantExpr(I, S.Context)) {
+    S.Diag(E->getExprLoc(), diag::err_attribute_argument_n_type)
+        << &Attr << Idx << AANT_ArgumentIntegerConstant << E->getSourceRange();
+    return false;
+  }
+  // Make sure we can fit it in 32 bits.
+  if (!I.isIntN(32)) {
+    S.Diag(E->getExprLoc(), diag::err_ice_too_large) << I.toString(10, false)
+                                                     << 32 << /* Unsigned */ 1;
+    return false;
+  }
+  if (I < 0)
+    S.Diag(E->getExprLoc(), diag::warn_attribute_argument_n_negative)
+        << &Attr << Idx << E->getSourceRange();
+
+  return true;
+}
+
+void Sema::AddLaunchBoundsAttr(SourceRange AttrRange, Decl *D, Expr *MaxThreads,
+                               Expr *MinBlocks, unsigned SpellingListIndex) {
+  CUDALaunchBoundsAttr TmpAttr(AttrRange, Context, MaxThreads, MinBlocks,
+                               SpellingListIndex);
+
+  if (!checkLaunchBoundsArgument(*this, MaxThreads, TmpAttr, 0))
     return;
 
-  D->addAttr(::new (S.Context)
-              CUDALaunchBoundsAttr(Attr.getRange(), S.Context,
-                                  MaxThreads, MinBlocks,
-                                  Attr.getAttributeSpellingListIndex()));
+  if (MinBlocks && !checkLaunchBoundsArgument(*this, MinBlocks, TmpAttr, 1))
+    return;
+
+  D->addAttr(::new (Context) CUDALaunchBoundsAttr(
+      AttrRange, Context, MaxThreads, MinBlocks, SpellingListIndex));
+}
+
+static void handleLaunchBoundsAttr(Sema &S, Decl *D,
+                                   const AttributeList &Attr) {
+  if (!checkAttributeAtLeastNumArgs(S, Attr, 1) ||
+      !checkAttributeAtMostNumArgs(S, Attr, 2))
+    return;
+
+  S.AddLaunchBoundsAttr(Attr.getRange(), D, Attr.getArgAsExpr(0),
+                        Attr.getNumArgs() > 1 ? Attr.getArgAsExpr(1) : nullptr,
+                        Attr.getAttributeSpellingListIndex());
 }
 
 static void handleArgumentWithTypeTagAttr(Sema &S, Decl *D,
@@ -4321,6 +4385,43 @@
   handleAttrWithMessage<DeprecatedAttr>(S, D, Attr);
 }
 
+static void handleNoSanitizeAttr(Sema &S, Decl *D, const AttributeList &Attr) {
+  if (!checkAttributeAtLeastNumArgs(S, Attr, 1))
+    return;
+
+  std::vector<std::string> Sanitizers;
+
+  for (unsigned I = 0, E = Attr.getNumArgs(); I != E; ++I) {
+    StringRef SanitizerName;
+    SourceLocation LiteralLoc;
+
+    if (!S.checkStringLiteralArgumentAttr(Attr, I, SanitizerName, &LiteralLoc))
+      return;
+
+    if (parseSanitizerValue(SanitizerName, /*AllowGroups=*/true) == 0)
+      S.Diag(LiteralLoc, diag::warn_unknown_sanitizer_ignored) << SanitizerName;
+
+    Sanitizers.push_back(SanitizerName);
+  }
+
+  D->addAttr(::new (S.Context) NoSanitizeAttr(
+      Attr.getRange(), S.Context, Sanitizers.data(), Sanitizers.size(),
+      Attr.getAttributeSpellingListIndex()));
+}
+
+static void handleNoSanitizeSpecificAttr(Sema &S, Decl *D,
+                                         const AttributeList &Attr) {
+  std::string SanitizerName =
+      llvm::StringSwitch<std::string>(Attr.getName()->getName())
+          .Case("no_address_safety_analysis", "address")
+          .Case("no_sanitize_address", "address")
+          .Case("no_sanitize_thread", "thread")
+          .Case("no_sanitize_memory", "memory");
+  D->addAttr(::new (S.Context)
+                 NoSanitizeAttr(Attr.getRange(), S.Context, &SanitizerName, 1,
+                                Attr.getAttributeSpellingListIndex()));
+}
+
 /// Handles semantic checking for features that are common to all attributes,
 /// such as checking whether a parameter was properly specified, or the correct
 /// number of arguments were passed, etc.
@@ -4650,6 +4751,9 @@
   case AttributeList::AT_Section:
     handleSectionAttr(S, D, Attr);
     break;
+  case AttributeList::AT_Target:
+    handleTargetAttr(S, D, Attr);
+    break;
   case AttributeList::AT_Unavailable:
     handleAttrWithMessage<UnavailableAttr>(S, D, Attr);
     break;
@@ -4792,18 +4896,15 @@
   case AttributeList::AT_ScopedLockable:
     handleSimpleAttribute<ScopedLockableAttr>(S, D, Attr);
     break;
-  case AttributeList::AT_NoSanitizeAddress:
-    handleSimpleAttribute<NoSanitizeAddressAttr>(S, D, Attr);
+  case AttributeList::AT_NoSanitize:
+    handleNoSanitizeAttr(S, D, Attr);
+    break;
+  case AttributeList::AT_NoSanitizeSpecific:
+    handleNoSanitizeSpecificAttr(S, D, Attr);
     break;
   case AttributeList::AT_NoThreadSafetyAnalysis:
     handleSimpleAttribute<NoThreadSafetyAnalysisAttr>(S, D, Attr);
     break;
-  case AttributeList::AT_NoSanitizeThread:
-    handleSimpleAttribute<NoSanitizeThreadAttr>(S, D, Attr);
-    break;
-  case AttributeList::AT_NoSanitizeMemory:
-    handleSimpleAttribute<NoSanitizeMemoryAttr>(S, D, Attr);
-    break;
   case AttributeList::AT_GuardedBy:
     handleGuardedByAttr(S, D, Attr);
     break;
@@ -5181,7 +5282,7 @@
   // Don't warn if our current context is deprecated or unavailable.
   switch (K) {
   case Sema::AD_Deprecation:
-    if (isDeclDeprecated(Ctx))
+    if (isDeclDeprecated(Ctx) || isDeclUnavailable(Ctx))
       return;
     diag = !ObjCPropertyAccess ? diag::warn_deprecated
                                : diag::warn_property_method_deprecated;
diff --git a/lib/Sema/SemaDeclCXX.cpp b/lib/Sema/SemaDeclCXX.cpp
index b0e6aca..7ed9bfc 100644
--- a/lib/Sema/SemaDeclCXX.cpp
+++ b/lib/Sema/SemaDeclCXX.cpp
@@ -438,6 +438,45 @@
                                 Scope *S) {
   bool Invalid = false;
 
+  // The declaration context corresponding to the scope is the semantic
+  // parent, unless this is a local function declaration, in which case
+  // it is that surrounding function.
+  DeclContext *ScopeDC = New->isLocalExternDecl()
+                             ? New->getLexicalDeclContext()
+                             : New->getDeclContext();
+
+  // Find the previous declaration for the purpose of default arguments.
+  FunctionDecl *PrevForDefaultArgs = Old;
+  for (/**/; PrevForDefaultArgs;
+       // Don't bother looking back past the latest decl if this is a local
+       // extern declaration; nothing else could work.
+       PrevForDefaultArgs = New->isLocalExternDecl()
+                                ? nullptr
+                                : PrevForDefaultArgs->getPreviousDecl()) {
+    // Ignore hidden declarations.
+    if (!LookupResult::isVisible(*this, PrevForDefaultArgs))
+      continue;
+
+    if (S && !isDeclInScope(PrevForDefaultArgs, ScopeDC, S) &&
+        !New->isCXXClassMember()) {
+      // Ignore default arguments of old decl if they are not in
+      // the same scope and this is not an out-of-line definition of
+      // a member function.
+      continue;
+    }
+
+    if (PrevForDefaultArgs->isLocalExternDecl() != New->isLocalExternDecl()) {
+      // If only one of these is a local function declaration, then they are
+      // declared in different scopes, even though isDeclInScope may think
+      // they're in the same scope. (If both are local, the scope check is
+      // sufficient, and if neither is local, then they are in the same scope.)
+      continue;
+    }
+
+    // We found our guy.
+    break;
+  }
+
   // C++ [dcl.fct.default]p4:
   //   For non-template functions, default arguments can be added in
   //   later declarations of a function in the same
@@ -456,34 +495,17 @@
   //   in a member function definition that appears outside of the class
   //   definition are added to the set of default arguments provided by the
   //   member function declaration in the class definition.
-  for (unsigned p = 0, NumParams = Old->getNumParams(); p < NumParams; ++p) {
-    ParmVarDecl *OldParam = Old->getParamDecl(p);
+  for (unsigned p = 0, NumParams = PrevForDefaultArgs
+                                       ? PrevForDefaultArgs->getNumParams()
+                                       : 0;
+       p < NumParams; ++p) {
+    ParmVarDecl *OldParam = PrevForDefaultArgs->getParamDecl(p);
     ParmVarDecl *NewParam = New->getParamDecl(p);
 
-    bool OldParamHasDfl = OldParam->hasDefaultArg();
+    bool OldParamHasDfl = OldParam ? OldParam->hasDefaultArg() : false;
     bool NewParamHasDfl = NewParam->hasDefaultArg();
 
-    // The declaration context corresponding to the scope is the semantic
-    // parent, unless this is a local function declaration, in which case
-    // it is that surrounding function.
-    DeclContext *ScopeDC = New->isLocalExternDecl()
-                               ? New->getLexicalDeclContext()
-                               : New->getDeclContext();
-    if (S && !isDeclInScope(Old, ScopeDC, S) &&
-        !New->getDeclContext()->isRecord())
-      // Ignore default parameters of old decl if they are not in
-      // the same scope and this is not an out-of-line definition of
-      // a member function.
-      OldParamHasDfl = false;
-    if (New->isLocalExternDecl() != Old->isLocalExternDecl())
-      // If only one of these is a local function declaration, then they are
-      // declared in different scopes, even though isDeclInScope may think
-      // they're in the same scope. (If both are local, the scope check is
-      // sufficent, and if neither is local, then they are in the same scope.)
-      OldParamHasDfl = false;
-
     if (OldParamHasDfl && NewParamHasDfl) {
-
       unsigned DiagDefaultParamID =
         diag::err_param_default_argument_redefinition;
 
@@ -491,7 +513,7 @@
       // of template class. The new default parameter's value is ignored.
       Invalid = true;
       if (getLangOpts().MicrosoftExt) {
-        CXXMethodDecl* MD = dyn_cast<CXXMethodDecl>(New);
+        CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(New);
         if (MD && MD->getParent()->getDescribedClassTemplate()) {
           // Merge the old default argument into the new parameter.
           NewParam->setHasInheritedDefaultArg();
@@ -518,7 +540,8 @@
       
       // Look for the function declaration where the default argument was
       // actually written, which may be a declaration prior to Old.
-      for (auto Older = Old; OldParam->hasInheritedDefaultArg();) {
+      for (auto Older = PrevForDefaultArgs;
+           OldParam->hasInheritedDefaultArg(); /**/) {
         Older = Older->getPreviousDecl();
         OldParam = Older->getParamDecl(p);
       }
@@ -543,8 +566,9 @@
         Diag(NewParam->getLocation(),
              diag::err_param_default_argument_template_redecl)
           << NewParam->getDefaultArgRange();
-        Diag(Old->getLocation(), diag::note_template_prev_declaration)
-          << false;
+        Diag(PrevForDefaultArgs->getLocation(),
+             diag::note_template_prev_declaration)
+            << false;
       } else if (New->getTemplateSpecializationKind()
                    != TSK_ImplicitInstantiation &&
                  New->getTemplateSpecializationKind() != TSK_Undeclared) {
@@ -804,7 +828,8 @@
     // - it shall not be virtual;
     const CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(NewFD);
     if (Method && Method->isVirtual()) {
-      Diag(NewFD->getLocation(), diag::err_constexpr_virtual);
+      Method = Method->getCanonicalDecl();
+      Diag(Method->getLocation(), diag::err_constexpr_virtual);
 
       // If it's not obvious why this function is virtual, find an overridden
       // function which uses the 'virtual' keyword.
@@ -1320,57 +1345,6 @@
   return false;
 }
 
-/// \brief Perform propagation of DLL attributes from a derived class to a
-/// templated base class for MS compatibility.
-static void propagateDLLAttrToBaseClassTemplate(
-    Sema &S, CXXRecordDecl *Class, Attr *ClassAttr,
-    ClassTemplateSpecializationDecl *BaseTemplateSpec, SourceLocation BaseLoc) {
-  if (getDLLAttr(
-          BaseTemplateSpec->getSpecializedTemplate()->getTemplatedDecl())) {
-    // If the base class template has a DLL attribute, don't try to change it.
-    return;
-  }
-
-  if (BaseTemplateSpec->getSpecializationKind() == TSK_Undeclared) {
-    // If the base class is not already specialized, we can do the propagation.
-    auto *NewAttr = cast<InheritableAttr>(ClassAttr->clone(S.getASTContext()));
-    NewAttr->setInherited(true);
-    BaseTemplateSpec->addAttr(NewAttr);
-    return;
-  }
-
-  bool DifferentAttribute = false;
-  if (Attr *SpecializationAttr = getDLLAttr(BaseTemplateSpec)) {
-    if (!SpecializationAttr->isInherited()) {
-      // The template has previously been specialized or instantiated with an
-      // explicit attribute. We should not try to change it.
-      return;
-    }
-    if (SpecializationAttr->getKind() == ClassAttr->getKind()) {
-      // The specialization already has the right attribute.
-      return;
-    }
-    DifferentAttribute = true;
-  }
-
-  // The template was previously instantiated or explicitly specialized without
-  // a dll attribute, or the template was previously instantiated with a
-  // different inherited attribute. It's too late for us to change the
-  // attribute, so warn that this is unsupported.
-  S.Diag(BaseLoc, diag::warn_attribute_dll_instantiated_base_class)
-      << BaseTemplateSpec->isExplicitSpecialization() << DifferentAttribute;
-  S.Diag(ClassAttr->getLocation(), diag::note_attribute);
-  if (BaseTemplateSpec->isExplicitSpecialization()) {
-    S.Diag(BaseTemplateSpec->getLocation(),
-           diag::note_template_class_explicit_specialization_was_here)
-        << BaseTemplateSpec;
-  } else {
-    S.Diag(BaseTemplateSpec->getPointOfInstantiation(),
-           diag::note_template_class_instantiation_was_here)
-        << BaseTemplateSpec;
-  }
-}
-
 /// \brief Check the validity of a C++ base class specifier.
 ///
 /// \returns a new CXXBaseSpecifier if well-formed, emits diagnostics
@@ -1442,8 +1416,8 @@
     if (Attr *ClassAttr = getDLLAttr(Class)) {
       if (auto *BaseTemplate = dyn_cast_or_null<ClassTemplateSpecializationDecl>(
               BaseType->getAsCXXRecordDecl())) {
-        propagateDLLAttrToBaseClassTemplate(*this, Class, ClassAttr,
-                                            BaseTemplate, BaseLoc);
+        propagateDLLAttrToBaseClassTemplate(Class, ClassAttr, BaseTemplate,
+                                            BaseLoc);
       }
     }
   }
@@ -4714,15 +4688,15 @@
 }
 
 /// \brief Check class-level dllimport/dllexport attribute.
-static void checkDLLAttribute(Sema &S, CXXRecordDecl *Class) {
+void Sema::checkClassLevelDLLAttribute(CXXRecordDecl *Class) {
   Attr *ClassAttr = getDLLAttr(Class);
 
   // MSVC inherits DLL attributes to partial class template specializations.
-  if (S.Context.getTargetInfo().getCXXABI().isMicrosoft() && !ClassAttr) {
+  if (Context.getTargetInfo().getCXXABI().isMicrosoft() && !ClassAttr) {
     if (auto *Spec = dyn_cast<ClassTemplatePartialSpecializationDecl>(Class)) {
       if (Attr *TemplateAttr =
               getDLLAttr(Spec->getSpecializedTemplate()->getTemplatedDecl())) {
-        auto *A = cast<InheritableAttr>(TemplateAttr->clone(S.getASTContext()));
+        auto *A = cast<InheritableAttr>(TemplateAttr->clone(getASTContext()));
         A->setInherited(true);
         ClassAttr = A;
       }
@@ -4733,12 +4707,12 @@
     return;
 
   if (!Class->isExternallyVisible()) {
-    S.Diag(Class->getLocation(), diag::err_attribute_dll_not_extern)
+    Diag(Class->getLocation(), diag::err_attribute_dll_not_extern)
         << Class << ClassAttr;
     return;
   }
 
-  if (S.Context.getTargetInfo().getCXXABI().isMicrosoft() &&
+  if (Context.getTargetInfo().getCXXABI().isMicrosoft() &&
       !ClassAttr->isInherited()) {
     // Diagnose dll attributes on members of class with dll attribute.
     for (Decl *Member : Class->decls()) {
@@ -4748,10 +4722,10 @@
       if (!MemberAttr || MemberAttr->isInherited() || Member->isInvalidDecl())
         continue;
 
-      S.Diag(MemberAttr->getLocation(),
+      Diag(MemberAttr->getLocation(),
              diag::err_attribute_dll_member_of_dll_class)
           << MemberAttr << ClassAttr;
-      S.Diag(ClassAttr->getLocation(), diag::note_previous_attribute);
+      Diag(ClassAttr->getLocation(), diag::note_previous_attribute);
       Member->setInvalidDecl();
     }
   }
@@ -4766,14 +4740,15 @@
 
   TemplateSpecializationKind TSK = Class->getTemplateSpecializationKind();
 
-  // Don't dllexport explicit class template instantiation declarations.
-  if (ClassExported && TSK == TSK_ExplicitInstantiationDeclaration) {
+  // Ignore explicit dllexport on explicit class template instantiation declarations.
+  if (ClassExported && !ClassAttr->isInherited() &&
+      TSK == TSK_ExplicitInstantiationDeclaration) {
     Class->dropAttr<DLLExportAttr>();
     return;
   }
 
   // Force declaration of implicit members so they can inherit the attribute.
-  S.ForceDeclarationOfImplicitMembers(Class);
+  ForceDeclarationOfImplicitMembers(Class);
 
   // FIXME: MSVC's docs say all bases must be exportable, but this doesn't
   // seem to be true in practice?
@@ -4791,37 +4766,43 @@
       if (MD->isDeleted())
         continue;
 
-      if (MD->isMoveAssignmentOperator() && ClassImported && MD->isInlined()) {
-        // Current MSVC versions don't export the move assignment operators, so
-        // don't attempt to import them if we have a definition.
-        continue;
-      }
-
-      if (MD->isInlined() &&
-          !S.Context.getTargetInfo().getCXXABI().isMicrosoft()) {
+      if (MD->isInlined()) {
         // MinGW does not import or export inline methods.
-        continue;
+        if (!Context.getTargetInfo().getCXXABI().isMicrosoft())
+          continue;
+
+        // MSVC versions before 2015 don't export the move assignment operators,
+        // so don't attempt to import them if we have a definition.
+        if (ClassImported && MD->isMoveAssignmentOperator() &&
+            !getLangOpts().isCompatibleWithMSVC(LangOptions::MSVC2015))
+          continue;
       }
     }
 
+    if (!cast<NamedDecl>(Member)->isExternallyVisible())
+      continue;
+
     if (!getDLLAttr(Member)) {
       auto *NewAttr =
-          cast<InheritableAttr>(ClassAttr->clone(S.getASTContext()));
+          cast<InheritableAttr>(ClassAttr->clone(getASTContext()));
       NewAttr->setInherited(true);
       Member->addAttr(NewAttr);
     }
 
     if (MD && ClassExported) {
+      if (TSK == TSK_ExplicitInstantiationDeclaration)
+        // Don't go any further if this is just an explicit instantiation
+        // declaration.
+        continue;
+
       if (MD->isUserProvided()) {
         // Instantiate non-default class member functions ...
 
         // .. except for certain kinds of template specializations.
-        if (TSK == TSK_ExplicitInstantiationDeclaration)
-          continue;
         if (TSK == TSK_ImplicitInstantiation && !ClassAttr->isInherited())
           continue;
 
-        S.MarkFunctionReferenced(Class->getLocation(), MD);
+        MarkFunctionReferenced(Class->getLocation(), MD);
 
         // The function will be passed to the consumer when its definition is
         // encountered.
@@ -4832,22 +4813,77 @@
         // defaulted methods, and the copy and move assignment operators. The
         // latter are exported even if they are trivial, because the address of
         // an operator can be taken and should compare equal accross libraries.
-        DiagnosticErrorTrap Trap(S.Diags);
-        S.MarkFunctionReferenced(Class->getLocation(), MD);
+        DiagnosticErrorTrap Trap(Diags);
+        MarkFunctionReferenced(Class->getLocation(), MD);
         if (Trap.hasErrorOccurred()) {
-          S.Diag(ClassAttr->getLocation(), diag::note_due_to_dllexported_class)
-              << Class->getName() << !S.getLangOpts().CPlusPlus11;
+          Diag(ClassAttr->getLocation(), diag::note_due_to_dllexported_class)
+              << Class->getName() << !getLangOpts().CPlusPlus11;
           break;
         }
 
         // There is no later point when we will see the definition of this
         // function, so pass it to the consumer now.
-        S.Consumer.HandleTopLevelDecl(DeclGroupRef(MD));
+        Consumer.HandleTopLevelDecl(DeclGroupRef(MD));
       }
     }
   }
 }
 
+/// \brief Perform propagation of DLL attributes from a derived class to a
+/// templated base class for MS compatibility.
+void Sema::propagateDLLAttrToBaseClassTemplate(
+    CXXRecordDecl *Class, Attr *ClassAttr,
+    ClassTemplateSpecializationDecl *BaseTemplateSpec, SourceLocation BaseLoc) {
+  if (getDLLAttr(
+          BaseTemplateSpec->getSpecializedTemplate()->getTemplatedDecl())) {
+    // If the base class template has a DLL attribute, don't try to change it.
+    return;
+  }
+
+  auto TSK = BaseTemplateSpec->getSpecializationKind();
+  if (!getDLLAttr(BaseTemplateSpec) &&
+      (TSK == TSK_Undeclared || TSK == TSK_ExplicitInstantiationDeclaration ||
+       TSK == TSK_ImplicitInstantiation)) {
+    // The template hasn't been instantiated yet (or it has, but only as an
+    // explicit instantiation declaration or implicit instantiation, which means
+    // we haven't codegenned any members yet), so propagate the attribute.
+    auto *NewAttr = cast<InheritableAttr>(ClassAttr->clone(getASTContext()));
+    NewAttr->setInherited(true);
+    BaseTemplateSpec->addAttr(NewAttr);
+
+    // If the template is already instantiated, checkDLLAttributeRedeclaration()
+    // needs to be run again to see the new attribute. Otherwise this will
+    // get run whenever the template is instantiated.
+    if (TSK != TSK_Undeclared)
+      checkClassLevelDLLAttribute(BaseTemplateSpec);
+
+    return;
+  }
+
+  if (getDLLAttr(BaseTemplateSpec)) {
+    // The template has already been specialized or instantiated with an
+    // attribute, explicitly or through propagation. We should not try to change
+    // it.
+    return;
+  }
+
+  // The template was previously instantiated or explicitly specialized without
+  // a dll attribute. It's too late for us to add an attribute, so warn that
+  // this is unsupported.
+  Diag(BaseLoc, diag::warn_attribute_dll_instantiated_base_class)
+      << BaseTemplateSpec->isExplicitSpecialization();
+  Diag(ClassAttr->getLocation(), diag::note_attribute);
+  if (BaseTemplateSpec->isExplicitSpecialization()) {
+    Diag(BaseTemplateSpec->getLocation(),
+           diag::note_template_class_explicit_specialization_was_here)
+        << BaseTemplateSpec;
+  } else {
+    Diag(BaseTemplateSpec->getPointOfInstantiation(),
+           diag::note_template_class_instantiation_was_here)
+        << BaseTemplateSpec;
+  }
+}
+
 /// \brief Perform semantic checks on a class definition that has been
 /// completing, introducing implicitly-declared members, checking for
 /// abstract types, etc.
@@ -4986,7 +5022,7 @@
   //   have inheriting constructors.
   DeclareInheritingConstructors(Record);
 
-  checkDLLAttribute(*this, Record);
+  checkClassLevelDLLAttribute(Record);
 }
 
 /// Look up the special member function that would be called by a special
@@ -9458,6 +9494,7 @@
 
       Expr *DefaultArg = S.BuildCXXDefaultArgExpr(Class->getLocation(), CD,
                                                   CD->getParamDecl(I)).get();
+      S.DiscardCleanupsInEvaluationContext();
       S.Context.addDefaultArgExprForConstructor(CD, I, DefaultArg);
     }
   }
@@ -10211,7 +10248,9 @@
   
   // Assign non-static members.
   for (auto *Field : ClassDecl->fields()) {
-    if (Field->isUnnamedBitfield())
+    // FIXME: We should form some kind of AST representation for the implied
+    // memcpy in a union copy operation.
+    if (Field->isUnnamedBitfield() || Field->getParent()->isUnion())
       continue;
 
     if (Field->isInvalidDecl()) {
@@ -10641,7 +10680,9 @@
 
   // Assign non-static members.
   for (auto *Field : ClassDecl->fields()) {
-    if (Field->isUnnamedBitfield())
+    // FIXME: We should form some kind of AST representation for the implied
+    // memcpy in a union copy operation.
+    if (Field->isUnnamedBitfield() || Field->getParent()->isUnion())
       continue;
 
     if (Field->isInvalidDecl()) {
diff --git a/lib/Sema/SemaDeclObjC.cpp b/lib/Sema/SemaDeclObjC.cpp
index dc47ce9..3831879 100644
--- a/lib/Sema/SemaDeclObjC.cpp
+++ b/lib/Sema/SemaDeclObjC.cpp
@@ -448,6 +448,19 @@
 
 }
 
+static void diagnoseUseOfProtocols(Sema &TheSema,
+                                   ObjCContainerDecl *CD,
+                                   ObjCProtocolDecl *const *ProtoRefs,
+                                   unsigned NumProtoRefs,
+                                   const SourceLocation *ProtoLocs) {
+  assert(ProtoRefs);
+  // Diagnose availability in the context of the ObjC container.
+  Sema::ContextRAII SavedContext(TheSema, CD);
+  for (unsigned i = 0; i < NumProtoRefs; ++i) {
+    (void)TheSema.DiagnoseUseOfDecl(ProtoRefs[i], ProtoLocs[i]);
+  }
+}
+
 Decl *Sema::
 ActOnStartClassInterface(SourceLocation AtInterfaceLoc,
                          IdentifierInfo *ClassName, SourceLocation ClassLoc,
@@ -535,6 +548,8 @@
       ObjCInterfaceDecl *SuperClassDecl =
                                 dyn_cast_or_null<ObjCInterfaceDecl>(PrevDecl);
 
+      // Diagnose availability in the context of the @interface.
+      ContextRAII SavedContext(*this, IDecl);
       // Diagnose classes that inherit from deprecated classes.
       if (SuperClassDecl)
         (void)DiagnoseUseOfDecl(SuperClassDecl, SuperLoc);
@@ -591,6 +606,8 @@
 
   // Check then save referenced protocols.
   if (NumProtoRefs) {
+    diagnoseUseOfProtocols(*this, IDecl, (ObjCProtocolDecl*const*)ProtoRefs,
+                           NumProtoRefs, ProtoLocs);
     IDecl->setProtocolList((ObjCProtocolDecl*const*)ProtoRefs, NumProtoRefs,
                            ProtoLocs, Context);
     IDecl->setEndOfDefinitionLoc(EndProtoLoc);
@@ -751,6 +768,8 @@
 
   if (!err && NumProtoRefs ) {
     /// Check then save referenced protocols.
+    diagnoseUseOfProtocols(*this, PDecl, (ObjCProtocolDecl*const*)ProtoRefs,
+                           NumProtoRefs, ProtoLocs);
     PDecl->setProtocolList((ObjCProtocolDecl*const*)ProtoRefs, NumProtoRefs,
                            ProtoLocs, Context);
   }
@@ -778,7 +797,7 @@
 /// issues an error if they are not declared. It returns list of
 /// protocol declarations in its 'Protocols' argument.
 void
-Sema::FindProtocolDeclaration(bool WarnOnDeclarations,
+Sema::FindProtocolDeclaration(bool WarnOnDeclarations, bool ForObjCContainer,
                               const IdentifierLocPair *ProtocolId,
                               unsigned NumProtocols,
                               SmallVectorImpl<Decl *> &Protocols) {
@@ -804,8 +823,12 @@
     // If this is a forward protocol declaration, get its definition.
     if (!PDecl->isThisDeclarationADefinition() && PDecl->getDefinition())
       PDecl = PDecl->getDefinition();
-    
-    (void)DiagnoseUseOfDecl(PDecl, ProtocolId[i].second);
+
+    // For an objc container, delay protocol reference checking until after we
+    // can set the objc decl as the availability context, otherwise check now.
+    if (!ForObjCContainer) {
+      (void)DiagnoseUseOfDecl(PDecl, ProtocolId[i].second);
+    }
 
     // If this is a forward declaration and we are supposed to warn in this
     // case, do it.
@@ -934,7 +957,9 @@
   CurContext->addDecl(CDecl);
 
   if (NumProtoRefs) {
-    CDecl->setProtocolList((ObjCProtocolDecl*const*)ProtoRefs, NumProtoRefs, 
+    diagnoseUseOfProtocols(*this, CDecl, (ObjCProtocolDecl*const*)ProtoRefs,
+                           NumProtoRefs, ProtoLocs);
+    CDecl->setProtocolList((ObjCProtocolDecl*const*)ProtoRefs, NumProtoRefs,
                            ProtoLocs, Context);
     // Protocols in the class extension belong to the class.
     if (CDecl->IsClassExtension())
@@ -2360,10 +2385,10 @@
   // Diagnose finding more than one method in global pool
   SmallVector<ObjCMethodDecl *, 4> Methods;
   Methods.push_back(BestMethod);
-  for (ObjCMethodList *M = &MethList; M; M = M->getNext())
-    if (M->getMethod() && !M->getMethod()->isHidden() &&
-        M->getMethod() != BestMethod)
-      Methods.push_back(M->getMethod());
+  for (ObjCMethodList *ML = &MethList; ML; ML = ML->getNext())
+    if (ObjCMethodDecl *M = ML->getMethod())
+      if (!M->isHidden() && M != BestMethod && !M->hasAttr<UnavailableAttr>())
+        Methods.push_back(M);
   if (Methods.size() > 1)
     DiagnoseMultipleMethodInGlobalPool(Methods, Sel, R, receiverIdOrClass);
 
@@ -2395,7 +2420,7 @@
                                               bool receiverIdOrClass) {
   // We found multiple methods, so we may have to complain.
   bool issueDiagnostic = false, issueError = false;
-  
+
   // We support a warning which complains about *any* difference in
   // method signature.
   bool strictSelectorMatch =
@@ -2409,7 +2434,7 @@
       }
     }
   }
-  
+
   // If we didn't see any strict differences, we won't see any loose
   // differences.  In ARC, however, we also need to check for loose
   // mismatches, because most of them are errors.
diff --git a/lib/Sema/SemaExceptionSpec.cpp b/lib/Sema/SemaExceptionSpec.cpp
index 51d6ace..f3bcf76 100644
--- a/lib/Sema/SemaExceptionSpec.cpp
+++ b/lib/Sema/SemaExceptionSpec.cpp
@@ -1041,6 +1041,7 @@
   case Expr::CXXReinterpretCastExprClass:
   case Expr::CXXStdInitializerListExprClass:
   case Expr::DesignatedInitExprClass:
+  case Expr::DesignatedInitUpdateExprClass:
   case Expr::ExprWithCleanupsClass:
   case Expr::ExtVectorElementExprClass:
   case Expr::InitListExprClass:
@@ -1135,6 +1136,7 @@
   case Expr::ImaginaryLiteralClass:
   case Expr::ImplicitValueInitExprClass:
   case Expr::IntegerLiteralClass:
+  case Expr::NoInitExprClass:
   case Expr::ObjCEncodeExprClass:
   case Expr::ObjCStringLiteralClass:
   case Expr::ObjCBoolLiteralExprClass:
diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp
index d18aeab..b0bc231 100644
--- a/lib/Sema/SemaExpr.cpp
+++ b/lib/Sema/SemaExpr.cpp
@@ -104,13 +104,29 @@
                            bool ObjCPropertyAccess) {
   // See if this declaration is unavailable or deprecated.
   std::string Message;
+  AvailabilityResult Result = D->getAvailability(&Message);
+
+  // For typedefs, if the typedef declaration appears available look
+  // to the underlying type to see if it is more restrictive.
+  while (const TypedefNameDecl *TD = dyn_cast<TypedefNameDecl>(D)) {
+    if (Result == AR_Available) {
+      if (const TagType *TT = TD->getUnderlyingType()->getAs<TagType>()) {
+        D = TT->getDecl();
+        Result = D->getAvailability(&Message);
+        continue;
+      }
+    }
+    break;
+  }
     
   // Forward class declarations get their attributes from their definition.
   if (ObjCInterfaceDecl *IDecl = dyn_cast<ObjCInterfaceDecl>(D)) {
-    if (IDecl->getDefinition())
+    if (IDecl->getDefinition()) {
       D = IDecl->getDefinition();
+      Result = D->getAvailability(&Message);
+    }
   }
-  AvailabilityResult Result = D->getAvailability(&Message);
+
   if (const EnumConstantDecl *ECD = dyn_cast<EnumConstantDecl>(D))
     if (Result == AR_Available) {
       const DeclContext *DC = ECD->getDeclContext();
@@ -450,12 +466,11 @@
   SourceLocation MissingNilLoc
     = PP.getLocForEndOfToken(sentinelExpr->getLocEnd());
   std::string NullValue;
-  if (calleeType == CT_Method &&
-      PP.getIdentifierInfo("nil")->hasMacroDefinition())
+  if (calleeType == CT_Method && PP.isMacroDefined("nil"))
     NullValue = "nil";
   else if (getLangOpts().CPlusPlus11)
     NullValue = "nullptr";
-  else if (PP.getIdentifierInfo("NULL")->hasMacroDefinition())
+  else if (PP.isMacroDefined("NULL"))
     NullValue = "NULL";
   else
     NullValue = "(void*) 0";
@@ -1095,10 +1110,15 @@
     return RHSType;
   }
 
-  if (LHSFloat)
+  if (LHSFloat) {
+    // Half FP has to be promoted to float unless it is natively supported
+    if (LHSType->isHalfType() && !S.getLangOpts().NativeHalfType)
+      LHSType = S.Context.FloatTy;
+
     return handleIntToFloatConversion(S, LHS, RHS, LHSType, RHSType,
                                       /*convertFloat=*/!IsCompAssign,
                                       /*convertInt=*/ true);
+  }
   assert(RHSFloat);
   return handleIntToFloatConversion(S, RHS, LHS, RHSType, LHSType,
                                     /*convertInt=*/ true,
@@ -3405,6 +3425,22 @@
             Ty = Context.LongTy;
           else if (AllowUnsigned)
             Ty = Context.UnsignedLongTy;
+          // Check according to the rules of C90 6.1.3.2p5. C++03 [lex.icon]p2
+          // is compatible.
+          else if (!getLangOpts().C99 && !getLangOpts().CPlusPlus11) {
+            const unsigned LongLongSize =
+                Context.getTargetInfo().getLongLongWidth();
+            Diag(Tok.getLocation(),
+                 getLangOpts().CPlusPlus
+                     ? Literal.isLong
+                           ? diag::warn_old_implicitly_unsigned_long_cxx
+                           : /*C++98 UB*/ diag::
+                                 ext_old_implicitly_unsigned_long_cxx
+                     : diag::warn_old_implicitly_unsigned_long)
+                << (LongLongSize > LongSize ? /*will have type 'long long'*/ 0
+                                            : /*will be ill-formed*/ 1);
+            Ty = Context.UnsignedLongTy;
+          }
           Width = LongSize;
         }
       }
@@ -6513,6 +6549,8 @@
   DiagnoseConditionalPrecedence(*this, QuestionLoc, Cond.get(), LHS.get(),
                                 RHS.get());
 
+  CheckBoolLikeConversion(Cond.get(), QuestionLoc);
+
   if (!commonExpr)
     return new (Context)
         ConditionalOperator(Cond.get(), QuestionLoc, LHS.get(), ColonLoc,
@@ -9183,6 +9221,9 @@
     }
 
     break;
+  case Expr::MLV_ConstAddrSpace:
+    DiagnoseConstAssignment(S, E, Loc);
+    return true;
   case Expr::MLV_ArrayType:
   case Expr::MLV_ArrayTemporary:
     DiagID = diag::err_typecheck_array_not_modifiable_lvalue;
@@ -12232,7 +12273,7 @@
     if (mightHaveNonExternalLinkage(Func))
       UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc));
     else if (Func->getMostRecentDecl()->isInlined() &&
-             (LangOpts.CPlusPlus || !LangOpts.GNUInline) &&
+             !LangOpts.GNUInline &&
              !Func->getMostRecentDecl()->hasAttr<GNUInlineAttr>())
       UndefinedButUsed.insert(std::make_pair(Func->getCanonicalDecl(), Loc));
   }
@@ -12534,13 +12575,11 @@
 }
 
 /// \brief Create a field within the lambda class for the variable
-///  being captured.  Handle Array captures.  
-static ExprResult addAsFieldToClosureType(Sema &S, 
-                                 LambdaScopeInfo *LSI,
-                                  VarDecl *Var, QualType FieldType, 
-                                  QualType DeclRefType,
-                                  SourceLocation Loc,
-                                  bool RefersToCapturedVariable) {
+/// being captured.
+static void addAsFieldToClosureType(Sema &S, LambdaScopeInfo *LSI, VarDecl *Var,
+                                    QualType FieldType, QualType DeclRefType,
+                                    SourceLocation Loc,
+                                    bool RefersToCapturedVariable) {
   CXXRecordDecl *Lambda = LSI->Lambda;
 
   // Build the non-static data member.
@@ -12551,111 +12590,8 @@
   Field->setImplicit(true);
   Field->setAccess(AS_private);
   Lambda->addDecl(Field);
-
-  // C++11 [expr.prim.lambda]p21:
-  //   When the lambda-expression is evaluated, the entities that
-  //   are captured by copy are used to direct-initialize each
-  //   corresponding non-static data member of the resulting closure
-  //   object. (For array members, the array elements are
-  //   direct-initialized in increasing subscript order.) These
-  //   initializations are performed in the (unspecified) order in
-  //   which the non-static data members are declared.
-      
-  // Introduce a new evaluation context for the initialization, so
-  // that temporaries introduced as part of the capture are retained
-  // to be re-"exported" from the lambda expression itself.
-  EnterExpressionEvaluationContext scope(S, Sema::PotentiallyEvaluated);
-
-  // C++ [expr.prim.labda]p12:
-  //   An entity captured by a lambda-expression is odr-used (3.2) in
-  //   the scope containing the lambda-expression.
-  Expr *Ref = new (S.Context) DeclRefExpr(Var, RefersToCapturedVariable, 
-                                          DeclRefType, VK_LValue, Loc);
-  Var->setReferenced(true);
-  Var->markUsed(S.Context);
-
-  // When the field has array type, create index variables for each
-  // dimension of the array. We use these index variables to subscript
-  // the source array, and other clients (e.g., CodeGen) will perform
-  // the necessary iteration with these index variables.
-  SmallVector<VarDecl *, 4> IndexVariables;
-  QualType BaseType = FieldType;
-  QualType SizeType = S.Context.getSizeType();
-  LSI->ArrayIndexStarts.push_back(LSI->ArrayIndexVars.size());
-  while (const ConstantArrayType *Array
-                        = S.Context.getAsConstantArrayType(BaseType)) {
-    // Create the iteration variable for this array index.
-    IdentifierInfo *IterationVarName = nullptr;
-    {
-      SmallString<8> Str;
-      llvm::raw_svector_ostream OS(Str);
-      OS << "__i" << IndexVariables.size();
-      IterationVarName = &S.Context.Idents.get(OS.str());
-    }
-    VarDecl *IterationVar
-      = VarDecl::Create(S.Context, S.CurContext, Loc, Loc,
-                        IterationVarName, SizeType,
-                        S.Context.getTrivialTypeSourceInfo(SizeType, Loc),
-                        SC_None);
-    IndexVariables.push_back(IterationVar);
-    LSI->ArrayIndexVars.push_back(IterationVar);
-    
-    // Create a reference to the iteration variable.
-    ExprResult IterationVarRef
-      = S.BuildDeclRefExpr(IterationVar, SizeType, VK_LValue, Loc);
-    assert(!IterationVarRef.isInvalid() &&
-           "Reference to invented variable cannot fail!");
-    IterationVarRef = S.DefaultLvalueConversion(IterationVarRef.get());
-    assert(!IterationVarRef.isInvalid() &&
-           "Conversion of invented variable cannot fail!");
-    
-    // Subscript the array with this iteration variable.
-    ExprResult Subscript = S.CreateBuiltinArraySubscriptExpr(
-                             Ref, Loc, IterationVarRef.get(), Loc);
-    if (Subscript.isInvalid()) {
-      S.CleanupVarDeclMarking();
-      S.DiscardCleanupsInEvaluationContext();
-      return ExprError();
-    }
-
-    Ref = Subscript.get();
-    BaseType = Array->getElementType();
-  }
-
-  // Construct the entity that we will be initializing. For an array, this
-  // will be first element in the array, which may require several levels
-  // of array-subscript entities. 
-  SmallVector<InitializedEntity, 4> Entities;
-  Entities.reserve(1 + IndexVariables.size());
-  Entities.push_back(
-    InitializedEntity::InitializeLambdaCapture(Var->getIdentifier(), 
-        Field->getType(), Loc));
-  for (unsigned I = 0, N = IndexVariables.size(); I != N; ++I)
-    Entities.push_back(InitializedEntity::InitializeElement(S.Context,
-                                                            0,
-                                                            Entities.back()));
-
-  InitializationKind InitKind
-    = InitializationKind::CreateDirect(Loc, Loc, Loc);
-  InitializationSequence Init(S, Entities.back(), InitKind, Ref);
-  ExprResult Result(true);
-  if (!Init.Diagnose(S, Entities.back(), InitKind, Ref))
-    Result = Init.Perform(S, Entities.back(), InitKind, Ref);
-
-  // If this initialization requires any cleanups (e.g., due to a
-  // default argument to a copy constructor), note that for the
-  // lambda.
-  if (S.ExprNeedsCleanups)
-    LSI->ExprNeedsCleanups = true;
-
-  // Exit the expression evaluation context used for the capture.
-  S.CleanupVarDeclMarking();
-  S.DiscardCleanupsInEvaluationContext();
-  return Result;
 }
 
-
-
 /// \brief Capture the given variable in the lambda.
 static bool captureInLambda(LambdaScopeInfo *LSI,
                             VarDecl *Var, 
@@ -12733,14 +12669,9 @@
   }
 
   // Capture this variable in the lambda.
-  Expr *CopyExpr = nullptr;
-  if (BuildAndDiagnose) {
-    ExprResult Result = addAsFieldToClosureType(S, LSI, Var, 
-                                        CaptureType, DeclRefType, Loc,
-                                        RefersToCapturedVariable);
-    if (!Result.isInvalid())
-      CopyExpr = Result.get();
-  }
+  if (BuildAndDiagnose)
+    addAsFieldToClosureType(S, LSI, Var, CaptureType, DeclRefType, Loc,
+                            RefersToCapturedVariable);
     
   // Compute the type of a reference to this captured variable.
   if (ByRef)
@@ -12759,18 +12690,20 @@
   // Add the capture.
   if (BuildAndDiagnose)
     LSI->addCapture(Var, /*IsBlock=*/false, ByRef, RefersToCapturedVariable, 
-                    Loc, EllipsisLoc, CaptureType, CopyExpr);
+                    Loc, EllipsisLoc, CaptureType, /*CopyExpr=*/nullptr);
       
   return true;
 }
 
-bool Sema::tryCaptureVariable(VarDecl *Var, SourceLocation ExprLoc, 
-                              TryCaptureKind Kind, SourceLocation EllipsisLoc,
-                              bool BuildAndDiagnose, 
-                              QualType &CaptureType,
-                              QualType &DeclRefType,
-						                const unsigned *const FunctionScopeIndexToStopAt) {
-  bool Nested = Var->isInitCapture();
+bool Sema::tryCaptureVariable(
+    VarDecl *Var, SourceLocation ExprLoc, TryCaptureKind Kind,
+    SourceLocation EllipsisLoc, bool BuildAndDiagnose, QualType &CaptureType,
+    QualType &DeclRefType, const unsigned *const FunctionScopeIndexToStopAt) {
+  // An init-capture is notionally from the context surrounding its
+  // declaration, but its parent DC is the lambda class.
+  DeclContext *VarDC = Var->getDeclContext();
+  if (Var->isInitCapture())
+    VarDC = VarDC->getParent();
   
   DeclContext *DC = CurContext;
   const unsigned MaxFunctionScopesIndex = FunctionScopeIndexToStopAt 
@@ -12786,9 +12719,9 @@
   }
 
   
-  // If the variable is declared in the current context (and is not an 
-  // init-capture), there is no need to capture it.
-  if (!Nested && Var->getDeclContext() == DC) return true;
+  // If the variable is declared in the current context, there is no need to
+  // capture it.
+  if (VarDC == DC) return true;
 
   // Capture global variables if it is required to use private copy of this
   // variable.
@@ -12806,6 +12739,7 @@
   // the variable.
   CaptureType = Var->getType();
   DeclRefType = CaptureType.getNonReferenceType();
+  bool Nested = false;
   bool Explicit = (Kind != TryCapture_Implicit);
   unsigned FunctionScopesIndex = MaxFunctionScopesIndex;
   do {
@@ -13006,7 +12940,7 @@
     FunctionScopesIndex--;
     DC = ParentDC;
     Explicit = false;
-  } while (!Var->getDeclContext()->Equals(DC));
+  } while (!VarDC->Equals(DC));
 
   // Walk back down the scope stack, (e.g. from outer lambda to inner lambda)
   // computing the type of the capture at each step, checking type-specific 
diff --git a/lib/Sema/SemaExprCXX.cpp b/lib/Sema/SemaExprCXX.cpp
index b050c93..6c839f3 100644
--- a/lib/Sema/SemaExprCXX.cpp
+++ b/lib/Sema/SemaExprCXX.cpp
@@ -20,7 +20,6 @@
 #include "clang/AST/CXXInheritance.h"
 #include "clang/AST/CharUnits.h"
 #include "clang/AST/DeclObjC.h"
-#include "clang/AST/EvaluatedExprVisitor.h"
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/ExprObjC.h"
 #include "clang/AST/RecursiveASTVisitor.h"
@@ -2339,6 +2338,260 @@
   return false;
 }
 
+namespace {
+/// \brief Checks whether delete-expression, and new-expression used for
+///  initializing deletee have the same array form.
+class MismatchingNewDeleteDetector {
+public:
+  enum MismatchResult {
+    /// Indicates that there is no mismatch or a mismatch cannot be proven.
+    NoMismatch,
+    /// Indicates that variable is initialized with mismatching form of \a new.
+    VarInitMismatches,
+    /// Indicates that member is initialized with mismatching form of \a new.
+    MemberInitMismatches,
+    /// Indicates that 1 or more constructors' definitions could not been
+    /// analyzed, and they will be checked again at the end of translation unit.
+    AnalyzeLater
+  };
+
+  /// \param EndOfTU True, if this is the final analysis at the end of
+  /// translation unit. False, if this is the initial analysis at the point
+  /// delete-expression was encountered.
+  explicit MismatchingNewDeleteDetector(bool EndOfTU)
+      : IsArrayForm(false), Field(nullptr), EndOfTU(EndOfTU),
+        HasUndefinedConstructors(false) {}
+
+  /// \brief Checks whether pointee of a delete-expression is initialized with
+  /// matching form of new-expression.
+  ///
+  /// If return value is \c VarInitMismatches or \c MemberInitMismatches at the
+  /// point where delete-expression is encountered, then a warning will be
+  /// issued immediately. If return value is \c AnalyzeLater at the point where
+  /// delete-expression is seen, then member will be analyzed at the end of
+  /// translation unit. \c AnalyzeLater is returned iff at least one constructor
+  /// couldn't be analyzed. If at least one constructor initializes the member
+  /// with matching type of new, the return value is \c NoMismatch.
+  MismatchResult analyzeDeleteExpr(const CXXDeleteExpr *DE);
+  /// \brief Analyzes a class member.
+  /// \param Field Class member to analyze.
+  /// \param DeleteWasArrayForm Array form-ness of the delete-expression used
+  /// for deleting the \p Field.
+  MismatchResult analyzeField(FieldDecl *Field, bool DeleteWasArrayForm);
+  /// List of mismatching new-expressions used for initialization of the pointee
+  llvm::SmallVector<const CXXNewExpr *, 4> NewExprs;
+  /// Indicates whether delete-expression was in array form.
+  bool IsArrayForm;
+  FieldDecl *Field;
+
+private:
+  const bool EndOfTU;
+  /// \brief Indicates that there is at least one constructor without body.
+  bool HasUndefinedConstructors;
+  /// \brief Returns \c CXXNewExpr from given initialization expression.
+  /// \param E Expression used for initializing pointee in delete-expression.
+  /// E can be a single-element \c InitListExpr consisting of new-expression.
+  const CXXNewExpr *getNewExprFromInitListOrExpr(const Expr *E);
+  /// \brief Returns whether member is initialized with mismatching form of
+  /// \c new either by the member initializer or in-class initialization.
+  ///
+  /// If bodies of all constructors are not visible at the end of translation
+  /// unit or at least one constructor initializes member with the matching
+  /// form of \c new, mismatch cannot be proven, and this function will return
+  /// \c NoMismatch.
+  MismatchResult analyzeMemberExpr(const MemberExpr *ME);
+  /// \brief Returns whether variable is initialized with mismatching form of
+  /// \c new.
+  ///
+  /// If variable is initialized with matching form of \c new or variable is not
+  /// initialized with a \c new expression, this function will return true.
+  /// If variable is initialized with mismatching form of \c new, returns false.
+  /// \param D Variable to analyze.
+  bool hasMatchingVarInit(const DeclRefExpr *D);
+  /// \brief Checks whether the constructor initializes pointee with mismatching
+  /// form of \c new.
+  ///
+  /// Returns true, if member is initialized with matching form of \c new in
+  /// member initializer list. Returns false, if member is initialized with the
+  /// matching form of \c new in this constructor's initializer or given
+  /// constructor isn't defined at the point where delete-expression is seen, or
+  /// member isn't initialized by the constructor.
+  bool hasMatchingNewInCtor(const CXXConstructorDecl *CD);
+  /// \brief Checks whether member is initialized with matching form of
+  /// \c new in member initializer list.
+  bool hasMatchingNewInCtorInit(const CXXCtorInitializer *CI);
+  /// Checks whether member is initialized with mismatching form of \c new by
+  /// in-class initializer.
+  MismatchResult analyzeInClassInitializer();
+};
+}
+
+MismatchingNewDeleteDetector::MismatchResult
+MismatchingNewDeleteDetector::analyzeDeleteExpr(const CXXDeleteExpr *DE) {
+  NewExprs.clear();
+  assert(DE && "Expected delete-expression");
+  IsArrayForm = DE->isArrayForm();
+  const Expr *E = DE->getArgument()->IgnoreParenImpCasts();
+  if (const MemberExpr *ME = dyn_cast<const MemberExpr>(E)) {
+    return analyzeMemberExpr(ME);
+  } else if (const DeclRefExpr *D = dyn_cast<const DeclRefExpr>(E)) {
+    if (!hasMatchingVarInit(D))
+      return VarInitMismatches;
+  }
+  return NoMismatch;
+}
+
+const CXXNewExpr *
+MismatchingNewDeleteDetector::getNewExprFromInitListOrExpr(const Expr *E) {
+  assert(E != nullptr && "Expected a valid initializer expression");
+  E = E->IgnoreParenImpCasts();
+  if (const InitListExpr *ILE = dyn_cast<const InitListExpr>(E)) {
+    if (ILE->getNumInits() == 1)
+      E = dyn_cast<const CXXNewExpr>(ILE->getInit(0)->IgnoreParenImpCasts());
+  }
+
+  return dyn_cast_or_null<const CXXNewExpr>(E);
+}
+
+bool MismatchingNewDeleteDetector::hasMatchingNewInCtorInit(
+    const CXXCtorInitializer *CI) {
+  const CXXNewExpr *NE = nullptr;
+  if (Field == CI->getMember() &&
+      (NE = getNewExprFromInitListOrExpr(CI->getInit()))) {
+    if (NE->isArray() == IsArrayForm)
+      return true;
+    else
+      NewExprs.push_back(NE);
+  }
+  return false;
+}
+
+bool MismatchingNewDeleteDetector::hasMatchingNewInCtor(
+    const CXXConstructorDecl *CD) {
+  if (CD->isImplicit())
+    return false;
+  const FunctionDecl *Definition = CD;
+  if (!CD->isThisDeclarationADefinition() && !CD->isDefined(Definition)) {
+    HasUndefinedConstructors = true;
+    return EndOfTU;
+  }
+  for (const auto *CI : cast<const CXXConstructorDecl>(Definition)->inits()) {
+    if (hasMatchingNewInCtorInit(CI))
+      return true;
+  }
+  return false;
+}
+
+MismatchingNewDeleteDetector::MismatchResult
+MismatchingNewDeleteDetector::analyzeInClassInitializer() {
+  assert(Field != nullptr && "This should be called only for members");
+  if (const CXXNewExpr *NE =
+          getNewExprFromInitListOrExpr(Field->getInClassInitializer())) {
+    if (NE->isArray() != IsArrayForm) {
+      NewExprs.push_back(NE);
+      return MemberInitMismatches;
+    }
+  }
+  return NoMismatch;
+}
+
+MismatchingNewDeleteDetector::MismatchResult
+MismatchingNewDeleteDetector::analyzeField(FieldDecl *Field,
+                                           bool DeleteWasArrayForm) {
+  assert(Field != nullptr && "Analysis requires a valid class member.");
+  this->Field = Field;
+  IsArrayForm = DeleteWasArrayForm;
+  const CXXRecordDecl *RD = cast<const CXXRecordDecl>(Field->getParent());
+  for (const auto *CD : RD->ctors()) {
+    if (hasMatchingNewInCtor(CD))
+      return NoMismatch;
+  }
+  if (HasUndefinedConstructors)
+    return EndOfTU ? NoMismatch : AnalyzeLater;
+  if (!NewExprs.empty())
+    return MemberInitMismatches;
+  return Field->hasInClassInitializer() ? analyzeInClassInitializer()
+                                        : NoMismatch;
+}
+
+MismatchingNewDeleteDetector::MismatchResult
+MismatchingNewDeleteDetector::analyzeMemberExpr(const MemberExpr *ME) {
+  assert(ME != nullptr && "Expected a member expression");
+  if (FieldDecl *F = dyn_cast<FieldDecl>(ME->getMemberDecl()))
+    return analyzeField(F, IsArrayForm);
+  return NoMismatch;
+}
+
+bool MismatchingNewDeleteDetector::hasMatchingVarInit(const DeclRefExpr *D) {
+  const CXXNewExpr *NE = nullptr;
+  if (const VarDecl *VD = dyn_cast<const VarDecl>(D->getDecl())) {
+    if (VD->hasInit() && (NE = getNewExprFromInitListOrExpr(VD->getInit())) &&
+        NE->isArray() != IsArrayForm) {
+      NewExprs.push_back(NE);
+    }
+  }
+  return NewExprs.empty();
+}
+
+static void
+DiagnoseMismatchedNewDelete(Sema &SemaRef, SourceLocation DeleteLoc,
+                            const MismatchingNewDeleteDetector &Detector) {
+  SourceLocation EndOfDelete = SemaRef.getLocForEndOfToken(DeleteLoc);
+  FixItHint H;
+  if (!Detector.IsArrayForm)
+    H = FixItHint::CreateInsertion(EndOfDelete, "[]");
+  else {
+    SourceLocation RSquare = Lexer::findLocationAfterToken(
+        DeleteLoc, tok::l_square, SemaRef.getSourceManager(),
+        SemaRef.getLangOpts(), true);
+    if (RSquare.isValid())
+      H = FixItHint::CreateRemoval(SourceRange(EndOfDelete, RSquare));
+  }
+  SemaRef.Diag(DeleteLoc, diag::warn_mismatched_delete_new)
+      << Detector.IsArrayForm << H;
+
+  for (const auto *NE : Detector.NewExprs)
+    SemaRef.Diag(NE->getExprLoc(), diag::note_allocated_here)
+        << Detector.IsArrayForm;
+}
+
+void Sema::AnalyzeDeleteExprMismatch(const CXXDeleteExpr *DE) {
+  if (Diags.isIgnored(diag::warn_mismatched_delete_new, SourceLocation()))
+    return;
+  MismatchingNewDeleteDetector Detector(/*EndOfTU=*/false);
+  switch (Detector.analyzeDeleteExpr(DE)) {
+  case MismatchingNewDeleteDetector::VarInitMismatches:
+  case MismatchingNewDeleteDetector::MemberInitMismatches: {
+    DiagnoseMismatchedNewDelete(*this, DE->getLocStart(), Detector);
+    break;
+  }
+  case MismatchingNewDeleteDetector::AnalyzeLater: {
+    DeleteExprs[Detector.Field].push_back(
+        std::make_pair(DE->getLocStart(), DE->isArrayForm()));
+    break;
+  }
+  case MismatchingNewDeleteDetector::NoMismatch:
+    break;
+  }
+}
+
+void Sema::AnalyzeDeleteExprMismatch(FieldDecl *Field, SourceLocation DeleteLoc,
+                                     bool DeleteWasArrayForm) {
+  MismatchingNewDeleteDetector Detector(/*EndOfTU=*/true);
+  switch (Detector.analyzeField(Field, DeleteWasArrayForm)) {
+  case MismatchingNewDeleteDetector::VarInitMismatches:
+    llvm_unreachable("This analysis should have been done for class members.");
+  case MismatchingNewDeleteDetector::AnalyzeLater:
+    llvm_unreachable("Analysis cannot be postponed any point beyond end of "
+                     "translation unit.");
+  case MismatchingNewDeleteDetector::MemberInitMismatches:
+    DiagnoseMismatchedNewDelete(*this, DeleteLoc, Detector);
+    break;
+  case MismatchingNewDeleteDetector::NoMismatch:
+    break;
+  }
+}
+
 /// ActOnCXXDelete - Parsed a C++ 'delete' expression (C++ 5.3.5), as in:
 /// @code ::delete ptr; @endcode
 /// or
@@ -2454,12 +2707,6 @@
       }
     }
 
-    // C++ [expr.delete]p2:
-    //   [Note: a pointer to a const type can be the operand of a
-    //   delete-expression; it is not necessary to cast away the constness
-    //   (5.2.11) of the pointer expression before it is used as the operand
-    //   of the delete-expression. ]
-
     if (Pointee->isArrayType() && !ArrayForm) {
       Diag(StartLoc, diag::warn_delete_array_type)
           << Type << Ex.get()->getSourceRange()
@@ -2534,7 +2781,7 @@
           DeleteName);
 
     MarkFunctionReferenced(StartLoc, OperatorDelete);
-    
+
     // Check access and ambiguity of operator delete and destructor.
     if (PointeeRD) {
       if (CXXDestructorDecl *Dtor = LookupDestructor(PointeeRD)) {
@@ -2544,9 +2791,11 @@
     }
   }
 
-  return new (Context) CXXDeleteExpr(
+  CXXDeleteExpr *Result = new (Context) CXXDeleteExpr(
       Context.VoidTy, UseGlobal, ArrayForm, ArrayFormAsWritten,
       UsualArrayDeleteWantsSize, OperatorDelete, Ex.get(), StartLoc);
+  AnalyzeDeleteExprMismatch(Result);
+  return Result;
 }
 
 /// \brief Check the use of the given variable as a C++ condition in an if,
@@ -3041,10 +3290,10 @@
 
     // We may not have been able to figure out what this member pointer resolved
     // to up until this exact point.  Attempt to lock-in it's inheritance model.
-    QualType FromType = From->getType();
-    if (FromType->isMemberPointerType())
-      if (Context.getTargetInfo().getCXXABI().isMicrosoft())
-        RequireCompleteType(From->getExprLoc(), FromType, 0);
+    if (Context.getTargetInfo().getCXXABI().isMicrosoft()) {
+      RequireCompleteType(From->getExprLoc(), From->getType(), 0);
+      RequireCompleteType(From->getExprLoc(), ToType, 0);
+    }
 
     From = ImpCastExprToType(From, ToType, Kind, VK_RValue, &BasePath, CCK)
              .get();
@@ -5791,6 +6040,16 @@
 
 ExprResult Sema::BuildCXXNoexceptExpr(SourceLocation KeyLoc, Expr *Operand,
                                       SourceLocation RParen) {
+  // If the operand is an unresolved lookup expression, the expression is ill-
+  // formed per [over.over]p1, because overloaded function names cannot be used
+  // without arguments except in explicit contexts.
+  ExprResult R = CheckPlaceholderExpr(Operand);
+  if (R.isInvalid())
+    return R;
+
+  // The operand may have been modified when checking the placeholder type.
+  Operand = R.get();
+
   if (ActiveTemplateInstantiations.empty() &&
       Operand->HasSideEffects(Context, false)) {
     // The expression operand for noexcept is in an unevaluated expression
@@ -6128,6 +6387,8 @@
 class TransformTypos : public TreeTransform<TransformTypos> {
   typedef TreeTransform<TransformTypos> BaseTransform;
 
+  VarDecl *InitDecl; // A decl to avoid as a correction because it is in the
+                     // process of being initialized.
   llvm::function_ref<ExprResult(Expr *)> ExprFilter;
   llvm::SmallSetVector<TypoExpr *, 2> TypoExprs, AmbiguousTypoExprs;
   llvm::SmallDenseMap<TypoExpr *, ExprResult, 2> TransformCache;
@@ -6206,8 +6467,8 @@
   }
 
 public:
-  TransformTypos(Sema &SemaRef, llvm::function_ref<ExprResult(Expr *)> Filter)
-      : BaseTransform(SemaRef), ExprFilter(Filter) {}
+  TransformTypos(Sema &SemaRef, VarDecl *InitDecl, llvm::function_ref<ExprResult(Expr *)> Filter)
+      : BaseTransform(SemaRef), InitDecl(InitDecl), ExprFilter(Filter) {}
 
   ExprResult RebuildCallExpr(Expr *Callee, SourceLocation LParenLoc,
                                    MultiExprArg Args,
@@ -6286,6 +6547,8 @@
     // For the first TypoExpr and an uncached TypoExpr, find the next likely
     // typo correction and return it.
     while (TypoCorrection TC = State.Consumer->getNextCorrection()) {
+      if (InitDecl && TC.getCorrectionDecl() == InitDecl)
+        continue;
       ExprResult NE = State.RecoveryHandler ?
           State.RecoveryHandler(SemaRef, E, TC) :
           attemptRecovery(SemaRef, *State.Consumer, TC);
@@ -6310,8 +6573,9 @@
 };
 }
 
-ExprResult Sema::CorrectDelayedTyposInExpr(
-    Expr *E, llvm::function_ref<ExprResult(Expr *)> Filter) {
+ExprResult
+Sema::CorrectDelayedTyposInExpr(Expr *E, VarDecl *InitDecl,
+                                llvm::function_ref<ExprResult(Expr *)> Filter) {
   // If the current evaluation context indicates there are uncorrected typos
   // and the current expression isn't guaranteed to not have typos, try to
   // resolve any TypoExpr nodes that might be in the expression.
@@ -6322,7 +6586,7 @@
     assert(TyposInContext < ~0U && "Recursive call of CorrectDelayedTyposInExpr");
     ExprEvalContexts.back().NumTypos = ~0U;
     auto TyposResolved = DelayedTypos.size();
-    auto Result = TransformTypos(*this, Filter).Transform(E);
+    auto Result = TransformTypos(*this, InitDecl, Filter).Transform(E);
     ExprEvalContexts.back().NumTypos = TyposInContext;
     TyposResolved -= DelayedTypos.size();
     if (Result.isInvalid() || Result.get() != E) {
diff --git a/lib/Sema/SemaFixItUtils.cpp b/lib/Sema/SemaFixItUtils.cpp
index 32b56bc..2e327ec 100644
--- a/lib/Sema/SemaFixItUtils.cpp
+++ b/lib/Sema/SemaFixItUtils.cpp
@@ -161,11 +161,8 @@
 }
 
 static bool isMacroDefined(const Sema &S, SourceLocation Loc, StringRef Name) {
-  const IdentifierInfo *II = &S.getASTContext().Idents.get(Name);
-  if (!II->hadMacroDefinition()) return false;
-
-  MacroDirective *Macro = S.PP.getMacroDirectiveHistory(II);
-  return Macro && Macro->findDirectiveAtLoc(Loc, S.getSourceManager());
+  return (bool)S.PP.getMacroDefinitionAtLoc(&S.getASTContext().Idents.get(Name),
+                                            Loc);
 }
 
 static std::string getScalarZeroExpressionForType(
diff --git a/lib/Sema/SemaInit.cpp b/lib/Sema/SemaInit.cpp
index 75ccc4e..821d7f6 100644
--- a/lib/Sema/SemaInit.cpp
+++ b/lib/Sema/SemaInit.cpp
@@ -306,7 +306,8 @@
                                            QualType CurrentObjectType,
                                            InitListExpr *StructuredList,
                                            unsigned StructuredIndex,
-                                           SourceRange InitRange);
+                                           SourceRange InitRange,
+                                           bool IsFullyOverwritten = false);
   void UpdateStructuredListElement(InitListExpr *StructuredList,
                                    unsigned &StructuredIndex,
                                    Expr *expr);
@@ -317,11 +318,33 @@
                                      SourceLocation Loc,
                                      const InitializedEntity &Entity,
                                      bool VerifyOnly);
+
+  // Explanation on the "FillWithNoInit" mode:
+  //
+  // Assume we have the following definitions (Case#1):
+  // struct P { char x[6][6]; } xp = { .x[1] = "bar" };
+  // struct PP { struct P lp; } l = { .lp = xp, .lp.x[1][2] = 'f' };
+  //
+  // l.lp.x[1][0..1] should not be filled with implicit initializers because the
+  // "base" initializer "xp" will provide values for them; l.lp.x[1] will be "baf".
+  //
+  // But if we have (Case#2):
+  // struct PP l = { .lp = xp, .lp.x[1] = { [2] = 'f' } };
+  //
+  // l.lp.x[1][0..1] are implicitly initialized and do not use values from the
+  // "base" initializer; l.lp.x[1] will be "\0\0f\0\0\0".
+  //
+  // To distinguish Case#1 from Case#2, and also to avoid leaving many "holes"
+  // in the InitListExpr, the "holes" in Case#1 are filled not with empty
+  // initializers but with special "NoInitExpr" place holders, which tells the
+  // CodeGen not to generate any initializers for these parts.
   void FillInEmptyInitForField(unsigned Init, FieldDecl *Field,
                                const InitializedEntity &ParentEntity,
-                               InitListExpr *ILE, bool &RequiresSecondPass);
+                               InitListExpr *ILE, bool &RequiresSecondPass,
+                               bool FillWithNoInit = false);
   void FillInEmptyInitializations(const InitializedEntity &Entity,
-                                  InitListExpr *ILE, bool &RequiresSecondPass);
+                                  InitListExpr *ILE, bool &RequiresSecondPass,
+                                  bool FillWithNoInit = false);
   bool CheckFlexibleArrayInit(const InitializedEntity &Entity,
                               Expr *InitExpr, FieldDecl *Field,
                               bool TopLevelObject);
@@ -455,12 +478,26 @@
 void InitListChecker::FillInEmptyInitForField(unsigned Init, FieldDecl *Field,
                                         const InitializedEntity &ParentEntity,
                                               InitListExpr *ILE,
-                                              bool &RequiresSecondPass) {
+                                              bool &RequiresSecondPass,
+                                              bool FillWithNoInit) {
   SourceLocation Loc = ILE->getLocEnd();
   unsigned NumInits = ILE->getNumInits();
   InitializedEntity MemberEntity
     = InitializedEntity::InitializeMember(Field, &ParentEntity);
+
+  if (const RecordType *RType = ILE->getType()->getAs<RecordType>())
+    if (!RType->getDecl()->isUnion())
+      assert(Init < NumInits && "This ILE should have been expanded");
+
   if (Init >= NumInits || !ILE->getInit(Init)) {
+    if (FillWithNoInit) {
+      Expr *Filler = new (SemaRef.Context) NoInitExpr(Field->getType());
+      if (Init < NumInits)
+        ILE->setInit(Init, Filler);
+      else
+        ILE->updateInit(SemaRef.Context, Init, Filler);
+      return;
+    }
     // C++1y [dcl.init.aggr]p7:
     //   If there are fewer initializer-clauses in the list than there are
     //   members in the aggregate, then each member not explicitly initialized
@@ -516,7 +553,11 @@
   } else if (InitListExpr *InnerILE
                = dyn_cast<InitListExpr>(ILE->getInit(Init)))
     FillInEmptyInitializations(MemberEntity, InnerILE,
-                               RequiresSecondPass);
+                               RequiresSecondPass, FillWithNoInit);
+  else if (DesignatedInitUpdateExpr *InnerDIUE
+               = dyn_cast<DesignatedInitUpdateExpr>(ILE->getInit(Init)))
+    FillInEmptyInitializations(MemberEntity, InnerDIUE->getUpdater(),
+                               RequiresSecondPass, /*FillWithNoInit =*/ true);
 }
 
 /// Recursively replaces NULL values within the given initializer list
@@ -525,7 +566,8 @@
 void
 InitListChecker::FillInEmptyInitializations(const InitializedEntity &Entity,
                                             InitListExpr *ILE,
-                                            bool &RequiresSecondPass) {
+                                            bool &RequiresSecondPass,
+                                            bool FillWithNoInit) {
   assert((ILE->getType() != SemaRef.Context.VoidTy) &&
          "Should not have void type");
 
@@ -533,16 +575,27 @@
     const RecordDecl *RDecl = RType->getDecl();
     if (RDecl->isUnion() && ILE->getInitializedFieldInUnion())
       FillInEmptyInitForField(0, ILE->getInitializedFieldInUnion(),
-                              Entity, ILE, RequiresSecondPass);
+                              Entity, ILE, RequiresSecondPass, FillWithNoInit);
     else if (RDecl->isUnion() && isa<CXXRecordDecl>(RDecl) &&
              cast<CXXRecordDecl>(RDecl)->hasInClassInitializer()) {
       for (auto *Field : RDecl->fields()) {
         if (Field->hasInClassInitializer()) {
-          FillInEmptyInitForField(0, Field, Entity, ILE, RequiresSecondPass);
+          FillInEmptyInitForField(0, Field, Entity, ILE, RequiresSecondPass,
+                                  FillWithNoInit);
           break;
         }
       }
     } else {
+      // The fields beyond ILE->getNumInits() are default initialized, so in
+      // order to leave them uninitialized, the ILE is expanded and the extra
+      // fields are then filled with NoInitExpr.
+      unsigned NumFields = 0;
+      for (auto *Field : RDecl->fields())
+        if (!Field->isUnnamedBitfield())
+          ++NumFields;
+      if (ILE->getNumInits() < NumFields)
+        ILE->resizeInits(SemaRef.Context, NumFields);
+
       unsigned Init = 0;
       for (auto *Field : RDecl->fields()) {
         if (Field->isUnnamedBitfield())
@@ -551,7 +604,8 @@
         if (hadError)
           return;
 
-        FillInEmptyInitForField(Init, Field, Entity, ILE, RequiresSecondPass);
+        FillInEmptyInitForField(Init, Field, Entity, ILE, RequiresSecondPass,
+                                FillWithNoInit);
         if (hadError)
           return;
 
@@ -594,13 +648,23 @@
       ElementEntity.setElementIndex(Init);
 
     Expr *InitExpr = (Init < NumInits ? ILE->getInit(Init) : nullptr);
-    if (!InitExpr && !ILE->hasArrayFiller()) {
-      ExprResult ElementInit = PerformEmptyInit(SemaRef, ILE->getLocEnd(),
-                                                ElementEntity,
-                                                /*VerifyOnly*/false);
-      if (ElementInit.isInvalid()) {
-        hadError = true;
-        return;
+    if (!InitExpr && Init < NumInits && ILE->hasArrayFiller())
+      ILE->setInit(Init, ILE->getArrayFiller());
+    else if (!InitExpr && !ILE->hasArrayFiller()) {
+      Expr *Filler = nullptr;
+
+      if (FillWithNoInit)
+        Filler = new (SemaRef.Context) NoInitExpr(ElementType);
+      else {
+        ExprResult ElementInit = PerformEmptyInit(SemaRef, ILE->getLocEnd(),
+                                                  ElementEntity,
+                                                  /*VerifyOnly*/false);
+        if (ElementInit.isInvalid()) {
+          hadError = true;
+          return;
+        }
+
+        Filler = ElementInit.getAs<Expr>();
       }
 
       if (hadError) {
@@ -609,29 +673,34 @@
         // For arrays, just set the expression used for value-initialization
         // of the "holes" in the array.
         if (ElementEntity.getKind() == InitializedEntity::EK_ArrayElement)
-          ILE->setArrayFiller(ElementInit.getAs<Expr>());
+          ILE->setArrayFiller(Filler);
         else
-          ILE->setInit(Init, ElementInit.getAs<Expr>());
+          ILE->setInit(Init, Filler);
       } else {
         // For arrays, just set the expression used for value-initialization
         // of the rest of elements and exit.
         if (ElementEntity.getKind() == InitializedEntity::EK_ArrayElement) {
-          ILE->setArrayFiller(ElementInit.getAs<Expr>());
+          ILE->setArrayFiller(Filler);
           return;
         }
 
-        if (!isa<ImplicitValueInitExpr>(ElementInit.get())) {
+        if (!isa<ImplicitValueInitExpr>(Filler) && !isa<NoInitExpr>(Filler)) {
           // Empty initialization requires a constructor call, so
           // extend the initializer list to include the constructor
           // call and make a note that we'll need to take another pass
           // through the initializer list.
-          ILE->updateInit(SemaRef.Context, Init, ElementInit.getAs<Expr>());
+          ILE->updateInit(SemaRef.Context, Init, Filler);
           RequiresSecondPass = true;
         }
       }
     } else if (InitListExpr *InnerILE
                  = dyn_cast_or_null<InitListExpr>(InitExpr))
-      FillInEmptyInitializations(ElementEntity, InnerILE, RequiresSecondPass);
+      FillInEmptyInitializations(ElementEntity, InnerILE, RequiresSecondPass,
+                                 FillWithNoInit);
+    else if (DesignatedInitUpdateExpr *InnerDIUE
+                 = dyn_cast_or_null<DesignatedInitUpdateExpr>(InitExpr))
+      FillInEmptyInitializations(ElementEntity, InnerDIUE->getUpdater(),
+                                 RequiresSecondPass, /*FillWithNoInit =*/ true);
   }
 }
 
@@ -966,13 +1035,26 @@
                               StructuredList, StructuredIndex);
 
   if (InitListExpr *SubInitList = dyn_cast<InitListExpr>(expr)) {
-    if (!SemaRef.getLangOpts().CPlusPlus) {
+    if (SubInitList->getNumInits() == 1 &&
+        IsStringInit(SubInitList->getInit(0), ElemType, SemaRef.Context) ==
+        SIF_None) {
+      expr = SubInitList->getInit(0);
+    } else if (!SemaRef.getLangOpts().CPlusPlus) {
       InitListExpr *InnerStructuredList
         = getStructuredSubobjectInit(IList, Index, ElemType,
                                      StructuredList, StructuredIndex,
-                                     SubInitList->getSourceRange());
+                                     SubInitList->getSourceRange(), true);
       CheckExplicitInitList(Entity, SubInitList, ElemType,
                             InnerStructuredList);
+
+      if (!hadError && !VerifyOnly) {
+        bool RequiresSecondPass = false;
+        FillInEmptyInitializations(Entity, InnerStructuredList,
+                                   RequiresSecondPass);
+        if (RequiresSecondPass && !hadError)
+          FillInEmptyInitializations(Entity, InnerStructuredList,
+                                     RequiresSecondPass);
+      }
       ++StructuredIndex;
       ++Index;
       return;
@@ -1913,11 +1995,66 @@
 
     // Determine the structural initializer list that corresponds to the
     // current subobject.
-    StructuredList = IsFirstDesignator? SyntacticToSemantic.lookup(IList)
-      : getStructuredSubobjectInit(IList, Index, CurrentObjectType,
-                                   StructuredList, StructuredIndex,
-                                   SourceRange(D->getLocStart(),
-                                               DIE->getLocEnd()));
+    if (IsFirstDesignator)
+      StructuredList = SyntacticToSemantic.lookup(IList);
+    else {
+      Expr *ExistingInit = StructuredIndex < StructuredList->getNumInits() ?
+          StructuredList->getInit(StructuredIndex) : nullptr;
+      if (!ExistingInit && StructuredList->hasArrayFiller())
+        ExistingInit = StructuredList->getArrayFiller();
+
+      if (!ExistingInit)
+        StructuredList =
+          getStructuredSubobjectInit(IList, Index, CurrentObjectType,
+                                     StructuredList, StructuredIndex,
+                                     SourceRange(D->getLocStart(),
+                                                 DIE->getLocEnd()));
+      else if (InitListExpr *Result = dyn_cast<InitListExpr>(ExistingInit))
+        StructuredList = Result;
+      else {
+        if (DesignatedInitUpdateExpr *E =
+                dyn_cast<DesignatedInitUpdateExpr>(ExistingInit))
+          StructuredList = E->getUpdater();
+        else {
+          DesignatedInitUpdateExpr *DIUE =
+              new (SemaRef.Context) DesignatedInitUpdateExpr(SemaRef.Context,
+                                        D->getLocStart(), ExistingInit,
+                                        DIE->getLocEnd());
+          StructuredList->updateInit(SemaRef.Context, StructuredIndex, DIUE);
+          StructuredList = DIUE->getUpdater();
+        }
+
+        // We need to check on source range validity because the previous
+        // initializer does not have to be an explicit initializer. e.g.,
+        //
+        // struct P { int a, b; };
+        // struct PP { struct P p; } l = { { .a = 2 }, .p.b = 3 };
+        //
+        // There is an overwrite taking place because the first braced initializer
+        // list "{ .a = 2 }" already provides value for .p.b (which is zero).
+        if (ExistingInit->getSourceRange().isValid()) {
+          // We are creating an initializer list that initializes the
+          // subobjects of the current object, but there was already an
+          // initialization that completely initialized the current
+          // subobject, e.g., by a compound literal:
+          //
+          // struct X { int a, b; };
+          // struct X xs[] = { [0] = (struct X) { 1, 2 }, [0].b = 3 };
+          //
+          // Here, xs[0].a == 0 and xs[0].b == 3, since the second,
+          // designated initializer re-initializes the whole
+          // subobject [0], overwriting previous initializers.
+          SemaRef.Diag(D->getLocStart(),
+                       diag::warn_subobject_initializer_overrides)
+            << SourceRange(D->getLocStart(), DIE->getLocEnd());
+
+          SemaRef.Diag(ExistingInit->getLocStart(),
+                       diag::note_previous_initializer)
+            << /*FIXME:has side effects=*/0
+            << ExistingInit->getSourceRange();
+        }
+      }
+    }
     assert(StructuredList && "Expected a structured initializer list");
   }
 
@@ -2367,7 +2504,8 @@
                                             QualType CurrentObjectType,
                                             InitListExpr *StructuredList,
                                             unsigned StructuredIndex,
-                                            SourceRange InitRange) {
+                                            SourceRange InitRange,
+                                            bool IsFullyOverwritten) {
   if (VerifyOnly)
     return nullptr; // No structured list in verification-only mode.
   Expr *ExistingInit = nullptr;
@@ -2377,7 +2515,16 @@
     ExistingInit = StructuredList->getInit(StructuredIndex);
 
   if (InitListExpr *Result = dyn_cast_or_null<InitListExpr>(ExistingInit))
-    return Result;
+    // There might have already been initializers for subobjects of the current
+    // object, but a subsequent initializer list will overwrite the entirety
+    // of the current object. (See DR 253 and C99 6.7.8p21). e.g.,
+    //
+    // struct P { char x[6]; };
+    // struct P l = { .x[2] = 'x', .x = { [0] = 'f' } };
+    //
+    // The first designated initializer is ignored, and l.x is just "f".
+    if (!IsFullyOverwritten)
+      return Result;
 
   if (ExistingInit) {
     // We are creating an initializer list that initializes the
@@ -2469,13 +2616,22 @@
   if (Expr *PrevInit = StructuredList->updateInit(SemaRef.Context,
                                                   StructuredIndex, expr)) {
     // This initializer overwrites a previous initializer. Warn.
-    SemaRef.Diag(expr->getLocStart(),
-                  diag::warn_initializer_overrides)
-      << expr->getSourceRange();
-    SemaRef.Diag(PrevInit->getLocStart(),
-                  diag::note_previous_initializer)
-      << /*FIXME:has side effects=*/0
-      << PrevInit->getSourceRange();
+    // We need to check on source range validity because the previous
+    // initializer does not have to be an explicit initializer.
+    // struct P { int a, b; };
+    // struct PP { struct P p; } l = { { .a = 2 }, .p.b = 3 };
+    // There is an overwrite taking place because the first braced initializer
+    // list "{ .a = 2 }" already provides value for .p.b (which is zero).
+    if (PrevInit->getSourceRange().isValid()) {
+      SemaRef.Diag(expr->getLocStart(),
+                   diag::warn_initializer_overrides)
+        << expr->getSourceRange();
+
+      SemaRef.Diag(PrevInit->getLocStart(),
+                   diag::note_previous_initializer)
+        << /*FIXME:has side effects=*/0
+        << PrevInit->getSourceRange();
+    }
   }
 
   ++StructuredIndex;
@@ -3101,6 +3257,28 @@
 // Attempt initialization
 //===----------------------------------------------------------------------===//
 
+/// Tries to add a zero initializer. Returns true if that worked.
+static bool
+maybeRecoverWithZeroInitialization(Sema &S, InitializationSequence &Sequence,
+                                   const InitializedEntity &Entity) {
+  if (Entity.getKind() != InitializedEntity::EK_Variable)
+    return false;
+
+  VarDecl *VD = cast<VarDecl>(Entity.getDecl());
+  if (VD->getInit() || VD->getLocEnd().isMacroID())
+    return false;
+
+  QualType VariableTy = VD->getType().getCanonicalType();
+  SourceLocation Loc = S.getLocForEndOfToken(VD->getLocEnd());
+  std::string Init = S.getFixItZeroInitializerForType(VariableTy, Loc);
+  if (!Init.empty()) {
+    Sequence.AddZeroInitializationStep(Entity.getType());
+    Sequence.SetZeroInitializationFixit(Init, Loc);
+    return true;
+  }
+  return false;
+}
+
 static void MaybeProduceObjCObject(Sema &S,
                                    InitializationSequence &Sequence,
                                    const InitializedEntity &Entity) {
@@ -3339,7 +3517,8 @@
   if (Kind.getKind() == InitializationKind::IK_Default &&
       Entity.getType().isConstQualified() &&
       !cast<CXXConstructorDecl>(Best->Function)->isUserProvided()) {
-    Sequence.SetFailed(InitializationSequence::FK_DefaultInitOfConst);
+    if (!maybeRecoverWithZeroInitialization(S, Sequence, Entity))
+      Sequence.SetFailed(InitializationSequence::FK_DefaultInitOfConst);
     return;
   }
 
@@ -3415,6 +3594,11 @@
     Sequence.SetFailed(InitializationSequence::FK_ReferenceBindingToInitList);
     return;
   }
+  // Can't reference initialize a compound literal.
+  if (Entity.getKind() == InitializedEntity::EK_CompoundLiteralInit) {
+    Sequence.SetFailed(InitializationSequence::FK_ReferenceBindingToInitList);
+    return;
+  }
 
   QualType DestType = Entity.getType();
   QualType cv1T1 = DestType->getAs<ReferenceType>()->getPointeeType();
@@ -4231,7 +4415,8 @@
   //   a const-qualified type T, T shall be a class type with a user-provided
   //   default constructor.
   if (DestType.isConstQualified() && S.getLangOpts().CPlusPlus) {
-    Sequence.SetFailed(InitializationSequence::FK_DefaultInitOfConst);
+    if (!maybeRecoverWithZeroInitialization(S, Sequence, Entity))
+      Sequence.SetFailed(InitializationSequence::FK_DefaultInitOfConst);
     return;
   }
 
@@ -5739,6 +5924,115 @@
                                         QualType EntityType,
                                         const Expr *PostInit);
 
+/// Provide warnings when std::move is used on construction.
+static void CheckMoveOnConstruction(Sema &S, const Expr *InitExpr,
+                                    bool IsReturnStmt) {
+  if (!InitExpr)
+    return;
+
+  QualType DestType = InitExpr->getType();
+  if (!DestType->isRecordType())
+    return;
+
+  unsigned DiagID = 0;
+  if (IsReturnStmt) {
+    const CXXConstructExpr *CCE =
+        dyn_cast<CXXConstructExpr>(InitExpr->IgnoreParens());
+    if (!CCE || CCE->getNumArgs() != 1)
+      return;
+
+    if (!CCE->getConstructor()->isCopyOrMoveConstructor())
+      return;
+
+    InitExpr = CCE->getArg(0)->IgnoreImpCasts();
+
+    // Remove implicit temporary and constructor nodes.
+    if (const MaterializeTemporaryExpr *MTE =
+            dyn_cast<MaterializeTemporaryExpr>(InitExpr)) {
+      InitExpr = MTE->GetTemporaryExpr()->IgnoreImpCasts();
+      while (const CXXConstructExpr *CCE =
+                 dyn_cast<CXXConstructExpr>(InitExpr)) {
+        if (isa<CXXTemporaryObjectExpr>(CCE))
+          return;
+        if (CCE->getNumArgs() == 0)
+          return;
+        if (CCE->getNumArgs() > 1 && !isa<CXXDefaultArgExpr>(CCE->getArg(1)))
+          return;
+        InitExpr = CCE->getArg(0);
+      }
+      InitExpr = InitExpr->IgnoreImpCasts();
+      DiagID = diag::warn_redundant_move_on_return;
+    }
+  }
+
+  // Find the std::move call and get the argument.
+  const CallExpr *CE = dyn_cast<CallExpr>(InitExpr->IgnoreParens());
+  if (!CE || CE->getNumArgs() != 1)
+    return;
+
+  const FunctionDecl *MoveFunction = CE->getDirectCallee();
+  if (!MoveFunction || !MoveFunction->isInStdNamespace() ||
+      !MoveFunction->getIdentifier() ||
+      !MoveFunction->getIdentifier()->isStr("move"))
+    return;
+
+  const Expr *Arg = CE->getArg(0)->IgnoreImplicit();
+
+  if (IsReturnStmt) {
+    const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Arg->IgnoreParenImpCasts());
+    if (!DRE || DRE->refersToEnclosingVariableOrCapture())
+      return;
+
+    const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl());
+    if (!VD || !VD->hasLocalStorage())
+      return;
+
+    if (!VD->getType()->isRecordType())
+      return;
+
+    if (DiagID == 0) {
+      DiagID = S.Context.hasSameUnqualifiedType(DestType, VD->getType())
+                   ? diag::warn_pessimizing_move_on_return
+                   : diag::warn_redundant_move_on_return;
+    }
+  } else {
+    DiagID = diag::warn_pessimizing_move_on_initialization;
+    const Expr *ArgStripped = Arg->IgnoreImplicit()->IgnoreParens();
+    if (!ArgStripped->isRValue() || !ArgStripped->getType()->isRecordType())
+      return;
+  }
+
+  S.Diag(CE->getLocStart(), DiagID);
+
+  // Get all the locations for a fix-it.  Don't emit the fix-it if any location
+  // is within a macro.
+  SourceLocation CallBegin = CE->getCallee()->getLocStart();
+  if (CallBegin.isMacroID())
+    return;
+  SourceLocation RParen = CE->getRParenLoc();
+  if (RParen.isMacroID())
+    return;
+  SourceLocation LParen;
+  SourceLocation ArgLoc = Arg->getLocStart();
+
+  // Special testing for the argument location.  Since the fix-it needs the
+  // location right before the argument, the argument location can be in a
+  // macro only if it is at the beginning of the macro.
+  while (ArgLoc.isMacroID() &&
+         S.getSourceManager().isAtStartOfImmediateMacroExpansion(ArgLoc)) {
+    ArgLoc = S.getSourceManager().getImmediateExpansionRange(ArgLoc).first;
+  }
+
+  if (LParen.isMacroID())
+    return;
+
+  LParen = ArgLoc.getLocWithOffset(-1);
+
+  S.Diag(CE->getLocStart(), diag::note_remove_move)
+      << FixItHint::CreateRemoval(SourceRange(CallBegin, LParen))
+      << FixItHint::CreateRemoval(SourceRange(RParen, RParen));
+}
+
 ExprResult
 InitializationSequence::Perform(Sema &S,
                                 const InitializedEntity &Entity,
@@ -5749,6 +6043,21 @@
     Diagnose(S, Entity, Kind, Args);
     return ExprError();
   }
+  if (!ZeroInitializationFixit.empty()) {
+    unsigned DiagID = diag::err_default_init_const;
+    if (Decl *D = Entity.getDecl())
+      if (S.getLangOpts().MSVCCompat && D->hasAttr<SelectAnyAttr>())
+        DiagID = diag::ext_default_init_const;
+
+    // The initialization would have succeeded with this fixit. Since the fixit
+    // is on the error, we need to build a valid AST in this case, so this isn't
+    // handled in the Failed() branch above.
+    QualType DestType = Entity.getType();
+    S.Diag(Kind.getLocation(), DiagID)
+        << DestType << (bool)DestType->getAs<RecordType>()
+        << FixItHint::CreateInsertion(ZeroInitializationFixitLoc,
+                                      ZeroInitializationFixit);
+  }
 
   if (getKind() == DependentSequence) {
     // If the declaration is a non-dependent, incomplete array type
@@ -6453,6 +6762,12 @@
                                   cast<FieldDecl>(Entity.getDecl()),
                                   CurInit.get());
 
+  // Check for std::move on construction.
+  if (const Expr *E = CurInit.get()) {
+    CheckMoveOnConstruction(S, E,
+                            Entity.getKind() == InitializedEntity::EK_Result);
+  }
+
   return CurInit;
 }
 
@@ -6549,26 +6864,6 @@
          "Inconsistent init list check result.");
 }
 
-/// Prints a fixit for adding a null initializer for |Entity|. Call this only
-/// right after emitting a diagnostic.
-static void maybeEmitZeroInitializationFixit(Sema &S,
-                                             InitializationSequence &Sequence,
-                                             const InitializedEntity &Entity) {
-  if (Entity.getKind() != InitializedEntity::EK_Variable)
-    return;
-
-  VarDecl *VD = cast<VarDecl>(Entity.getDecl());
-  if (VD->getInit() || VD->getLocEnd().isMacroID())
-    return;
-
-  QualType VariableTy = VD->getType().getCanonicalType();
-  SourceLocation Loc = S.getLocForEndOfToken(VD->getLocEnd());
-  std::string Init = S.getFixItZeroInitializerForType(VariableTy, Loc);
-
-  S.Diag(Loc, diag::note_add_initializer)
-      << VD << FixItHint::CreateInsertion(Loc, Init);
-}
-
 bool InitializationSequence::Diagnose(Sema &S,
                                       const InitializedEntity &Entity,
                                       const InitializationKind &Kind,
@@ -6716,12 +7011,19 @@
       << Args[0]->getSourceRange();
     break;
 
-  case FK_ReferenceInitDropsQualifiers:
+  case FK_ReferenceInitDropsQualifiers: {
+    QualType SourceType = Args[0]->getType();
+    QualType NonRefType = DestType.getNonReferenceType();
+    Qualifiers DroppedQualifiers =
+        SourceType.getQualifiers() - NonRefType.getQualifiers();
+
     S.Diag(Kind.getLocation(), diag::err_reference_bind_drops_quals)
-      << DestType.getNonReferenceType()
-      << Args[0]->getType()
+      << SourceType
+      << NonRefType
+      << DroppedQualifiers.getCVRQualifiers()
       << Args[0]->getSourceRange();
     break;
+  }
 
   case FK_ReferenceInitFailed:
     S.Diag(Kind.getLocation(), diag::err_reference_bind_failed)
@@ -6900,7 +7202,6 @@
     } else {
       S.Diag(Kind.getLocation(), diag::err_default_init_const)
           << DestType << (bool)DestType->getAs<RecordType>();
-      maybeEmitZeroInitializationFixit(S, *this, Entity);
     }
     break;
 
diff --git a/lib/Sema/SemaLambda.cpp b/lib/Sema/SemaLambda.cpp
index 147dd7e..8220641 100644
--- a/lib/Sema/SemaLambda.cpp
+++ b/lib/Sema/SemaLambda.cpp
@@ -818,7 +818,6 @@
   NewVD->markUsed(Context);
   NewVD->setInit(Init);
   return NewVD;
-
 }
 
 FieldDecl *Sema::buildInitCaptureField(LambdaScopeInfo *LSI, VarDecl *Var) {
@@ -837,7 +836,8 @@
 }
 
 void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro,
-                  Declarator &ParamInfo, Scope *CurScope) {
+                                        Declarator &ParamInfo,
+                                        Scope *CurScope) {
   // Determine if we're within a context where we know that the lambda will
   // be dependent, because there are template parameters in scope.
   bool KnownDependent = false;
@@ -930,12 +930,8 @@
   PushDeclContext(CurScope, Method);
     
   // Build the lambda scope.
-  buildLambdaScope(LSI, Method,
-                       Intro.Range,
-                       Intro.Default, Intro.DefaultLoc,
-                       ExplicitParams,
-                       ExplicitResultType,
-                       !Method->isConst());
+  buildLambdaScope(LSI, Method, Intro.Range, Intro.Default, Intro.DefaultLoc,
+                   ExplicitParams, ExplicitResultType, !Method->isConst());
 
   // C++11 [expr.prim.lambda]p9:
   //   A lambda-expression whose smallest enclosing scope is a block scope is a
@@ -1137,7 +1133,7 @@
 
 void Sema::ActOnLambdaError(SourceLocation StartLoc, Scope *CurScope,
                             bool IsInstantiation) {
-  LambdaScopeInfo *LSI = getCurLambda();
+  LambdaScopeInfo *LSI = cast<LambdaScopeInfo>(FunctionScopes.back());
 
   // Leave the expression-evaluation context.
   DiscardCleanupsInEvaluationContext();
@@ -1379,15 +1375,131 @@
   Conversion->setImplicit(true);
   Class->addDecl(Conversion);
 }
+
+static ExprResult performLambdaVarCaptureInitialization(
+    Sema &S, LambdaScopeInfo::Capture &Capture,
+    FieldDecl *Field,
+    SmallVectorImpl<VarDecl *> &ArrayIndexVars,
+    SmallVectorImpl<unsigned> &ArrayIndexStarts) {
+  assert(Capture.isVariableCapture() && "not a variable capture");
+
+  auto *Var = Capture.getVariable();
+  SourceLocation Loc = Capture.getLocation();
+
+  // C++11 [expr.prim.lambda]p21:
+  //   When the lambda-expression is evaluated, the entities that
+  //   are captured by copy are used to direct-initialize each
+  //   corresponding non-static data member of the resulting closure
+  //   object. (For array members, the array elements are
+  //   direct-initialized in increasing subscript order.) These
+  //   initializations are performed in the (unspecified) order in
+  //   which the non-static data members are declared.
+      
+  // C++ [expr.prim.lambda]p12:
+  //   An entity captured by a lambda-expression is odr-used (3.2) in
+  //   the scope containing the lambda-expression.
+  ExprResult RefResult = S.BuildDeclarationNameExpr(
+      CXXScopeSpec(), DeclarationNameInfo(Var->getDeclName(), Loc), Var);
+  if (RefResult.isInvalid())
+    return ExprError();
+  Expr *Ref = RefResult.get();
+
+  QualType FieldType = Field->getType();
+
+  // When the variable has array type, create index variables for each
+  // dimension of the array. We use these index variables to subscript
+  // the source array, and other clients (e.g., CodeGen) will perform
+  // the necessary iteration with these index variables.
+  //
+  // FIXME: This is dumb. Add a proper AST representation for array
+  // copy-construction and use it here.
+  SmallVector<VarDecl *, 4> IndexVariables;
+  QualType BaseType = FieldType;
+  QualType SizeType = S.Context.getSizeType();
+  ArrayIndexStarts.push_back(ArrayIndexVars.size());
+  while (const ConstantArrayType *Array
+                        = S.Context.getAsConstantArrayType(BaseType)) {
+    // Create the iteration variable for this array index.
+    IdentifierInfo *IterationVarName = nullptr;
+    {
+      SmallString<8> Str;
+      llvm::raw_svector_ostream OS(Str);
+      OS << "__i" << IndexVariables.size();
+      IterationVarName = &S.Context.Idents.get(OS.str());
+    }
+    VarDecl *IterationVar = VarDecl::Create(
+        S.Context, S.CurContext, Loc, Loc, IterationVarName, SizeType,
+        S.Context.getTrivialTypeSourceInfo(SizeType, Loc), SC_None);
+    IterationVar->setImplicit();
+    IndexVariables.push_back(IterationVar);
+    ArrayIndexVars.push_back(IterationVar);
+    
+    // Create a reference to the iteration variable.
+    ExprResult IterationVarRef =
+        S.BuildDeclRefExpr(IterationVar, SizeType, VK_LValue, Loc);
+    assert(!IterationVarRef.isInvalid() &&
+           "Reference to invented variable cannot fail!");
+    IterationVarRef = S.DefaultLvalueConversion(IterationVarRef.get());
+    assert(!IterationVarRef.isInvalid() &&
+           "Conversion of invented variable cannot fail!");
+    
+    // Subscript the array with this iteration variable.
+    ExprResult Subscript =
+        S.CreateBuiltinArraySubscriptExpr(Ref, Loc, IterationVarRef.get(), Loc);
+    if (Subscript.isInvalid())
+      return ExprError();
+
+    Ref = Subscript.get();
+    BaseType = Array->getElementType();
+  }
+
+  // Construct the entity that we will be initializing. For an array, this
+  // will be first element in the array, which may require several levels
+  // of array-subscript entities. 
+  SmallVector<InitializedEntity, 4> Entities;
+  Entities.reserve(1 + IndexVariables.size());
+  Entities.push_back(InitializedEntity::InitializeLambdaCapture(
+      Var->getIdentifier(), FieldType, Loc));
+  for (unsigned I = 0, N = IndexVariables.size(); I != N; ++I)
+    Entities.push_back(
+        InitializedEntity::InitializeElement(S.Context, 0, Entities.back()));
+
+  InitializationKind InitKind = InitializationKind::CreateDirect(Loc, Loc, Loc);
+  InitializationSequence Init(S, Entities.back(), InitKind, Ref);
+  return Init.Perform(S, Entities.back(), InitKind, Ref);
+}
          
 ExprResult Sema::ActOnLambdaExpr(SourceLocation StartLoc, Stmt *Body, 
-                                 Scope *CurScope, 
-                                 bool IsInstantiation) {
+                                 Scope *CurScope) {
+  LambdaScopeInfo LSI = *cast<LambdaScopeInfo>(FunctionScopes.back());
+  ActOnFinishFunctionBody(LSI.CallOperator, Body);
+  return BuildLambdaExpr(StartLoc, Body->getLocEnd(), &LSI);
+}
+
+static LambdaCaptureDefault
+mapImplicitCaptureStyle(CapturingScopeInfo::ImplicitCaptureStyle ICS) {
+  switch (ICS) {
+  case CapturingScopeInfo::ImpCap_None:
+    return LCD_None;
+  case CapturingScopeInfo::ImpCap_LambdaByval:
+    return LCD_ByCopy;
+  case CapturingScopeInfo::ImpCap_CapturedRegion:
+  case CapturingScopeInfo::ImpCap_LambdaByref:
+    return LCD_ByRef;
+  case CapturingScopeInfo::ImpCap_Block:
+    llvm_unreachable("block capture in lambda");
+  }
+  llvm_unreachable("Unknown implicit capture style");
+}
+
+ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc,
+                                 LambdaScopeInfo *LSI) {
   // Collect information from the lambda scope.
   SmallVector<LambdaCapture, 4> Captures;
   SmallVector<Expr *, 4> CaptureInits;
-  LambdaCaptureDefault CaptureDefault;
-  SourceLocation CaptureDefaultLoc;
+  SourceLocation CaptureDefaultLoc = LSI->CaptureDefaultLoc;
+  LambdaCaptureDefault CaptureDefault =
+      mapImplicitCaptureStyle(LSI->ImpCaptureStyle);
   CXXRecordDecl *Class;
   CXXMethodDecl *CallOperator;
   SourceRange IntroducerRange;
@@ -1398,7 +1510,6 @@
   SmallVector<VarDecl *, 4> ArrayIndexVars;
   SmallVector<unsigned, 4> ArrayIndexStarts;
   {
-    LambdaScopeInfo *LSI = getCurLambda();
     CallOperator = LSI->CallOperator;
     Class = LSI->Lambda;
     IntroducerRange = LSI->IntroducerRange;
@@ -1406,11 +1517,21 @@
     ExplicitResultType = !LSI->HasImplicitReturnType;
     LambdaExprNeedsCleanups = LSI->ExprNeedsCleanups;
     ContainsUnexpandedParameterPack = LSI->ContainsUnexpandedParameterPack;
-    ArrayIndexVars.swap(LSI->ArrayIndexVars);
-    ArrayIndexStarts.swap(LSI->ArrayIndexStarts);
     
+    CallOperator->setLexicalDeclContext(Class);
+    Decl *TemplateOrNonTemplateCallOperatorDecl = 
+        CallOperator->getDescribedFunctionTemplate()  
+        ? CallOperator->getDescribedFunctionTemplate() 
+        : cast<Decl>(CallOperator);
+
+    TemplateOrNonTemplateCallOperatorDecl->setLexicalDeclContext(Class);
+    Class->addDecl(TemplateOrNonTemplateCallOperatorDecl);
+
+    PopExpressionEvaluationContext();
+
     // Translate captures.
-    for (unsigned I = 0, N = LSI->Captures.size(); I != N; ++I) {
+    auto CurField = Class->field_begin();
+    for (unsigned I = 0, N = LSI->Captures.size(); I != N; ++I, ++CurField) {
       LambdaScopeInfo::Capture From = LSI->Captures[I];
       assert(!From.isBlockCapture() && "Cannot capture __block variables");
       bool IsImplicit = I >= LSI->NumExplicitCaptures;
@@ -1422,83 +1543,33 @@
         CaptureInits.push_back(new (Context) CXXThisExpr(From.getLocation(),
                                                          getCurrentThisType(),
                                                          /*isImplicit=*/true));
+        ArrayIndexStarts.push_back(ArrayIndexVars.size());
         continue;
       }
       if (From.isVLATypeCapture()) {
         Captures.push_back(
             LambdaCapture(From.getLocation(), IsImplicit, LCK_VLAType));
         CaptureInits.push_back(nullptr);
+        ArrayIndexStarts.push_back(ArrayIndexVars.size());
         continue;
       }
 
       VarDecl *Var = From.getVariable();
-      LambdaCaptureKind Kind = From.isCopyCapture()? LCK_ByCopy : LCK_ByRef;
+      LambdaCaptureKind Kind = From.isCopyCapture() ? LCK_ByCopy : LCK_ByRef;
       Captures.push_back(LambdaCapture(From.getLocation(), IsImplicit, Kind,
                                        Var, From.getEllipsisLoc()));
-      CaptureInits.push_back(From.getInitExpr());
-    }
-
-    switch (LSI->ImpCaptureStyle) {
-    case CapturingScopeInfo::ImpCap_None:
-      CaptureDefault = LCD_None;
-      break;
-
-    case CapturingScopeInfo::ImpCap_LambdaByval:
-      CaptureDefault = LCD_ByCopy;
-      break;
-
-    case CapturingScopeInfo::ImpCap_CapturedRegion:
-    case CapturingScopeInfo::ImpCap_LambdaByref:
-      CaptureDefault = LCD_ByRef;
-      break;
-
-    case CapturingScopeInfo::ImpCap_Block:
-      llvm_unreachable("block capture in lambda");
-      break;
-    }
-    CaptureDefaultLoc = LSI->CaptureDefaultLoc;
-
-    // C++11 [expr.prim.lambda]p4:
-    //   If a lambda-expression does not include a
-    //   trailing-return-type, it is as if the trailing-return-type
-    //   denotes the following type:
-    //
-    // Skip for C++1y return type deduction semantics which uses
-    // different machinery.
-    // FIXME: Refactor and Merge the return type deduction machinery.
-    // FIXME: Assumes current resolution to core issue 975.
-    if (LSI->HasImplicitReturnType && !getLangOpts().CPlusPlus14) {
-      deduceClosureReturnType(*LSI);
-
-      //   - if there are no return statements in the
-      //     compound-statement, or all return statements return
-      //     either an expression of type void or no expression or
-      //     braced-init-list, the type void;
-      if (LSI->ReturnType.isNull()) {
-        LSI->ReturnType = Context.VoidTy;
+      Expr *Init = From.getInitExpr();
+      if (!Init) {
+        auto InitResult = performLambdaVarCaptureInitialization(
+            *this, From, *CurField, ArrayIndexVars, ArrayIndexStarts);
+        if (InitResult.isInvalid())
+          return ExprError();
+        Init = InitResult.get();
+      } else {
+        ArrayIndexStarts.push_back(ArrayIndexVars.size());
       }
-
-      // Create a function type with the inferred return type.
-      const FunctionProtoType *Proto
-        = CallOperator->getType()->getAs<FunctionProtoType>();
-      QualType FunctionTy = Context.getFunctionType(
-          LSI->ReturnType, Proto->getParamTypes(), Proto->getExtProtoInfo());
-      CallOperator->setType(FunctionTy);
+      CaptureInits.push_back(Init);
     }
-    // C++ [expr.prim.lambda]p7:
-    //   The lambda-expression's compound-statement yields the
-    //   function-body (8.4) of the function call operator [...].
-    ActOnFinishFunctionBody(CallOperator, Body, IsInstantiation);
-    CallOperator->setLexicalDeclContext(Class);
-    Decl *TemplateOrNonTemplateCallOperatorDecl = 
-        CallOperator->getDescribedFunctionTemplate()  
-        ? CallOperator->getDescribedFunctionTemplate() 
-        : cast<Decl>(CallOperator);
-
-    TemplateOrNonTemplateCallOperatorDecl->setLexicalDeclContext(Class);
-    Class->addDecl(TemplateOrNonTemplateCallOperatorDecl);
-
-    PopExpressionEvaluationContext();
 
     // C++11 [expr.prim.lambda]p6:
     //   The closure type for a lambda-expression with no lambda-capture
@@ -1534,7 +1605,7 @@
                                           Captures, 
                                           ExplicitParams, ExplicitResultType,
                                           CaptureInits, ArrayIndexVars, 
-                                          ArrayIndexStarts, Body->getLocEnd(),
+                                          ArrayIndexStarts, EndLoc,
                                           ContainsUnexpandedParameterPack);
 
   if (!CurContext->isDependentContext()) {
diff --git a/lib/Sema/SemaLookup.cpp b/lib/Sema/SemaLookup.cpp
index 09424a4..b5ef3a4 100644
--- a/lib/Sema/SemaLookup.cpp
+++ b/lib/Sema/SemaLookup.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 #include "clang/Sema/Lookup.h"
 #include "clang/AST/ASTContext.h"
+#include "clang/AST/ASTMutationListener.h"
 #include "clang/AST/CXXInheritance.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclCXX.h"
@@ -23,7 +24,9 @@
 #include "clang/AST/ExprCXX.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/LangOptions.h"
+#include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/ModuleLoader.h"
+#include "clang/Lex/Preprocessor.h"
 #include "clang/Sema/DeclSpec.h"
 #include "clang/Sema/ExternalSemaSource.h"
 #include "clang/Sema/Overload.h"
@@ -1169,8 +1172,73 @@
   return MSInfo->isExplicitSpecialization() ? D : MSInfo->getInstantiatedFrom();
 }
 
+Module *Sema::getOwningModule(Decl *Entity) {
+  // If it's imported, grab its owning module.
+  Module *M = Entity->getImportedOwningModule();
+  if (M || !isa<NamedDecl>(Entity) || !cast<NamedDecl>(Entity)->isHidden())
+    return M;
+  assert(!Entity->isFromASTFile() &&
+         "hidden entity from AST file has no owning module");
+
+  if (!getLangOpts().ModulesLocalVisibility) {
+    // If we're not tracking visibility locally, the only way a declaration
+    // can be hidden and local is if it's hidden because its parent is (for
+    // instance, maybe this is a lazily-declared special member of an imported
+    // class).
+    auto *Parent = cast<NamedDecl>(Entity->getDeclContext());
+    assert(Parent->isHidden() && "unexpectedly hidden decl");
+    return getOwningModule(Parent);
+  }
+
+  // It's local and hidden; grab or compute its owning module.
+  M = Entity->getLocalOwningModule();
+  if (M)
+    return M;
+
+  if (auto *Containing =
+          PP.getModuleContainingLocation(Entity->getLocation())) {
+    M = Containing;
+  } else if (Entity->isInvalidDecl() || Entity->getLocation().isInvalid()) {
+    // Don't bother tracking visibility for invalid declarations with broken
+    // locations.
+    cast<NamedDecl>(Entity)->setHidden(false);
+  } else {
+    // We need to assign a module to an entity that exists outside of any
+    // module, so that we can hide it from modules that we textually enter.
+    // Invent a fake module for all such entities.
+    if (!CachedFakeTopLevelModule) {
+      CachedFakeTopLevelModule =
+          PP.getHeaderSearchInfo().getModuleMap().findOrCreateModule(
+              "<top-level>", nullptr, false, false).first;
+
+      auto &SrcMgr = PP.getSourceManager();
+      SourceLocation StartLoc =
+          SrcMgr.getLocForStartOfFile(SrcMgr.getMainFileID());
+      auto &TopLevel =
+          VisibleModulesStack.empty() ? VisibleModules : VisibleModulesStack[0];
+      TopLevel.setVisible(CachedFakeTopLevelModule, StartLoc);
+    }
+
+    M = CachedFakeTopLevelModule;
+  }
+
+  if (M)
+    Entity->setLocalOwningModule(M);
+  return M;
+}
+
+void Sema::makeMergedDefinitionVisible(NamedDecl *ND, SourceLocation Loc) {
+  // FIXME: If ND is a template declaration, make the template parameters
+  // visible too. They're not (necessarily) within its DeclContext.
+  if (auto *M = PP.getModuleContainingLocation(Loc))
+    Context.mergeDefinitionIntoModule(ND, M);
+  else
+    // We're not building a module; just make the definition visible.
+    ND->setHidden(false);
+}
+
 /// \brief Find the module in which the given declaration was defined.
-static Module *getDefiningModule(Decl *Entity) {
+static Module *getDefiningModule(Sema &S, Decl *Entity) {
   if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Entity)) {
     // If this function was instantiated from a template, the defining module is
     // the module containing the pattern.
@@ -1192,15 +1260,16 @@
   // from a template.
   DeclContext *Context = Entity->getDeclContext();
   if (Context->isFileContext())
-    return Entity->getOwningModule();
-  return getDefiningModule(cast<Decl>(Context));
+    return S.getOwningModule(Entity);
+  return getDefiningModule(S, cast<Decl>(Context));
 }
 
 llvm::DenseSet<Module*> &Sema::getLookupModules() {
   unsigned N = ActiveTemplateInstantiations.size();
   for (unsigned I = ActiveTemplateInstantiationLookupModules.size();
        I != N; ++I) {
-    Module *M = getDefiningModule(ActiveTemplateInstantiations[I].Entity);
+    Module *M =
+        getDefiningModule(*this, ActiveTemplateInstantiations[I].Entity);
     if (M && !LookupModulesCache.insert(M).second)
       M = nullptr;
     ActiveTemplateInstantiationLookupModules.push_back(M);
@@ -1208,6 +1277,37 @@
   return LookupModulesCache;
 }
 
+bool Sema::hasVisibleMergedDefinition(NamedDecl *Def) {
+  for (Module *Merged : Context.getModulesWithMergedDefinition(Def))
+    if (isModuleVisible(Merged))
+      return true;
+  return false;
+}
+
+template<typename ParmDecl>
+static bool hasVisibleDefaultArgument(Sema &S, const ParmDecl *D) {
+  if (!D->hasDefaultArgument())
+    return false;
+
+  while (D) {
+    auto &DefaultArg = D->getDefaultArgStorage();
+    if (!DefaultArg.isInherited() && S.isVisible(D))
+      return true;
+
+    // If there was a previous default argument, maybe its parameter is visible.
+    D = DefaultArg.getInheritedFrom();
+  }
+  return false;
+}
+
+bool Sema::hasVisibleDefaultArgument(const NamedDecl *D) {
+  if (auto *P = dyn_cast<TemplateTypeParmDecl>(D))
+    return ::hasVisibleDefaultArgument(*this, P);
+  if (auto *P = dyn_cast<NonTypeTemplateParmDecl>(D))
+    return ::hasVisibleDefaultArgument(*this, P);
+  return ::hasVisibleDefaultArgument(*this, cast<TemplateTemplateParmDecl>(D));
+}
+
 /// \brief Determine whether a declaration is visible to name lookup.
 ///
 /// This routine determines whether the declaration D is visible in the current
@@ -1218,16 +1318,39 @@
 /// your module can see, including those later on in your module).
 bool LookupResult::isVisibleSlow(Sema &SemaRef, NamedDecl *D) {
   assert(D->isHidden() && "should not call this: not in slow case");
-  Module *DeclModule = D->getOwningModule();
-  assert(DeclModule && "hidden decl not from a module");
+  Module *DeclModule = SemaRef.getOwningModule(D);
+  if (!DeclModule) {
+    // getOwningModule() may have decided the declaration should not be hidden.
+    assert(!D->isHidden() && "hidden decl not from a module");
+    return true;
+  }
+
+  // If the owning module is visible, and the decl is not module private,
+  // then the decl is visible too. (Module private is ignored within the same
+  // top-level module.)
+  if (!D->isFromASTFile() || !D->isModulePrivate()) {
+    if (SemaRef.isModuleVisible(DeclModule))
+      return true;
+    // Also check merged definitions.
+    if (SemaRef.getLangOpts().ModulesLocalVisibility &&
+        SemaRef.hasVisibleMergedDefinition(D))
+      return true;
+  }
 
   // If this declaration is not at namespace scope nor module-private,
   // then it is visible if its lexical parent has a visible definition.
   DeclContext *DC = D->getLexicalDeclContext();
   if (!D->isModulePrivate() &&
       DC && !DC->isFileContext() && !isa<LinkageSpecDecl>(DC)) {
-    if (SemaRef.hasVisibleDefinition(cast<NamedDecl>(DC))) {
-      if (SemaRef.ActiveTemplateInstantiations.empty()) {
+    // For a parameter, check whether our current template declaration's
+    // lexical context is visible, not whether there's some other visible
+    // definition of it, because parameters aren't "within" the definition.
+    if ((D->isTemplateParameter() || isa<ParmVarDecl>(D))
+            ? isVisible(SemaRef, cast<NamedDecl>(DC))
+            : SemaRef.hasVisibleDefinition(cast<NamedDecl>(DC))) {
+      if (SemaRef.ActiveTemplateInstantiations.empty() &&
+          // FIXME: Do something better in this case.
+          !SemaRef.getLangOpts().ModulesLocalVisibility) {
         // Cache the fact that this declaration is implicitly visible because
         // its parent has a visible definition.
         D->setHidden(false);
@@ -1260,6 +1383,10 @@
   return false;
 }
 
+bool Sema::isVisibleSlow(const NamedDecl *D) {
+  return LookupResult::isVisible(*this, const_cast<NamedDecl*>(D));
+}
+
 /// \brief Retrieve the visible declaration corresponding to D, if any.
 ///
 /// This routine determines whether the declaration D is visible in the current
@@ -2905,6 +3032,9 @@
       if (!isa<FunctionDecl>(D) && !isa<FunctionTemplateDecl>(D))
         continue;
 
+      if (!isVisible(D) && !(D = findAcceptableDecl(*this, D)))
+        continue;
+
       Result.insert(D);
     }
   }
@@ -2973,7 +3103,7 @@
 
 public:
   ShadowContextRAII(VisibleDeclsRecord &Visible) : Visible(Visible) {
-    Visible.ShadowMaps.push_back(ShadowMap());
+    Visible.ShadowMaps.emplace_back();
   }
 
   ~ShadowContextRAII() {
@@ -4515,22 +4645,66 @@
 
 /// Find which declaration we should import to provide the definition of
 /// the given declaration.
-static const NamedDecl *getDefinitionToImport(const NamedDecl *D) {
-  if (const VarDecl *VD = dyn_cast<VarDecl>(D))
+static NamedDecl *getDefinitionToImport(NamedDecl *D) {
+  if (VarDecl *VD = dyn_cast<VarDecl>(D))
     return VD->getDefinition();
   if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D))
-    return FD->isDefined(FD) ? FD : nullptr;
-  if (const TagDecl *TD = dyn_cast<TagDecl>(D))
+    return FD->isDefined(FD) ? const_cast<FunctionDecl*>(FD) : nullptr;
+  if (TagDecl *TD = dyn_cast<TagDecl>(D))
     return TD->getDefinition();
-  if (const ObjCInterfaceDecl *ID = dyn_cast<ObjCInterfaceDecl>(D))
+  if (ObjCInterfaceDecl *ID = dyn_cast<ObjCInterfaceDecl>(D))
     return ID->getDefinition();
-  if (const ObjCProtocolDecl *PD = dyn_cast<ObjCProtocolDecl>(D))
+  if (ObjCProtocolDecl *PD = dyn_cast<ObjCProtocolDecl>(D))
     return PD->getDefinition();
-  if (const TemplateDecl *TD = dyn_cast<TemplateDecl>(D))
+  if (TemplateDecl *TD = dyn_cast<TemplateDecl>(D))
     return getDefinitionToImport(TD->getTemplatedDecl());
   return nullptr;
 }
 
+void Sema::diagnoseMissingImport(SourceLocation Loc, NamedDecl *Decl,
+                                 bool NeedDefinition, bool Recover) {
+  assert(!isVisible(Decl) && "missing import for non-hidden decl?");
+
+  // Suggest importing a module providing the definition of this entity, if
+  // possible.
+  NamedDecl *Def = getDefinitionToImport(Decl);
+  if (!Def)
+    Def = Decl;
+
+  // FIXME: Add a Fix-It that imports the corresponding module or includes
+  // the header.
+  Module *Owner = getOwningModule(Decl);
+  assert(Owner && "definition of hidden declaration is not in a module");
+
+  auto Merged = Context.getModulesWithMergedDefinition(Decl);
+  if (!Merged.empty()) {
+    std::string ModuleList;
+    ModuleList += "\n        ";
+    ModuleList += Owner->getFullModuleName();
+    unsigned N = 0;
+    for (Module *M : Merged) {
+      ModuleList += "\n        ";
+      if (++N == 5 && Merged.size() != N) {
+        ModuleList += "[...]";
+        break;
+      }
+      ModuleList += M->getFullModuleName();
+    }
+
+    Diag(Loc, diag::err_module_private_declaration_multiple)
+      << NeedDefinition << Decl << ModuleList;
+  } else {
+    Diag(Loc, diag::err_module_private_declaration)
+      << NeedDefinition << Decl << Owner->getFullModuleName();
+  }
+  Diag(Decl->getLocation(), NeedDefinition ? diag::note_previous_definition
+                                           : diag::note_previous_declaration);
+
+  // Try to recover by implicitly importing this module.
+  if (Recover)
+    createImplicitModuleImportForErrorRecovery(Loc, Owner);
+}
+
 /// \brief Diagnose a successfully-corrected typo. Separated from the correction
 /// itself to allow external validation of the result, etc.
 ///
@@ -4557,23 +4731,8 @@
     NamedDecl *Decl = Correction.getCorrectionDecl();
     assert(Decl && "import required but no declaration to import");
 
-    // Suggest importing a module providing the definition of this entity, if
-    // possible.
-    const NamedDecl *Def = getDefinitionToImport(Decl);
-    if (!Def)
-      Def = Decl;
-    Module *Owner = Def->getOwningModule();
-    assert(Owner && "definition of hidden declaration is not in a module");
-
-    Diag(Correction.getCorrectionRange().getBegin(),
-         diag::err_module_private_declaration)
-      << Def << Owner->getFullModuleName();
-    Diag(Def->getLocation(), diag::note_previous_declaration);
-
-    // Recover by implicitly importing this module.
-    if (ErrorRecovery)
-      createImplicitModuleImportForErrorRecovery(
-          Correction.getCorrectionRange().getBegin(), Owner);
+    diagnoseMissingImport(Correction.getCorrectionRange().getBegin(), Decl,
+                          /*NeedDefinition*/ false, ErrorRecovery);
     return;
   }
 
diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp
index fed0ac7..cfe8db3 100644
--- a/lib/Sema/SemaOpenMP.cpp
+++ b/lib/Sema/SemaOpenMP.cpp
@@ -82,10 +82,12 @@
   };
   typedef llvm::SmallDenseMap<VarDecl *, DSAInfo, 64> DeclSAMapTy;
   typedef llvm::SmallDenseMap<VarDecl *, DeclRefExpr *, 64> AlignedMapTy;
+  typedef llvm::DenseSet<VarDecl *> LoopControlVariablesSetTy;
 
   struct SharingMapTy {
     DeclSAMapTy SharingMap;
     AlignedMapTy AlignedMap;
+    LoopControlVariablesSetTy LCVSet;
     DefaultDataSharingAttributes DefaultAttr;
     SourceLocation DefaultAttrLoc;
     OpenMPDirectiveKind Directive;
@@ -93,22 +95,28 @@
     Scope *CurScope;
     SourceLocation ConstructLoc;
     bool OrderedRegion;
+    unsigned CollapseNumber;
     SourceLocation InnerTeamsRegionLoc;
     SharingMapTy(OpenMPDirectiveKind DKind, DeclarationNameInfo Name,
                  Scope *CurScope, SourceLocation Loc)
-        : SharingMap(), AlignedMap(), DefaultAttr(DSA_unspecified),
+        : SharingMap(), AlignedMap(), LCVSet(), DefaultAttr(DSA_unspecified),
           Directive(DKind), DirectiveName(std::move(Name)), CurScope(CurScope),
-          ConstructLoc(Loc), OrderedRegion(false), InnerTeamsRegionLoc() {}
+          ConstructLoc(Loc), OrderedRegion(false), CollapseNumber(1),
+          InnerTeamsRegionLoc() {}
     SharingMapTy()
-        : SharingMap(), AlignedMap(), DefaultAttr(DSA_unspecified),
+        : SharingMap(), AlignedMap(), LCVSet(), DefaultAttr(DSA_unspecified),
           Directive(OMPD_unknown), DirectiveName(), CurScope(nullptr),
-          ConstructLoc(), OrderedRegion(false), InnerTeamsRegionLoc() {}
+          ConstructLoc(), OrderedRegion(false), CollapseNumber(1),
+          InnerTeamsRegionLoc() {}
   };
 
   typedef SmallVector<SharingMapTy, 64> StackTy;
 
   /// \brief Stack of used declaration and their data-sharing attributes.
   StackTy Stack;
+  /// \brief true if the check for DSA must be performed on the parent
+  /// directive, false if on the current directive.
+  bool FromParent;
   Sema &SemaRef;
 
   typedef SmallVector<SharingMapTy, 8>::reverse_iterator reverse_iterator;
@@ -119,7 +127,10 @@
   bool isOpenMPLocal(VarDecl *D, StackTy::reverse_iterator Iter);
 
 public:
-  explicit DSAStackTy(Sema &S) : Stack(1), SemaRef(S) {}
+  explicit DSAStackTy(Sema &S) : Stack(1), FromParent(false), SemaRef(S) {}
+
+  bool isFromParent() const { return FromParent; }
+  void setFromParent(bool Flag) { FromParent = Flag; }
 
   void push(OpenMPDirectiveKind DKind, const DeclarationNameInfo &DirName,
             Scope *CurScope, SourceLocation Loc) {
@@ -137,6 +148,12 @@
   /// for diagnostics.
   DeclRefExpr *addUniqueAligned(VarDecl *D, DeclRefExpr *NewDE);
 
+  /// \brief Register specified variable as loop control variable.
+  void addLoopControlVariable(VarDecl *D);
+  /// \brief Check if the specified variable is a loop control variable for
+  /// current region.
+  bool isLoopControlVariable(VarDecl *D);
+
   /// \brief Adds explicit data sharing attribute to the specified declaration.
   void addDSA(VarDecl *D, DeclRefExpr *E, OpenMPClauseKind A);
 
@@ -209,6 +226,13 @@
     return false;
   }
 
+  /// \brief Set collapse value for the region.
+  void setCollapseNumber(unsigned Val) { Stack.back().CollapseNumber = Val; }
+  /// \brief Return collapse value for region.
+  unsigned getCollapseNumber() const {
+    return Stack.back().CollapseNumber;
+  }
+
   /// \brief Marks current target region as one with closely nested teams
   /// region.
   void setParentTeamsRegionLoc(SourceLocation TeamsRegionLoc) {
@@ -356,6 +380,18 @@
   return nullptr;
 }
 
+void DSAStackTy::addLoopControlVariable(VarDecl *D) {
+  assert(Stack.size() > 1 && "Data-sharing attributes stack is empty");
+  D = D->getCanonicalDecl();
+  Stack.back().LCVSet.insert(D);
+}
+
+bool DSAStackTy::isLoopControlVariable(VarDecl *D) {
+  assert(Stack.size() > 1 && "Data-sharing attributes stack is empty");
+  D = D->getCanonicalDecl();
+  return Stack.back().LCVSet.count(D) > 0;
+}
+
 void DSAStackTy::addDSA(VarDecl *D, DeclRefExpr *E, OpenMPClauseKind A) {
   D = D->getCanonicalDecl();
   if (A == OMPC_threadprivate) {
@@ -388,6 +424,28 @@
   return false;
 }
 
+/// \brief Build a variable declaration for OpenMP loop iteration variable.
+static VarDecl *buildVarDecl(Sema &SemaRef, SourceLocation Loc, QualType Type,
+                             StringRef Name) {
+  DeclContext *DC = SemaRef.CurContext;
+  IdentifierInfo *II = &SemaRef.PP.getIdentifierTable().get(Name);
+  TypeSourceInfo *TInfo = SemaRef.Context.getTrivialTypeSourceInfo(Type, Loc);
+  VarDecl *Decl =
+      VarDecl::Create(SemaRef.Context, DC, Loc, Loc, II, Type, TInfo, SC_None);
+  Decl->setImplicit();
+  return Decl;
+}
+
+static DeclRefExpr *buildDeclRefExpr(Sema &S, VarDecl *D, QualType Ty,
+                                     SourceLocation Loc,
+                                     bool RefersToCapture = false) {
+  D->setReferenced();
+  D->markUsed(S.Context);
+  return DeclRefExpr::Create(S.getASTContext(), NestedNameSpecifierLoc(),
+                             SourceLocation(), D, RefersToCapture, Loc, Ty,
+                             VK_LValue);
+}
+
 DSAStackTy::DSAVarData DSAStackTy::getTopDSA(VarDecl *D, bool FromParent) {
   D = D->getCanonicalDecl();
   DSAVarData DVar;
@@ -396,9 +454,11 @@
   // in a Construct, C/C++, predetermined, p.1]
   //  Variables appearing in threadprivate directives are threadprivate.
   if (D->getTLSKind() != VarDecl::TLS_None ||
-      D->getStorageClass() == SC_Register) {
-    DVar.CKind = OMPC_threadprivate;
-    return DVar;
+      (D->getStorageClass() == SC_Register && D->hasAttr<AsmLabelAttr>() &&
+       !D->isLocalVarDecl())) {
+    addDSA(D, buildDeclRefExpr(SemaRef, D, D->getType().getNonReferenceType(),
+                               D->getLocation()),
+           OMPC_threadprivate);
   }
   if (Stack[0].SharingMap.count(D)) {
     DVar.RefExpr = Stack[0].SharingMap[D].RefExpr;
@@ -446,10 +506,7 @@
 
   QualType Type = D->getType().getNonReferenceType().getCanonicalType();
   bool IsConstant = Type.isConstant(SemaRef.getASTContext());
-  while (Type->isArrayType()) {
-    QualType ElemType = cast<ArrayType>(Type.getTypePtr())->getElementType();
-    Type = ElemType.getNonReferenceType().getCanonicalType();
-  }
+  Type = SemaRef.getASTContext().getBaseElementType(Type);
   // OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables Referenced
   // in a Construct, C/C++, predetermined, p.6]
   //  Variables with const qualified type having no mutable member are
@@ -556,11 +613,15 @@
   assert(LangOpts.OpenMP && "OpenMP is not allowed");
   VD = VD->getCanonicalDecl();
   if (DSAStack->getCurrentDirective() != OMPD_unknown) {
-    auto DVarPrivate = DSAStack->getTopDSA(VD, /*FromParent=*/false);
+    if (DSAStack->isLoopControlVariable(VD) ||
+        (VD->hasLocalStorage() &&
+         isParallelOrTaskRegion(DSAStack->getCurrentDirective())))
+      return true;
+    auto DVarPrivate = DSAStack->getTopDSA(VD, DSAStack->isFromParent());
     if (DVarPrivate.CKind != OMPC_unknown && isOpenMPPrivate(DVarPrivate.CKind))
       return true;
     DVarPrivate = DSAStack->hasDSA(VD, isOpenMPPrivate, MatchesAlways(),
-                                   /*FromParent=*/false);
+                                   DSAStack->isFromParent());
     return DVarPrivate.CKind != OMPC_unknown;
   }
   return false;
@@ -575,6 +636,14 @@
   PushExpressionEvaluationContext(PotentiallyEvaluated);
 }
 
+void Sema::StartOpenMPClauses() {
+  DSAStack->setFromParent(/*Flag=*/true);
+}
+
+void Sema::EndOpenMPClauses() {
+  DSAStack->setFromParent(/*Flag=*/false);
+}
+
 void Sema::EndOpenMPDSABlock(Stmt *CurDirective) {
   // OpenMP [2.14.3.5, Restrictions, C/C++, p.1]
   //  A variable of class type (or array thereof) that appears in a lastprivate
@@ -591,6 +660,7 @@
             continue;
           }
           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(DE)->getDecl());
+          QualType Type = VD->getType();
           auto DVar = DSAStack->getTopDSA(VD, false);
           if (DVar.CKind == OMPC_lastprivate) {
             // Generate helper private variable and initialize it with the
@@ -598,18 +668,14 @@
             // by the address of the new private variable in CodeGen. This new
             // variable is not added to IdResolver, so the code in the OpenMP
             // region uses original variable for proper diagnostics.
-            auto *VDPrivate = VarDecl::Create(
-                Context, CurContext, DE->getLocStart(), DE->getExprLoc(),
-                VD->getIdentifier(), VD->getType(), VD->getTypeSourceInfo(),
-                SC_Auto);
+            auto *VDPrivate =
+                buildVarDecl(*this, DE->getExprLoc(), Type.getUnqualifiedType(),
+                             VD->getName());
             ActOnUninitializedDecl(VDPrivate, /*TypeMayContainAuto=*/false);
             if (VDPrivate->isInvalidDecl())
               continue;
-            CurContext->addDecl(VDPrivate);
-            PrivateCopies.push_back(DeclRefExpr::Create(
-                Context, NestedNameSpecifierLoc(), SourceLocation(), VDPrivate,
-                /*RefersToEnclosingVariableOrCapture=*/false, SourceLocation(),
-                DE->getType(), VK_LValue));
+            PrivateCopies.push_back(buildDeclRefExpr(
+                *this, VDPrivate, DE->getType(), DE->getExprLoc()));
           } else {
             // The variable is also a firstprivate, so initialization sequence
             // for private copy is generated already.
@@ -773,7 +839,7 @@
   }
 
   QualType ExprType = VD->getType().getNonReferenceType();
-  ExprResult DE = BuildDeclRefExpr(VD, ExprType, VK_LValue, Id.getLoc());
+  ExprResult DE = buildDeclRefExpr(*this, VD, ExprType, Id.getLoc());
   return DE;
 }
 
@@ -853,7 +919,8 @@
 
     // Check if this is a TLS variable.
     if (VD->getTLSKind() != VarDecl::TLS_None ||
-        VD->getStorageClass() == SC_Register) {
+        (VD->getStorageClass() == SC_Register && VD->hasAttr<AsmLabelAttr>() &&
+         !VD->isLocalVarDecl())) {
       Diag(ILoc, diag::err_omp_var_thread_local)
           << VD << ((VD->getTLSKind() != VarDecl::TLS_None) ? 0 : 1);
       bool IsDecl =
@@ -1144,9 +1211,18 @@
   }
   case OMPD_task: {
     QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
+    QualType Args[] = {Context.VoidPtrTy.withConst().withRestrict()};
+    FunctionProtoType::ExtProtoInfo EPI;
+    EPI.Variadic = true;
+    QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI);
     Sema::CapturedParamNameType Params[] = {
         std::make_pair(".global_tid.", KmpInt32Ty),
         std::make_pair(".part_id.", KmpInt32Ty),
+        std::make_pair(".privates.",
+                       Context.VoidPtrTy.withConst().withRestrict()),
+        std::make_pair(
+            ".copy_fn.",
+            Context.getPointerType(CopyFnType).withConst().withRestrict()),
         std::make_pair(StringRef(), QualType()) // __context with shared vars
     };
     ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
@@ -1211,15 +1287,25 @@
     ActOnCapturedRegionError();
     return StmtError();
   }
-  // Mark all variables in private list clauses as used in inner region. This is
-  // required for proper codegen.
+  // This is required for proper codegen.
   for (auto *Clause : Clauses) {
-    if (isOpenMPPrivate(Clause->getClauseKind())) {
+    if (isOpenMPPrivate(Clause->getClauseKind()) ||
+        Clause->getClauseKind() == OMPC_copyprivate) {
+      // Mark all variables in private list clauses as used in inner region.
       for (auto *VarRef : Clause->children()) {
         if (auto *E = cast_or_null<Expr>(VarRef)) {
           MarkDeclarationsReferencedInExpr(E);
         }
       }
+    } else if (isParallelOrTaskRegion(DSAStack->getCurrentDirective()) &&
+               Clause->getClauseKind() == OMPC_schedule) {
+      // Mark all variables in private list clauses as used in inner region.
+      // Required for proper codegen of combined directives.
+      // TODO: add processing for other clauses.
+      if (auto *E = cast_or_null<Expr>(
+              cast<OMPScheduleClause>(Clause)->getHelperChunkSize())) {
+          MarkDeclarationsReferencedInExpr(E);
+        }
     }
   }
   return ActOnCapturedRegionEnd(S.get());
@@ -1956,7 +2042,7 @@
         TestIsStrictOp(false), SubtractStep(false) {}
   /// \brief Check init-expr for canonical loop form and save loop counter
   /// variable - #Var and its initialization value - #LB.
-  bool CheckInit(Stmt *S);
+  bool CheckInit(Stmt *S, bool EmitDiags = true);
   /// \brief Check test-expr for canonical form, save upper-bound (#UB), flags
   /// for less/greater and for strict/non-strict comparison.
   bool CheckCond(Expr *S);
@@ -1977,6 +2063,8 @@
   bool ShouldSubtractStep() const { return SubtractStep; }
   /// \brief Build the expression to calculate the number of iterations.
   Expr *BuildNumIterations(Scope *S, const bool LimitedType) const;
+  /// \brief Build the precondition expression for the loops.
+  Expr *BuildPreCond(Scope *S, Expr *Cond) const;
   /// \brief Build reference expression to the counter be used for codegen.
   Expr *BuildCounterVar() const;
   /// \brief Build initization of the counter be used for codegen.
@@ -2094,7 +2182,7 @@
   return false;
 }
 
-bool OpenMPIterationSpaceChecker::CheckInit(Stmt *S) {
+bool OpenMPIterationSpaceChecker::CheckInit(Stmt *S, bool EmitDiags) {
   // Check init-expr for canonical loop form and save loop counter
   // variable - #Var and its initialization value - #LB.
   // OpenMP [2.6] Canonical loop form. init-expr may be one of the following:
@@ -2104,7 +2192,9 @@
   //   pointer-type var = lb
   //
   if (!S) {
-    SemaRef.Diag(DefaultLoc, diag::err_omp_loop_not_canonical_init);
+    if (EmitDiags) {
+      SemaRef.Diag(DefaultLoc, diag::err_omp_loop_not_canonical_init);
+    }
     return true;
   }
   InitSrcRange = S->getSourceRange();
@@ -2120,7 +2210,7 @@
       if (auto Var = dyn_cast_or_null<VarDecl>(DS->getSingleDecl())) {
         if (Var->hasInit()) {
           // Accept non-canonical init form here but emit ext. warning.
-          if (Var->getInitStyle() != VarDecl::CInit)
+          if (Var->getInitStyle() != VarDecl::CInit && EmitDiags)
             SemaRef.Diag(S->getLocStart(),
                          diag::ext_omp_loop_not_canonical_init)
                 << S->getSourceRange();
@@ -2134,8 +2224,10 @@
         return SetVarAndLB(dyn_cast<VarDecl>(DRE->getDecl()), DRE,
                            CE->getArg(1));
 
-  SemaRef.Diag(S->getLocStart(), diag::err_omp_loop_not_canonical_init)
-      << S->getSourceRange();
+  if (EmitDiags) {
+    SemaRef.Diag(S->getLocStart(), diag::err_omp_loop_not_canonical_init)
+        << S->getSourceRange();
+  }
   return true;
 }
 
@@ -2380,11 +2472,22 @@
   return Diff.get();
 }
 
+Expr *OpenMPIterationSpaceChecker::BuildPreCond(Scope *S, Expr *Cond) const {
+  // Try to build LB <op> UB, where <op> is <, >, <=, or >=.
+  bool Suppress = SemaRef.getDiagnostics().getSuppressAllDiagnostics();
+  SemaRef.getDiagnostics().setSuppressAllDiagnostics(/*Val=*/true);
+  auto CondExpr = SemaRef.BuildBinOp(
+      S, DefaultLoc, TestIsLessOp ? (TestIsStrictOp ? BO_LT : BO_LE)
+                                  : (TestIsStrictOp ? BO_GT : BO_GE),
+      LB, UB);
+  SemaRef.getDiagnostics().setSuppressAllDiagnostics(Suppress);
+  // Otherwise use original loop conditon and evaluate it in runtime.
+  return CondExpr.isUsable() ? CondExpr.get() : Cond;
+}
+
 /// \brief Build reference expression to the counter be used for codegen.
 Expr *OpenMPIterationSpaceChecker::BuildCounterVar() const {
-  return DeclRefExpr::Create(SemaRef.Context, NestedNameSpecifierLoc(),
-                             GetIncrementSrcRange().getBegin(), Var, false,
-                             DefaultLoc, Var->getType(), VK_LValue);
+  return buildDeclRefExpr(SemaRef, Var, Var->getType(), DefaultLoc);
 }
 
 /// \brief Build initization of the counter be used for codegen.
@@ -2395,6 +2498,8 @@
 
 /// \brief Iteration space of a single for loop.
 struct LoopIterationSpace {
+  /// \brief Condition of the loop.
+  Expr *PreCond;
   /// \brief This expression calculates the number of iterations in the loop.
   /// It is always possible to calculate it before starting the loop.
   Expr *NumIterations;
@@ -2417,6 +2522,20 @@
 
 } // namespace
 
+void Sema::ActOnOpenMPLoopInitialization(SourceLocation ForLoc, Stmt *Init) {
+  assert(getLangOpts().OpenMP && "OpenMP is not active.");
+  assert(Init && "Expected loop in canonical form.");
+  unsigned CollapseIteration = DSAStack->getCollapseNumber();
+  if (CollapseIteration > 0 &&
+      isOpenMPLoopDirective(DSAStack->getCurrentDirective())) {
+    OpenMPIterationSpaceChecker ISC(*this, ForLoc);
+    if (!ISC.CheckInit(Init, /*EmitDiags=*/false)) {
+      DSAStack->addLoopControlVariable(ISC.GetLoopVar());
+    }
+    DSAStack->setCollapseNumber(CollapseIteration - 1);
+  }
+}
+
 /// \brief Called on a for stmt to check and extract its iteration space
 /// for further processing (such as collapsing).
 static bool CheckOpenMPIterationSpace(
@@ -2495,32 +2614,27 @@
           ? ((NestedLoopCount == 1) ? OMPC_linear : OMPC_lastprivate)
           : OMPC_private;
   if (((isOpenMPSimdDirective(DKind) && DVar.CKind != OMPC_unknown &&
-        DVar.CKind != PredeterminedCKind) ||
+        DVar.CKind != OMPC_threadprivate && DVar.CKind != PredeterminedCKind) ||
        (isOpenMPWorksharingDirective(DKind) && !isOpenMPSimdDirective(DKind) &&
         DVar.CKind != OMPC_unknown && DVar.CKind != OMPC_private &&
-        DVar.CKind != OMPC_lastprivate)) &&
-      (DVar.CKind != OMPC_private || DVar.RefExpr != nullptr)) {
+        DVar.CKind != OMPC_lastprivate && DVar.CKind != OMPC_threadprivate)) &&
+      ((DVar.CKind != OMPC_private && DVar.CKind != OMPC_threadprivate) ||
+       DVar.RefExpr != nullptr)) {
     SemaRef.Diag(Init->getLocStart(), diag::err_omp_loop_var_dsa)
         << getOpenMPClauseName(DVar.CKind) << getOpenMPDirectiveName(DKind)
         << getOpenMPClauseName(PredeterminedCKind);
-    ReportOriginalDSA(SemaRef, &DSA, Var, DVar, true);
+    if (DVar.RefExpr == nullptr)
+      DVar.CKind = PredeterminedCKind;
+    ReportOriginalDSA(SemaRef, &DSA, Var, DVar, /*IsLoopIterVar=*/true);
     HasErrors = true;
   } else if (LoopVarRefExpr != nullptr) {
     // Make the loop iteration variable private (for worksharing constructs),
     // linear (for simd directives with the only one associated loop) or
     // lastprivate (for simd directives with several collapsed loops).
-    // FIXME: the next check and error message must be removed once the
-    // capturing of global variables in loops is fixed.
     if (DVar.CKind == OMPC_unknown)
       DVar = DSA.hasDSA(Var, isOpenMPPrivate, MatchesAlways(),
                         /*FromParent=*/false);
-    if (!Var->hasLocalStorage() && DVar.CKind == OMPC_unknown) {
-      SemaRef.Diag(Init->getLocStart(), diag::err_omp_global_loop_var_dsa)
-          << getOpenMPClauseName(PredeterminedCKind)
-          << getOpenMPDirectiveName(DKind);
-      HasErrors = true;
-    } else
-      DSA.addDSA(Var, LoopVarRefExpr, PredeterminedCKind);
+    DSA.addDSA(Var, LoopVarRefExpr, PredeterminedCKind);
   }
 
   assert(isOpenMPLoopDirective(DKind) && "DSA for non-loop vars");
@@ -2535,6 +2649,7 @@
     return HasErrors;
 
   // Build the loop's iteration space representation.
+  ResultIterSpace.PreCond = ISC.BuildPreCond(DSA.getCurScope(), For->getCond());
   ResultIterSpace.NumIterations = ISC.BuildNumIterations(
       DSA.getCurScope(), /* LimitedType */ isOpenMPWorksharingDirective(DKind));
   ResultIterSpace.CounterVar = ISC.BuildCounterVar();
@@ -2545,7 +2660,8 @@
   ResultIterSpace.IncSrcRange = ISC.GetIncrementSrcRange();
   ResultIterSpace.Subtract = ISC.ShouldSubtractStep();
 
-  HasErrors |= (ResultIterSpace.NumIterations == nullptr ||
+  HasErrors |= (ResultIterSpace.PreCond == nullptr ||
+                ResultIterSpace.NumIterations == nullptr ||
                 ResultIterSpace.CounterVar == nullptr ||
                 ResultIterSpace.CounterInit == nullptr ||
                 ResultIterSpace.CounterStep == nullptr);
@@ -2553,18 +2669,6 @@
   return HasErrors;
 }
 
-/// \brief Build a variable declaration for OpenMP loop iteration variable.
-static VarDecl *BuildVarDecl(Sema &SemaRef, SourceLocation Loc, QualType Type,
-                             StringRef Name) {
-  DeclContext *DC = SemaRef.CurContext;
-  IdentifierInfo *II = &SemaRef.PP.getIdentifierTable().get(Name);
-  TypeSourceInfo *TInfo = SemaRef.Context.getTrivialTypeSourceInfo(Type, Loc);
-  VarDecl *Decl =
-      VarDecl::Create(SemaRef.Context, DC, Loc, Loc, II, Type, TInfo, SC_None);
-  Decl->setImplicit();
-  return Decl;
-}
-
 /// \brief Build 'VarRef = Start + Iter * Step'.
 static ExprResult BuildCounterUpdate(Sema &SemaRef, Scope *S,
                                      SourceLocation Loc, ExprResult VarRef,
@@ -2690,6 +2794,9 @@
 
   // Last iteration number is (I1 * I2 * ... In) - 1, where I1, I2 ... In are
   // the iteration counts of the collapsed for loops.
+  // Precondition tests if there is at least one iteration (all conditions are
+  // true).
+  auto PreCond = ExprResult(IterSpaces[0].PreCond);
   auto N0 = IterSpaces[0].NumIterations;
   ExprResult LastIteration32 = WidenIterationCount(32 /* Bits */, N0, SemaRef);
   ExprResult LastIteration64 = WidenIterationCount(64 /* Bits */, N0, SemaRef);
@@ -2702,6 +2809,10 @@
 
   Scope *CurScope = DSA.getCurScope();
   for (unsigned Cnt = 1; Cnt < NestedLoopCount; ++Cnt) {
+    if (PreCond.isUsable()) {
+      PreCond = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LAnd,
+                                   PreCond.get(), IterSpaces[Cnt].PreCond);
+    }
     auto N = IterSpaces[Cnt].NumIterations;
     AllCountsNeedLessThan32Bits &= C.getTypeSize(N->getType()) < 32;
     if (LastIteration32.isUsable())
@@ -2745,10 +2856,10 @@
   if (!IsConstant) {
     SourceLocation SaveLoc;
     VarDecl *SaveVar =
-        BuildVarDecl(SemaRef, SaveLoc, LastIteration.get()->getType(),
+        buildVarDecl(SemaRef, SaveLoc, LastIteration.get()->getType(),
                      ".omp.last.iteration");
-    ExprResult SaveRef = SemaRef.BuildDeclRefExpr(
-        SaveVar, LastIteration.get()->getType(), VK_LValue, SaveLoc);
+    ExprResult SaveRef = buildDeclRefExpr(
+        SemaRef, SaveVar, LastIteration.get()->getType(), SaveLoc);
     CalcLastIteration = SemaRef.BuildBinOp(CurScope, SaveLoc, BO_Assign,
                                            SaveRef.get(), LastIteration.get());
     LastIteration = SaveRef;
@@ -2763,25 +2874,20 @@
 
   SourceLocation InitLoc = IterSpaces[0].InitSrcRange.getBegin();
 
-  // Precondition tests if there is at least one iteration (LastIteration > 0).
-  ExprResult PreCond = SemaRef.BuildBinOp(
-      CurScope, InitLoc, BO_GT, LastIteration.get(),
-      SemaRef.ActOnIntegerConstant(SourceLocation(), 0).get());
-
   QualType VType = LastIteration.get()->getType();
   // Build variables passed into runtime, nesessary for worksharing directives.
   ExprResult LB, UB, IL, ST, EUB;
   if (isOpenMPWorksharingDirective(DKind)) {
     // Lower bound variable, initialized with zero.
-    VarDecl *LBDecl = BuildVarDecl(SemaRef, InitLoc, VType, ".omp.lb");
-    LB = SemaRef.BuildDeclRefExpr(LBDecl, VType, VK_LValue, InitLoc);
+    VarDecl *LBDecl = buildVarDecl(SemaRef, InitLoc, VType, ".omp.lb");
+    LB = buildDeclRefExpr(SemaRef, LBDecl, VType, InitLoc);
     SemaRef.AddInitializerToDecl(
         LBDecl, SemaRef.ActOnIntegerConstant(InitLoc, 0).get(),
         /*DirectInit*/ false, /*TypeMayContainAuto*/ false);
 
     // Upper bound variable, initialized with last iteration number.
-    VarDecl *UBDecl = BuildVarDecl(SemaRef, InitLoc, VType, ".omp.ub");
-    UB = SemaRef.BuildDeclRefExpr(UBDecl, VType, VK_LValue, InitLoc);
+    VarDecl *UBDecl = buildVarDecl(SemaRef, InitLoc, VType, ".omp.ub");
+    UB = buildDeclRefExpr(SemaRef, UBDecl, VType, InitLoc);
     SemaRef.AddInitializerToDecl(UBDecl, LastIteration.get(),
                                  /*DirectInit*/ false,
                                  /*TypeMayContainAuto*/ false);
@@ -2789,15 +2895,15 @@
     // A 32-bit variable-flag where runtime returns 1 for the last iteration.
     // This will be used to implement clause 'lastprivate'.
     QualType Int32Ty = SemaRef.Context.getIntTypeForBitwidth(32, true);
-    VarDecl *ILDecl = BuildVarDecl(SemaRef, InitLoc, Int32Ty, ".omp.is_last");
-    IL = SemaRef.BuildDeclRefExpr(ILDecl, Int32Ty, VK_LValue, InitLoc);
+    VarDecl *ILDecl = buildVarDecl(SemaRef, InitLoc, Int32Ty, ".omp.is_last");
+    IL = buildDeclRefExpr(SemaRef, ILDecl, Int32Ty, InitLoc);
     SemaRef.AddInitializerToDecl(
         ILDecl, SemaRef.ActOnIntegerConstant(InitLoc, 0).get(),
         /*DirectInit*/ false, /*TypeMayContainAuto*/ false);
 
     // Stride variable returned by runtime (we initialize it to 1 by default).
-    VarDecl *STDecl = BuildVarDecl(SemaRef, InitLoc, VType, ".omp.stride");
-    ST = SemaRef.BuildDeclRefExpr(STDecl, VType, VK_LValue, InitLoc);
+    VarDecl *STDecl = buildVarDecl(SemaRef, InitLoc, VType, ".omp.stride");
+    ST = buildDeclRefExpr(SemaRef, STDecl, VType, InitLoc);
     SemaRef.AddInitializerToDecl(
         STDecl, SemaRef.ActOnIntegerConstant(InitLoc, 1).get(),
         /*DirectInit*/ false, /*TypeMayContainAuto*/ false);
@@ -2817,8 +2923,8 @@
   ExprResult IV;
   ExprResult Init;
   {
-    VarDecl *IVDecl = BuildVarDecl(SemaRef, InitLoc, VType, ".omp.iv");
-    IV = SemaRef.BuildDeclRefExpr(IVDecl, VType, VK_LValue, InitLoc);
+    VarDecl *IVDecl = buildVarDecl(SemaRef, InitLoc, VType, ".omp.iv");
+    IV = buildDeclRefExpr(SemaRef, IVDecl, VType, InitLoc);
     Expr *RHS = isOpenMPWorksharingDirective(DKind)
                     ? LB.get()
                     : SemaRef.ActOnIntegerConstant(SourceLocation(), 0).get();
@@ -2906,9 +3012,13 @@
         break;
       }
 
-      // Build update: IS.CounterVar = IS.Start + Iter * IS.Step
+      // Build update: IS.CounterVar(Private) = IS.Start + Iter * IS.Step
+      auto *CounterVar = buildDeclRefExpr(
+          SemaRef, cast<VarDecl>(cast<DeclRefExpr>(IS.CounterVar)->getDecl()),
+          IS.CounterVar->getType(), IS.CounterVar->getExprLoc(),
+          /*RefersToCapture=*/true);
       ExprResult Update =
-          BuildCounterUpdate(SemaRef, CurScope, UpdLoc, IS.CounterVar,
+          BuildCounterUpdate(SemaRef, CurScope, UpdLoc, CounterVar,
                              IS.CounterInit, Iter, IS.CounterStep, IS.Subtract);
       if (!Update.isUsable()) {
         HasErrors = true;
@@ -2917,7 +3027,7 @@
 
       // Build final: IS.CounterVar = IS.Start + IS.NumIters * IS.Step
       ExprResult Final = BuildCounterUpdate(
-          SemaRef, CurScope, UpdLoc, IS.CounterVar, IS.CounterInit,
+          SemaRef, CurScope, UpdLoc, CounterVar, IS.CounterInit,
           IS.NumIterations, IS.CounterStep, IS.Subtract);
       if (!Final.isUsable()) {
         HasErrors = true;
@@ -2976,11 +3086,11 @@
 }
 
 static Expr *GetCollapseNumberExpr(ArrayRef<OMPClause *> Clauses) {
-  auto CollapseFilter = [](const OMPClause *C) -> bool {
+  auto &&CollapseFilter = [](const OMPClause *C) -> bool {
     return C->getClauseKind() == OMPC_collapse;
   };
   OMPExecutableDirective::filtered_clause_iterator<decltype(CollapseFilter)> I(
-      Clauses, CollapseFilter);
+      Clauses, std::move(CollapseFilter));
   if (I)
     return cast<OMPCollapseClause>(*I)->getNumForLoops();
   return nullptr;
@@ -3435,7 +3545,7 @@
     return true;
   } else if (SemaRef.CurContext->isDependentContext())
     E = X = UpdateExpr = nullptr;
-  return false;
+  return ErrorFound != NoError;
 }
 
 bool OpenMPAtomicUpdateChecker::checkStatement(Stmt *S, unsigned DiagId,
@@ -3507,7 +3617,7 @@
     return true;
   } else if (SemaRef.CurContext->isDependentContext())
     E = X = UpdateExpr = nullptr;
-  if (E && X) {
+  if (ErrorFound == NoError && E && X) {
     // Build an update expression of form 'OpaqueValueExpr(x) binop
     // OpaqueValueExpr(expr)' or 'OpaqueValueExpr(expr) binop
     // OpaqueValueExpr(x)' and then cast it to the type of the 'x' expression.
@@ -3526,7 +3636,7 @@
       return true;
     UpdateExpr = Update.get();
   }
-  return false;
+  return ErrorFound != NoError;
 }
 
 StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef<OMPClause *> Clauses,
@@ -3840,7 +3950,7 @@
               E = Checker.getExpr();
               UE = Checker.getUpdateExpr();
               IsXLHSInRHSPart = Checker.isXLHSInRHSPart();
-              IsPostfixUpdate = Checker.isPostfixUpdate();
+              IsPostfixUpdate = true;
             }
           }
           if (!IsUpdateExprFound) {
@@ -3870,7 +3980,7 @@
                 E = Checker.getExpr();
                 UE = Checker.getUpdateExpr();
                 IsXLHSInRHSPart = Checker.isXLHSInRHSPart();
-                IsPostfixUpdate = Checker.isPostfixUpdate();
+                IsPostfixUpdate = false;
               }
             }
           }
@@ -4189,6 +4299,9 @@
         << E->getSourceRange();
     return ExprError();
   }
+  if (CKind == OMPC_collapse) {
+    DSAStack->setCollapseNumber(Result.getExtValue());
+  }
   return ICE;
 }
 
@@ -4416,6 +4529,7 @@
     return nullptr;
   }
   Expr *ValExpr = ChunkSize;
+  Expr *HelperValExpr = nullptr;
   if (ChunkSize) {
     if (!ChunkSize->isValueDependent() && !ChunkSize->isTypeDependent() &&
         !ChunkSize->isInstantiationDependent() &&
@@ -4432,17 +4546,25 @@
       //  chunk_size must be a loop invariant integer expression with a positive
       //  value.
       llvm::APSInt Result;
-      if (ValExpr->isIntegerConstantExpr(Result, Context) &&
-          Result.isSigned() && !Result.isStrictlyPositive()) {
-        Diag(ChunkSizeLoc, diag::err_omp_negative_expression_in_clause)
-            << "schedule" << ChunkSize->getSourceRange();
-        return nullptr;
+      if (ValExpr->isIntegerConstantExpr(Result, Context)) {
+        if (Result.isSigned() && !Result.isStrictlyPositive()) {
+          Diag(ChunkSizeLoc, diag::err_omp_negative_expression_in_clause)
+              << "schedule" << ChunkSize->getSourceRange();
+          return nullptr;
+        }
+      } else if (isParallelOrTaskRegion(DSAStack->getCurrentDirective())) {
+        auto *ImpVar = buildVarDecl(*this, ChunkSize->getExprLoc(),
+                                    ChunkSize->getType(), ".chunk.");
+        auto *ImpVarRef = buildDeclRefExpr(*this, ImpVar, ChunkSize->getType(),
+                                           ChunkSize->getExprLoc(),
+                                           /*RefersToCapture=*/true);
+        HelperValExpr = ImpVarRef;
       }
     }
   }
 
   return new (Context) OMPScheduleClause(StartLoc, LParenLoc, KindLoc, CommaLoc,
-                                         EndLoc, Kind, ValExpr);
+                                         EndLoc, Kind, ValExpr, HelperValExpr);
 }
 
 OMPClause *Sema::ActOnOpenMPClause(OpenMPClauseKind Kind,
@@ -4667,14 +4789,6 @@
       continue;
     }
 
-    // OpenMP [2.9.3.3, Restrictions, C/C++, p.1]
-    //  A variable of class type (or array thereof) that appears in a private
-    //  clause requires an accessible, unambiguous default constructor for the
-    //  class type.
-    while (Type->isArrayType()) {
-      Type = cast<ArrayType>(Type.getTypePtr())->getElementType();
-    }
-
     // OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables Referenced
     // in a Construct]
     //  Variables with the predetermined data-sharing attributes may not be
@@ -4690,25 +4804,36 @@
       continue;
     }
 
+    // Variably modified types are not supported for tasks.
+    if (!Type->isAnyPointerType() && Type->isVariablyModifiedType() &&
+        DSAStack->getCurrentDirective() == OMPD_task) {
+      Diag(ELoc, diag::err_omp_variably_modified_type_not_supported)
+          << getOpenMPClauseName(OMPC_private) << Type
+          << getOpenMPDirectiveName(DSAStack->getCurrentDirective());
+      bool IsDecl =
+          VD->isThisDeclarationADefinition(Context) == VarDecl::DeclarationOnly;
+      Diag(VD->getLocation(),
+           IsDecl ? diag::note_previous_decl : diag::note_defined_here)
+          << VD;
+      continue;
+    }
+
+    // OpenMP [2.9.3.3, Restrictions, C/C++, p.1]
+    //  A variable of class type (or array thereof) that appears in a private
+    //  clause requires an accessible, unambiguous default constructor for the
+    //  class type.
     // Generate helper private variable and initialize it with the default
     // value. The address of the original variable is replaced by the address of
     // the new private variable in CodeGen. This new variable is not added to
     // IdResolver, so the code in the OpenMP region uses original variable for
     // proper diagnostics.
-    auto VDPrivate =
-        VarDecl::Create(Context, CurContext, DE->getLocStart(),
-                        DE->getExprLoc(), VD->getIdentifier(), VD->getType(),
-                        VD->getTypeSourceInfo(), /*S*/ SC_Auto);
-    ActOnUninitializedDecl(VDPrivate, /*TypeMayContainAuto*/ false);
+    Type = Type.getUnqualifiedType();
+    auto VDPrivate = buildVarDecl(*this, DE->getExprLoc(), Type, VD->getName());
+    ActOnUninitializedDecl(VDPrivate, /*TypeMayContainAuto=*/false);
     if (VDPrivate->isInvalidDecl())
       continue;
-    CurContext->addDecl(VDPrivate);
-    auto VDPrivateRefExpr =
-        DeclRefExpr::Create(Context, /*QualifierLoc*/ NestedNameSpecifierLoc(),
-                            /*TemplateKWLoc*/ SourceLocation(), VDPrivate,
-                            /*RefersToEnclosingVariableOrCapture*/ false,
-                            /*NameLoc*/ SourceLocation(), DE->getType(),
-                            /*VK*/ VK_LValue);
+    auto VDPrivateRefExpr = buildDeclRefExpr(
+        *this, VDPrivate, DE->getType().getUnqualifiedType(), DE->getExprLoc());
 
     DSAStack->addDSA(VD, DE, OMPC_private);
     Vars.push_back(DE);
@@ -4819,14 +4944,12 @@
     //  A variable of class type (or array thereof) that appears in a private
     //  clause requires an accessible, unambiguous copy constructor for the
     //  class type.
-    Type = Context.getBaseElementType(Type).getNonReferenceType();
+    auto ElemType = Context.getBaseElementType(Type).getNonReferenceType();
 
     // If an implicit firstprivate variable found it was checked already.
     if (!IsImplicitClause) {
       DSAStackTy::DSAVarData DVar = DSAStack->getTopDSA(VD, false);
-      Type = Type.getNonReferenceType().getCanonicalType();
-      bool IsConstant = Type.isConstant(Context);
-      Type = Context.getBaseElementType(Type);
+      bool IsConstant = ElemType.isConstant(Context);
       // OpenMP [2.4.13, Data-sharing Attribute Clauses]
       //  A list item that specifies a given variable may not appear in more
       // than one clause on the same directive, except that a variable may be
@@ -4909,10 +5032,22 @@
       }
     }
 
-    auto VDPrivate =
-        VarDecl::Create(Context, CurContext, DE->getLocStart(), ELoc,
-                        VD->getIdentifier(), VD->getType().getUnqualifiedType(),
-                        VD->getTypeSourceInfo(), /*S*/ SC_Auto);
+    // Variably modified types are not supported for tasks.
+    if (!Type->isAnyPointerType() && Type->isVariablyModifiedType() &&
+        DSAStack->getCurrentDirective() == OMPD_task) {
+      Diag(ELoc, diag::err_omp_variably_modified_type_not_supported)
+          << getOpenMPClauseName(OMPC_firstprivate) << Type
+          << getOpenMPDirectiveName(DSAStack->getCurrentDirective());
+      bool IsDecl =
+          VD->isThisDeclarationADefinition(Context) == VarDecl::DeclarationOnly;
+      Diag(VD->getLocation(),
+           IsDecl ? diag::note_previous_decl : diag::note_defined_here)
+          << VD;
+      continue;
+    }
+
+    Type = Type.getUnqualifiedType();
+    auto VDPrivate = buildVarDecl(*this, ELoc, Type, VD->getName());
     // Generate helper private variable and initialize it with the value of the
     // original variable. The address of the original variable is replaced by
     // the address of the new private variable in the CodeGen. This new variable
@@ -4921,20 +5056,14 @@
     Expr *VDInitRefExpr = nullptr;
     // For arrays generate initializer for single element and replace it by the
     // original array element in CodeGen.
-    if (DE->getType()->isArrayType()) {
-      auto VDInit = VarDecl::Create(Context, CurContext, DE->getLocStart(),
-                                    ELoc, VD->getIdentifier(), Type,
-                                    VD->getTypeSourceInfo(), /*S*/ SC_Auto);
-      CurContext->addHiddenDecl(VDInit);
-      VDInitRefExpr = DeclRefExpr::Create(
-          Context, /*QualifierLoc*/ NestedNameSpecifierLoc(),
-          /*TemplateKWLoc*/ SourceLocation(), VDInit,
-          /*RefersToEnclosingVariableOrCapture*/ true, ELoc, Type,
-          /*VK*/ VK_LValue);
+    if (Type->isArrayType()) {
+      auto VDInit =
+          buildVarDecl(*this, DE->getExprLoc(), ElemType, VD->getName());
+      VDInitRefExpr = buildDeclRefExpr(*this, VDInit, ElemType, ELoc);
       auto Init = DefaultLvalueConversion(VDInitRefExpr).get();
-      auto *VDInitTemp =
-          BuildVarDecl(*this, DE->getLocStart(), Type.getUnqualifiedType(),
-                       ".firstprivate.temp");
+      ElemType = ElemType.getUnqualifiedType();
+      auto *VDInitTemp = buildVarDecl(*this, DE->getLocStart(), ElemType,
+                                      ".firstprivate.temp");
       InitializedEntity Entity =
           InitializedEntity::InitializeVariable(VDInitTemp);
       InitializationKind Kind = InitializationKind::CreateCopy(ELoc, ELoc);
@@ -4947,9 +5076,9 @@
         VDPrivate->setInit(Result.getAs<Expr>());
     } else {
       auto *VDInit =
-          BuildVarDecl(*this, DE->getLocStart(), Type, ".firstprivate.temp");
+          buildVarDecl(*this, DE->getLocStart(), Type, ".firstprivate.temp");
       VDInitRefExpr =
-          BuildDeclRefExpr(VDInit, Type, VK_LValue, DE->getExprLoc()).get();
+          buildDeclRefExpr(*this, VDInit, DE->getType(), DE->getExprLoc());
       AddInitializerToDecl(VDPrivate,
                            DefaultLvalueConversion(VDInitRefExpr).get(),
                            /*DirectInit=*/false, /*TypeMayContainAuto=*/false);
@@ -4962,11 +5091,8 @@
       continue;
     }
     CurContext->addDecl(VDPrivate);
-    auto VDPrivateRefExpr = DeclRefExpr::Create(
-        Context, /*QualifierLoc*/ NestedNameSpecifierLoc(),
-        /*TemplateKWLoc*/ SourceLocation(), VDPrivate,
-        /*RefersToEnclosingVariableOrCapture*/ false, DE->getLocStart(),
-        DE->getType().getUnqualifiedType(), /*VK*/ VK_LValue);
+    auto VDPrivateRefExpr = buildDeclRefExpr(
+        *this, VDPrivate, DE->getType().getUnqualifiedType(), DE->getExprLoc());
     DSAStack->addDSA(VD, DE, OMPC_firstprivate);
     Vars.push_back(DE);
     PrivateCopies.push_back(VDPrivateRefExpr);
@@ -5064,6 +5190,7 @@
     // lastprivate clause on a worksharing construct if any of the corresponding
     // worksharing regions ever binds to any of the corresponding parallel
     // regions.
+    DSAStackTy::DSAVarData TopDVar = DVar;
     if (isOpenMPWorksharingDirective(CurrDir) &&
         !isOpenMPParallelDirective(CurrDir)) {
       DVar = DSAStack->getImplicitDSA(VD, true);
@@ -5084,14 +5211,14 @@
     //  lastprivate clause requires an accessible, unambiguous copy assignment
     //  operator for the class type.
     Type = Context.getBaseElementType(Type).getNonReferenceType();
-    auto *SrcVD = BuildVarDecl(*this, DE->getLocStart(),
+    auto *SrcVD = buildVarDecl(*this, DE->getLocStart(),
                                Type.getUnqualifiedType(), ".lastprivate.src");
-    auto *PseudoSrcExpr = BuildDeclRefExpr(SrcVD, Type.getUnqualifiedType(),
-                                           VK_LValue, DE->getExprLoc()).get();
+    auto *PseudoSrcExpr = buildDeclRefExpr(
+        *this, SrcVD, Type.getUnqualifiedType(), DE->getExprLoc());
     auto *DstVD =
-        BuildVarDecl(*this, DE->getLocStart(), Type, ".lastprivate.dst");
+        buildVarDecl(*this, DE->getLocStart(), Type, ".lastprivate.dst");
     auto *PseudoDstExpr =
-        BuildDeclRefExpr(DstVD, Type, VK_LValue, DE->getExprLoc()).get();
+        buildDeclRefExpr(*this, DstVD, Type, DE->getExprLoc());
     // For arrays generate assignment operation for single element and replace
     // it by the original array element in CodeGen.
     auto AssignmentOp = BuildBinOp(/*S=*/nullptr, DE->getExprLoc(), BO_Assign,
@@ -5103,7 +5230,7 @@
     if (AssignmentOp.isInvalid())
       continue;
 
-    if (DVar.CKind != OMPC_firstprivate)
+    if (TopDVar.CKind != OMPC_firstprivate)
       DSAStack->addDSA(VD, DE, OMPC_lastprivate);
     Vars.push_back(DE);
     SrcExprs.push_back(PseudoSrcExpr);
@@ -5471,8 +5598,8 @@
       }
     }
     Type = Type.getNonLValueExprType(Context).getUnqualifiedType();
-    auto *LHSVD = BuildVarDecl(*this, ELoc, Type, ".reduction.lhs");
-    auto *RHSVD = BuildVarDecl(*this, ELoc, Type, VD->getName());
+    auto *LHSVD = buildVarDecl(*this, ELoc, Type, ".reduction.lhs");
+    auto *RHSVD = buildVarDecl(*this, ELoc, Type, VD->getName());
     // Add initializer for private variable.
     Expr *Init = nullptr;
     switch (BOK) {
@@ -5594,13 +5721,13 @@
           << VD;
       continue;
     }
-    auto *LHSDRE = BuildDeclRefExpr(LHSVD, Type, VK_LValue, ELoc).get();
-    auto *RHSDRE = BuildDeclRefExpr(RHSVD, Type, VK_LValue, ELoc).get();
+    auto *LHSDRE = buildDeclRefExpr(*this, LHSVD, Type, ELoc);
+    auto *RHSDRE = buildDeclRefExpr(*this, RHSVD, Type, ELoc);
     ExprResult ReductionOp =
         BuildBinOp(DSAStack->getCurScope(), ReductionId.getLocStart(), BOK,
                    LHSDRE, RHSDRE);
     if (ReductionOp.isUsable()) {
-      if (BOK != BO_LOr && BOK != BO_LAnd) {
+      if (BOK != BO_LT && BOK != BO_GT) {
         ReductionOp =
             BuildBinOp(DSAStack->getCurScope(), ReductionId.getLocStart(),
                        BO_Assign, LHSDRE, ReductionOp.get());
@@ -5736,16 +5863,11 @@
     }
 
     // Build var to save initial value.
-    VarDecl *Init = BuildVarDecl(*this, ELoc, DE->getType(), ".linear.start");
+    VarDecl *Init = buildVarDecl(*this, ELoc, QType, ".linear.start");
     AddInitializerToDecl(Init, DefaultLvalueConversion(DE).get(),
                          /*DirectInit*/ false, /*TypeMayContainAuto*/ false);
-    CurContext->addDecl(Init);
-    Init->setIsUsed();
-    auto InitRef = DeclRefExpr::Create(
-        Context, /*QualifierLoc*/ NestedNameSpecifierLoc(),
-        /*TemplateKWLoc*/ SourceLocation(), Init,
-        /*isEnclosingLocal*/ false, DE->getLocStart(), DE->getType(),
-        /*VK*/ VK_LValue);
+    auto InitRef = buildDeclRefExpr(
+        *this, Init, DE->getType().getUnqualifiedType(), DE->getExprLoc());
     DSAStack->addDSA(VD, DE, OMPC_linear);
     Vars.push_back(DE);
     Inits.push_back(InitRef);
@@ -5767,11 +5889,9 @@
 
     // Build var to save the step value.
     VarDecl *SaveVar =
-        BuildVarDecl(*this, StepLoc, StepExpr->getType(), ".linear.step");
-    CurContext->addDecl(SaveVar);
-    SaveVar->setIsUsed();
+        buildVarDecl(*this, StepLoc, StepExpr->getType(), ".linear.step");
     ExprResult SaveRef =
-        BuildDeclRefExpr(SaveVar, StepExpr->getType(), VK_LValue, StepLoc);
+        buildDeclRefExpr(*this, SaveVar, StepExpr->getType(), StepLoc);
     ExprResult CalcStep =
         BuildBinOp(CurScope, StepLoc, BO_Assign, SaveRef.get(), StepExpr);
 
@@ -5814,11 +5934,10 @@
 
     // Build privatized reference to the current linear var.
     auto DE = cast<DeclRefExpr>(RefExpr);
-    auto PrivateRef = DeclRefExpr::Create(
-        SemaRef.Context, /*QualifierLoc*/ DE->getQualifierLoc(),
-        /*TemplateKWLoc*/ SourceLocation(), DE->getDecl(),
-        /* RefersToEnclosingVariableOrCapture */ true, DE->getLocStart(),
-        DE->getType(), /*VK*/ VK_LValue);
+    auto PrivateRef =
+        buildDeclRefExpr(SemaRef, cast<VarDecl>(DE->getDecl()),
+                         DE->getType().getUnqualifiedType(), DE->getExprLoc(),
+                         /*RefersToCapture=*/true);
 
     // Build update: Var = InitExpr + IV * Step
     ExprResult Update =
@@ -5828,8 +5947,8 @@
 
     // Build final: Var = InitExpr + NumIterations * Step
     ExprResult Final =
-        BuildCounterUpdate(SemaRef, S, RefExpr->getExprLoc(), RefExpr, InitExpr,
-                           NumIterations, Step, /* Subtract */ false);
+        BuildCounterUpdate(SemaRef, S, RefExpr->getExprLoc(), PrivateRef,
+                           InitExpr, NumIterations, Step, /* Subtract */ false);
     Final = SemaRef.ActOnFinishFullExpr(Final.get());
     if (!Update.isUsable() || !Final.isUsable()) {
       Updates.push_back(nullptr);
@@ -5873,10 +5992,8 @@
     // OpenMP  [2.8.1, simd construct, Restrictions]
     // The type of list items appearing in the aligned clause must be
     // array, pointer, reference to array, or reference to pointer.
-    QualType QType = DE->getType()
-                         .getNonReferenceType()
-                         .getUnqualifiedType()
-                         .getCanonicalType();
+    QualType QType = VD->getType();
+    QType = QType.getNonReferenceType().getUnqualifiedType().getCanonicalType();
     const Type *Ty = QType.getTypePtrOrNull();
     if (!Ty || (!Ty->isDependentType() && !Ty->isArrayType() &&
                 !Ty->isPointerType())) {
@@ -5977,15 +6094,15 @@
     //  A variable of class type (or array thereof) that appears in a
     //  copyin clause requires an accessible, unambiguous copy assignment
     //  operator for the class type.
-    Type = Context.getBaseElementType(Type).getNonReferenceType();
-    auto *SrcVD = BuildVarDecl(*this, DE->getLocStart(),
-                               Type.getUnqualifiedType(), ".copyin.src");
-    auto *PseudoSrcExpr = BuildDeclRefExpr(SrcVD, Type.getUnqualifiedType(),
-                                           VK_LValue, DE->getExprLoc())
-                              .get();
-    auto *DstVD = BuildVarDecl(*this, DE->getLocStart(), Type, ".copyin.dst");
+    auto ElemType = Context.getBaseElementType(Type).getNonReferenceType();
+    auto *SrcVD = buildVarDecl(*this, DE->getLocStart(),
+                               ElemType.getUnqualifiedType(), ".copyin.src");
+    auto *PseudoSrcExpr = buildDeclRefExpr(
+        *this, SrcVD, ElemType.getUnqualifiedType(), DE->getExprLoc());
+    auto *DstVD =
+        buildVarDecl(*this, DE->getLocStart(), ElemType, ".copyin.dst");
     auto *PseudoDstExpr =
-        BuildDeclRefExpr(DstVD, Type, VK_LValue, DE->getExprLoc()).get();
+        buildDeclRefExpr(*this, DstVD, ElemType, DE->getExprLoc());
     // For arrays generate assignment operation for single element and replace
     // it by the original array element in CodeGen.
     auto AssignmentOp = BuildBinOp(/*S=*/nullptr, DE->getExprLoc(), BO_Assign,
@@ -6083,19 +6200,32 @@
       }
     }
 
+    // Variably modified types are not supported.
+    if (!Type->isAnyPointerType() && Type->isVariablyModifiedType()) {
+      Diag(ELoc, diag::err_omp_variably_modified_type_not_supported)
+          << getOpenMPClauseName(OMPC_copyprivate) << Type
+          << getOpenMPDirectiveName(DSAStack->getCurrentDirective());
+      bool IsDecl =
+          VD->isThisDeclarationADefinition(Context) == VarDecl::DeclarationOnly;
+      Diag(VD->getLocation(),
+           IsDecl ? diag::note_previous_decl : diag::note_defined_here)
+          << VD;
+      continue;
+    }
+
     // OpenMP [2.14.4.1, Restrictions, C/C++, p.2]
     //  A variable of class type (or array thereof) that appears in a
     //  copyin clause requires an accessible, unambiguous copy assignment
     //  operator for the class type.
     Type = Context.getBaseElementType(Type).getUnqualifiedType();
     auto *SrcVD =
-        BuildVarDecl(*this, DE->getLocStart(), Type, ".copyprivate.src");
+        buildVarDecl(*this, DE->getLocStart(), Type, ".copyprivate.src");
     auto *PseudoSrcExpr =
-        BuildDeclRefExpr(SrcVD, Type, VK_LValue, DE->getExprLoc()).get();
+        buildDeclRefExpr(*this, SrcVD, Type, DE->getExprLoc());
     auto *DstVD =
-        BuildVarDecl(*this, DE->getLocStart(), Type, ".copyprivate.dst");
+        buildVarDecl(*this, DE->getLocStart(), Type, ".copyprivate.dst");
     auto *PseudoDstExpr =
-        BuildDeclRefExpr(DstVD, Type, VK_LValue, DE->getExprLoc()).get();
+        buildDeclRefExpr(*this, DstVD, Type, DE->getExprLoc());
     auto AssignmentOp = BuildBinOp(/*S=*/nullptr, DE->getExprLoc(), BO_Assign,
                                    PseudoDstExpr, PseudoSrcExpr);
     if (AssignmentOp.isInvalid())
diff --git a/lib/Sema/SemaOverload.cpp b/lib/Sema/SemaOverload.cpp
index 8f9401b..f8610e0 100644
--- a/lib/Sema/SemaOverload.cpp
+++ b/lib/Sema/SemaOverload.cpp
@@ -10507,7 +10507,8 @@
                        const CXXScopeSpec &SS, LookupResult &R,
                        OverloadCandidateSet::CandidateSetKind CSK,
                        TemplateArgumentListInfo *ExplicitTemplateArgs,
-                       ArrayRef<Expr *> Args) {
+                       ArrayRef<Expr *> Args,
+                       bool *DoDiagnoseEmptyLookup = nullptr) {
   if (SemaRef.ActiveTemplateInstantiations.empty() || !SS.isEmpty())
     return false;
 
@@ -10524,6 +10525,8 @@
         // Don't diagnose names we find in classes; we get much better
         // diagnostics for these from DiagnoseEmptyLookup.
         R.clear();
+        if (DoDiagnoseEmptyLookup)
+          *DoDiagnoseEmptyLookup = true;
         return false;
       }
 
@@ -10673,15 +10676,16 @@
 
   LookupResult R(SemaRef, ULE->getName(), ULE->getNameLoc(),
                  Sema::LookupOrdinaryName);
+  bool DoDiagnoseEmptyLookup = EmptyLookup;
   if (!DiagnoseTwoPhaseLookup(SemaRef, Fn->getExprLoc(), SS, R,
                               OverloadCandidateSet::CSK_Normal,
-                              ExplicitTemplateArgs, Args) &&
-      (!EmptyLookup ||
-       SemaRef.DiagnoseEmptyLookup(
-           S, SS, R,
-           MakeValidator(SemaRef, dyn_cast<MemberExpr>(Fn), Args.size(),
-                         ExplicitTemplateArgs != nullptr, AllowTypoCorrection),
-           ExplicitTemplateArgs, Args)))
+                              ExplicitTemplateArgs, Args,
+                              &DoDiagnoseEmptyLookup) &&
+    (!DoDiagnoseEmptyLookup || SemaRef.DiagnoseEmptyLookup(
+        S, SS, R,
+        MakeValidator(SemaRef, dyn_cast<MemberExpr>(Fn), Args.size(),
+                      ExplicitTemplateArgs != nullptr, AllowTypoCorrection),
+        ExplicitTemplateArgs, Args)))
     return ExprError();
 
   assert(!R.empty() && "lookup results empty despite recovery");
@@ -10746,26 +10750,29 @@
   // functions, including those from argument-dependent lookup.
   AddOverloadedCallCandidates(ULE, Args, *CandidateSet);
 
-  // If we found nothing, try to recover.
-  // BuildRecoveryCallExpr diagnoses the error itself, so we just bail
-  // out if it fails.
-  if (CandidateSet->empty()) {
-    // In Microsoft mode, if we are inside a template class member function then
-    // create a type dependent CallExpr. The goal is to postpone name lookup
-    // to instantiation time to be able to search into type dependent base
-    // classes.
-    if (getLangOpts().MSVCCompat && CurContext->isDependentContext() &&
-        (isa<FunctionDecl>(CurContext) || isa<CXXRecordDecl>(CurContext))) {
-      CallExpr *CE = new (Context) CallExpr(Context, Fn, Args,
-                                            Context.DependentTy, VK_RValue,
-                                            RParenLoc);
+  if (getLangOpts().MSVCCompat &&
+      CurContext->isDependentContext() && !isSFINAEContext() &&
+      (isa<FunctionDecl>(CurContext) || isa<CXXRecordDecl>(CurContext))) {
+
+    OverloadCandidateSet::iterator Best;
+    if (CandidateSet->empty() ||
+        CandidateSet->BestViableFunction(*this, Fn->getLocStart(), Best) ==
+            OR_No_Viable_Function) {
+      // In Microsoft mode, if we are inside a template class member function then
+      // create a type dependent CallExpr. The goal is to postpone name lookup
+      // to instantiation time to be able to search into type dependent base
+      // classes.
+      CallExpr *CE = new (Context) CallExpr(
+          Context, Fn, Args, Context.DependentTy, VK_RValue, RParenLoc);
       CE->setTypeDependent(true);
       *Result = CE;
       return true;
     }
-    return false;
   }
 
+  if (CandidateSet->empty())
+    return false;
+
   UnbridgedCasts.restore();
   return false;
 }
@@ -12488,17 +12495,17 @@
       type = Fn->getType();
     } else {
       valueKind = VK_RValue;
-      type = Context.BoundMemberTy;
-    }
-
-    MemberExpr *ME = MemberExpr::Create(
-        Context, Base, MemExpr->isArrow(), MemExpr->getOperatorLoc(),
-        MemExpr->getQualifierLoc(), MemExpr->getTemplateKeywordLoc(), Fn, Found,
-        MemExpr->getMemberNameInfo(), TemplateArgs, type, valueKind,
-        OK_Ordinary);
-    ME->setHadMultipleCandidates(true);
-    MarkMemberReferenced(ME);
-    return ME;
+      type = Context.BoundMemberTy;
+    }
+
+    MemberExpr *ME = MemberExpr::Create(
+        Context, Base, MemExpr->isArrow(), MemExpr->getOperatorLoc(),
+        MemExpr->getQualifierLoc(), MemExpr->getTemplateKeywordLoc(), Fn, Found,
+        MemExpr->getMemberNameInfo(), TemplateArgs, type, valueKind,
+        OK_Ordinary);
+    ME->setHadMultipleCandidates(true);
+    MarkMemberReferenced(ME);
+    return ME;
   }
 
   llvm_unreachable("Invalid reference to overloaded function");
diff --git a/lib/Sema/SemaStmt.cpp b/lib/Sema/SemaStmt.cpp
index ed5da43..5c72529 100644
--- a/lib/Sema/SemaStmt.cpp
+++ b/lib/Sema/SemaStmt.cpp
@@ -1828,6 +1828,15 @@
 /// \return true if an error occurs.
 static bool FinishForRangeVarDecl(Sema &SemaRef, VarDecl *Decl, Expr *Init,
                                   SourceLocation Loc, int DiagID) {
+  if (Decl->getType()->isUndeducedType()) {
+    ExprResult Res = SemaRef.CorrectDelayedTyposInExpr(Init);
+    if (!Res.isUsable()) {
+      Decl->setInvalidDecl();
+      return true;
+    }
+    Init = Res.get();
+  }
+
   // Deduce the type for the iterator variable now rather than leaving it to
   // AddInitializerToDecl, so we can produce a more suitable diagnostic.
   QualType InitType;
@@ -3410,6 +3419,7 @@
   return new (Context) ObjCAutoreleasePoolStmt(AtLoc, Body);
 }
 
+namespace {
 class CatchHandlerType {
   QualType QT;
   unsigned IsPointer : 1;
@@ -3451,6 +3461,7 @@
     return LHS.QT == RHS.QT;
   }
 };
+} // namespace
 
 namespace llvm {
 template <> struct DenseMapInfo<CatchHandlerType> {
diff --git a/lib/Sema/SemaStmtAsm.cpp b/lib/Sema/SemaStmtAsm.cpp
index 179e207..d19d881 100644
--- a/lib/Sema/SemaStmtAsm.cpp
+++ b/lib/Sema/SemaStmtAsm.cpp
@@ -124,16 +124,8 @@
   // The parser verifies that there is a string literal here.
   assert(AsmString->isAscii());
 
-  bool ValidateConstraints = true;
-  if (getLangOpts().CUDA) {
-    // In CUDA mode don't verify asm constraints in device functions during host
-    // compilation and vice versa.
-    bool InDeviceMode = getLangOpts().CUDAIsDevice;
-    FunctionDecl *FD = getCurFunctionDecl();
-    bool IsDeviceFunction =
-        FD && (FD->hasAttr<CUDADeviceAttr>() || FD->hasAttr<CUDAGlobalAttr>());
-    ValidateConstraints = IsDeviceFunction == InDeviceMode;
-  }
+  bool ValidateConstraints =
+      DeclAttrsMatchCUDAMode(getLangOpts(), getCurFunctionDecl());
 
   for (unsigned i = 0; i != NumOutputs; i++) {
     StringLiteral *Literal = Constraints[i];
@@ -162,6 +154,14 @@
     if (CheckNakedParmReference(OutputExpr, *this))
       return StmtError();
 
+    // Bitfield can't be referenced with a pointer.
+    if (Info.allowsMemory() && OutputExpr->refersToBitField())
+      return StmtError(Diag(OutputExpr->getLocStart(),
+                            diag::err_asm_bitfield_in_memory_constraint)
+                       << 1
+                       << Info.getConstraintStr()
+                       << OutputExpr->getSourceRange());
+
     OutputConstraintInfos.push_back(Info);
 
     // If this is dependent, just continue.
@@ -238,6 +238,14 @@
     if (CheckNakedParmReference(InputExpr, *this))
       return StmtError();
 
+    // Bitfield can't be referenced with a pointer.
+    if (Info.allowsMemory() && InputExpr->refersToBitField())
+      return StmtError(Diag(InputExpr->getLocStart(),
+                            diag::err_asm_bitfield_in_memory_constraint)
+                       << 0
+                       << Info.getConstraintStr()
+                       << InputExpr->getSourceRange());
+
     // Only allow void types for memory constraints.
     if (Info.allowsMemory() && !Info.allowsRegister()) {
       if (CheckAsmLValue(InputExpr, *this))
diff --git a/lib/Sema/SemaStmtAttr.cpp b/lib/Sema/SemaStmtAttr.cpp
index 19e2c8e..5b71c11 100644
--- a/lib/Sema/SemaStmtAttr.cpp
+++ b/lib/Sema/SemaStmtAttr.cpp
@@ -105,6 +105,8 @@
     if (StateLoc && StateLoc->Ident) {
       if (StateLoc->Ident->isStr("disable"))
         State = LoopHintAttr::Disable;
+      else if (StateLoc->Ident->isStr("assume_safety"))
+        State = LoopHintAttr::AssumeSafety;
       else
         State = LoopHintAttr::Enable;
     }
@@ -159,7 +161,7 @@
     const LoopHintAttr *PrevAttr;
     if (Option == LoopHintAttr::Vectorize ||
         Option == LoopHintAttr::Interleave || Option == LoopHintAttr::Unroll) {
-      // Enable|disable hint.  For example, vectorize(enable).
+      // Enable|Disable|AssumeSafety hint.  For example, vectorize(enable).
       PrevAttr = CategoryState.StateAttr;
       CategoryState.StateAttr = LH;
     } else {
diff --git a/lib/Sema/SemaTemplate.cpp b/lib/Sema/SemaTemplate.cpp
index c642c05..9a15411 100644
--- a/lib/Sema/SemaTemplate.cpp
+++ b/lib/Sema/SemaTemplate.cpp
@@ -12,7 +12,6 @@
 #include "TreeTransform.h"
 #include "clang/AST/ASTConsumer.h"
 #include "clang/AST/ASTContext.h"
-#include "clang/AST/ASTMutationListener.h"
 #include "clang/AST/DeclFriend.h"
 #include "clang/AST/DeclTemplate.h"
 #include "clang/AST/Expr.h"
@@ -603,7 +602,7 @@
       return Param;
     }
 
-    Param->setDefaultArgument(DefaultTInfo, false);
+    Param->setDefaultArgument(DefaultTInfo);
   }
 
   return Param;
@@ -724,7 +723,7 @@
     }
     Default = DefaultRes.get();
 
-    Param->setDefaultArgument(Default, false);
+    Param->setDefaultArgument(Default);
   }
 
   return Param;
@@ -800,7 +799,7 @@
                                         UPPC_DefaultArgument))
       return Param;
 
-    Param->setDefaultArgument(DefaultArg, false);
+    Param->setDefaultArgument(Context, DefaultArg);
   }
 
   return Param;
@@ -838,7 +837,7 @@
                          SourceLocation FriendLoc,
                          unsigned NumOuterTemplateParamLists,
                          TemplateParameterList** OuterTemplateParamLists,
-                         bool *SkipBody) {
+                         SkipBodyInfo *SkipBody) {
   assert(TemplateParams && TemplateParams->size() > 0 &&
          "No template parameters");
   assert(TUK != TUK_Reference && "Can only declare or define class templates");
@@ -999,16 +998,12 @@
         // simply making that previous definition visible.
         NamedDecl *Hidden = nullptr;
         if (SkipBody && !hasVisibleDefinition(Def, &Hidden)) {
-          *SkipBody = true;
+          SkipBody->ShouldSkip = true;
           auto *Tmpl = cast<CXXRecordDecl>(Hidden)->getDescribedClassTemplate();
           assert(Tmpl && "original definition of a class template is not a "
                          "class template?");
-          if (auto *Listener = getASTMutationListener()) {
-            Listener->RedefinedHiddenDefinition(Hidden, KWLoc);
-            Listener->RedefinedHiddenDefinition(Tmpl, KWLoc);
-          }
-          Hidden->setHidden(false);
-          Tmpl->setHidden(false);
+          makeMergedDefinitionVisible(Hidden, KWLoc);
+          makeMergedDefinitionVisible(Tmpl, KWLoc);
           return Def;
         }
 
@@ -1315,12 +1310,11 @@
       // Merge default arguments for template type parameters.
       TemplateTypeParmDecl *OldTypeParm
           = OldParams? cast<TemplateTypeParmDecl>(*OldParam) : nullptr;
-
       if (NewTypeParm->isParameterPack()) {
         assert(!NewTypeParm->hasDefaultArgument() &&
                "Parameter packs can't have a default argument!");
         SawParameterPack = true;
-      } else if (OldTypeParm && OldTypeParm->hasDefaultArgument() &&
+      } else if (OldTypeParm && hasVisibleDefaultArgument(OldTypeParm) &&
                  NewTypeParm->hasDefaultArgument()) {
         OldDefaultLoc = OldTypeParm->getDefaultArgumentLoc();
         NewDefaultLoc = NewTypeParm->getDefaultArgumentLoc();
@@ -1330,8 +1324,7 @@
       } else if (OldTypeParm && OldTypeParm->hasDefaultArgument()) {
         // Merge the default argument from the old declaration to the
         // new declaration.
-        NewTypeParm->setDefaultArgument(OldTypeParm->getDefaultArgumentInfo(),
-                                        true);
+        NewTypeParm->setInheritedDefaultArgument(Context, OldTypeParm);
         PreviousDefaultArgLoc = OldTypeParm->getDefaultArgumentLoc();
       } else if (NewTypeParm->hasDefaultArgument()) {
         SawDefaultArgument = true;
@@ -1365,7 +1358,7 @@
                "Parameter packs can't have a default argument!");
         if (!NewNonTypeParm->isPackExpansion())
           SawParameterPack = true;
-      } else if (OldNonTypeParm && OldNonTypeParm->hasDefaultArgument() &&
+      } else if (OldNonTypeParm && hasVisibleDefaultArgument(OldNonTypeParm) &&
                  NewNonTypeParm->hasDefaultArgument()) {
         OldDefaultLoc = OldNonTypeParm->getDefaultArgumentLoc();
         NewDefaultLoc = NewNonTypeParm->getDefaultArgumentLoc();
@@ -1375,12 +1368,7 @@
       } else if (OldNonTypeParm && OldNonTypeParm->hasDefaultArgument()) {
         // Merge the default argument from the old declaration to the
         // new declaration.
-        // FIXME: We need to create a new kind of "default argument"
-        // expression that points to a previous non-type template
-        // parameter.
-        NewNonTypeParm->setDefaultArgument(
-                                         OldNonTypeParm->getDefaultArgument(),
-                                         /*Inherited=*/ true);
+        NewNonTypeParm->setInheritedDefaultArgument(Context, OldNonTypeParm);
         PreviousDefaultArgLoc = OldNonTypeParm->getDefaultArgumentLoc();
       } else if (NewNonTypeParm->hasDefaultArgument()) {
         SawDefaultArgument = true;
@@ -1412,8 +1400,9 @@
                "Parameter packs can't have a default argument!");
         if (!NewTemplateParm->isPackExpansion())
           SawParameterPack = true;
-      } else if (OldTemplateParm && OldTemplateParm->hasDefaultArgument() &&
-          NewTemplateParm->hasDefaultArgument()) {
+      } else if (OldTemplateParm &&
+                 hasVisibleDefaultArgument(OldTemplateParm) &&
+                 NewTemplateParm->hasDefaultArgument()) {
         OldDefaultLoc = OldTemplateParm->getDefaultArgument().getLocation();
         NewDefaultLoc = NewTemplateParm->getDefaultArgument().getLocation();
         SawDefaultArgument = true;
@@ -1422,11 +1411,7 @@
       } else if (OldTemplateParm && OldTemplateParm->hasDefaultArgument()) {
         // Merge the default argument from the old declaration to the
         // new declaration.
-        // FIXME: We need to create a new kind of "default argument" expression
-        // that points to a previous template template parameter.
-        NewTemplateParm->setDefaultArgument(
-                                          OldTemplateParm->getDefaultArgument(),
-                                          /*Inherited=*/ true);
+        NewTemplateParm->setInheritedDefaultArgument(Context, OldTemplateParm);
         PreviousDefaultArgLoc
           = OldTemplateParm->getDefaultArgument().getLocation();
       } else if (NewTemplateParm->hasDefaultArgument()) {
@@ -3314,7 +3299,7 @@
   HasDefaultArg = false;
 
   if (TemplateTypeParmDecl *TypeParm = dyn_cast<TemplateTypeParmDecl>(Param)) {
-    if (!TypeParm->hasDefaultArgument())
+    if (!hasVisibleDefaultArgument(TypeParm))
       return TemplateArgumentLoc();
 
     HasDefaultArg = true;
@@ -3331,7 +3316,7 @@
 
   if (NonTypeTemplateParmDecl *NonTypeParm
         = dyn_cast<NonTypeTemplateParmDecl>(Param)) {
-    if (!NonTypeParm->hasDefaultArgument())
+    if (!hasVisibleDefaultArgument(NonTypeParm))
       return TemplateArgumentLoc();
 
     HasDefaultArg = true;
@@ -3349,7 +3334,7 @@
 
   TemplateTemplateParmDecl *TempTempParm
     = cast<TemplateTemplateParmDecl>(Param);
-  if (!TempTempParm->hasDefaultArgument())
+  if (!hasVisibleDefaultArgument(TempTempParm))
     return TemplateArgumentLoc();
 
   HasDefaultArg = true;
@@ -3814,7 +3799,7 @@
     // (when the template parameter was part of a nested template) into
     // the default argument.
     if (TemplateTypeParmDecl *TTP = dyn_cast<TemplateTypeParmDecl>(*Param)) {
-      if (!TTP->hasDefaultArgument())
+      if (!hasVisibleDefaultArgument(TTP))
         return diagnoseArityMismatch(*this, Template, TemplateLoc, NewArgs);
 
       TypeSourceInfo *ArgType = SubstDefaultTemplateArgument(*this,
@@ -3830,7 +3815,7 @@
                                 ArgType);
     } else if (NonTypeTemplateParmDecl *NTTP
                  = dyn_cast<NonTypeTemplateParmDecl>(*Param)) {
-      if (!NTTP->hasDefaultArgument())
+      if (!hasVisibleDefaultArgument(NTTP))
         return diagnoseArityMismatch(*this, Template, TemplateLoc, NewArgs);
 
       ExprResult E = SubstDefaultTemplateArgument(*this, Template,
@@ -3847,7 +3832,7 @@
       TemplateTemplateParmDecl *TempParm
         = cast<TemplateTemplateParmDecl>(*Param);
 
-      if (!TempParm->hasDefaultArgument())
+      if (!hasVisibleDefaultArgument(TempParm))
         return diagnoseArityMismatch(*this, Template, TemplateLoc, NewArgs);
 
       NestedNameSpecifierLoc QualifierLoc;
@@ -6057,7 +6042,9 @@
                                        SourceLocation ModulePrivateLoc,
                                        TemplateIdAnnotation &TemplateId,
                                        AttributeList *Attr,
-                               MultiTemplateParamsArg TemplateParameterLists) {
+                                       MultiTemplateParamsArg
+                                           TemplateParameterLists,
+                                       SkipBodyInfo *SkipBody) {
   assert(TUK != TUK_Reference && "References are not specializations");
 
   CXXScopeSpec &SS = TemplateId.SS;
@@ -6368,7 +6355,14 @@
 
   // Check that this isn't a redefinition of this specialization.
   if (TUK == TUK_Definition) {
-    if (RecordDecl *Def = Specialization->getDefinition()) {
+    RecordDecl *Def = Specialization->getDefinition();
+    NamedDecl *Hidden = nullptr;
+    if (Def && SkipBody && !hasVisibleDefinition(Def, &Hidden)) {
+      SkipBody->ShouldSkip = true;
+      makeMergedDefinitionVisible(Hidden, KWLoc);
+      // From here on out, treat this as just a redeclaration.
+      TUK = TUK_Declaration;
+    } else if (Def) {
       SourceRange Range(TemplateNameLoc, RAngleLoc);
       Diag(TemplateNameLoc, diag::err_redefinition)
         << Context.getTypeDeclType(Specialization) << Range;
@@ -7354,10 +7348,30 @@
     // Fix a TSK_ExplicitInstantiationDeclaration followed by a
     // TSK_ExplicitInstantiationDefinition
     if (Old_TSK == TSK_ExplicitInstantiationDeclaration &&
-        TSK == TSK_ExplicitInstantiationDefinition)
+        TSK == TSK_ExplicitInstantiationDefinition) {
       // FIXME: Need to notify the ASTMutationListener that we did this.
       Def->setTemplateSpecializationKind(TSK);
 
+      if (!getDLLAttr(Def) && getDLLAttr(Specialization) &&
+          Context.getTargetInfo().getCXXABI().isMicrosoft()) {
+        // In the MS ABI, an explicit instantiation definition can add a dll
+        // attribute to a template with a previous instantiation declaration.
+        // MinGW doesn't allow this.
+        auto *A = cast<InheritableAttr>(
+            getDLLAttr(Specialization)->clone(getASTContext()));
+        A->setInherited(true);
+        Def->addAttr(A);
+        checkClassLevelDLLAttribute(Def);
+
+        // Propagate attribute to base class templates.
+        for (auto &B : Def->bases()) {
+          if (auto *BT = dyn_cast_or_null<ClassTemplateSpecializationDecl>(
+                  B.getType()->getAsCXXRecordDecl()))
+            propagateDLLAttrToBaseClassTemplate(Def, A, BT, B.getLocStart());
+        }
+      }
+    }
+
     InstantiateClassTemplateSpecializationMembers(TemplateNameLoc, Def, TSK);
   }
 
diff --git a/lib/Sema/SemaTemplateDeduction.cpp b/lib/Sema/SemaTemplateDeduction.cpp
index af8d309..6f676ad 100644
--- a/lib/Sema/SemaTemplateDeduction.cpp
+++ b/lib/Sema/SemaTemplateDeduction.cpp
@@ -3656,8 +3656,10 @@
   FunctionTemplateDecl *InvokerTemplate = LambdaClass->
                   getLambdaStaticInvoker()->getDescribedFunctionTemplate();
 
-  Sema::TemplateDeductionResult LLVM_ATTRIBUTE_UNUSED Result
-    = S.FinishTemplateArgumentDeduction(InvokerTemplate, DeducedArguments, 0, 
+#ifndef NDEBUG
+  Sema::TemplateDeductionResult LLVM_ATTRIBUTE_UNUSED Result =
+#endif
+    S.FinishTemplateArgumentDeduction(InvokerTemplate, DeducedArguments, 0, 
           InvokerSpecialized, TDInfo);
   assert(Result == Sema::TDK_Success && 
     "If the call operator succeeded so should the invoker!");
diff --git a/lib/Sema/SemaTemplateInstantiate.cpp b/lib/Sema/SemaTemplateInstantiate.cpp
index f93a848..82ff7c0 100644
--- a/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/lib/Sema/SemaTemplateInstantiate.cpp
@@ -714,6 +714,20 @@
     }
 
     void transformedLocalDecl(Decl *Old, Decl *New) {
+      // If we've instantiated the call operator of a lambda or the call
+      // operator template of a generic lambda, update the "instantiation of"
+      // information.
+      auto *NewMD = dyn_cast<CXXMethodDecl>(New);
+      if (NewMD && isLambdaCallOperator(NewMD)) {
+        auto *OldMD = dyn_cast<CXXMethodDecl>(Old);
+        if (auto *NewTD = NewMD->getDescribedFunctionTemplate())
+          NewTD->setInstantiatedFromMemberTemplate(
+              OldMD->getDescribedFunctionTemplate());
+        else
+          NewMD->setInstantiationOfMemberFunction(OldMD,
+                                                  TSK_ImplicitInstantiation);
+      }
+      
       SemaRef.CurrentInstantiationScope->InstantiatedLocal(Old, New);
     }
     
@@ -816,28 +830,6 @@
       return TreeTransform<TemplateInstantiator>::TransformLambdaExpr(E);
     }
 
-    ExprResult TransformLambdaScope(LambdaExpr *E,
-        CXXMethodDecl *NewCallOperator, 
-        ArrayRef<InitCaptureInfoTy> InitCaptureExprsAndTypes) {
-      CXXMethodDecl *const OldCallOperator = E->getCallOperator();   
-      // In the generic lambda case, we set the NewTemplate to be considered
-      // an "instantiation" of the OldTemplate.
-      if (FunctionTemplateDecl *const NewCallOperatorTemplate = 
-            NewCallOperator->getDescribedFunctionTemplate()) {
-        
-        FunctionTemplateDecl *const OldCallOperatorTemplate = 
-                              OldCallOperator->getDescribedFunctionTemplate();
-        NewCallOperatorTemplate->setInstantiatedFromMemberTemplate(
-                                                     OldCallOperatorTemplate);
-      } else 
-        // For a non-generic lambda we set the NewCallOperator to 
-        // be an instantiation of the OldCallOperator.
-        NewCallOperator->setInstantiationOfMemberFunction(OldCallOperator,
-                                                    TSK_ImplicitInstantiation);
-      
-      return inherited::TransformLambdaScope(E, NewCallOperator, 
-          InitCaptureExprsAndTypes);
-    }
     TemplateParameterList *TransformTemplateParameterList(
                               TemplateParameterList *OrigTPL)  {
       if (!OrigTPL || !OrigTPL->size()) return OrigTPL;
@@ -2241,7 +2233,7 @@
   EnterExpressionEvaluationContext EvalContext(*this,
                                                Sema::PotentiallyEvaluated);
 
-  LocalInstantiationScope Scope(*this);
+  LocalInstantiationScope Scope(*this, true);
 
   // Instantiate the initializer.
   ActOnStartCXXInClassMemberInitializer();
@@ -2796,6 +2788,16 @@
       isa<TemplateTemplateParmDecl>(D))
     return nullptr;
 
+  // Local types referenced prior to definition may require instantiation.
+  if (const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(D))
+    if (RD->isLocalClass())
+      return nullptr;
+
+  // Enumeration types referenced prior to definition may appear as a result of
+  // error recovery.
+  if (isa<EnumDecl>(D))
+    return nullptr;
+
   // If we didn't find the decl, then we either have a sema bug, or we have a
   // forward reference to a label declaration.  Return null to indicate that
   // we have an uninstantiated label.
diff --git a/lib/Sema/SemaTemplateInstantiateDecl.cpp b/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 6936539..d0a5739 100644
--- a/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -202,6 +202,31 @@
   New->addAttr(EIA);
 }
 
+// Constructs and adds to New a new instance of CUDALaunchBoundsAttr using
+// template A as the base and arguments from TemplateArgs.
+static void instantiateDependentCUDALaunchBoundsAttr(
+    Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
+    const CUDALaunchBoundsAttr &Attr, Decl *New) {
+  // The alignment expression is a constant expression.
+  EnterExpressionEvaluationContext Unevaluated(S, Sema::ConstantEvaluated);
+
+  ExprResult Result = S.SubstExpr(Attr.getMaxThreads(), TemplateArgs);
+  if (Result.isInvalid())
+    return;
+  Expr *MaxThreads = Result.getAs<Expr>();
+
+  Expr *MinBlocks = nullptr;
+  if (Attr.getMinBlocks()) {
+    Result = S.SubstExpr(Attr.getMinBlocks(), TemplateArgs);
+    if (Result.isInvalid())
+      return;
+    MinBlocks = Result.getAs<Expr>();
+  }
+
+  S.AddLaunchBoundsAttr(Attr.getLocation(), New, MaxThreads, MinBlocks,
+                        Attr.getSpellingListIndex());
+}
+
 void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
                             const Decl *Tmpl, Decl *New,
                             LateInstantiatedAttrVec *LateAttrs,
@@ -233,6 +258,13 @@
       continue;
     }
 
+    if (const CUDALaunchBoundsAttr *CUDALaunchBounds =
+            dyn_cast<CUDALaunchBoundsAttr>(TmplAttr)) {
+      instantiateDependentCUDALaunchBoundsAttr(*this, TemplateArgs,
+                                               *CUDALaunchBounds, New);
+      continue;
+    }
+
     // Existing DLL attribute on the instantiation takes precedence.
     if (TmplAttr->getKind() == attr::DLLExport ||
         TmplAttr->getKind() == attr::DLLImport) {
@@ -1270,11 +1302,19 @@
   // DR1484 clarifies that the members of a local class are instantiated as part
   // of the instantiation of their enclosing entity.
   if (D->isCompleteDefinition() && D->isLocalClass()) {
+    Sema::SavePendingLocalImplicitInstantiationsRAII
+        SavedPendingLocalImplicitInstantiations(SemaRef);
+
     SemaRef.InstantiateClass(D->getLocation(), Record, D, TemplateArgs,
                              TSK_ImplicitInstantiation,
                              /*Complain=*/true);
+
     SemaRef.InstantiateClassMembers(D->getLocation(), Record, TemplateArgs,
                                     TSK_ImplicitInstantiation);
+
+    // This class may have local implicit instantiations that need to be
+    // performed within this scope.
+    SemaRef.PerformPendingInstantiations(/*LocalOnly=*/true);
   }
 
   SemaRef.DiagnoseUnusedNestedTypedefs(Record);
@@ -1887,7 +1927,7 @@
         SemaRef.SubstType(D->getDefaultArgumentInfo(), TemplateArgs,
                           D->getDefaultArgumentLoc(), D->getDeclName());
     if (InstantiatedDefaultArg)
-      Inst->setDefaultArgument(InstantiatedDefaultArg, false);
+      Inst->setDefaultArgument(InstantiatedDefaultArg);
   }
 
   // Introduce this template parameter's instantiation into the instantiation
@@ -2041,7 +2081,7 @@
   if (D->hasDefaultArgument()) {
     ExprResult Value = SemaRef.SubstExpr(D->getDefaultArgument(), TemplateArgs);
     if (!Value.isInvalid())
-      Param->setDefaultArgument(Value.get(), false);
+      Param->setDefaultArgument(Value.get());
   }
 
   // Introduce this template parameter's instantiation into the instantiation
@@ -2175,10 +2215,10 @@
         D->getDefaultArgument().getTemplateNameLoc(), TemplateArgs);
     if (!TName.isNull())
       Param->setDefaultArgument(
+          SemaRef.Context,
           TemplateArgumentLoc(TemplateArgument(TName),
                               D->getDefaultArgument().getTemplateQualifierLoc(),
-                              D->getDefaultArgument().getTemplateNameLoc()),
-          false);
+                              D->getDefaultArgument().getTemplateNameLoc()));
   }
   Param->setAccess(AS_public);
 
@@ -4402,6 +4442,30 @@
     if (D->isInvalidDecl())
       return nullptr;
 
+    // Normally this function only searches for already instantiated
+    // declarations; however, we have to make an exclusion for local types
+    // used before definition, as in the code:
+    //
+    //   template<typename T> void f1() {
+    //     void g1(struct x1);
+    //     struct x1 {};
+    //   }
+    //
+    // In this case instantiation of the type of 'g1' requires definition of
+    // 'x1', which is defined later. Error recovery may produce an enum used
+    // before definition. In these cases we need to instantiate relevant
+    // declarations here.
+    bool NeedInstantiate = false;
+    if (CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(D))
+      NeedInstantiate = RD->isLocalClass();
+    else
+      NeedInstantiate = isa<EnumDecl>(D);
+    if (NeedInstantiate) {
+      Decl *Inst = SubstDecl(D, CurContext, TemplateArgs);
+      CurrentInstantiationScope->InstantiatedLocal(D, Inst);
+      return cast<TypeDecl>(Inst);
+    }
+
     // If we didn't find the decl, then we must have a label decl that hasn't
     // been found yet.  Lazily instantiate it and return it now.
     assert(isa<LabelDecl>(D));
diff --git a/lib/Sema/SemaType.cpp b/lib/Sema/SemaType.cpp
index 5a6cc2e..d3787ec 100644
--- a/lib/Sema/SemaType.cpp
+++ b/lib/Sema/SemaType.cpp
@@ -688,6 +688,31 @@
   state.setCurrentChunkIndex(declarator.getNumTypeObjects());
 }
 
+static void diagnoseAndRemoveTypeQualifiers(Sema &S, const DeclSpec &DS,
+                                            unsigned &TypeQuals,
+                                            QualType TypeSoFar,
+                                            unsigned RemoveTQs,
+                                            unsigned DiagID) {
+  // If this occurs outside a template instantiation, warn the user about
+  // it; they probably didn't mean to specify a redundant qualifier.
+  typedef std::pair<DeclSpec::TQ, SourceLocation> QualLoc;
+  for (QualLoc Qual : {QualLoc(DeclSpec::TQ_const, DS.getConstSpecLoc()),
+                       QualLoc(DeclSpec::TQ_volatile, DS.getVolatileSpecLoc()),
+                       QualLoc(DeclSpec::TQ_atomic, DS.getAtomicSpecLoc())}) {
+    if (!(RemoveTQs & Qual.first))
+      continue;
+
+    if (S.ActiveTemplateInstantiations.empty()) {
+      if (TypeQuals & Qual.first)
+        S.Diag(Qual.second, DiagID)
+          << DeclSpec::getSpecifierName(Qual.first) << TypeSoFar
+          << FixItHint::CreateRemoval(Qual.second);
+    }
+
+    TypeQuals &= ~Qual.first;
+  }
+}
+
 /// \brief Convert the specified declspec to the appropriate type
 /// object.
 /// \param state Specifies the declarator containing the declaration specifier
@@ -1117,24 +1142,22 @@
 
   // Apply const/volatile/restrict qualifiers to T.
   if (unsigned TypeQuals = DS.getTypeQualifiers()) {
-
-    // Warn about CV qualifiers on functions: C99 6.7.3p8: "If the specification
-    // of a function type includes any type qualifiers, the behavior is
-    // undefined."
-    if (Result->isFunctionType() && TypeQuals) {
-      if (TypeQuals & DeclSpec::TQ_const)
-        S.Diag(DS.getConstSpecLoc(), diag::warn_typecheck_function_qualifiers)
-          << Result << DS.getSourceRange();
-      else if (TypeQuals & DeclSpec::TQ_volatile)
-        S.Diag(DS.getVolatileSpecLoc(),
-               diag::warn_typecheck_function_qualifiers)
-            << Result << DS.getSourceRange();
-      else {
-        assert((TypeQuals & (DeclSpec::TQ_restrict | DeclSpec::TQ_atomic)) &&
-               "Has CVRA quals but not C, V, R, or A?");
-        // No diagnostic; we'll diagnose 'restrict' or '_Atomic' applied to a
-        // function type later, in BuildQualifiedType.
-      }
+    // Warn about CV qualifiers on function types.
+    // C99 6.7.3p8:
+    //   If the specification of a function type includes any type qualifiers,
+    //   the behavior is undefined.
+    // C++11 [dcl.fct]p7:
+    //   The effect of a cv-qualifier-seq in a function declarator is not the
+    //   same as adding cv-qualification on top of the function type. In the
+    //   latter case, the cv-qualifiers are ignored.
+    if (TypeQuals && Result->isFunctionType()) {
+      diagnoseAndRemoveTypeQualifiers(
+          S, DS, TypeQuals, Result, DeclSpec::TQ_const | DeclSpec::TQ_volatile,
+          S.getLangOpts().CPlusPlus
+              ? diag::warn_typecheck_function_qualifiers_ignored
+              : diag::warn_typecheck_function_qualifiers_unspecified);
+      // No diagnostic for 'restrict' or '_Atomic' applied to a
+      // function type; we'll diagnose those later, in BuildQualifiedType.
     }
 
     // C++11 [dcl.ref]p1:
@@ -1145,25 +1168,11 @@
     // There don't appear to be any other contexts in which a cv-qualified
     // reference type could be formed, so the 'ill-formed' clause here appears
     // to never happen.
-    if (DS.getTypeSpecType() == DeclSpec::TST_typename &&
-        TypeQuals && Result->isReferenceType()) {
-      // If this occurs outside a template instantiation, warn the user about
-      // it; they probably didn't mean to specify a redundant qualifier.
-      typedef std::pair<DeclSpec::TQ, SourceLocation> QualLoc;
-      QualLoc Quals[] = {
-        QualLoc(DeclSpec::TQ_const, DS.getConstSpecLoc()),
-        QualLoc(DeclSpec::TQ_volatile, DS.getVolatileSpecLoc()),
-        QualLoc(DeclSpec::TQ_atomic, DS.getAtomicSpecLoc())
-      };
-      for (unsigned I = 0, N = llvm::array_lengthof(Quals); I != N; ++I) {
-        if (S.ActiveTemplateInstantiations.empty()) {
-          if (TypeQuals & Quals[I].first)
-            S.Diag(Quals[I].second, diag::warn_typecheck_reference_qualifiers)
-              << DeclSpec::getSpecifierName(Quals[I].first) << Result
-              << FixItHint::CreateRemoval(Quals[I].second);
-        }
-        TypeQuals &= ~Quals[I].first;
-      }
+    if (TypeQuals && Result->isReferenceType()) {
+      diagnoseAndRemoveTypeQualifiers(
+          S, DS, TypeQuals, Result,
+          DeclSpec::TQ_const | DeclSpec::TQ_volatile | DeclSpec::TQ_atomic,
+          diag::warn_typecheck_reference_qualifiers);
     }
 
     // C90 6.5.3 constraints: "The same type qualifier shall not appear more
@@ -2833,14 +2842,14 @@
       if ((T.getCVRQualifiers() || T->isAtomicType()) &&
           !(S.getLangOpts().CPlusPlus &&
             (T->isDependentType() || T->isRecordType()))) {
-	if (T->isVoidType() && !S.getLangOpts().CPlusPlus &&
-	    D.getFunctionDefinitionKind() == FDK_Definition) {
-	  // [6.9.1/3] qualified void return is invalid on a C
-	  // function definition.  Apparently ok on declarations and
-	  // in C++ though (!)
-	  S.Diag(DeclType.Loc, diag::err_func_returning_qualified_void) << T;
-	} else
-	  diagnoseRedundantReturnTypeQualifiers(S, T, D, chunkIndex);
+        if (T->isVoidType() && !S.getLangOpts().CPlusPlus &&
+            D.getFunctionDefinitionKind() == FDK_Definition) {
+          // [6.9.1/3] qualified void return is invalid on a C
+          // function definition.  Apparently ok on declarations and
+          // in C++ though (!)
+          S.Diag(DeclType.Loc, diag::err_func_returning_qualified_void) << T;
+        } else
+          diagnoseRedundantReturnTypeQualifiers(S, T, D, chunkIndex);
       }
 
       // Objective-C ARC ownership qualifiers are ignored on the function
@@ -3491,16 +3500,27 @@
 }
 
 static void fillAttributedTypeLoc(AttributedTypeLoc TL,
-                                  const AttributeList *attrs) {
-  AttributedType::Kind kind = TL.getAttrKind();
+                                  const AttributeList *attrs,
+                                  const AttributeList *DeclAttrs = nullptr) {
+  // DeclAttrs and attrs cannot be both empty.
+  assert((attrs || DeclAttrs) &&
+         "no type attributes in the expected location!");
 
-  assert(attrs && "no type attributes in the expected location!");
-  AttributeList::Kind parsedKind = getAttrListKind(kind);
-  while (attrs->getKind() != parsedKind) {
+  AttributeList::Kind parsedKind = getAttrListKind(TL.getAttrKind());
+  // Try to search for an attribute of matching kind in attrs list.
+  while (attrs && attrs->getKind() != parsedKind)
     attrs = attrs->getNext();
-    assert(attrs && "no matching attribute in expected location!");
+  if (!attrs) {
+    // No matching type attribute in attrs list found.
+    // Try searching through C++11 attributes in the declarator attribute list.
+    while (DeclAttrs && (!DeclAttrs->isCXX11Attribute() ||
+                         DeclAttrs->getKind() != parsedKind))
+      DeclAttrs = DeclAttrs->getNext();
+    attrs = DeclAttrs;
   }
 
+  assert(attrs && "no matching type attribute in expected location!");
+
   TL.setAttrNameLoc(attrs->getLoc());
   if (TL.hasAttrExprOperand()) {
     assert(attrs->isArgExpr(0) && "mismatched attribute operand kind");
@@ -3854,6 +3874,7 @@
                                      TypeSourceInfo *ReturnTypeInfo) {
   TypeSourceInfo *TInfo = Context.CreateTypeSourceInfo(T);
   UnqualTypeLoc CurrTL = TInfo->getTypeLoc().getUnqualifiedLoc();
+  const AttributeList *DeclAttrs = D.getAttributes();
 
   // Handle parameter packs whose type is a pack expansion.
   if (isa<PackExpansionType>(T)) {
@@ -3870,7 +3891,7 @@
     }
 
     while (AttributedTypeLoc TL = CurrTL.getAs<AttributedTypeLoc>()) {
-      fillAttributedTypeLoc(TL, D.getTypeObject(i).getAttrs());
+      fillAttributedTypeLoc(TL, D.getTypeObject(i).getAttrs(), DeclAttrs);
       CurrTL = TL.getNextTypeLoc().getUnqualifiedLoc();
     }
 
@@ -5132,12 +5153,16 @@
 ///        in order to provide a definition of this entity.
 bool Sema::hasVisibleDefinition(NamedDecl *D, NamedDecl **Suggested) {
   // Easy case: if we don't have modules, all declarations are visible.
-  if (!getLangOpts().Modules)
+  if (!getLangOpts().Modules && !getLangOpts().ModulesLocalVisibility)
     return true;
 
   // If this definition was instantiated from a template, map back to the
   // pattern from which it was instantiated.
-  if (auto *RD = dyn_cast<CXXRecordDecl>(D)) {
+  if (isa<TagDecl>(D) && cast<TagDecl>(D)->isBeingDefined()) {
+    // We're in the middle of defining it; this definition should be treated
+    // as visible.
+    return true;
+  } else if (auto *RD = dyn_cast<CXXRecordDecl>(D)) {
     if (auto *Pattern = RD->getTemplateInstantiationPattern())
       RD = Pattern;
     D = RD->getDefinition();
@@ -5214,20 +5239,8 @@
     // If we know about the definition but it is not visible, complain.
     NamedDecl *SuggestedDef = nullptr;
     if (!Diagnoser.Suppressed && Def &&
-        !hasVisibleDefinition(Def, &SuggestedDef)) {
-      // Suppress this error outside of a SFINAE context if we've already
-      // emitted the error once for this type. There's no usefulness in
-      // repeating the diagnostic.
-      // FIXME: Add a Fix-It that imports the corresponding module or includes
-      // the header.
-      Module *Owner = SuggestedDef->getOwningModule();
-      Diag(Loc, diag::err_module_private_definition)
-        << T << Owner->getFullModuleName();
-      Diag(SuggestedDef->getLocation(), diag::note_previous_definition);
-
-      // Try to recover by implicitly importing this module.
-      createImplicitModuleImportForErrorRecovery(Loc, Owner);
-    }
+        !hasVisibleDefinition(Def, &SuggestedDef))
+      diagnoseMissingImport(Loc, SuggestedDef, /*NeedDefinition*/true);
 
     // We lock in the inheritance model once somebody has asked us to ensure
     // that a pointer-to-member type is complete.
diff --git a/lib/Sema/TreeTransform.h b/lib/Sema/TreeTransform.h
index df0e4b3..fde8946 100644
--- a/lib/Sema/TreeTransform.h
+++ b/lib/Sema/TreeTransform.h
@@ -619,11 +619,6 @@
 
   StmtResult TransformCompoundStmt(CompoundStmt *S, bool IsStmtExpr);
   ExprResult TransformCXXNamedCastExpr(CXXNamedCastExpr *E);
-  
-  typedef std::pair<ExprResult, QualType> InitCaptureInfoTy;
-  /// \brief Transform the captures and body of a lambda expression.
-  ExprResult TransformLambdaScope(LambdaExpr *E, CXXMethodDecl *CallOperator, 
-       ArrayRef<InitCaptureInfoTy> InitCaptureExprsAndTypes);
 
   TemplateParameterList *TransformTemplateParameterList(
         TemplateParameterList *TPL) {
@@ -7955,6 +7950,25 @@
                                                 E->usesGNUSyntax(), Init.get());
 }
 
+// Seems that if TransformInitListExpr() only works on the syntactic form of an
+// InitListExpr, then a DesignatedInitUpdateExpr is not encountered.
+template<typename Derived>
+ExprResult
+TreeTransform<Derived>::TransformDesignatedInitUpdateExpr(
+    DesignatedInitUpdateExpr *E) {
+  llvm_unreachable("Unexpected DesignatedInitUpdateExpr in syntactic form of "
+                   "initializer");
+  return ExprError();
+}
+
+template<typename Derived>
+ExprResult
+TreeTransform<Derived>::TransformNoInitExpr(
+    NoInitExpr *E) {
+  llvm_unreachable("Unexpected NoInitExpr in syntactic form of initializer");
+  return ExprError();
+}
+
 template<typename Derived>
 ExprResult
 TreeTransform<Derived>::TransformImplicitValueInitExpr(
@@ -9131,13 +9145,14 @@
 TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
   // Transform any init-capture expressions before entering the scope of the
   // lambda body, because they are not semantically within that scope.
+  typedef std::pair<ExprResult, QualType> InitCaptureInfoTy;
   SmallVector<InitCaptureInfoTy, 8> InitCaptureExprsAndTypes;
   InitCaptureExprsAndTypes.resize(E->explicit_capture_end() -
-      E->explicit_capture_begin());
+                                  E->explicit_capture_begin());
   for (LambdaExpr::capture_iterator C = E->capture_begin(),
                                     CEnd = E->capture_end();
        C != CEnd; ++C) {
-    if (!C->isInitCapture())
+    if (!E->isInitCapture(C))
       continue;
     EnterExpressionEvaluationContext EEEC(getSema(),
                                           Sema::PotentiallyEvaluated);
@@ -9159,12 +9174,9 @@
         std::make_pair(NewExprInitResult, NewInitCaptureType);
   }
 
-  LambdaScopeInfo *LSI = getSema().PushLambdaScope();
-  Sema::FunctionScopeRAII FuncScopeCleanup(getSema());
-
   // Transform the template parameters, and add them to the current
   // instantiation scope. The null case is handled correctly.
-  LSI->GLTemplateParameterList = getDerived().TransformTemplateParameterList(
+  auto TPL = getDerived().TransformTemplateParameterList(
       E->getTemplateParameterList());
 
   // Transform the type of the original lambda's call operator.
@@ -9192,6 +9204,10 @@
                                                         NewCallOpType);
   }
 
+  LambdaScopeInfo *LSI = getSema().PushLambdaScope();
+  Sema::FunctionScopeRAII FuncScopeCleanup(getSema());
+  LSI->GLTemplateParameterList = TPL;
+
   // Create the local class that will describe the lambda.
   CXXRecordDecl *Class
     = getSema().createLambdaClosureType(E->getIntroducerRange(),
@@ -9208,34 +9224,22 @@
   LSI->CallOperator = NewCallOperator;
 
   getDerived().transformAttrs(E->getCallOperator(), NewCallOperator);
-
-  // TransformLambdaScope will manage the function scope, so we can disable the
-  // cleanup.
-  FuncScopeCleanup.disable();
-
-  return getDerived().TransformLambdaScope(E, NewCallOperator, 
-      InitCaptureExprsAndTypes);
-}
-
-template<typename Derived>
-ExprResult
-TreeTransform<Derived>::TransformLambdaScope(LambdaExpr *E,
-    CXXMethodDecl *CallOperator, 
-    ArrayRef<InitCaptureInfoTy> InitCaptureExprsAndTypes) {
-  bool Invalid = false;
+  getDerived().transformedLocalDecl(E->getCallOperator(), NewCallOperator);
 
   // Introduce the context of the call operator.
-  Sema::ContextRAII SavedContext(getSema(), CallOperator,
+  Sema::ContextRAII SavedContext(getSema(), NewCallOperator,
                                  /*NewThisContext*/false);
 
-  LambdaScopeInfo *const LSI = getSema().getCurLambda();
   // Enter the scope of the lambda.
-  getSema().buildLambdaScope(LSI, CallOperator, E->getIntroducerRange(),
-                                 E->getCaptureDefault(),
-                                 E->getCaptureDefaultLoc(),
-                                 E->hasExplicitParameters(),
-                                 E->hasExplicitResultType(),
-                                 E->isMutable());
+  getSema().buildLambdaScope(LSI, NewCallOperator,
+                             E->getIntroducerRange(),
+                             E->getCaptureDefault(),
+                             E->getCaptureDefaultLoc(),
+                             E->hasExplicitParameters(),
+                             E->hasExplicitResultType(),
+                             E->isMutable());
+
+  bool Invalid = false;
 
   // Transform captures.
   bool FinishedExplicitCaptures = false;
@@ -9260,8 +9264,7 @@
       continue;
 
     // Rebuild init-captures, including the implied field declaration.
-    if (C->isInitCapture()) {
-      
+    if (E->isInitCapture(C)) {
       InitCaptureInfoTy InitExprTypePair = 
           InitCaptureExprsAndTypes[C - E->capture_begin()];
       ExprResult Init = InitExprTypePair.first;
@@ -9348,28 +9351,34 @@
   if (!FinishedExplicitCaptures)
     getSema().finishLambdaExplicitCaptures(LSI);
 
-
   // Enter a new evaluation context to insulate the lambda from any
   // cleanups from the enclosing full-expression.
   getSema().PushExpressionEvaluationContext(Sema::PotentiallyEvaluated);
 
-  if (Invalid) {
-    getSema().ActOnLambdaError(E->getLocStart(), /*CurScope=*/nullptr,
-                               /*IsInstantiation=*/true);
-    return ExprError();
-  }
-
   // Instantiate the body of the lambda expression.
-  StmtResult Body = getDerived().TransformStmt(E->getBody());
+  StmtResult Body =
+      Invalid ? StmtError() : getDerived().TransformStmt(E->getBody());
+
+  // ActOnLambda* will pop the function scope for us.
+  FuncScopeCleanup.disable();
+
   if (Body.isInvalid()) {
+    SavedContext.pop();
     getSema().ActOnLambdaError(E->getLocStart(), /*CurScope=*/nullptr,
                                /*IsInstantiation=*/true);
     return ExprError();
   }
 
-  return getSema().ActOnLambdaExpr(E->getLocStart(), Body.get(),
-                                   /*CurScope=*/nullptr,
-                                   /*IsInstantiation=*/true);
+  // Copy the LSI before ActOnFinishFunctionBody removes it.
+  // FIXME: This is dumb. Store the lambda information somewhere that outlives
+  // the call operator.
+  auto LSICopy = *LSI;
+  getSema().ActOnFinishFunctionBody(NewCallOperator, Body.get(),
+                                    /*IsInstantiation*/ true);
+  SavedContext.pop();
+
+  return getSema().BuildLambdaExpr(E->getLocStart(), Body.get()->getLocEnd(),
+                                   &LSICopy);
 }
 
 template<typename Derived>
diff --git a/lib/Serialization/ASTReader.cpp b/lib/Serialization/ASTReader.cpp
index d26ed222..609c25d 100644
--- a/lib/Serialization/ASTReader.cpp
+++ b/lib/Serialization/ASTReader.cpp
@@ -777,8 +777,6 @@
   Bits >>= 1;
   bool ExtensionToken = Bits & 0x01;
   Bits >>= 1;
-  bool hasSubmoduleMacros = Bits & 0x01;
-  Bits >>= 1;
   bool hadMacroDefinition = Bits & 0x01;
   Bits >>= 1;
 
@@ -820,49 +818,8 @@
     uint32_t MacroDirectivesOffset =
         endian::readNext<uint32_t, little, unaligned>(d);
     DataLen -= 4;
-    SmallVector<uint32_t, 8> LocalMacroIDs;
-    if (hasSubmoduleMacros) {
-      while (true) {
-        uint32_t LocalMacroID =
-            endian::readNext<uint32_t, little, unaligned>(d);
-        DataLen -= 4;
-        if (LocalMacroID == (uint32_t)-1) break;
-        LocalMacroIDs.push_back(LocalMacroID);
-      }
-    }
 
-    if (F.Kind == MK_ImplicitModule || F.Kind == MK_ExplicitModule) {
-      // Macro definitions are stored from newest to oldest, so reverse them
-      // before registering them.
-      llvm::SmallVector<unsigned, 8> MacroSizes;
-      for (SmallVectorImpl<uint32_t>::iterator
-             I = LocalMacroIDs.begin(), E = LocalMacroIDs.end(); I != E; /**/) {
-        unsigned Size = 1;
-
-        static const uint32_t HasOverridesFlag = 0x80000000U;
-        if (I + 1 != E && (I[1] & HasOverridesFlag))
-          Size += 1 + (I[1] & ~HasOverridesFlag);
-
-        MacroSizes.push_back(Size);
-        I += Size;
-      }
-
-      SmallVectorImpl<uint32_t>::iterator I = LocalMacroIDs.end();
-      for (SmallVectorImpl<unsigned>::reverse_iterator SI = MacroSizes.rbegin(),
-                                                       SE = MacroSizes.rend();
-           SI != SE; ++SI) {
-        I -= *SI;
-
-        uint32_t LocalMacroID = *I;
-        ArrayRef<uint32_t> Overrides;
-        if (*SI != 1)
-          Overrides = llvm::makeArrayRef(&I[2], *SI - 2);
-        Reader.addPendingMacroFromModule(II, &F, LocalMacroID, Overrides);
-      }
-      assert(I == LocalMacroIDs.begin());
-    } else {
-      Reader.addPendingMacroFromPCH(II, &F, MacroDirectivesOffset);
-    }
+    Reader.addPendingMacro(II, &F, MacroDirectivesOffset);
   }
 
   Reader.SetIdentifierInfo(ID, II);
@@ -1426,6 +1383,7 @@
     PreprocessorRecordTypes RecType =
       (PreprocessorRecordTypes)Stream.readRecord(Entry.ID, Record);
     switch (RecType) {
+    case PP_MODULE_MACRO:
     case PP_MACRO_DIRECTIVE_HISTORY:
       return Macro;
 
@@ -1474,10 +1432,10 @@
         PreprocessedEntityID
             GlobalID = getGlobalPreprocessedEntityID(F, Record[NextIndex]);
         PreprocessingRecord &PPRec = *PP.getPreprocessingRecord();
-        PreprocessingRecord::PPEntityID
-          PPID = PPRec.getPPEntityID(GlobalID-1, /*isLoaded=*/true);
-        MacroDefinition *PPDef =
-          cast_or_null<MacroDefinition>(PPRec.getPreprocessedEntity(PPID));
+        PreprocessingRecord::PPEntityID PPID =
+            PPRec.getPPEntityID(GlobalID - 1, /*isLoaded=*/true);
+        MacroDefinitionRecord *PPDef = cast_or_null<MacroDefinitionRecord>(
+            PPRec.getPreprocessedEntity(PPID));
         if (PPDef)
           PPRec.RegisterMacroDefinition(Macro, PPDef);
       }
@@ -1619,24 +1577,9 @@
   return HFI;
 }
 
-void
-ASTReader::addPendingMacroFromModule(IdentifierInfo *II, ModuleFile *M,
-                                     GlobalMacroID GMacID,
-                                     ArrayRef<SubmoduleID> Overrides) {
-  assert(NumCurrentElementsDeserializing > 0 &&"Missing deserialization guard");
-  SubmoduleID *OverrideData = nullptr;
-  if (!Overrides.empty()) {
-    OverrideData = new (Context) SubmoduleID[Overrides.size() + 1];
-    OverrideData[0] = Overrides.size();
-    for (unsigned I = 0; I != Overrides.size(); ++I)
-      OverrideData[I + 1] = getGlobalSubmoduleID(*M, Overrides[I]);
-  }
-  PendingMacroIDs[II].push_back(PendingMacroInfo(M, GMacID, OverrideData));
-}
-
-void ASTReader::addPendingMacroFromPCH(IdentifierInfo *II,
-                                       ModuleFile *M,
-                                       uint64_t MacroDirectivesOffset) {
+void ASTReader::addPendingMacro(IdentifierInfo *II,
+                                ModuleFile *M,
+                                uint64_t MacroDirectivesOffset) {
   assert(NumCurrentElementsDeserializing > 0 &&"Missing deserialization guard");
   PendingMacroIDs[II].push_back(PendingMacroInfo(M, MacroDirectivesOffset));
 }
@@ -1780,116 +1723,82 @@
     IdentifierGeneration[II] = getGeneration();
 }
 
-struct ASTReader::ModuleMacroInfo {
-  SubmoduleID SubModID;
-  MacroInfo *MI;
-  SubmoduleID *Overrides;
-  // FIXME: Remove this.
-  ModuleFile *F;
-
-  bool isDefine() const { return MI; }
-
-  SubmoduleID getSubmoduleID() const { return SubModID; }
-
-  ArrayRef<SubmoduleID> getOverriddenSubmodules() const {
-    if (!Overrides)
-      return None;
-    return llvm::makeArrayRef(Overrides + 1, *Overrides);
-  }
-
-  MacroDirective *import(Preprocessor &PP, SourceLocation ImportLoc) const {
-    if (!MI)
-      return PP.AllocateUndefMacroDirective(ImportLoc, SubModID,
-                                            getOverriddenSubmodules());
-    return PP.AllocateDefMacroDirective(MI, ImportLoc, SubModID,
-                                        getOverriddenSubmodules());
-  }
-};
-
-ASTReader::ModuleMacroInfo *
-ASTReader::getModuleMacro(IdentifierInfo *II, const PendingMacroInfo &PMInfo) {
-  ModuleMacroInfo Info;
-
-  uint32_t ID = PMInfo.ModuleMacroData.MacID;
-  if (ID & 1) {
-    // Macro undefinition.
-    Info.SubModID = getGlobalSubmoduleID(*PMInfo.M, ID >> 1);
-    Info.MI = nullptr;
-
-    // If we've already loaded the #undef of this macro from this module,
-    // don't do so again.
-    if (!LoadedUndefs.insert(std::make_pair(II, Info.SubModID)).second)
-      return nullptr;
-  } else {
-    // Macro definition.
-    GlobalMacroID GMacID = getGlobalMacroID(*PMInfo.M, ID >> 1);
-    assert(GMacID);
-
-    // If this macro has already been loaded, don't do so again.
-    // FIXME: This is highly dubious. Multiple macro definitions can have the
-    // same MacroInfo (and hence the same GMacID) due to #pragma push_macro etc.
-    if (MacrosLoaded[GMacID - NUM_PREDEF_MACRO_IDS])
-      return nullptr;
-
-    Info.MI = getMacro(GMacID);
-    Info.SubModID = Info.MI->getOwningModuleID();
-  }
-  Info.Overrides = PMInfo.ModuleMacroData.Overrides;
-  Info.F = PMInfo.M;
-
-  return new (Context) ModuleMacroInfo(Info);
-}
-
 void ASTReader::resolvePendingMacro(IdentifierInfo *II,
                                     const PendingMacroInfo &PMInfo) {
-  assert(II);
-
-  if (PMInfo.M->Kind != MK_ImplicitModule &&
-      PMInfo.M->Kind != MK_ExplicitModule) {
-    installPCHMacroDirectives(II, *PMInfo.M,
-                              PMInfo.PCHMacroData.MacroDirectivesOffset);
-    return;
-  }
-
-  // Module Macro.
-
-  ModuleMacroInfo *MMI = getModuleMacro(II, PMInfo);
-  if (!MMI)
-    return;
-
-  Module *Owner = getSubmodule(MMI->getSubmoduleID());
-  if (Owner && Owner->NameVisibility == Module::Hidden) {
-    // Macros in the owning module are hidden. Just remember this macro to
-    // install if we make this module visible.
-    HiddenNamesMap[Owner].HiddenMacros.insert(std::make_pair(II, MMI));
-  } else {
-    installImportedMacro(II, MMI, Owner);
-  }
-}
-
-void ASTReader::installPCHMacroDirectives(IdentifierInfo *II,
-                                          ModuleFile &M, uint64_t Offset) {
-  assert(M.Kind != MK_ImplicitModule && M.Kind != MK_ExplicitModule);
+  ModuleFile &M = *PMInfo.M;
 
   BitstreamCursor &Cursor = M.MacroCursor;
   SavedStreamPosition SavedPosition(Cursor);
-  Cursor.JumpToBit(Offset);
+  Cursor.JumpToBit(PMInfo.MacroDirectivesOffset);
 
-  llvm::BitstreamEntry Entry =
-      Cursor.advance(BitstreamCursor::AF_DontPopBlockAtEnd);
-  if (Entry.Kind != llvm::BitstreamEntry::Record) {
-    Error("malformed block record in AST file");
-    return;
-  }
+  struct ModuleMacroRecord {
+    SubmoduleID SubModID;
+    MacroInfo *MI;
+    SmallVector<SubmoduleID, 8> Overrides;
+  };
+  llvm::SmallVector<ModuleMacroRecord, 8> ModuleMacros;
 
+  // We expect to see a sequence of PP_MODULE_MACRO records listing exported
+  // macros, followed by a PP_MACRO_DIRECTIVE_HISTORY record with the complete
+  // macro history.
   RecordData Record;
-  PreprocessorRecordTypes RecType =
-    (PreprocessorRecordTypes)Cursor.readRecord(Entry.ID, Record);
-  if (RecType != PP_MACRO_DIRECTIVE_HISTORY) {
-    Error("malformed block record in AST file");
-    return;
+  while (true) {
+    llvm::BitstreamEntry Entry =
+        Cursor.advance(BitstreamCursor::AF_DontPopBlockAtEnd);
+    if (Entry.Kind != llvm::BitstreamEntry::Record) {
+      Error("malformed block record in AST file");
+      return;
+    }
+
+    Record.clear();
+    switch ((PreprocessorRecordTypes)Cursor.readRecord(Entry.ID, Record)) {
+    case PP_MACRO_DIRECTIVE_HISTORY:
+      break;
+
+    case PP_MODULE_MACRO: {
+      ModuleMacros.push_back(ModuleMacroRecord());
+      auto &Info = ModuleMacros.back();
+      Info.SubModID = getGlobalSubmoduleID(M, Record[0]);
+      Info.MI = getMacro(getGlobalMacroID(M, Record[1]));
+      for (int I = 2, N = Record.size(); I != N; ++I)
+        Info.Overrides.push_back(getGlobalSubmoduleID(M, Record[I]));
+      continue;
+    }
+
+    default:
+      Error("malformed block record in AST file");
+      return;
+    }
+
+    // We found the macro directive history; that's the last record
+    // for this macro.
+    break;
   }
 
+  // Module macros are listed in reverse dependency order.
+  {
+    std::reverse(ModuleMacros.begin(), ModuleMacros.end());
+    llvm::SmallVector<ModuleMacro*, 8> Overrides;
+    for (auto &MMR : ModuleMacros) {
+      Overrides.clear();
+      for (unsigned ModID : MMR.Overrides) {
+        Module *Mod = getSubmodule(ModID);
+        auto *Macro = PP.getModuleMacro(Mod, II);
+        assert(Macro && "missing definition for overridden macro");
+        Overrides.push_back(Macro);
+      }
+
+      bool Inserted = false;
+      Module *Owner = getSubmodule(MMR.SubModID);
+      PP.addModuleMacro(Owner, II, MMR.MI, Overrides, Inserted);
+    }
+  }
+
+  // Don't read the directive history for a module; we don't have anywhere
+  // to put it.
+  if (M.Kind == MK_ImplicitModule || M.Kind == MK_ExplicitModule)
+    return;
+
   // Deserialize the macro directives history in reverse source-order.
   MacroDirective *Latest = nullptr, *Earliest = nullptr;
   unsigned Idx = 0, N = Record.size();
@@ -1899,31 +1808,12 @@
     MacroDirective::Kind K = (MacroDirective::Kind)Record[Idx++];
     switch (K) {
     case MacroDirective::MD_Define: {
-      GlobalMacroID GMacID = getGlobalMacroID(M, Record[Idx++]);
-      MacroInfo *MI = getMacro(GMacID);
-      SubmoduleID ImportedFrom = Record[Idx++];
-      bool IsAmbiguous = Record[Idx++];
-      llvm::SmallVector<unsigned, 4> Overrides;
-      if (ImportedFrom) {
-        Overrides.insert(Overrides.end(),
-                         &Record[Idx] + 1, &Record[Idx] + 1 + Record[Idx]);
-        Idx += Overrides.size() + 1;
-      }
-      DefMacroDirective *DefMD =
-          PP.AllocateDefMacroDirective(MI, Loc, ImportedFrom, Overrides);
-      DefMD->setAmbiguous(IsAmbiguous);
-      MD = DefMD;
+      MacroInfo *MI = getMacro(getGlobalMacroID(M, Record[Idx++]));
+      MD = PP.AllocateDefMacroDirective(MI, Loc);
       break;
     }
     case MacroDirective::MD_Undefine: {
-      SubmoduleID ImportedFrom = Record[Idx++];
-      llvm::SmallVector<unsigned, 4> Overrides;
-      if (ImportedFrom) {
-        Overrides.insert(Overrides.end(),
-                         &Record[Idx] + 1, &Record[Idx] + 1 + Record[Idx]);
-        Idx += Overrides.size() + 1;
-      }
-      MD = PP.AllocateUndefMacroDirective(Loc, ImportedFrom, Overrides);
+      MD = PP.AllocateUndefMacroDirective(Loc);
       break;
     }
     case MacroDirective::MD_Visibility:
@@ -1939,175 +1829,8 @@
     Earliest = MD;
   }
 
-  PP.setLoadedMacroDirective(II, Latest);
-}
-
-/// \brief For the given macro definitions, check if they are both in system
-/// modules.
-static bool areDefinedInSystemModules(MacroInfo *PrevMI, MacroInfo *NewMI,
-                                      Module *NewOwner, ASTReader &Reader) {
-  assert(PrevMI && NewMI);
-  Module *PrevOwner = nullptr;
-  if (SubmoduleID PrevModID = PrevMI->getOwningModuleID())
-    PrevOwner = Reader.getSubmodule(PrevModID);
-  if (PrevOwner && PrevOwner == NewOwner)
-    return false;
-  SourceManager &SrcMgr = Reader.getSourceManager();
-  bool PrevInSystem = (PrevOwner && PrevOwner->IsSystem) ||
-                      SrcMgr.isInSystemHeader(PrevMI->getDefinitionLoc());
-  bool NewInSystem = (NewOwner && NewOwner->IsSystem) ||
-                     SrcMgr.isInSystemHeader(NewMI->getDefinitionLoc());
-  return PrevInSystem && NewInSystem;
-}
-
-void ASTReader::removeOverriddenMacros(IdentifierInfo *II,
-                                       SourceLocation ImportLoc,
-                                       AmbiguousMacros &Ambig,
-                                       ArrayRef<SubmoduleID> Overrides) {
-  for (unsigned OI = 0, ON = Overrides.size(); OI != ON; ++OI) {
-    SubmoduleID OwnerID = Overrides[OI];
-
-    // If this macro is not yet visible, remove it from the hidden names list.
-    // It won't be there if we're in the middle of making the owner visible.
-    Module *Owner = getSubmodule(OwnerID);
-    auto HiddenIt = HiddenNamesMap.find(Owner);
-    if (HiddenIt != HiddenNamesMap.end()) {
-      HiddenNames &Hidden = HiddenIt->second;
-      HiddenMacrosMap::iterator HI = Hidden.HiddenMacros.find(II);
-      if (HI != Hidden.HiddenMacros.end()) {
-        // Register the macro now so we don't lose it when we re-export.
-        PP.appendMacroDirective(II, HI->second->import(PP, ImportLoc));
-
-        auto SubOverrides = HI->second->getOverriddenSubmodules();
-        Hidden.HiddenMacros.erase(HI);
-        removeOverriddenMacros(II, ImportLoc, Ambig, SubOverrides);
-      }
-    }
-
-    // If this macro is already in our list of conflicts, remove it from there.
-    Ambig.erase(
-        std::remove_if(Ambig.begin(), Ambig.end(), [&](DefMacroDirective *MD) {
-          return MD->getInfo()->getOwningModuleID() == OwnerID;
-        }),
-        Ambig.end());
-  }
-}
-
-ASTReader::AmbiguousMacros *
-ASTReader::removeOverriddenMacros(IdentifierInfo *II,
-                                  SourceLocation ImportLoc,
-                                  ArrayRef<SubmoduleID> Overrides) {
-  MacroDirective *Prev = PP.getMacroDirective(II);
-  if (!Prev && Overrides.empty())
-    return nullptr;
-
-  DefMacroDirective *PrevDef = Prev ? Prev->getDefinition().getDirective()
-                                    : nullptr;
-  if (PrevDef && PrevDef->isAmbiguous()) {
-    // We had a prior ambiguity. Check whether we resolve it (or make it worse).
-    AmbiguousMacros &Ambig = AmbiguousMacroDefs[II];
-    Ambig.push_back(PrevDef);
-
-    removeOverriddenMacros(II, ImportLoc, Ambig, Overrides);
-
-    if (!Ambig.empty())
-      return &Ambig;
-
-    AmbiguousMacroDefs.erase(II);
-  } else {
-    // There's no ambiguity yet. Maybe we're introducing one.
-    AmbiguousMacros Ambig;
-    if (PrevDef)
-      Ambig.push_back(PrevDef);
-
-    removeOverriddenMacros(II, ImportLoc, Ambig, Overrides);
-
-    if (!Ambig.empty()) {
-      AmbiguousMacros &Result = AmbiguousMacroDefs[II];
-      std::swap(Result, Ambig);
-      return &Result;
-    }
-  }
-
-  // We ended up with no ambiguity.
-  return nullptr;
-}
-
-void ASTReader::installImportedMacro(IdentifierInfo *II, ModuleMacroInfo *MMI,
-                                     Module *Owner) {
-  assert(II && Owner);
-
-  SourceLocation ImportLoc = Owner->MacroVisibilityLoc;
-  if (ImportLoc.isInvalid()) {
-    // FIXME: If we made macros from this module visible but didn't provide a
-    // source location for the import, we don't have a location for the macro.
-    // Use the location at which the containing module file was first imported
-    // for now.
-    ImportLoc = MMI->F->DirectImportLoc;
-    assert(ImportLoc.isValid() && "no import location for a visible macro?");
-  }
-
-  AmbiguousMacros *Prev =
-      removeOverriddenMacros(II, ImportLoc, MMI->getOverriddenSubmodules());
-
-  // Create a synthetic macro definition corresponding to the import (or null
-  // if this was an undefinition of the macro).
-  MacroDirective *Imported = MMI->import(PP, ImportLoc);
-  DefMacroDirective *MD = dyn_cast<DefMacroDirective>(Imported);
-
-  // If there's no ambiguity, just install the macro.
-  if (!Prev) {
-    PP.appendMacroDirective(II, Imported);
-    return;
-  }
-  assert(!Prev->empty());
-
-  if (!MD) {
-    // We imported a #undef that didn't remove all prior definitions. The most
-    // recent prior definition remains, and we install it in the place of the
-    // imported directive, as if by a local #pragma pop_macro.
-    MacroInfo *NewMI = Prev->back()->getInfo();
-    Prev->pop_back();
-    MD = PP.AllocateDefMacroDirective(NewMI, ImportLoc);
-
-    // Install our #undef first so that we don't lose track of it. We'll replace
-    // this with whichever macro definition ends up winning.
-    PP.appendMacroDirective(II, Imported);
-  }
-
-  // We're introducing a macro definition that creates or adds to an ambiguity.
-  // We can resolve that ambiguity if this macro is token-for-token identical to
-  // all of the existing definitions.
-  MacroInfo *NewMI = MD->getInfo();
-  assert(NewMI && "macro definition with no MacroInfo?");
-  while (!Prev->empty()) {
-    MacroInfo *PrevMI = Prev->back()->getInfo();
-    assert(PrevMI && "macro definition with no MacroInfo?");
-
-    // Before marking the macros as ambiguous, check if this is a case where
-    // both macros are in system headers. If so, we trust that the system
-    // did not get it wrong. This also handles cases where Clang's own
-    // headers have a different spelling of certain system macros:
-    //   #define LONG_MAX __LONG_MAX__ (clang's limits.h)
-    //   #define LONG_MAX 0x7fffffffffffffffL (system's limits.h)
-    //
-    // FIXME: Remove the defined-in-system-headers check. clang's limits.h
-    // overrides the system limits.h's macros, so there's no conflict here.
-    if (NewMI != PrevMI &&
-        !PrevMI->isIdenticalTo(*NewMI, PP, /*Syntactically=*/true) &&
-        !areDefinedInSystemModules(PrevMI, NewMI, Owner, *this))
-      break;
-
-    // The previous definition is the same as this one (or both are defined in
-    // system modules so we can assume they're equivalent); we don't need to
-    // track it any more.
-    Prev->pop_back();
-  }
-
-  if (!Prev->empty())
-    MD->setAmbiguous(true);
-
-  PP.appendMacroDirective(II, MD);
+  if (Latest)
+    PP.setLoadedMacroDirective(II, Latest);
 }
 
 ASTReader::InputFileInfo
@@ -3298,6 +3021,18 @@
             ReadSourceLocation(F, Record, I).getRawEncoding());
       }
       break;
+    case DELETE_EXPRS_TO_ANALYZE:
+      for (unsigned I = 0, N = Record.size(); I != N;) {
+        DelayedDeleteExprs.push_back(getGlobalDeclID(F, Record[I++]));
+        const uint64_t Count = Record[I++];
+        DelayedDeleteExprs.push_back(Count);
+        for (uint64_t C = 0; C < Count; ++C) {
+          DelayedDeleteExprs.push_back(ReadSourceLocation(F, Record, I).getRawEncoding());
+          bool IsArrayForm = Record[I++] == 1;
+          DelayedDeleteExprs.push_back(IsArrayForm);
+        }
+      }
+      break;
 
     case IMPORTED_MODULES: {
       if (F.Kind != MK_ImplicitModule && F.Kind != MK_ExplicitModule) {
@@ -3499,10 +3234,9 @@
   }
 }
 
-void ASTReader::makeNamesVisible(const HiddenNames &Names, Module *Owner,
-                                 bool FromFinalization) {
-  // FIXME: Only do this if Owner->NameVisibility == AllVisible.
-  for (Decl *D : Names.HiddenDecls) {
+void ASTReader::makeNamesVisible(const HiddenNames &Names, Module *Owner) {
+  assert(Owner->NameVisibility != Module::Hidden && "nothing to make visible?");
+  for (Decl *D : Names) {
     bool wasHidden = D->Hidden;
     D->Hidden = false;
 
@@ -3512,22 +3246,11 @@
       }
     }
   }
-
-  assert((FromFinalization || Owner->NameVisibility >= Module::MacrosVisible) &&
-         "nothing to make visible?");
-  for (const auto &Macro : Names.HiddenMacros) {
-    if (FromFinalization)
-      PP.appendMacroDirective(Macro.first,
-                              Macro.second->import(PP, SourceLocation()));
-    else
-      installImportedMacro(Macro.first, Macro.second, Owner);
-  }
 }
 
 void ASTReader::makeModuleVisible(Module *Mod,
                                   Module::NameVisibilityKind NameVisibility,
-                                  SourceLocation ImportLoc,
-                                  bool Complain) {
+                                  SourceLocation ImportLoc) {
   llvm::SmallPtrSet<Module *, 4> Visited;
   SmallVector<Module *, 4> Stack;
   Stack.push_back(Mod);
@@ -3546,9 +3269,6 @@
     }
 
     // Update the module's name visibility.
-    if (NameVisibility >= Module::MacrosVisible &&
-        Mod->NameVisibility < Module::MacrosVisible)
-      Mod->MacroVisibilityLoc = ImportLoc;
     Mod->NameVisibility = NameVisibility;
 
     // If we've already deserialized any names from this module,
@@ -3557,8 +3277,7 @@
     if (Hidden != HiddenNamesMap.end()) {
       auto HiddenNames = std::move(*Hidden);
       HiddenNamesMap.erase(Hidden);
-      makeNamesVisible(HiddenNames.second, HiddenNames.first,
-                       /*FromFinalization*/false);
+      makeNamesVisible(HiddenNames.second, HiddenNames.first);
       assert(HiddenNamesMap.find(Mod) == HiddenNamesMap.end() &&
              "making names visible added hidden names");
     }
@@ -3572,20 +3291,6 @@
       if (Visited.insert(Exported).second)
         Stack.push_back(Exported);
     }
-
-    // Detect any conflicts.
-    if (Complain) {
-      assert(ImportLoc.isValid() && "Missing import location");
-      for (unsigned I = 0, N = Mod->Conflicts.size(); I != N; ++I) {
-        if (Mod->Conflicts[I].Other->NameVisibility >= NameVisibility) {
-          Diag(ImportLoc, diag::warn_module_conflict)
-            << Mod->getFullModuleName()
-            << Mod->Conflicts[I].Other->getFullModuleName()
-            << Mod->Conflicts[I].Message;
-          // FIXME: Need note where the other module was imported.
-        }
-      }
-    }
   }
 }
 
@@ -3744,7 +3449,7 @@
 
     case UnresolvedModuleRef::Import:
       if (ResolvedMod)
-        Unresolved.Mod->Imports.push_back(ResolvedMod);
+        Unresolved.Mod->Imports.insert(ResolvedMod);
       continue;
 
     case UnresolvedModuleRef::Export:
@@ -3956,7 +3661,7 @@
   return Success;
 }
 
-void ASTReader::InitializeContext() {  
+void ASTReader::InitializeContext() {
   // If there's a listener, notify them that we "read" the translation unit.
   if (DeserializationListener)
     DeserializationListener->DeclRead(PREDEF_DECL_TRANSLATION_UNIT_ID, 
@@ -4079,24 +3784,19 @@
   }
 
   // Re-export any modules that were imported by a non-module AST file.
-  // FIXME: This does not make macro-only imports visible again. It also doesn't
-  // make #includes mapped to module imports visible.
+  // FIXME: This does not make macro-only imports visible again.
   for (auto &Import : ImportedModules) {
-    if (Module *Imported = getSubmodule(Import.ID))
+    if (Module *Imported = getSubmodule(Import.ID)) {
       makeModuleVisible(Imported, Module::AllVisible,
-                        /*ImportLoc=*/Import.ImportLoc,
-                        /*Complain=*/false);
+                        /*ImportLoc=*/Import.ImportLoc);
+      PP.makeModuleVisible(Imported, Import.ImportLoc);
+    }
   }
   ImportedModules.clear();
 }
 
 void ASTReader::finalizeForWriting() {
-  while (!HiddenNamesMap.empty()) {
-    auto HiddenNames = std::move(*HiddenNamesMap.begin());
-    HiddenNamesMap.erase(HiddenNamesMap.begin());
-    makeNamesVisible(HiddenNames.second, HiddenNames.first,
-                     /*FromFinalization*/true);
-  }
+  // Nothing to do for now.
 }
 
 /// \brief Given a cursor at the start of an AST file, scan ahead and drop the
@@ -4582,10 +4282,12 @@
     }
         
     case SUBMODULE_UMBRELLA_HEADER: {
-      if (const FileEntry *Umbrella = PP.getFileManager().getFile(Blob)) {
+      std::string Filename = Blob;
+      ResolveImportedPath(F, Filename);
+      if (auto *Umbrella = PP.getFileManager().getFile(Filename)) {
         if (!CurrentModule->getUmbrellaHeader())
-          ModMap.setUmbrellaHeader(CurrentModule, Umbrella);
-        else if (CurrentModule->getUmbrellaHeader() != Umbrella) {
+          ModMap.setUmbrellaHeader(CurrentModule, Umbrella, Blob);
+        else if (CurrentModule->getUmbrellaHeader().Entry != Umbrella) {
           // This can be a spurious difference caused by changing the VFS to
           // point to a different copy of the file, and it is too late to
           // to rebuild safely.
@@ -4618,11 +4320,12 @@
     }
 
     case SUBMODULE_UMBRELLA_DIR: {
-      if (const DirectoryEntry *Umbrella
-                                  = PP.getFileManager().getDirectory(Blob)) {
+      std::string Dirname = Blob;
+      ResolveImportedPath(F, Dirname);
+      if (auto *Umbrella = PP.getFileManager().getDirectory(Dirname)) {
         if (!CurrentModule->getUmbrellaDir())
-          ModMap.setUmbrellaDir(CurrentModule, Umbrella);
-        else if (CurrentModule->getUmbrellaDir() != Umbrella) {
+          ModMap.setUmbrellaDir(CurrentModule, Umbrella, Blob);
+        else if (CurrentModule->getUmbrellaDir().Entry != Umbrella) {
           if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
             Error("mismatched umbrella directories in submodule");
           return OutOfDate;
@@ -4811,16 +4514,15 @@
       = static_cast<frontend::IncludeDirGroup>(Record[Idx++]);
     bool IsFramework = Record[Idx++];
     bool IgnoreSysRoot = Record[Idx++];
-    HSOpts.UserEntries.push_back(
-      HeaderSearchOptions::Entry(Path, Group, IsFramework, IgnoreSysRoot));
+    HSOpts.UserEntries.emplace_back(std::move(Path), Group, IsFramework,
+                                    IgnoreSysRoot);
   }
 
   // System header prefixes.
   for (unsigned N = Record[Idx++]; N; --N) {
     std::string Prefix = ReadString(Record, Idx);
     bool IsSystemHeader = Record[Idx++];
-    HSOpts.SystemHeaderPrefixes.push_back(
-      HeaderSearchOptions::SystemHeaderPrefix(Prefix, IsSystemHeader));
+    HSOpts.SystemHeaderPrefixes.emplace_back(std::move(Prefix), IsSystemHeader);
   }
 
   HSOpts.ResourceDir = ReadString(Record, Idx);
@@ -4934,13 +4636,14 @@
   case PPD_MACRO_EXPANSION: {
     bool isBuiltin = Record[0];
     IdentifierInfo *Name = nullptr;
-    MacroDefinition *Def = nullptr;
+    MacroDefinitionRecord *Def = nullptr;
     if (isBuiltin)
       Name = getLocalIdentifier(M, Record[1]);
     else {
-      PreprocessedEntityID
-          GlobalID = getGlobalPreprocessedEntityID(M, Record[1]);
-      Def =cast<MacroDefinition>(PPRec.getLoadedPreprocessedEntity(GlobalID-1));
+      PreprocessedEntityID GlobalID =
+          getGlobalPreprocessedEntityID(M, Record[1]);
+      Def = cast<MacroDefinitionRecord>(
+          PPRec.getLoadedPreprocessedEntity(GlobalID - 1));
     }
 
     MacroExpansion *ME;
@@ -4956,8 +4659,7 @@
     // Decode the identifier info and then check again; if the macro is
     // still defined and associated with the identifier,
     IdentifierInfo *II = getLocalIdentifier(M, Record[0]);
-    MacroDefinition *MD
-      = new (PPRec) MacroDefinition(II, Range);
+    MacroDefinitionRecord *MD = new (PPRec) MacroDefinitionRecord(II, Range);
 
     if (DeserializationListener)
       DeserializationListener->MacroDefinitionRead(PPID, MD);
@@ -6454,10 +6156,7 @@
         PredefsVisited[I] = false;
     }
 
-    static bool visit(ModuleFile &M, bool Preorder, void *UserData) {
-      if (Preorder)
-        return false;
-
+    static bool visitPostorder(ModuleFile &M, void *UserData) {
       FindExternalLexicalDeclsVisitor *This
         = static_cast<FindExternalLexicalDeclsVisitor *>(UserData);
 
@@ -6499,7 +6198,8 @@
   // There might be lexical decls in multiple modules, for the TU at
   // least. Walk all of the modules in the order they were loaded.
   FindExternalLexicalDeclsVisitor Visitor(*this, DC, isKindWeWant, Decls);
-  ModuleMgr.visitDepthFirst(&FindExternalLexicalDeclsVisitor::visit, &Visitor);
+  ModuleMgr.visitDepthFirst(
+      nullptr, &FindExternalLexicalDeclsVisitor::visitPostorder, &Visitor);
   ++NumLexicalDeclContextsRead;
   return ELR_Success;
 }
@@ -7325,6 +7025,21 @@
   }
 }
 
+void ASTReader::ReadMismatchingDeleteExpressions(llvm::MapVector<
+    FieldDecl *, llvm::SmallVector<std::pair<SourceLocation, bool>, 4>> &
+                                                     Exprs) {
+  for (unsigned Idx = 0, N = DelayedDeleteExprs.size(); Idx != N;) {
+    FieldDecl *FD = cast<FieldDecl>(GetDecl(DelayedDeleteExprs[Idx++]));
+    uint64_t Count = DelayedDeleteExprs[Idx++];
+    for (uint64_t C = 0; C < Count; ++C) {
+      SourceLocation DeleteLoc =
+          SourceLocation::getFromRawEncoding(DelayedDeleteExprs[Idx++]);
+      const bool IsArrayForm = DelayedDeleteExprs[Idx++];
+      Exprs[FD].push_back(std::make_pair(DeleteLoc, IsArrayForm));
+    }
+  }
+}
+
 void ASTReader::ReadTentativeDefinitions(
                   SmallVectorImpl<VarDecl *> &TentativeDefs) {
   for (unsigned I = 0, N = TentativeDefinitions.size(); I != N; ++I) {
@@ -8309,7 +8024,7 @@
 
 std::string ASTReader::getOwningModuleNameForDiagnostic(const Decl *D) {
   // If we know the owning module, use it.
-  if (Module *M = D->getOwningModule())
+  if (Module *M = D->getImportedOwningModule())
     return M->getFullModuleName();
 
   // Otherwise, use the name of the top-level module the decl is within.
@@ -8480,6 +8195,11 @@
       MD->setLazyBody(PB->second);
   }
   PendingBodies.clear();
+
+  // Do some cleanup.
+  for (auto *ND : PendingMergedDefinitionsToDeduplicate)
+    getContext().deduplicateMergedDefinitonsFor(ND);
+  PendingMergedDefinitionsToDeduplicate.clear();
 }
 
 void ASTReader::diagnoseOdrViolations() {
diff --git a/lib/Serialization/ASTReaderDecl.cpp b/lib/Serialization/ASTReaderDecl.cpp
index 5e911b4..9cb145e 100644
--- a/lib/Serialization/ASTReaderDecl.cpp
+++ b/lib/Serialization/ASTReaderDecl.cpp
@@ -458,24 +458,28 @@
   D->FromASTFile = true;
   D->setModulePrivate(Record[Idx++]);
   D->Hidden = D->isModulePrivate();
-  
+
   // Determine whether this declaration is part of a (sub)module. If so, it
   // may not yet be visible.
   if (unsigned SubmoduleID = readSubmoduleID(Record, Idx)) {
     // Store the owning submodule ID in the declaration.
     D->setOwningModuleID(SubmoduleID);
-    
-    // Module-private declarations are never visible, so there is no work to do.
-    if (!D->isModulePrivate()) {
-      if (Module *Owner = Reader.getSubmodule(SubmoduleID)) {
-        if (Owner->NameVisibility != Module::AllVisible) {
-          // The owning module is not visible. Mark this declaration as hidden.
-          D->Hidden = true;
-          
-          // Note that this declaration was hidden because its owning module is 
-          // not yet visible.
-          Reader.HiddenNamesMap[Owner].HiddenDecls.push_back(D);
-        }
+
+    if (D->Hidden) {
+      // Module-private declarations are never visible, so there is no work to do.
+    } else if (Reader.getContext().getLangOpts().ModulesLocalVisibility) {
+      // If local visibility is being tracked, this declaration will become
+      // hidden and visible as the owning module does. Inform Sema that this
+      // declaration might not be visible.
+      D->Hidden = true;
+    } else if (Module *Owner = Reader.getSubmodule(SubmoduleID)) {
+      if (Owner->NameVisibility != Module::AllVisible) {
+        // The owning module is not visible. Mark this declaration as hidden.
+        D->Hidden = true;
+        
+        // Note that this declaration was hidden because its owning module is 
+        // not yet visible.
+        Reader.HiddenNamesMap[Owner].push_back(D);
       }
     }
   }
@@ -1059,13 +1063,15 @@
   VD->VarDeclBits.SClass = (StorageClass)Record[Idx++];
   VD->VarDeclBits.TSCSpec = Record[Idx++];
   VD->VarDeclBits.InitStyle = Record[Idx++];
-  VD->VarDeclBits.ExceptionVar = Record[Idx++];
-  VD->VarDeclBits.NRVOVariable = Record[Idx++];
-  VD->VarDeclBits.CXXForRangeDecl = Record[Idx++];
-  VD->VarDeclBits.ARCPseudoStrong = Record[Idx++];
-  VD->VarDeclBits.IsConstexpr = Record[Idx++];
-  VD->VarDeclBits.IsInitCapture = Record[Idx++];
-  VD->VarDeclBits.PreviousDeclInSameBlockScope = Record[Idx++];
+  if (!isa<ParmVarDecl>(VD)) {
+    VD->NonParmVarDeclBits.ExceptionVar = Record[Idx++];
+    VD->NonParmVarDeclBits.NRVOVariable = Record[Idx++];
+    VD->NonParmVarDeclBits.CXXForRangeDecl = Record[Idx++];
+    VD->NonParmVarDeclBits.ARCPseudoStrong = Record[Idx++];
+    VD->NonParmVarDeclBits.IsConstexpr = Record[Idx++];
+    VD->NonParmVarDeclBits.IsInitCapture = Record[Idx++];
+    VD->NonParmVarDeclBits.PreviousDeclInSameBlockScope = Record[Idx++];
+  }
   Linkage VarLinkage = Linkage(Record[Idx++]);
   VD->setCachedLinkage(VarLinkage);
 
@@ -1399,11 +1405,16 @@
       // If MergeDD is visible or becomes visible, make the definition visible.
       if (!MergeDD.Definition->isHidden())
         DD.Definition->Hidden = false;
-      else {
+      else if (Reader.getContext().getLangOpts().ModulesLocalVisibility) {
+        Reader.getContext().mergeDefinitionIntoModule(
+            DD.Definition, MergeDD.Definition->getImportedOwningModule(),
+            /*NotifyListeners*/ false);
+        Reader.PendingMergedDefinitionsToDeduplicate.insert(DD.Definition);
+      } else {
         auto SubmoduleID = MergeDD.Definition->getOwningModuleID();
         assert(SubmoduleID && "hidden definition in no module");
-        Reader.HiddenNamesMap[Reader.getSubmodule(SubmoduleID)]
-              .HiddenDecls.push_back(DD.Definition);
+        Reader.HiddenNamesMap[Reader.getSubmodule(SubmoduleID)].push_back(
+            DD.Definition);
       }
     }
   }
@@ -2022,9 +2033,8 @@
 
   D->setDeclaredWithTypename(Record[Idx++]);
 
-  bool Inherited = Record[Idx++];
-  TypeSourceInfo *DefArg = GetTypeSourceInfo(Record, Idx);
-  D->setDefaultArgument(DefArg, Inherited);
+  if (Record[Idx++])
+    D->setDefaultArgument(GetTypeSourceInfo(Record, Idx));
 }
 
 void ASTDeclReader::VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D) {
@@ -2041,11 +2051,8 @@
   } else {
     // Rest of NonTypeTemplateParmDecl.
     D->ParameterPack = Record[Idx++];
-    if (Record[Idx++]) {
-      Expr *DefArg = Reader.ReadExpr(F);
-      bool Inherited = Record[Idx++];
-      D->setDefaultArgument(DefArg, Inherited);
-   }
+    if (Record[Idx++])
+      D->setDefaultArgument(Reader.ReadExpr(F));
   }
 }
 
@@ -2061,10 +2068,10 @@
       Data[I] = Reader.ReadTemplateParameterList(F, Record, Idx);
   } else {
     // Rest of TemplateTemplateParmDecl.
-    TemplateArgumentLoc Arg = Reader.ReadTemplateArgumentLoc(F, Record, Idx);
-    bool IsInherited = Record[Idx++];
-    D->setDefaultArgument(Arg, IsInherited);
     D->ParameterPack = Record[Idx++];
+    if (Record[Idx++])
+      D->setDefaultArgument(Reader.getContext(),
+                            Reader.ReadTemplateArgumentLoc(F, Record, Idx));
   }
 }
 
@@ -2893,6 +2900,43 @@
   llvm_unreachable("attachPreviousDecl on non-redeclarable declaration");
 }
 
+/// Inherit the default template argument from \p From to \p To. Returns
+/// \c false if there is no default template for \p From.
+template <typename ParmDecl>
+static bool inheritDefaultTemplateArgument(ASTContext &Context, ParmDecl *From,
+                                           Decl *ToD) {
+  auto *To = cast<ParmDecl>(ToD);
+  if (!From->hasDefaultArgument())
+    return false;
+  To->setInheritedDefaultArgument(Context, From);
+  return true;
+}
+
+static void inheritDefaultTemplateArguments(ASTContext &Context,
+                                            TemplateDecl *From,
+                                            TemplateDecl *To) {
+  auto *FromTP = From->getTemplateParameters();
+  auto *ToTP = To->getTemplateParameters();
+  assert(FromTP->size() == ToTP->size() && "merged mismatched templates?");
+
+  for (unsigned I = 0, N = FromTP->size(); I != N; ++I) {
+    NamedDecl *FromParam = FromTP->getParam(N - I - 1);
+    NamedDecl *ToParam = ToTP->getParam(N - I - 1);
+
+    if (auto *FTTP = dyn_cast<TemplateTypeParmDecl>(FromParam)) {
+      if (inheritDefaultTemplateArgument(Context, FTTP, ToParam))
+        break;
+    } else if (auto *FNTTP = dyn_cast<NonTypeTemplateParmDecl>(FromParam)) {
+      if (inheritDefaultTemplateArgument(Context, FNTTP, ToParam))
+        break;
+    } else {
+      if (inheritDefaultTemplateArgument(
+              Context, cast<TemplateTemplateParmDecl>(FromParam), ToParam))
+        break;
+    }
+  }
+}
+
 void ASTDeclReader::attachPreviousDecl(ASTReader &Reader, Decl *D,
                                        Decl *Previous, Decl *Canon) {
   assert(D && Previous);
@@ -2919,6 +2963,12 @@
   // be too.
   if (Previous->Used)
     D->Used = true;
+
+  // If the declaration declares a template, it may inherit default arguments
+  // from the previous declaration.
+  if (TemplateDecl *TD = dyn_cast<TemplateDecl>(D))
+    inheritDefaultTemplateArguments(Reader.getContext(),
+                                    cast<TemplateDecl>(Previous), TD);
 }
 
 template<typename DeclT>
@@ -3300,11 +3350,13 @@
       addToChain(Reader.GetDecl(CanonID));
     }
 
-    static bool visit(ModuleFile &M, bool Preorder, void *UserData) {
-      if (Preorder)
-        return false;
+    static ModuleManager::DFSPreorderControl
+    visitPreorder(ModuleFile &M, void *UserData) {
+      return static_cast<RedeclChainVisitor *>(UserData)->visitPreorder(M);
+    }
 
-      return static_cast<RedeclChainVisitor *>(UserData)->visit(M);
+    static bool visitPostorder(ModuleFile &M, void *UserData) {
+      return static_cast<RedeclChainVisitor *>(UserData)->visitPostorder(M);
     }
 
     void addToChain(Decl *D) {
@@ -3357,8 +3409,36 @@
       for (unsigned I = 0; I != N; ++I)
         addToChain(Reader.GetLocalDecl(M, M.RedeclarationChains[Offset++]));
     }
-    
-    bool visit(ModuleFile &M) {
+
+    bool needsToVisitImports(ModuleFile &M, GlobalDeclID GlobalID) {
+      DeclID ID = Reader.mapGlobalIDToModuleFileGlobalID(M, GlobalID);
+      if (!ID)
+        return false;
+
+      const LocalRedeclarationsInfo Compare = {ID, 0};
+      const LocalRedeclarationsInfo *Result = std::lower_bound(
+          M.RedeclarationsMap,
+          M.RedeclarationsMap + M.LocalNumRedeclarationsInMap, Compare);
+      if (Result == M.RedeclarationsMap + M.LocalNumRedeclarationsInMap ||
+          Result->FirstID != ID) {
+        return true;
+      }
+      unsigned Offset = Result->Offset;
+      unsigned N = M.RedeclarationChains[Offset];
+      // We don't need to visit a module or any of its imports if we've already
+      // deserialized the redecls from this module.
+      return N != 0;
+    }
+
+    ModuleManager::DFSPreorderControl visitPreorder(ModuleFile &M) {
+      for (unsigned I = 0, N = SearchDecls.size(); I != N; ++I) {
+        if (needsToVisitImports(M, SearchDecls[I]))
+          return ModuleManager::Continue;
+      }
+      return ModuleManager::SkipImports;
+    }
+
+    bool visitPostorder(ModuleFile &M) {
       // Visit each of the declarations.
       for (unsigned I = 0, N = SearchDecls.size(); I != N; ++I)
         searchForID(M, SearchDecls[I]);
@@ -3390,11 +3470,12 @@
 
   // Build up the list of redeclarations.
   RedeclChainVisitor Visitor(*this, SearchDecls, RedeclsDeserialized, CanonID);
-  ModuleMgr.visitDepthFirst(&RedeclChainVisitor::visit, &Visitor);
+  ModuleMgr.visitDepthFirst(&RedeclChainVisitor::visitPreorder,
+                            &RedeclChainVisitor::visitPostorder, &Visitor);
 
   // Retrieve the chains.
   ArrayRef<Decl *> Chain = Visitor.getChain();
-  if (Chain.empty())
+  if (Chain.empty() || (Chain.size() == 1 && Chain[0] == CanonDecl))
     return;
 
   // Hook up the chains.
@@ -3813,10 +3894,16 @@
     case UPD_DECL_EXPORTED:
       unsigned SubmoduleID = readSubmoduleID(Record, Idx);
       Module *Owner = SubmoduleID ? Reader.getSubmodule(SubmoduleID) : nullptr;
-      if (Owner && Owner->NameVisibility != Module::AllVisible) {
+      if (Reader.getContext().getLangOpts().ModulesLocalVisibility) {
+        // FIXME: This doesn't send the right notifications if there are
+        // ASTMutationListeners other than an ASTWriter.
+        Reader.getContext().mergeDefinitionIntoModule(cast<NamedDecl>(D), Owner,
+                                                      /*NotifyListeners*/false);
+        Reader.PendingMergedDefinitionsToDeduplicate.insert(cast<NamedDecl>(D));
+      } else if (Owner && Owner->NameVisibility != Module::AllVisible) {
         // If Owner is made visible at some later point, make this declaration
         // visible too.
-        Reader.HiddenNamesMap[Owner].HiddenDecls.push_back(D);
+        Reader.HiddenNamesMap[Owner].push_back(D);
       } else {
         // The declaration is now visible.
         D->Hidden = false;
diff --git a/lib/Serialization/ASTReaderStmt.cpp b/lib/Serialization/ASTReaderStmt.cpp
index 57728c1..d84b5be 100644
--- a/lib/Serialization/ASTReaderStmt.cpp
+++ b/lib/Serialization/ASTReaderStmt.cpp
@@ -801,6 +801,16 @@
                     Designators.data(), Designators.size());
 }
 
+void ASTStmtReader::VisitDesignatedInitUpdateExpr(DesignatedInitUpdateExpr *E) {
+  VisitExpr(E);
+  E->setBase(Reader.ReadSubExpr());
+  E->setUpdater(Reader.ReadSubExpr());
+}
+
+void ASTStmtReader::VisitNoInitExpr(NoInitExpr *E) {
+  VisitExpr(E);
+}
+
 void ASTStmtReader::VisitImplicitValueInitExpr(ImplicitValueInitExpr *E) {
   VisitExpr(E);
 }
@@ -1826,6 +1836,7 @@
   C->setScheduleKind(
        static_cast<OpenMPScheduleClauseKind>(Record[Idx++]));
   C->setChunkSize(Reader->Reader.ReadSubExpr());
+  C->setHelperChunkSize(Reader->Reader.ReadSubExpr());
   C->setLParenLoc(Reader->ReadSourceLocation(Record, Idx));
   C->setScheduleKindLoc(Reader->ReadSourceLocation(Record, Idx));
   C->setCommaLoc(Reader->ReadSourceLocation(Record, Idx));
@@ -2488,18 +2499,18 @@
       ExprObjectKind OK = static_cast<ExprObjectKind>(Record[Idx++]);
       Expr *Base = ReadSubExpr();
       ValueDecl *MemberD = ReadDeclAs<ValueDecl>(F, Record, Idx);
-      SourceLocation MemberLoc = ReadSourceLocation(F, Record, Idx);

-      DeclarationNameInfo MemberNameInfo(MemberD->getDeclName(), MemberLoc);

-      bool IsArrow = Record[Idx++];

-      SourceLocation OperatorLoc = ReadSourceLocation(F, Record, Idx);

-

-      S = MemberExpr::Create(Context, Base, IsArrow, OperatorLoc, QualifierLoc,

-                             TemplateKWLoc, MemberD, FoundDecl, MemberNameInfo,

-                             HasTemplateKWAndArgsInfo ? &ArgInfo : nullptr, T,

-                             VK, OK);

-      ReadDeclarationNameLoc(F, cast<MemberExpr>(S)->MemberDNLoc,

-                             MemberD->getDeclName(), Record, Idx);

-      if (HadMultipleCandidates)

+      SourceLocation MemberLoc = ReadSourceLocation(F, Record, Idx);
+      DeclarationNameInfo MemberNameInfo(MemberD->getDeclName(), MemberLoc);
+      bool IsArrow = Record[Idx++];
+      SourceLocation OperatorLoc = ReadSourceLocation(F, Record, Idx);
+
+      S = MemberExpr::Create(Context, Base, IsArrow, OperatorLoc, QualifierLoc,
+                             TemplateKWLoc, MemberD, FoundDecl, MemberNameInfo,
+                             HasTemplateKWAndArgsInfo ? &ArgInfo : nullptr, T,
+                             VK, OK);
+      ReadDeclarationNameLoc(F, cast<MemberExpr>(S)->MemberDNLoc,
+                             MemberD->getDeclName(), Record, Idx);
+      if (HadMultipleCandidates)
         cast<MemberExpr>(S)->setHadMultipleCandidates(true);
       break;
     }
@@ -2548,10 +2559,18 @@
 
       break;
 
+    case EXPR_DESIGNATED_INIT_UPDATE:
+      S = new (Context) DesignatedInitUpdateExpr(Empty);
+      break;
+
     case EXPR_IMPLICIT_VALUE_INIT:
       S = new (Context) ImplicitValueInitExpr(Empty);
       break;
 
+    case EXPR_NO_INIT:
+      S = new (Context) NoInitExpr(Empty);
+      break;
+
     case EXPR_VA_ARG:
       S = new (Context) VAArgExpr(Empty);
       break;
diff --git a/lib/Serialization/ASTWriter.cpp b/lib/Serialization/ASTWriter.cpp
index df05f04..5bb0bec 100644
--- a/lib/Serialization/ASTWriter.cpp
+++ b/lib/Serialization/ASTWriter.cpp
@@ -60,14 +60,14 @@
 using namespace clang::serialization;
 
 template <typename T, typename Allocator>
-static StringRef data(const std::vector<T, Allocator> &v) {
+static StringRef bytes(const std::vector<T, Allocator> &v) {
   if (v.empty()) return StringRef();
   return StringRef(reinterpret_cast<const char*>(&v[0]),
                          sizeof(T) * v.size());
 }
 
 template <typename T>
-static StringRef data(const SmallVectorImpl<T> &v) {
+static StringRef bytes(const SmallVectorImpl<T> &v) {
   return StringRef(reinterpret_cast<const char*>(v.data()),
                          sizeof(T) * v.size());
 }
@@ -774,7 +774,9 @@
   RECORD(EXPR_EXT_VECTOR_ELEMENT);
   RECORD(EXPR_INIT_LIST);
   RECORD(EXPR_DESIGNATED_INIT);
+  RECORD(EXPR_DESIGNATED_INIT_UPDATE);
   RECORD(EXPR_IMPLICIT_VALUE_INIT);
+  RECORD(EXPR_NO_INIT);
   RECORD(EXPR_VA_ARG);
   RECORD(EXPR_ADDR_LABEL);
   RECORD(EXPR_STMT);
@@ -940,8 +942,9 @@
   // Preprocessor Block.
   BLOCK(PREPROCESSOR_BLOCK);
   RECORD(PP_MACRO_DIRECTIVE_HISTORY);
-  RECORD(PP_MACRO_OBJECT_LIKE);
   RECORD(PP_MACRO_FUNCTION_LIKE);
+  RECORD(PP_MACRO_OBJECT_LIKE);
+  RECORD(PP_MODULE_MACRO);
   RECORD(PP_TOKEN);
 
   // Decls and Types block.
@@ -1529,7 +1532,7 @@
   Record.push_back(INPUT_FILE_OFFSETS);
   Record.push_back(InputFileOffsets.size());
   Record.push_back(UserFilesNum);
-  Stream.EmitRecordWithBlob(OffsetsAbbrevCode, Record, data(InputFileOffsets));
+  Stream.EmitRecordWithBlob(OffsetsAbbrevCode, Record, bytes(InputFileOffsets));
 }
 
 //===----------------------------------------------------------------------===//
@@ -1924,7 +1927,7 @@
   Record.push_back(SOURCE_LOCATION_OFFSETS);
   Record.push_back(SLocEntryOffsets.size());
   Record.push_back(SourceMgr.getNextLocalOffset() - 1); // skip dummy
-  Stream.EmitRecordWithBlob(SLocOffsetsAbbrev, Record, data(SLocEntryOffsets));
+  Stream.EmitRecordWithBlob(SLocOffsetsAbbrev, Record, bytes(SLocEntryOffsets));
 
   // Write the source location entry preloads array, telling the AST
   // reader which source locations entries it should load eagerly.
@@ -1971,52 +1974,6 @@
 // Preprocessor Serialization
 //===----------------------------------------------------------------------===//
 
-namespace {
-class ASTMacroTableTrait {
-public:
-  typedef IdentID key_type;
-  typedef key_type key_type_ref;
-
-  struct Data {
-    uint32_t MacroDirectivesOffset;
-  };
-
-  typedef Data data_type;
-  typedef const data_type &data_type_ref;
-  typedef unsigned hash_value_type;
-  typedef unsigned offset_type;
-
-  static hash_value_type ComputeHash(IdentID IdID) {
-    return llvm::hash_value(IdID);
-  }
-
-  std::pair<unsigned,unsigned>
-  static EmitKeyDataLength(raw_ostream& Out,
-                           key_type_ref Key, data_type_ref Data) {
-    unsigned KeyLen = 4; // IdentID.
-    unsigned DataLen = 4; // MacroDirectivesOffset.
-    return std::make_pair(KeyLen, DataLen);
-  }
-
-  static void EmitKey(raw_ostream& Out, key_type_ref Key, unsigned KeyLen) {
-    using namespace llvm::support;
-    endian::Writer<little>(Out).write<uint32_t>(Key);
-  }
-
-  static void EmitData(raw_ostream& Out, key_type_ref Key, data_type_ref Data,
-                       unsigned) {
-    using namespace llvm::support;
-    endian::Writer<little>(Out).write<uint32_t>(Data.MacroDirectivesOffset);
-  }
-};
-} // end anonymous namespace
-
-static int compareMacroDirectives(
-    const std::pair<const IdentifierInfo *, MacroDirective *> *X,
-    const std::pair<const IdentifierInfo *, MacroDirective *> *Y) {
-  return X->first->getName().compare(Y->first->getName());
-}
-
 static bool shouldIgnoreMacro(MacroDirective *MD, bool IsModule,
                               const Preprocessor &PP) {
   if (MacroInfo *MI = MD->getMacroInfo())
@@ -2024,10 +1981,6 @@
       return true;
 
   if (IsModule) {
-    // Re-export any imported directives.
-    if (MD->isImported())
-      return false;
-
     SourceLocation Loc = MD->getLocation();
     if (Loc.isInvalid())
       return true;
@@ -2047,6 +2000,7 @@
     WritePreprocessorDetail(*PPRec);
 
   RecordData Record;
+  RecordData ModuleMacroRecord;
 
   // If the preprocessor __COUNTER__ value has been bumped, remember it.
   if (PP.getCounterValue() != 0) {
@@ -2067,63 +2021,73 @@
   // Loop over all the macro directives that are live at the end of the file,
   // emitting each to the PP section.
 
-  // Construct the list of macro directives that need to be serialized.
-  SmallVector<std::pair<const IdentifierInfo *, MacroDirective *>, 2>
-    MacroDirectives;
-  for (Preprocessor::macro_iterator
-         I = PP.macro_begin(/*IncludeExternalMacros=*/false),
-         E = PP.macro_end(/*IncludeExternalMacros=*/false);
-       I != E; ++I) {
-    MacroDirectives.push_back(std::make_pair(I->first, I->second));
-  }
-
+  // Construct the list of identifiers with macro directives that need to be
+  // serialized.
+  SmallVector<const IdentifierInfo *, 128> MacroIdentifiers;
+  for (auto &Id : PP.getIdentifierTable())
+    if (Id.second->hadMacroDefinition() &&
+        (!Id.second->isFromAST() ||
+         Id.second->hasChangedSinceDeserialization()))
+      MacroIdentifiers.push_back(Id.second);
   // Sort the set of macro definitions that need to be serialized by the
   // name of the macro, to provide a stable ordering.
-  llvm::array_pod_sort(MacroDirectives.begin(), MacroDirectives.end(),
-                       &compareMacroDirectives);
+  std::sort(MacroIdentifiers.begin(), MacroIdentifiers.end(),
+            llvm::less_ptr<IdentifierInfo>());
 
   // Emit the macro directives as a list and associate the offset with the
   // identifier they belong to.
-  for (unsigned I = 0, N = MacroDirectives.size(); I != N; ++I) {
-    const IdentifierInfo *Name = MacroDirectives[I].first;
-    MacroDirective *MD = MacroDirectives[I].second;
-
-    // If the macro or identifier need no updates, don't write the macro history
-    // for this one.
-    // FIXME: Chain the macro history instead of re-writing it.
-    if (MD->isFromPCH() &&
-        Name->isFromAST() && !Name->hasChangedSinceDeserialization())
-      continue;
+  for (const IdentifierInfo *Name : MacroIdentifiers) {
+    MacroDirective *MD = PP.getLocalMacroDirectiveHistory(Name);
+    auto StartOffset = Stream.GetCurrentBitNo();
 
     // Emit the macro directives in reverse source order.
     for (; MD; MD = MD->getPrevious()) {
+      // Once we hit an ignored macro, we're done: the rest of the chain
+      // will all be ignored macros.
       if (shouldIgnoreMacro(MD, IsModule, PP))
-        continue;
+        break;
 
       AddSourceLocation(MD->getLocation(), Record);
       Record.push_back(MD->getKind());
       if (auto *DefMD = dyn_cast<DefMacroDirective>(MD)) {
-        MacroID InfoID = getMacroRef(DefMD->getInfo(), Name);
-        Record.push_back(InfoID);
-        Record.push_back(DefMD->getOwningModuleID());
-        Record.push_back(DefMD->isAmbiguous());
-      } else if (auto *UndefMD = dyn_cast<UndefMacroDirective>(MD)) {
-        Record.push_back(UndefMD->getOwningModuleID());
-      } else {
-        auto *VisMD = cast<VisibilityMacroDirective>(MD);
+        Record.push_back(getMacroRef(DefMD->getInfo(), Name));
+      } else if (auto *VisMD = dyn_cast<VisibilityMacroDirective>(MD)) {
         Record.push_back(VisMD->isPublic());
       }
+    }
 
-      if (MD->isImported()) {
-        auto Overrides = MD->getOverriddenModules();
-        Record.push_back(Overrides.size());
-        Record.append(Overrides.begin(), Overrides.end());
+    // Write out any exported module macros.
+    bool EmittedModuleMacros = false;
+    if (IsModule) {
+      auto Leafs = PP.getLeafModuleMacros(Name);
+      SmallVector<ModuleMacro*, 8> Worklist(Leafs.begin(), Leafs.end());
+      llvm::DenseMap<ModuleMacro*, unsigned> Visits;
+      while (!Worklist.empty()) {
+        auto *Macro = Worklist.pop_back_val();
+
+        // Emit a record indicating this submodule exports this macro.
+        ModuleMacroRecord.push_back(
+            getSubmoduleID(Macro->getOwningModule()));
+        ModuleMacroRecord.push_back(getMacroRef(Macro->getMacroInfo(), Name));
+        for (auto *M : Macro->overrides())
+          ModuleMacroRecord.push_back(getSubmoduleID(M->getOwningModule()));
+
+        Stream.EmitRecord(PP_MODULE_MACRO, ModuleMacroRecord);
+        ModuleMacroRecord.clear();
+
+        // Enqueue overridden macros once we've visited all their ancestors.
+        for (auto *M : Macro->overrides())
+          if (++Visits[M] == M->getNumOverridingMacros())
+            Worklist.push_back(M);
+
+        EmittedModuleMacros = true;
       }
     }
-    if (Record.empty())
+
+    if (Record.empty() && !EmittedModuleMacros)
       continue;
 
-    IdentMacroDirectivesOffsetMap[Name] = Stream.GetCurrentBitNo();
+    IdentMacroDirectivesOffsetMap[Name] = StartOffset;
     Stream.EmitRecord(PP_MACRO_DIRECTIVE_HISTORY, Record);
     Record.clear();
   }
@@ -2173,9 +2137,8 @@
       Record.push_back(MI->isGNUVarargs());
       Record.push_back(MI->hasCommaPasting());
       Record.push_back(MI->getNumArgs());
-      for (MacroInfo::arg_iterator I = MI->arg_begin(), E = MI->arg_end();
-           I != E; ++I)
-        AddIdentifierRef(*I, Record);
+      for (const IdentifierInfo *Arg : MI->args())
+        AddIdentifierRef(Arg, Record);
     }
 
     // If we have a detailed preprocessing record, record the macro definition
@@ -2215,7 +2178,7 @@
   Record.push_back(MacroOffsets.size());
   Record.push_back(FirstMacroID - NUM_PREDEF_MACRO_IDS);
   Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record,
-                            data(MacroOffsets));
+                            bytes(MacroOffsets));
 }
 
 void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec) {
@@ -2255,13 +2218,13 @@
        (void)++E, ++NumPreprocessingRecords, ++NextPreprocessorEntityID) {
     Record.clear();
 
-    PreprocessedEntityOffsets.push_back(PPEntityOffset((*E)->getSourceRange(),
-                                                     Stream.GetCurrentBitNo()));
+    PreprocessedEntityOffsets.push_back(
+        PPEntityOffset((*E)->getSourceRange(), Stream.GetCurrentBitNo()));
 
-    if (MacroDefinition *MD = dyn_cast<MacroDefinition>(*E)) {
+    if (MacroDefinitionRecord *MD = dyn_cast<MacroDefinitionRecord>(*E)) {
       // Record this macro definition's ID.
       MacroDefinitions[MD] = NextPreprocessorEntityID;
-      
+
       AddIdentifierRef(MD->getName(), Record);
       Stream.EmitRecord(PPD_MACRO_DEFINITION, Record);
       continue;
@@ -2313,7 +2276,7 @@
     Record.push_back(PPD_ENTITIES_OFFSETS);
     Record.push_back(FirstPreprocessorEntityID - NUM_PREDEF_PP_ENTITY_IDS);
     Stream.EmitRecordWithBlob(PPEOffsetAbbrev, Record,
-                              data(PreprocessedEntityOffsets));
+                              bytes(PreprocessedEntityOffsets));
   }
 }
 
@@ -2350,19 +2313,6 @@
 }
 
 void ASTWriter::WriteSubmodules(Module *WritingModule) {
-  // Determine the dependencies of our module and each of it's submodules.
-  // FIXME: This feels like it belongs somewhere else, but there are no
-  // other consumers of this information.
-  SourceManager &SrcMgr = PP->getSourceManager();
-  ModuleMap &ModMap = PP->getHeaderSearchInfo().getModuleMap();
-  for (const auto *I : Context->local_imports()) {
-    if (Module *ImportedFrom
-          = ModMap.inferModuleFromLocation(FullSourceLoc(I->getLocation(), 
-                                                         SrcMgr))) {
-      ImportedFrom->Imports.push_back(I->getImportedModule());
-    }
-  }
-  
   // Enter the submodule description block.
   Stream.EnterSubblock(SUBMODULE_BLOCK_ID, /*bits for abbreviations*/5);
   
@@ -2490,16 +2440,16 @@
     }
 
     // Emit the umbrella header, if there is one.
-    if (const FileEntry *UmbrellaHeader = Mod->getUmbrellaHeader()) {
+    if (auto UmbrellaHeader = Mod->getUmbrellaHeader()) {
       Record.clear();
       Record.push_back(SUBMODULE_UMBRELLA_HEADER);
-      Stream.EmitRecordWithBlob(UmbrellaAbbrev, Record, 
-                                UmbrellaHeader->getName());
-    } else if (const DirectoryEntry *UmbrellaDir = Mod->getUmbrellaDir()) {
+      Stream.EmitRecordWithBlob(UmbrellaAbbrev, Record,
+                                UmbrellaHeader.NameAsWritten);
+    } else if (auto UmbrellaDir = Mod->getUmbrellaDir()) {
       Record.clear();
       Record.push_back(SUBMODULE_UMBRELLA_DIR);
       Stream.EmitRecordWithBlob(UmbrellaDirAbbrev, Record, 
-                                UmbrellaDir->getName());      
+                                UmbrellaDir.NameAsWritten);
     }
 
     // Emit the headers.
@@ -2547,8 +2497,7 @@
       Record.clear();
       for (unsigned I = 0, N = Mod->Exports.size(); I != N; ++I) {
         if (Module *Exported = Mod->Exports[I].getPointer()) {
-          unsigned ExportedID = SubmoduleIDs[Exported];
-          assert(ExportedID > 0 && "Unknown submodule ID?");
+          unsigned ExportedID = getSubmoduleID(Exported);
           Record.push_back(ExportedID);
         } else {
           Record.push_back(0);
@@ -2599,9 +2548,14 @@
   }
   
   Stream.ExitBlock();
-  
-  assert((NextSubmoduleID - FirstSubmoduleID
-            == getNumberOfModules(WritingModule)) && "Wrong # of submodules");
+
+  // FIXME: This can easily happen, if we have a reference to a submodule that
+  // did not result in us loading a module file for that submodule. For
+  // instance, a cross-top-level-module 'conflict' declaration will hit this.
+  assert((NextSubmoduleID - FirstSubmoduleID ==
+          getNumberOfModules(WritingModule)) &&
+         "Wrong # of submodules; found a reference to a non-local, "
+         "non-imported submodule?");
 }
 
 serialization::SubmoduleID 
@@ -2685,7 +2639,7 @@
   Record.push_back(CXX_CTOR_INITIALIZERS_OFFSETS);
   Record.push_back(CXXCtorInitializersOffsets.size());
   Stream.EmitRecordWithBlob(CtorInitializersOffsetAbbrev, Record,
-                            data(CXXCtorInitializersOffsets));
+                            bytes(CXXCtorInitializersOffsets));
 }
 
 void ASTWriter::WriteCXXBaseSpecifiersOffsets() {
@@ -2708,7 +2662,7 @@
   Record.push_back(CXX_BASE_SPECIFIER_OFFSETS);
   Record.push_back(CXXBaseSpecifiersOffsets.size());
   Stream.EmitRecordWithBlob(BaseSpecifierOffsetAbbrev, Record,
-                            data(CXXBaseSpecifiersOffsets));
+                            bytes(CXXBaseSpecifiersOffsets));
 }
 
 //===----------------------------------------------------------------------===//
@@ -2784,7 +2738,7 @@
     Decls.push_back(std::make_pair(D->getKind(), GetDeclRef(D)));
 
   ++NumLexicalDeclContexts;
-  Stream.EmitRecordWithBlob(DeclContextLexicalAbbrev, Record, data(Decls));
+  Stream.EmitRecordWithBlob(DeclContextLexicalAbbrev, Record, bytes(Decls));
   return Offset;
 }
 
@@ -2803,7 +2757,7 @@
   Record.push_back(TYPE_OFFSET);
   Record.push_back(TypeOffsets.size());
   Record.push_back(FirstTypeID - NUM_PREDEF_TYPE_IDS);
-  Stream.EmitRecordWithBlob(TypeOffsetAbbrev, Record, data(TypeOffsets));
+  Stream.EmitRecordWithBlob(TypeOffsetAbbrev, Record, bytes(TypeOffsets));
 
   // Write the declaration offsets array
   Abbrev = new BitCodeAbbrev();
@@ -2816,7 +2770,7 @@
   Record.push_back(DECL_OFFSET);
   Record.push_back(DeclOffsets.size());
   Record.push_back(FirstDeclID - NUM_PREDEF_DECL_IDS);
-  Stream.EmitRecordWithBlob(DeclOffsetAbbrev, Record, data(DeclOffsets));
+  Stream.EmitRecordWithBlob(DeclOffsetAbbrev, Record, bytes(DeclOffsets));
 }
 
 void ASTWriter::WriteFileDeclIDsMap() {
@@ -2844,7 +2798,7 @@
   unsigned AbbrevCode = Stream.EmitAbbrev(Abbrev);
   Record.push_back(FILE_SORTED_DECLS);
   Record.push_back(FileGroupedDeclIDs.size());
-  Stream.EmitRecordWithBlob(AbbrevCode, Record, data(FileGroupedDeclIDs));
+  Stream.EmitRecordWithBlob(AbbrevCode, Record, bytes(FileGroupedDeclIDs));
 }
 
 void ASTWriter::WriteComments() {
@@ -3073,7 +3027,7 @@
     Record.push_back(SelectorOffsets.size());
     Record.push_back(FirstSelectorID - NUM_PREDEF_SELECTOR_IDS);
     Stream.EmitRecordWithBlob(SelectorOffsetAbbrev, Record,
-                              data(SelectorOffsets));
+                              bytes(SelectorOffsets));
   }
 }
 
@@ -3137,169 +3091,23 @@
   ASTWriter &Writer;
   Preprocessor &PP;
   IdentifierResolver &IdResolver;
-  bool IsModule;
   
-  /// \brief Determines whether this is an "interesting" identifier
-  /// that needs a full IdentifierInfo structure written into the hash
-  /// table.
-  bool isInterestingIdentifier(IdentifierInfo *II, MacroDirective *&Macro) {
-    if (II->isPoisoned() ||
+  /// \brief Determines whether this is an "interesting" identifier that needs a
+  /// full IdentifierInfo structure written into the hash table. Notably, this
+  /// doesn't check whether the name has macros defined; use PublicMacroIterator
+  /// to check that.
+  bool isInterestingIdentifier(IdentifierInfo *II, uint64_t MacroOffset) {
+    if (MacroOffset ||
+        II->isPoisoned() ||
         II->isExtensionToken() ||
         II->getObjCOrBuiltinID() ||
         II->hasRevertedTokenIDToIdentifier() ||
         II->getFETokenInfo<void>())
       return true;
 
-    return hadMacroDefinition(II, Macro);
-  }
-
-  bool hadMacroDefinition(IdentifierInfo *II, MacroDirective *&Macro) {
-    if (!II->hadMacroDefinition())
-      return false;
-
-    if (Macro || (Macro = PP.getMacroDirectiveHistory(II))) {
-      if (!IsModule)
-        return !shouldIgnoreMacro(Macro, IsModule, PP);
-
-      MacroState State;
-      if (getFirstPublicSubmoduleMacro(Macro, State))
-        return true;
-    }
-
     return false;
   }
 
-  enum class SubmoduleMacroState {
-    /// We've seen nothing about this macro.
-    None,
-    /// We've seen a public visibility directive.
-    Public,
-    /// We've either exported a macro for this module or found that the
-    /// module's definition of this macro is private.
-    Done
-  };
-  typedef llvm::DenseMap<SubmoduleID, SubmoduleMacroState> MacroState;
-
-  MacroDirective *
-  getFirstPublicSubmoduleMacro(MacroDirective *MD, MacroState &State) {
-    if (MacroDirective *NextMD = getPublicSubmoduleMacro(MD, State))
-      return NextMD;
-    return nullptr;
-  }
-
-  MacroDirective *
-  getNextPublicSubmoduleMacro(MacroDirective *MD, MacroState &State) {
-    if (MacroDirective *NextMD =
-            getPublicSubmoduleMacro(MD->getPrevious(), State))
-      return NextMD;
-    return nullptr;
-  }
-
-  /// \brief Traverses the macro directives history and returns the next
-  /// public macro definition or undefinition that has not been found so far.
-  ///
-  /// A macro that is defined in submodule A and undefined in submodule B
-  /// will still be considered as defined/exported from submodule A.
-  MacroDirective *getPublicSubmoduleMacro(MacroDirective *MD,
-                                          MacroState &State) {
-    if (!MD)
-      return nullptr;
-
-    Optional<bool> IsPublic;
-    for (; MD; MD = MD->getPrevious()) {
-      // Once we hit an ignored macro, we're done: the rest of the chain
-      // will all be ignored macros.
-      if (shouldIgnoreMacro(MD, IsModule, PP))
-        break;
-
-      // If this macro was imported, re-export it.
-      if (MD->isImported())
-        return MD;
-
-      SubmoduleID ModID = getSubmoduleID(MD);
-      auto &S = State[ModID];
-      assert(ModID && "found macro in no submodule");
-
-      if (S == SubmoduleMacroState::Done)
-        continue;
-
-      if (auto *VisMD = dyn_cast<VisibilityMacroDirective>(MD)) {
-        // The latest visibility directive for a name in a submodule affects all
-        // the directives that come before it.
-        if (S == SubmoduleMacroState::None)
-          S = VisMD->isPublic() ? SubmoduleMacroState::Public
-                                : SubmoduleMacroState::Done;
-      } else {
-        S = SubmoduleMacroState::Done;
-        return MD;
-      }
-    }
-
-    return nullptr;
-  }
-
-  ArrayRef<SubmoduleID>
-  getOverriddenSubmodules(MacroDirective *MD,
-                          SmallVectorImpl<SubmoduleID> &ScratchSpace) {
-    assert(!isa<VisibilityMacroDirective>(MD) &&
-           "only #define and #undef can override");
-    if (MD->isImported())
-      return MD->getOverriddenModules();
-
-    ScratchSpace.clear();
-    SubmoduleID ModID = getSubmoduleID(MD);
-    for (MD = MD->getPrevious(); MD; MD = MD->getPrevious()) {
-      if (shouldIgnoreMacro(MD, IsModule, PP))
-        break;
-
-      // If this is a definition from a submodule import, that submodule's
-      // definition is overridden by the definition or undefinition that we
-      // started with.
-      if (MD->isImported()) {
-        if (auto *DefMD = dyn_cast<DefMacroDirective>(MD)) {
-          SubmoduleID DefModuleID = DefMD->getInfo()->getOwningModuleID();
-          assert(DefModuleID && "imported macro has no owning module");
-          ScratchSpace.push_back(DefModuleID);
-        } else if (auto *UndefMD = dyn_cast<UndefMacroDirective>(MD)) {
-          // If we override a #undef, we override anything that #undef overrides.
-          // We don't need to override it, since an active #undef doesn't affect
-          // the meaning of a macro.
-          auto Overrides = UndefMD->getOverriddenModules();
-          ScratchSpace.insert(ScratchSpace.end(),
-                              Overrides.begin(), Overrides.end());
-        }
-      }
-
-      // Stop once we leave the original macro's submodule.
-      //
-      // Either this submodule #included another submodule of the same
-      // module or it just happened to be built after the other module.
-      // In the former case, we override the submodule's macro.
-      //
-      // FIXME: In the latter case, we shouldn't do so, but we can't tell
-      // these cases apart.
-      //
-      // FIXME: We can leave this submodule and re-enter it if it #includes a
-      // header within a different submodule of the same module. In such cases
-      // the overrides list will be incomplete.
-      SubmoduleID DirectiveModuleID = getSubmoduleID(MD);
-      if (DirectiveModuleID != ModID) {
-        if (DirectiveModuleID && !MD->isImported())
-          ScratchSpace.push_back(DirectiveModuleID);
-        break;
-      }
-    }
-
-    std::sort(ScratchSpace.begin(), ScratchSpace.end());
-    ScratchSpace.erase(std::unique(ScratchSpace.begin(), ScratchSpace.end()),
-                       ScratchSpace.end());
-    return ScratchSpace;
-  }
-
-  SubmoduleID getSubmoduleID(MacroDirective *MD) {
-    return Writer.inferSubmoduleIDFromLocation(MD->getLocation());
-  }
-
 public:
   typedef IdentifierInfo* key_type;
   typedef key_type  key_type_ref;
@@ -3310,9 +3118,9 @@
   typedef unsigned hash_value_type;
   typedef unsigned offset_type;
 
-  ASTIdentifierTableTrait(ASTWriter &Writer, Preprocessor &PP, 
-                          IdentifierResolver &IdResolver, bool IsModule)
-    : Writer(Writer), PP(PP), IdResolver(IdResolver), IsModule(IsModule) { }
+  ASTIdentifierTableTrait(ASTWriter &Writer, Preprocessor &PP,
+                          IdentifierResolver &IdResolver)
+      : Writer(Writer), PP(PP), IdResolver(IdResolver) {}
 
   static hash_value_type ComputeHash(const IdentifierInfo* II) {
     return llvm::HashString(II->getName());
@@ -3322,25 +3130,12 @@
   EmitKeyDataLength(raw_ostream& Out, IdentifierInfo* II, IdentID ID) {
     unsigned KeyLen = II->getLength() + 1;
     unsigned DataLen = 4; // 4 bytes for the persistent ID << 1
-    MacroDirective *Macro = nullptr;
-    if (isInterestingIdentifier(II, Macro)) {
+    auto MacroOffset = Writer.getMacroDirectivesOffset(II);
+    if (isInterestingIdentifier(II, MacroOffset)) {
       DataLen += 2; // 2 bytes for builtin ID
       DataLen += 2; // 2 bytes for flags
-      if (hadMacroDefinition(II, Macro)) {
+      if (MacroOffset)
         DataLen += 4; // MacroDirectives offset.
-        if (IsModule) {
-          MacroState State;
-          SmallVector<SubmoduleID, 16> Scratch;
-          for (MacroDirective *MD = getFirstPublicSubmoduleMacro(Macro, State);
-               MD; MD = getNextPublicSubmoduleMacro(MD, State)) {
-            DataLen += 4; // MacroInfo ID or ModuleID.
-            if (unsigned NumOverrides =
-                    getOverriddenSubmodules(MD, Scratch).size())
-              DataLen += 4 * (1 + NumOverrides);
-          }
-          DataLen += 4; // 0 terminator.
-        }
-      }
 
       for (IdentifierResolver::iterator D = IdResolver.begin(II),
                                      DEnd = IdResolver.end();
@@ -3367,25 +3162,13 @@
     Out.write(II->getNameStart(), KeyLen);
   }
 
-  static void emitMacroOverrides(raw_ostream &Out,
-                                 ArrayRef<SubmoduleID> Overridden) {
-    if (!Overridden.empty()) {
-      using namespace llvm::support;
-      endian::Writer<little> LE(Out);
-      LE.write<uint32_t>(Overridden.size() | 0x80000000U);
-      for (unsigned I = 0, N = Overridden.size(); I != N; ++I) {
-        assert(Overridden[I] && "zero module ID for override");
-        LE.write<uint32_t>(Overridden[I]);
-      }
-    }
-  }
-
   void EmitData(raw_ostream& Out, IdentifierInfo* II,
                 IdentID ID, unsigned) {
     using namespace llvm::support;
     endian::Writer<little> LE(Out);
-    MacroDirective *Macro = nullptr;
-    if (!isInterestingIdentifier(II, Macro)) {
+
+    auto MacroOffset = Writer.getMacroDirectivesOffset(II);
+    if (!isInterestingIdentifier(II, MacroOffset)) {
       LE.write<uint32_t>(ID << 1);
       return;
     }
@@ -3395,43 +3178,16 @@
     assert((Bits & 0xffff) == Bits && "ObjCOrBuiltinID too big for ASTReader.");
     LE.write<uint16_t>(Bits);
     Bits = 0;
-    bool HadMacroDefinition = hadMacroDefinition(II, Macro);
+    bool HadMacroDefinition = MacroOffset != 0;
     Bits = (Bits << 1) | unsigned(HadMacroDefinition);
-    Bits = (Bits << 1) | unsigned(IsModule);
     Bits = (Bits << 1) | unsigned(II->isExtensionToken());
     Bits = (Bits << 1) | unsigned(II->isPoisoned());
     Bits = (Bits << 1) | unsigned(II->hasRevertedTokenIDToIdentifier());
     Bits = (Bits << 1) | unsigned(II->isCPlusPlusOperatorKeyword());
     LE.write<uint16_t>(Bits);
 
-    if (HadMacroDefinition) {
-      LE.write<uint32_t>(Writer.getMacroDirectivesOffset(II));
-      if (IsModule) {
-        // Write the IDs of macros coming from different submodules.
-        MacroState State;
-        SmallVector<SubmoduleID, 16> Scratch;
-        for (MacroDirective *MD = getFirstPublicSubmoduleMacro(Macro, State);
-             MD; MD = getNextPublicSubmoduleMacro(MD, State)) {
-          if (DefMacroDirective *DefMD = dyn_cast<DefMacroDirective>(MD)) {
-            // FIXME: If this macro directive was created by #pragma pop_macros,
-            // or if it was created implicitly by resolving conflicting macros,
-            // it may be for a different submodule from the one in the MacroInfo
-            // object. If so, we should write out its owning ModuleID.
-            MacroID InfoID = Writer.getMacroID(DefMD->getInfo());
-            assert(InfoID);
-            LE.write<uint32_t>(InfoID << 1);
-          } else {
-            auto *UndefMD = cast<UndefMacroDirective>(MD);
-            SubmoduleID Mod = UndefMD->isImported()
-                                  ? UndefMD->getOwningModuleID()
-                                  : getSubmoduleID(UndefMD);
-            LE.write<uint32_t>((Mod << 1) | 1);
-          }
-          emitMacroOverrides(Out, getOverriddenSubmodules(MD, Scratch));
-        }
-        LE.write<uint32_t>((uint32_t)-1);
-      }
-    }
+    if (HadMacroDefinition)
+      LE.write<uint32_t>(MacroOffset);
 
     // Emit the declaration IDs in reverse order, because the
     // IdentifierResolver provides the declarations as they would be
@@ -3463,7 +3219,7 @@
   // strings.
   {
     llvm::OnDiskChainedHashTableGenerator<ASTIdentifierTableTrait> Generator;
-    ASTIdentifierTableTrait Trait(*this, PP, IdResolver, IsModule);
+    ASTIdentifierTableTrait Trait(*this, PP, IdResolver);
 
     // Look for any identifiers that were named while processing the
     // headers, but are otherwise not needed. We add these to the hash
@@ -3497,7 +3253,6 @@
     uint32_t BucketOffset;
     {
       using namespace llvm::support;
-      ASTIdentifierTableTrait Trait(*this, PP, IdResolver, IsModule);
       llvm::raw_svector_ostream Out(IdentifierTable);
       // Make sure that no bucket is at offset 0
       endian::Writer<little>(Out).write<uint32_t>(0);
@@ -3536,7 +3291,7 @@
   Record.push_back(IdentifierOffsets.size());
   Record.push_back(FirstIdentID - NUM_PREDEF_IDENT_IDS);
   Stream.EmitRecordWithBlob(IdentifierOffsetAbbrev, Record,
-                            data(IdentifierOffsets));
+                            bytes(IdentifierOffsets));
 }
 
 //===----------------------------------------------------------------------===//
@@ -4401,6 +4156,20 @@
     AddSourceLocation(I->second, UndefinedButUsed);
   }
 
+  // Build a record containing all delete-expressions that we would like to
+  // analyze later in AST.
+  RecordData DeleteExprsToAnalyze;
+
+  for (const auto &DeleteExprsInfo :
+       SemaRef.getMismatchingDeleteExpressions()) {
+    AddDeclRef(DeleteExprsInfo.first, DeleteExprsToAnalyze);
+    DeleteExprsToAnalyze.push_back(DeleteExprsInfo.second.size());
+    for (const auto &DeleteLoc : DeleteExprsInfo.second) {
+      AddSourceLocation(DeleteLoc.first, DeleteExprsToAnalyze);
+      DeleteExprsToAnalyze.push_back(DeleteLoc.second);
+    }
+  }
+
   // Write the control block
   WriteControlBlock(PP, Context, isysroot, OutputFile);
 
@@ -4430,7 +4199,7 @@
   Record.clear();
   Record.push_back(TU_UPDATE_LEXICAL);
   Stream.EmitRecordWithBlob(TuUpdateLexicalAbbrev, Record,
-                            data(NewGlobalDecls));
+                            bytes(NewGlobalDecls));
   
   // And a visible updates block for the translation unit.
   Abv = new llvm::BitCodeAbbrev();
@@ -4670,7 +4439,10 @@
   // Write the undefined internal functions and variables, and inline functions.
   if (!UndefinedButUsed.empty())
     Stream.EmitRecord(UNDEFINED_BUT_USED, UndefinedButUsed);
-  
+
+  if (!DeleteExprsToAnalyze.empty())
+    Stream.EmitRecord(DELETE_EXPRS_TO_ANALYZE, DeleteExprsToAnalyze);
+
   // Write the visible updates to DeclContexts.
   for (auto *DC : UpdatedDeclContexts)
     WriteDeclContextVisibleUpdate(DC);
@@ -4708,7 +4480,7 @@
         // FIXME: If the module has macros imported then later has declarations
         // imported, this location won't be the right one as a location for the
         // declaration imports.
-        AddSourceLocation(Import.M->MacroVisibilityLoc, ImportedModules);
+        AddSourceLocation(PP.getModuleImportLoc(Import.M), ImportedModules);
       }
 
       Stream.EmitRecord(IMPORTED_MODULES, ImportedModules);
@@ -4844,7 +4616,7 @@
         break;
 
       case UPD_DECL_EXPORTED:
-        Record.push_back(inferSubmoduleIDFromLocation(Update.getLoc()));
+        Record.push_back(getSubmoduleID(Update.getModule()));
         break;
       }
     }
@@ -4943,8 +4715,7 @@
 }
 
 uint64_t ASTWriter::getMacroDirectivesOffset(const IdentifierInfo *Name) {
-  assert(IdentMacroDirectivesOffsetMap[Name] && "not set!");
-  return IdentMacroDirectivesOffsetMap[Name];
+  return IdentMacroDirectivesOffsetMap.lookup(Name);
 }
 
 void ASTWriter::AddSelectorRef(const Selector SelRef, RecordDataImpl &Record) {
@@ -5796,7 +5567,7 @@
 }
 
 void ASTWriter::MacroDefinitionRead(serialization::PreprocessedEntityID ID,
-                                    MacroDefinition *MD) {
+                                    MacroDefinitionRecord *MD) {
   assert(MacroDefinitions.find(MD) == MacroDefinitions.end());
   MacroDefinitions[MD] = ID;
 }
@@ -5990,10 +5761,8 @@
   DeclUpdates[D].push_back(DeclUpdate(UPD_DECL_MARKED_OPENMP_THREADPRIVATE));
 }
 
-void ASTWriter::RedefinedHiddenDefinition(const NamedDecl *D,
-                                          SourceLocation Loc) {
+void ASTWriter::RedefinedHiddenDefinition(const NamedDecl *D, Module *M) {
   assert(!WritingAST && "Already writing the AST!");
   assert(D->isHidden() && "expected a hidden declaration");
-  assert(D->isFromASTFile() && "hidden decl not from AST file");
-  DeclUpdates[D].push_back(DeclUpdate(UPD_DECL_EXPORTED, Loc));
+  DeclUpdates[D].push_back(DeclUpdate(UPD_DECL_EXPORTED, M));
 }
diff --git a/lib/Serialization/ASTWriterDecl.cpp b/lib/Serialization/ASTWriterDecl.cpp
index 608aa59..f69367f 100644
--- a/lib/Serialization/ASTWriterDecl.cpp
+++ b/lib/Serialization/ASTWriterDecl.cpp
@@ -190,8 +190,7 @@
         assert(D->isCanonicalDecl() && "non-canonical decl in set");
         Writer.AddDeclRef(D, Record);
       }
-      for (DeclID ID : LazySpecializations)
-        Record.push_back(ID);
+      Record.append(LazySpecializations.begin(), LazySpecializations.end());
     }
   };
 }
@@ -790,13 +789,15 @@
   Record.push_back(D->getStorageClass());
   Record.push_back(D->getTSCSpec());
   Record.push_back(D->getInitStyle());
-  Record.push_back(D->isExceptionVariable());
-  Record.push_back(D->isNRVOVariable());
-  Record.push_back(D->isCXXForRangeDecl());
-  Record.push_back(D->isARCPseudoStrong());
-  Record.push_back(D->isConstexpr());
-  Record.push_back(D->isInitCapture());
-  Record.push_back(D->isPreviousDeclInSameBlockScope());
+  if (!isa<ParmVarDecl>(D)) {
+    Record.push_back(D->isExceptionVariable());
+    Record.push_back(D->isNRVOVariable());
+    Record.push_back(D->isCXXForRangeDecl());
+    Record.push_back(D->isARCPseudoStrong());
+    Record.push_back(D->isConstexpr());
+    Record.push_back(D->isInitCapture());
+    Record.push_back(D->isPreviousDeclInSameBlockScope());
+  }
   Record.push_back(D->getLinkageInternal());
 
   if (D->getInit()) {
@@ -1378,8 +1379,12 @@
   VisitTypeDecl(D);
 
   Record.push_back(D->wasDeclaredWithTypename());
-  Record.push_back(D->defaultArgumentWasInherited());
-  Writer.AddTypeSourceInfo(D->getDefaultArgumentInfo(), Record);
+
+  bool OwnsDefaultArg = D->hasDefaultArgument() &&
+                        !D->defaultArgumentWasInherited();
+  Record.push_back(OwnsDefaultArg);
+  if (OwnsDefaultArg)
+    Writer.AddTypeSourceInfo(D->getDefaultArgumentInfo(), Record);
 
   Code = serialization::DECL_TEMPLATE_TYPE_PARM;
 }
@@ -1406,11 +1411,11 @@
   } else {
     // Rest of NonTypeTemplateParmDecl.
     Record.push_back(D->isParameterPack());
-    Record.push_back(D->getDefaultArgument() != nullptr);
-    if (D->getDefaultArgument()) {
+    bool OwnsDefaultArg = D->hasDefaultArgument() &&
+                          !D->defaultArgumentWasInherited();
+    Record.push_back(OwnsDefaultArg);
+    if (OwnsDefaultArg)
       Writer.AddStmt(D->getDefaultArgument());
-      Record.push_back(D->defaultArgumentWasInherited());
-    }
     Code = serialization::DECL_NON_TYPE_TEMPLATE_PARM;
   }
 }
@@ -1435,9 +1440,12 @@
     Code = serialization::DECL_EXPANDED_TEMPLATE_TEMPLATE_PARM_PACK;
   } else {
     // Rest of TemplateTemplateParmDecl.
-    Writer.AddTemplateArgumentLoc(D->getDefaultArgument(), Record);
-    Record.push_back(D->defaultArgumentWasInherited());
     Record.push_back(D->isParameterPack());
+    bool OwnsDefaultArg = D->hasDefaultArgument() &&
+                          !D->defaultArgumentWasInherited();
+    Record.push_back(OwnsDefaultArg);
+    if (OwnsDefaultArg)
+      Writer.AddTemplateArgumentLoc(D->getDefaultArgument(), Record);
     Code = serialization::DECL_TEMPLATE_TEMPLATE_PARM;
   }
 }
@@ -1738,13 +1746,6 @@
   Abv->Add(BitCodeAbbrevOp(0));                       // StorageClass
   Abv->Add(BitCodeAbbrevOp(0));                       // getTSCSpec
   Abv->Add(BitCodeAbbrevOp(0));                       // hasCXXDirectInitializer
-  Abv->Add(BitCodeAbbrevOp(0));                       // isExceptionVariable
-  Abv->Add(BitCodeAbbrevOp(0));                       // isNRVOVariable
-  Abv->Add(BitCodeAbbrevOp(0));                       // isCXXForRangeDecl
-  Abv->Add(BitCodeAbbrevOp(0));                       // isARCPseudoStrong
-  Abv->Add(BitCodeAbbrevOp(0));                       // isConstexpr
-  Abv->Add(BitCodeAbbrevOp(0));                       // isInitCapture
-  Abv->Add(BitCodeAbbrevOp(0));                       // isPrevDeclInSameScope
   Abv->Add(BitCodeAbbrevOp(0));                       // Linkage
   Abv->Add(BitCodeAbbrevOp(0));                       // HasInit
   Abv->Add(BitCodeAbbrevOp(0));                   // HasMemberSpecializationInfo
diff --git a/lib/Serialization/ASTWriterStmt.cpp b/lib/Serialization/ASTWriterStmt.cpp
index f15f76c..00356f8 100644
--- a/lib/Serialization/ASTWriterStmt.cpp
+++ b/lib/Serialization/ASTWriterStmt.cpp
@@ -550,13 +550,13 @@
   Record.push_back(E->getValueKind());
   Record.push_back(E->getObjectKind());
   Writer.AddStmt(E->getBase());
-  Writer.AddDeclRef(E->getMemberDecl(), Record);

-  Writer.AddSourceLocation(E->getMemberLoc(), Record);

-  Record.push_back(E->isArrow());

-  Writer.AddSourceLocation(E->getOperatorLoc(), Record);

-  Writer.AddDeclarationNameLoc(E->MemberDNLoc,

-                               E->getMemberDecl()->getDeclName(), Record);

-  Code = serialization::EXPR_MEMBER;

+  Writer.AddDeclRef(E->getMemberDecl(), Record);
+  Writer.AddSourceLocation(E->getMemberLoc(), Record);
+  Record.push_back(E->isArrow());
+  Writer.AddSourceLocation(E->getOperatorLoc(), Record);
+  Writer.AddDeclarationNameLoc(E->MemberDNLoc,
+                               E->getMemberDecl()->getDeclName(), Record);
+  Code = serialization::EXPR_MEMBER;
 }
 
 void ASTStmtWriter::VisitObjCIsaExpr(ObjCIsaExpr *E) {
@@ -738,6 +738,18 @@
   Code = serialization::EXPR_DESIGNATED_INIT;
 }
 
+void ASTStmtWriter::VisitDesignatedInitUpdateExpr(DesignatedInitUpdateExpr *E) {
+  VisitExpr(E);
+  Writer.AddStmt(E->getBase());
+  Writer.AddStmt(E->getUpdater());
+  Code = serialization::EXPR_DESIGNATED_INIT_UPDATE;
+}
+
+void ASTStmtWriter::VisitNoInitExpr(NoInitExpr *E) {
+  VisitExpr(E);
+  Code = serialization::EXPR_NO_INIT;
+}
+
 void ASTStmtWriter::VisitImplicitValueInitExpr(ImplicitValueInitExpr *E) {
   VisitExpr(E);
   Code = serialization::EXPR_IMPLICIT_VALUE_INIT;
@@ -1745,6 +1757,7 @@
 void OMPClauseWriter::VisitOMPScheduleClause(OMPScheduleClause *C) {
   Record.push_back(C->getScheduleKind());
   Writer->Writer.AddStmt(C->getChunkSize());
+  Writer->Writer.AddStmt(C->getHelperChunkSize());
   Writer->Writer.AddSourceLocation(C->getLParenLoc(), Record);
   Writer->Writer.AddSourceLocation(C->getScheduleKindLoc(), Record);
   Writer->Writer.AddSourceLocation(C->getCommaLoc(), Record);
diff --git a/lib/Serialization/ModuleManager.cpp b/lib/Serialization/ModuleManager.cpp
index a50c2b1..30d9c89 100644
--- a/lib/Serialization/ModuleManager.cpp
+++ b/lib/Serialization/ModuleManager.cpp
@@ -94,6 +94,8 @@
     New->File = Entry;
     New->ImportLoc = ImportLoc;
     Chain.push_back(New);
+    if (!ImportedBy)
+      Roots.push_back(New);
     NewModule = true;
     ModuleEntry = New;
 
@@ -155,7 +157,12 @@
         // invalidate the file cache for Entry, and that is not safe if this
         // module is *itself* up to date, but has an out-of-date importer.
         Modules.erase(Entry);
+        assert(Chain.back() == ModuleEntry);
         Chain.pop_back();
+        if (Roots.back() == ModuleEntry)
+          Roots.pop_back();
+        else
+          assert(ImportedBy);
         delete ModuleEntry;
       }
       return OutOfDate;
@@ -186,12 +193,15 @@
   // Collect the set of module file pointers that we'll be removing.
   llvm::SmallPtrSet<ModuleFile *, 4> victimSet(first, last);
 
+  auto IsVictim = [&](ModuleFile *MF) {
+    return victimSet.count(MF);
+  };
   // Remove any references to the now-destroyed modules.
   for (unsigned i = 0, n = Chain.size(); i != n; ++i) {
-    Chain[i]->ImportedBy.remove_if([&](ModuleFile *MF) {
-      return victimSet.count(MF);
-    });
+    Chain[i]->ImportedBy.remove_if(IsVictim);
   }
+  Roots.erase(std::remove_if(Roots.begin(), Roots.end(), IsVictim),
+              Roots.end());
 
   // Delete the modules and erase them from the various structures.
   for (ModuleIterator victim = first; victim != last; ++victim) {
@@ -398,16 +408,38 @@
   returnVisitState(State);
 }
 
+static void markVisitedDepthFirst(ModuleFile &M,
+                                  SmallVectorImpl<bool> &Visited) {
+  for (llvm::SetVector<ModuleFile *>::iterator IM = M.Imports.begin(),
+                                               IMEnd = M.Imports.end();
+       IM != IMEnd; ++IM) {
+    if (Visited[(*IM)->Index])
+      continue;
+    Visited[(*IM)->Index] = true;
+    if (!M.DirectlyImported)
+      markVisitedDepthFirst(**IM, Visited);
+  }
+}
+
 /// \brief Perform a depth-first visit of the current module.
-static bool visitDepthFirst(ModuleFile &M, 
-                            bool (*Visitor)(ModuleFile &M, bool Preorder, 
-                                            void *UserData), 
-                            void *UserData,
-                            SmallVectorImpl<bool> &Visited) {
-  // Preorder visitation
-  if (Visitor(M, /*Preorder=*/true, UserData))
-    return true;
-  
+static bool visitDepthFirst(
+    ModuleFile &M,
+    ModuleManager::DFSPreorderControl (*PreorderVisitor)(ModuleFile &M,
+                                                         void *UserData),
+    bool (*PostorderVisitor)(ModuleFile &M, void *UserData), void *UserData,
+    SmallVectorImpl<bool> &Visited) {
+  if (PreorderVisitor) {
+    switch (PreorderVisitor(M, UserData)) {
+    case ModuleManager::Abort:
+      return true;
+    case ModuleManager::SkipImports:
+      markVisitedDepthFirst(M, Visited);
+      return false;
+    case ModuleManager::Continue:
+      break;
+    }
+  }
+
   // Visit children
   for (llvm::SetVector<ModuleFile *>::iterator IM = M.Imports.begin(),
                                             IMEnd = M.Imports.end();
@@ -416,24 +448,27 @@
       continue;
     Visited[(*IM)->Index] = true;
 
-    if (visitDepthFirst(**IM, Visitor, UserData, Visited))
+    if (visitDepthFirst(**IM, PreorderVisitor, PostorderVisitor, UserData, Visited))
       return true;
   }  
   
-  // Postorder visitation
-  return Visitor(M, /*Preorder=*/false, UserData);
+  if (PostorderVisitor)
+    return PostorderVisitor(M, UserData);
+
+  return false;
 }
 
-void ModuleManager::visitDepthFirst(bool (*Visitor)(ModuleFile &M, bool Preorder, 
-                                                    void *UserData), 
-                                    void *UserData) {
+void ModuleManager::visitDepthFirst(
+    ModuleManager::DFSPreorderControl (*PreorderVisitor)(ModuleFile &M,
+                                                         void *UserData),
+    bool (*PostorderVisitor)(ModuleFile &M, void *UserData), void *UserData) {
   SmallVector<bool, 16> Visited(size(), false);
-  for (unsigned I = 0, N = Chain.size(); I != N; ++I) {
-    if (Visited[Chain[I]->Index])
+  for (unsigned I = 0, N = Roots.size(); I != N; ++I) {
+    if (Visited[Roots[I]->Index])
       continue;
-    Visited[Chain[I]->Index] = true;
+    Visited[Roots[I]->Index] = true;
 
-    if (::visitDepthFirst(*Chain[I], Visitor, UserData, Visited))
+    if (::visitDepthFirst(*Roots[I], PreorderVisitor, PostorderVisitor, UserData, Visited))
       return;
   }
 }
diff --git a/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index e91a7e1..0f5741b 100644
--- a/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -1922,10 +1922,6 @@
   if (!evalFunction)
     return false;
 
-  // Make sure each function sets its own description.
-  // (But don't bother in a release build.)
-  assert(!(CurrentFunctionDescription = nullptr));
-
   // Check and evaluate the call.
   (this->*evalFunction)(C, CE);
 
diff --git a/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 08ba26a..275481f 100644
--- a/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -199,7 +199,7 @@
                                                      const FunctionDecl *FDecl,
                                                      StringRef Name,
                                                      CheckerContext &C) {
-  // TODO: Currently, we might loose precision here: we always mark a return
+  // TODO: Currently, we might lose precision here: we always mark a return
   // value as tainted even if it's just a pointer, pointing to tainted data.
 
   // Check for exact name match for functions without builtin substitutes.
diff --git a/lib/StaticAnalyzer/Checkers/InterCheckerAPI.h b/lib/StaticAnalyzer/Checkers/InterCheckerAPI.h
index b7549fd..d38d63c 100644
--- a/lib/StaticAnalyzer/Checkers/InterCheckerAPI.h
+++ b/lib/StaticAnalyzer/Checkers/InterCheckerAPI.h
@@ -13,6 +13,8 @@
 #ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_INTERCHECKERAPI_H
 #define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_INTERCHECKERAPI_H
 namespace clang {
+class CheckerManager;
+
 namespace ento {
 
 /// Register the checker which evaluates CString API calls.
diff --git a/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp b/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp
index 8e51154..4f0b7e5 100644
--- a/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/ObjCContainersChecker.cpp
@@ -29,7 +29,8 @@
 
 namespace {
 class ObjCContainersChecker : public Checker< check::PreStmt<CallExpr>,
-                                             check::PostStmt<CallExpr> > {
+                                             check::PostStmt<CallExpr>,
+                                             check::PointerEscape> {
   mutable std::unique_ptr<BugType> BT;
   inline void initBugType() const {
     if (!BT)
@@ -52,6 +53,10 @@
 
   void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
   void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
+  ProgramStateRef checkPointerEscape(ProgramStateRef State,
+                                     const InvalidatedSymbols &Escaped,
+                                     const CallEvent *Call,
+                                     PointerEscapeKind Kind) const;
 };
 } // end anonymous namespace
 
@@ -110,7 +115,8 @@
   if (Name.equals("CFArrayGetValueAtIndex")) {
     ProgramStateRef State = C.getState();
     // Retrieve the size.
-    // Find out if we saw this array symbol before and have information about it.
+    // Find out if we saw this array symbol before and have information about
+    // it.
     const Expr *ArrayExpr = CE->getArg(0);
     SymbolRef ArraySym = getArraySym(ArrayExpr, C);
     if (!ArraySym)
@@ -145,6 +151,24 @@
   }
 }
 
+ProgramStateRef
+ObjCContainersChecker::checkPointerEscape(ProgramStateRef State,
+                                          const InvalidatedSymbols &Escaped,
+                                          const CallEvent *Call,
+                                          PointerEscapeKind Kind) const {
+  for (InvalidatedSymbols::const_iterator I = Escaped.begin(),
+                                          E = Escaped.end();
+                                          I != E; ++I) {
+    SymbolRef Sym = *I;
+    // When a symbol for a mutable array escapes, we can't reason precisely
+    // about its size any more -- so remove it from the map.
+    // Note that we aren't notified here when a CFMutableArrayRef escapes as a
+    // CFArrayRef. This is because CFArrayRef is typedef'd as a pointer to a
+    // const-qualified type.
+    State = State->remove<ArraySizeMap>(Sym);
+  }
+  return State;
+}
 /// Register checker.
 void ento::registerObjCContainersChecker(CheckerManager &mgr) {
   mgr.registerChecker<ObjCContainersChecker>();
diff --git a/lib/StaticAnalyzer/Core/ExprEngine.cpp b/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 8b7f18f..c5f34da 100644
--- a/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -859,6 +859,7 @@
 
     // Cases not handled yet; but will handle some day.
     case Stmt::DesignatedInitExprClass:
+    case Stmt::DesignatedInitUpdateExprClass:
     case Stmt::ExtVectorElementExprClass:
     case Stmt::ImaginaryLiteralClass:
     case Stmt::ObjCAtCatchStmtClass:
@@ -891,6 +892,7 @@
     case Stmt::CXXBoolLiteralExprClass:
     case Stmt::ObjCBoolLiteralExprClass:
     case Stmt::FloatingLiteralClass:
+    case Stmt::NoInitExprClass:
     case Stmt::SizeOfPackExprClass:
     case Stmt::StringLiteralClass:
     case Stmt::ObjCStringLiteralClass:
diff --git a/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp b/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
index 3c1a3b4..cfcf7c6 100644
--- a/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
+++ b/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
@@ -22,6 +22,7 @@
 #include "clang/Rewrite/Core/Rewriter.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h"
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "llvm/Support/Errc.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
@@ -306,7 +307,7 @@
                                                FD,
                                                llvm::sys::fs::F_RW |
                                                llvm::sys::fs::F_Excl);
-          if (EC && EC != std::errc::file_exists) {
+          if (EC && EC != llvm::errc::file_exists) {
               llvm::errs() << "warning: could not create file '" << Model
                            << "': " << EC.message() << '\n';
               return;
diff --git a/lib/StaticAnalyzer/Frontend/ModelInjector.cpp b/lib/StaticAnalyzer/Frontend/ModelInjector.cpp
index 63bb1e2..699549f 100644
--- a/lib/StaticAnalyzer/Frontend/ModelInjector.cpp
+++ b/lib/StaticAnalyzer/Frontend/ModelInjector.cpp
@@ -69,7 +69,7 @@
   FrontendOptions &FrontendOpts = Invocation->getFrontendOpts();
   InputKind IK = IK_CXX; // FIXME
   FrontendOpts.Inputs.clear();
-  FrontendOpts.Inputs.push_back(FrontendInputFile(fileName, IK));
+  FrontendOpts.Inputs.emplace_back(fileName, IK);
   FrontendOpts.DisableFree = true;
 
   Invocation->getDiagnosticOpts().VerifyDiagnostics = 0;
diff --git a/lib/Tooling/CompilationDatabase.cpp b/lib/Tooling/CompilationDatabase.cpp
index 2514f02..4483b18 100644
--- a/lib/Tooling/CompilationDatabase.cpp
+++ b/lib/Tooling/CompilationDatabase.cpp
@@ -302,8 +302,7 @@
   std::vector<std::string> ToolCommandLine(1, "clang-tool");
   ToolCommandLine.insert(ToolCommandLine.end(),
                          CommandLine.begin(), CommandLine.end());
-  CompileCommands.push_back(
-      CompileCommand(Directory, std::move(ToolCommandLine)));
+  CompileCommands.emplace_back(Directory, std::move(ToolCommandLine));
 }
 
 std::vector<CompileCommand>
diff --git a/lib/Tooling/Core/Replacement.cpp b/lib/Tooling/Core/Replacement.cpp
index 525f7df..32e8e5b 100644
--- a/lib/Tooling/Core/Replacement.cpp
+++ b/lib/Tooling/Core/Replacement.cpp
@@ -43,8 +43,9 @@
 
 Replacement::Replacement(const SourceManager &Sources,
                          const CharSourceRange &Range,
-                         StringRef ReplacementText) {
-  setFromSourceRange(Sources, Range, ReplacementText);
+                         StringRef ReplacementText,
+                         const LangOptions &LangOpts) {
+  setFromSourceRange(Sources, Range, ReplacementText, LangOpts);
 }
 
 bool Replacement::isApplicable() const {
@@ -77,11 +78,11 @@
 }
 
 std::string Replacement::toString() const {
-  std::string result;
-  llvm::raw_string_ostream stream(result);
-  stream << FilePath << ": " << ReplacementRange.getOffset() << ":+"
+  std::string Result;
+  llvm::raw_string_ostream Stream(Result);
+  Stream << FilePath << ": " << ReplacementRange.getOffset() << ":+"
          << ReplacementRange.getLength() << ":\"" << ReplacementText << "\"";
-  return result;
+  return Stream.str();
 }
 
 bool operator<(const Replacement &LHS, const Replacement &RHS) {
@@ -124,23 +125,25 @@
 // to handle ranges for refactoring in general first - there is no obvious
 // good way how to integrate this into the Lexer yet.
 static int getRangeSize(const SourceManager &Sources,
-                        const CharSourceRange &Range) {
+                        const CharSourceRange &Range,
+                        const LangOptions &LangOpts) {
   SourceLocation SpellingBegin = Sources.getSpellingLoc(Range.getBegin());
   SourceLocation SpellingEnd = Sources.getSpellingLoc(Range.getEnd());
   std::pair<FileID, unsigned> Start = Sources.getDecomposedLoc(SpellingBegin);
   std::pair<FileID, unsigned> End = Sources.getDecomposedLoc(SpellingEnd);
   if (Start.first != End.first) return -1;
   if (Range.isTokenRange())
-    End.second += Lexer::MeasureTokenLength(SpellingEnd, Sources,
-                                            LangOptions());
+    End.second += Lexer::MeasureTokenLength(SpellingEnd, Sources, LangOpts);
   return End.second - Start.second;
 }
 
 void Replacement::setFromSourceRange(const SourceManager &Sources,
                                      const CharSourceRange &Range,
-                                     StringRef ReplacementText) {
+                                     StringRef ReplacementText,
+                                     const LangOptions &LangOpts) {
   setFromSourceLocation(Sources, Sources.getSpellingLoc(Range.getBegin()),
-                        getRangeSize(Sources, Range), ReplacementText);
+                        getRangeSize(Sources, Range, LangOpts),
+                        ReplacementText);
 }
 
 unsigned shiftedCodePosition(const Replacements &Replaces, unsigned Position) {
diff --git a/lib/Tooling/JSONCompilationDatabase.cpp b/lib/Tooling/JSONCompilationDatabase.cpp
index 7dc211e..454a2ff 100644
--- a/lib/Tooling/JSONCompilationDatabase.cpp
+++ b/lib/Tooling/JSONCompilationDatabase.cpp
@@ -220,10 +220,10 @@
   for (int I = 0, E = CommandsRef.size(); I != E; ++I) {
     SmallString<8> DirectoryStorage;
     SmallString<1024> CommandStorage;
-    Commands.push_back(CompileCommand(
-      // FIXME: Escape correctly:
-      CommandsRef[I].first->getValue(DirectoryStorage),
-      unescapeCommandLine(CommandsRef[I].second->getValue(CommandStorage))));
+    Commands.emplace_back(
+        // FIXME: Escape correctly:
+        CommandsRef[I].first->getValue(DirectoryStorage),
+        unescapeCommandLine(CommandsRef[I].second->getValue(CommandStorage)));
   }
 }