Update LLVM for 3.5 rebase (r209712).

Change-Id: I149556c940fb7dc92d075273c87ff584f400941f
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 48d3fba..377fa15 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -29,7 +29,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "argpromotion"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/Statistic.h"
@@ -49,6 +48,8 @@
 #include <set>
 using namespace llvm;
 
+#define DEBUG_TYPE "argpromotion"
+
 STATISTIC(NumArgumentsPromoted , "Number of pointer arguments promoted");
 STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted");
 STATISTIC(NumByValArgsPromoted , "Number of byval arguments promoted");
@@ -123,14 +124,14 @@
   Function *F = CGN->getFunction();
 
   // Make sure that it is local to this module.
-  if (!F || !F->hasLocalLinkage()) return 0;
+  if (!F || !F->hasLocalLinkage()) return nullptr;
 
   // First check: see if there are any pointer arguments!  If not, quick exit.
   SmallVector<Argument*, 16> PointerArgs;
   for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
     if (I->getType()->isPointerTy())
       PointerArgs.push_back(I);
-  if (PointerArgs.empty()) return 0;
+  if (PointerArgs.empty()) return nullptr;
 
   // Second check: make sure that all callers are direct callers.  We can't
   // transform functions that have indirect callers.  Also see if the function
@@ -139,7 +140,7 @@
   for (Use &U : F->uses()) {
     CallSite CS(U.getUser());
     // Must be a direct call.
-    if (CS.getInstruction() == 0 || !CS.isCallee(&U)) return 0;
+    if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr;
     
     if (CS.getInstruction()->getParent()->getParent() == F)
       isSelfRecursive = true;
@@ -207,7 +208,7 @@
 
   // No promotable pointer arguments.
   if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) 
-    return 0;
+    return nullptr;
 
   return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
 }
@@ -660,7 +661,7 @@
         Type *AgTy = cast<PointerType>(I->getType())->getElementType();
         StructType *STy = cast<StructType>(AgTy);
         Value *Idxs[2] = {
-              ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
+              ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr };
         for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
           Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
           Value *Idx = GetElementPtrInst::Create(*AI, Idxs,
@@ -788,10 +789,10 @@
 
       // Just add all the struct element types.
       Type *AgTy = cast<PointerType>(I->getType())->getElementType();
-      Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt);
+      Value *TheAlloca = new AllocaInst(AgTy, nullptr, "", InsertPt);
       StructType *STy = cast<StructType>(AgTy);
       Value *Idxs[2] = {
-            ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
+            ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr };
 
       for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
         Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index 5c3acea..23be081 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -17,7 +17,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "constmerge"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PointerIntPair.h"
@@ -31,6 +30,8 @@
 #include "llvm/Pass.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "constmerge"
+
 STATISTIC(NumMerged, "Number of global constants merged");
 
 namespace {
@@ -66,7 +67,7 @@
 /// Find values that are marked as llvm.used.
 static void FindUsedValues(GlobalVariable *LLVMUsed,
                            SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
-  if (LLVMUsed == 0) return;
+  if (!LLVMUsed) return;
   ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
 
   for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) {
@@ -103,7 +104,7 @@
 
 bool ConstantMerge::runOnModule(Module &M) {
   DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-  DL = DLP ? &DLP->getDataLayout() : 0;
+  DL = DLP ? &DLP->getDataLayout() : nullptr;
 
   // Find all the globals that are marked "used".  These cannot be merged.
   SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
@@ -161,7 +162,7 @@
       // If this is the first constant we find or if the old one is local,
       // replace with the current one. If the current is externally visible
       // it cannot be replace, but can be the canonical constant we merge with.
-      if (Slot == 0 || IsBetterCanonical(*GV, *Slot))
+      if (!Slot || IsBetterCanonical(*GV, *Slot))
         Slot = GV;
     }
 
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 1aba3df..284b896 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -17,7 +17,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "deadargelim"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
@@ -38,8 +37,11 @@
 #include "llvm/Support/raw_ostream.h"
 #include <map>
 #include <set>
+#include <tuple>
 using namespace llvm;
 
+#define DEBUG_TYPE "deadargelim"
+
 STATISTIC(NumArgumentsEliminated, "Number of unread args removed");
 STATISTIC(NumRetValsEliminated  , "Number of unused return values removed");
 STATISTIC(NumArgumentsReplacedWithUndef, 
@@ -764,7 +766,7 @@
 
   // Find out the new return value.
   Type *RetTy = FTy->getReturnType();
-  Type *NRetTy = NULL;
+  Type *NRetTy = nullptr;
   unsigned RetCount = NumRetVals(F);
 
   // -1 means unused, other numbers are the new index
@@ -1050,7 +1052,7 @@
         Value *RetVal;
 
         if (NFTy->getReturnType()->isVoidTy()) {
-          RetVal = 0;
+          RetVal = nullptr;
         } else {
           assert (RetTy->isStructTy());
           // The original return value was a struct, insert
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index 4211f12..40ec9fa 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -27,11 +27,10 @@
 /// the split module remain valid.
 static void makeVisible(GlobalValue &GV, bool Delete) {
   bool Local = GV.hasLocalLinkage();
-  if (Local)
-    GV.setVisibility(GlobalValue::HiddenVisibility);
-
   if (Local || Delete) {
     GV.setLinkage(GlobalValue::ExternalLinkage);
+    if (Local)
+      GV.setVisibility(GlobalValue::HiddenVisibility);
     return;
   }
 
@@ -95,7 +94,7 @@
 	makeVisible(*I, Delete);
 
         if (Delete)
-          I->setInitializer(0);
+          I->setInitializer(nullptr);
       }
 
       // Visit the Functions.
@@ -134,7 +133,7 @@
           } else {
             Declaration =
               new GlobalVariable(M, Ty, false, GlobalValue::ExternalLinkage,
-                                 0, CurI->getName());
+                                 nullptr, CurI->getName());
 
           }
           CurI->replaceAllUsesWith(Declaration);
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index b716718..fed8839 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -18,7 +18,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "functionattrs"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/SetVector.h"
@@ -35,6 +34,8 @@
 #include "llvm/Target/TargetLibraryInfo.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "functionattrs"
+
 STATISTIC(NumReadNone, "Number of functions marked readnone");
 STATISTIC(NumReadOnly, "Number of functions marked readonly");
 STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
@@ -46,7 +47,7 @@
 namespace {
   struct FunctionAttrs : public CallGraphSCCPass {
     static char ID; // Pass identification, replacement for typeid
-    FunctionAttrs() : CallGraphSCCPass(ID), AA(0) {
+    FunctionAttrs() : CallGraphSCCPass(ID), AA(nullptr) {
       initializeFunctionAttrsPass(*PassRegistry::getPassRegistry());
     }
 
@@ -160,7 +161,7 @@
   for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
     Function *F = (*I)->getFunction();
 
-    if (F == 0)
+    if (!F)
       // External node - may write memory.  Just give up.
       return false;
 
@@ -319,7 +320,7 @@
     ArgumentGraphNode SyntheticRoot;
 
   public:
-    ArgumentGraph() { SyntheticRoot.Definition = 0; }
+    ArgumentGraph() { SyntheticRoot.Definition = nullptr; }
 
     typedef SmallVectorImpl<ArgumentGraphNode*>::iterator iterator;
 
@@ -521,7 +522,7 @@
   for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
     Function *F = (*I)->getFunction();
 
-    if (F == 0)
+    if (!F)
       // External node - only a problem for arguments that we pass to it.
       continue;
 
@@ -600,7 +601,7 @@
   // captures.
 
   for (scc_iterator<ArgumentGraph*> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
-    std::vector<ArgumentGraphNode*> &ArgumentSCC = *I;
+    const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I;
     if (ArgumentSCC.size() == 1) {
       if (!ArgumentSCC[0]->Definition) continue;  // synthetic root node
 
@@ -616,8 +617,8 @@
     }
 
     bool SCCCaptured = false;
-    for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(),
-           E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) {
+    for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end();
+         I != E && !SCCCaptured; ++I) {
       ArgumentGraphNode *Node = *I;
       if (Node->Uses.empty()) {
         if (!Node->Definition->hasNoCaptureAttr())
@@ -629,13 +630,12 @@
     SmallPtrSet<Argument*, 8> ArgumentSCCNodes;
     // Fill ArgumentSCCNodes with the elements of the ArgumentSCC.  Used for
     // quickly looking up whether a given Argument is in this ArgumentSCC.
-    for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(),
-           E = ArgumentSCC.end(); I != E; ++I) {
+    for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E; ++I) {
       ArgumentSCCNodes.insert((*I)->Definition);
     }
 
-    for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(),
-           E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) {
+    for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end();
+         I != E && !SCCCaptured; ++I) {
       ArgumentGraphNode *N = *I;
       for (SmallVectorImpl<ArgumentGraphNode*>::iterator UI = N->Uses.begin(),
              UE = N->Uses.end(); UI != UE; ++UI) {
@@ -775,7 +775,7 @@
   for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
     Function *F = (*I)->getFunction();
 
-    if (F == 0)
+    if (!F)
       // External node - skip it;
       return false;
 
@@ -1668,7 +1668,7 @@
   for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
     Function *F = (*I)->getFunction();
 
-    if (F != 0 && F->isDeclaration())
+    if (F && F->isDeclaration())
       MadeChange |= inferPrototypeAttributes(*F);
   }
 
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 0c081f1..9decddc 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -15,15 +15,18 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "globaldce"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/CtorUtils.h"
 #include "llvm/Pass.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "globaldce"
+
 STATISTIC(NumAliases  , "Number of global aliases removed");
 STATISTIC(NumFunctions, "Number of functions removed");
 STATISTIC(NumVariables, "Number of global variables removed");
@@ -53,6 +56,15 @@
   };
 }
 
+/// Returns true if F contains only a single "ret" instruction.
+static bool isEmptyFunction(Function *F) {
+  BasicBlock &Entry = F->getEntryBlock();
+  if (Entry.size() != 1 || !isa<ReturnInst>(Entry.front()))
+    return false;
+  ReturnInst &RI = cast<ReturnInst>(Entry.front());
+  return RI.getReturnValue() == nullptr;
+}
+
 char GlobalDCE::ID = 0;
 INITIALIZE_PASS(GlobalDCE, "globaldce",
                 "Dead Global Elimination", false, false)
@@ -61,7 +73,10 @@
 
 bool GlobalDCE::runOnModule(Module &M) {
   bool Changed = false;
-  
+
+  // Remove empty functions from the global ctors list.
+  Changed |= optimizeGlobalCtorsList(M, isEmptyFunction);
+
   // Loop over the module, adding globals which are obviously necessary.
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
     Changed |= RemoveUnusedGlobalValue(*I);
@@ -99,7 +114,7 @@
        I != E; ++I)
     if (!AliveGlobals.count(I)) {
       DeadGlobalVars.push_back(I);         // Keep track of dead globals
-      I->setInitializer(0);
+      I->setInitializer(nullptr);
     }
 
   // The second pass drops the bodies of functions which are dead...
@@ -117,7 +132,7 @@
        ++I)
     if (!AliveGlobals.count(I)) {
       DeadAliases.push_back(I);
-      I->setAliasee(0);
+      I->setAliasee(nullptr);
     }
 
   if (!DeadFunctions.empty()) {
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 1a510cf..ae80c43 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -13,7 +13,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "globalopt"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
@@ -39,11 +38,15 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/CtorUtils.h"
 #include "llvm/Transforms/Utils/GlobalStatus.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 #include <algorithm>
+#include <deque>
 using namespace llvm;
 
+#define DEBUG_TYPE "globalopt"
+
 STATISTIC(NumMarked    , "Number of globals marked constant");
 STATISTIC(NumUnnamed   , "Number of globals marked unnamed_addr");
 STATISTIC(NumSRA       , "Number of aggregate globals broken into scalars");
@@ -74,11 +77,9 @@
     bool runOnModule(Module &M) override;
 
   private:
-    GlobalVariable *FindGlobalCtors(Module &M);
     bool OptimizeFunctions(Module &M);
     bool OptimizeGlobalVars(Module &M);
     bool OptimizeGlobalAliases(Module &M);
-    bool OptimizeGlobalCtorsList(GlobalVariable *&GCL);
     bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
     bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
                                const GlobalStatus &GS);
@@ -294,7 +295,7 @@
       Changed = true;
     } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
       if (CE->getOpcode() == Instruction::GetElementPtr) {
-        Constant *SubInit = 0;
+        Constant *SubInit = nullptr;
         if (Init)
           SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
         Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, TLI);
@@ -302,7 +303,7 @@
                   CE->getType()->isPointerTy()) ||
                  CE->getOpcode() == Instruction::AddrSpaceCast) {
         // Pointer cast, delete any stores and memsets to the global.
-        Changed |= CleanupConstantGlobalUsers(CE, 0, DL, TLI);
+        Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, TLI);
       }
 
       if (CE->use_empty()) {
@@ -313,7 +314,7 @@
       // Do not transform "gepinst (gep constexpr (GV))" here, because forming
       // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
       // and will invalidate our notion of what Init is.
-      Constant *SubInit = 0;
+      Constant *SubInit = nullptr;
       if (!isa<ConstantExpr>(GEP->getOperand(0))) {
         ConstantExpr *CE =
           dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, DL, TLI));
@@ -370,7 +371,7 @@
 
   // Otherwise, it must be a GEP.
   GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
-  if (GEPI == 0) return false;
+  if (!GEPI) return false;
 
   if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
       !cast<Constant>(GEPI->getOperand(1))->isNullValue())
@@ -470,7 +471,7 @@
 static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
   // Make sure this global only has simple uses that we can SRA.
   if (!GlobalUsersSafeToSRA(GV))
-    return 0;
+    return nullptr;
 
   assert(GV->hasLocalLinkage() && !GV->isConstant());
   Constant *Init = GV->getInitializer();
@@ -514,7 +515,7 @@
       NumElements = cast<VectorType>(STy)->getNumElements();
 
     if (NumElements > 16 && GV->hasNUsesOrMore(16))
-      return 0; // It's not worth it.
+      return nullptr; // It's not worth it.
     NewGlobals.reserve(NumElements);
 
     uint64_t EltSize = DL.getTypeAllocSize(STy->getElementType());
@@ -541,7 +542,7 @@
   }
 
   if (NewGlobals.empty())
-    return 0;
+    return nullptr;
 
   DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);
 
@@ -603,7 +604,7 @@
       if (FirstGlobal == i) ++FirstGlobal;
     }
 
-  return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0;
+  return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : nullptr;
 }
 
 /// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
@@ -785,7 +786,7 @@
       Changed |= CleanupPointerRootUsers(GV, TLI);
     } else {
       Changed = true;
-      CleanupConstantGlobalUsers(GV, 0, DL, TLI);
+      CleanupConstantGlobalUsers(GV, nullptr, DL, TLI);
     }
     if (GV->use_empty()) {
       DEBUG(dbgs() << "  *** GLOBAL NOW DEAD!\n");
@@ -847,7 +848,7 @@
   // If there are bitcast users of the malloc (which is typical, usually we have
   // a malloc + bitcast) then replace them with uses of the new global.  Update
   // other users to use the global as well.
-  BitCastInst *TheBC = 0;
+  BitCastInst *TheBC = nullptr;
   while (!CI->use_empty()) {
     Instruction *User = cast<Instruction>(CI->user_back());
     if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
@@ -858,7 +859,7 @@
         BCI->setOperand(0, NewGV);
       }
     } else {
-      if (TheBC == 0)
+      if (!TheBC)
         TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
       User->replaceUsesOfWith(CI, TheBC);
     }
@@ -1169,10 +1170,13 @@
   } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
     // PN's type is pointer to struct.  Make a new PHI of pointer to struct
     // field.
-    StructType *ST = cast<StructType>(PN->getType()->getPointerElementType());
 
+    PointerType *PTy = cast<PointerType>(PN->getType());
+    StructType *ST = cast<StructType>(PTy->getElementType());
+
+    unsigned AS = PTy->getAddressSpace();
     PHINode *NewPN =
-     PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
+      PHINode::Create(PointerType::get(ST->getElementType(FieldNo), AS),
                      PN->getNumIncomingValues(),
                      PN->getName()+".f"+Twine(FieldNo), PN);
     Result = NewPN;
@@ -1284,9 +1288,10 @@
   std::vector<Value*> FieldGlobals;
   std::vector<Value*> FieldMallocs;
 
+  unsigned AS = GV->getType()->getPointerAddressSpace();
   for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
     Type *FieldTy = STy->getElementType(FieldNo);
-    PointerType *PFieldTy = PointerType::getUnqual(FieldTy);
+    PointerType *PFieldTy = PointerType::get(FieldTy, AS);
 
     GlobalVariable *NGV =
       new GlobalVariable(*GV->getParent(),
@@ -1302,7 +1307,7 @@
     Type *IntPtrTy = DL->getIntPtrType(CI->getType());
     Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
                                         ConstantInt::get(IntPtrTy, TypeSize),
-                                        NElems, 0,
+                                        NElems, nullptr,
                                         CI->getName() + ".f" + Twine(FieldNo));
     FieldMallocs.push_back(NMI);
     new StoreInst(NMI, NGV, CI);
@@ -1535,7 +1540,7 @@
       Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
       Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
                                                    AllocSize, NumElements,
-                                                   0, CI->getName());
+                                                   nullptr, CI->getName());
       Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
       CI->replaceAllUsesWith(Cast);
       CI->eraseFromParent();
@@ -1750,7 +1755,8 @@
                                                    ->getEntryBlock().begin());
     Type *ElemTy = GV->getType()->getElementType();
     // FIXME: Pass Global's alignment when globals have alignment
-    AllocaInst *Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
+    AllocaInst *Alloca = new AllocaInst(ElemTy, nullptr,
+                                        GV->getName(), &FirstI);
     if (!isa<UndefValue>(GV->getInitializer()))
       new StoreInst(GV->getInitializer(), Alloca, &FirstI);
 
@@ -1957,116 +1963,6 @@
   return Changed;
 }
 
-/// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all
-/// initializers have an init priority of 65535.
-GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
-  GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
-  if (GV == 0) return 0;
-
-  // Verify that the initializer is simple enough for us to handle. We are
-  // only allowed to optimize the initializer if it is unique.
-  if (!GV->hasUniqueInitializer()) return 0;
-
-  if (isa<ConstantAggregateZero>(GV->getInitializer()))
-    return GV;
-  ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
-
-  for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
-    if (isa<ConstantAggregateZero>(*i))
-      continue;
-    ConstantStruct *CS = cast<ConstantStruct>(*i);
-    if (isa<ConstantPointerNull>(CS->getOperand(1)))
-      continue;
-
-    // Must have a function or null ptr.
-    if (!isa<Function>(CS->getOperand(1)))
-      return 0;
-
-    // Init priority must be standard.
-    ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0));
-    if (CI->getZExtValue() != 65535)
-      return 0;
-  }
-
-  return GV;
-}
-
-/// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand,
-/// return a list of the functions and null terminator as a vector.
-static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
-  if (GV->getInitializer()->isNullValue())
-    return std::vector<Function*>();
-  ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
-  std::vector<Function*> Result;
-  Result.reserve(CA->getNumOperands());
-  for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
-    ConstantStruct *CS = cast<ConstantStruct>(*i);
-    Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
-  }
-  return Result;
-}
-
-/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the
-/// specified array, returning the new global to use.
-static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
-                                          const std::vector<Function*> &Ctors) {
-  // If we made a change, reassemble the initializer list.
-  Constant *CSVals[2];
-  CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535);
-  CSVals[1] = 0;
-
-  StructType *StructTy =
-    cast<StructType>(GCL->getType()->getElementType()->getArrayElementType());
-
-  // Create the new init list.
-  std::vector<Constant*> CAList;
-  for (unsigned i = 0, e = Ctors.size(); i != e; ++i) {
-    if (Ctors[i]) {
-      CSVals[1] = Ctors[i];
-    } else {
-      Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()),
-                                          false);
-      PointerType *PFTy = PointerType::getUnqual(FTy);
-      CSVals[1] = Constant::getNullValue(PFTy);
-      CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()),
-                                   0x7fffffff);
-    }
-    CAList.push_back(ConstantStruct::get(StructTy, CSVals));
-  }
-
-  // Create the array initializer.
-  Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
-                                                   CAList.size()), CAList);
-
-  // If we didn't change the number of elements, don't create a new GV.
-  if (CA->getType() == GCL->getInitializer()->getType()) {
-    GCL->setInitializer(CA);
-    return GCL;
-  }
-
-  // Create the new global and insert it next to the existing list.
-  GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(),
-                                           GCL->getLinkage(), CA, "",
-                                           GCL->getThreadLocalMode());
-  GCL->getParent()->getGlobalList().insert(GCL, NGV);
-  NGV->takeName(GCL);
-
-  // Nuke the old list, replacing any uses with the new one.
-  if (!GCL->use_empty()) {
-    Constant *V = NGV;
-    if (V->getType() != GCL->getType())
-      V = ConstantExpr::getBitCast(V, GCL->getType());
-    GCL->replaceAllUsesWith(V);
-  }
-  GCL->eraseFromParent();
-
-  if (Ctors.size())
-    return NGV;
-  else
-    return 0;
-}
-
-
 static inline bool
 isSimpleEnoughValueToCommit(Constant *C,
                             SmallPtrSet<Constant*, 8> &SimpleConstants,
@@ -2271,22 +2167,16 @@
 public:
   Evaluator(const DataLayout *DL, const TargetLibraryInfo *TLI)
     : DL(DL), TLI(TLI) {
-    ValueStack.push_back(new DenseMap<Value*, Constant*>);
+    ValueStack.emplace_back();
   }
 
   ~Evaluator() {
-    DeleteContainerPointers(ValueStack);
-    while (!AllocaTmps.empty()) {
-      GlobalVariable *Tmp = AllocaTmps.back();
-      AllocaTmps.pop_back();
-
+    for (auto &Tmp : AllocaTmps)
       // If there are still users of the alloca, the program is doing something
       // silly, e.g. storing the address of the alloca somewhere and using it
       // later.  Since this is undefined, we'll just make it be null.
       if (!Tmp->use_empty())
         Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
-      delete Tmp;
-    }
   }
 
   /// EvaluateFunction - Evaluate a call to function F, returning true if
@@ -2302,13 +2192,13 @@
 
   Constant *getVal(Value *V) {
     if (Constant *CV = dyn_cast<Constant>(V)) return CV;
-    Constant *R = ValueStack.back()->lookup(V);
+    Constant *R = ValueStack.back().lookup(V);
     assert(R && "Reference to an uncomputed value!");
     return R;
   }
 
   void setVal(Value *V, Constant *C) {
-    ValueStack.back()->operator[](V) = C;
+    ValueStack.back()[V] = C;
   }
 
   const DenseMap<Constant*, Constant*> &getMutatedMemory() const {
@@ -2323,9 +2213,9 @@
   Constant *ComputeLoadResult(Constant *P);
 
   /// ValueStack - As we compute SSA register values, we store their contents
-  /// here. The back of the vector contains the current function and the stack
+  /// here. The back of the deque contains the current function and the stack
   /// contains the values in the calling frames.
-  SmallVector<DenseMap<Value*, Constant*>*, 4> ValueStack;
+  std::deque<DenseMap<Value*, Constant*>> ValueStack;
 
   /// CallStack - This is used to detect recursion.  In pathological situations
   /// we could hit exponential behavior, but at least there is nothing
@@ -2340,7 +2230,7 @@
   /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
   /// to represent its body.  This vector is needed so we can delete the
   /// temporary globals when we are done.
-  SmallVector<GlobalVariable*, 32> AllocaTmps;
+  SmallVector<std::unique_ptr<GlobalVariable>, 32> AllocaTmps;
 
   /// Invariants - These global variables have been marked invariant by the
   /// static constructor.
@@ -2369,7 +2259,7 @@
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
     if (GV->hasDefinitiveInitializer())
       return GV->getInitializer();
-    return 0;
+    return nullptr;
   }
 
   // Handle a constantexpr getelementptr.
@@ -2381,7 +2271,7 @@
         return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
     }
 
-  return 0;  // don't know how to evaluate.
+  return nullptr;  // don't know how to evaluate.
 }
 
 /// EvaluateBlock - Evaluate all instructions in block BB, returning true if
@@ -2391,7 +2281,7 @@
                               BasicBlock *&NextBB) {
   // This is the main evaluation loop.
   while (1) {
-    Constant *InstResult = 0;
+    Constant *InstResult = nullptr;
 
     DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");
 
@@ -2517,7 +2407,7 @@
               "folding: " << *Ptr << "\n");
       }
       InstResult = ComputeLoadResult(Ptr);
-      if (InstResult == 0) {
+      if (!InstResult) {
         DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load."
               "\n");
         return false; // Could not evaluate load.
@@ -2530,11 +2420,10 @@
         return false;  // Cannot handle array allocs.
       }
       Type *Ty = AI->getType()->getElementType();
-      AllocaTmps.push_back(new GlobalVariable(Ty, false,
-                                              GlobalValue::InternalLinkage,
-                                              UndefValue::get(Ty),
-                                              AI->getName()));
-      InstResult = AllocaTmps.back();
+      AllocaTmps.push_back(
+          make_unique<GlobalVariable>(Ty, false, GlobalValue::InternalLinkage,
+                                      UndefValue::get(Ty), AI->getName()));
+      InstResult = AllocaTmps.back().get();
       DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
     } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
       CallSite CS(CurInst);
@@ -2636,17 +2525,17 @@
           return false;
         }
 
-        Constant *RetVal = 0;
+        Constant *RetVal = nullptr;
         // Execute the call, if successful, use the return value.
-        ValueStack.push_back(new DenseMap<Value*, Constant*>);
+        ValueStack.emplace_back();
         if (!EvaluateFunction(Callee, RetVal, Formals)) {
           DEBUG(dbgs() << "Failed to evaluate function.\n");
           return false;
         }
-        delete ValueStack.pop_back_val();
+        ValueStack.pop_back();
         InstResult = RetVal;
 
-        if (InstResult != NULL) {
+        if (InstResult) {
           DEBUG(dbgs() << "Successfully evaluated function. Result: " <<
                 InstResult << "\n\n");
         } else {
@@ -2678,7 +2567,7 @@
         else
           return false;  // Cannot determine.
       } else if (isa<ReturnInst>(CurInst)) {
-        NextBB = 0;
+        NextBB = nullptr;
       } else {
         // invoke, unwind, resume, unreachable.
         DEBUG(dbgs() << "Can not handle terminator.");
@@ -2743,13 +2632,13 @@
   BasicBlock::iterator CurInst = CurBB->begin();
 
   while (1) {
-    BasicBlock *NextBB = 0; // Initialized to avoid compiler warnings.
+    BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings.
     DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
 
     if (!EvaluateBlock(CurInst, NextBB))
       return false;
 
-    if (NextBB == 0) {
+    if (!NextBB) {
       // Successfully running until there's no next block means that we found
       // the return.  Fill it the return value and pop the call stack.
       ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
@@ -2768,7 +2657,7 @@
     // Okay, we have never been in this block before.  Check to see if there
     // are any PHI nodes.  If so, evaluate them with information about where
     // we came from.
-    PHINode *PN = 0;
+    PHINode *PN = nullptr;
     for (CurInst = NextBB->begin();
          (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
       setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB)));
@@ -2789,6 +2678,8 @@
                                            SmallVector<Constant*, 0>());
 
   if (EvalSuccess) {
+    ++NumCtorsEvaluated;
+
     // We succeeded at evaluation: commit the result.
     DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
           << F->getName() << "' to " << Eval.getMutatedMemory().size()
@@ -2806,46 +2697,6 @@
   return EvalSuccess;
 }
 
-/// OptimizeGlobalCtorsList - Simplify and evaluation global ctors if possible.
-/// Return true if anything changed.
-bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
-  std::vector<Function*> Ctors = ParseGlobalCtors(GCL);
-  bool MadeChange = false;
-  if (Ctors.empty()) return false;
-
-  // Loop over global ctors, optimizing them when we can.
-  for (unsigned i = 0; i != Ctors.size(); ++i) {
-    Function *F = Ctors[i];
-    // Found a null terminator in the middle of the list, prune off the rest of
-    // the list.
-    if (F == 0) {
-      if (i != Ctors.size()-1) {
-        Ctors.resize(i+1);
-        MadeChange = true;
-      }
-      break;
-    }
-    DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n");
-
-    // We cannot simplify external ctor functions.
-    if (F->empty()) continue;
-
-    // If we can evaluate the ctor at compile time, do.
-    if (EvaluateStaticConstructor(F, DL, TLI)) {
-      Ctors.erase(Ctors.begin()+i);
-      MadeChange = true;
-      --i;
-      ++NumCtorsEvaluated;
-      continue;
-    }
-  }
-
-  if (!MadeChange) return false;
-
-  GCL = InstallGlobalCtors(GCL, Ctors);
-  return true;
-}
-
 static int compareNames(Constant *const *A, Constant *const *B) {
   return (*A)->getName().compare((*B)->getName());
 }
@@ -3010,7 +2861,7 @@
     if (!hasUsesToReplace(*J, Used, RenameTarget))
       continue;
 
-    J->replaceAllUsesWith(Aliasee);
+    J->replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J->getType()));
     ++NumAliasesResolved;
     Changed = true;
 
@@ -3042,12 +2893,12 @@
 
 static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
   if (!TLI->has(LibFunc::cxa_atexit))
-    return 0;
+    return nullptr;
 
   Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit));
 
   if (!Fn)
-    return 0;
+    return nullptr;
 
   FunctionType *FTy = Fn->getFunctionType();
 
@@ -3058,7 +2909,7 @@
       !FTy->getParamType(0)->isPointerTy() ||
       !FTy->getParamType(1)->isPointerTy() ||
       !FTy->getParamType(2)->isPointerTy())
-    return 0;
+    return nullptr;
 
   return Fn;
 }
@@ -3160,12 +3011,9 @@
   bool Changed = false;
 
   DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-  DL = DLP ? &DLP->getDataLayout() : 0;
+  DL = DLP ? &DLP->getDataLayout() : nullptr;
   TLI = &getAnalysis<TargetLibraryInfo>();
 
-  // Try to find the llvm.globalctors list.
-  GlobalVariable *GlobalCtors = FindGlobalCtors(M);
-
   bool LocalChange = true;
   while (LocalChange) {
     LocalChange = false;
@@ -3174,8 +3022,9 @@
     LocalChange |= OptimizeFunctions(M);
 
     // Optimize global_ctors list.
-    if (GlobalCtors)
-      LocalChange |= OptimizeGlobalCtorsList(GlobalCtors);
+    LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) {
+      return EvaluateStaticConstructor(F, DL, TLI);
+    });
 
     // Optimize non-address-taken globals.
     LocalChange |= OptimizeGlobalVars(M);
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index 8684796..af541d1 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -15,7 +15,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "ipconstprop"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -27,6 +26,8 @@
 #include "llvm/Pass.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "ipconstprop"
+
 STATISTIC(NumArgumentsProped, "Number of args turned into constants");
 STATISTIC(NumReturnValProped, "Number of return values turned into constants");
 
@@ -112,7 +113,7 @@
         continue;
       
       Constant *C = dyn_cast<Constant>(*AI);
-      if (C && ArgumentConstants[i].first == 0) {
+      if (C && ArgumentConstants[i].first == nullptr) {
         ArgumentConstants[i].first = C;   // First constant seen.
       } else if (C && ArgumentConstants[i].first == C) {
         // Still the constant value we think it is.
@@ -139,7 +140,7 @@
       continue;
   
     Value *V = ArgumentConstants[i].first;
-    if (V == 0) V = UndefValue::get(AI->getType());
+    if (!V) V = UndefValue::get(AI->getType());
     AI->replaceAllUsesWith(V);
     ++NumArgumentsProped;
     MadeChange = true;
@@ -209,7 +210,7 @@
         }
         // Different or no known return value? Don't propagate this return
         // value.
-        RetVals[i] = 0;
+        RetVals[i] = nullptr;
         // All values non-constant? Stop looking.
         if (++NumNonConstant == RetVals.size())
           return false;
@@ -235,7 +236,7 @@
 
     MadeChange = true;
 
-    if (STy == 0) {
+    if (!STy) {
       Value* New = RetVals[0];
       if (Argument *A = dyn_cast<Argument>(New))
         // Was an argument returned? Then find the corresponding argument in
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index 6cf3040..624cb90 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -12,7 +12,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "inline"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/CallGraph.h"
@@ -28,6 +27,8 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "inline"
+
 namespace {
 
 /// \brief Inliner pass which only handles "always inline" functions.
@@ -36,12 +37,13 @@
 
 public:
   // Use extremely low threshold.
-  AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true), ICA(0) {
+  AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true),
+                    ICA(nullptr) {
     initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
   }
 
   AlwaysInliner(bool InsertLifetime)
-      : Inliner(ID, -2000000000, InsertLifetime), ICA(0) {
+      : Inliner(ID, -2000000000, InsertLifetime), ICA(nullptr) {
     initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
   }
 
@@ -93,8 +95,7 @@
   // that are viable for inlining. FIXME: We shouldn't even get here for
   // declarations.
   if (Callee && !Callee->isDeclaration() &&
-      Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
-                                           Attribute::AlwaysInline) &&
+      CS.hasFnAttr(Attribute::AlwaysInline) &&
       ICA->isInlineViable(*Callee))
     return InlineCost::getAlways();
 
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 7141064..d189756 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "inline"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/InlineCost.h"
@@ -26,6 +25,8 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "inline"
+
 namespace {
 
 /// \brief Actual inliner pass implementation.
@@ -37,12 +38,12 @@
   InlineCostAnalysis *ICA;
 
 public:
-  SimpleInliner() : Inliner(ID), ICA(0) {
+  SimpleInliner() : Inliner(ID), ICA(nullptr) {
     initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
   }
 
   SimpleInliner(int Threshold)
-      : Inliner(ID, Threshold, /*InsertLifetime*/ true), ICA(0) {
+      : Inliner(ID, Threshold, /*InsertLifetime*/ true), ICA(nullptr) {
     initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
   }
 
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index e97fb83..9087ab2 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -13,7 +13,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "inline"
 #include "llvm/Transforms/IPO/InlinerPass.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
@@ -21,6 +20,7 @@
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
@@ -32,6 +32,8 @@
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "inline"
+
 STATISTIC(NumInlined, "Number of functions inlined");
 STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
 STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
@@ -183,7 +185,7 @@
     // canonicalized to be an allocation *of* an array), or allocations whose
     // type is not itself an array (because we're afraid of pessimizing SRoA).
     ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
-    if (ATy == 0 || AI->isArrayAllocation())
+    if (!ATy || AI->isArrayAllocation())
       continue;
     
     // Get the list of all available allocas for this array type.
@@ -239,7 +241,7 @@
       AI->eraseFromParent();
       MergedAwayAlloca = true;
       ++NumMergedAllocas;
-      IFI.StaticAllocas[AllocaNo] = 0;
+      IFI.StaticAllocas[AllocaNo] = nullptr;
       break;
     }
 
@@ -288,12 +290,24 @@
   bool ColdCallee = Callee && !Callee->isDeclaration() &&
     Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                          Attribute::Cold);
-  if (ColdCallee && ColdThreshold < thres)
+  // Command line argument for InlineLimit will override the default
+  // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
+  // do not use the default cold threshold even if it is smaller.
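+  // For example, with -inline-threshold=500 and no -inlinecold-threshold, a
+  // cold callee is still considered with a threshold of 500.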
+  if ((InlineLimit.getNumOccurrences() == 0 ||
+       ColdThreshold.getNumOccurrences() > 0) && ColdCallee &&
+      ColdThreshold < thres)
     thres = ColdThreshold;
 
   return thres;
 }
 
+static void emitAnalysis(CallSite CS, const Twine &Msg) {
+  Function *Caller = CS.getCaller();
+  LLVMContext &Ctx = Caller->getContext();
+  DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+  emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg);
+}
+
 /// shouldInline - Return true if the inliner should attempt to inline
 /// at the given CallSite.
 bool Inliner::shouldInline(CallSite CS) {
@@ -302,12 +316,16 @@
   if (IC.isAlways()) {
     DEBUG(dbgs() << "    Inlining: cost=always"
           << ", Call: " << *CS.getInstruction() << "\n");
+    emitAnalysis(CS, Twine(CS.getCalledFunction()->getName()) +
+                         " should always be inlined (cost=always)");
     return true;
   }
   
   if (IC.isNever()) {
     DEBUG(dbgs() << "    NOT Inlining: cost=never"
           << ", Call: " << *CS.getInstruction() << "\n");
+    emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
+                           " should never be inlined (cost=never)"));
     return false;
   }
   
@@ -316,6 +334,10 @@
     DEBUG(dbgs() << "    NOT Inlining: cost=" << IC.getCost()
           << ", thres=" << (IC.getCostDelta() + IC.getCost())
           << ", Call: " << *CS.getInstruction() << "\n");
+    emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
+                           " too costly to inline (cost=") +
+                         Twine(IC.getCost()) + ", threshold=" +
+                         Twine(IC.getCostDelta() + IC.getCost()) + ")");
     return false;
   }
   
@@ -383,6 +405,11 @@
       DEBUG(dbgs() << "    NOT Inlining: " << *CS.getInstruction() <<
            " Cost = " << IC.getCost() <<
            ", outer Cost = " << TotalSecondaryCost << '\n');
+      emitAnalysis(
+          CS, Twine("Not inlining. Cost of inlining " +
+                    CS.getCalledFunction()->getName() +
+                    " increases the cost of inlining " +
+                    CS.getCaller()->getName() + " in other contexts"));
       return false;
     }
   }
@@ -390,6 +417,10 @@
   DEBUG(dbgs() << "    Inlining: cost=" << IC.getCost()
         << ", thres=" << (IC.getCostDelta() + IC.getCost())
         << ", Call: " << *CS.getInstruction() << '\n');
+  emitAnalysis(
+      CS, CS.getCalledFunction()->getName() + Twine(" can be inlined into ") +
+              CS.getCaller()->getName() + " with cost=" + Twine(IC.getCost()) +
+              " (threshold=" + Twine(IC.getCostDelta() + IC.getCost()) + ")");
   return true;
 }
 
@@ -410,7 +441,7 @@
 bool Inliner::runOnSCC(CallGraphSCC &SCC) {
   CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
   DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-  const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
+  const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
   const TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
 
   SmallPtrSet<Function*, 8> SCCFunctions;
@@ -499,7 +530,7 @@
         ++NumCallsDeleted;
       } else {
         // We can only inline direct calls to non-declarations.
-        if (Callee == 0 || Callee->isDeclaration()) continue;
+        if (!Callee || Callee->isDeclaration()) continue;
       
         // If this call site was obtained by inlining another function, verify
         // that the include path for the function did not include the callee
@@ -511,18 +542,37 @@
             InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
           continue;
         
-        
+        LLVMContext &CallerCtx = Caller->getContext();
+
+        // Get DebugLoc to report. CS will be invalid after Inliner.
+        DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+
         // If the policy determines that we should inline this function,
         // try to do so.
-        if (!shouldInline(CS))
+        if (!shouldInline(CS)) {
+          emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+                                       Twine(Callee->getName() +
+                                             " will not be inlined into " +
+                                             Caller->getName()));
           continue;
+        }
 
         // Attempt to inline the function.
         if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
-                                  InlineHistoryID, InsertLifetime, DL))
+                                  InlineHistoryID, InsertLifetime, DL)) {
+          emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+                                       Twine(Callee->getName() +
+                                             " will not be inlined into " +
+                                             Caller->getName()));
           continue;
+        }
         ++NumInlined;
-        
+
+        // Report the inline decision.
+        emitOptimizationRemark(
+            CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+            Twine(Callee->getName() + " inlined into " + Caller->getName()));
+
         // If inlining this function gave us any new call sites, throw them
         // onto our worklist to process.  They are useful inline candidates.
         if (!InlineInfo.InlinedCalls.empty()) {
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index c1fe01c..c970a1a 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -19,7 +19,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "internalize"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
@@ -35,6 +34,8 @@
 #include <set>
 using namespace llvm;
 
+#define DEBUG_TYPE "internalize"
+
 STATISTIC(NumAliases  , "Number of aliases internalized");
 STATISTIC(NumFunctions, "Number of functions internalized");
 STATISTIC(NumGlobals  , "Number of global vars internalized");
@@ -131,8 +132,8 @@
 
 bool InternalizePass::runOnModule(Module &M) {
   CallGraphWrapperPass *CGPass = getAnalysisIfAvailable<CallGraphWrapperPass>();
-  CallGraph *CG = CGPass ? &CGPass->getCallGraph() : 0;
-  CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0;
+  CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
+  CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
   bool Changed = false;
 
   SmallPtrSet<GlobalValue *, 8> Used;
@@ -158,6 +159,7 @@
     if (!shouldInternalize(*I, ExternalNames))
       continue;
 
+    I->setVisibility(GlobalValue::DefaultVisibility);
     I->setLinkage(GlobalValue::InternalLinkage);
 
     if (ExternalNode)
@@ -194,6 +196,7 @@
     if (!shouldInternalize(*I, ExternalNames))
       continue;
 
+    I->setVisibility(GlobalValue::DefaultVisibility);
     I->setLinkage(GlobalValue::InternalLinkage);
     Changed = true;
     ++NumGlobals;
@@ -206,6 +209,7 @@
     if (!shouldInternalize(*I, ExternalNames))
       continue;
 
+    I->setVisibility(GlobalValue::DefaultVisibility);
     I->setLinkage(GlobalValue::InternalLinkage);
     Changed = true;
     ++NumAliases;
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index 464aa99..20414aa 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -14,7 +14,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "loop-extract"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/LoopPass.h"
@@ -30,6 +29,8 @@
 #include <set>
 using namespace llvm;
 
+#define DEBUG_TYPE "loop-extract"
+
 STATISTIC(NumExtracted, "Number of loops extracted");
 
 namespace {
@@ -136,7 +137,7 @@
     if (NumLoops == 0) return Changed;
     --NumLoops;
     CodeExtractor Extractor(DT, *L);
-    if (Extractor.extractCodeRegion() != 0) {
+    if (Extractor.extractCodeRegion() != nullptr) {
       Changed = true;
       // After extraction, the loop is replaced by a function call, so
       // we shouldn't try to run any more loop passes on it.
@@ -241,7 +242,7 @@
     if (!Split) continue;
 
     SmallVector<BasicBlock*, 2> NewBBs;
-    SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", 0, NewBBs);
+    SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", nullptr, NewBBs);
   }
 }
 
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 8555d2c..c3a2b12 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -43,7 +43,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "mergefunc"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/FoldingSet.h"
@@ -67,6 +66,8 @@
 #include <vector>
 using namespace llvm;
 
+#define DEBUG_TYPE "mergefunc"
+
 STATISTIC(NumFunctionsMerged, "Number of functions merged");
 STATISTIC(NumThunksWritten, "Number of thunks generated");
 STATISTIC(NumAliasesWritten, "Number of aliases generated");
@@ -120,12 +121,12 @@
   void release() {
     assert(Func &&
            "Attempted to release function twice, or release empty/tombstone!");
-    Func = NULL;
+    Func = nullptr;
   }
 
 private:
   explicit ComparableFunction(unsigned Hash)
-    : Func(NULL), Hash(Hash), DL(NULL) {}
+    : Func(nullptr), Hash(Hash), DL(nullptr) {}
 
   AssertingVH<Function> Func;
   unsigned Hash;
@@ -175,19 +176,181 @@
   /// Test whether two basic blocks have equivalent behaviour.
   bool compare(const BasicBlock *BB1, const BasicBlock *BB2);
 
+  /// Constants comparison.
+  /// It is analogous to a lexicographical comparison between hypothetical
+  /// numbers of the following format:
+  /// <bitcastability-trait><raw-bit-contents>
+  ///
+  /// 1. Bitcastability.
+  /// Check whether L's type could be losslessly bitcasted to R's type.
+  /// At this stage, if a lossless bitcast is not possible, the method returns
+  /// -1 or 1, which also defines which type is greater in the context of
+  /// bitcastability.
+  /// Stage 0: If the types are equal in terms of cmpTypes, go straight to the
+  ///          contents comparison.
+  ///          If the types differ, remember the type comparison result and
+  ///          check whether the types can still be bitcasted.
+  /// Stage 1: Types that satisfy the isFirstClassType condition are always
+  ///          greater than others.
+  /// Stage 2: A vector is greater than a non-vector.
+  ///          If both types are vectors, the vector with the greater bitwidth
+  ///          is greater.
+  ///          If both types are vectors with the same bitwidth, the types are
+  ///          bitcastable, so we can skip the remaining stages and go to the
+  ///          contents comparison.
+  /// Stage 3: Pointer types are greater than non-pointers. If both types are
+  ///          pointers in the same address space, go to the contents
+  ///          comparison. For different address spaces, the pointer with the
+  ///          greater address space is greater.
+  /// Stage 4: The types are neither vectors nor pointers, and they differ, so
+  ///          we do not know how to bitcast them. Instead, return the type
+  ///          comparison result (it determines the relationship among
+  ///          constants we do not know how to bitcast).
+  ///
+  /// Just for clarity, here is how the set of constants could look on a
+  /// single-dimension axis:
+  ///
+  /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
+  /// Where: NFCT - Not a FirstClassType
+  ///        FCT  - FirstClassType
+  ///
+  /// 2. Compare raw contents.
+  /// This stage ignores types and only compares the bits of L and R.
+  /// Returns 0 if L and R have equivalent contents, and -1 or 1 if the values
+  /// differ.
+  /// Pretty trivial:
+  /// 2.1. If the contents are numbers, compare the numbers.
+  ///    Ints with a greater bitwidth are greater. Ints with the same bitwidth
+  ///    are compared by their contents.
+  /// 2.2. "And so on." To avoid discrepancies between this comment and the
+  /// code, it is better to read the implementation itself for the rest.
+  /// 3. Back to the overall picture. The ordered set of constants looks like
+  /// this:
+  /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
+  ///
+  /// Now look, what could be inside [FCT, "others"], for example:
+  /// [FCT, "others"] =
+  /// [
+  ///   [double 0.1], [double 1.23],
+  ///   [i32 1], [i32 2],
+  ///   { double 1.0 },       ; StructTyID, NumElements = 1
+  ///   { i32 1 },            ; StructTyID, NumElements = 1
+  ///   { double 1, i32 1 },  ; StructTyID, NumElements = 2
+  ///   { i32 1, double 1 }   ; StructTyID, NumElements = 2
+  /// ]
+  ///
+  /// To explain the order: floating-point numbers are less than integers,
+  /// simply because of cmpType terms: FloatTyID < IntegerTyID.
+  /// Floats (with the same fltSemantics) are sorted according to their value.
+  /// Next come the integers, which, like the floats, are easily sorted among
+  /// each other.
+  /// Structures are grouped at the tail, again because of their TypeID:
+  /// StructTyID > IntegerTyID > FloatTyID.
+  /// Structures with a greater number of elements are greater; structures
+  /// whose leading elements are greater are greater.
+  /// The same logic applies to vectors, arrays, and other complex types.
+  ///
+  /// Bitcastable constants.
+  /// Let's assume that some constant belongs to a group of "so-called-equal"
+  /// values with different types, and at the same time belongs to another
+  /// group of constants with equal types and "really" equal values.
+  ///
+  /// Now let's prove that this is impossible:
+  ///
+  /// If constant A with type TyA is bitcastable to B with type TyB, then:
+  /// 1. All constants whose types are equal to TyA are bitcastable to B,
+  ///    since those types must be vectors (if TyA is a vector), pointers
+  ///    (if TyA is a pointer), or otherwise (if TyA equals TyB) equal to TyB.
+  /// 2. All constants whose types are not equal to TyA but bitcastable to it
+  ///    are bitcastable to B, again because we allow bitcasting only for
+  ///    vectors and pointers. This statement expands as follows:
+  /// 2.1. Any vector with the same bitwidth as vector A has the same bitwidth
+  ///      as vector B, and is thus bitcastable to B as well.
+  /// 2.2. Pointers in the same address space are bitcastable regardless of
+  ///      what they point to, so if C is a pointer it can be bitcasted to
+  ///      both A and B.
+  /// So any constant equal or bitcastable to A is equal or bitcastable to B.
+  /// QED.
+  ///
+  /// In other words, for pointers and vectors we ignore the top-level type
+  /// and look only at their relevant properties (bitwidth for vectors,
+  /// address space for pointers).
+  /// If these properties are equal, we compare their contents.
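+  ///
+  /// A purely illustrative example (hypothetical constants, not taken from
+  /// the pass itself): under this ordering
+  ///   double 2.0  <  i32 7  <  i8* null  <  <2 x i32> <i32 0, i32 1>
+  /// since "others" sort before pointers, pointers before vectors, and
+  /// within "others" FloatTyID < IntegerTyID.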
+  int cmpConstants(const Constant *L, const Constant *R);
+
   /// Assign or look up previously assigned numbers for the two values, and
   /// return whether the numbers are equal. Numbers are assigned in the order
   /// visited.
-  bool enumerate(const Value *V1, const Value *V2);
+  /// Comparison order:
+  /// Stage 0: A value that is the function itself is always greater than
+  ///          all other values.
+  ///          If the left and right values are references to their
+  ///          respective functions, they are equal.
+  /// Stage 1: Constants are greater than non-constants.
+  ///          If both left and right are constants, the result of
+  ///          cmpConstants is used as the cmpValues result.
+  /// Stage 2: InlineAsm instances are greater than other values. If both
+  ///          left and right are InlineAsm instances, the InlineAsm*
+  ///          pointers are cast to integers and compared as numbers.
+  /// Stage 3: In all remaining cases we compare the order in which the
+  ///          values were first met in their functions. If the right value
+  ///          was met first during scanning, the left value is greater.
+  ///          In other words, we compare serial numbers; for details see
+  ///          the comments for sn_mapL and sn_mapR.
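+  ///
+  /// A purely illustrative example (hypothetical IR): when comparing
+  ///   %a = add i32 %x, 1        with        %p = add i32 %y, 1
+  /// the uses of %a on the left and of %p on the right are met at the same
+  /// point of the scan, so they receive equal serial numbers and compare
+  /// equal at Stage 3.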
+  int cmpValues(const Value *L, const Value *R);
+
+  bool enumerate(const Value *V1, const Value *V2) {
+    return cmpValues(V1, V2) == 0;
+  }
 
   /// Compare two Instructions for equivalence, similar to
   /// Instruction::isSameOperationAs but with modifications to the type
   /// comparison.
+  /// Stages are listed in "most significant stage first" order:
+  /// At each stage below we compare some part of the left and right
+  /// operations. If the parts differ, their comparison result becomes the
+  /// result for the whole operation and we return from the method.
+  /// Otherwise we proceed to the next stage.
+  /// Stages:
+  /// 1. Operation opcodes, compared as numbers.
+  /// 2. Number of operands.
+  /// 3. Operation types, compared with the cmpType method.
+  /// 4. Operation subclass optional data, compared as a stream of bytes:
+  /// just convert it to integers and call cmpNumbers.
+  /// 5. Operand types, compared with cmpType, in most significant operand
+  /// first order.
+  /// 6. Last stage: check the operations for instruction-specific
+  /// attributes. For example, for a Load these are:
+  /// 6.1. Load: volatile (as a boolean flag)
+  /// 6.2. Load: alignment (as an integer)
+  /// 6.3. Load: synch-scope (as an integer)
+  /// For this stage it is better to read the code, since it is only 10-15
+  /// lines per instruction and may change over time.
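+  /// For example (hypothetical case): two load instructions that differ only
+  /// in their alignment agree on stages 1-5 and 6.1, and are told apart at
+  /// stage 6.2 by cmpNumbers on their alignments.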
+  int cmpOperation(const Instruction *L, const Instruction *R) const;
+
   bool isEquivalentOperation(const Instruction *I1,
-                             const Instruction *I2) const;
+                             const Instruction *I2) const {
+    return cmpOperation(I1, I2) == 0;
+  }
 
   /// Compare two GEPs for equivalent pointer arithmetic.
-  bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2);
+  /// Parts to be compared at each stage, most significant stage first:
+  /// 1. Address space, compared as numbers.
+  /// 2. Constant offset (if the "DataLayout *DL" field is not null), computed
+  /// with the GEPOperator::accumulateConstantOffset method.
+  /// 3. Pointer operand type (using the cmpType method).
+  /// 4. Number of operands.
+  /// 5. Operands, compared with the cmpValues method.
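+  /// For example (hypothetical case, DL available): two GEPs whose index
+  /// lists reduce to the same constant byte offset compare equal at stage 2,
+  /// even if their pointer operand types differ.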
+  int cmpGEP(const GEPOperator *GEPL, const GEPOperator *GEPR);
+  int cmpGEP(const GetElementPtrInst *GEPL, const GetElementPtrInst *GEPR) {
+    return cmpGEP(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR));
+  }
+
+  bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2) {
+    return cmpGEP(GEP1, GEP2) == 0;
+  }
   bool isEquivalentGEP(const GetElementPtrInst *GEP1,
                        const GetElementPtrInst *GEP2) {
     return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2));
@@ -241,13 +404,50 @@
 
   int cmpNumbers(uint64_t L, uint64_t R) const;
 
+  int cmpAPInt(const APInt &L, const APInt &R) const;
+  int cmpAPFloat(const APFloat &L, const APFloat &R) const;
+  int cmpStrings(StringRef L, StringRef R) const;
+  int cmpAttrs(const AttributeSet L, const AttributeSet R) const;
+
   // The two functions undergoing comparison.
   const Function *F1, *F2;
 
   const DataLayout *DL;
 
-  DenseMap<const Value *, const Value *> id_map;
-  DenseSet<const Value *> seen_values;
+  /// Assign serial numbers to values from the left function, and to values
+  /// from the right function.
+  /// Explanation:
+  /// While comparing the functions we need to compare the values we meet on
+  /// the left and right sides.
+  /// External values are easy to sort out: the same value simply has to
+  /// appear on both sides.
+  /// But for local values (those introduced inside the function body) we
+  /// have to ensure they were introduced at exactly the same place and play
+  /// the same role.
+  /// So we assign a serial number to each value the first time we meet it;
+  /// values met at the same place receive the same serial numbers.
+  /// A few points about the numbers assigned to BBs, and about alternative
+  /// implementations, are worth explaining (see below).
+  ///
+  /// 1. Safety of BB reordering.
+  /// It is safe to change the order of BasicBlocks within a function:
+  /// neither the relationship with other functions nor the serial numbering
+  /// changes in that case.
+  /// As FunctionComparator::compare() shows, we do a CFG walk: we start from
+  /// the entry block and then follow each terminator. So it does not matter
+  /// how the BBs happen to be ordered inside the function, and since
+  /// cmpValues is called during this walk, the numbering depends only on
+  /// where the BBs sit in the CFG. So the answer is yes: we get the same
+  /// numbering.
+  ///
+  /// 2. Impossibility of using dominance properties of values.
+  /// If we compare two instruction operands, where the first is a use of
+  /// local variable AL from function FL and the second is a use of local
+  /// variable AR from FR, we could compare their origins and check whether
+  /// they are defined at the same place.
+  /// But we are still unable to compare the operands of PHI nodes, since
+  /// those may come from later BBs that we have not scanned yet.
+  /// So it is impossible to use dominance properties in general.
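+  ///
+  /// A minimal sketch of the numbering idiom used by cmpValues (it relies on
+  /// insert() returning the existing entry when the key is already mapped):
+  /// \code
+  ///   auto SN = sn_mapL.insert(std::make_pair(V, sn_mapL.size()));
+  ///   int SerialNumber = SN.first->second; // assigned on first encounter
+  /// \endcode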
+  DenseMap<const Value*, int> sn_mapL, sn_mapR;
 };
 
 }
@@ -258,6 +458,206 @@
   return 0;
 }
 
+int FunctionComparator::cmpAPInt(const APInt &L, const APInt &R) const {
+  if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth()))
+    return Res;
+  if (L.ugt(R)) return 1;
+  if (R.ugt(L)) return -1;
+  return 0;
+}
+
+int FunctionComparator::cmpAPFloat(const APFloat &L, const APFloat &R) const {
+  if (int Res = cmpNumbers((uint64_t)&L.getSemantics(),
+                           (uint64_t)&R.getSemantics()))
+    return Res;
+  return cmpAPInt(L.bitcastToAPInt(), R.bitcastToAPInt());
+}
+
+int FunctionComparator::cmpStrings(StringRef L, StringRef R) const {
+  // Avoid a potentially expensive comparison: compare sizes first.
+  if (int Res = cmpNumbers(L.size(), R.size()))
+    return Res;
+
+  // Compare the strings lexicographically only when it is necessary: that
+  // is, only when they are equal in size.
+  return L.compare(R);
+}
+
+int FunctionComparator::cmpAttrs(const AttributeSet L,
+                                 const AttributeSet R) const {
+  if (int Res = cmpNumbers(L.getNumSlots(), R.getNumSlots()))
+    return Res;
+
+  for (unsigned i = 0, e = L.getNumSlots(); i != e; ++i) {
+    AttributeSet::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i),
+                           RE = R.end(i);
+    for (; LI != LE && RI != RE; ++LI, ++RI) {
+      Attribute LA = *LI;
+      Attribute RA = *RI;
+      if (LA < RA)
+        return -1;
+      if (RA < LA)
+        return 1;
+    }
+    if (LI != LE)
+      return 1;
+    if (RI != RE)
+      return -1;
+  }
+  return 0;
+}
+
+/// Constants comparison:
+/// 1. Check whether the type of the L constant can be losslessly bitcast to
+/// the type of R.
+/// 2. Compare constant contents.
+/// For more details see declaration comments.
+int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
+
+  Type *TyL = L->getType();
+  Type *TyR = R->getType();
+
+  // Check whether the types are bitcastable. This part is essentially a
+  // refactored Type::canLosslesslyBitCastTo, but instead of returning
+  // true/false it also encodes in the result which type is "less" for us.
+  int TypesRes = cmpType(TyL, TyR);
+  if (TypesRes != 0) {
+    // Types are different, but check whether we can bitcast them.
+    if (!TyL->isFirstClassType()) {
+      if (TyR->isFirstClassType())
+        return -1;
+      // Neither TyL nor TyR is a first class type. Return the result of
+      // comparing the types.
+      return TypesRes;
+    }
+    if (!TyR->isFirstClassType()) {
+      if (TyL->isFirstClassType())
+        return 1;
+      return TypesRes;
+    }
+
+    // Vector -> Vector conversions are always lossless if the two vector types
+    // have the same size, otherwise not.
+    unsigned TyLWidth = 0;
+    unsigned TyRWidth = 0;
+
+    if (const VectorType *VecTyL = dyn_cast<VectorType>(TyL))
+      TyLWidth = VecTyL->getBitWidth();
+    if (const VectorType *VecTyR = dyn_cast<VectorType>(TyR))
+      TyRWidth = VecTyR->getBitWidth();
+
+    if (TyLWidth != TyRWidth)
+      return cmpNumbers(TyLWidth, TyRWidth);
+
+    // Zero bit-width means neither TyL nor TyR are vectors.
+    if (!TyLWidth) {
+      PointerType *PTyL = dyn_cast<PointerType>(TyL);
+      PointerType *PTyR = dyn_cast<PointerType>(TyR);
+      if (PTyL && PTyR) {
+        unsigned AddrSpaceL = PTyL->getAddressSpace();
+        unsigned AddrSpaceR = PTyR->getAddressSpace();
+        if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR))
+          return Res;
+      }
+      if (PTyL)
+        return 1;
+      if (PTyR)
+        return -1;
+
+      // TyL and TyR aren't vectors, nor pointers. We don't know how to
+      // bitcast them.
+      return TypesRes;
+    }
+  }
+
+  // OK, types are bitcastable, now check constant contents.
+
+  if (L->isNullValue() && R->isNullValue())
+    return TypesRes;
+  if (L->isNullValue() && !R->isNullValue())
+    return 1;
+  if (!L->isNullValue() && R->isNullValue())
+    return -1;
+
+  if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
+    return Res;
+
+  switch (L->getValueID()) {
+  case Value::UndefValueVal: return TypesRes;
+  case Value::ConstantIntVal: {
+    const APInt &LInt = cast<ConstantInt>(L)->getValue();
+    const APInt &RInt = cast<ConstantInt>(R)->getValue();
+    return cmpAPInt(LInt, RInt);
+  }
+  case Value::ConstantFPVal: {
+    const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF();
+    const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF();
+    return cmpAPFloat(LAPF, RAPF);
+  }
+  case Value::ConstantArrayVal: {
+    const ConstantArray *LA = cast<ConstantArray>(L);
+    const ConstantArray *RA = cast<ConstantArray>(R);
+    uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements();
+    uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements();
+    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+      return Res;
+    for (uint64_t i = 0; i < NumElementsL; ++i) {
+      if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)),
+                                 cast<Constant>(RA->getOperand(i))))
+        return Res;
+    }
+    return 0;
+  }
+  case Value::ConstantStructVal: {
+    const ConstantStruct *LS = cast<ConstantStruct>(L);
+    const ConstantStruct *RS = cast<ConstantStruct>(R);
+    unsigned NumElementsL = cast<StructType>(TyL)->getNumElements();
+    unsigned NumElementsR = cast<StructType>(TyR)->getNumElements();
+    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+      return Res;
+    for (unsigned i = 0; i != NumElementsL; ++i) {
+      if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)),
+                                 cast<Constant>(RS->getOperand(i))))
+        return Res;
+    }
+    return 0;
+  }
+  case Value::ConstantVectorVal: {
+    const ConstantVector *LV = cast<ConstantVector>(L);
+    const ConstantVector *RV = cast<ConstantVector>(R);
+    unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements();
+    unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements();
+    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+      return Res;
+    for (uint64_t i = 0; i < NumElementsL; ++i) {
+      if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)),
+                                 cast<Constant>(RV->getOperand(i))))
+        return Res;
+    }
+    return 0;
+  }
+  case Value::ConstantExprVal: {
+    const ConstantExpr *LE = cast<ConstantExpr>(L);
+    const ConstantExpr *RE = cast<ConstantExpr>(R);
+    unsigned NumOperandsL = LE->getNumOperands();
+    unsigned NumOperandsR = RE->getNumOperands();
+    if (int Res = cmpNumbers(NumOperandsL, NumOperandsR))
+      return Res;
+    for (unsigned i = 0; i < NumOperandsL; ++i) {
+      if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)),
+                                 cast<Constant>(RE->getOperand(i))))
+        return Res;
+    }
+    return 0;
+  }
+  case Value::FunctionVal:
+  case Value::GlobalVariableVal:
+  case Value::GlobalAliasVal:
+  default: // Unknown constant, cast L and R pointers to numbers and compare.
+    return cmpNumbers((uint64_t)L, (uint64_t)R);
+  }
+}
+
 /// cmpType - compares two types,
 /// defines total ordering among the types set.
 /// See method declaration comments for more details.
@@ -350,143 +750,209 @@
 // Determine whether the two operations are the same except that pointer-to-A
 // and pointer-to-B are equivalent. This should be kept in sync with
 // Instruction::isSameOperationAs.
-bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
-                                               const Instruction *I2) const {
+// Read method declaration comments for more details.
+int FunctionComparator::cmpOperation(const Instruction *L,
+                                     const Instruction *R) const {
   // Differences from Instruction::isSameOperationAs:
   //  * replace type comparison with calls to isEquivalentType.
   //  * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top
   //  * because of the above, we don't test for the tail bit on calls later on
-  if (I1->getOpcode() != I2->getOpcode() ||
-      I1->getNumOperands() != I2->getNumOperands() ||
-      !isEquivalentType(I1->getType(), I2->getType()) ||
-      !I1->hasSameSubclassOptionalData(I2))
-    return false;
+  if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode()))
+    return Res;
+
+  if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
+    return Res;
+
+  if (int Res = cmpType(L->getType(), R->getType()))
+    return Res;
+
+  if (int Res = cmpNumbers(L->getRawSubclassOptionalData(),
+                           R->getRawSubclassOptionalData()))
+    return Res;
 
   // We have two instructions of identical opcode and #operands.  Check to see
   // if all operands are the same type
-  for (unsigned i = 0, e = I1->getNumOperands(); i != e; ++i)
-    if (!isEquivalentType(I1->getOperand(i)->getType(),
-                          I2->getOperand(i)->getType()))
-      return false;
+  for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
+    if (int Res =
+            cmpType(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
+      return Res;
+  }
 
   // Check special state that is a part of some instructions.
-  if (const LoadInst *LI = dyn_cast<LoadInst>(I1))
-    return LI->isVolatile() == cast<LoadInst>(I2)->isVolatile() &&
-           LI->getAlignment() == cast<LoadInst>(I2)->getAlignment() &&
-           LI->getOrdering() == cast<LoadInst>(I2)->getOrdering() &&
-           LI->getSynchScope() == cast<LoadInst>(I2)->getSynchScope();
-  if (const StoreInst *SI = dyn_cast<StoreInst>(I1))
-    return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() &&
-           SI->getAlignment() == cast<StoreInst>(I2)->getAlignment() &&
-           SI->getOrdering() == cast<StoreInst>(I2)->getOrdering() &&
-           SI->getSynchScope() == cast<StoreInst>(I2)->getSynchScope();
-  if (const CmpInst *CI = dyn_cast<CmpInst>(I1))
-    return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate();
-  if (const CallInst *CI = dyn_cast<CallInst>(I1))
-    return CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() &&
-           CI->getAttributes() == cast<CallInst>(I2)->getAttributes();
-  if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1))
-    return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() &&
-           CI->getAttributes() == cast<InvokeInst>(I2)->getAttributes();
-  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1))
-    return IVI->getIndices() == cast<InsertValueInst>(I2)->getIndices();
-  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1))
-    return EVI->getIndices() == cast<ExtractValueInst>(I2)->getIndices();
-  if (const FenceInst *FI = dyn_cast<FenceInst>(I1))
-    return FI->getOrdering() == cast<FenceInst>(I2)->getOrdering() &&
-           FI->getSynchScope() == cast<FenceInst>(I2)->getSynchScope();
-  if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I1))
-    return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I2)->isVolatile() &&
-           CXI->getSuccessOrdering() ==
-               cast<AtomicCmpXchgInst>(I2)->getSuccessOrdering() &&
-           CXI->getFailureOrdering() ==
-               cast<AtomicCmpXchgInst>(I2)->getFailureOrdering() &&
-           CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I2)->getSynchScope();
-  if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I1))
-    return RMWI->getOperation() == cast<AtomicRMWInst>(I2)->getOperation() &&
-           RMWI->isVolatile() == cast<AtomicRMWInst>(I2)->isVolatile() &&
-           RMWI->getOrdering() == cast<AtomicRMWInst>(I2)->getOrdering() &&
-           RMWI->getSynchScope() == cast<AtomicRMWInst>(I2)->getSynchScope();
+  if (const LoadInst *LI = dyn_cast<LoadInst>(L)) {
+    if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile()))
+      return Res;
+    if (int Res =
+            cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment()))
+      return Res;
+    if (int Res =
+            cmpNumbers(LI->getOrdering(), cast<LoadInst>(R)->getOrdering()))
+      return Res;
+    return cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope());
+  }
+  if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
+    if (int Res =
+            cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile()))
+      return Res;
+    if (int Res =
+            cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment()))
+      return Res;
+    if (int Res =
+            cmpNumbers(SI->getOrdering(), cast<StoreInst>(R)->getOrdering()))
+      return Res;
+    return cmpNumbers(SI->getSynchScope(), cast<StoreInst>(R)->getSynchScope());
+  }
+  if (const CmpInst *CI = dyn_cast<CmpInst>(L))
+    return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate());
+  if (const CallInst *CI = dyn_cast<CallInst>(L)) {
+    if (int Res = cmpNumbers(CI->getCallingConv(),
+                             cast<CallInst>(R)->getCallingConv()))
+      return Res;
+    return cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes());
+  }
+  if (const InvokeInst *CI = dyn_cast<InvokeInst>(L)) {
+    if (int Res = cmpNumbers(CI->getCallingConv(),
+                             cast<InvokeInst>(R)->getCallingConv()))
+      return Res;
+    return cmpAttrs(CI->getAttributes(), cast<InvokeInst>(R)->getAttributes());
+  }
+  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
+    ArrayRef<unsigned> LIndices = IVI->getIndices();
+    ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices();
+    if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
+      return Res;
+    for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
+      if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
+        return Res;
+    }
+  }
+  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) {
+    ArrayRef<unsigned> LIndices = EVI->getIndices();
+    ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices();
+    if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
+      return Res;
+    for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
+      if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
+        return Res;
+    }
+  }
+  if (const FenceInst *FI = dyn_cast<FenceInst>(L)) {
+    if (int Res =
+            cmpNumbers(FI->getOrdering(), cast<FenceInst>(R)->getOrdering()))
+      return Res;
+    return cmpNumbers(FI->getSynchScope(), cast<FenceInst>(R)->getSynchScope());
+  }
 
-  return true;
+  if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) {
+    if (int Res = cmpNumbers(CXI->isVolatile(),
+                             cast<AtomicCmpXchgInst>(R)->isVolatile()))
+      return Res;
+    if (int Res = cmpNumbers(CXI->getSuccessOrdering(),
+                             cast<AtomicCmpXchgInst>(R)->getSuccessOrdering()))
+      return Res;
+    if (int Res = cmpNumbers(CXI->getFailureOrdering(),
+                             cast<AtomicCmpXchgInst>(R)->getFailureOrdering()))
+      return Res;
+    return cmpNumbers(CXI->getSynchScope(),
+                      cast<AtomicCmpXchgInst>(R)->getSynchScope());
+  }
+  if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) {
+    if (int Res = cmpNumbers(RMWI->getOperation(),
+                             cast<AtomicRMWInst>(R)->getOperation()))
+      return Res;
+    if (int Res = cmpNumbers(RMWI->isVolatile(),
+                             cast<AtomicRMWInst>(R)->isVolatile()))
+      return Res;
+    if (int Res = cmpNumbers(RMWI->getOrdering(),
+                             cast<AtomicRMWInst>(R)->getOrdering()))
+      return Res;
+    return cmpNumbers(RMWI->getSynchScope(),
+                      cast<AtomicRMWInst>(R)->getSynchScope());
+  }
+  return 0;
 }
 
 // Determine whether two GEP operations perform the same underlying arithmetic.
-bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1,
-                                         const GEPOperator *GEP2) {
-  unsigned AS = GEP1->getPointerAddressSpace();
-  if (AS != GEP2->getPointerAddressSpace())
-    return false;
+// Read method declaration comments for more details.
+int FunctionComparator::cmpGEP(const GEPOperator *GEPL,
+                               const GEPOperator *GEPR) {
 
+  unsigned int ASL = GEPL->getPointerAddressSpace();
+  unsigned int ASR = GEPR->getPointerAddressSpace();
+
+  if (int Res = cmpNumbers(ASL, ASR))
+    return Res;
+
+  // When we have target data, we can reduce the GEP down to the value in bytes
+  // added to the address.
   if (DL) {
-    // When we have target data, we can reduce the GEP down to the value in bytes
-    // added to the address.
-    unsigned BitWidth = DL ? DL->getPointerSizeInBits(AS) : 1;
-    APInt Offset1(BitWidth, 0), Offset2(BitWidth, 0);
-    if (GEP1->accumulateConstantOffset(*DL, Offset1) &&
-        GEP2->accumulateConstantOffset(*DL, Offset2)) {
-      return Offset1 == Offset2;
-    }
+    unsigned BitWidth = DL->getPointerSizeInBits(ASL);
+    APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
+    if (GEPL->accumulateConstantOffset(*DL, OffsetL) &&
+        GEPR->accumulateConstantOffset(*DL, OffsetR))
+      return cmpAPInt(OffsetL, OffsetR);
   }
 
-  if (GEP1->getPointerOperand()->getType() !=
-      GEP2->getPointerOperand()->getType())
-    return false;
+  if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(),
+                           (uint64_t)GEPR->getPointerOperand()->getType()))
+    return Res;
 
-  if (GEP1->getNumOperands() != GEP2->getNumOperands())
-    return false;
+  if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands()))
+    return Res;
 
-  for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) {
-    if (!enumerate(GEP1->getOperand(i), GEP2->getOperand(i)))
-      return false;
+  for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) {
+    if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i)))
+      return Res;
   }
 
-  return true;
+  return 0;
 }
 
-// Compare two values used by the two functions under pair-wise comparison. If
-// this is the first time the values are seen, they're added to the mapping so
-// that we will detect mismatches on next use.
-bool FunctionComparator::enumerate(const Value *V1, const Value *V2) {
-  // Check for function @f1 referring to itself and function @f2 referring to
-  // itself, or referring to each other, or both referring to either of them.
-  // They're all equivalent if the two functions are otherwise equivalent.
-  if (V1 == F1 && V2 == F2)
-    return true;
-  if (V1 == F2 && V2 == F1)
-    return true;
-
-  if (const Constant *C1 = dyn_cast<Constant>(V1)) {
-    if (V1 == V2) return true;
-    const Constant *C2 = dyn_cast<Constant>(V2);
-    if (!C2) return false;
-    // TODO: constant expressions with GEP or references to F1 or F2.
-    if (C1->isNullValue() && C2->isNullValue() &&
-        isEquivalentType(C1->getType(), C2->getType()))
-      return true;
-    // Try bitcasting C2 to C1's type. If the bitcast is legal and returns C1
-    // then they must have equal bit patterns.
-    return C1->getType()->canLosslesslyBitCastTo(C2->getType()) &&
-      C1 == ConstantExpr::getBitCast(const_cast<Constant*>(C2), C1->getType());
+/// Compare two values used by the two functions under pair-wise comparison. If
+/// this is the first time the values are seen, they're added to the mapping so
+/// that we will detect mismatches on next use.
+/// See comments in declaration for more details.
+int FunctionComparator::cmpValues(const Value *L, const Value *R) {
+  // Catch self-reference case.
+  if (L == F1) {
+    if (R == F2)
+      return 0;
+    return -1;
+  }
+  if (R == F2) {
+    if (L == F1)
+      return 0;
+    return 1;
   }
 
-  if (isa<InlineAsm>(V1) || isa<InlineAsm>(V2))
-    return V1 == V2;
+  const Constant *ConstL = dyn_cast<Constant>(L);
+  const Constant *ConstR = dyn_cast<Constant>(R);
+  if (ConstL && ConstR) {
+    if (L == R)
+      return 0;
+    return cmpConstants(ConstL, ConstR);
+  }
 
-  // Check that V1 maps to V2. If we find a value that V1 maps to then we simply
-  // check whether it's equal to V2. When there is no mapping then we need to
-  // ensure that V2 isn't already equivalent to something else. For this
-  // purpose, we track the V2 values in a set.
+  if (ConstL)
+    return 1;
+  if (ConstR)
+    return -1;
 
-  const Value *&map_elem = id_map[V1];
-  if (map_elem)
-    return map_elem == V2;
-  if (!seen_values.insert(V2).second)
-    return false;
-  map_elem = V2;
-  return true;
+  const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L);
+  const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);
+
+  if (InlineAsmL && InlineAsmR)
+    return cmpNumbers((uint64_t)L, (uint64_t)R);
+  if (InlineAsmL)
+    return 1;
+  if (InlineAsmR)
+    return -1;
+
+  auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())),
+       RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size()));
+
+  return cmpNumbers(LeftSN.first->second, RightSN.first->second);
 }
-
 // Test whether two basic blocks have equivalent behaviour.
 bool FunctionComparator::compare(const BasicBlock *BB1, const BasicBlock *BB2) {
   BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end();
@@ -535,6 +1001,9 @@
   // We need to recheck everything, but check the things that weren't included
   // in the hash first.
 
+  sn_mapL.clear();
+  sn_mapR.clear();
+
   if (F1->getAttributes() != F2->getAttributes())
     return false;
 
@@ -683,7 +1152,7 @@
 bool MergeFunctions::runOnModule(Module &M) {
   bool Changed = false;
   DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-  DL = DLP ? &DLP->getDataLayout() : 0;
+  DL = DLP ? &DLP->getDataLayout() : nullptr;
 
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
     if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
@@ -783,8 +1252,23 @@
 // Helper for writeThunk,
 // Selects proper bitcast operation,
 // but a bit simpler then CastInst::getCastOpcode.
-static Value* createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) {
+static Value *createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) {
   Type *SrcTy = V->getType();
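+  // Struct types are handled element by element: extract each source
+  // element, cast it to the corresponding destination element type, and
+  // insert it into the result.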
+  if (SrcTy->isStructTy()) {
+    assert(DestTy->isStructTy());
+    assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements());
+    Value *Result = UndefValue::get(DestTy);
+    for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) {
+      Value *Element = createCast(
+          Builder, Builder.CreateExtractValue(V, ArrayRef<unsigned int>(I)),
+          DestTy->getStructElementType(I));
+
+      Result =
+          Builder.CreateInsertValue(Result, Element, ArrayRef<unsigned int>(I));
+    }
+    return Result;
+  }
+  assert(!DestTy->isStructTy());
   if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
     return Builder.CreateIntToPtr(V, DestTy);
   else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
@@ -843,9 +1327,9 @@
 
 // Replace G with an alias to F and delete G.
 void MergeFunctions::writeAlias(Function *F, Function *G) {
-  Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
-  GlobalAlias *GA = new GlobalAlias(G->getType(), G->getLinkage(), "",
-                                    BitcastF, G->getParent());
+  PointerType *PTy = G->getType();
+  auto *GA = GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
+                                 G->getLinkage(), "", F);
   F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
   GA->takeName(G);
   GA->setVisibility(G->getVisibility());
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index ac88aee..76d6dfa 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -12,7 +12,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "partialinlining"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/IR/CFG.h"
@@ -24,6 +23,8 @@
 #include "llvm/Transforms/Utils/CodeExtractor.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "partialinlining"
+
 STATISTIC(NumPartialInlined, "Number of functions partially inlined");
 
 namespace {
@@ -52,10 +53,10 @@
   BasicBlock* entryBlock = F->begin();
   BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator());
   if (!BR || BR->isUnconditional())
-    return 0;
+    return nullptr;
   
-  BasicBlock* returnBlock = 0;
-  BasicBlock* nonReturnBlock = 0;
+  BasicBlock* returnBlock = nullptr;
+  BasicBlock* nonReturnBlock = nullptr;
   unsigned returnCount = 0;
   for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock);
        SI != SE; ++SI)
@@ -66,7 +67,7 @@
       nonReturnBlock = *SI;
   
   if (returnCount != 1)
-    return 0;
+    return nullptr;
   
   // Clone the function, so that we can hack away on it.
   ValueToValueMapTy VMap;
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 4a28b34..38e1b8e 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -56,8 +56,9 @@
 PassManagerBuilder::PassManagerBuilder() {
     OptLevel = 2;
     SizeLevel = 0;
-    LibraryInfo = 0;
-    Inliner = 0;
+    LibraryInfo = nullptr;
+    Inliner = nullptr;
+    DisableTailCalls = false;
     DisableUnitAtATime = false;
     DisableUnrollLoops = false;
     BBVectorize = RunBBVectorization;
@@ -128,7 +129,7 @@
   if (OptLevel == 0) {
     if (Inliner) {
       MPM.add(Inliner);
-      Inliner = 0;
+      Inliner = nullptr;
     }
 
     // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
@@ -156,6 +157,7 @@
     MPM.add(createDeadArgEliminationPass());  // Dead argument elimination
 
     MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE
+    addExtensionsToPM(EP_Peephole, MPM);
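+    // The EP_Peephole extension point just used runs custom peephole passes
+    // after each instruction-combining pass. A front end could hook in with
+    // something like the following sketch (createMyPeepholePass is
+    // hypothetical):
+    //   PassManagerBuilder::addGlobalExtension(
+    //       PassManagerBuilder::EP_Peephole,
+    //       [](const PassManagerBuilder &, PassManagerBase &PM) {
+    //         PM.add(createMyPeepholePass());
+    //       });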
     MPM.add(createCFGSimplificationPass());   // Clean up after IPCP & DAE
   }
 
@@ -164,7 +166,7 @@
     MPM.add(createPruneEHPass());             // Remove dead EH info
   if (Inliner) {
     MPM.add(Inliner);
-    Inliner = 0;
+    Inliner = nullptr;
   }
   if (!DisableUnitAtATime)
     MPM.add(createFunctionAttrsPass());       // Set readonly/readnone attrs
@@ -182,8 +184,10 @@
   MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
   MPM.add(createCFGSimplificationPass());     // Merge & remove BBs
   MPM.add(createInstructionCombiningPass());  // Combine silly seq's
+  addExtensionsToPM(EP_Peephole, MPM);
 
-  MPM.add(createTailCallEliminationPass());   // Eliminate tail calls
+  if (!DisableTailCalls)
+    MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
   MPM.add(createCFGSimplificationPass());     // Merge & remove BBs
   MPM.add(createReassociatePass());           // Reassociate expressions
   MPM.add(createLoopRotatePass());            // Rotate Loop
@@ -206,6 +210,7 @@
   // Run instcombine after redundancy elimination to exploit opportunities
   // opened up by them.
   MPM.add(createInstructionCombiningPass());
+  addExtensionsToPM(EP_Peephole, MPM);
   MPM.add(createJumpThreadingPass());         // Thread jumps
   MPM.add(createCorrelatedValuePropagationPass());
   MPM.add(createDeadStoreEliminationPass());  // Delete dead stores
@@ -220,6 +225,7 @@
   if (BBVectorize) {
     MPM.add(createBBVectorizePass());
     MPM.add(createInstructionCombiningPass());
+    addExtensionsToPM(EP_Peephole, MPM);
     if (OptLevel > 1 && UseGVNAfterVectorization)
       MPM.add(createGVNPass());           // Remove redundancies
     else
@@ -233,6 +239,7 @@
   MPM.add(createAggressiveDCEPass());         // Delete dead instructions
   MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
   MPM.add(createInstructionCombiningPass());  // Clean up after everything.
+  addExtensionsToPM(EP_Peephole, MPM);
 
   // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
   // pass manager that we are specifically trying to avoid. To prevent this
@@ -245,6 +252,7 @@
   // as function calls, so that we can only pass them when the vectorizer
   // changed the code.
   MPM.add(createInstructionCombiningPass());
+  addExtensionsToPM(EP_Peephole, MPM);
   MPM.add(createCFGSimplificationPass());
 
   if (!DisableUnrollLoops)
@@ -297,6 +305,7 @@
   // function pointers.  When this happens, we often have to resolve varargs
   // calls, etc, so let instcombine do this.
   PM.add(createInstructionCombiningPass());
+  addExtensionsToPM(EP_Peephole, PM);
 
   // Inline small functions
   if (RunInliner)
@@ -315,6 +324,7 @@
 
   // The IPO passes may leave cruft around.  Clean up after them.
   PM.add(createInstructionCombiningPass());
+  addExtensionsToPM(EP_Peephole, PM);
   PM.add(createJumpThreadingPass());
 
   // Break up allocas
@@ -334,11 +344,17 @@
   // Nuke dead stores.
   PM.add(createDeadStoreEliminationPass());
 
-  // More loops are countable try to vectorize them.
+  // More loops are countable; try to optimize them.
+  PM.add(createIndVarSimplifyPass());
+  PM.add(createLoopDeletionPass());
   PM.add(createLoopVectorizePass(true, true));
 
+  // More scalar chains could be vectorized due to more alias information.
+  PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+
   // Cleanup and simplify the code after the scalar optimizations.
   PM.add(createInstructionCombiningPass());
+  addExtensionsToPM(EP_Peephole, PM);
 
   PM.add(createJumpThreadingPass());
 
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index c61ec5e..b2c4a09 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -14,7 +14,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "prune-eh"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -30,6 +29,8 @@
 #include <algorithm>
 using namespace llvm;
 
+#define DEBUG_TYPE "prune-eh"
+
 STATISTIC(NumRemoved, "Number of invokes removed");
 STATISTIC(NumUnreach, "Number of noreturn calls optimized");
 
@@ -85,7 +86,7 @@
   for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); 
        (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) {
     Function *F = (*I)->getFunction();
-    if (F == 0) {
+    if (!F) {
       SCCMightUnwind = true;
       SCCMightReturn = true;
     } else if (F->isDeclaration() || F->mayBeOverridden()) {
diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp
index 1c6532d..956991a 100644
--- a/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -14,13 +14,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "strip-dead-prototypes"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "strip-dead-prototypes"
+
 STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed");
 
 namespace {
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 6d0be8f..1abbccc 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -192,7 +192,7 @@
 /// Find values that are marked as llvm.used.
 static void findUsedValues(GlobalVariable *LLVMUsed,
                            SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
-  if (LLVMUsed == 0) return;
+  if (!LLVMUsed) return;
   UsedValues.insert(LLVMUsed);
 
   ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());