Update LLVM for 3.5 rebase (r209712).

Change-Id: I149556c940fb7dc92d075273c87ff584f400941f
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 9583bbe..57237e5 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -473,7 +473,7 @@
 ///
 void AliasAnalysis::InitializeAliasAnalysis(Pass *P) {
   DataLayoutPass *DLP = P->getAnalysisIfAvailable<DataLayoutPass>();
-  DL = DLP ? &DLP->getDataLayout() : 0;
+  DL = DLP ? &DLP->getDataLayout() : nullptr;
   TLI = P->getAnalysisIfAvailable<TargetLibraryInfo>();
   AA = &P->getAnalysis<AliasAnalysis>();
 }
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
index 2e3bc55..b860914 100644
--- a/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/lib/Analysis/AliasAnalysisCounter.cpp
@@ -126,7 +126,7 @@
 AliasAnalysisCounter::alias(const Location &LocA, const Location &LocB) {
   AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB);
 
-  const char *AliasString = 0;
+  const char *AliasString = nullptr;
   switch (R) {
   case NoAlias:   No++;   AliasString = "No alias"; break;
   case MayAlias:  May++;  AliasString = "May alias"; break;
@@ -152,7 +152,7 @@
                                     const Location &Loc) {
   ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc);
 
-  const char *MRString = 0;
+  const char *MRString = nullptr;
   switch (R) {
   case NoModRef: NoMR++;     MRString = "NoModRef"; break;
   case Ref:      JustRef++;  MRString = "JustRef"; break;
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index ab1005e..a45fe23 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -72,16 +72,16 @@
     AS.PtrList->setPrevInList(PtrListEnd);
     PtrListEnd = AS.PtrListEnd;
 
-    AS.PtrList = 0;
+    AS.PtrList = nullptr;
     AS.PtrListEnd = &AS.PtrList;
-    assert(*AS.PtrListEnd == 0 && "End of list is not null?");
+    assert(*AS.PtrListEnd == nullptr && "End of list is not null?");
   }
 }
 
 void AliasSetTracker::removeAliasSet(AliasSet *AS) {
   if (AliasSet *Fwd = AS->Forward) {
     Fwd->dropRef(*this);
-    AS->Forward = 0;
+    AS->Forward = nullptr;
   }
   AliasSets.erase(AS);
 }
@@ -115,10 +115,10 @@
   Entry.updateSizeAndTBAAInfo(Size, TBAAInfo);
 
   // Add it to the end of the list...
-  assert(*PtrListEnd == 0 && "End of list is not null?");
+  assert(*PtrListEnd == nullptr && "End of list is not null?");
   *PtrListEnd = &Entry;
   PtrListEnd = Entry.setPrevInList(PtrListEnd);
-  assert(*PtrListEnd == 0 && "End of list is not null?");
+  assert(*PtrListEnd == nullptr && "End of list is not null?");
   addRef();               // Entry points to alias set.
 }
 
@@ -217,11 +217,11 @@
 AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
                                                   uint64_t Size,
                                                   const MDNode *TBAAInfo) {
-  AliasSet *FoundSet = 0;
+  AliasSet *FoundSet = nullptr;
   for (iterator I = begin(), E = end(); I != E; ++I) {
     if (I->Forward || !I->aliasesPointer(Ptr, Size, TBAAInfo, AA)) continue;
     
-    if (FoundSet == 0) {  // If this is the first alias set ptr can go into.
+    if (!FoundSet) {      // If this is the first alias set ptr can go into.
       FoundSet = I;       // Remember it.
     } else {              // Otherwise, we must merge the sets.
       FoundSet->mergeSetIn(*I, *this);     // Merge in contents.
@@ -245,12 +245,12 @@
 
 
 AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
-  AliasSet *FoundSet = 0;
+  AliasSet *FoundSet = nullptr;
   for (iterator I = begin(), E = end(); I != E; ++I) {
     if (I->Forward || !I->aliasesUnknownInst(Inst, AA))
       continue;
     
-    if (FoundSet == 0)        // If this is the first alias set ptr can go into.
+    if (!FoundSet)            // If this is the first alias set ptr can go into.
       FoundSet = I;           // Remember it.
     else if (!I->Forward)     // Otherwise, we must merge the sets.
       FoundSet->mergeSetIn(*I, *this);     // Merge in contents.
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index c960123..01c1c7e 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -73,7 +73,7 @@
 
 LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
                           char **OutMessages) {
-  raw_ostream *DebugOS = Action != LLVMReturnStatusAction ? &errs() : 0;
+  raw_ostream *DebugOS = Action != LLVMReturnStatusAction ? &errs() : nullptr;
   std::string Messages;
   raw_string_ostream MsgsOS(Messages);
 
@@ -94,7 +94,8 @@
 
 LLVMBool LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action) {
   LLVMBool Result = verifyFunction(
-      *unwrap<Function>(Fn), Action != LLVMReturnStatusAction ? &errs() : 0);
+      *unwrap<Function>(Fn), Action != LLVMReturnStatusAction ? &errs()
+                                                              : nullptr);
 
   if (Action == LLVMAbortProcessAction && Result)
     report_fatal_error("Broken function found, compilation aborted!");
diff --git a/lib/Analysis/Android.mk b/lib/Analysis/Android.mk
index 76eee74..a8fef77 100644
--- a/lib/Analysis/Android.mk
+++ b/lib/Analysis/Android.mk
@@ -9,6 +9,7 @@
   Analysis.cpp \
   BasicAliasAnalysis.cpp \
   BlockFrequencyInfo.cpp \
+  BlockFrequencyInfoImpl.cpp \
   BranchProbabilityInfo.cpp \
   CFG.cpp \
   CFGPrinter.cpp \
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index e267374..fe90b84 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -298,7 +298,7 @@
   do {
     // See if this is a bitcast or GEP.
     const Operator *Op = dyn_cast<Operator>(V);
-    if (Op == 0) {
+    if (!Op) {
       // The only non-operator case we can handle are GlobalAliases.
       if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
         if (!GA->mayBeOverridden()) {
@@ -315,7 +315,7 @@
     }
 
     const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
-    if (GEPOp == 0) {
+    if (!GEPOp) {
       // If it's not a GEP, hand it off to SimplifyInstruction to see if it
       // can come up with something. This matches what GetUnderlyingObject does.
       if (const Instruction *I = dyn_cast<Instruction>(V))
@@ -336,7 +336,7 @@
     // If we are lacking DataLayout information, we can't compute the offsets of
     // elements computed by GEPs.  However, we can handle bitcast equivalent
     // GEPs.
-    if (DL == 0) {
+    if (!DL) {
       if (!GEPOp->hasAllZeroIndices())
         return V;
       V = GEPOp->getOperand(0);
@@ -433,7 +433,7 @@
   if (const Argument *arg = dyn_cast<Argument>(V))
     return arg->getParent();
 
-  return NULL;
+  return nullptr;
 }
 
 static bool notDifferentParent(const Value *O1, const Value *O2) {
@@ -753,7 +753,7 @@
 
   // Finally, handle specific knowledge of intrinsics.
   const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
-  if (II != 0)
+  if (II != nullptr)
     switch (II->getIntrinsicID()) {
     default: break;
     case Intrinsic::memcpy:
@@ -868,21 +868,6 @@
   return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min);
 }
 
-static bool areVarIndicesEqual(SmallVectorImpl<VariableGEPIndex> &Indices1,
-                               SmallVectorImpl<VariableGEPIndex> &Indices2) {
-  unsigned Size1 = Indices1.size();
-  unsigned Size2 = Indices2.size();
-
-  if (Size1 != Size2)
-    return false;
-
-  for (unsigned I = 0; I != Size1; ++I)
-    if (Indices1[I] != Indices2[I])
-      return false;
-
-  return true;
-}
-
 /// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
 /// against another pointer.  We know that V1 is a GEP, but we don't know
 /// anything about V2.  UnderlyingV1 is GetUnderlyingObject(GEP1, DL),
@@ -904,8 +889,8 @@
   // derived pointer.
   if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) {
     // Do the base pointers alias?
-    AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0,
-                                       UnderlyingV2, UnknownSize, 0);
+    AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, nullptr,
+                                       UnderlyingV2, UnknownSize, nullptr);
 
     // Check for geps of non-aliasing underlying pointers where the offsets are
     // identical.
@@ -929,8 +914,8 @@
         // DecomposeGEPExpression and GetUnderlyingObject should return the
         // same result except when DecomposeGEPExpression has no DataLayout.
         if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
-          assert(DL == 0 &&
-             "DecomposeGEPExpression and GetUnderlyingObject disagree!");
+          assert(!DL &&
+                 "DecomposeGEPExpression and GetUnderlyingObject disagree!");
           return MayAlias;
         }
         // If the max search depth is reached the result is undefined
@@ -939,7 +924,7 @@
 
         // Same offsets.
         if (GEP1BaseOffset == GEP2BaseOffset &&
-            areVarIndicesEqual(GEP1VariableIndices, GEP2VariableIndices))
+            GEP1VariableIndices == GEP2VariableIndices)
           return NoAlias;
         GEP1VariableIndices.clear();
       }
@@ -966,7 +951,7 @@
     // DecomposeGEPExpression and GetUnderlyingObject should return the
     // same result except when DecomposeGEPExpression has no DataLayout.
     if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
-      assert(DL == 0 &&
+      assert(!DL &&
              "DecomposeGEPExpression and GetUnderlyingObject disagree!");
       return MayAlias;
     }
@@ -988,7 +973,7 @@
     if (V1Size == UnknownSize && V2Size == UnknownSize)
       return MayAlias;
 
-    AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, 0,
+    AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, nullptr,
                                V2, V2Size, V2TBAAInfo);
     if (R != MustAlias)
       // If V2 may alias GEP base pointer, conservatively returns MayAlias.
@@ -1005,7 +990,7 @@
     // DecomposeGEPExpression and GetUnderlyingObject should return the
     // same result except when DecomposeGEPExpression has no DataLayout.
     if (GEP1BasePtr != UnderlyingV1) {
-      assert(DL == 0 &&
+      assert(!DL &&
              "DecomposeGEPExpression and GetUnderlyingObject disagree!");
       return MayAlias;
     }
@@ -1371,7 +1356,7 @@
   // Use dominance or loop info if available.
   DominatorTreeWrapperPass *DTWP =
       getAnalysisIfAvailable<DominatorTreeWrapperPass>();
-  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : 0;
+  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
   LoopInfo *LI = getAnalysisIfAvailable<LoopInfo>();
 
   // Make sure that the visited phis cannot reach the Value. This ensures that
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
index 63049a5..8ed8e3e 100644
--- a/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -1,4 +1,4 @@
-//=======-------- BlockFrequencyInfo.cpp - Block Frequency Analysis -------===//
+//===- BlockFrequencyInfo.cpp - Block Frequency Analysis ------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,7 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/BlockFrequencyImpl.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/Passes.h"
@@ -24,6 +24,8 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "block-freq"
+
 #ifndef NDEBUG
 enum GVDAGType {
   GVDT_None,
@@ -106,6 +108,7 @@
 INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq",
                       "Block Frequency Analysis", true, true)
 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
 INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq",
                     "Block Frequency Analysis", true, true)
 
@@ -120,14 +123,16 @@
 
 void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<BranchProbabilityInfo>();
+  AU.addRequired<LoopInfo>();
   AU.setPreservesAll();
 }
 
 bool BlockFrequencyInfo::runOnFunction(Function &F) {
   BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
+  LoopInfo &LI = getAnalysis<LoopInfo>();
   if (!BFI)
     BFI.reset(new ImplType);
-  BFI->doFunction(&F, &BPI);
+  BFI->doFunction(&F, &BPI, &LI);
 #ifndef NDEBUG
   if (ViewBlockFreqPropagationDAG != GVDT_None)
     view();
@@ -158,7 +163,7 @@
 }
 
 const Function *BlockFrequencyInfo::getFunction() const {
-  return BFI ? BFI->Fn : nullptr;
+  return BFI ? BFI->getFunction() : nullptr;
 }
 
 raw_ostream &BlockFrequencyInfo::
diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp
new file mode 100644
index 0000000..87d93a4
--- /dev/null
+++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -0,0 +1,995 @@
+//===- BlockFrequencyInfoImpl.cpp - Block Frequency Info Implementation ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/Support/raw_ostream.h"
+#include <deque>
+
+using namespace llvm;
+using namespace llvm::bfi_detail;
+
+#define DEBUG_TYPE "block-freq"
+
+//===----------------------------------------------------------------------===//
+//
+// UnsignedFloat implementation.
+//
+//===----------------------------------------------------------------------===//
+#ifndef _MSC_VER
+const int32_t UnsignedFloatBase::MaxExponent;
+const int32_t UnsignedFloatBase::MinExponent;
+#endif
+
+static void appendDigit(std::string &Str, unsigned D) {
+  assert(D < 10);
+  Str += '0' + D % 10;
+}
+
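+// Appends the decimal digits of N least-significant first; callers are
+// expected to reverse the string afterwards (see toString below).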
+static void appendNumber(std::string &Str, uint64_t N) {
+  while (N) {
+    appendDigit(Str, N % 10);
+    N /= 10;
+  }
+}
+
+static bool doesRoundUp(char Digit) {
+  switch (Digit) {
+  case '5':
+  case '6':
+  case '7':
+  case '8':
+  case '9':
+    return true;
+  default:
+    return false;
+  }
+}
+
+static std::string toStringAPFloat(uint64_t D, int E, unsigned Precision) {
+  assert(E >= UnsignedFloatBase::MinExponent);
+  assert(E <= UnsignedFloatBase::MaxExponent);
+
+  // Find a new E, but don't let it increase past MaxExponent.
+  int LeadingZeros = UnsignedFloatBase::countLeadingZeros64(D);
+  int NewE = std::min(UnsignedFloatBase::MaxExponent, E + 63 - LeadingZeros);
+  int Shift = 63 - (NewE - E);
+  assert(Shift <= LeadingZeros);
+  assert(Shift == LeadingZeros || NewE == UnsignedFloatBase::MaxExponent);
+  D <<= Shift;
+  E = NewE;
+
+  // Check for a denormal.
+  unsigned AdjustedE = E + 16383;
+  if (!(D >> 63)) {
+    assert(E == UnsignedFloatBase::MaxExponent);
+    AdjustedE = 0;
+  }
+
+  // Build the float and print it.
+  uint64_t RawBits[2] = {D, AdjustedE};
+  APFloat Float(APFloat::x87DoubleExtended, APInt(80, RawBits));
+  SmallVector<char, 24> Chars;
+  Float.toString(Chars, Precision, 0);
+  return std::string(Chars.begin(), Chars.end());
+}
+
+static std::string stripTrailingZeros(const std::string &Float) {
+  size_t NonZero = Float.find_last_not_of('0');
+  assert(NonZero != std::string::npos && "no . in floating point string");
+
+  if (Float[NonZero] == '.')
+    ++NonZero;
+
+  return Float.substr(0, NonZero + 1);
+}
+
+std::string UnsignedFloatBase::toString(uint64_t D, int16_t E, int Width,
+                                        unsigned Precision) {
+  if (!D)
+    return "0.0";
+
+  // Canonicalize exponent and digits.
+  uint64_t Above0 = 0;
+  uint64_t Below0 = 0;
+  uint64_t Extra = 0;
+  int ExtraShift = 0;
+  if (E == 0) {
+    Above0 = D;
+  } else if (E > 0) {
+    if (int Shift = std::min(int16_t(countLeadingZeros64(D)), E)) {
+      D <<= Shift;
+      E -= Shift;
+
+      if (!E)
+        Above0 = D;
+    }
+  } else if (E > -64) {
+    Above0 = D >> -E;
+    Below0 = D << (64 + E);
+  } else if (E > -120) {
+    Below0 = D >> (-E - 64);
+    Extra = D << (128 + E);
+    ExtraShift = -64 - E;
+  }
+
+  // Fall back on APFloat for very small and very large numbers.
+  if (!Above0 && !Below0)
+    return toStringAPFloat(D, E, Precision);
+
+  // Append the digits before the decimal.
+  std::string Str;
+  size_t DigitsOut = 0;
+  if (Above0) {
+    appendNumber(Str, Above0);
+    DigitsOut = Str.size();
+  } else
+    appendDigit(Str, 0);
+  std::reverse(Str.begin(), Str.end());
+
+  // Return early if there's nothing after the decimal.
+  if (!Below0)
+    return Str + ".0";
+
+  // Append the decimal and beyond.
+  Str += '.';
+  uint64_t Error = UINT64_C(1) << (64 - Width);
+
+  // We need to shift Below0 to the right to make space for calculating
+  // digits.  Save the precision we're losing in Extra.
+  Extra = (Below0 & 0xf) << 56 | (Extra >> 8);
+  Below0 >>= 4;
+  size_t SinceDot = 0;
+  size_t AfterDot = Str.size();
+  do {
+    if (ExtraShift) {
+      --ExtraShift;
+      Error *= 5;
+    } else
+      Error *= 10;
+
+    Below0 *= 10;
+    Extra *= 10;
+    Below0 += (Extra >> 60);
+    Extra = Extra & (UINT64_MAX >> 4);
+    appendDigit(Str, Below0 >> 60);
+    Below0 = Below0 & (UINT64_MAX >> 4);
+    if (DigitsOut || Str.back() != '0')
+      ++DigitsOut;
+    ++SinceDot;
+  } while (Error && (Below0 << 4 | Extra >> 60) >= Error / 2 &&
+           (!Precision || DigitsOut <= Precision || SinceDot < 2));
+
+  // Return early for maximum precision.
+  if (!Precision || DigitsOut <= Precision)
+    return stripTrailingZeros(Str);
+
+  // Find where to truncate.
+  size_t Truncate =
+      std::max(Str.size() - (DigitsOut - Precision), AfterDot + 1);
+
+  // Check if there's anything to truncate.
+  if (Truncate >= Str.size())
+    return stripTrailingZeros(Str);
+
+  bool Carry = doesRoundUp(Str[Truncate]);
+  if (!Carry)
+    return stripTrailingZeros(Str.substr(0, Truncate));
+
+  // Round with the first truncated digit.
+  for (std::string::reverse_iterator I(Str.begin() + Truncate), E = Str.rend();
+       I != E; ++I) {
+    if (*I == '.')
+      continue;
+    if (*I == '9') {
+      *I = '0';
+      continue;
+    }
+
+    ++*I;
+    Carry = false;
+    break;
+  }
+
+  // Add "1" in front if we still need to carry.
+  return stripTrailingZeros(std::string(Carry, '1') + Str.substr(0, Truncate));
+}
+
+raw_ostream &UnsignedFloatBase::print(raw_ostream &OS, uint64_t D, int16_t E,
+                                      int Width, unsigned Precision) {
+  return OS << toString(D, E, Width, Precision);
+}
+
+void UnsignedFloatBase::dump(uint64_t D, int16_t E, int Width) {
+  print(dbgs(), D, E, Width, 0) << "[" << Width << ":" << D << "*2^" << E
+                                << "]";
+}
+
+static std::pair<uint64_t, int16_t>
+getRoundedFloat(uint64_t N, bool ShouldRound, int64_t Shift) {
+  if (ShouldRound)
+    if (!++N)
+      // Rounding caused an overflow.
+      return std::make_pair(UINT64_C(1), Shift + 64);
+  return std::make_pair(N, Shift);
+}
+
+std::pair<uint64_t, int16_t> UnsignedFloatBase::divide64(uint64_t Dividend,
+                                                         uint64_t Divisor) {
+  // Input should be sanitized.
+  assert(Divisor);
+  assert(Dividend);
+
+  // Minimize size of divisor.
+  int16_t Shift = 0;
+  if (int Zeros = countTrailingZeros(Divisor)) {
+    Shift -= Zeros;
+    Divisor >>= Zeros;
+  }
+
+  // Check for powers of two.
+  if (Divisor == 1)
+    return std::make_pair(Dividend, Shift);
+
+  // Maximize size of dividend.
+  if (int Zeros = countLeadingZeros64(Dividend)) {
+    Shift -= Zeros;
+    Dividend <<= Zeros;
+  }
+
+  // Start with the result of a divide.
+  uint64_t Quotient = Dividend / Divisor;
+  Dividend %= Divisor;
+
+  // Continue building the quotient with long division.
+  //
+  // TODO: continue with larger digits.
+  while (!(Quotient >> 63) && Dividend) {
+    // Shift Dividend, and check for overflow.
+    bool IsOverflow = Dividend >> 63;
+    Dividend <<= 1;
+    --Shift;
+
+    // Divide.
+    bool DoesDivide = IsOverflow || Divisor <= Dividend;
+    Quotient = (Quotient << 1) | uint64_t(DoesDivide);
+    Dividend -= DoesDivide ? Divisor : 0;
+  }
+
+  // Round to nearest; getRoundedFloat() also handles the case where the
+  // increment overflows Quotient.
+  return getRoundedFloat(Quotient, Dividend >= getHalf(Divisor), Shift);
+}
+
+std::pair<uint64_t, int16_t> UnsignedFloatBase::multiply64(uint64_t L,
+                                                           uint64_t R) {
+  // Separate into two 32-bit digits (U.L).
+  uint64_t UL = L >> 32, LL = L & UINT32_MAX, UR = R >> 32, LR = R & UINT32_MAX;
+
+  // Compute cross products.
+  uint64_t P1 = UL * UR, P2 = UL * LR, P3 = LL * UR, P4 = LL * LR;
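+  // Note that L*R == P1*2^64 + (P2 + P3)*2^32 + P4.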
+
+  // Sum into two 64-bit digits.
+  uint64_t Upper = P1, Lower = P4;
+  auto addWithCarry = [&](uint64_t N) {
+    uint64_t NewLower = Lower + (N << 32);
+    Upper += (N >> 32) + (NewLower < Lower);
+    Lower = NewLower;
+  };
+  addWithCarry(P2);
+  addWithCarry(P3);
+
+  // Check whether the upper digit is empty.
+  if (!Upper)
+    return std::make_pair(Lower, 0);
+
+  // Shift as little as possible to maximize precision.
+  unsigned LeadingZeros = countLeadingZeros64(Upper);
+  int16_t Shift = 64 - LeadingZeros;
+  if (LeadingZeros)
+    Upper = Upper << LeadingZeros | Lower >> Shift;
+  bool ShouldRound = Shift && (Lower & UINT64_C(1) << (Shift - 1));
+  return getRoundedFloat(Upper, ShouldRound, Shift);
+}
+
+//===----------------------------------------------------------------------===//
+//
+// BlockMass implementation.
+//
+//===----------------------------------------------------------------------===//
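+// A full mass converts to exactly 1.0; any other mass m converts to
+// (m + 1) * 2^-64, keeping the result in (0, 1].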
+UnsignedFloat<uint64_t> BlockMass::toFloat() const {
+  if (isFull())
+    return UnsignedFloat<uint64_t>(1, 0);
+  return UnsignedFloat<uint64_t>(getMass() + 1, -64);
+}
+
+void BlockMass::dump() const { print(dbgs()); }
+
+static char getHexDigit(int N) {
+  assert(N < 16);
+  if (N < 10)
+    return '0' + N;
+  return 'a' + N - 10;
+}
+raw_ostream &BlockMass::print(raw_ostream &OS) const {
+  for (int Digits = 0; Digits < 16; ++Digits)
+    OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
+  return OS;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// BlockFrequencyInfoImpl implementation.
+//
+//===----------------------------------------------------------------------===//
+namespace {
+
+typedef BlockFrequencyInfoImplBase::BlockNode BlockNode;
+typedef BlockFrequencyInfoImplBase::Distribution Distribution;
+typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList;
+typedef BlockFrequencyInfoImplBase::Float Float;
+typedef BlockFrequencyInfoImplBase::LoopData LoopData;
+typedef BlockFrequencyInfoImplBase::Weight Weight;
+typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData;
+
+/// \brief Dithering mass distributer.
+///
+/// This class splits up a single mass into portions by weight, dithering to
+/// spread out error.  No mass is lost.  The dithering precision depends on the
+/// precision of the product of \a BlockMass and \a BranchProbability.
+///
+/// The distribution algorithm follows.
+///
+///  1. Initialize by saving the sum of the weights in \a RemWeight and the
+///     mass to distribute in \a RemMass.
+///
+///  2. For each portion:
+///
+///      1. Construct a branch probability, P, as the portion's weight divided
+///         by the current value of \a RemWeight.
+///      2. Calculate the portion's mass as \a RemMass times P.
+///      3. Update \a RemWeight and \a RemMass at each portion by subtracting
+///         the current portion's weight and mass.
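+///
+/// For example (illustrative): distributing a mass across weights {3, 1}
+/// gives the first portion P == 3/4 of the mass; the second portion then
+/// takes 1/1 of the remainder, so the entire mass is distributed with no
+/// rounding loss.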
+struct DitheringDistributer {
+  uint32_t RemWeight;
+  BlockMass RemMass;
+
+  DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
+
+  BlockMass takeMass(uint32_t Weight);
+};
+}
+
+DitheringDistributer::DitheringDistributer(Distribution &Dist,
+                                           const BlockMass &Mass) {
+  Dist.normalize();
+  RemWeight = Dist.Total;
+  RemMass = Mass;
+}
+
+BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
+  assert(Weight && "invalid weight");
+  assert(Weight <= RemWeight);
+  BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
+
+  // Decrement totals (dither).
+  RemWeight -= Weight;
+  RemMass -= Mass;
+  return Mass;
+}
+
+void Distribution::add(const BlockNode &Node, uint64_t Amount,
+                       Weight::DistType Type) {
+  assert(Amount && "invalid weight of 0");
+  uint64_t NewTotal = Total + Amount;
+
+  // Check for overflow.  It should be impossible to overflow twice.
+  bool IsOverflow = NewTotal < Total;
+  assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
+  DidOverflow |= IsOverflow;
+
+  // Update the total.
+  Total = NewTotal;
+
+  // Save the weight.
+  Weight W;
+  W.TargetNode = Node;
+  W.Amount = Amount;
+  W.Type = Type;
+  Weights.push_back(W);
+}
+
+static void combineWeight(Weight &W, const Weight &OtherW) {
+  assert(OtherW.TargetNode.isValid());
+  if (!W.Amount) {
+    W = OtherW;
+    return;
+  }
+  assert(W.Type == OtherW.Type);
+  assert(W.TargetNode == OtherW.TargetNode);
+  assert(W.Amount < W.Amount + OtherW.Amount && "Unexpected overflow");
+  W.Amount += OtherW.Amount;
+}
+static void combineWeightsBySorting(WeightList &Weights) {
+  // Sort so edges to the same node are adjacent.
+  std::sort(Weights.begin(), Weights.end(),
+            [](const Weight &L,
+               const Weight &R) { return L.TargetNode < R.TargetNode; });
+
+  // Combine adjacent edges.
+  WeightList::iterator O = Weights.begin();
+  for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
+       ++O, (I = L)) {
+    *O = *I;
+
+    // Find the adjacent weights to the same node.
+    for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
+      combineWeight(*O, *L);
+  }
+
+  // Erase extra entries.
+  Weights.erase(O, Weights.end());
+}
+static void combineWeightsByHashing(WeightList &Weights) {
+  // Collect weights into a DenseMap.
+  typedef DenseMap<BlockNode::IndexType, Weight> HashTable;
+  HashTable Combined(NextPowerOf2(2 * Weights.size()));
+  for (const Weight &W : Weights)
+    combineWeight(Combined[W.TargetNode.Index], W);
+
+  // Check whether anything changed.
+  if (Weights.size() == Combined.size())
+    return;
+
+  // Fill in the new weights.
+  Weights.clear();
+  Weights.reserve(Combined.size());
+  for (const auto &I : Combined)
+    Weights.push_back(I.second);
+}
+static void combineWeights(WeightList &Weights) {
+  // Use a hash table for many successors to keep this linear.
+  if (Weights.size() > 128) {
+    combineWeightsByHashing(Weights);
+    return;
+  }
+
+  combineWeightsBySorting(Weights);
+}
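+// Round-to-nearest right shift.  E.g. (illustrative), shiftRightAndRound(5, 1)
+// == 3, since the dropped low bit of 0b101 rounds the result up.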
+static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
+  assert(Shift >= 0);
+  assert(Shift < 64);
+  if (!Shift)
+    return N;
+  return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
+}
+void Distribution::normalize() {
+  // Early exit for termination nodes.
+  if (Weights.empty())
+    return;
+
+  // Only bother if there are multiple successors.
+  if (Weights.size() > 1)
+    combineWeights(Weights);
+
+  // Early exit when combined into a single successor.
+  if (Weights.size() == 1) {
+    Total = 1;
+    Weights.front().Amount = 1;
+    return;
+  }
+
+  // Determine how much to shift right so that the total fits into 32-bits.
+  //
+  // If we shift at all, shift by 1 extra.  Otherwise, the lower limit of 1
+  // for each weight can cause a 32-bit overflow.
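+  //
+  // E.g. (illustrative): a 40-bit Total has 24 leading zeros, so Shift == 9
+  // and the recomputed total stays safely below 2^32.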
+  int Shift = 0;
+  if (DidOverflow)
+    Shift = 33;
+  else if (Total > UINT32_MAX)
+    Shift = 33 - countLeadingZeros(Total);
+
+  // Early exit if nothing needs to be scaled.
+  if (!Shift)
+    return;
+
+  // Recompute the total through accumulation (rather than shifting it) so that
+  // it's accurate after shifting.
+  Total = 0;
+
+  // Sum the weights to each node and shift right if necessary.
+  for (Weight &W : Weights) {
+    // Scale down below UINT32_MAX.  Since Shift is larger than necessary, we
+    // can round here without concern about overflow.
+    assert(W.TargetNode.isValid());
+    W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
+    assert(W.Amount <= UINT32_MAX);
+
+    // Update the total.
+    Total += W.Amount;
+  }
+  assert(Total <= UINT32_MAX);
+}
+
+void BlockFrequencyInfoImplBase::clear() {
+  // Swap with a default-constructed std::vector, since std::vector<>::clear()
+  // does not actually clear heap storage.
+  std::vector<FrequencyData>().swap(Freqs);
+  std::vector<WorkingData>().swap(Working);
+  Loops.clear();
+}
+
+/// \brief Clear all memory not needed downstream.
+///
+/// Releases all memory not used downstream.  In particular, saves Freqs.
+static void cleanup(BlockFrequencyInfoImplBase &BFI) {
+  std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
+  BFI.clear();
+  BFI.Freqs = std::move(SavedFreqs);
+}
+
+bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
+                                           const LoopData *OuterLoop,
+                                           const BlockNode &Pred,
+                                           const BlockNode &Succ,
+                                           uint64_t Weight) {
+  if (!Weight)
+    Weight = 1;
+
+  auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
+    return OuterLoop && OuterLoop->isHeader(Node);
+  };
+
+  BlockNode Resolved = Working[Succ.Index].getResolvedNode();
+
+#ifndef NDEBUG
+  auto debugSuccessor = [&](const char *Type) {
+    dbgs() << "  =>"
+           << " [" << Type << "] weight = " << Weight;
+    if (!isLoopHeader(Resolved))
+      dbgs() << ", succ = " << getBlockName(Succ);
+    if (Resolved != Succ)
+      dbgs() << ", resolved = " << getBlockName(Resolved);
+    dbgs() << "\n";
+  };
+  (void)debugSuccessor;
+#endif
+
+  if (isLoopHeader(Resolved)) {
+    DEBUG(debugSuccessor("backedge"));
+    Dist.addBackedge(OuterLoop->getHeader(), Weight);
+    return true;
+  }
+
+  if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
+    DEBUG(debugSuccessor("  exit  "));
+    Dist.addExit(Resolved, Weight);
+    return true;
+  }
+
+  if (Resolved < Pred) {
+    if (!isLoopHeader(Pred)) {
+      // If OuterLoop is an irreducible loop, we can't actually handle this.
+      assert((!OuterLoop || !OuterLoop->isIrreducible()) &&
+             "unhandled irreducible control flow");
+
+      // Irreducible backedge.  Abort.
+      DEBUG(debugSuccessor("abort!!!"));
+      return false;
+    }
+
+    // If "Pred" is a loop header, then this isn't really a backedge; rather,
+    // OuterLoop must be irreducible.  These false backedges can come only from
+    // secondary loop headers.
+    assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
+           "unhandled irreducible control flow");
+  }
+
+  DEBUG(debugSuccessor(" local  "));
+  Dist.addLocal(Resolved, Weight);
+  return true;
+}
+
+bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
+    const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
+  // Copy the exit map into Dist.
+  for (const auto &I : Loop.Exits)
+    if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first,
+                   I.second.getMass()))
+      // Irreducible backedge.
+      return false;
+
+  return true;
+}
+
+/// \brief Get the maximum allowed loop scale.
+///
+/// Gives the maximum number of estimated iterations allowed for a loop.  Very
+/// large numbers cause problems downstream (even within 64-bits).
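+///
+/// The returned Float(1, 12) represents 1 * 2^12 = 4096 iterations.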
+static Float getMaxLoopScale() { return Float(1, 12); }
+
+/// \brief Compute the loop scale for a loop.
+void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
+  // Compute loop scale.
+  DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
+
+  // LoopScale == 1 / ExitMass
+  // ExitMass == HeadMass - BackedgeMass
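+  //
+  // E.g. (illustrative): if 3/4 of the header's mass flows back along
+  // backedges, ExitMass == 1/4 and LoopScale == 4, i.e. the loop body is
+  // expected to execute about four times per entry.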
+  BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass;
+
+  // Block scale stores the inverse of the scale.
+  Loop.Scale = ExitMass.toFloat().inverse();
+
+  DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
+               << " - " << Loop.BackedgeMass << ")\n"
+               << " - scale = " << Loop.Scale << "\n");
+
+  if (Loop.Scale > getMaxLoopScale()) {
+    Loop.Scale = getMaxLoopScale();
+    DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n");
+  }
+}
+
+/// \brief Package up a loop.
+void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
+  DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
+
+  // Clear the subloop exits to prevent quadratic memory usage.
+  for (const BlockNode &M : Loop.Nodes) {
+    if (auto *Loop = Working[M.Index].getPackagedLoop())
+      Loop->Exits.clear();
+    DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
+  }
+  Loop.IsPackaged = true;
+}
+
+void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
+                                                LoopData *OuterLoop,
+                                                Distribution &Dist) {
+  BlockMass Mass = Working[Source.Index].getMass();
+  DEBUG(dbgs() << "  => mass:  " << Mass << "\n");
+
+  // Distribute mass to successors as laid out in Dist.
+  DitheringDistributer D(Dist, Mass);
+
+#ifndef NDEBUG
+  auto debugAssign = [&](const BlockNode &T, const BlockMass &M,
+                         const char *Desc) {
+    dbgs() << "  => assign " << M << " (" << D.RemMass << ")";
+    if (Desc)
+      dbgs() << " [" << Desc << "]";
+    if (T.isValid())
+      dbgs() << " to " << getBlockName(T);
+    dbgs() << "\n";
+  };
+  (void)debugAssign;
+#endif
+
+  for (const Weight &W : Dist.Weights) {
+    // Check for a local edge (non-backedge and non-exit).
+    BlockMass Taken = D.takeMass(W.Amount);
+    if (W.Type == Weight::Local) {
+      Working[W.TargetNode.Index].getMass() += Taken;
+      DEBUG(debugAssign(W.TargetNode, Taken, nullptr));
+      continue;
+    }
+
+    // Backedges and exits only make sense if we're processing a loop.
+    assert(OuterLoop && "backedge or exit outside of loop");
+
+    // Check for a backedge.
+    if (W.Type == Weight::Backedge) {
+      OuterLoop->BackedgeMass += Taken;
+      DEBUG(debugAssign(BlockNode(), Taken, "back"));
+      continue;
+    }
+
+    // This must be an exit.
+    assert(W.Type == Weight::Exit);
+    OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
+    DEBUG(debugAssign(W.TargetNode, Taken, "exit"));
+  }
+}
+
+static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
+                                     const Float &Min, const Float &Max) {
+  // Scale the Factor to a size that creates integers.  Ideally, integers would
+  // be scaled so that Max == UINT64_MAX so that they can be best
+  // differentiated.  However, the register allocator currently deals poorly
+  // with large numbers.  Instead, push Min up a little from 1 to give some
+  // room to differentiate small, unequal numbers.
+  //
+  // TODO: fix issues downstream so that ScalingFactor can be Float(1,64)/Max.
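+  //
+  // E.g. (illustrative): with Min == 0.5 the factor starts at 2; if the
+  // spread Max/Min fits within 60 bits it is bumped to 16, so the smallest
+  // frequency maps to the integer 8 rather than 1.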
+  Float ScalingFactor = Min.inverse();
+  if ((Max / Min).lg() < 60)
+    ScalingFactor <<= 3;
+
+  // Translate the floats to integers.
+  DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
+               << ", factor = " << ScalingFactor << "\n");
+  for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
+    Float Scaled = BFI.Freqs[Index].Floating * ScalingFactor;
+    BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
+    DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
+                 << BFI.Freqs[Index].Floating << ", scaled = " << Scaled
+                 << ", int = " << BFI.Freqs[Index].Integer << "\n");
+  }
+}
+
+/// \brief Unwrap a loop package.
+///
+/// Visits all the members of a loop, adjusting their BlockData according to
+/// the loop's pseudo-node.
+static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
+  DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
+               << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
+               << "\n");
+  Loop.Scale *= Loop.Mass.toFloat();
+  Loop.IsPackaged = false;
+  DEBUG(dbgs() << "  => combined-scale = " << Loop.Scale << "\n");
+
+  // Propagate the head scale through the loop.  Since members are visited in
+  // RPO, the head scale will be updated by the loop scale first, and then the
+  // final head scale will be used to update the rest of the members.
+  for (const BlockNode &N : Loop.Nodes) {
+    const auto &Working = BFI.Working[N.Index];
+    Float &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
+                                    : BFI.Freqs[N.Index].Floating;
+    Float New = Loop.Scale * F;
+    DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New
+                 << "\n");
+    F = New;
+  }
+}
+
+void BlockFrequencyInfoImplBase::unwrapLoops() {
+  // Set initial frequencies from loop-local masses.
+  for (size_t Index = 0; Index < Working.size(); ++Index)
+    Freqs[Index].Floating = Working[Index].Mass.toFloat();
+
+  for (LoopData &Loop : Loops)
+    unwrapLoop(*this, Loop);
+}
+
+void BlockFrequencyInfoImplBase::finalizeMetrics() {
+  // Unwrap loop packages in reverse post-order, tracking min and max
+  // frequencies.
+  auto Min = Float::getLargest();
+  auto Max = Float::getZero();
+  for (size_t Index = 0; Index < Working.size(); ++Index) {
+    // Update min/max scale.
+    Min = std::min(Min, Freqs[Index].Floating);
+    Max = std::max(Max, Freqs[Index].Floating);
+  }
+
+  // Convert to integers.
+  convertFloatingToInteger(*this, Min, Max);
+
+  // Clean up data structures.
+  cleanup(*this);
+
+  // Print out the final stats.
+  DEBUG(dump());
+}
+
+BlockFrequency
+BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
+  if (!Node.isValid())
+    return 0;
+  return Freqs[Node.Index].Integer;
+}
+Float
+BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
+  if (!Node.isValid())
+    return Float::getZero();
+  return Freqs[Node.Index].Floating;
+}
+
+std::string
+BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
+  return std::string();
+}
+std::string
+BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const {
+  return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*");
+}
+
+raw_ostream &
+BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
+                                           const BlockNode &Node) const {
+  return OS << getFloatingBlockFreq(Node);
+}
+
+raw_ostream &
+BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
+                                           const BlockFrequency &Freq) const {
+  Float Block(Freq.getFrequency(), 0);
+  Float Entry(getEntryFreq(), 0);
+
+  return OS << Block / Entry;
+}
+
+void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) {
+  Start = OuterLoop.getHeader();
+  Nodes.reserve(OuterLoop.Nodes.size());
+  for (auto N : OuterLoop.Nodes)
+    addNode(N);
+  indexNodes();
+}
+void IrreducibleGraph::addNodesInFunction() {
+  Start = 0;
+  for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
+    if (!BFI.Working[Index].isPackaged())
+      addNode(Index);
+  indexNodes();
+}
+void IrreducibleGraph::indexNodes() {
+  for (auto &I : Nodes)
+    Lookup[I.Node.Index] = &I;
+}
+void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ,
+                               const BFIBase::LoopData *OuterLoop) {
+  if (OuterLoop && OuterLoop->isHeader(Succ))
+    return;
+  auto L = Lookup.find(Succ.Index);
+  if (L == Lookup.end())
+    return;
+  IrrNode &SuccIrr = *L->second;
+  Irr.Edges.push_back(&SuccIrr);
+  SuccIrr.Edges.push_front(&Irr);
+  ++SuccIrr.NumIn;
+}
+
+namespace llvm {
+template <> struct GraphTraits<IrreducibleGraph> {
+  typedef bfi_detail::IrreducibleGraph GraphT;
+
+  typedef const GraphT::IrrNode NodeType;
+  typedef GraphT::IrrNode::iterator ChildIteratorType;
+
+  static const NodeType *getEntryNode(const GraphT &G) {
+    return G.StartIrr;
+  }
+  static ChildIteratorType child_begin(NodeType *N) { return N->succ_begin(); }
+  static ChildIteratorType child_end(NodeType *N) { return N->succ_end(); }
+};
+}
+
+/// \brief Find extra irreducible headers.
+///
+/// Find entry blocks and other blocks with backedges, which exist when \c G
+/// contains irreducible sub-SCCs.
+static void findIrreducibleHeaders(
+    const BlockFrequencyInfoImplBase &BFI,
+    const IrreducibleGraph &G,
+    const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
+    LoopData::NodeList &Headers, LoopData::NodeList &Others) {
+  // Map from each node in the SCC to whether it's an entry block.
+  SmallDenseMap<const IrreducibleGraph::IrrNode *, bool, 8> InSCC;
+
+  // InSCC also acts as the set of nodes in the graph.  Seed it.
+  for (const auto *I : SCC)
+    InSCC[I] = false;
+
+  for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
+    auto &Irr = *I->first;
+    for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
+      if (InSCC.count(P))
+        continue;
+
+      // This is an entry block.
+      I->second = true;
+      Headers.push_back(Irr.Node);
+      DEBUG(dbgs() << "  => entry = " << BFI.getBlockName(Irr.Node) << "\n");
+      break;
+    }
+  }
+  assert(Headers.size() >= 2 && "Should be irreducible");
+  if (Headers.size() == InSCC.size()) {
+    // Every block is a header.
+    std::sort(Headers.begin(), Headers.end());
+    return;
+  }
+
+  // Look for extra headers from irreducible sub-SCCs.
+  for (const auto &I : InSCC) {
+    // Entry blocks are already headers.
+    if (I.second)
+      continue;
+
+    auto &Irr = *I.first;
+    for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
+      // Skip forward edges.
+      if (P->Node < Irr.Node)
+        continue;
+
+      // Skip predecessors from entry blocks.  These can have inverted
+      // ordering.
+      if (InSCC.lookup(P))
+        continue;
+
+      // Store the extra header.
+      Headers.push_back(Irr.Node);
+      DEBUG(dbgs() << "  => extra = " << BFI.getBlockName(Irr.Node) << "\n");
+      break;
+    }
+    if (Headers.back() == Irr.Node)
+      // Added this as a header.
+      continue;
+
+    // This is not a header.
+    Others.push_back(Irr.Node);
+    DEBUG(dbgs() << "  => other = " << BFI.getBlockName(Irr.Node) << "\n");
+  }
+  std::sort(Headers.begin(), Headers.end());
+  std::sort(Others.begin(), Others.end());
+}
+
+static void createIrreducibleLoop(
+    BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G,
+    LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
+    const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
+  // Translate the SCC into RPO.
+  DEBUG(dbgs() << " - found-scc\n");
+
+  LoopData::NodeList Headers;
+  LoopData::NodeList Others;
+  findIrreducibleHeaders(BFI, G, SCC, Headers, Others);
+
+  auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
+                                Headers.end(), Others.begin(), Others.end());
+
+  // Update loop hierarchy.
+  for (const auto &N : Loop->Nodes)
+    if (BFI.Working[N.Index].isLoopHeader())
+      BFI.Working[N.Index].Loop->Parent = &*Loop;
+    else
+      BFI.Working[N.Index].Loop = &*Loop;
+}
+
+iterator_range<std::list<LoopData>::iterator>
+BlockFrequencyInfoImplBase::analyzeIrreducible(
+    const IrreducibleGraph &G, LoopData *OuterLoop,
+    std::list<LoopData>::iterator Insert) {
+  assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
+  auto Prev = OuterLoop ? std::prev(Insert) : Loops.end();
+
+  for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
+    if (I->size() < 2)
+      continue;
+
+    // Translate the SCC into RPO.
+    createIrreducibleLoop(*this, G, OuterLoop, Insert, *I);
+  }
+
+  if (OuterLoop)
+    return make_range(std::next(Prev), Insert);
+  return make_range(Loops.begin(), Insert);
+}
+
+void
+BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) {
+  OuterLoop.Exits.clear();
+  OuterLoop.BackedgeMass = BlockMass::getEmpty();
+  auto O = OuterLoop.Nodes.begin() + 1;
+  for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
+    if (!Working[I->Index].isPackaged())
+      *O++ = *I;
+  OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
+}
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index b901c54..bbd8750 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "branch-prob"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -25,6 +24,8 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "branch-prob"
+
 INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob",
                       "Branch Probability Analysis", false, true)
 INITIALIZE_PASS_DEPENDENCY(LoopInfo)
@@ -322,6 +323,9 @@
       InEdges.push_back(I.getSuccessorIndex());
   }
 
+  if (BackEdges.empty() && ExitingEdges.empty())
+    return false;
+
   if (uint32_t numBackEdges = BackEdges.size()) {
     uint32_t backWeight = LBH_TAKEN_WEIGHT / numBackEdges;
     if (backWeight < NORMAL_WEIGHT)
@@ -557,7 +561,7 @@
 BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const {
   uint32_t Sum = 0;
   uint32_t MaxWeight = 0;
-  BasicBlock *MaxSucc = 0;
+  BasicBlock *MaxSucc = nullptr;
 
   for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
     BasicBlock *Succ = *I;
@@ -577,7 +581,7 @@
   if (BranchProbability(MaxWeight, Sum) > BranchProbability(4, 5))
     return MaxSucc;
 
-  return 0;
+  return nullptr;
 }
 
 /// Get the raw edge weight for the edge. If can't find it, return
@@ -594,11 +598,9 @@
   return DEFAULT_WEIGHT;
 }
 
-uint32_t
-BranchProbabilityInfo::
-getEdgeWeight(const BasicBlock *Src, succ_const_iterator Dst) const {
-  size_t index = std::distance(succ_begin(Src), Dst);
-  return getEdgeWeight(Src, index);
+uint32_t BranchProbabilityInfo::getEdgeWeight(const BasicBlock *Src,
+                                              succ_const_iterator Dst) const {
+  return getEdgeWeight(Src, Dst.getSuccessorIndex());
 }
 
 /// Get the raw edge weight calculated for the block pair. This returns the sum
diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp
index 6963760..8ef5302 100644
--- a/lib/Analysis/CFG.cpp
+++ b/lib/Analysis/CFG.cpp
@@ -123,7 +123,7 @@
                              const BasicBlock *BB1, const BasicBlock *BB2) {
   const Loop *L1 = getOutermostLoop(LI, BB1);
   const Loop *L2 = getOutermostLoop(LI, BB2);
-  return L1 != NULL && L1 == L2;
+  return L1 != nullptr && L1 == L2;
 }
 
 static bool isPotentiallyReachableInner(SmallVectorImpl<BasicBlock *> &Worklist,
@@ -133,7 +133,7 @@
   // When the stop block is unreachable, it's dominated from everywhere,
   // regardless of whether there's a path between the two blocks.
   if (DT && !DT->isReachableFromEntry(StopBB))
-    DT = 0;
+    DT = nullptr;
 
   // Limit the number of blocks we visit. The goal is to avoid run-away compile
   // times on large CFGs without hampering sensible code. Arbitrarily chosen.
@@ -156,7 +156,7 @@
       return true;
     }
 
-    if (const Loop *Outer = LI ? getOutermostLoop(LI, BB) : 0) {
+    if (const Loop *Outer = LI ? getOutermostLoop(LI, BB) : nullptr) {
       // All blocks in a single loop are reachable from all other blocks. From
       // any of these blocks, we can skip directly to the exits of the loop,
       // ignoring any other blocks inside the loop body.
@@ -200,7 +200,7 @@
 
     // If the block is in a loop then we can reach any instruction in the block
     // from any other instruction in the block by going around a backedge.
-    if (LI && LI->getLoopFor(BB) != 0)
+    if (LI && LI->getLoopFor(BB) != nullptr)
       return true;
 
     // Linear scan, start at 'A', see whether we hit 'B' or the end first.
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index 537d6d1..c2c19d6 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -19,6 +19,7 @@
 
 #include "llvm/Analysis/CFGPrinter.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/FileSystem.h"
 using namespace llvm;
 
 namespace {
@@ -33,7 +34,7 @@
       return false;
     }
 
-    void print(raw_ostream &OS, const Module* = 0) const override {}
+    void print(raw_ostream &OS, const Module* = nullptr) const override {}
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesAll();
@@ -56,7 +57,7 @@
       return false;
     }
 
-    void print(raw_ostream &OS, const Module* = 0) const override {}
+    void print(raw_ostream &OS, const Module* = nullptr) const override {}
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesAll();
@@ -90,7 +91,7 @@
       return false;
     }
 
-    void print(raw_ostream &OS, const Module* = 0) const override {}
+    void print(raw_ostream &OS, const Module* = nullptr) const override {}
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesAll();
@@ -123,7 +124,7 @@
       errs() << "\n";
       return false;
     }
-    void print(raw_ostream &OS, const Module* = 0) const override {}
+    void print(raw_ostream &OS, const Module* = nullptr) const override {}
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesAll();
@@ -147,8 +148,8 @@
 
 /// viewCFGOnly - This function is meant for use from the debugger.  It works
 /// just like viewCFG, but it does not include the contents of basic blocks
-/// into the nodes, just the label.  If you are only interested in the CFG t
-/// his can make the graph smaller.
+/// into the nodes, just the label.  If you are only interested in the CFG
+/// this can make the graph smaller.
 ///
 void Function::viewCFGOnly() const {
   ViewGraph(this, "cfg" + getName(), true);
diff --git a/lib/Analysis/CGSCCPassManager.cpp b/lib/Analysis/CGSCCPassManager.cpp
new file mode 100644
index 0000000..5d1d8a9
--- /dev/null
+++ b/lib/Analysis/CGSCCPassManager.cpp
@@ -0,0 +1,167 @@
+//===- CGSCCPassManager.cpp - Managing & running CGSCC passes -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+DebugPM("debug-cgscc-pass-manager", cl::Hidden,
+        cl::desc("Print CGSCC pass management debugging information"));
+
+PreservedAnalyses CGSCCPassManager::run(LazyCallGraph::SCC *C,
+                                        CGSCCAnalysisManager *AM) {
+  PreservedAnalyses PA = PreservedAnalyses::all();
+
+  if (DebugPM)
+    dbgs() << "Starting CGSCC pass manager run.\n";
+
+  for (unsigned Idx = 0, Size = Passes.size(); Idx != Size; ++Idx) {
+    if (DebugPM)
+      dbgs() << "Running CGSCC pass: " << Passes[Idx]->name() << "\n";
+
+    PreservedAnalyses PassPA = Passes[Idx]->run(C, AM);
+    if (AM)
+      AM->invalidate(C, PassPA);
+    PA.intersect(std::move(PassPA));
+  }
+
+  if (DebugPM)
+    dbgs() << "Finished CGSCC pass manager run.\n";
+
+  return PA;
+}
+
+bool CGSCCAnalysisManager::empty() const {
+  assert(CGSCCAnalysisResults.empty() == CGSCCAnalysisResultLists.empty() &&
+         "The storage and index of analysis results disagree on how many there "
+         "are!");
+  return CGSCCAnalysisResults.empty();
+}
+
+void CGSCCAnalysisManager::clear() {
+  CGSCCAnalysisResults.clear();
+  CGSCCAnalysisResultLists.clear();
+}
+
+CGSCCAnalysisManager::ResultConceptT &
+CGSCCAnalysisManager::getResultImpl(void *PassID, LazyCallGraph::SCC *C) {
+  CGSCCAnalysisResultMapT::iterator RI;
+  bool Inserted;
+  std::tie(RI, Inserted) = CGSCCAnalysisResults.insert(std::make_pair(
+      std::make_pair(PassID, C), CGSCCAnalysisResultListT::iterator()));
+
+  // If we don't have a cached result for this SCC, look up the pass and
+  // run it to produce a result, which we then add to the cache.
+  if (Inserted) {
+    CGSCCAnalysisResultListT &ResultList = CGSCCAnalysisResultLists[C];
+    ResultList.emplace_back(PassID, lookupPass(PassID).run(C, this));
+    RI->second = std::prev(ResultList.end());
+  }
+
+  return *RI->second->second;
+}
+
+CGSCCAnalysisManager::ResultConceptT *
+CGSCCAnalysisManager::getCachedResultImpl(void *PassID,
+                                          LazyCallGraph::SCC *C) const {
+  CGSCCAnalysisResultMapT::const_iterator RI =
+      CGSCCAnalysisResults.find(std::make_pair(PassID, C));
+  return RI == CGSCCAnalysisResults.end() ? nullptr : &*RI->second->second;
+}
+
+void CGSCCAnalysisManager::invalidateImpl(void *PassID, LazyCallGraph::SCC *C) {
+  CGSCCAnalysisResultMapT::iterator RI =
+      CGSCCAnalysisResults.find(std::make_pair(PassID, C));
+  if (RI == CGSCCAnalysisResults.end())
+    return;
+
+  CGSCCAnalysisResultLists[C].erase(RI->second);
+}
+
+void CGSCCAnalysisManager::invalidateImpl(LazyCallGraph::SCC *C,
+                                          const PreservedAnalyses &PA) {
+  // Clear all the invalidated results associated specifically with this
+  // SCC.
+  SmallVector<void *, 8> InvalidatedPassIDs;
+  CGSCCAnalysisResultListT &ResultsList = CGSCCAnalysisResultLists[C];
+  for (CGSCCAnalysisResultListT::iterator I = ResultsList.begin(),
+                                          E = ResultsList.end();
+       I != E;)
+    if (I->second->invalidate(C, PA)) {
+      InvalidatedPassIDs.push_back(I->first);
+      I = ResultsList.erase(I);
+    } else {
+      ++I;
+    }
+  while (!InvalidatedPassIDs.empty())
+    CGSCCAnalysisResults.erase(
+        std::make_pair(InvalidatedPassIDs.pop_back_val(), C));
+  CGSCCAnalysisResultLists.erase(C);
+}
+
+char CGSCCAnalysisManagerModuleProxy::PassID;
+
+CGSCCAnalysisManagerModuleProxy::Result
+CGSCCAnalysisManagerModuleProxy::run(Module *M) {
+  assert(CGAM->empty() && "CGSCC analyses ran prior to the module proxy!");
+  return Result(*CGAM);
+}
+
+CGSCCAnalysisManagerModuleProxy::Result::~Result() {
+  // Clear out the analysis manager if we're being destroyed -- it means we
+  // didn't even see an invalidate call when we got invalidated.
+  CGAM->clear();
+}
+
+bool CGSCCAnalysisManagerModuleProxy::Result::invalidate(
+    Module *M, const PreservedAnalyses &PA) {
+  // If this proxy isn't marked as preserved, then we can't even invalidate
+  // individual CGSCC analyses: there may be an invalid set of SCC objects in
+  // the cache, making it impossible to incrementally preserve them.  Just
+  // clear the entire manager.
+  if (!PA.preserved(ID()))
+    CGAM->clear();
+
+  // Return false to indicate that this result is still a valid proxy.
+  return false;
+}
+
+char ModuleAnalysisManagerCGSCCProxy::PassID;
+
+char FunctionAnalysisManagerCGSCCProxy::PassID;
+
+FunctionAnalysisManagerCGSCCProxy::Result
+FunctionAnalysisManagerCGSCCProxy::run(LazyCallGraph::SCC *C) {
+  assert(FAM->empty() && "Function analyses ran prior to the CGSCC proxy!");
+  return Result(*FAM);
+}
+
+FunctionAnalysisManagerCGSCCProxy::Result::~Result() {
+  // Clear out the analysis manager if we're being destroyed -- it means we
+  // didn't even see an invalidate call when we got invalidated.
+  FAM->clear();
+}
+
+bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate(
+    LazyCallGraph::SCC *C, const PreservedAnalyses &PA) {
+  // If this proxy isn't marked as preserved, then we can't even invalidate
+  // individual function analyses: there may be an invalid set of Function
+  // objects in the cache, making it impossible to incrementally preserve
+  // them.  Just clear the entire manager.
+  if (!PA.preserved(ID()))
+    FAM->clear();
+
+  // Return false to indicate that this result is still a valid proxy.
+  return false;
+}
+
+char CGSCCAnalysisManagerFunctionProxy::PassID;
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index c6d4573..b546789 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -7,9 +7,11 @@
   Analysis.cpp
   BasicAliasAnalysis.cpp
   BlockFrequencyInfo.cpp
+  BlockFrequencyInfoImpl.cpp
   BranchProbabilityInfo.cpp
   CFG.cpp
   CFGPrinter.cpp
+  CGSCCPassManager.cpp
   CaptureTracking.cpp
   CostModel.cpp
   CodeMetrics.cpp
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 782acfa..0ac1cb5 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -56,7 +56,7 @@
   // Handle a vector->integer cast.
   if (IntegerType *IT = dyn_cast<IntegerType>(DestTy)) {
     VectorType *VTy = dyn_cast<VectorType>(C->getType());
-    if (VTy == 0)
+    if (!VTy)
       return ConstantExpr::getBitCast(C, DestTy);
 
     unsigned NumSrcElts = VTy->getNumElements();
@@ -73,7 +73,7 @@
     }
 
     ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C);
-    if (CDV == 0)
+    if (!CDV)
       return ConstantExpr::getBitCast(C, DestTy);
 
     // Now that we know that the input value is a vector of integers, just shift
@@ -93,7 +93,7 @@
 
   // The code below only handles casts to vectors currently.
   VectorType *DestVTy = dyn_cast<VectorType>(DestTy);
-  if (DestVTy == 0)
+  if (!DestVTy)
     return ConstantExpr::getBitCast(C, DestTy);
 
   // If this is a scalar -> vector cast, convert the input into a <1 x scalar>
@@ -411,32 +411,32 @@
                                         TD.getTypeAllocSizeInBits(LoadTy),
                                         AS);
     } else
-      return 0;
+      return nullptr;
 
     C = FoldBitCast(C, MapTy, TD);
     if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, TD))
       return FoldBitCast(Res, LoadTy, TD);
-    return 0;
+    return nullptr;
   }
 
   unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8;
   if (BytesLoaded > 32 || BytesLoaded == 0)
-    return 0;
+    return nullptr;
 
   GlobalValue *GVal;
   APInt Offset;
   if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD))
-    return 0;
+    return nullptr;
 
   GlobalVariable *GV = dyn_cast<GlobalVariable>(GVal);
   if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
       !GV->getInitializer()->getType()->isSized())
-    return 0;
+    return nullptr;
 
   // If we're loading off the beginning of the global, some bytes may be valid,
   // but we don't try to handle this.
   if (Offset.isNegative())
-    return 0;
+    return nullptr;
 
   // If we're not accessing anything in this constant, the result is undefined.
   if (Offset.getZExtValue() >=
@@ -446,7 +446,7 @@
   unsigned char RawBytes[32] = {0};
   if (!ReadDataFromGlobal(GV->getInitializer(), Offset.getZExtValue(), RawBytes,
                           BytesLoaded, TD))
-    return 0;
+    return nullptr;
 
   APInt ResultVal = APInt(IntType->getBitWidth(), 0);
   if (TD.isLittleEndian()) {
@@ -466,6 +466,52 @@
   return ConstantInt::get(IntType->getContext(), ResultVal);
 }
 
+static Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE,
+                                                const DataLayout *DL) {
+  if (!DL)
+    return nullptr;
+  auto *DestPtrTy = dyn_cast<PointerType>(CE->getType());
+  if (!DestPtrTy)
+    return nullptr;
+  Type *DestTy = DestPtrTy->getElementType();
+
+  Constant *C = ConstantFoldLoadFromConstPtr(CE->getOperand(0), DL);
+  if (!C)
+    return nullptr;
+
+  do {
+    Type *SrcTy = C->getType();
+
+    // If the type sizes are the same and a cast is legal, just directly
+    // cast the constant.
+    if (DL->getTypeSizeInBits(DestTy) == DL->getTypeSizeInBits(SrcTy)) {
+      Instruction::CastOps Cast = Instruction::BitCast;
+      // If we are going from a pointer to int or vice versa, we spell the cast
+      // differently.
+      if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
+        Cast = Instruction::IntToPtr;
+      else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
+        Cast = Instruction::PtrToInt;
+
+      if (CastInst::castIsValid(Cast, C, DestTy))
+        return ConstantExpr::getCast(Cast, C, DestTy);
+    }
+
+    // If this isn't an aggregate type, there is nothing we can do to drill down
+    // and find a bitcastable constant.
+    if (!SrcTy->isAggregateType())
+      return nullptr;
+
+    // We're simulating a load through a pointer that was bitcast to point to
+    // a different type, so we can try to walk down through the initial
+    // elements of an aggregate to see if some part of the aggregate is
+    // castable to implement the "load" semantic model.
+    C = C->getAggregateElement(0u);
+  } while (C);
+
+  return nullptr;
+}
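+
+// Illustrative example (not from this patch): given the constant global
+//   @g = constant { i32*, i32 } { i32* @p, i32 7 }
+// a load through "bitcast ({ i32*, i32 }* @g to i32**)" drills into aggregate
+// element 0 and, the pointer sizes being equal, folds to i32* @p.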
+
 /// ConstantFoldLoadFromConstPtr - Return the value that a load from C would
 /// produce if it is constant and determinable.  If this is not determinable,
 /// return null.
@@ -479,7 +525,7 @@
   // If the loaded value isn't a constant expr, we can't handle it.
   ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
   if (!CE)
-    return 0;
+    return nullptr;
 
   if (CE->getOpcode() == Instruction::GetElementPtr) {
     if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) {
@@ -491,6 +537,10 @@
     }
   }
 
+  if (CE->getOpcode() == Instruction::BitCast)
+    if (Constant *LoadedC = ConstantFoldLoadThroughBitcast(CE, TD))
+      return LoadedC;
+
   // Instead of loading constant c string, use corresponding integer value
   // directly if string length is small enough.
   StringRef Str;
@@ -542,16 +592,16 @@
   // Try hard to fold loads from bitcasted strange and non-type-safe things.
   if (TD)
     return FoldReinterpretLoadFromConstPtr(CE, *TD);
-  return 0;
+  return nullptr;
 }
 
 static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){
-  if (LI->isVolatile()) return 0;
+  if (LI->isVolatile()) return nullptr;
 
   if (Constant *C = dyn_cast<Constant>(LI->getOperand(0)))
     return ConstantFoldLoadFromConstPtr(C, TD);
 
-  return 0;
+  return nullptr;
 }
 
 /// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression.
@@ -571,8 +621,8 @@
     unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType()->getScalarType());
     APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0);
     APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0);
-    ComputeMaskedBits(Op0, KnownZero0, KnownOne0, DL);
-    ComputeMaskedBits(Op1, KnownZero1, KnownOne1, DL);
+    computeKnownBits(Op0, KnownZero0, KnownOne0, DL);
+    computeKnownBits(Op1, KnownZero1, KnownOne1, DL);
     if ((KnownOne1 | KnownZero0).isAllOnesValue()) {
       // All the bits of Op0 that the 'and' could be masking are already zero.
       return Op0;
@@ -608,7 +658,7 @@
       }
   }
 
-  return 0;
+  return nullptr;
 }
 
 /// CastGEPIndices - If array indices are not pointer-sized integers,
@@ -618,7 +668,7 @@
                                 Type *ResultTy, const DataLayout *TD,
                                 const TargetLibraryInfo *TLI) {
   if (!TD)
-    return 0;
+    return nullptr;
 
   Type *IntPtrTy = TD->getIntPtrType(ResultTy);
 
@@ -641,7 +691,7 @@
   }
 
   if (!Any)
-    return 0;
+    return nullptr;
 
   Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs);
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
@@ -676,7 +726,7 @@
   Constant *Ptr = Ops[0];
   if (!TD || !Ptr->getType()->getPointerElementType()->isSized() ||
       !Ptr->getType()->isPointerTy())
-    return 0;
+    return nullptr;
 
   Type *IntPtrTy = TD->getIntPtrType(Ptr->getType());
   Type *ResultElementTy = ResultTy->getPointerElementType();
@@ -690,7 +740,7 @@
       // "inttoptr (sub (ptrtoint Ptr), V)"
       if (Ops.size() == 2 && ResultElementTy->isIntegerTy(8)) {
         ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[1]);
-        assert((CE == 0 || CE->getType() == IntPtrTy) &&
+        assert((!CE || CE->getType() == IntPtrTy) &&
                "CastGEPIndices didn't canonicalize index types!");
         if (CE && CE->getOpcode() == Instruction::Sub &&
             CE->getOperand(0)->isNullValue()) {
@@ -702,7 +752,7 @@
           return Res;
         }
       }
-      return 0;
+      return nullptr;
     }
 
   unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy);
@@ -765,7 +815,7 @@
 
         // Only handle pointers to sized types, not pointers to functions.
         if (!ATy->getElementType()->isSized())
-          return 0;
+          return nullptr;
       }
 
       // Determine which element of the array the offset points into.
@@ -810,7 +860,7 @@
   // type, then the offset is pointing into the middle of an indivisible
   // member, so we can't simplify it.
   if (Offset != 0)
-    return 0;
+    return nullptr;
 
   // Create a GEP.
   Constant *C = ConstantExpr::getGetElementPtr(Ptr, NewIdxs);
@@ -841,7 +891,7 @@
                                         const TargetLibraryInfo *TLI) {
   // Handle PHI nodes quickly here...
   if (PHINode *PN = dyn_cast<PHINode>(I)) {
-    Constant *CommonValue = 0;
+    Constant *CommonValue = nullptr;
 
     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
       Value *Incoming = PN->getIncomingValue(i);
@@ -854,14 +904,14 @@
       // If the incoming value is not a constant, then give up.
       Constant *C = dyn_cast<Constant>(Incoming);
       if (!C)
-        return 0;
+        return nullptr;
       // Fold the PHI's operands.
       if (ConstantExpr *NewC = dyn_cast<ConstantExpr>(C))
         C = ConstantFoldConstantExpression(NewC, TD, TLI);
       // If the incoming value is a different constant to
       // the one we saw previously, then give up.
       if (CommonValue && C != CommonValue)
-        return 0;
+        return nullptr;
       CommonValue = C;
     }
 
@@ -876,7 +926,7 @@
   for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) {
     Constant *Op = dyn_cast<Constant>(*i);
     if (!Op)
-      return 0;  // All operands not constant!
+      return nullptr;  // All operands not constant!
 
     // Fold the Instruction's operands.
     if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(Op))
@@ -966,14 +1016,14 @@
   }
 
   switch (Opcode) {
-  default: return 0;
+  default: return nullptr;
   case Instruction::ICmp:
   case Instruction::FCmp: llvm_unreachable("Invalid for compares");
   case Instruction::Call:
     if (Function *F = dyn_cast<Function>(Ops.back()))
       if (canConstantFoldCallTo(F))
         return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1), TLI);
-    return 0;
+    return nullptr;
   case Instruction::PtrToInt:
     // If the input is an inttoptr, eliminate the pair.  This requires knowing
     // the width of a pointer, so it can't be done in ConstantExpr::getCast.
@@ -1142,14 +1192,14 @@
 Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
                                                        ConstantExpr *CE) {
   if (!CE->getOperand(1)->isNullValue())
-    return 0;  // Do not allow stepping over the value!
+    return nullptr;  // Do not allow stepping over the value!
 
   // Loop over all of the operands, tracking down which value we are
   // addressing.
   for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i) {
     C = C->getAggregateElement(CE->getOperand(i));
-    if (C == 0)
-      return 0;
+    if (!C)
+      return nullptr;
   }
   return C;
 }
@@ -1164,8 +1214,8 @@
   // addressing.
   for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
     C = C->getAggregateElement(Indices[i]);
-    if (C == 0)
-      return 0;
+    if (!C)
+      return nullptr;
   }
   return C;
 }
@@ -1270,7 +1320,7 @@
   V = NativeFP(V);
   if (sys::llvm_fenv_testexcept()) {
     sys::llvm_fenv_clearexcept();
-    return 0;
+    return nullptr;
   }
 
   return GetConstantFoldFPValue(V, Ty);
@@ -1282,7 +1332,7 @@
   V = NativeFP(V, W);
   if (sys::llvm_fenv_testexcept()) {
     sys::llvm_fenv_clearexcept();
-    return 0;
+    return nullptr;
   }
 
   return GetConstantFoldFPValue(V, Ty);
@@ -1311,7 +1361,7 @@
                                                   /*isSigned=*/true, mode,
                                                   &isExact);
   if (status != APFloat::opOK && status != APFloat::opInexact)
-    return 0;
+    return nullptr;
   return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true);
 }
 
@@ -1345,7 +1395,7 @@
       }
 
       if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
-        return 0;
+        return nullptr;
 
       if (IntrinsicID == Intrinsic::round) {
         APFloat V = Op->getValueAPF();
@@ -1357,7 +1407,7 @@
       /// likely to be aborted with an exception anyway, and some host libms
       /// have known errors raising exceptions.
       if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity())
-        return 0;
+        return nullptr;
 
       /// Currently APFloat versions of these functions do not exist, so we use
       /// the host native double versions.  Float versions are not called
@@ -1396,7 +1446,7 @@
       }
 
       if (!TLI)
-        return 0;
+        return nullptr;
 
       switch (Name[0]) {
       case 'a':
@@ -1467,7 +1517,7 @@
       default:
         break;
       }
-      return 0;
+      return nullptr;
     }
 
     if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) {
@@ -1491,7 +1541,7 @@
         return ConstantFP::get(Ty->getContext(), Val);
       }
       default:
-        return 0;
+        return nullptr;
       }
     }
 
@@ -1523,21 +1573,21 @@
     if (isa<UndefValue>(Operands[0])) {
       if (IntrinsicID == Intrinsic::bswap)
         return Operands[0];
-      return 0;
+      return nullptr;
     }
 
-    return 0;
+    return nullptr;
   }
 
   if (Operands.size() == 2) {
     if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
       if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
-        return 0;
+        return nullptr;
       double Op1V = getValueAsDouble(Op1);
 
       if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
         if (Op2->getType() != Op1->getType())
-          return 0;
+          return nullptr;
 
         double Op2V = getValueAsDouble(Op2);
         if (IntrinsicID == Intrinsic::pow) {
@@ -1550,7 +1600,7 @@
           return ConstantFP::get(Ty->getContext(), V1);
         }
         if (!TLI)
-          return 0;
+          return nullptr;
         if (Name == "pow" && TLI->has(LibFunc::pow))
           return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
         if (Name == "fmod" && TLI->has(LibFunc::fmod))
@@ -1571,7 +1621,7 @@
                                  APFloat((double)std::pow((double)Op1V,
                                                    (int)Op2C->getZExtValue())));
       }
-      return 0;
+      return nullptr;
     }
 
     if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
@@ -1624,13 +1674,13 @@
         }
       }
 
-      return 0;
+      return nullptr;
     }
-    return 0;
+    return nullptr;
   }
 
   if (Operands.size() != 3)
-    return 0;
+    return nullptr;
 
   if (const ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
     if (const ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
@@ -1646,14 +1696,14 @@
           if (s != APFloat::opInvalidOp)
             return ConstantFP::get(Ty->getContext(), V);
 
-          return 0;
+          return nullptr;
         }
         }
       }
     }
   }
 
-  return 0;
+  return nullptr;
 }
 
 static Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
@@ -1690,7 +1740,7 @@
 llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
                        const TargetLibraryInfo *TLI) {
   if (!F->hasName())
-    return 0;
+    return nullptr;
   StringRef Name = F->getName();
 
   Type *Ty = F->getReturnType();
diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp
index b49211d..780b1aa 100644
--- a/lib/Analysis/CostModel.cpp
+++ b/lib/Analysis/CostModel.cpp
@@ -17,8 +17,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define CM_NAME "cost-model"
-#define DEBUG_TYPE CM_NAME
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -32,6 +30,9 @@
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
+#define CM_NAME "cost-model"
+#define DEBUG_TYPE CM_NAME
+
 static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
                                      cl::Hidden,
                                      cl::desc("Recognize reduction patterns."));
@@ -41,7 +42,7 @@
 
   public:
     static char ID; // Class identification, replacement for typeinfo
-    CostModelAnalysis() : FunctionPass(ID), F(0), TTI(0) {
+    CostModelAnalysis() : FunctionPass(ID), F(nullptr), TTI(nullptr) {
       initializeCostModelAnalysisPass(
         *PassRegistry::getPassRegistry());
     }
@@ -101,24 +102,13 @@
   // Check for a splat of a constant or for a non-uniform vector of constants.
   if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
     OpInfo = TargetTransformInfo::OK_NonUniformConstantValue;
-    if (cast<Constant>(V)->getSplatValue() != NULL)
+    if (cast<Constant>(V)->getSplatValue() != nullptr)
       OpInfo = TargetTransformInfo::OK_UniformConstantValue;
   }
 
   return OpInfo;
 }
 
-static bool matchMask(SmallVectorImpl<int> &M1, SmallVectorImpl<int> &M2) {
-  if (M1.size() != M2.size())
-    return false;
-
-  for (unsigned i = 0, e = M1.size(); i != e; ++i)
-    if (M1[i] != M2[i])
-      return false;
-
-  return true;
-}
-
 static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
                                      unsigned Level) {
   // We don't need a shuffle if we just want to have element 0 in position 0 of
@@ -136,7 +126,7 @@
     Mask[i] = val;
 
   SmallVector<int, 16> ActualMask = SI->getShuffleMask();
-  if (!matchMask(Mask, ActualMask))
+  if (Mask != ActualMask)
     return false;
 
   return true;
@@ -150,7 +140,7 @@
   // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
   //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
   // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
-  if (BinOp == 0)
+  if (BinOp == nullptr)
     return false;
 
   assert(BinOp->getType()->isVectorTy() && "Expecting a vector type");
@@ -171,9 +161,9 @@
     return false;
 
   // Shuffle inputs must match.
-  Value *NextLevelOpL = LS ? LS->getOperand(0) : 0;
-  Value *NextLevelOpR = RS ? RS->getOperand(0) : 0;
-  Value *NextLevelOp = 0;
+  Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
+  Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
+  Value *NextLevelOp = nullptr;
   if (NextLevelOpR && NextLevelOpL) {
     // If we have two shuffles their operands must match.
     if (NextLevelOpL != NextLevelOpR)
@@ -198,7 +188,7 @@
 
   // Check that the next level's binary operation exists and matches with the
   // current one.
-  BinaryOperator *NextLevelBinOp = 0;
+  BinaryOperator *NextLevelBinOp = nullptr;
   if (Level + 1 != NumLevels) {
     if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp)))
       return false;
@@ -277,7 +267,7 @@
 
   Value *L = B->getOperand(0);
   Value *R = B->getOperand(1);
-  ShuffleVectorInst *S = 0;
+  ShuffleVectorInst *S = nullptr;
 
   if ((S = dyn_cast<ShuffleVectorInst>(L)))
     return std::make_pair(R, S);
@@ -337,7 +327,7 @@
     std::tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp);
 
     // Check the current reduction operation and the shuffle use the same value.
-    if (Shuffle == 0)
+    if (Shuffle == nullptr)
       return false;
     if (Shuffle->getOperand(0) != NextRdxOp)
       return false;
@@ -349,7 +339,7 @@
     std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);
 
     SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
-    if (!matchMask(ShuffleMask, Mask))
+    if (ShuffleMask != Mask)
       return false;
 
     RdxOp = NextRdxOp;
@@ -478,7 +468,7 @@
 
     if (NumVecElems == Mask.size() && isReverseVectorMask(Mask))
       return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0,
-                                 0);
+                                 nullptr);
     return -1;
   }
   case Instruction::Call:
diff --git a/lib/Analysis/Delinearization.cpp b/lib/Analysis/Delinearization.cpp
index fd4a2f0..9334ceb 100644
--- a/lib/Analysis/Delinearization.cpp
+++ b/lib/Analysis/Delinearization.cpp
@@ -14,8 +14,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DL_NAME "delinearize"
-#define DEBUG_TYPE DL_NAME
 #include "llvm/IR/Constants.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/Passes.h"
@@ -34,6 +32,9 @@
 
 using namespace llvm;
 
+#define DL_NAME "delinearize"
+#define DEBUG_TYPE DL_NAME
+
 namespace {
 
 class Delinearization : public FunctionPass {
@@ -51,7 +52,7 @@
   }
   bool runOnFunction(Function &F) override;
   void getAnalysisUsage(AnalysisUsage &AU) const override;
-  void print(raw_ostream &O, const Module *M = 0) const override;
+  void print(raw_ostream &O, const Module *M = nullptr) const override;
 };
 
 } // end anonymous namespace
@@ -76,7 +77,7 @@
     return Store->getPointerOperand();
   else if (GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(&Inst))
     return Gep->getPointerOperand();
-  return NULL;
+  return nullptr;
 }
 
 void Delinearization::print(raw_ostream &O, const Module *) const {
@@ -92,25 +93,38 @@
     const BasicBlock *BB = Inst->getParent();
     // Delinearize the memory access as analyzed in all the surrounding loops.
     // Do not analyze memory accesses outside loops.
-    for (Loop *L = LI->getLoopFor(BB); L != NULL; L = L->getParentLoop()) {
+    for (Loop *L = LI->getLoopFor(BB); L != nullptr; L = L->getParentLoop()) {
       const SCEV *AccessFn = SE->getSCEVAtScope(getPointerOperand(*Inst), L);
+
+      const SCEVUnknown *BasePointer =
+          dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFn));
+      // Do not delinearize if we cannot find the base pointer.
+      if (!BasePointer)
+        break;
+      AccessFn = SE->getMinusSCEV(AccessFn, BasePointer);
       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(AccessFn);
 
       // Do not try to delinearize memory accesses that are not AddRecs.
       if (!AR)
         break;
 
+      O << "\n";
+      O << "Inst:" << *Inst << "\n";
+      O << "In Loop with Header: " << L->getHeader()->getName() << "\n";
       O << "AddRec: " << *AR << "\n";
 
       SmallVector<const SCEV *, 3> Subscripts, Sizes;
-      const SCEV *Res = AR->delinearize(*SE, Subscripts, Sizes);
-      int Size = Subscripts.size();
-      if (Res == AR || Size == 0) {
+      AR->delinearize(*SE, Subscripts, Sizes, SE->getElementSize(Inst));
+      if (Subscripts.size() == 0 || Sizes.size() == 0 ||
+          Subscripts.size() != Sizes.size()) {
         O << "failed to delinearize\n";
         continue;
       }
-      O << "Base offset: " << *Res << "\n";
+
+      O << "Base offset: " << *BasePointer << "\n";
       O << "ArrayDecl[UnknownSize]";
+      int Size = Subscripts.size();
       for (int i = 0; i < Size - 1; i++)
         O << "[" << *Sizes[i] << "]";
       O << " with elements of " << *Sizes[Size - 1] << " bytes.\n";
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index ff98611..d0784f1 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -51,8 +51,6 @@
 //                                                                            //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "da"
-
 #include "llvm/Analysis/DependenceAnalysis.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
@@ -69,6 +67,8 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "da"
+
 //===----------------------------------------------------------------------===//
 // statistics
 
@@ -234,7 +234,7 @@
   Levels(CommonLevels),
   LoopIndependent(PossiblyLoopIndependent) {
   Consistent = true;
-  DV = CommonLevels ? new DVEntry[CommonLevels] : NULL;
+  DV = CommonLevels ? new DVEntry[CommonLevels] : nullptr;
 }
 
 // The rest are simple getters that hide the implementation.
@@ -658,7 +658,7 @@
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
     return SI->getPointerOperand();
   llvm_unreachable("Value is not load or store instruction");
-  return 0;
+  return nullptr;
 }
 
 
@@ -932,7 +932,7 @@
     const SCEV *UB = SE->getBackedgeTakenCount(L);
     return SE->getNoopOrZeroExtend(UB, T);
   }
-  return NULL;
+  return nullptr;
 }
 
 
@@ -943,7 +943,7 @@
                                                                   ) const {
   if (const SCEV *UB = collectUpperBound(L, T))
     return dyn_cast<SCEVConstant>(UB);
-  return NULL;
+  return nullptr;
 }
 
 
@@ -2194,7 +2194,7 @@
     if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Product->getOperand(Op)))
       return Constant;
   }
-  return NULL;
+  return nullptr;
 }
 
 
@@ -2646,8 +2646,8 @@
                                        CoefficientInfo *B,
                                        BoundInfo *Bound,
                                        unsigned K) const {
-  Bound[K].Lower[Dependence::DVEntry::ALL] = NULL; // Default value = -infinity.
-  Bound[K].Upper[Dependence::DVEntry::ALL] = NULL; // Default value = +infinity.
+  Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity.
+  Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity.
   if (Bound[K].Iterations) {
     Bound[K].Lower[Dependence::DVEntry::ALL] =
       SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart),
@@ -2687,8 +2687,8 @@
                                       CoefficientInfo *B,
                                       BoundInfo *Bound,
                                       unsigned K) const {
-  Bound[K].Lower[Dependence::DVEntry::EQ] = NULL; // Default value = -infinity.
-  Bound[K].Upper[Dependence::DVEntry::EQ] = NULL; // Default value = +infinity.
+  Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity.
+  Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity.
   if (Bound[K].Iterations) {
     const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
     const SCEV *NegativePart = getNegativePart(Delta);
@@ -2729,8 +2729,8 @@
                                       CoefficientInfo *B,
                                       BoundInfo *Bound,
                                       unsigned K) const {
-  Bound[K].Lower[Dependence::DVEntry::LT] = NULL; // Default value = -infinity.
-  Bound[K].Upper[Dependence::DVEntry::LT] = NULL; // Default value = +infinity.
+  Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity.
+  Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity.
   if (Bound[K].Iterations) {
     const SCEV *Iter_1 =
       SE->getMinusSCEV(Bound[K].Iterations,
@@ -2776,8 +2776,8 @@
                                       CoefficientInfo *B,
                                       BoundInfo *Bound,
                                       unsigned K) const {
-  Bound[K].Lower[Dependence::DVEntry::GT] = NULL; // Default value = -infinity.
-  Bound[K].Upper[Dependence::DVEntry::GT] = NULL; // Default value = +infinity.
+  Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity.
+  Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity.
   if (Bound[K].Iterations) {
     const SCEV *Iter_1 =
       SE->getMinusSCEV(Bound[K].Iterations,
@@ -2829,7 +2829,7 @@
     CI[K].Coeff = Zero;
     CI[K].PosPart = Zero;
     CI[K].NegPart = Zero;
-    CI[K].Iterations = NULL;
+    CI[K].Iterations = nullptr;
   }
   while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) {
     const Loop *L = AddRec->getLoop();
@@ -2872,7 +2872,7 @@
     if (Bound[K].Lower[Bound[K].Direction])
       Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]);
     else
-      Sum = NULL;
+      Sum = nullptr;
   }
   return Sum;
 }
@@ -2888,7 +2888,7 @@
     if (Bound[K].Upper[Bound[K].Direction])
       Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]);
     else
-      Sum = NULL;
+      Sum = nullptr;
   }
   return Sum;
 }
@@ -3148,12 +3148,12 @@
   }
   else if (CurConstraint.isLine()) {
     Level.Scalar = false;
-    Level.Distance = NULL;
+    Level.Distance = nullptr;
     // direction should be accurate
   }
   else if (CurConstraint.isPoint()) {
     Level.Scalar = false;
-    Level.Distance = NULL;
+    Level.Distance = nullptr;
     unsigned NewDirection = Dependence::DVEntry::NONE;
     if (!isKnownPredicate(CmpInst::ICMP_NE,
                           CurConstraint.getY(),
@@ -3180,59 +3180,55 @@
 /// source and destination array references are recurrences on a nested loop,
 /// this function flattens the nested recurrences into separate recurrences
 /// for each loop level.
-bool
-DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV,
-                                   SmallVectorImpl<Subscript> &Pair) const {
+bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV,
+                                        const SCEV *DstSCEV,
+                                        SmallVectorImpl<Subscript> &Pair,
+                                        const SCEV *ElementSize) const {
+  const SCEVUnknown *SrcBase =
+      dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcSCEV));
+  const SCEVUnknown *DstBase =
+      dyn_cast<SCEVUnknown>(SE->getPointerBase(DstSCEV));
+
+  if (!SrcBase || !DstBase || SrcBase != DstBase)
+    return false;
+
+  SrcSCEV = SE->getMinusSCEV(SrcSCEV, SrcBase);
+  DstSCEV = SE->getMinusSCEV(DstSCEV, DstBase);
+
   const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV);
   const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV);
   if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine())
     return false;
 
-  SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts, SrcSizes, DstSizes;
-  const SCEV *RemainderS = SrcAR->delinearize(*SE, SrcSubscripts, SrcSizes);
-  const SCEV *RemainderD = DstAR->delinearize(*SE, DstSubscripts, DstSizes);
+  // First step: collect parametric terms in both array references.
+  SmallVector<const SCEV *, 4> Terms;
+  SrcAR->collectParametricTerms(*SE, Terms);
+  DstAR->collectParametricTerms(*SE, Terms);
+
+  // Second step: find subscript sizes.
+  SmallVector<const SCEV *, 4> Sizes;
+  SE->findArrayDimensions(Terms, Sizes, ElementSize);
+
+  // Third step: compute the access functions for each subscript.
+  SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
+  SrcAR->computeAccessFunctions(*SE, SrcSubscripts, Sizes);
+  DstAR->computeAccessFunctions(*SE, DstSubscripts, Sizes);
+
+  // Fail when there is only one subscript: that's a linearized access function.
+  if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 ||
+      SrcSubscripts.size() != DstSubscripts.size())
+    return false;
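+
+  // Illustrative example (not part of this patch): for two accesses into
+  // "double A[n][m]" such as A[i][j] and A[i][j-1], the three steps above
+  // recover Sizes = {m, 8} and the subscript pairs {i, i} and {j, j-1}, so a
+  // single linearized MIV test becomes two simpler SIV tests.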
 
   int size = SrcSubscripts.size();
-  // Fail when there is only a subscript: that's a linearized access function.
-  if (size < 2)
-    return false;
 
-  int dstSize = DstSubscripts.size();
-  // Fail when the number of subscripts in Src and Dst differ.
-  if (size != dstSize)
-    return false;
-
-  // Fail when the size of any of the subscripts in Src and Dst differs: the
-  // dependence analysis assumes that elements in the same array have same size.
-  // SCEV delinearization does not have a context based on which it would decide
-  // globally the size of subscripts that would best fit all the array accesses.
-  for (int i = 0; i < size; ++i)
-    if (SrcSizes[i] != DstSizes[i])
-      return false;
-
-  // When the difference in remainders is different than a constant it might be
-  // that the base address of the arrays is not the same.
-  const SCEV *DiffRemainders = SE->getMinusSCEV(RemainderS, RemainderD);
-  if (!isa<SCEVConstant>(DiffRemainders))
-    return false;
-
-  // Normalize the last dimension: integrate the size of the "scalar dimension"
-  // and the remainder of the delinearization.
-  DstSubscripts[size-1] = SE->getMulExpr(DstSubscripts[size-1],
-                                         DstSizes[size-1]);
-  SrcSubscripts[size-1] = SE->getMulExpr(SrcSubscripts[size-1],
-                                         SrcSizes[size-1]);
-  SrcSubscripts[size-1] = SE->getAddExpr(SrcSubscripts[size-1], RemainderS);
-  DstSubscripts[size-1] = SE->getAddExpr(DstSubscripts[size-1], RemainderD);
-
-#ifndef NDEBUG
-  DEBUG(errs() << "\nSrcSubscripts: ");
-  for (int i = 0; i < size; i++)
-    DEBUG(errs() << *SrcSubscripts[i]);
-  DEBUG(errs() << "\nDstSubscripts: ");
-  for (int i = 0; i < size; i++)
-    DEBUG(errs() << *DstSubscripts[i]);
-#endif
+  DEBUG({
+    dbgs() << "\nSrcSubscripts: ";
+    for (int i = 0; i < size; i++)
+      dbgs() << *SrcSubscripts[i];
+    dbgs() << "\nDstSubscripts: ";
+    for (int i = 0; i < size; i++)
+      dbgs() << *DstSubscripts[i];
+  });
 
   // The delinearization transforms a single-subscript MIV dependence test into
   // a multi-subscript SIV dependence test that is easier to compute. So we
@@ -3290,7 +3286,7 @@
   if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) ||
       (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory()))
     // if either instruction doesn't reference memory, there's no dependence
-    return NULL;
+    return nullptr;
 
   if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) {
     // can only analyze simple loads and stores, i.e., no calls, invokes, etc.
@@ -3310,7 +3306,7 @@
   case AliasAnalysis::NoAlias:
     // If the objects noalias, they are distinct, accesses are independent.
     DEBUG(dbgs() << "no alias\n");
-    return NULL;
+    return nullptr;
   case AliasAnalysis::MustAlias:
     break; // The underlying objects alias; test accesses for dependence.
   }
@@ -3363,7 +3359,7 @@
   }
 
   if (Delinearize && Pairs == 1 && CommonLevels > 1 &&
-      tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) {
+      tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) {
     DEBUG(dbgs() << "    delinerized GEP\n");
     Pairs = Pair.size();
   }
@@ -3505,26 +3501,26 @@
     case Subscript::ZIV:
       DEBUG(dbgs() << ", ZIV\n");
       if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result))
-        return NULL;
+        return nullptr;
       break;
     case Subscript::SIV: {
       DEBUG(dbgs() << ", SIV\n");
       unsigned Level;
-      const SCEV *SplitIter = NULL;
+      const SCEV *SplitIter = nullptr;
       if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
                   Result, NewConstraint, SplitIter))
-        return NULL;
+        return nullptr;
       break;
     }
     case Subscript::RDIV:
       DEBUG(dbgs() << ", RDIV\n");
       if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result))
-        return NULL;
+        return nullptr;
       break;
     case Subscript::MIV:
       DEBUG(dbgs() << ", MIV\n");
       if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result))
-        return NULL;
+        return nullptr;
       break;
     default:
       llvm_unreachable("subscript has unexpected classification");
@@ -3558,16 +3554,16 @@
           DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");
           // SJ is an SIV subscript that's part of the current coupled group
           unsigned Level;
-          const SCEV *SplitIter = NULL;
+          const SCEV *SplitIter = nullptr;
           DEBUG(dbgs() << "SIV\n");
           if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
                       Result, NewConstraint, SplitIter))
-            return NULL;
+            return nullptr;
           ConstrainedLevels.set(Level);
           if (intersectConstraints(&Constraints[Level], &NewConstraint)) {
             if (Constraints[Level].isEmpty()) {
               ++DeltaIndependence;
-              return NULL;
+              return nullptr;
             }
             Changed = true;
           }
@@ -3593,7 +3589,7 @@
               case Subscript::ZIV:
                 DEBUG(dbgs() << "ZIV\n");
                 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
-                  return NULL;
+                  return nullptr;
                 Mivs.reset(SJ);
                 break;
               case Subscript::SIV:
@@ -3616,7 +3612,7 @@
         if (Pair[SJ].Classification == Subscript::RDIV) {
           DEBUG(dbgs() << "RDIV test\n");
           if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
-            return NULL;
+            return nullptr;
           // I don't yet understand how to propagate RDIV results
           Mivs.reset(SJ);
         }
@@ -3629,7 +3625,7 @@
         if (Pair[SJ].Classification == Subscript::MIV) {
           DEBUG(dbgs() << "MIV test\n");
           if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result))
-            return NULL;
+            return nullptr;
         }
         else
           llvm_unreachable("expected only MIV subscripts at this point");
@@ -3641,7 +3637,7 @@
            SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) {
         updateDirection(Result.DV[SJ - 1], Constraints[SJ]);
         if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE)
-          return NULL;
+          return nullptr;
       }
     }
   }
@@ -3676,11 +3672,11 @@
       }
     }
     if (AllEqual)
-      return NULL;
+      return nullptr;
   }
 
   FullDependence *Final = new FullDependence(Result);
-  Result.DV = NULL;
+  Result.DV = nullptr;
   return Final;
 }
 
@@ -3787,7 +3783,7 @@
   }
 
   if (Delinearize && Pairs == 1 && CommonLevels > 1 &&
-      tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) {
+      tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) {
     DEBUG(dbgs() << "    delinerized GEP\n");
     Pairs = Pair.size();
   }
@@ -3853,11 +3849,11 @@
     switch (Pair[SI].Classification) {
     case Subscript::SIV: {
       unsigned Level;
-      const SCEV *SplitIter = NULL;
+      const SCEV *SplitIter = nullptr;
       (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
                      Result, NewConstraint, SplitIter);
       if (Level == SplitLevel) {
-        assert(SplitIter != NULL);
+        assert(SplitIter != nullptr);
         return SplitIter;
       }
       break;
@@ -3892,7 +3888,7 @@
         for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) {
           // SJ is an SIV subscript that's part of the current coupled group
           unsigned Level;
-          const SCEV *SplitIter = NULL;
+          const SCEV *SplitIter = nullptr;
           (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
                          Result, NewConstraint, SplitIter);
           if (Level == SplitLevel && SplitIter)
@@ -3933,5 +3929,5 @@
     }
   }
   llvm_unreachable("somehow reached end of routine");
-  return NULL;
+  return nullptr;
 }
diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp
index f0787f1..74594f8 100644
--- a/lib/Analysis/DominanceFrontier.cpp
+++ b/lib/Analysis/DominanceFrontier.cpp
@@ -40,12 +40,12 @@
 DominanceFrontier::calculate(const DominatorTree &DT,
                              const DomTreeNode *Node) {
   BasicBlock *BB = Node->getBlock();
-  DomSetType *Result = NULL;
+  DomSetType *Result = nullptr;
 
   std::vector<DFCalculateWorkObject> workList;
   SmallPtrSet<BasicBlock *, 32> visited;
 
-  workList.push_back(DFCalculateWorkObject(BB, NULL, Node, NULL));
+  workList.push_back(DFCalculateWorkObject(BB, nullptr, Node, nullptr));
   do {
     DFCalculateWorkObject *currentW = &workList.back();
     assert (currentW && "Missing work object.");
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index f43675b..caec253 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -21,14 +21,14 @@
 //
 
 CallGraph::CallGraph(Module &M)
-    : M(M), Root(0), ExternalCallingNode(getOrInsertFunction(0)),
-      CallsExternalNode(new CallGraphNode(0)) {
+    : M(M), Root(nullptr), ExternalCallingNode(getOrInsertFunction(nullptr)),
+      CallsExternalNode(new CallGraphNode(nullptr)) {
   // Add every function to the call graph.
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
     addToCallGraph(I);
 
   // If we didn't find a main function, use the external call graph node
-  if (Root == 0)
+  if (!Root)
     Root = ExternalCallingNode;
 }
 
@@ -210,7 +210,7 @@
   for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
     assert(I != CalledFunctions.end() && "Cannot find callee to remove!");
     CallRecord &CR = *I;
-    if (CR.second == Callee && CR.first == 0) {
+    if (CR.second == Callee && CR.first == nullptr) {
       Callee->DropRef();
       *I = CalledFunctions.back();
       CalledFunctions.pop_back();
@@ -267,7 +267,7 @@
 
 char CallGraphWrapperPass::ID = 0;
 
-void CallGraphWrapperPass::releaseMemory() { G.reset(0); }
+void CallGraphWrapperPass::releaseMemory() { G.reset(nullptr); }
 
 void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const {
   if (!G) {
@@ -280,7 +280,7 @@
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void CallGraphWrapperPass::dump() const { print(dbgs(), 0); }
+void CallGraphWrapperPass::dump() const { print(dbgs(), nullptr); }
 #endif
 
 // Ensure that users of CallGraph.h also link with this file
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index aafc085..bfab744 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -15,7 +15,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "cgscc-passmgr"
 #include "llvm/Analysis/CallGraphSCCPass.h"
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/Statistic.h"
@@ -23,12 +22,15 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LegacyPassManagers.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "cgscc-passmgr"
+
 static cl::opt<unsigned> 
 MaxIterations("max-cg-scc-iterations", cl::ReallyHidden, cl::init(4));
 
@@ -112,7 +114,7 @@
   bool Changed = false;
   PMDataManager *PM = P->getAsPMDataManager();
 
-  if (PM == 0) {
+  if (!PM) {
     CallGraphSCCPass *CGSP = (CallGraphSCCPass*)P;
     if (!CallGraphUpToDate) {
       DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false);
@@ -144,8 +146,11 @@
        I != E; ++I) {
     if (Function *F = (*I)->getFunction()) {
       dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName());
-      TimeRegion PassTimer(getPassTimer(FPP));
-      Changed |= FPP->runOnFunction(*F);
+      {
+        TimeRegion PassTimer(getPassTimer(FPP));
+        Changed |= FPP->runOnFunction(*F);
+      }
+      F->getContext().yield();
     }
   }
   
@@ -190,7 +195,7 @@
        SCCIdx != E; ++SCCIdx, ++FunctionNo) {
     CallGraphNode *CGN = *SCCIdx;
     Function *F = CGN->getFunction();
-    if (F == 0 || F->isDeclaration()) continue;
+    if (!F || F->isDeclaration()) continue;
     
     // Walk the function body looking for call sites.  Sync up the call sites in
     // CGN with those actually in the function.
@@ -203,7 +208,7 @@
     for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) {
       // If this call site is null, then the function pass deleted the call
       // entirely and the WeakVH nulled it out.  
-      if (I->first == 0 ||
+      if (!I->first ||
           // If we've already seen this call site, then the FunctionPass RAUW'd
           // one call with another, which resulted in two "uses" in the edge
           // list of the same call.
@@ -217,7 +222,7 @@
                "CallGraphSCCPass did not update the CallGraph correctly!");
         
         // If this was an indirect call site, count it.
-        if (I->second->getFunction() == 0)
+        if (!I->second->getFunction())
           ++NumIndirectRemoved;
         else 
           ++NumDirectRemoved;
@@ -273,7 +278,7 @@
           // site could be turned direct), don't reject it in checking mode, and
           // don't tweak it to be more precise.
           if (CheckingMode && CS.getCalledFunction() &&
-              ExistingNode->getFunction() == 0)
+              ExistingNode->getFunction() == nullptr)
             continue;
           
           assert(!CheckingMode &&
@@ -286,7 +291,7 @@
             CalleeNode = CG.getOrInsertFunction(Callee);
             // Keep track of whether we turned an indirect call into a direct
             // one.
-            if (ExistingNode->getFunction() == 0) {
+            if (!ExistingNode->getFunction()) {
               DevirtualizedCall = true;
               DEBUG(dbgs() << "  CGSCCPASSMGR: Devirtualized call to '"
                            << Callee->getName() << "'\n");
@@ -434,8 +439,8 @@
   while (!CGI.isAtEnd()) {
     // Copy the current SCC and increment past it so that the pass can hack
     // on the SCC if it wants to without invalidating our iterator.
-    std::vector<CallGraphNode*> &NodeVec = *CGI;
-    CurSCC.initialize(&NodeVec[0], &NodeVec[0]+NodeVec.size());
+    const std::vector<CallGraphNode *> &NodeVec = *CGI;
+    CurSCC.initialize(NodeVec.data(), NodeVec.data() + NodeVec.size());
     ++CGI;
     
     // At the top level, we run all the passes in this pass manager on the
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index f4097e4..607c068 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -14,7 +14,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "globalsmodref-aa"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/Statistic.h"
@@ -33,6 +32,8 @@
 #include <set>
 using namespace llvm;
 
+#define DEBUG_TYPE "globalsmodref-aa"
+
 STATISTIC(NumNonAddrTakenGlobalVars,
           "Number of global vars without address taken");
 STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken");
@@ -177,14 +178,14 @@
         FunctionInfo.find(F);
       if (I != FunctionInfo.end())
         return &I->second;
-      return 0;
+      return nullptr;
     }
 
     void AnalyzeGlobals(Module &M);
     void AnalyzeCallGraph(CallGraph &CG, Module &M);
     bool AnalyzeUsesOfPointer(Value *V, std::vector<Function*> &Readers,
                               std::vector<Function*> &Writers,
-                              GlobalValue *OkayStoreDest = 0);
+                              GlobalValue *OkayStoreDest = nullptr);
     bool AnalyzeIndirectGlobalMemory(GlobalValue *GV);
   };
 }
@@ -358,7 +359,7 @@
   // We do a bottom-up SCC traversal of the call graph.  In other words, we
   // visit all callees before callers (leaf-first).
   for (scc_iterator<CallGraph*> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
-    std::vector<CallGraphNode *> &SCC = *I;
+    const std::vector<CallGraphNode *> &SCC = *I;
     assert(!SCC.empty() && "SCC with no functions?");
 
     if (!SCC[0]->getFunction()) {
@@ -410,10 +411,8 @@
             FunctionEffect |= CalleeFR->FunctionEffect;
 
             // Incorporate callee's effects on globals into our info.
-            for (std::map<const GlobalValue*, unsigned>::iterator GI =
-                   CalleeFR->GlobalInfo.begin(), E = CalleeFR->GlobalInfo.end();
-                 GI != E; ++GI)
-              FR.GlobalInfo[GI->first] |= GI->second;
+            for (const auto &G : CalleeFR->GlobalInfo)
+              FR.GlobalInfo[G.first] |= G.second;
             FR.MayReadAnyGlobal |= CalleeFR->MayReadAnyGlobal;
           } else {
             // Can't say anything about it.  However, if it is inside our SCC,
@@ -492,8 +491,8 @@
   if (GV1 || GV2) {
     // If the global's address is taken, pretend we don't know it's a pointer to
     // the global.
-    if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = 0;
-    if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = 0;
+    if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = nullptr;
+    if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = nullptr;
 
     // If the two pointers are derived from two different non-addr-taken
     // globals, or if one is and the other isn't, we know these can't alias.
@@ -507,7 +506,7 @@
   // These pointers may be based on the memory owned by an indirect global.  If
   // so, we may be able to handle this.  First check to see if the base pointer
   // is a direct load from an indirect global.
-  GV1 = GV2 = 0;
+  GV1 = GV2 = nullptr;
   if (const LoadInst *LI = dyn_cast<LoadInst>(UV1))
     if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
       if (IndirectGlobals.count(GV))
diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp
index 8dafc1c..66f3f8e 100644
--- a/lib/Analysis/IPA/InlineCost.cpp
+++ b/lib/Analysis/IPA/InlineCost.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "inline-cost"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
@@ -34,6 +33,8 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "inline-cost"
+
 STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
 
 namespace {
@@ -97,9 +98,6 @@
   void disableSROA(Value *V);
   void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                           int InstructionCost);
-  bool handleSROACandidate(bool IsSROAValid,
-                           DenseMap<Value *, int>::iterator CostIt,
-                           int InstructionCost);
   bool isGEPOffsetConstant(GetElementPtrInst &GEP);
   bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
   bool simplifyCallSite(Function *F, CallSite CS);
@@ -225,21 +223,6 @@
   SROACostSavings += InstructionCost;
 }
 
-/// \brief Helper for the common pattern of handling a SROA candidate.
-/// Either accumulates the cost savings if the SROA remains valid, or disables
-/// SROA for the candidate.
-bool CallAnalyzer::handleSROACandidate(bool IsSROAValid,
-                                       DenseMap<Value *, int>::iterator CostIt,
-                                       int InstructionCost) {
-  if (IsSROAValid) {
-    accumulateSROACost(CostIt, InstructionCost);
-    return true;
-  }
-
-  disableSROA(CostIt);
-  return false;
-}
-
 /// \brief Check whether a GEP's indices are all constant.
 ///
 /// Respects any simplified values known during the analysis of this callsite.
@@ -287,8 +270,17 @@
 }
 
 bool CallAnalyzer::visitAlloca(AllocaInst &I) {
-  // FIXME: Check whether inlining will turn a dynamic alloca into a static
+  // Check whether inlining will turn a dynamic alloca into a static
   // alloca, and handle that case.
+  if (I.isArrayAllocation()) {
+    if (Constant *Size = SimplifiedValues.lookup(I.getArraySize())) {
+      ConstantInt *AllocSize = dyn_cast<ConstantInt>(Size);
+      assert(AllocSize && "Allocation size not a constant int?");
+      Type *Ty = I.getAllocatedType();
+      AllocatedSize += Ty->getPrimitiveSizeInBits() * AllocSize->getZExtValue();
+      return Base::visitAlloca(I);
+    }
+  }
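+  // For instance (illustrative only): "%a = alloca i32, i32 %n" where %n has
+  // been simplified to "i32 4" is handled by the branch above as a static
+  // 4 x i32 allocation rather than as a dynamic alloca.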
 
   // Accumulate the allocated size.
   if (I.isStaticAlloca()) {
@@ -816,9 +808,29 @@
 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
   // We model unconditional switches as free; see the comments on handling
   // branches.
-  return isa<ConstantInt>(SI.getCondition()) ||
-         dyn_cast_or_null<ConstantInt>(
-             SimplifiedValues.lookup(SI.getCondition()));
+  if (isa<ConstantInt>(SI.getCondition()))
+    return true;
+  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
+    if (isa<ConstantInt>(V))
+      return true;
+
+  // Otherwise, we need to accumulate a cost proportional to the number of
+  // distinct successor blocks. This fan-out in the CFG cannot be represented
+  // for free even if we can represent the core switch as a jumptable that
+  // takes a single instruction.
+  //
+  // NB: We convert large switches which are just used to initialize large phi
+  // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
+  // inlining those. It will prevent inlining in cases where the optimization
+  // does not (yet) fire.
+  SmallPtrSet<BasicBlock *, 8> SuccessorBlocks;
+  SuccessorBlocks.insert(SI.getDefaultDest());
+  for (auto I = SI.case_begin(), E = SI.case_end(); I != E; ++I)
+    SuccessorBlocks.insert(I.getCaseSuccessor());
+  // Add cost corresponding to the number of distinct destinations. The first
+  // we model as free because of fallthrough.
+  Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost;
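+  // For example (illustrative): a switch with a default block and three
+  // distinct case targets has four successors and accumulates 3 * InstrCost.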
+  return false;
 }
 
 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
@@ -934,7 +946,7 @@
 /// no constant offsets applied.
 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
   if (!DL || !V->getType()->isPointerTy())
-    return 0;
+    return nullptr;
 
   unsigned IntPtrWidth = DL->getPointerSizeInBits();
   APInt Offset = APInt::getNullValue(IntPtrWidth);
@@ -946,7 +958,7 @@
   do {
     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
       if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
-        return 0;
+        return nullptr;
       V = GEP->getPointerOperand();
     } else if (Operator::getOpcode(V) == Instruction::BitCast) {
       V = cast<Operator>(V)->getOperand(0);
@@ -1247,7 +1259,7 @@
 
   // Calls to functions with always-inline attributes should be inlined
   // whenever possible.
-  if (Callee->hasFnAttribute(Attribute::AlwaysInline)) {
+  if (CS.hasFnAttr(Attribute::AlwaysInline)) {
     if (isInlineViable(*Callee))
       return llvm::InlineCost::getAlways();
     return llvm::InlineCost::getNever();
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 5317a47..c819bd3 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -12,7 +12,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "iv-users"
 #include "llvm/Analysis/IVUsers.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/LoopPass.h"
@@ -29,6 +28,8 @@
 #include <algorithm>
 using namespace llvm;
 
+#define DEBUG_TYPE "iv-users"
+
 char IVUsers::ID = 0;
 INITIALIZE_PASS_BEGIN(IVUsers, "iv-users",
                       "Induction Variable Users", false, true)
@@ -84,7 +85,7 @@
 static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT,
                                  const LoopInfo *LI,
                                  SmallPtrSet<Loop*,16> &SimpleLoopNests) {
-  Loop *NearestLoop = 0;
+  Loop *NearestLoop = nullptr;
   for (DomTreeNode *Rung = DT->getNode(BB);
        Rung; Rung = Rung->getIDom()) {
     BasicBlock *DomBB = Rung->getBlock();
@@ -253,7 +254,7 @@
   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   SE = &getAnalysis<ScalarEvolution>();
   DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-  DL = DLP ? &DLP->getDataLayout() : 0;
+  DL = DLP ? &DLP->getDataLayout() : nullptr;
 
   // Find all uses of induction variables in this loop, and categorize
   // them by stride.  Start by finding all of the PHI nodes in the header for
@@ -329,16 +330,16 @@
          I != E; ++I)
       if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L))
         return AR;
-    return 0;
+    return nullptr;
   }
 
-  return 0;
+  return nullptr;
 }
 
 const SCEV *IVUsers::getStride(const IVStrideUse &IU, const Loop *L) const {
   if (const SCEVAddRecExpr *AR = findAddRecForLoop(getExpr(IU), L))
     return AR->getStepRecurrence(*SE);
-  return 0;
+  return nullptr;
 }
 
 void IVStrideUse::transformToPostInc(const Loop *L) {
diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp
index 3d05556..de2b9c0 100644
--- a/lib/Analysis/InstCount.cpp
+++ b/lib/Analysis/InstCount.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "instcount"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/IR/Function.h"
@@ -22,6 +21,8 @@
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "instcount"
+
 STATISTIC(TotalInsts , "Number of instructions (of all types)");
 STATISTIC(TotalBlocks, "Number of basic blocks");
 STATISTIC(TotalFuncs , "Number of non-external functions");
@@ -47,7 +48,7 @@
 
     void visitInstruction(Instruction &I) {
       errs() << "Instruction Count does not know about " << I;
-      llvm_unreachable(0);
+      llvm_unreachable(nullptr);
     }
   public:
     static char ID; // Pass identification, replacement for typeid
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index d8d8a09..3684fda 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -17,7 +17,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "instsimplify"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -35,6 +34,8 @@
 using namespace llvm;
 using namespace llvm::PatternMatch;
 
+#define DEBUG_TYPE "instsimplify"
+
 enum { RecursionLimit = 3 };
 
 STATISTIC(NumExpand,  "Number of expansions");
@@ -131,7 +132,7 @@
   Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand;
   // Recursion is always used, so bail out at once if we already hit the limit.
   if (!MaxRecurse--)
-    return 0;
+    return nullptr;
 
   // Check whether the expression has the form "(A op' B) op C".
   if (BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS))
@@ -179,7 +180,7 @@
         }
     }
 
-  return 0;
+  return nullptr;
 }
 
 /// FactorizeBinOp - Simplify "LHS Opcode RHS" by factorizing out a common term
@@ -192,14 +193,14 @@
   Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract;
   // Recursion is always used, so bail out at once if we already hit the limit.
   if (!MaxRecurse--)
-    return 0;
+    return nullptr;
 
   BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
   BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
 
   if (!Op0 || Op0->getOpcode() != OpcodeToExtract ||
       !Op1 || Op1->getOpcode() != OpcodeToExtract)
-    return 0;
+    return nullptr;
 
   // The expression has the form "(A op' B) op (C op' D)".
   Value *A = Op0->getOperand(0), *B = Op0->getOperand(1);
@@ -251,7 +252,7 @@
     }
   }
 
-  return 0;
+  return nullptr;
 }
 
 /// SimplifyAssociativeBinOp - Generic simplifications for associative binary
@@ -263,7 +264,7 @@
 
   // Recursion is always used, so bail out at once if we already hit the limit.
   if (!MaxRecurse--)
-    return 0;
+    return nullptr;
 
   BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
   BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
@@ -308,7 +309,7 @@
 
   // The remaining transforms require commutativity as well as associativity.
   if (!Instruction::isCommutative(Opcode))
-    return 0;
+    return nullptr;
 
   // Transform: "(A op B) op C" ==> "(C op A) op B" if it simplifies completely.
   if (Op0 && Op0->getOpcode() == Opcode) {
@@ -348,7 +349,7 @@
     }
   }
 
-  return 0;
+  return nullptr;
 }
 
 /// ThreadBinOpOverSelect - In the case of a binary operation with a select
@@ -359,7 +360,7 @@
                                     const Query &Q, unsigned MaxRecurse) {
   // Recursion is always used, so bail out at once if we already hit the limit.
   if (!MaxRecurse--)
-    return 0;
+    return nullptr;
 
   SelectInst *SI;
   if (isa<SelectInst>(LHS)) {
@@ -420,7 +421,7 @@
     }
   }
 
-  return 0;
+  return nullptr;
 }
 
 /// ThreadCmpOverSelect - In the case of a comparison with a select instruction,
@@ -432,7 +433,7 @@
                                   unsigned MaxRecurse) {
   // Recursion is always used, so bail out at once if we already hit the limit.
   if (!MaxRecurse--)
-    return 0;
+    return nullptr;
 
   // Make sure the select is on the LHS.
   if (!isa<SelectInst>(LHS)) {
@@ -456,7 +457,7 @@
     // It didn't simplify.  However if "cmp TV, RHS" is equal to the select
     // condition then we can replace it with 'true'.  Otherwise give up.
     if (!isSameCompare(Cond, Pred, TV, RHS))
-      return 0;
+      return nullptr;
     TCmp = getTrue(Cond->getType());
   }
 
@@ -470,7 +471,7 @@
     // It didn't simplify.  However if "cmp FV, RHS" is equal to the select
     // condition then we can replace it with 'false'.  Otherwise give up.
     if (!isSameCompare(Cond, Pred, FV, RHS))
-      return 0;
+      return nullptr;
     FCmp = getFalse(Cond->getType());
   }
 
@@ -482,7 +483,7 @@
   // The remaining cases only make sense if the select condition has the same
   // type as the result of the comparison, so bail out if this is not so.
   if (Cond->getType()->isVectorTy() != RHS->getType()->isVectorTy())
-    return 0;
+    return nullptr;
   // If the false value simplified to false, then the result of the compare
   // is equal to "Cond && TCmp".  This also catches the case when the false
   // value simplified to false and the true value to true, returning "Cond".
@@ -502,7 +503,7 @@
                         Q, MaxRecurse))
       return V;
 
-  return 0;
+  return nullptr;
 }
 
 /// ThreadBinOpOverPHI - In the case of a binary operation with an operand that
@@ -513,24 +514,24 @@
                                  const Query &Q, unsigned MaxRecurse) {
   // Recursion is always used, so bail out at once if we already hit the limit.
   if (!MaxRecurse--)
-    return 0;
+    return nullptr;
 
   PHINode *PI;
   if (isa<PHINode>(LHS)) {
     PI = cast<PHINode>(LHS);
     // Bail out if RHS and the phi may be mutually interdependent due to a loop.
     if (!ValueDominatesPHI(RHS, PI, Q.DT))
-      return 0;
+      return nullptr;
   } else {
     assert(isa<PHINode>(RHS) && "No PHI instruction operand!");
     PI = cast<PHINode>(RHS);
     // Bail out if LHS and the phi may be mutually interdependent due to a loop.
     if (!ValueDominatesPHI(LHS, PI, Q.DT))
-      return 0;
+      return nullptr;
   }
 
   // Evaluate the BinOp on the incoming phi values.
-  Value *CommonValue = 0;
+  Value *CommonValue = nullptr;
   for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
     Value *Incoming = PI->getIncomingValue(i);
     // If the incoming value is the phi node itself, it can safely be skipped.
@@ -541,7 +542,7 @@
     // If the operation failed to simplify, or simplified to a different value
     // than previously, then give up.
     if (!V || (CommonValue && V != CommonValue))
-      return 0;
+      return nullptr;
     CommonValue = V;
   }
 
@@ -556,7 +557,7 @@
                                const Query &Q, unsigned MaxRecurse) {
   // Recursion is always used, so bail out at once if we already hit the limit.
   if (!MaxRecurse--)
-    return 0;
+    return nullptr;
 
   // Make sure the phi is on the LHS.
   if (!isa<PHINode>(LHS)) {
@@ -568,10 +569,10 @@
 
   // Bail out if RHS and the phi may be mutually interdependent due to a loop.
   if (!ValueDominatesPHI(RHS, PI, Q.DT))
-    return 0;
+    return nullptr;
 
   // Evaluate the BinOp on the incoming phi values.
-  Value *CommonValue = 0;
+  Value *CommonValue = nullptr;
   for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
     Value *Incoming = PI->getIncomingValue(i);
     // If the incoming value is the phi node itself, it can safely be skipped.
@@ -580,7 +581,7 @@
     // If the operation failed to simplify, or simplified to a different value
     // than previously, then give up.
     if (!V || (CommonValue && V != CommonValue))
-      return 0;
+      return nullptr;
     CommonValue = V;
   }
 
@@ -613,7 +614,7 @@
   // X + (Y - X) -> Y
   // (Y - X) + X -> Y
   // Eg: X + -X -> 0
-  Value *Y = 0;
+  Value *Y = nullptr;
   if (match(Op1, m_Sub(m_Value(Y), m_Specific(Op0))) ||
       match(Op0, m_Sub(m_Value(Y), m_Specific(Op1))))
     return Y;
@@ -647,7 +648,7 @@
   // "A+B" and "A+C" thus gains nothing, but costs compile time.  Similarly
   // for threading over phi nodes.
 
-  return 0;
+  return nullptr;
 }
 
 Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
@@ -720,7 +721,7 @@
   // If LHS and RHS are not related via constant offsets to the same base
   // value, there is nothing we can do here.
   if (LHS != RHS)
-    return 0;
+    return nullptr;
 
   // Otherwise, the difference of LHS - RHS can be computed as:
   //    LHS - RHS
@@ -755,14 +756,14 @@
 
   // (X*2) - X -> X
   // (X<<1) - X -> X
-  Value *X = 0;
+  Value *X = nullptr;
   if (match(Op0, m_Mul(m_Specific(Op1), m_ConstantInt<2>())) ||
       match(Op0, m_Shl(m_Specific(Op1), m_One())))
     return Op1;
 
   // (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies.
   // For example, (X + Y) - Y -> X; (Y + X) - Y -> X
-  Value *Y = 0, *Z = Op1;
+  Value *Y = nullptr, *Z = Op1;
   if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z
     // See if "V === Y - Z" simplifies.
     if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse-1))
@@ -853,7 +854,7 @@
   // "A-B" and "A-C" thus gains nothing, but costs compile time.  Similarly
   // for threading over phi nodes.
 
-  return 0;
+  return nullptr;
 }
 
 Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
@@ -890,7 +891,7 @@
   // fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0
   //   where nnan and ninf have to occur at least once somewhere in this
   //   expression
-  Value *SubOp = 0;
+  Value *SubOp = nullptr;
   if (match(Op1, m_FSub(m_AnyZero(), m_Specific(Op0))))
     SubOp = Op1;
   else if (match(Op0, m_FSub(m_AnyZero(), m_Specific(Op1))))
@@ -902,7 +903,7 @@
       return Constant::getNullValue(Op0->getType());
   }
 
-  return 0;
+  return nullptr;
 }
 
 /// Given operands for an FSub, see if we can fold the result.  If not, this
@@ -939,7 +940,7 @@
   if (FMF.noNaNs() && FMF.noInfs() && Op0 == Op1)
     return Constant::getNullValue(Op0->getType());
 
-  return 0;
+  return nullptr;
 }
 
 /// Given the operands for an FMul, see if we can fold the result
@@ -966,7 +967,7 @@
  if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero()))
    return Op1;
 
- return 0;
+ return nullptr;
 }
 
 /// SimplifyMulInst - Given operands for a Mul, see if we can
@@ -997,7 +998,7 @@
     return Op0;
 
   // (X / Y) * Y -> X if the division is exact.
-  Value *X = 0;
+  Value *X = nullptr;
   if (match(Op0, m_Exact(m_IDiv(m_Value(X), m_Specific(Op1)))) || // (X / Y) * Y
       match(Op1, m_Exact(m_IDiv(m_Value(X), m_Specific(Op0)))))   // Y * (X / Y)
     return X;
@@ -1031,7 +1032,7 @@
                                       MaxRecurse))
       return V;
 
-  return 0;
+  return nullptr;
 }
 
 Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
@@ -1098,7 +1099,7 @@
     return ConstantInt::get(Op0->getType(), 1);
 
   // (X * Y) / Y -> X if the multiplication does not overflow.
-  Value *X = 0, *Y = 0;
+  Value *X = nullptr, *Y = nullptr;
   if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && (X == Op1 || Y == Op1)) {
     if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1
     OverflowingBinaryOperator *Mul = cast<OverflowingBinaryOperator>(Op0);
@@ -1129,7 +1130,7 @@
     if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
       return V;
 
-  return 0;
+  return nullptr;
 }
 
 /// SimplifySDivInst - Given operands for an SDiv, see if we can
@@ -1139,7 +1140,7 @@
   if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse))
     return V;
 
-  return 0;
+  return nullptr;
 }
 
 Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
@@ -1155,7 +1156,7 @@
   if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse))
     return V;
 
-  return 0;
+  return nullptr;
 }
 
 Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
@@ -1174,7 +1175,7 @@
   if (match(Op1, m_Undef()))
     return Op1;
 
-  return 0;
+  return nullptr;
 }
 
 Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
@@ -1234,7 +1235,7 @@
     if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
       return V;
 
-  return 0;
+  return nullptr;
 }
 
 /// SimplifySRemInst - Given operands for an SRem, see if we can
@@ -1244,7 +1245,7 @@
   if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse))
     return V;
 
-  return 0;
+  return nullptr;
 }
 
 Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout *DL,
@@ -1260,7 +1261,7 @@
   if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse))
     return V;
 
-  return 0;
+  return nullptr;
 }
 
 Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *DL,
@@ -1279,7 +1280,7 @@
   if (match(Op1, m_Undef()))
     return Op1;
 
-  return 0;
+  return nullptr;
 }
 
 Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const DataLayout *DL,
@@ -1350,7 +1351,7 @@
     if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
       return V;
 
-  return 0;
+  return nullptr;
 }
 
 /// SimplifyShlInst - Given operands for an Shl, see if we can
@@ -1368,7 +1369,7 @@
   Value *X;
   if (match(Op0, m_Exact(m_Shr(m_Value(X), m_Specific(Op1)))))
     return X;
-  return 0;
+  return nullptr;
 }
 
 Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
@@ -1399,7 +1400,7 @@
       cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap())
     return X;
 
-  return 0;
+  return nullptr;
 }
 
 Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
@@ -1435,7 +1436,7 @@
       cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap())
     return X;
 
-  return 0;
+  return nullptr;
 }
 
 Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
@@ -1483,7 +1484,7 @@
     return Constant::getNullValue(Op0->getType());
 
   // (A | ?) & A = A
-  Value *A = 0, *B = 0;
+  Value *A = nullptr, *B = nullptr;
   if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
       (A == Op1 || B == Op1))
     return Op1;
@@ -1536,7 +1537,7 @@
                                       MaxRecurse))
       return V;
 
-  return 0;
+  return nullptr;
 }
 
 Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout *DL,
@@ -1582,7 +1583,7 @@
     return Constant::getAllOnesValue(Op0->getType());
 
   // (A & ?) | A = A
-  Value *A = 0, *B = 0;
+  Value *A = nullptr, *B = nullptr;
   if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
       (A == Op1 || B == Op1))
     return Op1;
@@ -1630,7 +1631,7 @@
     if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse))
       return V;
 
-  return 0;
+  return nullptr;
 }
 
 Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout *DL,
@@ -1690,7 +1691,7 @@
   // "A^B" and "A^C" thus gains nothing, but costs compile time.  Similarly
   // for threading over phi nodes.
 
-  return 0;
+  return nullptr;
 }
 
 Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout *DL,
@@ -1710,17 +1711,17 @@
                                          Value *LHS, Value *RHS) {
   SelectInst *SI = dyn_cast<SelectInst>(V);
   if (!SI)
-    return 0;
+    return nullptr;
   CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
   if (!Cmp)
-    return 0;
+    return nullptr;
   Value *CmpLHS = Cmp->getOperand(0), *CmpRHS = Cmp->getOperand(1);
   if (Pred == Cmp->getPredicate() && LHS == CmpLHS && RHS == CmpRHS)
     return Cmp;
   if (Pred == CmpInst::getSwappedPredicate(Cmp->getPredicate()) &&
       LHS == CmpRHS && RHS == CmpLHS)
     return Cmp;
-  return 0;
+  return nullptr;
 }
 
 // A significant optimization not implemented here is assuming that alloca
@@ -1768,7 +1769,7 @@
   // We can only fold certain predicates on pointer comparisons.
   switch (Pred) {
   default:
-    return 0;
+    return nullptr;
 
     // Equality comparisons are easy to fold.
   case CmpInst::ICMP_EQ:
@@ -1874,7 +1875,7 @@
   }
 
   // Otherwise, fail.
-  return 0;
+  return nullptr;
 }
 
 /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
@@ -2000,7 +2001,7 @@
 
     // Many binary operators with constant RHS have easy to compute constant
     // range.  Use them to check whether the comparison is a tautology.
-    uint32_t Width = CI->getBitWidth();
+    unsigned Width = CI->getBitWidth();
     APInt Lower = APInt(Width, 0);
     APInt Upper = APInt(Width, 0);
     ConstantInt *CI2;
@@ -2019,6 +2020,10 @@
       APInt NegOne = APInt::getAllOnesValue(Width);
       if (!CI2->isZero())
         Upper = NegOne.udiv(CI2->getValue()) + 1;
+    } else if (match(LHS, m_SDiv(m_ConstantInt(CI2), m_Value()))) {
+      // 'sdiv CI2, x' produces [-|CI2|, |CI2|].
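+      // e.g. 'sdiv -4, x' yields values in [-4, 4], i.e. the range [-4, 5).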
+      Upper = CI2->getValue().abs() + 1;
+      Lower = (-Upper) + 1;
     } else if (match(LHS, m_SDiv(m_Value(), m_ConstantInt(CI2)))) {
       // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2].
       APInt IntMin = APInt::getSignedMinValue(Width);
@@ -2033,6 +2038,13 @@
       APInt NegOne = APInt::getAllOnesValue(Width);
       if (CI2->getValue().ult(Width))
         Upper = NegOne.lshr(CI2->getValue()) + 1;
+    } else if (match(LHS, m_LShr(m_ConstantInt(CI2), m_Value()))) {
+      // 'lshr CI2, x' produces [CI2 >> (Width-1), CI2].
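+      // If the shift is exact, no set bits are shifted out, so the smallest
+      // result is CI2 shifted right by its trailing zero count, e.g.
+      // 'lshr exact 12, x' produces [3, 13).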
+      unsigned ShiftAmount = Width - 1;
+      if (!CI2->isZero() && cast<BinaryOperator>(LHS)->isExact())
+        ShiftAmount = CI2->getValue().countTrailingZeros();
+      Lower = CI2->getValue().lshr(ShiftAmount);
+      Upper = CI2->getValue() + 1;
     } else if (match(LHS, m_AShr(m_Value(), m_ConstantInt(CI2)))) {
       // 'ashr x, CI2' produces [INT_MIN >> CI2, INT_MAX >> CI2].
       APInt IntMin = APInt::getSignedMinValue(Width);
@@ -2041,6 +2053,19 @@
         Lower = IntMin.ashr(CI2->getValue());
         Upper = IntMax.ashr(CI2->getValue()) + 1;
       }
+    } else if (match(LHS, m_AShr(m_ConstantInt(CI2), m_Value()))) {
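+      // 'ashr CI2, x' preserves the sign of CI2; as with lshr, an exact
+      // shift tightens the bound to CI2's trailing zero count.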
+      unsigned ShiftAmount = Width - 1;
+      if (!CI2->isZero() && cast<BinaryOperator>(LHS)->isExact())
+        ShiftAmount = CI2->getValue().countTrailingZeros();
+      if (CI2->isNegative()) {
+        // 'ashr CI2, x' produces [CI2, CI2 >> (Width-1)]
+        Lower = CI2->getValue();
+        Upper = CI2->getValue().ashr(ShiftAmount) + 1;
+      } else {
+        // 'ashr CI2, x' produces [CI2 >> (Width-1), CI2]
+        Lower = CI2->getValue().ashr(ShiftAmount);
+        Upper = CI2->getValue() + 1;
+      }
     } else if (match(LHS, m_Or(m_Value(), m_ConstantInt(CI2)))) {
       // 'or x, CI2' produces [CI2, UINT_MAX].
       Lower = CI2->getValue();
@@ -2221,7 +2246,7 @@
   BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS);
   if (MaxRecurse && (LBO || RBO)) {
     // Analyze the case when either LHS or RHS is an add instruction.
-    Value *A = 0, *B = 0, *C = 0, *D = 0;
+    Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
     // LHS = A + B (or A and B are null); RHS = C + D (or C and D are null).
     bool NoLHSWrapProblem = false, NoRHSWrapProblem = false;
     if (LBO && LBO->getOpcode() == Instruction::Add) {
@@ -2279,6 +2304,28 @@
     }
   }
 
+  // 0 - (zext X) pred C
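+  // Since zext X is non-negative, 0 - (zext X) is never positive. Against a
+  // strictly positive constant, slt/ne fold to true and sge/eq to false;
+  // against any non-negative constant, sle folds to true and sgt to false.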
+  if (!CmpInst::isUnsigned(Pred) && match(LHS, m_Neg(m_ZExt(m_Value())))) {
+    if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
+      if (RHSC->getValue().isStrictlyPositive()) {
+        if (Pred == ICmpInst::ICMP_SLT)
+          return ConstantInt::getTrue(RHSC->getContext());
+        if (Pred == ICmpInst::ICMP_SGE)
+          return ConstantInt::getFalse(RHSC->getContext());
+        if (Pred == ICmpInst::ICMP_EQ)
+          return ConstantInt::getFalse(RHSC->getContext());
+        if (Pred == ICmpInst::ICMP_NE)
+          return ConstantInt::getTrue(RHSC->getContext());
+      }
+      if (RHSC->getValue().isNonNegative()) {
+        if (Pred == ICmpInst::ICMP_SLE)
+          return ConstantInt::getTrue(RHSC->getContext());
+        if (Pred == ICmpInst::ICMP_SGT)
+          return ConstantInt::getFalse(RHSC->getContext());
+      }
+    }
+  }
+
   // icmp pred (urem X, Y), Y
   if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) {
     bool KnownNonNegative, KnownNegative;
@@ -2605,7 +2652,7 @@
     if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
       return V;
 
-  return 0;
+  return nullptr;
 }
 
 Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
@@ -2702,7 +2749,7 @@
     if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
       return V;
 
-  return 0;
+  return nullptr;
 }
 
 Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
@@ -2741,7 +2788,7 @@
   if (isa<UndefValue>(FalseVal))   // select C, X, undef -> X
     return TrueVal;
 
-  return 0;
+  return nullptr;
 }
 
 Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
@@ -2786,7 +2833,7 @@
   // Check to see if this is constant foldable.
   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
     if (!isa<Constant>(Ops[i]))
-      return 0;
+      return nullptr;
 
   return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1));
 }
@@ -2823,7 +2870,7 @@
         return Agg;
     }
 
-  return 0;
+  return nullptr;
 }
 
 Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
@@ -2839,7 +2886,7 @@
 static Value *SimplifyPHINode(PHINode *PN, const Query &Q) {
   // If all of the PHI's incoming values are the same then replace the PHI node
   // with the common value.
-  Value *CommonValue = 0;
+  Value *CommonValue = nullptr;
   bool HasUndefInput = false;
   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
     Value *Incoming = PN->getIncomingValue(i);
@@ -2851,7 +2898,7 @@
       continue;
     }
     if (CommonValue && Incoming != CommonValue)
-      return 0;  // Not the same, bail out.
+      return nullptr;  // Not the same, bail out.
     CommonValue = Incoming;
   }
 
@@ -2864,7 +2911,7 @@
   // instruction, we cannot return X as the result of the PHI node unless it
   // dominates the PHI block.
   if (HasUndefInput)
-    return ValueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : 0;
+    return ValueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : nullptr;
 
   return CommonValue;
 }
@@ -2873,7 +2920,7 @@
   if (Constant *C = dyn_cast<Constant>(Op))
     return ConstantFoldInstOperands(Instruction::Trunc, Ty, C, Q.DL, Q.TLI);
 
-  return 0;
+  return nullptr;
 }
 
 Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *DL,
@@ -2945,7 +2992,7 @@
       if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, Q, MaxRecurse))
         return V;
 
-    return 0;
+    return nullptr;
   }
 }
 
@@ -2992,7 +3039,7 @@
                                 const Query &Q, unsigned MaxRecurse) {
   // Perform idempotent optimizations
   if (!IsIdempotent(IID))
-    return 0;
+    return nullptr;
 
   // Unary Ops
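+  // e.g. for an idempotent f such as fabs, f(f(x)) == f(x), so the inner
+  // call can be returned directly.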
   if (std::distance(ArgBegin, ArgEnd) == 1)
@@ -3000,7 +3047,7 @@
       if (II->getIntrinsicID() == IID)
         return II;
 
-  return 0;
+  return nullptr;
 }
 
 template <typename IterTy>
@@ -3017,7 +3064,7 @@
 
   Function *F = dyn_cast<Function>(V);
   if (!F)
-    return 0;
+    return nullptr;
 
   if (unsigned IID = F->getIntrinsicID())
     if (Value *Ret =
@@ -3025,14 +3072,14 @@
       return Ret;
 
   if (!canConstantFoldCallTo(F))
-    return 0;
+    return nullptr;
 
   SmallVector<Constant *, 4> ConstantArgs;
   ConstantArgs.reserve(ArgEnd - ArgBegin);
   for (IterTy I = ArgBegin, E = ArgEnd; I != E; ++I) {
     Constant *C = dyn_cast<Constant>(*I);
     if (!C)
-      return 0;
+      return nullptr;
     ConstantArgs.push_back(C);
   }
 
@@ -3247,7 +3294,7 @@
                                           const DataLayout *DL,
                                           const TargetLibraryInfo *TLI,
                                           const DominatorTree *DT) {
-  return replaceAndRecursivelySimplifyImpl(I, 0, DL, TLI, DT);
+  return replaceAndRecursivelySimplifyImpl(I, nullptr, DL, TLI, DT);
 }
 
 bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV,
diff --git a/lib/Analysis/IntervalPartition.cpp b/lib/Analysis/IntervalPartition.cpp
index 2e259b1..a0583e8 100644
--- a/lib/Analysis/IntervalPartition.cpp
+++ b/lib/Analysis/IntervalPartition.cpp
@@ -29,7 +29,7 @@
     delete Intervals[i];
   IntervalMap.clear();
   Intervals.clear();
-  RootInterval = 0;
+  RootInterval = nullptr;
 }
 
 void IntervalPartition::print(raw_ostream &O, const Module*) const {
diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp
index ea213f2..e073616 100644
--- a/lib/Analysis/LazyCallGraph.cpp
+++ b/lib/Analysis/LazyCallGraph.cpp
@@ -8,19 +8,22 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/LazyCallGraph.h"
-#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/InstVisitor.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
+#define DEBUG_TYPE "lcg"
+
 static void findCallees(
     SmallVectorImpl<Constant *> &Worklist, SmallPtrSetImpl<Constant *> &Visited,
     SmallVectorImpl<PointerUnion<Function *, LazyCallGraph::Node *>> &Callees,
-    SmallPtrSetImpl<Function *> &CalleeSet) {
+    DenseMap<Function *, size_t> &CalleeIndexMap) {
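+  // The index map records each callee's position in Callees so that an edge
+  // can later be removed by nulling its slot without shifting the others.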
   while (!Worklist.empty()) {
     Constant *C = Worklist.pop_back_val();
 
@@ -35,8 +38,12 @@
       // alias. Then a test of the address of the weak function against the new
       // strong definition's address would be an effective way to determine the
       // safety of optimizing a direct call edge.
-      if (!F->isDeclaration() && CalleeSet.insert(F))
+      if (!F->isDeclaration() &&
+          CalleeIndexMap.insert(std::make_pair(F, Callees.size())).second) {
+        DEBUG(dbgs() << "    Added callable function: " << F->getName()
+                     << "\n");
         Callees.push_back(F);
+      }
       continue;
     }
 
@@ -46,7 +53,11 @@
   }
 }
 
-LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F) : G(G), F(F) {
+LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F)
+    : G(&G), F(F), DFSNumber(0), LowLink(0) {
+  DEBUG(dbgs() << "  Adding functions called by '" << F.getName()
+               << "' to the graph.\n");
+
   SmallVector<Constant *, 16> Worklist;
   SmallPtrSet<Constant *, 16> Visited;
   // Find all the potential callees in this function. First walk the
@@ -61,36 +72,41 @@
   // We've collected all the constant (and thus potentially function or
   // function containing) operands to all of the instructions in the function.
   // Process them (recursively) collecting every function found.
-  findCallees(Worklist, Visited, Callees, CalleeSet);
+  findCallees(Worklist, Visited, Callees, CalleeIndexMap);
 }
 
-LazyCallGraph::Node::Node(LazyCallGraph &G, const Node &OtherN)
-    : G(G), F(OtherN.F), CalleeSet(OtherN.CalleeSet) {
-  // Loop over the other node's callees, adding the Function*s to our list
-  // directly, and recursing to add the Node*s.
-  Callees.reserve(OtherN.Callees.size());
-  for (const auto &OtherCallee : OtherN.Callees)
-    if (Function *Callee = OtherCallee.dyn_cast<Function *>())
-      Callees.push_back(Callee);
-    else
-      Callees.push_back(G.copyInto(*OtherCallee.get<Node *>()));
+void LazyCallGraph::Node::insertEdgeInternal(Function &Callee) {
+  if (Node *N = G->lookup(Callee))
+    return insertEdgeInternal(*N);
+
+  CalleeIndexMap.insert(std::make_pair(&Callee, Callees.size()));
+  Callees.push_back(&Callee);
 }
 
-LazyCallGraph::Node::Node(LazyCallGraph &G, Node &&OtherN)
-    : G(G), F(OtherN.F), Callees(std::move(OtherN.Callees)),
-      CalleeSet(std::move(OtherN.CalleeSet)) {
-  // Loop over our Callees. They've been moved from another node, but we need
-  // to move the Node*s to live under our bump ptr allocator.
-  for (auto &Callee : Callees)
-    if (Node *ChildN = Callee.dyn_cast<Node *>())
-      Callee = G.moveInto(std::move(*ChildN));
+void LazyCallGraph::Node::insertEdgeInternal(Node &CalleeN) {
+  CalleeIndexMap.insert(std::make_pair(&CalleeN.getFunction(), Callees.size()));
+  Callees.push_back(&CalleeN);
 }
 
-LazyCallGraph::LazyCallGraph(Module &M) : M(M) {
+void LazyCallGraph::Node::removeEdgeInternal(Function &Callee) {
+  auto IndexMapI = CalleeIndexMap.find(&Callee);
+  assert(IndexMapI != CalleeIndexMap.end() &&
+         "Callee not in the callee set for this caller?");
+
+  Callees[IndexMapI->second] = nullptr;
+  CalleeIndexMap.erase(IndexMapI);
+}
+
+LazyCallGraph::LazyCallGraph(Module &M) : NextDFSNumber(0) {
+  DEBUG(dbgs() << "Building CG for module: " << M.getModuleIdentifier()
+               << "\n");
   for (Function &F : M)
     if (!F.isDeclaration() && !F.hasLocalLinkage())
-      if (EntryNodeSet.insert(&F))
+      if (EntryIndexMap.insert(std::make_pair(&F, EntryNodes.size())).second) {
+        DEBUG(dbgs() << "  Adding '" << F.getName()
+                     << "' to entry set of the graph.\n");
         EntryNodes.push_back(&F);
+      }
 
   // Now add entry nodes for functions reachable via initializers to globals.
   SmallVector<Constant *, 16> Worklist;
@@ -100,51 +116,568 @@
       if (Visited.insert(GV.getInitializer()))
         Worklist.push_back(GV.getInitializer());
 
-  findCallees(Worklist, Visited, EntryNodes, EntryNodeSet);
-}
+  DEBUG(dbgs() << "  Adding functions referenced by global initializers to the "
+                  "entry set.\n");
+  findCallees(Worklist, Visited, EntryNodes, EntryIndexMap);
 
-LazyCallGraph::LazyCallGraph(const LazyCallGraph &G)
-    : M(G.M), EntryNodeSet(G.EntryNodeSet) {
-  EntryNodes.reserve(G.EntryNodes.size());
-  for (const auto &EntryNode : G.EntryNodes)
-    if (Function *Callee = EntryNode.dyn_cast<Function *>())
-      EntryNodes.push_back(Callee);
+  for (auto &Entry : EntryNodes) {
+    assert(!Entry.isNull() &&
+           "We can't have removed edges before we finish the constructor!");
+    if (Function *F = Entry.dyn_cast<Function *>())
+      SCCEntryNodes.push_back(F);
     else
-      EntryNodes.push_back(copyInto(*EntryNode.get<Node *>()));
+      SCCEntryNodes.push_back(&Entry.get<Node *>()->getFunction());
+  }
 }
 
-// FIXME: This would be crazy simpler if BumpPtrAllocator were movable without
-// invalidating any of the allocated memory. We should make that be the case at
-// some point and delete this.
 LazyCallGraph::LazyCallGraph(LazyCallGraph &&G)
-    : M(G.M), EntryNodes(std::move(G.EntryNodes)),
-      EntryNodeSet(std::move(G.EntryNodeSet)) {
-  // Loop over our EntryNodes. They've been moved from another graph, so we
-  // need to move the Node*s to live under our bump ptr allocator. We can just
-  // do this in-place.
-  for (auto &Entry : EntryNodes)
-    if (Node *EntryN = Entry.dyn_cast<Node *>())
-      Entry = moveInto(std::move(*EntryN));
+    : BPA(std::move(G.BPA)), NodeMap(std::move(G.NodeMap)),
+      EntryNodes(std::move(G.EntryNodes)),
+      EntryIndexMap(std::move(G.EntryIndexMap)), SCCBPA(std::move(G.SCCBPA)),
+      SCCMap(std::move(G.SCCMap)), LeafSCCs(std::move(G.LeafSCCs)),
+      DFSStack(std::move(G.DFSStack)),
+      SCCEntryNodes(std::move(G.SCCEntryNodes)),
+      NextDFSNumber(G.NextDFSNumber) {
+  updateGraphPtrs();
 }
 
-LazyCallGraph::Node *LazyCallGraph::insertInto(Function &F, Node *&MappedN) {
-  return new (MappedN = BPA.Allocate()) Node(*this, F);
+LazyCallGraph &LazyCallGraph::operator=(LazyCallGraph &&G) {
+  BPA = std::move(G.BPA);
+  NodeMap = std::move(G.NodeMap);
+  EntryNodes = std::move(G.EntryNodes);
+  EntryIndexMap = std::move(G.EntryIndexMap);
+  SCCBPA = std::move(G.SCCBPA);
+  SCCMap = std::move(G.SCCMap);
+  LeafSCCs = std::move(G.LeafSCCs);
+  DFSStack = std::move(G.DFSStack);
+  SCCEntryNodes = std::move(G.SCCEntryNodes);
+  NextDFSNumber = G.NextDFSNumber;
+  updateGraphPtrs();
+  return *this;
 }
 
-LazyCallGraph::Node *LazyCallGraph::copyInto(const Node &OtherN) {
-  Node *&N = NodeMap[&OtherN.F];
-  if (N)
-    return N;
-
-  return new (N = BPA.Allocate()) Node(*this, OtherN);
+void LazyCallGraph::SCC::insert(Node &N) {
+  N.DFSNumber = N.LowLink = -1;
+  Nodes.push_back(&N);
+  G->SCCMap[&N] = this;
 }
 
-LazyCallGraph::Node *LazyCallGraph::moveInto(Node &&OtherN) {
-  Node *&N = NodeMap[&OtherN.F];
-  if (N)
-    return N;
+bool LazyCallGraph::SCC::isDescendantOf(const SCC &C) const {
+  // Walk up the parents of this SCC and verify that we eventually find C.
+  SmallVector<const SCC *, 4> AncestorWorklist;
+  AncestorWorklist.push_back(this);
+  do {
+    const SCC *AncestorC = AncestorWorklist.pop_back_val();
+    if (AncestorC->isChildOf(C))
+      return true;
+    for (const SCC *ParentC : AncestorC->ParentSCCs)
+      AncestorWorklist.push_back(ParentC);
+  } while (!AncestorWorklist.empty());
 
-  return new (N = BPA.Allocate()) Node(*this, std::move(OtherN));
+  return false;
+}
+
+void LazyCallGraph::SCC::insertIntraSCCEdge(Node &CallerN, Node &CalleeN) {
+  // First insert it into the caller.
+  CallerN.insertEdgeInternal(CalleeN);
+
+  assert(G->SCCMap.lookup(&CallerN) == this && "Caller must be in this SCC.");
+  assert(G->SCCMap.lookup(&CalleeN) == this && "Callee must be in this SCC.");
+
+  // Nothing changes about this SCC or any other.
+}
+
+void LazyCallGraph::SCC::insertOutgoingEdge(Node &CallerN, Node &CalleeN) {
+  // First insert it into the caller.
+  CallerN.insertEdgeInternal(CalleeN);
+
+  assert(G->SCCMap.lookup(&CallerN) == this && "Caller must be in this SCC.");
+
+  SCC &CalleeC = *G->SCCMap.lookup(&CalleeN);
+  assert(&CalleeC != this && "Callee must not be in this SCC.");
+  assert(CalleeC.isDescendantOf(*this) &&
+         "Callee must be a descendant of the Caller.");
+
+  // The only change required is to add this SCC to the parent set of the
+  // callee.
+  CalleeC.ParentSCCs.insert(this);
+}
+
+SmallVector<LazyCallGraph::SCC *, 1>
+LazyCallGraph::SCC::insertIncomingEdge(Node &CallerN, Node &CalleeN) {
+  // First insert it into the caller.
+  CallerN.insertEdgeInternal(CalleeN);
+
+  assert(G->SCCMap.lookup(&CalleeN) == this && "Callee must be in this SCC.");
+
+  SCC &CallerC = *G->SCCMap.lookup(&CallerN);
+  assert(&CallerC != this && "Caller must not be in this SCC.");
+  assert(CallerC.isDescendantOf(*this) &&
+         "Caller must be a descendant of the Callee.");
+
+  // The algorithm we use for merging SCCs based on the cycle introduced here
+  // is to walk the SCC inverted DAG formed by the parent SCC sets. The inverse
+  // graph has the same cycle properties as the actual DAG of the SCCs, and
+  // when forming SCCs lazily by a DFS, the bottom of the graph won't exist
+  // in many cases, which should prune the search space.
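+  // Concretely, we DFS from the callee's SCC over parent sets; every SCC on
+  // a parent path that reaches the caller's SCC lies on a new cycle through
+  // the inserted edge and is merged below.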
+  //
+  // FIXME: We can get this pruning behavior even after the incremental SCC
+  // formation by leaving behind (conservative) DFS numberings in the nodes,
+  // and pruning the search with them. These would need to be cleverly updated
+  // during the removal of intra-SCC edges, but could be preserved
+  // conservatively.
+
+  // The set of SCCs that are connected to the caller, and thus will
+  // participate in the merged connected component.
+  SmallPtrSet<SCC *, 8> ConnectedSCCs;
+  ConnectedSCCs.insert(this);
+  ConnectedSCCs.insert(&CallerC);
+
+  // We build up a DFS stack of the parents chains.
+  SmallVector<std::pair<SCC *, SCC::parent_iterator>, 8> DFSSCCs;
+  SmallPtrSet<SCC *, 8> VisitedSCCs;
+  int ConnectedDepth = -1;
+  SCC *C = this;
+  parent_iterator I = parent_begin(), E = parent_end();
+  for (;;) {
+    while (I != E) {
+      SCC &ParentSCC = *I++;
+
+      // If we have already processed this parent SCC, skip it, and remember
+      // whether it was connected so we don't have to check the rest of the
+      // stack. This also handles when we reach a child of the 'this' SCC (the
+      // callee) which terminates the search.
+      if (ConnectedSCCs.count(&ParentSCC)) {
+        ConnectedDepth = std::max<int>(ConnectedDepth, DFSSCCs.size());
+        continue;
+      }
+      if (VisitedSCCs.count(&ParentSCC))
+        continue;
+
+      // We fully explore the depth-first space, adding nodes to the connected
+      // set only as we pop them off, so "recurse" by rotating to the parent.
+      DFSSCCs.push_back(std::make_pair(C, I));
+      C = &ParentSCC;
+      I = ParentSCC.parent_begin();
+      E = ParentSCC.parent_end();
+    }
+
+    // If we've found a connection anywhere below this point on the stack (and
+    // thus up the parent graph from the caller), the current node needs to be
+    // added to the connected set now that we've processed all of its parents.
+    if ((int)DFSSCCs.size() == ConnectedDepth) {
+      --ConnectedDepth; // We're finished with this connection.
+      ConnectedSCCs.insert(C);
+    } else {
+      // Otherwise remember that its parents don't ever connect.
+      assert(ConnectedDepth < (int)DFSSCCs.size() &&
+             "Cannot have a connected depth greater than the DFS depth!");
+      VisitedSCCs.insert(C);
+    }
+
+    if (DFSSCCs.empty())
+      break; // We've walked all the parents of the caller transitively.
+
+    // Pop off the prior node and position to unwind the depth first recursion.
+    std::tie(C, I) = DFSSCCs.pop_back_val();
+    E = C->parent_end();
+  }
+
+  // Now that we have identified all of the SCCs which need to be merged into
+  // a connected set with the inserted edge, merge all of them into this SCC.
+  // FIXME: This operation currently creates ordering stability problems
+  // because we don't use stably ordered containers for the parent SCCs or the
+  // connected SCCs.
+  unsigned NewNodeBeginIdx = Nodes.size();
+  for (SCC *C : ConnectedSCCs) {
+    if (C == this)
+      continue;
+    for (SCC *ParentC : C->ParentSCCs)
+      if (!ConnectedSCCs.count(ParentC))
+        ParentSCCs.insert(ParentC);
+    C->ParentSCCs.clear();
+
+    for (Node *N : *C) {
+      for (Node &ChildN : *N) {
+        SCC &ChildC = *G->SCCMap.lookup(&ChildN);
+        if (&ChildC != C)
+          ChildC.ParentSCCs.erase(C);
+      }
+      G->SCCMap[N] = this;
+      Nodes.push_back(N);
+    }
+    C->Nodes.clear();
+  }
+  for (auto I = Nodes.begin() + NewNodeBeginIdx, E = Nodes.end(); I != E; ++I)
+    for (Node &ChildN : **I) {
+      SCC &ChildC = *G->SCCMap.lookup(&ChildN);
+      if (&ChildC != this)
+        ChildC.ParentSCCs.insert(this);
+    }
+
+  // We return the list of SCCs which were merged so that callers can
+  // invalidate any data they have associated with those SCCs. Note that these
+  // SCCs are no longer in an interesting state (they are totally empty) but
+  // the pointers will remain stable for the life of the graph itself.
+  return SmallVector<SCC *, 1>(ConnectedSCCs.begin(), ConnectedSCCs.end());
+}
+
+void LazyCallGraph::SCC::removeInterSCCEdge(Node &CallerN, Node &CalleeN) {
+  // First remove it from the node.
+  CallerN.removeEdgeInternal(CalleeN.getFunction());
+
+  assert(G->SCCMap.lookup(&CallerN) == this &&
+         "The caller must be a member of this SCC.");
+
+  SCC &CalleeC = *G->SCCMap.lookup(&CalleeN);
+  assert(&CalleeC != this &&
+         "This API only supports the rmoval of inter-SCC edges.");
+
+  assert(std::find(G->LeafSCCs.begin(), G->LeafSCCs.end(), this) ==
+             G->LeafSCCs.end() &&
+         "Cannot have a leaf SCC caller with a different SCC callee.");
+
+  bool HasOtherCallToCalleeC = false;
+  bool HasOtherCallOutsideSCC = false;
+  for (Node *N : *this) {
+    for (Node &OtherCalleeN : *N) {
+      SCC &OtherCalleeC = *G->SCCMap.lookup(&OtherCalleeN);
+      if (&OtherCalleeC == &CalleeC) {
+        HasOtherCallToCalleeC = true;
+        break;
+      }
+      if (&OtherCalleeC != this)
+        HasOtherCallOutsideSCC = true;
+    }
+    if (HasOtherCallToCalleeC)
+      break;
+  }
+  // Because the SCCs form a DAG, deleting such an edge cannot change the set
+  // of SCCs in the graph. However, it may cut an edge of the SCC DAG, making
+  // the caller no longer a parent of the callee. The walk above over the
+  // caller's other call edges tells us which case we are in.
+  if (!HasOtherCallToCalleeC) {
+    bool Removed = CalleeC.ParentSCCs.erase(this);
+    (void)Removed;
+    assert(Removed &&
+           "Did not find the caller SCC in the callee SCC's parent list!");
+
+    // It may orphan an SCC if it is the last edge reaching it, but that does
+    // not violate any invariants of the graph.
+    if (CalleeC.ParentSCCs.empty())
+      DEBUG(dbgs() << "LCG: Update removing " << CallerN.getFunction().getName()
+                   << " -> " << CalleeN.getFunction().getName()
+                   << " edge orphaned the callee's SCC!\n");
+  }
+
+  // It may make the Caller SCC a leaf SCC.
+  if (!HasOtherCallOutsideSCC)
+    G->LeafSCCs.push_back(this);
+}
+
+void LazyCallGraph::SCC::internalDFS(
+    SmallVectorImpl<std::pair<Node *, Node::iterator>> &DFSStack,
+    SmallVectorImpl<Node *> &PendingSCCStack, Node *N,
+    SmallVectorImpl<SCC *> &ResultSCCs) {
+  Node::iterator I = N->begin();
+  N->LowLink = N->DFSNumber = 1;
+  int NextDFSNumber = 2;
+  for (;;) {
+    assert(N->DFSNumber != 0 && "We should always assign a DFS number "
+                                "before processing a node.");
+
+    // We simulate recursion by popping out of the nested loop and continuing.
+    Node::iterator E = N->end();
+    while (I != E) {
+      Node &ChildN = *I;
+      if (SCC *ChildSCC = G->SCCMap.lookup(&ChildN)) {
+        // Check if we have reached a node in the new (known connected) set of
+        // this SCC. If so, the entire stack is necessarily in that set and we
+        // can re-start.
+        if (ChildSCC == this) {
+          insert(*N);
+          while (!PendingSCCStack.empty())
+            insert(*PendingSCCStack.pop_back_val());
+          while (!DFSStack.empty())
+            insert(*DFSStack.pop_back_val().first);
+          return;
+        }
+
+        // If this child isn't currently in this SCC, no need to process it.
+        // However, we do need to remove this SCC from its SCC's parent set.
+        ChildSCC->ParentSCCs.erase(this);
+        ++I;
+        continue;
+      }
+
+      if (ChildN.DFSNumber == 0) {
+        // Mark that we should start at this child when next this node is the
+        // top of the stack. We don't start at the next child to ensure this
+        // child's lowlink is reflected.
+        DFSStack.push_back(std::make_pair(N, I));
+
+        // Continue, resetting to the child node.
+        ChildN.LowLink = ChildN.DFSNumber = NextDFSNumber++;
+        N = &ChildN;
+        I = ChildN.begin();
+        E = ChildN.end();
+        continue;
+      }
+
+      // Track the lowest link of the children, if any are still in the stack.
+      // Any child not on the stack will have a LowLink of -1.
+      assert(ChildN.LowLink != 0 &&
+             "Low-link must not be zero with a non-zero DFS number.");
+      if (ChildN.LowLink >= 0 && ChildN.LowLink < N->LowLink)
+        N->LowLink = ChildN.LowLink;
+      ++I;
+    }
+
+    if (N->LowLink == N->DFSNumber) {
+      ResultSCCs.push_back(G->formSCC(N, PendingSCCStack));
+      if (DFSStack.empty())
+        return;
+    } else {
+      // At this point we know that N cannot ever be an SCC root. Its low-link
+      // is not its dfs-number, and we've processed all of its children. It is
+      // just sitting here waiting until some node further down the stack gets
+      // low-link == dfs-number and pops it off as well. Move it to the pending
+      // stack which is pulled into the next SCC to be formed.
+      PendingSCCStack.push_back(N);
+
+      assert(!DFSStack.empty() && "We shouldn't have an empty stack!");
+    }
+
+    N = DFSStack.back().first;
+    I = DFSStack.back().second;
+    DFSStack.pop_back();
+  }
+}
+
+SmallVector<LazyCallGraph::SCC *, 1>
+LazyCallGraph::SCC::removeIntraSCCEdge(Node &CallerN,
+                                       Node &CalleeN) {
+  // First remove it from the node.
+  CallerN.removeEdgeInternal(CalleeN.getFunction());
+
+  // We return a list of the resulting *new* SCCs in postorder.
+  SmallVector<SCC *, 1> ResultSCCs;
+
+  // Direct recursion doesn't impact the SCC graph at all.
+  if (&CallerN == &CalleeN)
+    return ResultSCCs;
+
+  // The worklist is every node in the original SCC.
+  SmallVector<Node *, 1> Worklist;
+  Worklist.swap(Nodes);
+  for (Node *N : Worklist) {
+    // The nodes formerly in this SCC are no longer in any SCC.
+    N->DFSNumber = 0;
+    N->LowLink = 0;
+    G->SCCMap.erase(N);
+  }
+  assert(Worklist.size() > 1 && "We have to have at least two nodes to have an "
+                                "edge between them that is within the SCC.");
+
+  // The callee can already reach every node in this SCC (by definition). It is
+  // the only node we know will stay inside this SCC. Everything which
+  // transitively reaches Callee will also remain in the SCC. To model this we
+  // incrementally add any chain of nodes which reaches something in the new
+  // node set to the new node set. This short-circuits one side of Tarjan's
+  // walk.
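+  // In other words, once a DFS chain reaches the known-connected new node
+  // set, the entire chain is also in this SCC and is inserted directly.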
+  insert(CalleeN);
+
+  // We're going to do a full mini-Tarjan's walk using a local stack here.
+  SmallVector<std::pair<Node *, Node::iterator>, 4> DFSStack;
+  SmallVector<Node *, 4> PendingSCCStack;
+  do {
+    Node *N = Worklist.pop_back_val();
+    if (N->DFSNumber == 0)
+      internalDFS(DFSStack, PendingSCCStack, N, ResultSCCs);
+
+    assert(DFSStack.empty() && "Didn't flush the entire DFS stack!");
+    assert(PendingSCCStack.empty() && "Didn't flush all pending SCC nodes!");
+  } while (!Worklist.empty());
+
+  // Now we need to reconnect the current SCC to the graph.
+  bool IsLeafSCC = true;
+  for (Node *N : Nodes) {
+    for (Node &ChildN : *N) {
+      SCC &ChildSCC = *G->SCCMap.lookup(&ChildN);
+      if (&ChildSCC == this)
+        continue;
+      ChildSCC.ParentSCCs.insert(this);
+      IsLeafSCC = false;
+    }
+  }
+#ifndef NDEBUG
+  if (!ResultSCCs.empty())
+    assert(!IsLeafSCC && "This SCC cannot be a leaf as we have split out new "
+                         "SCCs by removing this edge.");
+  if (!std::any_of(G->LeafSCCs.begin(), G->LeafSCCs.end(),
+                   [&](SCC *C) { return C == this; }))
+    assert(!IsLeafSCC && "This SCC cannot be a leaf as it already had child "
+                         "SCCs before we removed this edge.");
+#endif
+  // If this SCC stopped being a leaf through this edge removal, remove it from
+  // the leaf SCC list.
+  if (!IsLeafSCC && !ResultSCCs.empty())
+    G->LeafSCCs.erase(std::remove(G->LeafSCCs.begin(), G->LeafSCCs.end(), this),
+                      G->LeafSCCs.end());
+
+  // Return the new list of SCCs.
+  return ResultSCCs;
+}
+
+void LazyCallGraph::insertEdge(Node &CallerN, Function &Callee) {
+  assert(SCCMap.empty() && DFSStack.empty() &&
+         "This method cannot be called after SCCs have been formed!");
+
+  return CallerN.insertEdgeInternal(Callee);
+}
+
+void LazyCallGraph::removeEdge(Node &CallerN, Function &Callee) {
+  assert(SCCMap.empty() && DFSStack.empty() &&
+         "This method cannot be called after SCCs have been formed!");
+
+  return CallerN.removeEdgeInternal(Callee);
+}
+
+LazyCallGraph::Node &LazyCallGraph::insertInto(Function &F, Node *&MappedN) {
+  return *new (MappedN = BPA.Allocate()) Node(*this, F);
+}
+
+void LazyCallGraph::updateGraphPtrs() {
+  // Process all nodes updating the graph pointers.
+  {
+    SmallVector<Node *, 16> Worklist;
+    for (auto &Entry : EntryNodes)
+      if (Node *EntryN = Entry.dyn_cast<Node *>())
+        Worklist.push_back(EntryN);
+
+    while (!Worklist.empty()) {
+      Node *N = Worklist.pop_back_val();
+      N->G = this;
+      for (auto &Callee : N->Callees)
+        if (!Callee.isNull())
+          if (Node *CalleeN = Callee.dyn_cast<Node *>())
+            Worklist.push_back(CalleeN);
+    }
+  }
+
+  // Process all SCCs updating the graph pointers.
+  {
+    SmallVector<SCC *, 16> Worklist(LeafSCCs.begin(), LeafSCCs.end());
+
+    while (!Worklist.empty()) {
+      SCC *C = Worklist.pop_back_val();
+      C->G = this;
+      Worklist.insert(Worklist.end(), C->ParentSCCs.begin(),
+                      C->ParentSCCs.end());
+    }
+  }
+}
+
+LazyCallGraph::SCC *LazyCallGraph::formSCC(Node *RootN,
+                                           SmallVectorImpl<Node *> &NodeStack) {
+  // The tail of the stack is the new SCC. Allocate the SCC and pop the stack
+  // into it.
+  SCC *NewSCC = new (SCCBPA.Allocate()) SCC(*this);
+
+  while (!NodeStack.empty() && NodeStack.back()->DFSNumber > RootN->DFSNumber) {
+    assert(NodeStack.back()->LowLink >= RootN->LowLink &&
+           "We cannot have a low link in an SCC lower than its root on the "
+           "stack!");
+    NewSCC->insert(*NodeStack.pop_back_val());
+  }
+  NewSCC->insert(*RootN);
+
+  // A final pass over all edges in the SCC (this remains linear as we only
+  // do this once when we build the SCC) to connect it to the parent sets of
+  // its children.
+  bool IsLeafSCC = true;
+  for (Node *SCCN : NewSCC->Nodes)
+    for (Node &SCCChildN : *SCCN) {
+      SCC &ChildSCC = *SCCMap.lookup(&SCCChildN);
+      if (&ChildSCC == NewSCC)
+        continue;
+      ChildSCC.ParentSCCs.insert(NewSCC);
+      IsLeafSCC = false;
+    }
+
+  // For the SCCs where we find no child SCCs, add them to the leaf list.
+  if (IsLeafSCC)
+    LeafSCCs.push_back(NewSCC);
+
+  return NewSCC;
+}
+
+LazyCallGraph::SCC *LazyCallGraph::getNextSCCInPostOrder() {
+  Node *N;
+  Node::iterator I;
+  if (!DFSStack.empty()) {
+    N = DFSStack.back().first;
+    I = DFSStack.back().second;
+    DFSStack.pop_back();
+  } else {
+    // If we've handled all candidate entry nodes to the SCC forest, we're done.
+    do {
+      if (SCCEntryNodes.empty())
+        return nullptr;
+
+      N = &get(*SCCEntryNodes.pop_back_val());
+    } while (N->DFSNumber != 0);
+    I = N->begin();
+    N->LowLink = N->DFSNumber = 1;
+    NextDFSNumber = 2;
+  }
+
+  for (;;) {
+    assert(N->DFSNumber != 0 && "We should always assign a DFS number "
+                                "before placing a node onto the stack.");
+
+    Node::iterator E = N->end();
+    while (I != E) {
+      Node &ChildN = *I;
+      if (ChildN.DFSNumber == 0) {
+        // Mark that we should start at this child when next this node is the
+        // top of the stack. We don't start at the next child to ensure this
+        // child's lowlink is reflected.
+        DFSStack.push_back(std::make_pair(N, I));
+
+        // Recurse onto this node via a tail call.
+        assert(!SCCMap.count(&ChildN) &&
+               "Found a node with 0 DFS number but already in an SCC!");
+        ChildN.LowLink = ChildN.DFSNumber = NextDFSNumber++;
+        N = &ChildN;
+        I = ChildN.begin();
+        E = ChildN.end();
+        continue;
+      }
+
+      // Track the lowest link of the children, if any are still in the stack.
+      assert(ChildN.LowLink != 0 &&
+             "Low-link must not be zero with a non-zero DFS number.");
+      if (ChildN.LowLink >= 0 && ChildN.LowLink < N->LowLink)
+        N->LowLink = ChildN.LowLink;
+      ++I;
+    }
+
+    if (N->LowLink == N->DFSNumber)
+      // Form the new SCC out of the top of the DFS stack.
+      return formSCC(N, PendingSCCStack);
+
+    // At this point we know that N cannot ever be an SCC root. Its low-link
+    // is not its dfs-number, and we've processed all of its children. It is
+    // just sitting here waiting until some node further down the stack gets
+    // low-link == dfs-number and pops it off as well. Move it to the pending
+    // stack which is pulled into the next SCC to be formed.
+    PendingSCCStack.push_back(N);
+
+    assert(!DFSStack.empty() && "We never found a viable root!");
+    N = DFSStack.back().first;
+    I = DFSStack.back().second;
+    DFSStack.pop_back();
+  }
 }
 
 char LazyCallGraphAnalysis::PassID;
@@ -154,9 +687,9 @@
 static void printNodes(raw_ostream &OS, LazyCallGraph::Node &N,
                        SmallPtrSetImpl<LazyCallGraph::Node *> &Printed) {
   // Recurse depth first through the nodes.
-  for (LazyCallGraph::Node *ChildN : N)
-    if (Printed.insert(ChildN))
-      printNodes(OS, *ChildN, Printed);
+  for (LazyCallGraph::Node &ChildN : N)
+    if (Printed.insert(&ChildN))
+      printNodes(OS, ChildN, Printed);
 
   OS << "  Call edges in function: " << N.getFunction().getName() << "\n";
   for (LazyCallGraph::iterator I = N.begin(), E = N.end(); I != E; ++I)
@@ -165,6 +698,16 @@
   OS << "\n";
 }
 
+static void printSCC(raw_ostream &OS, LazyCallGraph::SCC &SCC) {
+  ptrdiff_t SCCSize = std::distance(SCC.begin(), SCC.end());
+  OS << "  SCC with " << SCCSize << " functions:\n";
+
+  for (LazyCallGraph::Node *N : SCC)
+    OS << "    " << N->getFunction().getName() << "\n";
+
+  OS << "\n";
+}
+
 PreservedAnalyses LazyCallGraphPrinterPass::run(Module *M,
                                                 ModuleAnalysisManager *AM) {
   LazyCallGraph &G = AM->getResult<LazyCallGraphAnalysis>(M);
@@ -173,9 +716,13 @@
      << "\n\n";
 
   SmallPtrSet<LazyCallGraph::Node *, 16> Printed;
-  for (LazyCallGraph::Node *N : G)
-    if (Printed.insert(N))
-      printNodes(OS, *N, Printed);
+  for (LazyCallGraph::Node &N : G)
+    if (Printed.insert(&N))
+      printNodes(OS, N, Printed);
+
+  for (LazyCallGraph::SCC &SCC : G.postorder_sccs())
+    printSCC(OS, SCC);
 
   return PreservedAnalyses::all();
 }
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index 3d6c583..9f919f7 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -12,7 +12,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "lazy-value-info"
 #include "llvm/Analysis/LazyValueInfo.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
@@ -34,6 +33,8 @@
 using namespace llvm;
 using namespace PatternMatch;
 
+#define DEBUG_TYPE "lazy-value-info"
+
 char LazyValueInfo::ID = 0;
 INITIALIZE_PASS_BEGIN(LazyValueInfo, "lazy-value-info",
                 "Lazy Value Information Analysis", false, true)
@@ -82,7 +83,7 @@
   ConstantRange Range;
   
 public:
-  LVILatticeVal() : Tag(undefined), Val(0), Range(1, true) {}
+  LVILatticeVal() : Tag(undefined), Val(nullptr), Range(1, true) {}
 
   static LVILatticeVal get(Constant *C) {
     LVILatticeVal Res;
@@ -516,7 +517,7 @@
   BBLV.markOverdefined();
   
   Instruction *BBI = dyn_cast<Instruction>(Val);
-  if (BBI == 0 || BBI->getParent() != BB) {
+  if (!BBI || BBI->getParent() != BB) {
     return ODCacheUpdater.markResult(solveBlockValueNonLocal(BBLV, Val, BB));
   }
 
@@ -595,7 +596,7 @@
       Value *UnderlyingVal = GetUnderlyingObject(Val);
       // If 'GetUnderlyingObject' didn't converge, skip it. It won't converge
       // inside InstructionDereferencesPointer either.
-      if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, NULL, 1)) {
+      if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, nullptr, 1)) {
         for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
              BI != BE; ++BI) {
           if (InstructionDereferencesPointer(BI, UnderlyingVal)) {
@@ -813,7 +814,7 @@
 
         // Recognize the range checking idiom that InstCombine produces.
         // (X-C1) u< C2 --> [C1, C1+C2)
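+        // e.g. for (X-5) u< 10, the recognized range for X is [5, 15).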
-        ConstantInt *NegOffset = 0;
+        ConstantInt *NegOffset = nullptr;
         if (ICI->getPredicate() == ICmpInst::ICMP_ULT)
           match(ICI->getOperand(0), m_Add(m_Specific(Val),
                                           m_ConstantInt(NegOffset)));
@@ -1014,7 +1015,7 @@
     getCache(PImpl).clear();
 
   DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-  DL = DLP ? &DLP->getDataLayout() : 0;
+  DL = DLP ? &DLP->getDataLayout() : nullptr;
   TLI = &getAnalysis<TargetLibraryInfo>();
 
   // Fully lazy.
@@ -1030,7 +1031,7 @@
   // If the cache was allocated, free it.
   if (PImpl) {
     delete &getCache(PImpl);
-    PImpl = 0;
+    PImpl = nullptr;
   }
 }
 
@@ -1044,7 +1045,7 @@
     if (const APInt *SingleVal = CR.getSingleElement())
       return ConstantInt::get(V->getContext(), *SingleVal);
   }
-  return 0;
+  return nullptr;
 }
 
 /// getConstantOnEdge - Determine whether the specified value is known to be a
@@ -1060,7 +1061,7 @@
     if (const APInt *SingleVal = CR.getSingleElement())
       return ConstantInt::get(V->getContext(), *SingleVal);
   }
-  return 0;
+  return nullptr;
 }
 
 /// getPredicateOnEdge - Determine whether the specified value comparison
@@ -1072,7 +1073,7 @@
   LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB);
   
   // If we know the value is a constant, evaluate the conditional.
-  Constant *Res = 0;
+  Constant *Res = nullptr;
   if (Result.isConstant()) {
     Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, DL,
                                           TLI);
diff --git a/lib/Analysis/LibCallAliasAnalysis.cpp b/lib/Analysis/LibCallAliasAnalysis.cpp
index fefa516..016f8c5 100644
--- a/lib/Analysis/LibCallAliasAnalysis.cpp
+++ b/lib/Analysis/LibCallAliasAnalysis.cpp
@@ -54,7 +54,7 @@
   // if we have detailed info and if 'P' is any of the locations we know
   // about.
   const LibCallFunctionInfo::LocationMRInfo *Details = FI->LocationDetails;
-  if (Details == 0)
+  if (Details == nullptr)
     return MRInfo;
   
   // If the details array is of the 'DoesNot' kind, we only know something if
diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp
index 0592ccb..7d4e254 100644
--- a/lib/Analysis/LibCallSemantics.cpp
+++ b/lib/Analysis/LibCallSemantics.cpp
@@ -46,11 +46,11 @@
   
   /// If this is the first time we are querying for this info, lazily construct
   /// the StringMap to index it.
-  if (Map == 0) {
+  if (!Map) {
     Impl = Map = new StringMap<const LibCallFunctionInfo*>();
     
     const LibCallFunctionInfo *Array = getFunctionInfoArray();
-    if (Array == 0) return 0;
+    if (!Array) return nullptr;
     
     // We now have the array of entries.  Populate the StringMap.
     for (unsigned i = 0; Array[i].Name; ++i)
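
The surrounding function is a lazily-built index: nothing is constructed
until the first query, after which every lookup hits the cache. A portable
sketch of the same shape, with hypothetical names and std::map standing in
for StringMap:

    #include <map>
    #include <string>

    struct FnInfo { const char *Name; /* details elided */ };

    class LazyIndex {
      std::map<std::string, const FnInfo *> *Map = nullptr;
      const FnInfo *Array;                   // terminated by a null Name
    public:
      explicit LazyIndex(const FnInfo *A) : Array(A) {}
      ~LazyIndex() { delete Map; }

      const FnInfo *lookup(const std::string &Name) {
        if (!Map) {                          // first query: build the index
          Map = new std::map<std::string, const FnInfo *>();
          for (unsigned i = 0; Array && Array[i].Name; ++i)
            (*Map)[Array[i].Name] = &Array[i];
        }
        auto It = Map->find(Name);
        return It == Map->end() ? nullptr : It->second;
      }
    };
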
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index b2182b1..b14f329 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -137,8 +137,8 @@
     // that failed.  This provides a nice place to put a breakpoint if you want
     // to see why something is not correct.
     void CheckFailed(const Twine &Message,
-                     const Value *V1 = 0, const Value *V2 = 0,
-                     const Value *V3 = 0, const Value *V4 = 0) {
+                     const Value *V1 = nullptr, const Value *V2 = nullptr,
+                     const Value *V3 = nullptr, const Value *V4 = nullptr) {
       MessagesStr << Message.str() << "\n";
       WriteValue(V1);
       WriteValue(V2);
@@ -177,7 +177,7 @@
   AA = &getAnalysis<AliasAnalysis>();
   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-  DL = DLP ? &DLP->getDataLayout() : 0;
+  DL = DLP ? &DLP->getDataLayout() : nullptr;
   TLI = &getAnalysis<TargetLibraryInfo>();
   visit(F);
   dbgs() << MessagesStr.str();
@@ -199,7 +199,7 @@
   Value *Callee = CS.getCalledValue();
 
   visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize,
-                       0, 0, MemRef::Callee);
+                       0, nullptr, MemRef::Callee);
 
   if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) {
     Assert1(CS.getCallingConv() == F->getCallingConv(),
@@ -275,10 +275,10 @@
       MemCpyInst *MCI = cast<MemCpyInst>(&I);
       // TODO: If the size is known, use it.
       visitMemoryReference(I, MCI->getDest(), AliasAnalysis::UnknownSize,
-                           MCI->getAlignment(), 0,
+                           MCI->getAlignment(), nullptr,
                            MemRef::Write);
       visitMemoryReference(I, MCI->getSource(), AliasAnalysis::UnknownSize,
-                           MCI->getAlignment(), 0,
+                           MCI->getAlignment(), nullptr,
                            MemRef::Read);
 
       // Check that the memcpy arguments don't overlap. The AliasAnalysis API
@@ -299,10 +299,10 @@
       MemMoveInst *MMI = cast<MemMoveInst>(&I);
       // TODO: If the size is known, use it.
       visitMemoryReference(I, MMI->getDest(), AliasAnalysis::UnknownSize,
-                           MMI->getAlignment(), 0,
+                           MMI->getAlignment(), nullptr,
                            MemRef::Write);
       visitMemoryReference(I, MMI->getSource(), AliasAnalysis::UnknownSize,
-                           MMI->getAlignment(), 0,
+                           MMI->getAlignment(), nullptr,
                            MemRef::Read);
       break;
     }
@@ -310,7 +310,7 @@
       MemSetInst *MSI = cast<MemSetInst>(&I);
       // TODO: If the size is known, use it.
       visitMemoryReference(I, MSI->getDest(), AliasAnalysis::UnknownSize,
-                           MSI->getAlignment(), 0,
+                           MSI->getAlignment(), nullptr,
                            MemRef::Write);
       break;
     }
@@ -321,17 +321,17 @@
               &I);
 
       visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
-                           0, 0, MemRef::Read | MemRef::Write);
+                           0, nullptr, MemRef::Read | MemRef::Write);
       break;
     case Intrinsic::vacopy:
       visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
-                           0, 0, MemRef::Write);
+                           0, nullptr, MemRef::Write);
       visitMemoryReference(I, CS.getArgument(1), AliasAnalysis::UnknownSize,
-                           0, 0, MemRef::Read);
+                           0, nullptr, MemRef::Read);
       break;
     case Intrinsic::vaend:
       visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
-                           0, 0, MemRef::Read | MemRef::Write);
+                           0, nullptr, MemRef::Read | MemRef::Write);
       break;
 
     case Intrinsic::stackrestore:
@@ -339,7 +339,7 @@
       // stack pointer, which the compiler may read from or write to
       // at any time, so check it for both readability and writeability.
       visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
-                           0, 0, MemRef::Read | MemRef::Write);
+                           0, nullptr, MemRef::Read | MemRef::Write);
       break;
     }
 }
@@ -513,7 +513,7 @@
   if (!VecTy) {
     unsigned BitWidth = V->getType()->getIntegerBitWidth();
     APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-    ComputeMaskedBits(V, KnownZero, KnownOne, DL);
+    computeKnownBits(V, KnownZero, KnownOne, DL);
     return KnownZero.isAllOnesValue();
   }
 
@@ -534,7 +534,7 @@
       return true;
 
     APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-    ComputeMaskedBits(Elem, KnownZero, KnownOne, DL);
+    computeKnownBits(Elem, KnownZero, KnownOne, DL);
     if (KnownZero.isAllOnesValue())
       return true;
   }
@@ -572,13 +572,13 @@
 }
 
 void Lint::visitVAArgInst(VAArgInst &I) {
-  visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0, 0,
-                       MemRef::Read | MemRef::Write);
+  visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0,
+                       nullptr, MemRef::Read | MemRef::Write);
 }
 
 void Lint::visitIndirectBrInst(IndirectBrInst &I) {
-  visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0, 0,
-                       MemRef::Branchee);
+  visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0,
+                       nullptr, MemRef::Branchee);
 
   Assert1(I.getNumDestinations() != 0,
           "Undefined behavior: indirectbr with no destinations", &I);
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 0902a39..005d309 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -62,7 +62,7 @@
   if (ByteOffset < 0) // out of bounds
     return false;
 
-  Type *BaseType = 0;
+  Type *BaseType = nullptr;
   unsigned BaseAlign = 0;
   if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
     // An alloca is safe to load from as long as it is suitably aligned.
@@ -161,7 +161,7 @@
     ScanFrom++;
    
     // Don't scan huge blocks.
-    if (MaxInstsToScan-- == 0) return 0;
+    if (MaxInstsToScan-- == 0) return nullptr;
     
     --ScanFrom;
     // If this is a load of Ptr, the loaded value is available.
@@ -198,7 +198,7 @@
       
       // Otherwise, the store may or may not alias the pointer; bail out.
       ++ScanFrom;
-      return 0;
+      return nullptr;
     }
     
     // If this is some other instruction that may clobber Ptr, bail out.
@@ -211,11 +211,11 @@
       
       // May modify the pointer, bail out.
       ++ScanFrom;
-      return 0;
+      return nullptr;
     }
   }
   
   // We got to the start of the block without finding the value, but we are
   // done with this block.
-  return 0;
+  return nullptr;
 }
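
The nullptr returns above all come from one loop shape: scan backward from
the insertion point under an instruction budget, succeed on a matching
definition, and bail on a possible clobber or an exhausted budget. A minimal
sketch of that shape over a hypothetical instruction record:

    #include <vector>

    struct Inst { bool DefinesWanted; bool MayClobber; };

    const Inst *scanBackward(const std::vector<Inst> &Block, size_t ScanFrom,
                             unsigned MaxInstsToScan) {
      while (ScanFrom > 0) {
        if (MaxInstsToScan-- == 0)
          return nullptr;                 // don't scan huge blocks
        const Inst &I = Block[--ScanFrom];
        if (I.DefinesWanted)
          return &I;                      // found the value
        if (I.MayClobber)
          return nullptr;                 // may modify it: bail out
      }
      return nullptr;                     // reached block start: not found
    }
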
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index b38672e..46c0eaa 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -141,21 +141,21 @@
 PHINode *Loop::getCanonicalInductionVariable() const {
   BasicBlock *H = getHeader();
 
-  BasicBlock *Incoming = 0, *Backedge = 0;
+  BasicBlock *Incoming = nullptr, *Backedge = nullptr;
   pred_iterator PI = pred_begin(H);
   assert(PI != pred_end(H) &&
          "Loop must have at least one backedge!");
   Backedge = *PI++;
-  if (PI == pred_end(H)) return 0;  // dead loop
+  if (PI == pred_end(H)) return nullptr;  // dead loop
   Incoming = *PI++;
-  if (PI != pred_end(H)) return 0;  // multiple backedges?
+  if (PI != pred_end(H)) return nullptr;  // multiple backedges?
 
   if (contains(Incoming)) {
     if (contains(Backedge))
-      return 0;
+      return nullptr;
     std::swap(Incoming, Backedge);
   } else if (!contains(Backedge))
-    return 0;
+    return nullptr;
 
   // Loop over all of the PHI nodes, looking for a canonical indvar.
   for (BasicBlock::iterator I = H->begin(); isa<PHINode>(I); ++I) {
@@ -171,7 +171,7 @@
               if (CI->equalsInt(1))
                 return PN;
   }
-  return 0;
+  return nullptr;
 }
 
 /// isLCSSAForm - Return true if the Loop is in LCSSA form
@@ -232,7 +232,7 @@
 }
 
 MDNode *Loop::getLoopID() const {
-  MDNode *LoopID = 0;
+  MDNode *LoopID = nullptr;
   if (isLoopSimplifyForm()) {
     LoopID = getLoopLatch()->getTerminator()->getMetadata(LoopMDName);
   } else {
@@ -241,7 +241,7 @@
     BasicBlock *H = getHeader();
     for (block_iterator I = block_begin(), IE = block_end(); I != IE; ++I) {
       TerminatorInst *TI = (*I)->getTerminator();
-      MDNode *MD = 0;
+      MDNode *MD = nullptr;
 
       // Check if this terminator branches to the loop header.
       for (unsigned i = 0, ie = TI->getNumSuccessors(); i != ie; ++i) {
@@ -251,17 +251,17 @@
         }
       }
       if (!MD)
-        return 0;
+        return nullptr;
 
       if (!LoopID)
         LoopID = MD;
       else if (MD != LoopID)
-        return 0;
+        return nullptr;
     }
   }
   if (!LoopID || LoopID->getNumOperands() == 0 ||
       LoopID->getOperand(0) != LoopID)
-    return 0;
+    return nullptr;
   return LoopID;
 }
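
The final check in getLoopID encodes the llvm.loop convention: valid loop
metadata has at least one operand, and its first operand is the node itself.
The self-reference is what keeps two structurally identical loops from
sharing, and thereby conflating, their metadata. A toy model of just that
validity rule:

    #include <cassert>
    #include <vector>

    struct MDToy {
      std::vector<const void *> Ops;
      bool isValidLoopID() const { return !Ops.empty() && Ops[0] == this; }
    };

    int main() {
      MDToy N;
      assert(!N.isValidLoopID());   // no operands yet
      N.Ops.push_back(&N);          // self-referential first operand
      assert(N.isValidLoopID());
      return 0;
    }
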
 
@@ -402,7 +402,7 @@
   getUniqueExitBlocks(UniqueExitBlocks);
   if (UniqueExitBlocks.size() == 1)
     return UniqueExitBlocks[0];
-  return 0;
+  return nullptr;
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -548,7 +548,7 @@
   // is considered uninitialized.
   Loop *NearLoop = BBLoop;
 
-  Loop *Subloop = 0;
+  Loop *Subloop = nullptr;
   if (NearLoop != Unloop && Unloop->contains(NearLoop)) {
     Subloop = NearLoop;
     // Find the subloop ancestor that is directly contained within Unloop.
@@ -564,7 +564,7 @@
   succ_iterator I = succ_begin(BB), E = succ_end(BB);
   if (I == E) {
     assert(!Subloop && "subloop blocks must have a successor");
-    NearLoop = 0; // unloop blocks may now exit the function.
+    NearLoop = nullptr; // unloop blocks may now exit the function.
   }
   for (; I != E; ++I) {
     if (*I == BB)
@@ -637,7 +637,7 @@
 
       // Blocks no longer have a parent but are still referenced by Unloop until
       // the Unloop object is deleted.
-      LI.changeLoopFor(*I, 0);
+      LI.changeLoopFor(*I, nullptr);
     }
 
     // Remove the loop from the top-level LoopInfo object.
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index 38e753f..8df18e7 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -15,10 +15,13 @@
 
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Timer.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "loop-pass-manager"
+
 namespace {
 
 /// PrintLoopPass - Print a Function corresponding to a Loop.
@@ -61,8 +64,8 @@
   : FunctionPass(ID), PMDataManager() {
   skipThisLoop = false;
   redoThisLoop = false;
-  LI = NULL;
-  CurrentLoop = NULL;
+  LI = nullptr;
+  CurrentLoop = nullptr;
 }
 
 /// Delete loop from the loop queue and loop hierarchy (LoopInfo).
@@ -251,6 +254,8 @@
 
         // Then call the regular verifyAnalysis functions.
         verifyPreservedAnalysis(P);
+
+        F.getContext().yield();
       }
 
       removeNotPreservedAnalysis(P);
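
The new F.getContext().yield() call gives an embedder a hook between pass
runs. On the assumption (not shown in this diff) that the contemporaneous
API registers the hook on the LLVMContext via setYieldCallback, a hedged
sketch of how a host would use it:

    #include "llvm/IR/LLVMContext.h"
    using namespace llvm;

    // Assumed callback shape: invoked by yield() if one is registered.
    static void onYield(LLVMContext *Ctx, void *OpaqueHandle) {
      // e.g. pump a host event loop or pet a watchdog timer here
    }

    void installYieldHook(LLVMContext &Ctx) {
      Ctx.setYieldCallback(onYield, /*OpaqueHandle=*/nullptr);
    }
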
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index bc1dc69..10da3d5 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -46,7 +46,7 @@
 
     bool runOnFunction(Function &F) override;
 
-    void print(raw_ostream &OS, const Module * = 0) const override;
+    void print(raw_ostream &OS, const Module * = nullptr) const override;
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.addRequiredTransitive<AliasAnalysis>();
@@ -56,7 +56,7 @@
 
     void releaseMemory() override {
       Deps.clear();
-      F = 0;
+      F = nullptr;
     }
 
   private:
@@ -106,7 +106,7 @@
     MemDepResult Res = MDA.getDependency(Inst);
     if (!Res.isNonLocal()) {
       Deps[Inst].insert(std::make_pair(getInstTypePair(Res),
-                                       static_cast<BasicBlock *>(0)));
+                                       static_cast<BasicBlock *>(nullptr)));
     } else if (CallSite CS = cast<Value>(Inst)) {
       const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI =
         MDA.getNonLocalCallDependency(CS);
@@ -122,8 +122,8 @@
       if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
         if (!LI->isUnordered()) {
           // FIXME: Handle atomic/volatile loads.
-          Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown),
-                                           static_cast<BasicBlock *>(0)));
+          Deps[Inst].insert(std::make_pair(getInstTypePair(nullptr, Unknown),
+                                           static_cast<BasicBlock *>(nullptr)));
           continue;
         }
         AliasAnalysis::Location Loc = AA.getLocation(LI);
@@ -131,8 +131,8 @@
       } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
         if (!SI->isUnordered()) {
           // FIXME: Handle atomic/volatile stores.
-          Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown),
-                                           static_cast<BasicBlock *>(0)));
+          Deps[Inst].insert(std::make_pair(getInstTypePair(nullptr, Unknown),
+                                           static_cast<BasicBlock *>(nullptr)));
           continue;
         }
         AliasAnalysis::Location Loc = AA.getLocation(SI);
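
The static_cast<BasicBlock *>(nullptr) above is not leftover ceremony. With
the old literal 0, std::make_pair deduced pair<..., int>, which does not
convert to the map's pair<..., BasicBlock *>, so the cast was mandatory;
std::nullptr_t would convert, but the cast still documents the stored
element type at the call site. A compilable illustration:

    #include <map>
    #include <utility>

    struct BB; // stand-in for BasicBlock

    int main() {
      std::map<int, std::pair<int, BB *>> Deps;

      // make_pair(1, 0) deduces pair<int, int>, which does not convert
      // to pair<int, BB *>; a cast was mandatory with 0:
      Deps[0] = std::make_pair(1, static_cast<BB *>(0));

      // nullptr_t does convert to BB *, but the cast keeps the intended
      // element type explicit:
      Deps[1] = std::make_pair(2, static_cast<BB *>(nullptr));
      return 0;
    }
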
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 1dba323..64d339f 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -12,7 +12,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "memory-builtins"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
@@ -30,6 +29,8 @@
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "memory-builtins"
+
 enum AllocType {
   OpNewLike          = 1<<0, // allocates; never returns null
   MallocLike         = 1<<1 | OpNewLike, // allocates; may return null
@@ -76,14 +77,14 @@
 
   CallSite CS(const_cast<Value*>(V));
   if (!CS.getInstruction())
-    return 0;
+    return nullptr;
 
   if (CS.isNoBuiltin())
-    return 0;
+    return nullptr;
 
   Function *Callee = CS.getCalledFunction();
   if (!Callee || !Callee->isDeclaration())
-    return 0;
+    return nullptr;
   return Callee;
 }
 
@@ -94,17 +95,17 @@
                                            bool LookThroughBitCast = false) {
   // Skip intrinsics
   if (isa<IntrinsicInst>(V))
-    return 0;
+    return nullptr;
 
   Function *Callee = getCalledFunction(V, LookThroughBitCast);
   if (!Callee)
-    return 0;
+    return nullptr;
 
   // Make sure that the function is available.
   StringRef FnName = Callee->getName();
   LibFunc::Func TLIFn;
   if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
-    return 0;
+    return nullptr;
 
   unsigned i = 0;
   bool found = false;
@@ -115,11 +116,11 @@
     }
   }
   if (!found)
-    return 0;
+    return nullptr;
 
   const AllocFnsTy *FnData = &AllocationFnData[i];
   if ((FnData->AllocTy & AllocTy) != FnData->AllocTy)
-    return 0;
+    return nullptr;
 
   // Check function prototype.
   int FstParam = FnData->FstParam;
@@ -135,7 +136,7 @@
        FTy->getParamType(SndParam)->isIntegerTy(32) ||
        FTy->getParamType(SndParam)->isIntegerTy(64)))
     return FnData;
-  return 0;
+  return nullptr;
 }
 
 static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) {
@@ -202,19 +203,19 @@
 /// ignore InvokeInst here.
 const CallInst *llvm::extractMallocCall(const Value *I,
                                         const TargetLibraryInfo *TLI) {
-  return isMallocLikeFn(I, TLI) ? dyn_cast<CallInst>(I) : 0;
+  return isMallocLikeFn(I, TLI) ? dyn_cast<CallInst>(I) : nullptr;
 }
 
 static Value *computeArraySize(const CallInst *CI, const DataLayout *DL,
                                const TargetLibraryInfo *TLI,
                                bool LookThroughSExt = false) {
   if (!CI)
-    return 0;
+    return nullptr;
 
   // The size of the malloc's result type must be known to determine array size.
   Type *T = getMallocAllocatedType(CI, TLI);
   if (!T || !T->isSized() || !DL)
-    return 0;
+    return nullptr;
 
   unsigned ElementSize = DL->getTypeAllocSize(T);
   if (StructType *ST = dyn_cast<StructType>(T))
@@ -223,12 +224,12 @@
   // If malloc call's arg can be determined to be a multiple of ElementSize,
   // return the multiple.  Otherwise, return nullptr.
   Value *MallocArg = CI->getArgOperand(0);
-  Value *Multiple = 0;
+  Value *Multiple = nullptr;
   if (ComputeMultiple(MallocArg, ElementSize, Multiple,
                       LookThroughSExt))
     return Multiple;
 
-  return 0;
+  return nullptr;
 }
 
 /// isArrayMalloc - Returns the corresponding CallInst if the instruction
@@ -245,7 +246,7 @@
       return CI;
 
   // CI is a non-array malloc or we can't figure out that it is an array malloc.
-  return 0;
+  return nullptr;
 }
 
 /// getMallocType - Returns the PointerType resulting from the malloc call.
@@ -257,7 +258,7 @@
                                  const TargetLibraryInfo *TLI) {
   assert(isMallocLikeFn(CI, TLI) && "getMallocType and not malloc call");
 
-  PointerType *MallocType = 0;
+  PointerType *MallocType = nullptr;
   unsigned NumOfBitCastUses = 0;
 
   // Determine if CallInst has a bitcast use.
@@ -277,7 +278,7 @@
     return cast<PointerType>(CI->getType());
 
   // Type could not be determined.
-  return 0;
+  return nullptr;
 }
 
 /// getMallocAllocatedType - Returns the Type allocated by malloc call.
@@ -288,7 +289,7 @@
 Type *llvm::getMallocAllocatedType(const CallInst *CI,
                                    const TargetLibraryInfo *TLI) {
   PointerType *PT = getMallocType(CI, TLI);
-  return PT ? PT->getElementType() : 0;
+  return PT ? PT->getElementType() : nullptr;
 }
 
 /// getMallocArraySize - Returns the array size of a malloc call.  If the
@@ -308,7 +309,7 @@
 /// is a calloc call.
 const CallInst *llvm::extractCallocCall(const Value *I,
                                         const TargetLibraryInfo *TLI) {
-  return isCallocLikeFn(I, TLI) ? cast<CallInst>(I) : 0;
+  return isCallocLikeFn(I, TLI) ? cast<CallInst>(I) : nullptr;
 }
 
 
@@ -316,15 +317,15 @@
 const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
   const CallInst *CI = dyn_cast<CallInst>(I);
   if (!CI || isa<IntrinsicInst>(CI))
-    return 0;
+    return nullptr;
   Function *Callee = CI->getCalledFunction();
-  if (Callee == 0 || !Callee->isDeclaration())
-    return 0;
+  if (Callee == nullptr || !Callee->isDeclaration())
+    return nullptr;
 
   StringRef FnName = Callee->getName();
   LibFunc::Func TLIFn;
   if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
-    return 0;
+    return nullptr;
 
   unsigned ExpectedNumParams;
   if (TLIFn == LibFunc::free ||
@@ -335,18 +336,18 @@
            TLIFn == LibFunc::ZdaPvRKSt9nothrow_t)   // delete[](void*, nothrow)
     ExpectedNumParams = 2;
   else
-    return 0;
+    return nullptr;
 
   // Check free prototype.
   // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
   // attribute will exist.
   FunctionType *FTy = Callee->getFunctionType();
   if (!FTy->getReturnType()->isVoidTy())
-    return 0;
+    return nullptr;
   if (FTy->getNumParams() != ExpectedNumParams)
-    return 0;
+    return nullptr;
   if (FTy->getParamType(0) != Type::getInt8PtrTy(Callee->getContext()))
-    return 0;
+    return nullptr;
 
   return CI;
 }
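
Once the types are known, computeArraySize above reduces to modular
arithmetic: the byte count handed to malloc must be an exact multiple of the
element size, and that multiple is the array length. A hypothetical scalar
reduction, with 0 standing in for the null "unknown" result:

    #include <cstdint>

    uint64_t arraySize(uint64_t MallocArgBytes, uint64_t ElementSize) {
      if (ElementSize == 0)
        return 0;                            // unsized type: give up
      if (MallocArgBytes % ElementSize != 0)
        return 0;                            // not a whole multiple
      return MallocArgBytes / ElementSize;   // e.g. 48 bytes / 8 => 6
    }
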
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 015ded1..9eaf109 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -14,7 +14,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "memdep"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
@@ -33,6 +32,8 @@
 #include "llvm/Support/Debug.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "memdep"
+
 STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
 STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
 STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
@@ -88,10 +89,10 @@
 bool MemoryDependenceAnalysis::runOnFunction(Function &) {
   AA = &getAnalysis<AliasAnalysis>();
   DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-  DL = DLP ? &DLP->getDataLayout() : 0;
+  DL = DLP ? &DLP->getDataLayout() : nullptr;
   DominatorTreeWrapperPass *DTWP =
       getAnalysisIfAvailable<DominatorTreeWrapperPass>();
-  DT = DTWP ? &DTWP->getDomTree() : 0;
+  DT = DTWP ? &DTWP->getDomTree() : nullptr;
   if (!PredCache)
     PredCache.reset(new PredIteratorCache());
   return false;
@@ -261,10 +262,10 @@
                                        const LoadInst *LI,
                                        const DataLayout *DL) {
   // If we have no target data, we can't do this.
-  if (DL == 0) return false;
+  if (!DL) return false;
 
   // If we haven't already computed the base/offset of MemLoc, do so now.
-  if (MemLocBase == 0)
+  if (!MemLocBase)
     MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL);
 
   unsigned Size = MemoryDependenceAnalysis::
@@ -362,13 +363,13 @@
                          BasicBlock::iterator ScanIt, BasicBlock *BB,
                          Instruction *QueryInst) {
 
-  const Value *MemLocBase = 0;
+  const Value *MemLocBase = nullptr;
   int64_t MemLocOffset = 0;
   unsigned Limit = BlockScanLimit;
   bool isInvariantLoad = false;
   if (isLoad && QueryInst) {
     LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
-    if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != 0)
+    if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
       isInvariantLoad = true;
   }
 
@@ -696,7 +697,7 @@
     if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
       --Entry;
 
-    NonLocalDepEntry *ExistingResult = 0;
+    NonLocalDepEntry *ExistingResult = nullptr;
     if (Entry != Cache.begin()+NumSortedEntries &&
         Entry->getBB() == DirtyBB) {
       // If we already have an entry, and if it isn't already dirty, the block
@@ -807,7 +808,7 @@
   if (Entry != Cache->begin() && (Entry-1)->getBB() == BB)
     --Entry;
 
-  NonLocalDepEntry *ExistingResult = 0;
+  NonLocalDepEntry *ExistingResult = nullptr;
   if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB)
     ExistingResult = &*Entry;
 
@@ -960,7 +961,7 @@
     if (CacheInfo->TBAATag != Loc.TBAATag) {
       if (CacheInfo->TBAATag) {
         CacheInfo->Pair = BBSkipFirstBlockPair();
-        CacheInfo->TBAATag = 0;
+        CacheInfo->TBAATag = nullptr;
         for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
              DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
           if (Instruction *Inst = DI->getResult().getInst())
@@ -1116,7 +1117,7 @@
       SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
       NumSortedEntries = Cache->size();
     }
-    Cache = 0;
+    Cache = nullptr;
 
     PredList.clear();
     for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
@@ -1126,7 +1127,7 @@
       // Get the PHI translated pointer in this predecessor.  This can fail if
       // not translatable, in which case the getAddr() returns null.
       PHITransAddr &PredPointer = PredList.back().second;
-      PredPointer.PHITranslateValue(BB, Pred, 0);
+      PredPointer.PHITranslateValue(BB, Pred, nullptr);
 
       Value *PredPtrVal = PredPointer.getAddr();
 
@@ -1175,7 +1176,7 @@
       // predecessor, then we have to assume that the pointer is clobbered in
       // that predecessor.  We can still do PRE of the load, which would insert
       // a computation of the pointer in this predecessor.
-      if (PredPtrVal == 0)
+      if (!PredPtrVal)
         CanTranslate = false;
 
       // FIXME: it is entirely possible that PHI translating will end up with
@@ -1224,7 +1225,7 @@
     // for the given block.  It assumes that we haven't modified any of
     // our data structures while processing the current block.
 
-    if (Cache == 0) {
+    if (!Cache) {
       // Refresh the CacheInfo/Cache pointer if it got invalidated.
       CacheInfo = &NonLocalPointerDeps[CacheKey];
       Cache = &CacheInfo->NonLocalDeps;
@@ -1279,7 +1280,7 @@
 
   for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
     Instruction *Target = PInfo[i].getResult().getInst();
-    if (Target == 0) continue;  // Ignore non-local dep results.
+    if (!Target) continue;  // Ignore non-local dep results.
     assert(Target->getParent() == PInfo[i].getBB());
 
     // Eliminating the dirty entry from 'Cache', so update the reverse info.
diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp
index 0c119d6..4e11e50 100644
--- a/lib/Analysis/NoAliasAnalysis.cpp
+++ b/lib/Analysis/NoAliasAnalysis.cpp
@@ -36,7 +36,7 @@
       // Note: NoAA does not call InitializeAliasAnalysis because it's
       // special and does not support chaining.
       DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-      DL = DLP ? &DLP->getDataLayout() : 0;
+      DL = DLP ? &DLP->getDataLayout() : nullptr;
     }
 
     AliasResult alias(const Location &LocA, const Location &LocB) override {
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index ad3685a..bfe8642 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -43,7 +43,7 @@
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void PHITransAddr::dump() const {
-  if (Addr == 0) {
+  if (!Addr) {
     dbgs() << "PHITransAddr: null\n";
     return;
   }
@@ -58,7 +58,7 @@
                           SmallVectorImpl<Instruction*> &InstInputs) {
   // If this is a non-instruction value, there is nothing to do.
   Instruction *I = dyn_cast<Instruction>(Expr);
-  if (I == 0) return true;
+  if (!I) return true;
 
   // If it's an instruction, it is either in Tmp or its operands recursively
   // are.
@@ -90,7 +90,7 @@
 /// structure is valid, it returns true.  If invalid, it prints errors and
 /// returns false.
 bool PHITransAddr::Verify() const {
-  if (Addr == 0) return true;
+  if (!Addr) return true;
 
   SmallVector<Instruction*, 8> Tmp(InstInputs.begin(), InstInputs.end());
 
@@ -116,14 +116,14 @@
   // If the input value is not an instruction, or if it is not defined in CurBB,
   // then we don't need to phi translate it.
   Instruction *Inst = dyn_cast<Instruction>(Addr);
-  return Inst == 0 || CanPHITrans(Inst);
+  return !Inst || CanPHITrans(Inst);
 }
 
 
 static void RemoveInstInputs(Value *V,
                              SmallVectorImpl<Instruction*> &InstInputs) {
   Instruction *I = dyn_cast<Instruction>(V);
-  if (I == 0) return;
+  if (!I) return;
 
   // If the instruction is in the InstInputs list, remove it.
   SmallVectorImpl<Instruction*>::iterator Entry =
@@ -147,7 +147,7 @@
                                          const DominatorTree *DT) {
   // If this is a non-instruction value, it can't require PHI translation.
   Instruction *Inst = dyn_cast<Instruction>(V);
-  if (Inst == 0) return V;
+  if (!Inst) return V;
 
   // Determine whether 'Inst' is an input to our PHI translatable expression.
   bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst);
@@ -173,7 +173,7 @@
     // If this is a non-phi value, and it is analyzable, we can incorporate it
     // into the expression by making all instruction operands be inputs.
     if (!CanPHITrans(Inst))
-      return 0;
+      return nullptr;
 
     // All instruction operands are now inputs (and of course, they may also be
     // defined in this block, so they may need to be phi translated themselves).
@@ -187,9 +187,9 @@
   // operands need to be phi translated, and if so, reconstruct it.
 
   if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
-    if (!isSafeToSpeculativelyExecute(Cast)) return 0;
+    if (!isSafeToSpeculativelyExecute(Cast)) return nullptr;
     Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT);
-    if (PHIIn == 0) return 0;
+    if (!PHIIn) return nullptr;
     if (PHIIn == Cast->getOperand(0))
       return Cast;
 
@@ -209,7 +209,7 @@
             (!DT || DT->dominates(CastI->getParent(), PredBB)))
           return CastI;
     }
-    return 0;
+    return nullptr;
   }
 
   // Handle getelementptr with at least one PHI translatable operand.
@@ -218,7 +218,7 @@
     bool AnyChanged = false;
     for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
       Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB, DT);
-      if (GEPOp == 0) return 0;
+      if (!GEPOp) return nullptr;
 
       AnyChanged |= GEPOp != GEP->getOperand(i);
       GEPOps.push_back(GEPOp);
@@ -253,7 +253,7 @@
             return GEPI;
         }
     }
-    return 0;
+    return nullptr;
   }
 
   // Handle add with a constant RHS.
@@ -265,7 +265,7 @@
     bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap();
 
     Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT);
-    if (LHS == 0) return 0;
+    if (!LHS) return nullptr;
 
     // If the PHI translated LHS is an add of a constant, fold the immediates.
     if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS))
@@ -304,11 +304,11 @@
           return BO;
     }
 
-    return 0;
+    return nullptr;
   }
 
   // Otherwise, we failed.
-  return 0;
+  return nullptr;
 }
 
 
@@ -326,10 +326,10 @@
     // Make sure the value is live in the predecessor.
     if (Instruction *Inst = dyn_cast_or_null<Instruction>(Addr))
       if (!DT->dominates(Inst->getParent(), PredBB))
-        Addr = 0;
+        Addr = nullptr;
   }
 
-  return Addr == 0;
+  return Addr == nullptr;
 }
 
 /// PHITranslateWithInsertion - PHI translate this value into the specified
@@ -354,7 +354,7 @@
   // If not, destroy any intermediate instructions inserted.
   while (NewInsts.size() != NISize)
     NewInsts.pop_back_val()->eraseFromParent();
-  return 0;
+  return nullptr;
 }
 
 
@@ -379,10 +379,10 @@
 
   // Handle cast of PHI translatable value.
   if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
-    if (!isSafeToSpeculativelyExecute(Cast)) return 0;
+    if (!isSafeToSpeculativelyExecute(Cast)) return nullptr;
     Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0),
                                               CurBB, PredBB, DT, NewInsts);
-    if (OpVal == 0) return 0;
+    if (!OpVal) return nullptr;
 
     // Otherwise insert a cast at the end of PredBB.
     CastInst *New = CastInst::Create(Cast->getOpcode(),
@@ -400,7 +400,7 @@
     for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
       Value *OpVal = InsertPHITranslatedSubExpr(GEP->getOperand(i),
                                                 CurBB, PredBB, DT, NewInsts);
-      if (OpVal == 0) return 0;
+      if (!OpVal) return nullptr;
       GEPOps.push_back(OpVal);
     }
 
@@ -436,5 +436,5 @@
   }
 #endif
 
-  return 0;
+  return nullptr;
 }
diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp
index f23833a..6d92909 100644
--- a/lib/Analysis/PostDominators.cpp
+++ b/lib/Analysis/PostDominators.cpp
@@ -11,8 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "postdomtree"
-
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SetOperations.h"
@@ -22,6 +20,8 @@
 #include "llvm/Support/GenericDomTreeConstruction.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "postdomtree"
+
 //===----------------------------------------------------------------------===//
 //  PostDominatorTree Implementation
 //===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index f4da598..7f88ae1 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -9,7 +9,6 @@
 // Detects single entry single exit regions in the control flow graph.
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "region"
 #include "llvm/Analysis/RegionInfo.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/Statistic.h"
@@ -19,10 +18,13 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <algorithm>
+#include <iterator>
 #include <set>
 
 using namespace llvm;
 
+#define DEBUG_TYPE "region"
+
 // Always verify if expensive checking is enabled.
 #ifdef XDEBUG
 static bool VerifyRegionInfo = true;
@@ -62,9 +64,6 @@
   // Only clean the cache for this Region. Caches of child Regions will be
   // cleaned when the child Regions are deleted.
   BBNodeMap.clear();
-
-  for (iterator I = begin(), E = end(); I != E; ++I)
-    delete *I;
 }
 
 void Region::replaceEntry(BasicBlock *BB) {
@@ -88,7 +87,7 @@
     R->replaceEntry(NewEntry);
     for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI)
       if ((*RI)->getEntry() == OldEntry)
-        RegionQueue.push_back(*RI);
+        RegionQueue.push_back(RI->get());
   }
 }
 
@@ -104,7 +103,7 @@
     R->replaceExit(NewExit);
     for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI)
       if ((*RI)->getExit() == OldExit)
-        RegionQueue.push_back(*RI);
+        RegionQueue.push_back(RI->get());
   }
 }
 
@@ -128,8 +127,8 @@
   // BBs that are not part of any loop are element of the Loop
   // described by the NULL pointer. This loop is not part of any region,
   // except if the region describes the whole function.
-  if (L == 0)
-    return getExit() == 0;
+  if (!L)
+    return getExit() == nullptr;
 
   if (!contains(L->getHeader()))
     return false;
@@ -147,7 +146,7 @@
 
 Loop *Region::outermostLoopInRegion(Loop *L) const {
   if (!contains(L))
-    return 0;
+    return nullptr;
 
   while (L && contains(L->getParentLoop())) {
     L = L->getParentLoop();
@@ -165,14 +164,14 @@
 BasicBlock *Region::getEnteringBlock() const {
   BasicBlock *entry = getEntry();
   BasicBlock *Pred;
-  BasicBlock *enteringBlock = 0;
+  BasicBlock *enteringBlock = nullptr;
 
   for (pred_iterator PI = pred_begin(entry), PE = pred_end(entry); PI != PE;
        ++PI) {
     Pred = *PI;
     if (DT->getNode(Pred) && !contains(Pred)) {
       if (enteringBlock)
-        return 0;
+        return nullptr;
 
       enteringBlock = Pred;
     }
@@ -184,17 +183,17 @@
 BasicBlock *Region::getExitingBlock() const {
   BasicBlock *exit = getExit();
   BasicBlock *Pred;
-  BasicBlock *exitingBlock = 0;
+  BasicBlock *exitingBlock = nullptr;
 
   if (!exit)
-    return 0;
+    return nullptr;
 
   for (pred_iterator PI = pred_begin(exit), PE = pred_end(exit); PI != PE;
        ++PI) {
     Pred = *PI;
     if (contains(Pred)) {
       if (exitingBlock)
-        return 0;
+        return nullptr;
 
       exitingBlock = Pred;
     }
@@ -295,7 +294,7 @@
   Region *R = RI->getRegionFor(BB);
 
   if (!R || R == this)
-    return 0;
+    return nullptr;
 
   // If we pass the BB out of this region, that means our code is broken.
   assert(contains(R) && "BB not in current region!");
@@ -304,7 +303,7 @@
     R = R->getParent();
 
   if (R->getEntry() != BB)
-    return 0;
+    return nullptr;
 
   return R;
 }
@@ -333,18 +332,20 @@
 void Region::transferChildrenTo(Region *To) {
   for (iterator I = begin(), E = end(); I != E; ++I) {
     (*I)->parent = To;
-    To->children.push_back(*I);
+    To->children.push_back(std::move(*I));
   }
   children.clear();
 }
 
 void Region::addSubRegion(Region *SubRegion, bool moveChildren) {
-  assert(SubRegion->parent == 0 && "SubRegion already has a parent!");
-  assert(std::find(begin(), end(), SubRegion) == children.end()
-         && "Subregion already exists!");
+  assert(!SubRegion->parent && "SubRegion already has a parent!");
+  assert(std::find_if(begin(), end(), [&](const std::unique_ptr<Region> &R) {
+           return R.get() == SubRegion;
+         }) == children.end() &&
+         "Subregion already exists!");
 
   SubRegion->parent = this;
-  children.push_back(SubRegion);
+  children.push_back(std::unique_ptr<Region>(SubRegion));
 
   if (!moveChildren)
     return;
@@ -360,23 +361,27 @@
         RI->setRegionFor(BB, SubRegion);
     }
 
-  std::vector<Region*> Keep;
+  std::vector<std::unique_ptr<Region>> Keep;
   for (iterator I = begin(), E = end(); I != E; ++I)
-    if (SubRegion->contains(*I) && *I != SubRegion) {
-      SubRegion->children.push_back(*I);
+    if (SubRegion->contains(I->get()) && I->get() != SubRegion) {
       (*I)->parent = SubRegion;
+      SubRegion->children.push_back(std::move(*I));
     } else
-      Keep.push_back(*I);
+      Keep.push_back(std::move(*I));
 
   children.clear();
-  children.insert(children.begin(), Keep.begin(), Keep.end());
+  children.insert(children.begin(),
+                  std::move_iterator<RegionSet::iterator>(Keep.begin()),
+                  std::move_iterator<RegionSet::iterator>(Keep.end()));
 }
 
 
 Region *Region::removeSubRegion(Region *Child) {
   assert(Child->parent == this && "Child is not a child of this region!");
-  Child->parent = 0;
-  RegionSet::iterator I = std::find(children.begin(), children.end(), Child);
+  Child->parent = nullptr;
+  RegionSet::iterator I = std::find_if(
+      children.begin(), children.end(),
+      [&](const std::unique_ptr<Region> &R) { return R.get() == Child; });
   assert(I != children.end() && "Region does not exist. Unable to remove.");
   children.erase(children.begin()+(I-begin()));
   return Child;
@@ -385,7 +390,7 @@
 unsigned Region::getDepth() const {
   unsigned Depth = 0;
 
-  for (Region *R = parent; R != 0; R = R->parent)
+  for (Region *R = parent; R != nullptr; R = R->parent)
     ++Depth;
 
   return Depth;
@@ -395,12 +400,12 @@
   unsigned NumSuccessors = exit->getTerminator()->getNumSuccessors();
 
   if (NumSuccessors == 0)
-    return NULL;
+    return nullptr;
 
   for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit());
        PI != PE; ++PI)
     if (!DT->dominates(getEntry(), *PI))
-      return NULL;
+      return nullptr;
 
   Region *R = RI->getRegionFor(exit);
 
@@ -408,7 +413,7 @@
     if (exit->getTerminator()->getNumSuccessors() == 1)
       return new Region(getEntry(), *succ_begin(exit), RI, DT);
     else
-      return NULL;
+      return nullptr;
   }
 
   while (R->getParent() && R->getParent()->getEntry() == exit)
@@ -418,7 +423,7 @@
     for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit());
          PI != PE; ++PI)
     if (!DT->dominates(R->getExit(), *PI))
-      return NULL;
+      return nullptr;
 
   return new Region(getEntry(), R->getExit(), RI, DT);
 }
@@ -577,7 +582,7 @@
   assert(entry && exit && "entry and exit must not be null!");
 
   if (isTrivialRegion(entry, exit))
-    return 0;
+    return nullptr;
 
   Region *region = new Region(entry, exit, this, DT);
   BBtoRegion.insert(std::make_pair(entry, region));
@@ -600,7 +605,7 @@
   if (!N)
     return;
 
-  Region *lastRegion= 0;
+  Region *lastRegion= nullptr;
   BasicBlock *lastExit = entry;
 
   // As only a BasicBlock that postdominates entry can finish a region, walk the
@@ -680,12 +685,12 @@
   BBtoRegion.clear();
   if (TopLevelRegion)
     delete TopLevelRegion;
-  TopLevelRegion = 0;
+  TopLevelRegion = nullptr;
 }
 
 RegionInfo::RegionInfo() : FunctionPass(ID) {
   initializeRegionInfoPass(*PassRegistry::getPassRegistry());
-  TopLevelRegion = 0;
+  TopLevelRegion = nullptr;
 }
 
 RegionInfo::~RegionInfo() {
@@ -710,7 +715,7 @@
   PDT = &getAnalysis<PostDominatorTree>();
   DF = &getAnalysis<DominanceFrontier>();
 
-  TopLevelRegion = new Region(&F.getEntryBlock(), 0, this, DT, 0);
+  TopLevelRegion = new Region(&F.getEntryBlock(), nullptr, this, DT, nullptr);
   updateStatistics(TopLevelRegion);
 
   Calculate(F);
@@ -744,7 +749,7 @@
 Region *RegionInfo::getRegionFor(BasicBlock *BB) const {
   BBtoRegionMap::const_iterator I=
     BBtoRegion.find(BB);
-  return I != BBtoRegion.end() ? I->second : 0;
+  return I != BBtoRegion.end() ? I->second : nullptr;
 }
 
 void RegionInfo::setRegionFor(BasicBlock *BB, Region *R) {
@@ -756,7 +761,7 @@
 }
 
 BasicBlock *RegionInfo::getMaxRegionExit(BasicBlock *BB) const {
-  BasicBlock *Exit = NULL;
+  BasicBlock *Exit = nullptr;
 
   while (true) {
     // Get largest region that starts at BB.
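
The substantive change in RegionInfo.cpp is ownership: children becomes a
container of std::unique_ptr<Region>, which deletes the explicit destructor
loop and turns every raw-pointer std::find into a std::find_if comparing
against get(). A reduced sketch of the same container discipline; in this
sketch the removed child is release()d so the returned raw pointer stays
valid, an assumption about the intent rather than a copy of the code above:

    #include <algorithm>
    #include <cassert>
    #include <memory>
    #include <vector>

    struct Node {
      Node *Parent = nullptr;
      std::vector<std::unique_ptr<Node>> Children;  // owning container

      void addChild(Node *C) {
        C->Parent = this;
        Children.push_back(std::unique_ptr<Node>(C));
      }

      Node *removeChild(Node *C) {
        assert(C->Parent == this && "not a child of this node");
        C->Parent = nullptr;
        auto I = std::find_if(
            Children.begin(), Children.end(),
            [&](const std::unique_ptr<Node> &P) { return P.get() == C; });
        assert(I != Children.end() && "child does not exist");
        (void)I->release();        // hand ownership back to the caller
        Children.erase(I);
        return C;
      }
    };
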
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index 12d7ca3..3c7798f 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -17,10 +17,11 @@
 #include "llvm/Analysis/RegionIterator.h"
 #include "llvm/Support/Timer.h"
 
-#define DEBUG_TYPE "regionpassmgr"
 #include "llvm/Support/Debug.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "regionpassmgr"
+
 //===----------------------------------------------------------------------===//
 // RGPassManager
 //
@@ -31,15 +32,15 @@
   : FunctionPass(ID), PMDataManager() {
   skipThisRegion = false;
   redoThisRegion = false;
-  RI = NULL;
-  CurrentRegion = NULL;
+  RI = nullptr;
+  CurrentRegion = nullptr;
 }
 
 // Recurse through all subregions and all regions  into RQ.
-static void addRegionIntoQueue(Region *R, std::deque<Region *> &RQ) {
-  RQ.push_back(R);
-  for (Region::iterator I = R->begin(), E = R->end(); I != E; ++I)
-    addRegionIntoQueue(*I, RQ);
+static void addRegionIntoQueue(Region &R, std::deque<Region *> &RQ) {
+  RQ.push_back(&R);
+  for (const auto &E : R)
+    addRegionIntoQueue(*E, RQ);
 }
 
 /// Pass Manager itself does not invalidate any analysis info.
@@ -57,7 +58,7 @@
   // Collect inherited analysis from Module level pass manager.
   populateInheritedAnalysis(TPM->activeStack);
 
-  addRegionIntoQueue(RI->getTopLevelRegion(), RQ);
+  addRegionIntoQueue(*RI->getTopLevelRegion(), RQ);
 
   if (RQ.empty()) // No regions, skip calling finalizers
     return false;
@@ -185,7 +186,6 @@
 
 public:
   static char ID;
-  PrintRegionPass() : RegionPass(ID), Out(dbgs()) {}
   PrintRegionPass(const std::string &B, raw_ostream &o)
       : RegionPass(ID), Banner(B), Out(o) {}
 
diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp
index 6467f47..893210a 100644
--- a/lib/Analysis/RegionPrinter.cpp
+++ b/lib/Analysis/RegionPrinter.cpp
@@ -98,31 +98,31 @@
 
   // Print the cluster of the subregions. This groups the single basic blocks
   // and adds a different background color for each group.
-  static void printRegionCluster(const Region *R, GraphWriter<RegionInfo*> &GW,
+  static void printRegionCluster(const Region &R, GraphWriter<RegionInfo*> &GW,
                                  unsigned depth = 0) {
     raw_ostream &O = GW.getOStream();
-    O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void*>(R)
+    O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void*>(&R)
       << " {\n";
     O.indent(2 * (depth + 1)) << "label = \"\";\n";
 
-    if (!onlySimpleRegions || R->isSimple()) {
+    if (!onlySimpleRegions || R.isSimple()) {
       O.indent(2 * (depth + 1)) << "style = filled;\n";
       O.indent(2 * (depth + 1)) << "color = "
-        << ((R->getDepth() * 2 % 12) + 1) << "\n";
+        << ((R.getDepth() * 2 % 12) + 1) << "\n";
 
     } else {
       O.indent(2 * (depth + 1)) << "style = solid;\n";
       O.indent(2 * (depth + 1)) << "color = "
-        << ((R->getDepth() * 2 % 12) + 2) << "\n";
+        << ((R.getDepth() * 2 % 12) + 2) << "\n";
     }
 
-    for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI)
-      printRegionCluster(*RI, GW, depth + 1);
+    for (Region::const_iterator RI = R.begin(), RE = R.end(); RI != RE; ++RI)
+      printRegionCluster(**RI, GW, depth + 1);
 
-    RegionInfo *RI = R->getRegionInfo();
+    RegionInfo *RI = R.getRegionInfo();
 
-    for (const auto &BB : R->blocks())
-      if (RI->getRegionFor(BB) == R)
+    for (const auto &BB : R.blocks())
+      if (RI->getRegionFor(BB) == &R)
         O.indent(2 * (depth + 1)) << "Node"
           << static_cast<const void*>(RI->getTopLevelRegion()->getBBNode(BB))
           << ";\n";
@@ -134,7 +134,7 @@
                                      GraphWriter<RegionInfo*> &GW) {
     raw_ostream &O = GW.getOStream();
     O << "\tcolorscheme = \"paired12\"\n";
-    printRegionCluster(RI->getTopLevelRegion(), GW, 4);
+    printRegionCluster(*RI->getTopLevelRegion(), GW, 4);
   }
 };
 } //end namespace llvm
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 08de621..42a7aa2 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -58,7 +58,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "scalar-evolution"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -89,6 +88,8 @@
 #include <algorithm>
 using namespace llvm;
 
+#define DEBUG_TYPE "scalar-evolution"
+
 STATISTIC(NumArrayLenItCounts,
           "Number of trip counts computed with array length");
 STATISTIC(NumTripCountsComputed,
@@ -182,7 +183,7 @@
   case scUMaxExpr:
   case scSMaxExpr: {
     const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
-    const char *OpStr = 0;
+    const char *OpStr = nullptr;
     switch (NAry->getSCEVType()) {
     case scAddExpr: OpStr = " + "; break;
     case scMulExpr: OpStr = " * "; break;
@@ -312,7 +313,7 @@
   FoldingSetNodeID ID;
   ID.AddInteger(scConstant);
   ID.AddPointer(V);
-  void *IP = 0;
+  void *IP = nullptr;
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
   SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V);
   UniqueSCEVs.InsertNode(S, IP);
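
Every getXExpr in this file repeats the profile / FindNodeOrInsertPos /
InsertNode dance shown above; it is what makes SCEV pointers comparable by
identity. A hypothetical stand-in for the same interning idea, with a string
profile and an unordered_map in place of the FoldingSet:

    #include <string>
    #include <unordered_map>

    struct SCEVToy { std::string Profile; };

    class UniqueTable {
      std::unordered_map<std::string, SCEVToy *> Table;
    public:
      const SCEVToy *getConstant(const std::string &Profile) {
        auto It = Table.find(Profile);       // FindNodeOrInsertPos analogue
        if (It != Table.end())
          return It->second;                 // canonical node already exists
        SCEVToy *S = new SCEVToy{Profile};   // intern a new node; it lives
        Table[Profile] = S;                  // for the table's lifetime
        return S;
      }
    };
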
@@ -365,7 +366,7 @@
   SE->UniqueSCEVs.RemoveNode(this);
 
   // Release the value.
-  setValPtr(0);
+  setValPtr(nullptr);
 }
 
 void SCEVUnknown::allUsesReplacedWith(Value *New) {
@@ -829,7 +830,7 @@
   ID.AddInteger(scTruncate);
   ID.AddPointer(Op);
   ID.AddPointer(Ty);
-  void *IP = 0;
+  void *IP = nullptr;
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
 
   // Fold if the operand is constant.
@@ -919,7 +920,7 @@
   ID.AddInteger(scZeroExtend);
   ID.AddPointer(Op);
   ID.AddPointer(Ty);
-  void *IP = 0;
+  void *IP = nullptr;
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
 
   // zext(trunc(x)) --> zext(x) or x or trunc(x)
@@ -1072,7 +1073,7 @@
     return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
                        SE->getSignedRange(Step).getSignedMin());
   }
-  return 0;
+  return nullptr;
 }
 
 // The recurrence AR has been shown to have no signed wrap. Typically, if we can
@@ -1091,19 +1092,18 @@
   // Check for a simple looking step prior to loop entry.
   const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
   if (!SA)
-    return 0;
+    return nullptr;
 
   // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
   // subtraction is expensive. For this purpose, perform a quick and dirty
   // difference, by checking for Step in the operand list.
   SmallVector<const SCEV *, 4> DiffOps;
-  for (SCEVAddExpr::op_iterator I = SA->op_begin(), E = SA->op_end();
-       I != E; ++I) {
-    if (*I != Step)
-      DiffOps.push_back(*I);
-  }
+  for (const SCEV *Op : SA->operands())
+    if (Op != Step)
+      DiffOps.push_back(Op);
+
   if (DiffOps.size() == SA->getNumOperands())
-    return 0;
+    return nullptr;
 
   // This is a postinc AR. Check for overflow on the preinc recurrence using the
   // same three conditions that getSignExtendedExpr checks.
@@ -1139,7 +1139,7 @@
       SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
     return PreStart;
   }
-  return 0;
+  return nullptr;
 }
 
 // Get the normalized sign-extended expression for this AddRec's Start.
@@ -1181,7 +1181,7 @@
   ID.AddInteger(scSignExtend);
   ID.AddPointer(Op);
   ID.AddPointer(Ty);
-  void *IP = 0;
+  void *IP = nullptr;
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
 
   // If the input value is provably positive, build a zext instead.
@@ -1201,6 +1201,23 @@
       return getTruncateOrSignExtend(X, Ty);
   }
 
+  // sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2
+  if (auto SA = dyn_cast<SCEVAddExpr>(Op)) {
+    if (SA->getNumOperands() == 2) {
+      auto SC1 = dyn_cast<SCEVConstant>(SA->getOperand(0));
+      auto SMul = dyn_cast<SCEVMulExpr>(SA->getOperand(1));
+      if (SMul && SC1) {
+        if (auto SC2 = dyn_cast<SCEVConstant>(SMul->getOperand(0))) {
+          const APInt &C1 = SC1->getValue()->getValue();
+          const APInt &C2 = SC2->getValue()->getValue();
+          if (C1.isStrictlyPositive() && C2.isStrictlyPositive() &&
+              C2.ugt(C1) && C2.isPowerOf2())
+            return getAddExpr(getSignExtendExpr(SC1, Ty),
+                              getSignExtendExpr(SMul, Ty));
+        }
+      }
+    }
+  }
   // If the input value is a chrec scev, and we can prove that the value
   // did not overflow the old, smaller, value, we can sign extend all of the
   // operands (often constants).  This allows analysis of something like
@@ -1292,6 +1309,22 @@
                                L, AR->getNoWrapFlags());
         }
       }
+      // If Start and Step are constants, check if we can apply this
+      // transformation:
+      // sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2
+      auto SC1 = dyn_cast<SCEVConstant>(Start);
+      auto SC2 = dyn_cast<SCEVConstant>(Step);
+      if (SC1 && SC2) {
+        const APInt &C1 = SC1->getValue()->getValue();
+        const APInt &C2 = SC2->getValue()->getValue();
+        if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) &&
+            C2.isPowerOf2()) {
+          Start = getSignExtendExpr(Start, Ty);
+          const SCEV *NewAR = getAddRecExpr(getConstant(AR->getType(), 0), Step,
+                                            L, AR->getNoWrapFlags());
+          return getAddExpr(Start, getSignExtendExpr(NewAR, Ty));
+        }
+      }
     }
 
   // The cast wasn't folded; create an explicit cast node.
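
Both new sext folds rely on the same arithmetic fact: when 0 < C1 < C2 and
C2 is a power of two, adding C1 to a multiple of C2 cannot carry into the
multiple's bits or flip its sign bit, so sign extension distributes over the
sum. An exhaustive 8-to-16-bit check, assuming the usual two's-complement
narrowing conversions:

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned C1 = 3, C2 = 8;        // illustrative; C2 = 2^3 > C1
      for (unsigned x = 0; x < 256; ++x) {
        int8_t Mul = (int8_t)(C2 * x);      // C2 * x evaluated in i8
        int8_t Sum = (int8_t)(C1 + C2 * x); // C1 + C2 * x evaluated in i8
        int16_t Folded = (int16_t)C1 + (int16_t)Mul; // sext each, then add
        assert((int16_t)Sum == Folded);     // equals sext of the i8 sum
      }
      return 0;
    }
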
@@ -1340,9 +1373,8 @@
   // Force the cast to be folded into the operands of an addrec.
   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
     SmallVector<const SCEV *, 4> Ops;
-    for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
-         I != E; ++I)
-      Ops.push_back(getAnyExtendExpr(*I, Ty));
+    for (const SCEV *Op : AR->operands())
+      Ops.push_back(getAnyExtendExpr(Op, Ty));
     return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW);
   }
 
@@ -1811,7 +1843,7 @@
   ID.AddInteger(scAddExpr);
   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
     ID.AddPointer(Ops[i]);
-  void *IP = 0;
+  void *IP = nullptr;
   SCEVAddExpr *S =
     static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
   if (!S) {
@@ -2105,7 +2137,7 @@
   ID.AddInteger(scMulExpr);
   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
     ID.AddPointer(Ops[i]);
-  void *IP = 0;
+  void *IP = nullptr;
   SCEVMulExpr *S =
     static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
   if (!S) {
@@ -2230,7 +2262,7 @@
   ID.AddInteger(scUDivExpr);
   ID.AddPointer(LHS);
   ID.AddPointer(RHS);
-  void *IP = 0;
+  void *IP = nullptr;
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
   SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
                                              LHS, RHS);
@@ -2425,7 +2457,7 @@
   for (unsigned i = 0, e = Operands.size(); i != e; ++i)
     ID.AddPointer(Operands[i]);
   ID.AddPointer(L);
-  void *IP = 0;
+  void *IP = nullptr;
   SCEVAddRecExpr *S =
     static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
   if (!S) {
@@ -2533,7 +2565,7 @@
   ID.AddInteger(scSMaxExpr);
   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
     ID.AddPointer(Ops[i]);
-  void *IP = 0;
+  void *IP = nullptr;
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
@@ -2637,7 +2669,7 @@
   ID.AddInteger(scUMaxExpr);
   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
     ID.AddPointer(Ops[i]);
-  void *IP = 0;
+  void *IP = nullptr;
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
@@ -2704,7 +2736,7 @@
   FoldingSetNodeID ID;
   ID.AddInteger(scUnknown);
   ID.AddPointer(V);
-  void *IP = 0;
+  void *IP = nullptr;
   if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
     assert(cast<SCEVUnknown>(S)->getValue() == V &&
            "Stale SCEVUnknown in uniquing map!");
@@ -3010,7 +3042,7 @@
     return getPointerBase(Cast->getOperand());
   }
   else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
-    const SCEV *PtrOp = 0;
+    const SCEV *PtrOp = nullptr;
     for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
          I != E; ++I) {
       if ((*I)->getType()->isPointerTy()) {
@@ -3090,20 +3122,20 @@
       // The loop may have multiple entrances or multiple exits; we can analyze
       // this phi as an addrec if it has a unique entry value and a unique
       // backedge value.
-      Value *BEValueV = 0, *StartValueV = 0;
+      Value *BEValueV = nullptr, *StartValueV = nullptr;
       for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
         Value *V = PN->getIncomingValue(i);
         if (L->contains(PN->getIncomingBlock(i))) {
           if (!BEValueV) {
             BEValueV = V;
           } else if (BEValueV != V) {
-            BEValueV = 0;
+            BEValueV = nullptr;
             break;
           }
         } else if (!StartValueV) {
           StartValueV = V;
         } else if (StartValueV != V) {
-          StartValueV = 0;
+          StartValueV = nullptr;
           break;
         }
       }
@@ -3363,7 +3395,7 @@
     // For a SCEVUnknown, ask ValueTracking.
     unsigned BitWidth = getTypeSizeInBits(U->getType());
     APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
-    ComputeMaskedBits(U->getValue(), Zeros, Ones);
+    computeKnownBits(U->getValue(), Zeros, Ones);
     return Zeros.countTrailingOnes();
   }
 
@@ -3502,7 +3534,7 @@
   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
     // For a SCEVUnknown, ask ValueTracking.
     APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
-    ComputeMaskedBits(U->getValue(), Zeros, Ones, DL);
+    computeKnownBits(U->getValue(), Zeros, Ones, DL);
     if (Ones == ~Zeros + 1)
       return setUnsignedRange(U, ConservativeResult);
     return setUnsignedRange(U,
@@ -3755,13 +3787,13 @@
 
       // Instcombine's ShrinkDemandedConstant may strip bits out of
       // constants, obscuring what would otherwise be a low-bits mask.
-      // Use ComputeMaskedBits to compute what ShrinkDemandedConstant
+      // Use computeKnownBits to compute what ShrinkDemandedConstant
       // knew about to reconstruct a low-bits mask value.
       unsigned LZ = A.countLeadingZeros();
       unsigned TZ = A.countTrailingZeros();
       unsigned BitWidth = A.getBitWidth();
       APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-      ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, DL);
+      computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL);
 
       APInt EffectiveMask =
           APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
@@ -4316,9 +4348,9 @@
   if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute();
   assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
 
-  const SCEV *BECount = 0;
+  const SCEV *BECount = nullptr;
   for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
-       ENT != 0; ENT = ENT->getNextExit()) {
+       ENT != nullptr; ENT = ENT->getNextExit()) {
 
     assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV");
 
@@ -4336,7 +4368,7 @@
 ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
                                              ScalarEvolution *SE) const {
   for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
-       ENT != 0; ENT = ENT->getNextExit()) {
+       ENT != nullptr; ENT = ENT->getNextExit()) {
 
     if (ENT->ExitingBlock == ExitingBlock)
       return ENT->ExactNotTaken;
@@ -4359,7 +4391,7 @@
     return false;
 
   for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
-       ENT != 0; ENT = ENT->getNextExit()) {
+       ENT != nullptr; ENT = ENT->getNextExit()) {
 
     if (ENT->ExactNotTaken != SE->getCouldNotCompute()
         && SE->hasOperand(ENT->ExactNotTaken, S)) {
@@ -4398,8 +4430,8 @@
 
 /// clear - Invalidate this result and free the ExitNotTakenInfo array.
 void ScalarEvolution::BackedgeTakenInfo::clear() {
-  ExitNotTaken.ExitingBlock = 0;
-  ExitNotTaken.ExactNotTaken = 0;
+  ExitNotTaken.ExitingBlock = nullptr;
+  ExitNotTaken.ExactNotTaken = nullptr;
   delete[] ExitNotTaken.getNextExit();
 }
 
@@ -4410,38 +4442,63 @@
   SmallVector<BasicBlock *, 8> ExitingBlocks;
   L->getExitingBlocks(ExitingBlocks);
 
-  // Examine all exits and pick the most conservative values.
-  const SCEV *MaxBECount = getCouldNotCompute();
+  SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts;
   bool CouldComputeBECount = true;
   BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
-  const SCEV *LatchMaxCount = 0;
-  SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts;
+  const SCEV *MustExitMaxBECount = nullptr;
+  const SCEV *MayExitMaxBECount = nullptr;
+
+  // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts
+  // and compute maxBECount.
   for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
-    ExitLimit EL = ComputeExitLimit(L, ExitingBlocks[i]);
+    BasicBlock *ExitBB = ExitingBlocks[i];
+    ExitLimit EL = ComputeExitLimit(L, ExitBB);
+
+    // 1. For each exit that can be computed, add an entry to ExitCounts.
+    // CouldComputeBECount is true only if all exits can be computed.
     if (EL.Exact == getCouldNotCompute())
       // We couldn't compute an exact value for this exit, so
       // we won't be able to compute an exact value for the loop.
       CouldComputeBECount = false;
     else
-      ExitCounts.push_back(std::make_pair(ExitingBlocks[i], EL.Exact));
+      ExitCounts.push_back(std::make_pair(ExitBB, EL.Exact));
 
-    if (MaxBECount == getCouldNotCompute())
-      MaxBECount = EL.Max;
-    else if (EL.Max != getCouldNotCompute()) {
-      // We cannot take the "min" MaxBECount, because non-unit stride loops may
-      // skip some loop tests. Taking the max over the exits is sufficiently
-      // conservative.  TODO: We could do better taking into consideration
-      // non-latch exits that dominate the latch.
-      if (EL.MustExit && ExitingBlocks[i] == Latch)
-        LatchMaxCount = EL.Max;
-      else
-        MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max);
+    // 2. Derive the loop's MaxBECount from each exit's max number of
+    // non-exiting iterations. Partition the loop exits into two kinds:
+    // LoopMustExits and LoopMayExits.
+    //
+    // A LoopMustExit meets two requirements:
+    //
+    // (a) Its ExitLimit.MustExit flag must be set, indicating that the exit
+    // test condition cannot be skipped (the tested variable has unit stride
+    // or the test is less-than or greater-than, rather than a strict
+    // inequality).
+    //
+    // (b) It must dominate the loop latch, hence must be tested on every loop
+    // iteration.
+    //
+    // If any computable LoopMustExit is found, then MaxBECount is the minimum
+    // EL.Max of computable LoopMustExits. Otherwise, MaxBECount is
+    // conservatively the maximum EL.Max, where CouldNotCompute is considered
+    // greater than any computable EL.Max.
+    if (EL.MustExit && EL.Max != getCouldNotCompute() && Latch &&
+        DT->dominates(ExitBB, Latch)) {
+      if (!MustExitMaxBECount)
+        MustExitMaxBECount = EL.Max;
+      else {
+        MustExitMaxBECount =
+          getUMinFromMismatchedTypes(MustExitMaxBECount, EL.Max);
+      }
+    } else if (MayExitMaxBECount != getCouldNotCompute()) {
+      if (!MayExitMaxBECount || EL.Max == getCouldNotCompute())
+        MayExitMaxBECount = EL.Max;
+      else {
+        MayExitMaxBECount =
+          getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.Max);
+      }
     }
   }
-  // Be more precise in the easy case of a loop latch that must exit.
-  if (LatchMaxCount) {
-    MaxBECount = getUMinFromMismatchedTypes(MaxBECount, LatchMaxCount);
-  }
+  const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount :
+    (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute());
   return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
 }
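
The min/max combination implemented above can be modeled in isolation. A
minimal plain-C++ sketch (a hypothetical model, not LLVM API; LONG_MAX stands
in for CouldNotCompute): computable must-exits that dominate the latch combine
with min, and everything else combines conservatively with max.

#include <algorithm>
#include <climits>
#include <cstdio>

struct Exit { long Max; bool MustExit; bool DominatesLatch; };

long maxBackedgeTakenCount(const Exit *Exits, int N) {
  long MustMin = -1, MayMax = -1; // -1 models "not yet set"
  for (int i = 0; i < N; ++i) {
    const Exit &E = Exits[i];
    if (E.MustExit && E.Max != LONG_MAX && E.DominatesLatch)
      MustMin = MustMin == -1 ? E.Max : std::min(MustMin, E.Max);
    else if (MayMax != LONG_MAX) // CouldNotCompute is sticky for may-exits
      MayMax = (MayMax == -1 || E.Max == LONG_MAX)
                   ? E.Max : std::max(MayMax, E.Max);
  }
  return MustMin != -1 ? MustMin : (MayMax != -1 ? MayMax : LONG_MAX);
}

int main() {
  Exit Exits[] = {{100, true, true}, {50, true, true}, {200, false, false}};
  std::printf("%ld\n", maxBackedgeTakenCount(Exits, 3)); // 50: min of must-exits
  return 0;
}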
 
@@ -4454,7 +4511,7 @@
   // exit at this block and remember the exit block and whether all other targets
   // lead to the loop header.
   bool MustExecuteLoopHeader = true;
-  BasicBlock *Exit = 0;
+  BasicBlock *Exit = nullptr;
   for (succ_iterator SI = succ_begin(ExitingBlock), SE = succ_end(ExitingBlock);
        SI != SE; ++SI)
     if (!L->contains(*SI)) {
@@ -4800,7 +4857,7 @@
     return getCouldNotCompute();
 
   // Okay, we allow one non-constant index into the GEP instruction.
-  Value *VarIdx = 0;
+  Value *VarIdx = nullptr;
   std::vector<Constant*> Indexes;
   unsigned VarIdxNum = 0;
   for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
@@ -4810,7 +4867,7 @@
       if (VarIdx) return getCouldNotCompute();  // Multiple non-constant idx's.
       VarIdx = GEP->getOperand(i);
       VarIdxNum = i-2;
-      Indexes.push_back(0);
+      Indexes.push_back(nullptr);
     }
 
   // Loop-invariant loads may be a byproduct of loop optimization. Skip them.
@@ -4841,7 +4898,7 @@
 
     Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(),
                                                          Indexes);
-    if (Result == 0) break;  // Cannot compute!
+    if (!Result) break;  // Cannot compute!
 
     // Evaluate the condition for this iteration.
     Result = ConstantExpr::getICmp(predicate, Result, RHS);
@@ -4902,14 +4959,14 @@
 
   // Otherwise, we can evaluate this instruction if all of its operands are
   // constant or derived from a PHI node themselves.
-  PHINode *PHI = 0;
+  PHINode *PHI = nullptr;
   for (Instruction::op_iterator OpI = UseInst->op_begin(),
          OpE = UseInst->op_end(); OpI != OpE; ++OpI) {
 
     if (isa<Constant>(*OpI)) continue;
 
     Instruction *OpInst = dyn_cast<Instruction>(*OpI);
-    if (!OpInst || !canConstantEvolve(OpInst, L)) return 0;
+    if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr;
 
     PHINode *P = dyn_cast<PHINode>(OpInst);
     if (!P)
@@ -4923,8 +4980,10 @@
       P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap);
       PHIMap[OpInst] = P;
     }
-    if (P == 0) return 0;        // Not evolving from PHI
-    if (PHI && PHI != P) return 0;  // Evolving from multiple different PHIs.
+    if (!P)
+      return nullptr;  // Not evolving from PHI
+    if (PHI && PHI != P)
+      return nullptr;  // Evolving from multiple different PHIs.
     PHI = P;
   }
   // This is an expression evolving from a constant PHI!
@@ -4938,7 +4997,7 @@
 /// constraints, return null.
 static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
   Instruction *I = dyn_cast<Instruction>(V);
-  if (I == 0 || !canConstantEvolve(I, L)) return 0;
+  if (!I || !canConstantEvolve(I, L)) return nullptr;
 
   if (PHINode *PN = dyn_cast<PHINode>(I)) {
     return PN;
@@ -4960,18 +5019,18 @@
   // Convenient constant check, but redundant for recursive calls.
   if (Constant *C = dyn_cast<Constant>(V)) return C;
   Instruction *I = dyn_cast<Instruction>(V);
-  if (!I) return 0;
+  if (!I) return nullptr;
 
   if (Constant *C = Vals.lookup(I)) return C;
 
   // An instruction inside the loop depends on a value outside the loop that we
   // weren't given a mapping for, or a value such as a call inside the loop.
-  if (!canConstantEvolve(I, L)) return 0;
+  if (!canConstantEvolve(I, L)) return nullptr;
 
   // An unmapped PHI can be due to a branch or another loop inside this loop,
   // or due to this not being the initial iteration through a loop where we
   // couldn't compute the evolution of this particular PHI last time.
-  if (isa<PHINode>(I)) return 0;
+  if (isa<PHINode>(I)) return nullptr;
 
   std::vector<Constant*> Operands(I->getNumOperands());
 
@@ -4979,12 +5038,12 @@
     Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i));
     if (!Operand) {
       Operands[i] = dyn_cast<Constant>(I->getOperand(i));
-      if (!Operands[i]) return 0;
+      if (!Operands[i]) return nullptr;
       continue;
     }
     Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI);
     Vals[Operand] = C;
-    if (!C) return 0;
+    if (!C) return nullptr;
     Operands[i] = C;
   }
 
@@ -5013,7 +5072,7 @@
     return I->second;
 
   if (BEs.ugt(MaxBruteForceIterations))
-    return ConstantEvolutionLoopExitValue[PN] = 0;  // Not going to evaluate it.
+    // Not going to evaluate it.
+    return ConstantEvolutionLoopExitValue[PN] = nullptr;
 
   Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
 
@@ -5025,22 +5084,22 @@
   // entry must be a constant (coming in from outside of the loop), and the
   // second must be derived from the same PHI.
   bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
-  PHINode *PHI = 0;
+  PHINode *PHI = nullptr;
   for (BasicBlock::iterator I = Header->begin();
        (PHI = dyn_cast<PHINode>(I)); ++I) {
     Constant *StartCST =
       dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
-    if (StartCST == 0) continue;
+    if (!StartCST) continue;
     CurrentIterVals[PHI] = StartCST;
   }
   if (!CurrentIterVals.count(PN))
-    return RetVal = 0;
+    return RetVal = nullptr;
 
   Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
 
   // Execute the loop symbolically to determine the exit value.
   if (BEs.getActiveBits() >= 32)
-    return RetVal = 0; // More than 2^32-1 iterations?? Not doing it!
+    return RetVal = nullptr; // More than 2^32-1 iterations?? Not doing it!
 
   unsigned NumIterations = BEs.getZExtValue(); // must be in range
   unsigned IterationNum = 0;
@@ -5053,8 +5112,8 @@
     DenseMap<Instruction *, Constant *> NextIterVals;
     Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL,
                                            TLI);
-    if (NextPHI == 0)
-      return 0;        // Couldn't evaluate!
+    if (!NextPHI)
+      return nullptr;        // Couldn't evaluate!
     NextIterVals[PN] = NextPHI;
 
     bool StoppedEvolving = NextPHI == CurrentIterVals[PN];
@@ -5101,7 +5160,7 @@
                                                           Value *Cond,
                                                           bool ExitWhen) {
   PHINode *PN = getConstantEvolvingPHI(Cond, L);
-  if (PN == 0) return getCouldNotCompute();
+  if (!PN) return getCouldNotCompute();
 
   // If the loop is canonicalized, the PHI will have exactly two entries.
   // That's the only form we support here.
@@ -5114,12 +5173,12 @@
   // One entry must be a constant (coming in from outside of the loop), and the
   // second must be derived from the same PHI.
   bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
-  PHINode *PHI = 0;
+  PHINode *PHI = nullptr;
   for (BasicBlock::iterator I = Header->begin();
        (PHI = dyn_cast<PHINode>(I)); ++I) {
     Constant *StartCST =
       dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
-    if (StartCST == 0) continue;
+    if (!StartCST) continue;
     CurrentIterVals[PHI] = StartCST;
   }
   if (!CurrentIterVals.count(PN))
@@ -5189,7 +5248,7 @@
     if (Values[u].first == L)
       return Values[u].second ? Values[u].second : V;
   }
-  Values.push_back(std::make_pair(L, static_cast<const SCEV *>(0)));
+  Values.push_back(std::make_pair(L, static_cast<const SCEV *>(nullptr)));
   // Otherwise compute it.
   const SCEV *C = computeSCEVAtScope(V, L);
   SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values2 = ValuesAtScopes[V];
@@ -5243,7 +5302,7 @@
         }
         for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
           Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
-          if (!C2) return 0;
+          if (!C2) return nullptr;
 
           // First pointer!
           if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
@@ -5258,7 +5317,7 @@
           // Don't bother trying to sum two pointers. We probably can't
           // statically compute a load that results from it anyway.
           if (C2->getType()->isPointerTy())
-            return 0;
+            return nullptr;
 
           if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
             if (PTy->getElementType()->isStructTy())
@@ -5276,10 +5335,10 @@
       const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
       if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
         // Don't bother with pointers at all.
-        if (C->getType()->isPointerTy()) return 0;
+        if (C->getType()->isPointerTy()) return nullptr;
         for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
           Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
-          if (!C2 || C2->getType()->isPointerTy()) return 0;
+          if (!C2 || C2->getType()->isPointerTy()) return nullptr;
           C = ConstantExpr::getMul(C, C2);
         }
         return C;
@@ -5298,7 +5357,7 @@
     case scUMaxExpr:
       break; // TODO: smax, umax.
   }
-  return 0;
+  return nullptr;
 }
 
 const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
@@ -5365,7 +5424,7 @@
 
         // Check to see if getSCEVAtScope actually made an improvement.
         if (MadeImprovement) {
-          Constant *C = 0;
+          Constant *C = nullptr;
           if (const CmpInst *CI = dyn_cast<CmpInst>(I))
             C = ConstantFoldCompareInstOperands(CI->getPredicate(),
                                                 Operands[0], Operands[1], DL,
@@ -5697,7 +5756,7 @@
   // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
   // We have not yet seen any such cases.
   const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
-  if (StepC == 0 || StepC->getValue()->equalsInt(0))
+  if (!StepC || StepC->getValue()->equalsInt(0))
     return getCouldNotCompute();
 
   // For positive steps (counting up until unsigned overflow):
@@ -6136,18 +6195,30 @@
 
   // If LHS or RHS is an addrec, check to see if the condition is true in
   // every iteration of the loop.
-  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
-    if (isLoopEntryGuardedByCond(
-          AR->getLoop(), Pred, AR->getStart(), RHS) &&
-        isLoopBackedgeGuardedByCond(
-          AR->getLoop(), Pred, AR->getPostIncExpr(*this), RHS))
-      return true;
-  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS))
-    if (isLoopEntryGuardedByCond(
-          AR->getLoop(), Pred, LHS, AR->getStart()) &&
-        isLoopBackedgeGuardedByCond(
-          AR->getLoop(), Pred, LHS, AR->getPostIncExpr(*this)))
-      return true;
+  // If LHS and RHS are both addrec, both conditions must be true in
+  // every iteration of the loop.
+  const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS);
+  const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
+  bool LeftGuarded = false;
+  bool RightGuarded = false;
+  if (LAR) {
+    const Loop *L = LAR->getLoop();
+    if (isLoopEntryGuardedByCond(L, Pred, LAR->getStart(), RHS) &&
+        isLoopBackedgeGuardedByCond(L, Pred, LAR->getPostIncExpr(*this), RHS)) {
+      if (!RAR) return true;
+      LeftGuarded = true;
+    }
+  }
+  if (RAR) {
+    const Loop *L = RAR->getLoop();
+    if (isLoopEntryGuardedByCond(L, Pred, LHS, RAR->getStart()) &&
+        isLoopBackedgeGuardedByCond(L, Pred, LHS, RAR->getPostIncExpr(*this))) {
+      if (!LAR) return true;
+      RightGuarded = true;
+    }
+  }
+  if (LeftGuarded && RightGuarded)
+    return true;
 
   // Otherwise see what can be done with known constant ranges.
   return isKnownPredicateWithRanges(Pred, LHS, RHS);
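
A brute-force illustration (plain C++, a hypothetical model) of why both sides
need the entry-plus-backedge guard when both are recurrences: with L(i) = l0 +
i*ls and R(i) = r0 + i*rs, the entry guard l0 < r0 alone is not preserved once
the left side steps faster, which is exactly what the backedge check rejects.

#include <cstdio>

static bool alwaysLess(long l0, long ls, long r0, long rs, long n) {
  for (long i = 0; i < n; ++i)
    if (l0 + i * ls >= r0 + i * rs)
      return false;
  return true;
}

int main() {
  // {0,+,1} < {10,+,1}: entry holds and the gap never shrinks -> guarded.
  std::printf("%d\n", alwaysLess(0, 1, 10, 1, 100)); // 1
  // {0,+,2} < {10,+,1}: entry holds but the left side catches up at i=10.
  std::printf("%d\n", alwaysLess(0, 2, 10, 1, 100)); // 0
  return 0;
}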
@@ -6814,6 +6885,105 @@
   return SE.getCouldNotCompute();
 }
 
+namespace {
+struct FindUndefs {
+  bool Found;
+  FindUndefs() : Found(false) {}
+
+  bool follow(const SCEV *S) {
+    if (const SCEVUnknown *C = dyn_cast<SCEVUnknown>(S)) {
+      if (isa<UndefValue>(C->getValue()))
+        Found = true;
+    } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+      if (isa<UndefValue>(C->getValue()))
+        Found = true;
+    }
+
+    // Keep looking if we haven't found it yet.
+    return !Found;
+  }
+  bool isDone() const {
+    // Stop recursion if we have found an undef.
+    return Found;
+  }
+};
+}
+
+// Return true when S contains at least one undef value.
+static inline bool
+containsUndefs(const SCEV *S) {
+  FindUndefs F;
+  SCEVTraversal<FindUndefs> ST(F);
+  ST.visitAll(S);
+
+  return F.Found;
+}
+
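
The follow()/isDone() contract used by FindUndefs generalizes to any
early-exit query over a SCEV tree. A sketch of the same pattern (FindAddRec
and containsAddRec are hypothetical names; SCEVTraversal is the helper from
ScalarEvolutionExpressions.h already used above):

namespace {
struct FindAddRec {
  bool Found;
  FindAddRec() : Found(false) {}

  bool follow(const SCEV *S) {
    if (isa<SCEVAddRecExpr>(S))
      Found = true;
    // Visit the operands of S only while nothing has been found.
    return !Found;
  }
  bool isDone() const { return Found; }
};
}

static inline bool containsAddRec(const SCEV *S) {
  FindAddRec F;
  SCEVTraversal<FindAddRec> ST(F);
  ST.visitAll(S);
  return F.Found;
}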
+namespace {
+// Collect all steps of SCEV expressions.
+struct SCEVCollectStrides {
+  ScalarEvolution &SE;
+  SmallVectorImpl<const SCEV *> &Strides;
+
+  SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S)
+      : SE(SE), Strides(S) {}
+
+  bool follow(const SCEV *S) {
+    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+      Strides.push_back(AR->getStepRecurrence(SE));
+    return true;
+  }
+  bool isDone() const { return false; }
+};
+
+// Collect all SCEVUnknown and SCEVMulExpr expressions.
+struct SCEVCollectTerms {
+  SmallVectorImpl<const SCEV *> &Terms;
+
+  SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T)
+      : Terms(T) {}
+
+  bool follow(const SCEV *S) {
+    if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S)) {
+      if (!containsUndefs(S))
+        Terms.push_back(S);
+
+      // Stop recursion: once a term is collected, do not walk its operands.
+      return false;
+    }
+
+    // Keep looking.
+    return true;
+  }
+  bool isDone() const { return false; }
+};
+}
+
+/// Find parametric terms in this SCEVAddRecExpr.
+void SCEVAddRecExpr::collectParametricTerms(
+    ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &Terms) const {
+  SmallVector<const SCEV *, 4> Strides;
+  SCEVCollectStrides StrideCollector(SE, Strides);
+  visitAll(this, StrideCollector);
+
+  DEBUG({
+      dbgs() << "Strides:\n";
+      for (const SCEV *S : Strides)
+        dbgs() << *S << "\n";
+    });
+
+  for (const SCEV *S : Strides) {
+    SCEVCollectTerms TermCollector(Terms);
+    visitAll(S, TermCollector);
+  }
+
+  DEBUG({
+      dbgs() << "Terms:\n";
+      for (const SCEV *T : Terms)
+        dbgs() << *T << "\n";
+    });
+}
+
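
For intuition about what the two collectors extract, consider a linearized
access to a hypothetical array A[?][n][m] of 8-byte elements: the strides are
{8*n*m, 8*m, 8}, and once constants are stripped the parametric terms are
{n*m, m}. A concrete plain-C++ check (not LLVM API):

#include <cstdio>

// Linearized byte offset of A[i][j][k]; each coefficient is one stride.
static long offset(long i, long j, long k, long n, long m) {
  return i * (8 * n * m) + j * (8 * m) + k * 8;
}

int main() {
  long n = 5, m = 7;
  // Stepping one index at a time exposes each stride.
  std::printf("%ld %ld %ld\n",
              offset(1, 0, 0, n, m),  // 8*n*m = 280
              offset(0, 1, 0, n, m),  // 8*m   = 56
              offset(0, 0, 1, n, m)); // 8
  return 0;
}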
 static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) {
   APInt A = C1->getValue()->getValue();
   APInt B = C2->getValue()->getValue();
@@ -6843,353 +7013,481 @@
 }
 
 namespace {
-struct SCEVGCD : public SCEVVisitor<SCEVGCD, const SCEV *> {
+struct FindSCEVSize {
+  int Size;
+  FindSCEVSize() : Size(0) {}
+
+  bool follow(const SCEV *S) {
+    ++Size;
+    // Keep looking at all operands of S.
+    return true;
+  }
+  bool isDone() const {
+    return false;
+  }
+};
+}
+
+// Returns the size of the SCEV S.
+static inline int sizeOfSCEV(const SCEV *S) {
+  FindSCEVSize F;
+  SCEVTraversal<FindSCEVSize> ST(F);
+  ST.visitAll(S);
+  return F.Size;
+}
+
+namespace {
+
+struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> {
 public:
-  // Pattern match Step into Start. When Step is a multiply expression, find
-  // the largest subexpression of Step that appears in Start. When Start is an
-  // add expression, try to match Step in the subexpressions of Start, non
-  // matching subexpressions are returned under Remainder.
-  static const SCEV *findGCD(ScalarEvolution &SE, const SCEV *Start,
-                             const SCEV *Step, const SCEV **Remainder) {
-    assert(Remainder && "Remainder should not be NULL");
-    SCEVGCD R(SE, Step, SE.getConstant(Step->getType(), 0));
-    const SCEV *Res = R.visit(Start);
-    *Remainder = R.Remainder;
-    return Res;
-  }
+  // Computes the Quotient and Remainder of the division of Numerator by
+  // Denominator.
+  static void divide(ScalarEvolution &SE, const SCEV *Numerator,
+                     const SCEV *Denominator, const SCEV **Quotient,
+                     const SCEV **Remainder) {
+    assert(Numerator && Denominator && "Uninitialized SCEV");
 
-  SCEVGCD(ScalarEvolution &S, const SCEV *G, const SCEV *R)
-      : SE(S), GCD(G), Remainder(R) {
-    Zero = SE.getConstant(GCD->getType(), 0);
-    One = SE.getConstant(GCD->getType(), 1);
-  }
+    SCEVDivision D(SE, Numerator, Denominator);
 
-  const SCEV *visitConstant(const SCEVConstant *Constant) {
-    if (GCD == Constant || Constant == Zero)
-      return GCD;
-
-    if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD)) {
-      const SCEV *Res = SE.getConstant(gcd(Constant, CGCD));
-      if (Res != One)
-        return Res;
-
-      Remainder = SE.getConstant(srem(Constant, CGCD));
-      Constant = cast<SCEVConstant>(SE.getMinusSCEV(Constant, Remainder));
-      Res = SE.getConstant(gcd(Constant, CGCD));
-      return Res;
+    // Check for the trivial case here to avoid having to check for it in the
+    // rest of the code.
+    if (Numerator == Denominator) {
+      *Quotient = D.One;
+      *Remainder = D.Zero;
+      return;
     }
 
-    // When GCD is not a constant, it could be that the GCD is an Add, Mul,
-    // AddRec, etc., in which case we want to find out how many times the
-    // Constant divides the GCD: we then return that as the new GCD.
-    const SCEV *Rem = Zero;
-    const SCEV *Res = findGCD(SE, GCD, Constant, &Rem);
-
-    if (Res == One || Rem != Zero) {
-      Remainder = Constant;
-      return One;
+    if (Numerator->isZero()) {
+      *Quotient = D.Zero;
+      *Remainder = D.Zero;
+      return;
     }
 
-    assert(isa<SCEVConstant>(Res) && "Res should be a constant");
-    Remainder = SE.getConstant(srem(Constant, cast<SCEVConstant>(Res)));
-    return Res;
-  }
+    // Split the Denominator when it is a product.
+    if (const SCEVMulExpr *T = dyn_cast<const SCEVMulExpr>(Denominator)) {
+      const SCEV *Q, *R;
+      *Quotient = Numerator;
+      for (const SCEV *Op : T->operands()) {
+        divide(SE, *Quotient, Op, &Q, &R);
+        *Quotient = Q;
 
-  const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
-    if (GCD != Expr)
-      Remainder = Expr;
-    return GCD;
-  }
-
-  const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
-    if (GCD != Expr)
-      Remainder = Expr;
-    return GCD;
-  }
-
-  const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
-    if (GCD != Expr)
-      Remainder = Expr;
-    return GCD;
-  }
-
-  const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
-    if (GCD == Expr)
-      return GCD;
-
-    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
-      const SCEV *Rem = Zero;
-      const SCEV *Res = findGCD(SE, Expr->getOperand(e - 1 - i), GCD, &Rem);
-
-      // FIXME: There may be ambiguous situations: for instance,
-      // GCD(-4 + (3 * %m), 2 * %m) where 2 divides -4 and %m divides (3 * %m).
-      // The order in which the AddExpr is traversed computes a different GCD
-      // and Remainder.
-      if (Res != One)
-        GCD = Res;
-      if (Rem != Zero)
-        Remainder = SE.getAddExpr(Remainder, Rem);
+        // Bail out when the Numerator is not divisible by one of the terms of
+        // the Denominator.
+        if (!R->isZero()) {
+          *Quotient = D.Zero;
+          *Remainder = Numerator;
+          return;
+        }
+      }
+      *Remainder = D.Zero;
+      return;
     }
 
-    return GCD;
+    D.visit(Numerator);
+    *Quotient = D.Quotient;
+    *Remainder = D.Remainder;
   }
 
-  const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
-    if (GCD == Expr)
-      return GCD;
+  SCEVDivision(ScalarEvolution &S, const SCEV *Numerator,
+               const SCEV *Denominator)
+      : SE(S), Denominator(Denominator) {
+    Zero = SE.getConstant(Denominator->getType(), 0);
+    One = SE.getConstant(Denominator->getType(), 1);
 
-    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
-      if (Expr->getOperand(i) == GCD)
-        return GCD;
+    // By default, we don't know how to divide Expr by Denominator.
+    // Providing the default here simplifies the rest of the code.
+    Quotient = Zero;
+    Remainder = Numerator;
+  }
+
+  // Except in the trivial case described above, we do not know how to divide
+  // Expr by Denominator; the following visitor functions are left empty.
+  void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {}
+  void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {}
+  void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {}
+  void visitUDivExpr(const SCEVUDivExpr *Numerator) {}
+  void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {}
+  void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {}
+  void visitUnknown(const SCEVUnknown *Numerator) {}
+  void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}
+
+  void visitConstant(const SCEVConstant *Numerator) {
+    if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) {
+      Quotient = SE.getConstant(sdiv(Numerator, D));
+      Remainder = SE.getConstant(srem(Numerator, D));
+      return;
+    }
+  }
+
+  void visitAddRecExpr(const SCEVAddRecExpr *Numerator) {
+    const SCEV *StartQ, *StartR, *StepQ, *StepR;
+    assert(Numerator->isAffine() && "Numerator should be affine");
+    divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR);
+    divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR);
+    Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(),
+                                Numerator->getNoWrapFlags());
+    Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(),
+                                 Numerator->getNoWrapFlags());
+  }
+
+  void visitAddExpr(const SCEVAddExpr *Numerator) {
+    SmallVector<const SCEV *, 2> Qs, Rs;
+    Type *Ty = Denominator->getType();
+
+    for (const SCEV *Op : Numerator->operands()) {
+      const SCEV *Q, *R;
+      divide(SE, Op, Denominator, &Q, &R);
+
+      // Bail out if types do not match.
+      if (Ty != Q->getType() || Ty != R->getType()) {
+        Quotient = Zero;
+        Remainder = Numerator;
+        return;
+      }
+
+      Qs.push_back(Q);
+      Rs.push_back(R);
     }
 
-    // If we have not returned yet, it means that GCD is not part of Expr.
-    const SCEV *PartialGCD = One;
-    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
-      const SCEV *Rem = Zero;
-      const SCEV *Res = findGCD(SE, Expr->getOperand(i), GCD, &Rem);
-      if (Rem != Zero)
-        // GCD does not divide Expr->getOperand(i).
+    if (Qs.size() == 1) {
+      Quotient = Qs[0];
+      Remainder = Rs[0];
+      return;
+    }
+
+    Quotient = SE.getAddExpr(Qs);
+    Remainder = SE.getAddExpr(Rs);
+  }
+
+  void visitMulExpr(const SCEVMulExpr *Numerator) {
+    SmallVector<const SCEV *, 2> Qs;
+    Type *Ty = Denominator->getType();
+
+    bool FoundDenominatorTerm = false;
+    for (const SCEV *Op : Numerator->operands()) {
+      // Bail out if types do not match.
+      if (Ty != Op->getType()) {
+        Quotient = Zero;
+        Remainder = Numerator;
+        return;
+      }
+
+      if (FoundDenominatorTerm) {
+        Qs.push_back(Op);
         continue;
-
-      if (Res == GCD)
-        return GCD;
-      PartialGCD = SE.getMulExpr(PartialGCD, Res);
-      if (PartialGCD == GCD)
-        return GCD;
-    }
-
-    if (PartialGCD != One)
-      return PartialGCD;
-
-    Remainder = Expr;
-    const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(GCD);
-    if (!Mul)
-      return PartialGCD;
-
-    // When the GCD is a multiply expression, try to decompose it:
-    // this occurs when Step does not divide the Start expression
-    // as in: {(-4 + (3 * %m)),+,(2 * %m)}
-    for (int i = 0, e = Mul->getNumOperands(); i < e; ++i) {
-      const SCEV *Rem = Zero;
-      const SCEV *Res = findGCD(SE, Expr, Mul->getOperand(i), &Rem);
-      if (Rem == Zero) {
-        Remainder = Rem;
-        return Res;
       }
+
+      // Check whether Denominator divides one of the product operands.
+      const SCEV *Q, *R;
+      divide(SE, Op, Denominator, &Q, &R);
+      if (!R->isZero()) {
+        Qs.push_back(Op);
+        continue;
+      }
+
+      // Bail out if types do not match.
+      if (Ty != Q->getType()) {
+        Quotient = Zero;
+        Remainder = Numerator;
+        return;
+      }
+
+      FoundDenominatorTerm = true;
+      Qs.push_back(Q);
     }
 
-    return PartialGCD;
-  }
-
-  const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
-    if (GCD != Expr)
-      Remainder = Expr;
-    return GCD;
-  }
-
-  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
-    if (GCD == Expr)
-      return GCD;
-
-    if (!Expr->isAffine()) {
-      Remainder = Expr;
-      return GCD;
+    if (FoundDenominatorTerm) {
+      Remainder = Zero;
+      if (Qs.size() == 1)
+        Quotient = Qs[0];
+      else
+        Quotient = SE.getMulExpr(Qs);
+      return;
     }
 
-    const SCEV *Rem = Zero;
-    const SCEV *Res = findGCD(SE, Expr->getOperand(0), GCD, &Rem);
-    if (Rem != Zero)
-      Remainder = SE.getAddExpr(Remainder, Rem);
-
-    Rem = Zero;
-    Res = findGCD(SE, Expr->getOperand(1), Res, &Rem);
-    if (Rem != Zero) {
-      Remainder = Expr;
-      return GCD;
+    if (!isa<SCEVUnknown>(Denominator)) {
+      Quotient = Zero;
+      Remainder = Numerator;
+      return;
     }
 
-    return Res;
-  }
+    // The Remainder is obtained by replacing Denominator by 0 in Numerator.
+    ValueToValueMap RewriteMap;
+    RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
+        cast<SCEVConstant>(Zero)->getValue();
+    Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
 
-  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
-    if (GCD != Expr)
-      Remainder = Expr;
-    return GCD;
-  }
-
-  const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
-    if (GCD != Expr)
-      Remainder = Expr;
-    return GCD;
-  }
-
-  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
-    if (GCD != Expr)
-      Remainder = Expr;
-    return GCD;
-  }
-
-  const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
-    return One;
+    // Quotient is (Numerator - Remainder) divided by Denominator.
+    const SCEV *Q, *R;
+    const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder);
+    if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) {
+      // This SCEV does not seem to simplify: fail the division here.
+      Quotient = Zero;
+      Remainder = Numerator;
+      return;
+    }
+    divide(SE, Diff, Denominator, &Q, &R);
+    assert(R == Zero &&
+           "Denominator should evenly divide (Numerator - Remainder)");
+    Quotient = Q;
   }
 
 private:
   ScalarEvolution &SE;
-  const SCEV *GCD, *Remainder, *Zero, *One;
+  const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One;
 };
+}
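
A worked instance of the divide() contract above, with the symbolic SCEVs
replaced by concrete numbers (plain C++, a hypothetical model): dividing the
numerator 4*n*m + 6*m + 3 by m operand-by-operand, as visitAddExpr does,
yields Quotient = 4*n + 6 and Remainder = 3.

#include <cassert>
#include <cstdio>

int main() {
  long n = 11, m = 9;
  long Numerator = 4 * n * m + 6 * m + 3;
  long Quotient = 4 * n + 6; // (4*n*m)/m and (6*m)/m divide evenly
  long Remainder = 3;        // the constant term is not divisible by m
  assert(Numerator == Quotient * m + Remainder);
  std::printf("Q = %ld, R = %ld\n", Quotient, Remainder);
  return 0;
}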
 
-struct SCEVDivision : public SCEVVisitor<SCEVDivision, const SCEV *> {
-public:
-  // Remove from Start all multiples of Step.
-  static const SCEV *divide(ScalarEvolution &SE, const SCEV *Start,
-                            const SCEV *Step) {
-    SCEVDivision D(SE, Step);
-    const SCEV *Rem = D.Zero;
-    (void)Rem;
-    // The division is guaranteed to succeed: Step should divide Start with no
-    // remainder.
-    assert(Step == SCEVGCD::findGCD(SE, Start, Step, &Rem) && Rem == D.Zero &&
-           "Step should divide Start with no remainder.");
-    return D.visit(Start);
-  }
+static bool findArrayDimensionsRec(ScalarEvolution &SE,
+                                   SmallVectorImpl<const SCEV *> &Terms,
+                                   SmallVectorImpl<const SCEV *> &Sizes) {
+  int Last = Terms.size() - 1;
+  const SCEV *Step = Terms[Last];
 
-  SCEVDivision(ScalarEvolution &S, const SCEV *G) : SE(S), GCD(G) {
-    Zero = SE.getConstant(GCD->getType(), 0);
-    One = SE.getConstant(GCD->getType(), 1);
-  }
+  // End of recursion.
+  if (Last == 0) {
+    if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Step)) {
+      SmallVector<const SCEV *, 2> Qs;
+      for (const SCEV *Op : M->operands())
+        if (!isa<SCEVConstant>(Op))
+          Qs.push_back(Op);
 
-  const SCEV *visitConstant(const SCEVConstant *Constant) {
-    if (GCD == Constant)
-      return One;
-
-    if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD))
-      return SE.getConstant(sdiv(Constant, CGCD));
-    return Constant;
-  }
-
-  const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
-    if (GCD == Expr)
-      return One;
-    return Expr;
-  }
-
-  const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
-    if (GCD == Expr)
-      return One;
-    return Expr;
-  }
-
-  const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
-    if (GCD == Expr)
-      return One;
-    return Expr;
-  }
-
-  const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
-    if (GCD == Expr)
-      return One;
-
-    SmallVector<const SCEV *, 2> Operands;
-    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
-      Operands.push_back(divide(SE, Expr->getOperand(i), GCD));
-
-    if (Operands.size() == 1)
-      return Operands[0];
-    return SE.getAddExpr(Operands);
-  }
-
-  const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
-    if (GCD == Expr)
-      return One;
-
-    bool FoundGCDTerm = false;
-    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
-      if (Expr->getOperand(i) == GCD)
-        FoundGCDTerm = true;
-
-    SmallVector<const SCEV *, 2> Operands;
-    if (FoundGCDTerm) {
-      FoundGCDTerm = false;
-      for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
-        if (FoundGCDTerm)
-          Operands.push_back(Expr->getOperand(i));
-        else if (Expr->getOperand(i) == GCD)
-          FoundGCDTerm = true;
-        else
-          Operands.push_back(Expr->getOperand(i));
-      }
-    } else {
-      const SCEV *PartialGCD = One;
-      for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
-        if (PartialGCD == GCD) {
-          Operands.push_back(Expr->getOperand(i));
-          continue;
-        }
-
-        const SCEV *Rem = Zero;
-        const SCEV *Res = SCEVGCD::findGCD(SE, Expr->getOperand(i), GCD, &Rem);
-        if (Rem == Zero) {
-          PartialGCD = SE.getMulExpr(PartialGCD, Res);
-          Operands.push_back(divide(SE, Expr->getOperand(i), GCD));
-        } else {
-          Operands.push_back(Expr->getOperand(i));
-        }
-      }
+      Step = SE.getMulExpr(Qs);
     }
 
-    if (Operands.size() == 1)
-      return Operands[0];
-    return SE.getMulExpr(Operands);
+    Sizes.push_back(Step);
+    return true;
   }
 
-  const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
-    if (GCD == Expr)
-      return One;
-    return Expr;
+  for (const SCEV *&Term : Terms) {
+    // Normalize the terms before the next call to findArrayDimensionsRec.
+    const SCEV *Q, *R;
+    SCEVDivision::divide(SE, Term, Step, &Q, &R);
+
+    // Bail out when the GCD candidate (Step) does not evenly divide a term.
+    if (!R->isZero())
+      return false;
+
+    Term = Q;
   }
 
-  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
-    if (GCD == Expr)
-      return One;
+  // Remove all SCEVConstants.
+  Terms.erase(std::remove_if(Terms.begin(), Terms.end(), [](const SCEV *E) {
+                return isa<SCEVConstant>(E);
+              }),
+              Terms.end());
 
-    assert(Expr->isAffine() && "Expr should be affine");
+  if (Terms.size() > 0)
+    if (!findArrayDimensionsRec(SE, Terms, Sizes))
+      return false;
 
-    const SCEV *Start = divide(SE, Expr->getStart(), GCD);
-    const SCEV *Step = divide(SE, Expr->getStepRecurrence(SE), GCD);
+  Sizes.push_back(Step);
+  return true;
+}
 
-    return SE.getAddRecExpr(Start, Step, Expr->getLoop(),
-                            Expr->getNoWrapFlags());
+namespace {
+struct FindParameter {
+  bool FoundParameter;
+  FindParameter() : FoundParameter(false) {}
+
+  bool follow(const SCEV *S) {
+    if (isa<SCEVUnknown>(S)) {
+      FoundParameter = true;
+      // Stop recursion: we found a parameter.
+      return false;
+    }
+    // Keep looking.
+    return true;
   }
-
-  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
-    if (GCD == Expr)
-      return One;
-    return Expr;
+  bool isDone() const {
+    // Stop recursion if we have found a parameter.
+    return FoundParameter;
   }
-
-  const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
-    if (GCD == Expr)
-      return One;
-    return Expr;
-  }
-
-  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
-    if (GCD == Expr)
-      return One;
-    return Expr;
-  }
-
-  const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
-    return Expr;
-  }
-
-private:
-  ScalarEvolution &SE;
-  const SCEV *GCD, *Zero, *One;
 };
 }
 
+// Returns true when S contains at least one SCEVUnknown parameter.
+static inline bool
+containsParameters(const SCEV *S) {
+  FindParameter F;
+  SCEVTraversal<FindParameter> ST(F);
+  ST.visitAll(S);
+
+  return F.FoundParameter;
+}
+
+// Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter.
+static inline bool
+containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
+  for (const SCEV *T : Terms)
+    if (containsParameters(T))
+      return true;
+  return false;
+}
+
+// Return the number of product terms in S.
+static inline int numberOfTerms(const SCEV *S) {
+  if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(S))
+    return Expr->getNumOperands();
+  return 1;
+}
+
+static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) {
+  if (isa<SCEVConstant>(T))
+    return nullptr;
+
+  if (isa<SCEVUnknown>(T))
+    return T;
+
+  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(T)) {
+    SmallVector<const SCEV *, 2> Factors;
+    for (const SCEV *Op : M->operands())
+      if (!isa<SCEVConstant>(Op))
+        Factors.push_back(Op);
+
+    return SE.getMulExpr(Factors);
+  }
+
+  return T;
+}
+
+/// Return the size of an element read or written by Inst.
+const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
+  Type *Ty;
+  if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
+    Ty = Store->getValueOperand()->getType();
+  else if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
+    Ty = Load->getType();
+  else
+    return nullptr;
+
+  Type *ETy = getEffectiveSCEVType(PointerType::getUnqual(Ty));
+  return getSizeOfExpr(ETy, Ty);
+}
+
+/// Second step of delinearization: compute the array dimensions Sizes from the
+/// set of Terms extracted from the memory access function of this SCEVAddRec.
+void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
+                                          SmallVectorImpl<const SCEV *> &Sizes,
+                                          const SCEV *ElementSize) const {
+
+  if (Terms.size() < 1)
+    return;
+
+  // Early return when Terms do not contain parameters: we do not delinearize
+  // non-parametric SCEVs.
+  if (!containsParameters(Terms))
+    return;
+
+  DEBUG({
+      dbgs() << "Terms:\n";
+      for (const SCEV *T : Terms)
+        dbgs() << *T << "\n";
+    });
+
+  // Remove duplicates.
+  std::sort(Terms.begin(), Terms.end());
+  Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end());
+
+  // Put larger terms first.
+  std::sort(Terms.begin(), Terms.end(), [](const SCEV *LHS, const SCEV *RHS) {
+    return numberOfTerms(LHS) > numberOfTerms(RHS);
+  });
+
+  ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
+
+  // Divide all terms by the element size.
+  for (const SCEV *&Term : Terms) {
+    const SCEV *Q, *R;
+    SCEVDivision::divide(SE, Term, ElementSize, &Q, &R);
+    Term = Q;
+  }
+
+  SmallVector<const SCEV *, 4> NewTerms;
+
+  // Remove constant factors.
+  for (const SCEV *T : Terms)
+    if (const SCEV *NewT = removeConstantFactors(SE, T))
+      NewTerms.push_back(NewT);
+
+  DEBUG({
+      dbgs() << "Terms after sorting:\n";
+      for (const SCEV *T : NewTerms)
+        dbgs() << *T << "\n";
+    });
+
+  if (NewTerms.empty() ||
+      !findArrayDimensionsRec(SE, NewTerms, Sizes)) {
+    Sizes.clear();
+    return;
+  }
+
+  // The last element to be pushed into Sizes is the size of an element.
+  Sizes.push_back(ElementSize);
+
+  DEBUG({
+      dbgs() << "Sizes:\n";
+      for (const SCEV *S : Sizes)
+        dbgs() << *S << "\n";
+    });
+}
+
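
Continuing the running example with terms {n*m, m} and element size 8: the
recursion divides every term by the smallest one, drops the constants, and
recurses, so Sizes comes out as {n, m, 8}. The outermost extent cannot be
recovered from strides alone, hence the ArrayDecl[UnknownSize] debug output.
A numeric sketch of that recurrence (plain C++, a hypothetical model):

#include <cstdio>

int main() {
  long n = 5, m = 7;
  long Terms[] = {n * m, m}; // sorted, terms with more factors first
  // Divide each term by the last one (m), drop the resulting constant 1,
  // recurse on {n}, then append the element size.
  long Sizes[] = {Terms[0] / Terms[1], Terms[1], 8}; // {n, m, 8}
  std::printf("[%ld][%ld] with elements of %ld bytes\n",
              Sizes[0], Sizes[1], Sizes[2]);
  return 0;
}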
+/// Third step of delinearization: compute the access functions for the
+/// Subscripts based on the dimensions in Sizes.
+void SCEVAddRecExpr::computeAccessFunctions(
+    ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &Subscripts,
+    SmallVectorImpl<const SCEV *> &Sizes) const {
+
+  // Early exit in case this SCEV is not an affine multivariate function.
+  if (Sizes.empty() || !this->isAffine())
+    return;
+
+  const SCEV *Res = this;
+  int Last = Sizes.size() - 1;
+  for (int i = Last; i >= 0; i--) {
+    const SCEV *Q, *R;
+    SCEVDivision::divide(SE, Res, Sizes[i], &Q, &R);
+
+    DEBUG({
+        dbgs() << "Res: " << *Res << "\n";
+        dbgs() << "Sizes[i]: " << *Sizes[i] << "\n";
+        dbgs() << "Res divided by Sizes[i]:\n";
+        dbgs() << "Quotient: " << *Q << "\n";
+        dbgs() << "Remainder: " << *R << "\n";
+      });
+
+    Res = Q;
+
+    // Do not record the last subscript corresponding to the size of elements in
+    // the array.
+    if (i == Last) {
+
+      // Bail out if the remainder is too complex.
+      if (isa<SCEVAddRecExpr>(R)) {
+        Subscripts.clear();
+        Sizes.clear();
+        return;
+      }
+
+      continue;
+    }
+
+    // Record the access function for the current subscript.
+    Subscripts.push_back(R);
+  }
+
+  // Also push in last position the remainder of the last division: it will be
+  // the access function of the innermost dimension.
+  Subscripts.push_back(Res);
+
+  std::reverse(Subscripts.begin(), Subscripts.end());
+
+  DEBUG({
+      dbgs() << "Subscripts:\n";
+      for (const SCEV *S : Subscripts)
+        dbgs() << *S << "\n";
+    });
+}
+
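
The same running example, one step further (plain C++, assuming the flat
index is already expressed in elements rather than bytes): successive
division by the recovered sizes, innermost first, peels off one subscript per
dimension as the remainder, and the final quotient is the outermost subscript.

#include <cassert>
#include <cstdio>

int main() {
  long n = 5, m = 7, i = 2, j = 3, k = 4;
  long Res = i * n * m + j * m + k; // flat index in elements
  long K = Res % m; Res /= m;       // innermost subscript
  long J = Res % n; Res /= n;
  long I = Res;                     // last quotient = outermost subscript
  assert(I == i && J == j && K == k);
  std::printf("A[%ld][%ld][%ld]\n", I, J, K);
  return 0;
}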
 /// Splits the SCEV into two vectors of SCEVs representing the subscripts and
 /// sizes of an array access.  The SCEV->delinearize algorithm computes
@@ -7239,84 +7537,40 @@
 /// asking for the SCEV of the memory access with respect to all enclosing
 /// loops, calling SCEV->delinearize on that and printing the results.
 
-const SCEV *
-SCEVAddRecExpr::delinearize(ScalarEvolution &SE,
-                            SmallVectorImpl<const SCEV *> &Subscripts,
-                            SmallVectorImpl<const SCEV *> &Sizes) const {
-  // Early exit in case this SCEV is not an affine multivariate function.
-  if (!this->isAffine())
-    return this;
+void SCEVAddRecExpr::delinearize(ScalarEvolution &SE,
+                                 SmallVectorImpl<const SCEV *> &Subscripts,
+                                 SmallVectorImpl<const SCEV *> &Sizes,
+                                 const SCEV *ElementSize) const {
+  // First step: collect parametric terms.
+  SmallVector<const SCEV *, 4> Terms;
+  collectParametricTerms(SE, Terms);
 
-  const SCEV *Start = this->getStart();
-  const SCEV *Step = this->getStepRecurrence(SE);
+  if (Terms.empty())
+    return;
 
-  // Build the SCEV representation of the canonical induction variable in the
-  // loop of this SCEV.
-  const SCEV *Zero = SE.getConstant(this->getType(), 0);
-  const SCEV *One = SE.getConstant(this->getType(), 1);
-  const SCEV *IV =
-      SE.getAddRecExpr(Zero, One, this->getLoop(), this->getNoWrapFlags());
+  // Second step: find subscript sizes.
+  SE.findArrayDimensions(Terms, Sizes, ElementSize);
 
-  DEBUG(dbgs() << "(delinearize: " << *this << "\n");
+  if (Sizes.empty())
+    return;
 
-  // When the stride of this SCEV is 1, do not compute the GCD: the size of this
-  // subscript is 1, and this same SCEV for the access function.
-  const SCEV *Remainder = Zero;
-  const SCEV *GCD = One;
+  // Third step: compute the access functions for each subscript.
+  computeAccessFunctions(SE, Subscripts, Sizes);
 
-  // Find the GCD and Remainder of the Start and Step coefficients of this SCEV.
-  if (Step != One && !Step->isAllOnesValue())
-    GCD = SCEVGCD::findGCD(SE, Start, Step, &Remainder);
+  if (Subscripts.empty())
+    return;
 
-  DEBUG(dbgs() << "GCD: " << *GCD << "\n");
-  DEBUG(dbgs() << "Remainder: " << *Remainder << "\n");
+  DEBUG({
+      dbgs() << "succeeded to delinearize " << *this << "\n";
+      dbgs() << "ArrayDecl[UnknownSize]";
+      for (const SCEV *S : Sizes)
+        dbgs() << "[" << *S << "]";
 
-  const SCEV *Quotient = Start;
-  if (GCD != One && !GCD->isAllOnesValue())
-    // As findGCD computed Remainder, GCD divides "Start - Remainder." The
-    // Quotient is then this SCEV without Remainder, scaled down by the GCD.  The
-    // Quotient is what will be used in the next subscript delinearization.
-    Quotient = SCEVDivision::divide(SE, SE.getMinusSCEV(Start, Remainder), GCD);
-
-  DEBUG(dbgs() << "Quotient: " << *Quotient << "\n");
-
-  const SCEV *Rem = Quotient;
-  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Quotient))
-    // Recursively call delinearize on the Quotient until there are no more
-    // multiples that can be recognized.
-    Rem = AR->delinearize(SE, Subscripts, Sizes);
-
-  // Scale up the canonical induction variable IV by whatever remains from the
-  // Step after division by the GCD: the GCD is the size of all the sub-array.
-  if (Step != One && !Step->isAllOnesValue() && GCD != One &&
-      !GCD->isAllOnesValue() && Step != GCD) {
-    Step = SCEVDivision::divide(SE, Step, GCD);
-    IV = SE.getMulExpr(IV, Step);
-  }
-  // The access function in the current subscript is computed as the canonical
-  // induction variable IV (potentially scaled up by the step) and offset by
-  // Rem, the offset of delinearization in the sub-array.
-  const SCEV *Index = SE.getAddExpr(IV, Rem);
-
-  // Record the access function and the size of the current subscript.
-  Subscripts.push_back(Index);
-  Sizes.push_back(GCD);
-
-#ifndef NDEBUG
-  int Size = Sizes.size();
-  DEBUG(dbgs() << "succeeded to delinearize " << *this << "\n");
-  DEBUG(dbgs() << "ArrayDecl[UnknownSize]");
-  for (int i = 0; i < Size - 1; i++)
-    DEBUG(dbgs() << "[" << *Sizes[i] << "]");
-  DEBUG(dbgs() << " with elements of " << *Sizes[Size - 1] << " bytes.\n");
-
-  DEBUG(dbgs() << "ArrayRef");
-  for (int i = 0; i < Size; i++)
-    DEBUG(dbgs() << "[" << *Subscripts[i] << "]");
-  DEBUG(dbgs() << "\n)\n");
-#endif
-
-  return Remainder;
+      dbgs() << "\nArrayRef";
+      for (const SCEV *S : Subscripts)
+        dbgs() << "[" << *S << "]";
+      dbgs() << "\n";
+    });
 }
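
Callers drive all three steps through this single entry point. A minimal
usage sketch under stated assumptions (analyzeAccess is a hypothetical
helper; Inst is a load or store whose address SCEV has already been resolved
to the SCEVAddRecExpr AR):

static void analyzeAccess(ScalarEvolution &SE, Instruction *Inst,
                          const SCEVAddRecExpr *AR) {
  SmallVector<const SCEV *, 4> Subscripts, Sizes;
  AR->delinearize(SE, Subscripts, Sizes, SE.getElementSize(Inst));
  if (Subscripts.empty() || Sizes.empty())
    return; // could not delinearize; keep the linearized form
  // Subscripts are outermost-first; Sizes lists the recovered dimension
  // sizes and ends with the element size.
  for (unsigned d = 0, e = Subscripts.size(); d != e; ++d)
    dbgs() << "[" << *Subscripts[d] << "]";
  dbgs() << "\n";
}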
 
 //===----------------------------------------------------------------------===//
@@ -7368,7 +7622,8 @@
 //===----------------------------------------------------------------------===//
 
 ScalarEvolution::ScalarEvolution()
-  : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64), FirstUnknown(0) {
+  : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64),
+    BlockDispositions(64), FirstUnknown(nullptr) {
   initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
 }
 
@@ -7376,7 +7631,7 @@
   this->F = &F;
   LI = &getAnalysis<LoopInfo>();
   DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-  DL = DLP ? &DLP->getDataLayout() : 0;
+  DL = DLP ? &DLP->getDataLayout() : nullptr;
   TLI = &getAnalysis<TargetLibraryInfo>();
   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   return false;
@@ -7387,7 +7642,7 @@
   // destructors, so that they release their references to their values.
   for (SCEVUnknown *U = FirstUnknown; U; U = U->Next)
     U->~SCEVUnknown();
-  FirstUnknown = 0;
+  FirstUnknown = nullptr;
 
   ValueExprMap.clear();
 
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 7be6aca..6933f74 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -34,7 +34,7 @@
 
   public:
     static char ID; // Class identification, replacement for typeinfo
-    ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(0) {
+    ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(nullptr) {
       initializeScalarEvolutionAliasAnalysisPass(
         *PassRegistry::getPassRegistry());
     }
@@ -102,7 +102,7 @@
     return U->getValue();
   }
   // No Identified object found.
-  return 0;
+  return nullptr;
 }
 
 AliasAnalysis::AliasResult
@@ -162,10 +162,10 @@
   if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr))
     if (alias(Location(AO ? AO : LocA.Ptr,
                        AO ? +UnknownSize : LocA.Size,
-                       AO ? 0 : LocA.TBAATag),
+                       AO ? nullptr : LocA.TBAATag),
               Location(BO ? BO : LocB.Ptr,
                        BO ? +UnknownSize : LocB.Size,
-                       BO ? 0 : LocB.TBAATag)) == NoAlias)
+                       BO ? nullptr : LocB.TBAATag)) == NoAlias)
       return NoAlias;
 
   // Forward the query to the next analysis.
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index fb3d595..b507043 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -44,7 +44,7 @@
   // not allowed to move it.
   BasicBlock::iterator BIP = Builder.GetInsertPoint();
 
-  Instruction *Ret = NULL;
+  Instruction *Ret = nullptr;
 
   // Check to see if there is already a cast!
   for (User *U : V->users())
@@ -627,21 +627,21 @@
 const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
   // Test whether we've already computed the most relevant loop for this SCEV.
   std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair =
-    RelevantLoops.insert(std::make_pair(S, static_cast<const Loop *>(0)));
+    RelevantLoops.insert(std::make_pair(S, nullptr));
   if (!Pair.second)
     return Pair.first->second;
 
   if (isa<SCEVConstant>(S))
     // A constant has no relevant loops.
-    return 0;
+    return nullptr;
   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
     if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
       return Pair.first->second = SE.LI->getLoopFor(I->getParent());
     // A non-instruction has no relevant loops.
-    return 0;
+    return nullptr;
   }
   if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
-    const Loop *L = 0;
+    const Loop *L = nullptr;
     if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
       L = AR->getLoop();
     for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end();
@@ -716,7 +716,7 @@
 
   // Emit instructions to add all the operands. Hoist as much as possible
   // out of loops, and form meaningful getelementptrs where possible.
-  Value *Sum = 0;
+  Value *Sum = nullptr;
   for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
        I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
     const Loop *CurLoop = I->first;
@@ -784,7 +784,7 @@
 
   // Emit instructions to mul all the operands. Hoist as much as possible
   // out of loops.
-  Value *Prod = 0;
+  Value *Prod = nullptr;
   for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
        I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
     const SCEV *Op = I->second;
@@ -892,18 +892,18 @@
                                            Instruction *InsertPos,
                                            bool allowScale) {
   if (IncV == InsertPos)
-    return NULL;
+    return nullptr;
 
   switch (IncV->getOpcode()) {
   default:
-    return NULL;
+    return nullptr;
   // Check for a simple Add/Sub or GEP of a loop invariant step.
   case Instruction::Add:
   case Instruction::Sub: {
     Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
     if (!OInst || SE.DT->dominates(OInst, InsertPos))
       return dyn_cast<Instruction>(IncV->getOperand(0));
-    return NULL;
+    return nullptr;
   }
   case Instruction::BitCast:
     return dyn_cast<Instruction>(IncV->getOperand(0));
@@ -914,7 +914,7 @@
         continue;
       if (Instruction *OInst = dyn_cast<Instruction>(*I)) {
         if (!SE.DT->dominates(OInst, InsertPos))
-          return NULL;
+          return nullptr;
       }
       if (allowScale) {
         // allow any kind of GEP as long as it can be hoisted.
@@ -925,11 +925,11 @@
       // have 2 operands. i1* is used by the expander to represent an
       // address-size element.
       if (IncV->getNumOperands() != 2)
-        return NULL;
+        return nullptr;
       unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
       if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
           && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
-        return NULL;
+        return nullptr;
       break;
     }
     return dyn_cast<Instruction>(IncV->getOperand(0));
@@ -1077,9 +1077,9 @@
   // Reuse a previously-inserted PHI, if present.
   BasicBlock *LatchBlock = L->getLoopLatch();
   if (LatchBlock) {
-    PHINode *AddRecPhiMatch = 0;
-    Instruction *IncV = 0;
-    TruncTy = 0;
+    PHINode *AddRecPhiMatch = nullptr;
+    Instruction *IncV = nullptr;
+    TruncTy = nullptr;
     InvertStep = false;
 
     // Only try partially matching scevs that need truncation and/or
@@ -1120,7 +1120,7 @@
       // Stop if we have found an exact match SCEV.
       if (IsMatchingSCEV) {
         IncV = TempIncV;
-        TruncTy = 0;
+        TruncTy = nullptr;
         InvertStep = false;
         AddRecPhiMatch = PN;
         break;
@@ -1243,13 +1243,13 @@
     PostIncLoopSet Loops;
     Loops.insert(L);
     Normalized =
-      cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0,
-                                                  Loops, SE, *SE.DT));
+      cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, nullptr,
+                                                  nullptr, Loops, SE, *SE.DT));
   }
 
   // Strip off any non-loop-dominating component from the addrec start.
   const SCEV *Start = Normalized->getStart();
-  const SCEV *PostLoopOffset = 0;
+  const SCEV *PostLoopOffset = nullptr;
   if (!SE.properlyDominates(Start, L->getHeader())) {
     PostLoopOffset = Start;
     Start = SE.getConstant(Normalized->getType(), 0);
@@ -1261,7 +1261,7 @@
 
   // Strip off any non-loop-dominating component from the addrec step.
   const SCEV *Step = Normalized->getStepRecurrence(SE);
-  const SCEV *PostLoopScale = 0;
+  const SCEV *PostLoopScale = nullptr;
   if (!SE.dominates(Step, L->getHeader())) {
     PostLoopScale = Step;
     Step = SE.getConstant(Normalized->getType(), 1);
@@ -1276,7 +1276,7 @@
   Type *ExpandTy = PostLoopScale ? IntTy : STy;
   // In some cases, we decide to reuse an existing phi node but need to truncate
   // it and/or invert the step.
-  Type *TruncTy = 0;
+  Type *TruncTy = nullptr;
   bool InvertStep = false;
   PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy,
                                           TruncTy, InvertStep);
@@ -1372,7 +1372,7 @@
   const Loop *L = S->getLoop();
 
   // First check for an existing canonical IV in a suitable type.
-  PHINode *CanonicalIV = 0;
+  PHINode *CanonicalIV = nullptr;
   if (PHINode *PN = L->getCanonicalInductionVariable())
     if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
       CanonicalIV = PN;
@@ -1393,7 +1393,7 @@
     while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) ||
            isa<LandingPadInst>(NewInsertPt))
       ++NewInsertPt;
-    V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
+    V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr,
                       NewInsertPt);
     return V;
   }
@@ -1666,7 +1666,8 @@
 
   // Emit code for it.
   BuilderType::InsertPointGuard Guard(Builder);
-  PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin()));
+  PHINode *V = cast<PHINode>(expandCodeFor(H, nullptr,
+                                           L->getHeader()->begin()));
 
   return V;
 }
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
index 1e4c0bd..e9db295 100644
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -113,7 +113,7 @@
     // Transform each operand.
     for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
          I != E; ++I) {
-      Operands.push_back(TransformSubExpr(*I, LUser, 0));
+      Operands.push_back(TransformSubExpr(*I, LUser, nullptr));
     }
     // Conservatively use AnyWrap until/unless we need FlagNW.
     const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap);
diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp
index 87a4fa4..edd82f5 100644
--- a/lib/Analysis/SparsePropagation.cpp
+++ b/lib/Analysis/SparsePropagation.cpp
@@ -12,7 +12,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "sparseprop"
 #include "llvm/Analysis/SparsePropagation.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
@@ -21,6 +20,8 @@
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "sparseprop"
+
 //===----------------------------------------------------------------------===//
 //                  AbstractLatticeFunction Implementation
 //===----------------------------------------------------------------------===//
@@ -147,7 +148,7 @@
       return;
 
     Constant *C = LatticeFunc->GetConstant(BCValue, BI->getCondition(), *this);
-    if (C == 0 || !isa<ConstantInt>(C)) {
+    if (!C || !isa<ConstantInt>(C)) {
       // Non-constant values can go either way.
       Succs[0] = Succs[1] = true;
       return;
@@ -189,7 +190,7 @@
     return;
   
   Constant *C = LatticeFunc->GetConstant(SCValue, SI.getCondition(), *this);
-  if (C == 0 || !isa<ConstantInt>(C)) {
+  if (!C || !isa<ConstantInt>(C)) {
     // All destinations are executable!
     Succs.assign(TI.getNumSuccessors(), true);
     return;
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 04d09f1..cdb0b79 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -7,7 +7,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "tti"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/DataLayout.h"
@@ -19,6 +18,8 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "tti"
+
-// Setup the analysis group to manage the TargetTransformInfo passes.
+// Set up the analysis group to manage the TargetTransformInfo passes.
 INITIALIZE_ANALYSIS_GROUP(TargetTransformInfo, "Target Information", NoTTI)
 char TargetTransformInfo::ID = 0;
@@ -234,7 +235,7 @@
 struct NoTTI final : ImmutablePass, TargetTransformInfo {
   const DataLayout *DL;
 
-  NoTTI() : ImmutablePass(ID), DL(0) {
+  NoTTI() : ImmutablePass(ID), DL(nullptr) {
     initializeNoTTIPass(*PassRegistry::getPassRegistry());
   }
 
@@ -242,9 +243,9 @@
     // Note that this subclass is special, and must *not* call initializeTTI as
     // it does not chain.
     TopTTI = this;
-    PrevTTI = 0;
+    PrevTTI = nullptr;
     DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-    DL = DLP ? &DLP->getDataLayout() : 0;
+    DL = DLP ? &DLP->getDataLayout() : nullptr;
   }
 
   virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -443,7 +444,7 @@
     // Otherwise delegate to the fully generic implementations.
     return getOperationCost(Operator::getOpcode(U), U->getType(),
                             U->getNumOperands() == 1 ?
-                                U->getOperand(0)->getType() : 0);
+                                U->getOperand(0)->getType() : nullptr);
   }
 
   bool hasBranchDivergence() const override { return false; }
@@ -567,7 +568,7 @@
   }
 
   unsigned getShuffleCost(ShuffleKind Kind, Type *Ty,
-                          int Index = 0, Type *SubTp = 0) const override {
+                          int Index = 0, Type *SubTp = nullptr) const override {
     return 1;
   }
 
@@ -581,7 +582,7 @@
   }
 
   unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
-                              Type *CondTy = 0) const override {
+                              Type *CondTy = nullptr) const override {
     return 1;
   }
 
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 05daf18..f36f6f8 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -144,7 +144,7 @@
     const MDNode *Node;
 
   public:
-    TBAANode() : Node(0) {}
+    TBAANode() : Node(nullptr) {}
     explicit TBAANode(const MDNode *N) : Node(N) {}
 
     /// getNode - Get the MDNode for this TBAANode.
@@ -182,7 +182,6 @@
     const MDNode *Node;
 
   public:
-    TBAAStructTagNode() : Node(0) {}
     explicit TBAAStructTagNode(const MDNode *N) : Node(N) {}
 
     /// Get the MDNode for this TBAAStructTagNode.
@@ -218,7 +217,7 @@
     const MDNode *Node;
 
   public:
-    TBAAStructTypeNode() : Node(0) {}
+    TBAAStructTypeNode() : Node(nullptr) {}
     explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {}
 
     /// Get the MDNode for this TBAAStructTypeNode.
@@ -340,7 +339,8 @@
 bool
 TypeBasedAliasAnalysis::Aliases(const MDNode *A,
                                 const MDNode *B) const {
-  if (isStructPathTBAA(A))
+  // Make sure that both MDNodes are struct-path aware.
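+  // If only one of them is struct-path aware, fall through to the scalar
+  // TBAA logic below rather than misreading the other node's operands.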
+  if (isStructPathTBAA(A) && isStructPathTBAA(B))
     return PathAliases(A, B);
 
   // Keep track of the root node for A and B.
@@ -386,6 +386,10 @@
 bool
 TypeBasedAliasAnalysis::PathAliases(const MDNode *A,
                                     const MDNode *B) const {
+  // Verify that both input nodes are struct-path aware.
+  assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware.");
+  assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware.");
+
   // Keep track of the root node for A and B.
   TBAAStructTypeNode RootA, RootB;
   TBAAStructTagNode TagA(A), TagB(B);
@@ -555,38 +559,40 @@
 
 MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
   if (!A || !B)
-    return NULL;
+    return nullptr;
 
   if (A == B)
     return A;
 
   // For struct-path aware TBAA, we use the access type of the tag.
-  bool StructPath = isStructPathTBAA(A);
+  bool StructPath = isStructPathTBAA(A) && isStructPathTBAA(B);
   if (StructPath) {
     A = cast_or_null<MDNode>(A->getOperand(1));
-    if (!A) return 0;
+    if (!A) return nullptr;
     B = cast_or_null<MDNode>(B->getOperand(1));
-    if (!B) return 0;
+    if (!B) return nullptr;
   }
 
   SmallVector<MDNode *, 4> PathA;
   MDNode *T = A;
   while (T) {
     PathA.push_back(T);
-    T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0;
+    T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
+                                 : nullptr;
   }
 
   SmallVector<MDNode *, 4> PathB;
   T = B;
   while (T) {
     PathB.push_back(T);
-    T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0;
+    T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
+                                 : nullptr;
   }
 
   int IA = PathA.size() - 1;
   int IB = PathB.size() - 1;
 
-  MDNode *Ret = 0;
+  MDNode *Ret = nullptr;
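+  // Walk both paths from their roots (the backs of the vectors) toward the
+  // leaves, remembering the deepest node the two paths have in common.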
-  while (IA >= 0 && IB >=0) {
+  while (IA >= 0 && IB >= 0) {
     if (PathA[IA] == PathB[IB])
       Ret = PathA[IA];
@@ -599,7 +605,7 @@
     return Ret;
 
   if (!Ret)
-    return 0;
+    return nullptr;
   // We need to convert from a type node to a tag node.
   Type *Int64 = IntegerType::get(A->getContext(), 64);
   Value *Ops[3] = { Ret, Ret, ConstantInt::get(Int64, 0) };
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 72617a0..4f48753 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -16,6 +16,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/IR/CallSite.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
@@ -44,10 +45,10 @@
   return TD ? TD->getPointerTypeSizeInBits(Ty) : 0;
 }
 
-static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
-                                    APInt &KnownZero, APInt &KnownOne,
-                                    APInt &KnownZero2, APInt &KnownOne2,
-                                    const DataLayout *TD, unsigned Depth) {
+static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
+                                   APInt &KnownZero, APInt &KnownOne,
+                                   APInt &KnownZero2, APInt &KnownOne2,
+                                   const DataLayout *TD, unsigned Depth) {
   if (!Add) {
     if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) {
       // We know that the top bits of C-X are clear if X contains less bits
@@ -58,7 +59,7 @@
         unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
         // NLZ can't be BitWidth with no sign bit
         APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
-        llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1);
+        llvm::computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1);
 
         // If all of the MaskV bits are known to be zero, then we know the
         // output top bits are zero, because we now know that the output is
@@ -79,13 +80,10 @@
   // result. For an add, this works with either operand. For a subtract,
   // this only works if the known zeros are in the right operand.
   APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
-  llvm::ComputeMaskedBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1);
-  assert((LHSKnownZero & LHSKnownOne) == 0 &&
-         "Bits known to be one AND zero?");
+  llvm::computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1);
   unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes();
 
-  llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1);
-  assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+  llvm::computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1);
   unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes();
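+  // E.g. if both operands are known to have their low four bits clear, the
+  // result's low four bits are known clear as well.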
 
   // Determine which operand has more trailing zeros, and use that
@@ -130,15 +128,13 @@
   }
 }
 
-static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW,
-                                 APInt &KnownZero, APInt &KnownOne,
-                                 APInt &KnownZero2, APInt &KnownOne2,
-                                 const DataLayout *TD, unsigned Depth) {
+static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
+                                APInt &KnownZero, APInt &KnownOne,
+                                APInt &KnownZero2, APInt &KnownOne2,
+                                const DataLayout *TD, unsigned Depth) {
   unsigned BitWidth = KnownZero.getBitWidth();
-  ComputeMaskedBits(Op1, KnownZero, KnownOne, TD, Depth+1);
-  ComputeMaskedBits(Op0, KnownZero2, KnownOne2, TD, Depth+1);
-  assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-  assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+  computeKnownBits(Op1, KnownZero, KnownOne, TD, Depth+1);
+  computeKnownBits(Op0, KnownZero2, KnownOne2, TD, Depth+1);
 
   bool isKnownNegative = false;
   bool isKnownNonNegative = false;
@@ -192,7 +188,7 @@
     KnownOne.setBit(BitWidth - 1);
 }
 
-void llvm::computeMaskedBitsLoad(const MDNode &Ranges, APInt &KnownZero) {
+void llvm::computeKnownBitsLoad(const MDNode &Ranges, APInt &KnownZero) {
   unsigned BitWidth = KnownZero.getBitWidth();
   unsigned NumRanges = Ranges.getNumOperands() / 2;
   assert(NumRanges >= 1);
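+  // E.g. a load annotated with !range {i8 0, i8 16} only produces values
+  // below 16, so its top four bits are known zero.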
@@ -211,8 +207,9 @@
 
   KnownZero = APInt::getHighBitsSet(BitWidth, MinLeadingZeros);
 }
-/// ComputeMaskedBits - Determine which of the bits are known to be either zero
-/// or one and return them in the KnownZero/KnownOne bit sets.
+
+/// Determine which bits of V are known to be either zero or one and return
+/// them in the KnownZero/KnownOne bit sets.
 ///
 /// NOTE: we cannot consider 'undef' to be "IsZero" here.  The problem is that
 /// we cannot optimize based on the assumption that it is zero without changing
@@ -226,8 +223,8 @@
 /// where V is a vector, known zero, and known one values are the
 /// same width as the vector element, and the bit is set only if it is true
 /// for all of the elements in the vector.
-void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
-                             const DataLayout *TD, unsigned Depth) {
+void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+                            const DataLayout *TD, unsigned Depth) {
   assert(V && "No Value?");
   assert(Depth <= MaxDepth && "Limit Search Depth");
   unsigned BitWidth = KnownZero.getBitWidth();
@@ -241,7 +238,7 @@
           V->getType()->getScalarSizeInBits() == BitWidth) &&
          KnownZero.getBitWidth() == BitWidth &&
          KnownOne.getBitWidth() == BitWidth &&
-         "V, Mask, KnownOne and KnownZero should have same BitWidth");
+         "V, KnownOne and KnownZero should have same BitWidth");
 
   if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
     // We know all of the bits for a constant!
@@ -303,7 +300,7 @@
     if (GA->mayBeOverridden()) {
       KnownZero.clearAllBits(); KnownOne.clearAllBits();
     } else {
-      ComputeMaskedBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1);
+      computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1);
     }
     return;
   }
@@ -341,49 +338,43 @@
   default: break;
   case Instruction::Load:
     if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range))
-      computeMaskedBitsLoad(*MD, KnownZero);
-    return;
+      computeKnownBitsLoad(*MD, KnownZero);
+    break;
   case Instruction::And: {
-    // If either the LHS or the RHS are Zero, the result is zero.
+    // If either the LHS or the RHS is zero, the result is zero.
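+    // E.g. i8 operands with KnownZero 0xF0 (LHS) and 0x0F (RHS) make every
+    // bit of the result known zero.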
-    ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
-    ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
+    computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
 
     // Output known-1 bits are only known if set in both the LHS & RHS.
     KnownOne &= KnownOne2;
     // Output known-0 are known to be clear if zero in either the LHS | RHS.
     KnownZero |= KnownZero2;
-    return;
+    break;
   }
   case Instruction::Or: {
-    ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
-    ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
+    computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
 
     // Output known-0 bits are only known if clear in both the LHS & RHS.
     KnownZero &= KnownZero2;
     // Output known-1 are known to be set if set in either the LHS | RHS.
     KnownOne |= KnownOne2;
-    return;
+    break;
   }
   case Instruction::Xor: {
-    ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
-    ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
+    computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
 
     // Output known-0 bits are known if clear or set in both the LHS & RHS.
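+    // A result bit is known exactly when it is known in both inputs: bits
+    // known equal yield a known zero, bits known unequal a known one.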
     APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
     // Output known-1 are known to be set if set in only one of the LHS, RHS.
     KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
     KnownZero = KnownZeroOut;
-    return;
+    break;
   }
   case Instruction::Mul: {
     bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
-    ComputeMaskedBitsMul(I->getOperand(0), I->getOperand(1), NSW,
-                         KnownZero, KnownOne, KnownZero2, KnownOne2, TD, Depth);
+    computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW,
+                        KnownZero, KnownOne, KnownZero2, KnownOne2, TD, Depth);
     break;
   }
@@ -391,42 +382,40 @@
     // For the purposes of computing leading zeros we can conservatively
     // treat a udiv as a logical right shift by the power of 2 known to
     // be less than the denominator.
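+    // E.g. dividing by a value known to be at least 16 can only shrink the
+    // numerator, so the result gains at least four extra leading zero bits.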
-    ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
+    computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
     unsigned LeadZ = KnownZero2.countLeadingOnes();
 
     KnownOne2.clearAllBits();
     KnownZero2.clearAllBits();
-    ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1);
+    computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1);
     unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
     if (RHSUnknownLeadingOnes != BitWidth)
       LeadZ = std::min(BitWidth,
                        LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
 
     KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
-    return;
+    break;
   }
   case Instruction::Select:
-    ComputeMaskedBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1);
-    ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD,
-                      Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    computeKnownBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1);
+    computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD,
+                     Depth+1);
 
     // Only known if known in both the LHS and RHS.
     KnownOne &= KnownOne2;
     KnownZero &= KnownZero2;
-    return;
+    break;
   case Instruction::FPTrunc:
   case Instruction::FPExt:
   case Instruction::FPToUI:
   case Instruction::FPToSI:
   case Instruction::SIToFP:
   case Instruction::UIToFP:
-    return; // Can't work with floating point.
+    break; // Can't work with floating point.
   case Instruction::PtrToInt:
   case Instruction::IntToPtr:
     // We can't handle these if we don't know the pointer size.
-    if (!TD) return;
+    if (!TD) break;
     // FALL THROUGH and handle them the same as zext/trunc.
   case Instruction::ZExt:
   case Instruction::Trunc: {
@@ -439,19 +428,19 @@
       SrcBitWidth = TD->getTypeSizeInBits(SrcTy->getScalarType());
     } else {
       SrcBitWidth = SrcTy->getScalarSizeInBits();
-      if (!SrcBitWidth) return;
+      if (!SrcBitWidth) break;
     }
 
     assert(SrcBitWidth && "SrcBitWidth can't be zero");
     KnownZero = KnownZero.zextOrTrunc(SrcBitWidth);
     KnownOne = KnownOne.zextOrTrunc(SrcBitWidth);
-    ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+    computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
     KnownZero = KnownZero.zextOrTrunc(BitWidth);
     KnownOne = KnownOne.zextOrTrunc(BitWidth);
     // Any top bits are known to be zero.
     if (BitWidth > SrcBitWidth)
       KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
-    return;
+    break;
   }
   case Instruction::BitCast: {
     Type *SrcTy = I->getOperand(0)->getType();
@@ -459,8 +448,8 @@
         // TODO: For now, not handling conversions like:
         // (bitcast i64 %x to <2 x i32>)
         !I->getType()->isVectorTy()) {
-      ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
-      return;
+      computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+      break;
     }
     break;
   }
@@ -470,8 +459,7 @@
 
     KnownZero = KnownZero.trunc(SrcBitWidth);
     KnownOne = KnownOne.trunc(SrcBitWidth);
-    ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
     KnownZero = KnownZero.zext(BitWidth);
     KnownOne = KnownOne.zext(BitWidth);
 
@@ -481,18 +469,17 @@
       KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
     else if (KnownOne[SrcBitWidth-1])           // Input sign bit known set
       KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
-    return;
+    break;
   }
   case Instruction::Shl:
     // (shl X, C1) & C2 == 0   iff   (X & C2 >>u C1) == 0
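+    // E.g. shl i8 %x, 3 always clears the low three bits and shifts the
+    // known bits of %x up by three positions.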
     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
-      ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
-      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+      computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
       KnownZero <<= ShiftAmt;
       KnownOne  <<= ShiftAmt;
       KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0
-      return;
+      break;
     }
     break;
   case Instruction::LShr:
@@ -502,13 +489,12 @@
       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
 
       // Unsigned shift right.
-      ComputeMaskedBits(I->getOperand(0), KnownZero,KnownOne, TD, Depth+1);
-      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+      computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
       KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
       KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
       // high bits known zero.
       KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
-      return;
+      break;
     }
     break;
   case Instruction::AShr:
@@ -518,8 +504,7 @@
       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
 
       // Signed shift right.
-      ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
-      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+      computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
       KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
       KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
 
@@ -528,19 +513,19 @@
         KnownZero |= HighBits;
       else if (KnownOne[BitWidth-ShiftAmt-1])  // New bits are known one.
         KnownOne |= HighBits;
-      return;
+      break;
     }
     break;
   case Instruction::Sub: {
     bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
-    ComputeMaskedBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
-                            KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
-                            Depth);
+    computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
+                           KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
+                           Depth);
     break;
   }
   case Instruction::Add: {
     bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
-    ComputeMaskedBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
-                            KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
-                            Depth);
+    computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
+                           KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
+                           Depth);
     break;
@@ -550,7 +535,7 @@
       APInt RA = Rem->getValue().abs();
       if (RA.isPowerOf2()) {
         APInt LowBits = RA - 1;
-        ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
+        computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
 
         // The low bits of the first operand are unchanged by the srem.
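+        // E.g. x srem 8 preserves the low three bits of x, so any of those
+        // bits already known in x stay known in the result.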
         KnownZero = KnownZero2 & LowBits;
@@ -574,8 +559,8 @@
     // remainder is zero.
     if (KnownZero.isNonNegative()) {
       APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
-      ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD,
-                        Depth+1);
+      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD,
+                       Depth+1);
       // If it's known zero, our sign bit is also zero.
       if (LHSKnownZero.isNegative())
         KnownZero.setBit(BitWidth - 1);
@@ -587,9 +572,8 @@
       APInt RA = Rem->getValue();
       if (RA.isPowerOf2()) {
         APInt LowBits = (RA - 1);
-        ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD,
-                          Depth+1);
-        assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+        computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD,
+                         Depth+1);
         KnownZero |= ~LowBits;
         KnownOne &= LowBits;
         break;
@@ -598,8 +582,8 @@
 
     // Since the result is less than or equal to either operand, any leading
     // zero bits in either operand must also exist in the result.
-    ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
-    ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1);
+    computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+    computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1);
 
     unsigned Leaders = std::max(KnownZero.countLeadingOnes(),
                                 KnownZero2.countLeadingOnes());
@@ -622,8 +606,8 @@
     // Analyze all of the subscripts of this getelementptr instruction
     // to determine if we can prove known low zero bits.
     APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0);
-    ComputeMaskedBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD,
-                      Depth+1);
+    computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD,
+                     Depth+1);
     unsigned TrailZ = LocalKnownZero.countTrailingOnes();
 
     gep_type_iterator GTI = gep_type_begin(I);
@@ -631,8 +615,10 @@
       Value *Index = I->getOperand(i);
       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
         // Handle struct member offset arithmetic.
-        if (!TD)
-          return;
+        if (!TD) {
+          TrailZ = 0;
+          break;
+        }
 
-        // Handle case when index is vector zeroinitializer
+        // Handle the case where the index is a vector zeroinitializer.
         Constant *CIndex = cast<Constant>(Index);
@@ -650,11 +636,14 @@
       } else {
         // Handle array index arithmetic.
         Type *IndexedTy = GTI.getIndexedType();
-        if (!IndexedTy->isSized()) return;
+        if (!IndexedTy->isSized()) {
+          TrailZ = 0;
+          break;
+        }
         unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
         uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1;
         LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0);
-        ComputeMaskedBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1);
+        computeKnownBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1);
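+        // E.g. indexing an array of i32 (TypeSize 4) with an even index adds
+        // an offset that is a multiple of 8, keeping three trailing zero bits.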
         TrailZ = std::min(TrailZ,
                           unsigned(countTrailingZeros(TypeSize) +
                                    LocalKnownZero.countTrailingOnes()));
@@ -696,11 +685,11 @@
             break;
           // Ok, we have a PHI of the form L op= R. Check for low
           // zero bits.
-          ComputeMaskedBits(R, KnownZero2, KnownOne2, TD, Depth+1);
+          computeKnownBits(R, KnownZero2, KnownOne2, TD, Depth+1);
 
           // We need to take the minimum number of known bits
           APInt KnownZero3(KnownZero), KnownOne3(KnownOne);
-          ComputeMaskedBits(L, KnownZero3, KnownOne3, TD, Depth+1);
+          computeKnownBits(L, KnownZero3, KnownOne3, TD, Depth+1);
 
           KnownZero = APInt::getLowBitsSet(BitWidth,
                                            std::min(KnownZero2.countTrailingOnes(),
@@ -712,7 +701,7 @@
 
     // Unreachable blocks may have zero-operand PHI nodes.
     if (P->getNumIncomingValues() == 0)
-      return;
+      break;
 
     // Otherwise take the unions of the known bit sets of the operands,
     // taking conservative care to avoid excessive recursion.
@@ -731,8 +720,8 @@
         KnownOne2 = APInt(BitWidth, 0);
         // Recurse, but cap the recursion to one level, because we don't
         // want to waste time spinning around in loops.
-        ComputeMaskedBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD,
-                          MaxDepth-1);
+        computeKnownBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD,
+                         MaxDepth-1);
         KnownZero &= KnownZero2;
         KnownOne &= KnownOne2;
         // If all bits have been ruled out, there's no need to check
@@ -776,30 +765,32 @@
         default: break;
         case Intrinsic::uadd_with_overflow:
         case Intrinsic::sadd_with_overflow:
-          ComputeMaskedBitsAddSub(true, II->getArgOperand(0),
-                                  II->getArgOperand(1), false, KnownZero,
-                                  KnownOne, KnownZero2, KnownOne2, TD, Depth);
+          computeKnownBitsAddSub(true, II->getArgOperand(0),
+                                 II->getArgOperand(1), false, KnownZero,
+                                 KnownOne, KnownZero2, KnownOne2, TD, Depth);
           break;
         case Intrinsic::usub_with_overflow:
         case Intrinsic::ssub_with_overflow:
-          ComputeMaskedBitsAddSub(false, II->getArgOperand(0),
-                                  II->getArgOperand(1), false, KnownZero,
-                                  KnownOne, KnownZero2, KnownOne2, TD, Depth);
+          computeKnownBitsAddSub(false, II->getArgOperand(0),
+                                 II->getArgOperand(1), false, KnownZero,
+                                 KnownOne, KnownZero2, KnownOne2, TD, Depth);
           break;
         case Intrinsic::umul_with_overflow:
         case Intrinsic::smul_with_overflow:
-          ComputeMaskedBitsMul(II->getArgOperand(0), II->getArgOperand(1),
-                               false, KnownZero, KnownOne,
-                               KnownZero2, KnownOne2, TD, Depth);
+          computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1),
+                              false, KnownZero, KnownOne,
+                              KnownZero2, KnownOne2, TD, Depth);
           break;
         }
       }
     }
   }
+
+  assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
 }
 
 /// ComputeSignBit - Determine whether the sign bit is known to be zero or
-/// one.  Convenience wrapper around ComputeMaskedBits.
+/// one.  Convenience wrapper around computeKnownBits.
 void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
                           const DataLayout *TD, unsigned Depth) {
   unsigned BitWidth = getBitWidth(V->getType(), TD);
@@ -810,7 +801,7 @@
   }
   APInt ZeroBits(BitWidth, 0);
   APInt OneBits(BitWidth, 0);
-  ComputeMaskedBits(V, ZeroBits, OneBits, TD, Depth);
+  computeKnownBits(V, ZeroBits, OneBits, TD, Depth);
   KnownOne = OneBits[BitWidth - 1];
   KnownZero = ZeroBits[BitWidth - 1];
 }
@@ -842,7 +833,7 @@
   if (Depth++ == MaxDepth)
     return false;
 
-  Value *X = 0, *Y = 0;
+  Value *X = nullptr, *Y = nullptr;
   // A shift of a power of two is a power of two or zero.
   if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
                  match(V, m_Shr(m_Value(X), m_Value()))))
@@ -882,10 +873,10 @@
 
       unsigned BitWidth = V->getType()->getScalarSizeInBits();
       APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0);
-      ComputeMaskedBits(X, LHSZeroBits, LHSOneBits, 0, Depth);
+      computeKnownBits(X, LHSZeroBits, LHSOneBits, nullptr, Depth);
 
       APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0);
-      ComputeMaskedBits(Y, RHSZeroBits, RHSOneBits, 0, Depth);
+      computeKnownBits(Y, RHSZeroBits, RHSOneBits, nullptr, Depth);
       // If i8 V is a power of two or zero:
       //  ZeroBits: 1 1 1 0 1 1 1 1
       // ~ZeroBits: 0 0 0 1 0 0 0 0
@@ -1005,7 +996,7 @@
   unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), TD);
 
   // X | Y != 0 if X != 0 or Y != 0.
-  Value *X = 0, *Y = 0;
+  Value *X = nullptr, *Y = nullptr;
   if (match(V, m_Or(m_Value(X), m_Value(Y))))
     return isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth);
 
@@ -1023,7 +1014,7 @@
 
     APInt KnownZero(BitWidth, 0);
     APInt KnownOne(BitWidth, 0);
-    ComputeMaskedBits(X, KnownZero, KnownOne, TD, Depth);
+    computeKnownBits(X, KnownZero, KnownOne, TD, Depth);
     if (KnownOne[0])
       return true;
   }
@@ -1065,12 +1056,12 @@
       APInt Mask = APInt::getSignedMaxValue(BitWidth);
       // The sign bit of X is set.  If some other bit is set then X is not equal
       // to INT_MIN.
-      ComputeMaskedBits(X, KnownZero, KnownOne, TD, Depth);
+      computeKnownBits(X, KnownZero, KnownOne, TD, Depth);
       if ((KnownOne & Mask) != 0)
         return true;
       // The sign bit of Y is set.  If some other bit is set then Y is not equal
       // to INT_MIN.
-      ComputeMaskedBits(Y, KnownZero, KnownOne, TD, Depth);
+      computeKnownBits(Y, KnownZero, KnownOne, TD, Depth);
       if ((KnownOne & Mask) != 0)
         return true;
     }
@@ -1100,7 +1091,7 @@
   if (!BitWidth) return false;
   APInt KnownZero(BitWidth, 0);
   APInt KnownOne(BitWidth, 0);
-  ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
+  computeKnownBits(V, KnownZero, KnownOne, TD, Depth);
   return KnownOne != 0;
 }
 
@@ -1116,8 +1107,7 @@
 bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
                              const DataLayout *TD, unsigned Depth) {
   APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
-  ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
-  assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+  computeKnownBits(V, KnownZero, KnownOne, TD, Depth);
   return (KnownZero & Mask) == Mask;
 }
 
@@ -1142,7 +1132,7 @@
   unsigned Tmp, Tmp2;
   unsigned FirstAnswer = 1;
 
-  // Note that ConstantInt is handled by the general ComputeMaskedBits case
+  // Note that ConstantInt is handled by the general computeKnownBits case
   // below.
 
   if (Depth == 6)
@@ -1187,7 +1177,7 @@
       FirstAnswer = std::min(Tmp, Tmp2);
       // We computed what we know about the sign bits as our first
       // answer. Now proceed to the generic code that uses
-      // ComputeMaskedBits, and pick whichever answer is better.
+      // computeKnownBits, and pick whichever answer is better.
     }
     break;
 
@@ -1207,7 +1197,7 @@
     if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1)))
       if (CRHS->isAllOnesValue()) {
         APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
-        ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+        computeKnownBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
 
         // If the input is known to be 0 or 1, the output is 0/-1, which is all
         // sign bits set.
@@ -1232,7 +1222,7 @@
     if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0)))
       if (CLHS->isNullValue()) {
         APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
-        ComputeMaskedBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
+        computeKnownBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
         // If the input is known to be 0 or 1, the output is 0/-1, which is all
         // sign bits set.
         if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue())
@@ -1278,7 +1268,7 @@
   // use this information.
   APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
   APInt Mask;
-  ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
+  computeKnownBits(V, KnownZero, KnownOne, TD, Depth);
 
   if (KnownZero.isNegative()) {        // sign bit is 0
     Mask = KnownZero;
@@ -1364,7 +1354,7 @@
       Op1 = ConstantInt::get(V->getContext(), API);
     }
 
-    Value *Mul0 = NULL;
+    Value *Mul0 = nullptr;
     if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) {
       if (Constant *Op1C = dyn_cast<Constant>(Op1))
         if (Constant *MulC = dyn_cast<Constant>(Mul0)) {
@@ -1388,7 +1378,7 @@
         }
     }
 
-    Value *Mul1 = NULL;
+    Value *Mul1 = nullptr;
     if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) {
       if (Constant *Op0C = dyn_cast<Constant>(Op0))
         if (Constant *MulC = dyn_cast<Constant>(Mul1)) {
@@ -1432,7 +1422,7 @@
     return 1;  // Limit search depth.
 
   const Operator *I = dyn_cast<Operator>(V);
-  if (I == 0) return false;
+  if (!I) return false;
 
   // Check if the nsz fast-math flag is set
   if (const FPMathOperator *FPO = dyn_cast<FPMathOperator>(I))
@@ -1513,7 +1503,7 @@
 
         // If the top/bottom halves aren't the same, reject it.
         if (Val != Val2)
-          return 0;
+          return nullptr;
       }
       return ConstantInt::get(V->getContext(), Val);
     }
@@ -1525,11 +1515,11 @@
     Value *Elt = CA->getElementAsConstant(0);
     Value *Val = isBytewiseValue(Elt);
     if (!Val)
-      return 0;
+      return nullptr;
 
     for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I)
       if (CA->getElementAsConstant(I) != Elt)
-        return 0;
+        return nullptr;
 
     return Val;
   }
@@ -1540,7 +1530,7 @@
   //   %c = or i16 %a, %b
   // but until there is an example that actually needs this, it doesn't seem
   // worth worrying about.
-  return 0;
+  return nullptr;
 }
 
 
@@ -1590,7 +1580,7 @@
   Value *V = FindInsertedValue(From, Idxs);
 
   if (!V)
-    return NULL;
+    return nullptr;
 
-  // Insert the value in the new (sub) aggregrate
+  // Insert the value in the new (sub) aggregate.
   return llvm::InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip),
@@ -1641,7 +1631,7 @@
 
   if (Constant *C = dyn_cast<Constant>(V)) {
     C = C->getAggregateElement(idx_range[0]);
-    if (C == 0) return 0;
+    if (!C) return nullptr;
     return FindInsertedValue(C, idx_range.slice(1), InsertBefore);
   }
 
@@ -1654,7 +1644,7 @@
       if (req_idx == idx_range.end()) {
         // We can't handle this without inserting insertvalues
         if (!InsertBefore)
-          return 0;
+          return nullptr;
 
         // The requested index identifies a part of a nested aggregate. Handle
         // this specially. For example,
@@ -1708,7 +1698,7 @@
   }
-  // Otherwise, we don't know (such as, extracting from a function return value
-  // or load instruction)
+  // Otherwise, we don't know (such as when extracting from a function return
+  // value or load instruction).
-  return 0;
+  return nullptr;
 }
 
 /// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if
@@ -1769,13 +1759,13 @@
     // Make sure the index-ee is a pointer to array of i8.
     PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
     ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
-    if (AT == 0 || !AT->getElementType()->isIntegerTy(8))
+    if (!AT || !AT->getElementType()->isIntegerTy(8))
       return false;
 
     // Check to make sure that the first operand of the GEP is an integer and
     // has value 0 so that we are sure we're indexing into the initializer.
     const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
-    if (FirstIdx == 0 || !FirstIdx->isZero())
+    if (!FirstIdx || !FirstIdx->isZero())
       return false;
 
     // If the second index isn't a ConstantInt, then this is a variable index
@@ -1807,7 +1797,7 @@
   // Must be a Constant Array
   const ConstantDataArray *Array =
     dyn_cast<ConstantDataArray>(GV->getInitializer());
-  if (Array == 0 || !Array->isString())
+  if (!Array || !Array->isString())
     return false;
 
   // Get the number of elements in the array
@@ -1913,7 +1903,7 @@
       // See if InstructionSimplify knows any relevant tricks.
       if (Instruction *I = dyn_cast<Instruction>(V))
         // TODO: Acquire a DominatorTree and use it.
-        if (Value *Simplified = SimplifyInstruction(I, TD, 0)) {
+        if (Value *Simplified = SimplifyInstruction(I, TD, nullptr)) {
           V = Simplified;
           continue;
         }
@@ -2001,7 +1991,7 @@
       return false;
     APInt KnownZero(BitWidth, 0);
     APInt KnownOne(BitWidth, 0);
-    ComputeMaskedBits(Op, KnownZero, KnownOne, TD);
+    computeKnownBits(Op, KnownZero, KnownOne, TD);
     return !!KnownZero;
   }
   case Instruction::Load: {
@@ -2076,14 +2066,18 @@
   // Alloca never returns null, malloc might.
   if (isa<AllocaInst>(V)) return true;
 
-  // A byval or inalloca argument is never null.
+  // A byval, inalloca, or nonnull argument is never null.
   if (const Argument *A = dyn_cast<Argument>(V))
-    return A->hasByValOrInAllocaAttr();
+    return A->hasByValOrInAllocaAttr() || A->hasNonNullAttr();
 
   // Global values are not null unless extern weak.
   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
     return !GV->hasExternalWeakLinkage();
 
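+  // A call site whose return value carries the nonnull attribute never
+  // produces null; attribute index 0 denotes the return value here.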
+  if (ImmutableCallSite CS = V)
+    if (CS.paramHasAttr(0, Attribute::NonNull))
+      return true;
+
   // operator new never returns null.
   if (isOperatorNewLikeFn(V, TLI, /*LookThroughBitCast=*/true))
     return true;