AArch64CollectLOH: Rewrite as block-local analysis.

Re-apply r288561: This time with a fix where the ADDs that are part of a
3 instruction LOH would not invalidate the "LastAdrp" state. This fixes
http://llvm.org/PR31361

Previously this pass was using up to 5% compile time in some cases which
is a bit much for what it is doing. The pass featured a full blown
data-flow analysis which in the default configuration was restricted to a
single block.

This rewrites the pass under the assumption that we only ever work on a
single block. This is done in a single pass maintaining a state machine
per general purpose register to catch LOH patterns.

Differential Revision: https://reviews.llvm.org/D27329

This reverts commit 9e6cedb0a4f14364d6511597a9160305e7d34493.

llvm-svn: 291266
diff --git a/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp b/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
index 7666011..17aafa0 100644
--- a/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -110,72 +110,34 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "aarch64-collect-loh"
 
-static cl::opt<bool>
-PreCollectRegister("aarch64-collect-loh-pre-collect-register", cl::Hidden,
-                   cl::desc("Restrict analysis to registers invovled"
-                            " in LOHs"),
-                   cl::init(true));
-
-static cl::opt<bool>
-BasicBlockScopeOnly("aarch64-collect-loh-bb-only", cl::Hidden,
-                    cl::desc("Restrict analysis at basic block scope"),
-                    cl::init(true));
-
 STATISTIC(NumADRPSimpleCandidate,
           "Number of simplifiable ADRP dominate by another");
-#ifndef NDEBUG
-STATISTIC(NumADRPComplexCandidate2,
-          "Number of simplifiable ADRP reachable by 2 defs");
-STATISTIC(NumADRPComplexCandidate3,
-          "Number of simplifiable ADRP reachable by 3 defs");
-STATISTIC(NumADRPComplexCandidateOther,
-          "Number of simplifiable ADRP reachable by 4 or more defs");
-STATISTIC(NumADDToSTRWithImm,
-          "Number of simplifiable STR with imm reachable by ADD");
-STATISTIC(NumLDRToSTRWithImm,
-          "Number of simplifiable STR with imm reachable by LDR");
 STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD");
 STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR");
-STATISTIC(NumADDToLDRWithImm,
-          "Number of simplifiable LDR with imm reachable by ADD");
-STATISTIC(NumLDRToLDRWithImm,
-          "Number of simplifiable LDR with imm reachable by LDR");
 STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD");
 STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR");
-#endif // NDEBUG
 STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP");
-#ifndef NDEBUG
-STATISTIC(NumCplxLvl1, "Number of complex case of level 1");
-STATISTIC(NumTooCplxLvl1, "Number of too complex case of level 1");
-STATISTIC(NumCplxLvl2, "Number of complex case of level 2");
-STATISTIC(NumTooCplxLvl2, "Number of too complex case of level 2");
-#endif // NDEBUG
 STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD");
-STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD");
 
 #define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)"
 
 namespace {
+
 struct AArch64CollectLOH : public MachineFunctionPass {
   static char ID;
-  AArch64CollectLOH() : MachineFunctionPass(ID) {
-    initializeAArch64CollectLOHPass(*PassRegistry::getPassRegistry());
-  }
+  AArch64CollectLOH() : MachineFunctionPass(ID) {}
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
@@ -187,351 +149,57 @@
   StringRef getPassName() const override { return AARCH64_COLLECT_LOH_NAME; }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesAll();
     MachineFunctionPass::getAnalysisUsage(AU);
-    AU.addRequired<MachineDominatorTree>();
+    AU.setPreservesAll();
   }
-
-private:
 };
 
-/// A set of MachineInstruction.
-typedef SetVector<const MachineInstr *> SetOfMachineInstr;
-/// Map a basic block to a set of instructions per register.
-/// This is used to represent the exposed uses of a basic block
-/// per register.
-typedef MapVector<const MachineBasicBlock *,
-                  std::unique_ptr<SetOfMachineInstr[]>>
-BlockToSetOfInstrsPerColor;
-/// Map a basic block to an instruction per register.
-/// This is used to represent the live-out definitions of a basic block
-/// per register.
-typedef MapVector<const MachineBasicBlock *,
-                  std::unique_ptr<const MachineInstr *[]>>
-BlockToInstrPerColor;
-/// Map an instruction to a set of instructions. Used to represent the
-/// mapping def to reachable uses or use to definitions.
-typedef MapVector<const MachineInstr *, SetOfMachineInstr> InstrToInstrs;
-/// Map a basic block to a BitVector.
-/// This is used to record the kill registers per basic block.
-typedef MapVector<const MachineBasicBlock *, BitVector> BlockToRegSet;
-
-/// Map a register to a dense id.
-typedef DenseMap<unsigned, unsigned> MapRegToId;
-/// Map a dense id to a register. Used for debug purposes.
-typedef SmallVector<unsigned, 32> MapIdToReg;
-} // end anonymous namespace.
-
 char AArch64CollectLOH::ID = 0;
 
-INITIALIZE_PASS_BEGIN(AArch64CollectLOH, "aarch64-collect-loh",
-                      AARCH64_COLLECT_LOH_NAME, false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_END(AArch64CollectLOH, "aarch64-collect-loh",
-                    AARCH64_COLLECT_LOH_NAME, false, false)
+} // end anonymous namespace.
 
-/// Given a couple (MBB, reg) get the corresponding set of instruction from
-/// the given "sets".
-/// If this couple does not reference any set, an empty set is added to "sets"
-/// for this couple and returned.
-/// \param nbRegs is used internally allocate some memory. It must be consistent
-/// with the way sets is used.
-static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets,
-                                 const MachineBasicBlock &MBB, unsigned reg,
-                                 unsigned nbRegs) {
-  SetOfMachineInstr *result;
-  BlockToSetOfInstrsPerColor::iterator it = sets.find(&MBB);
-  if (it != sets.end())
-    result = it->second.get();
-  else
-    result = (sets[&MBB] = make_unique<SetOfMachineInstr[]>(nbRegs)).get();
+INITIALIZE_PASS(AArch64CollectLOH, "aarch64-collect-loh",
+                AARCH64_COLLECT_LOH_NAME, false, false)
 
-  return result[reg];
-}
-
-/// Given a couple (reg, MI) get the corresponding set of instructions from the
-/// the given "sets".
-/// This is used to get the uses record in sets of a definition identified by
-/// MI and reg, i.e., MI defines reg.
-/// If the couple does not reference anything, an empty set is added to
-/// "sets[reg]".
-/// \pre set[reg] is valid.
-static SetOfMachineInstr &getUses(InstrToInstrs *sets, unsigned reg,
-                                  const MachineInstr &MI) {
-  return sets[reg][&MI];
-}
-
-/// Same as getUses but does not modify the input map: sets.
-/// \return NULL if the couple (reg, MI) is not in sets.
-static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg,
-                                        const MachineInstr &MI) {
-  InstrToInstrs::const_iterator Res = sets[reg].find(&MI);
-  if (Res != sets[reg].end())
-    return &(Res->second);
-  return nullptr;
-}
-
-/// Initialize the reaching definition algorithm:
-/// For each basic block BB in MF, record:
-/// - its kill set.
-/// - its reachable uses (uses that are exposed to BB's predecessors).
-/// - its the generated definitions.
-/// \param DummyOp if not NULL, specifies a Dummy Operation to be added to
-/// the list of uses of exposed defintions.
-/// \param ADRPMode specifies to only consider ADRP instructions for generated
-/// definition. It also consider definitions of ADRP instructions as uses and
-/// ignore other uses. The ADRPMode is used to collect the information for LHO
-/// that involve ADRP operation only.
-static void initReachingDef(const MachineFunction &MF,
-                            InstrToInstrs *ColorOpToReachedUses,
-                            BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
-                            BlockToSetOfInstrsPerColor &ReachableUses,
-                            const MapRegToId &RegToId,
-                            const MachineInstr *DummyOp, bool ADRPMode) {
-  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
-  unsigned NbReg = RegToId.size();
-
-  for (const MachineBasicBlock &MBB : MF) {
-    auto &BBGen = Gen[&MBB];
-    BBGen = make_unique<const MachineInstr *[]>(NbReg);
-    std::fill(BBGen.get(), BBGen.get() + NbReg, nullptr);
-
-    BitVector &BBKillSet = Kill[&MBB];
-    BBKillSet.resize(NbReg);
-    for (const MachineInstr &MI : MBB) {
-      bool IsADRP = MI.getOpcode() == AArch64::ADRP;
-
-      // Process uses first.
-      if (IsADRP || !ADRPMode)
-        for (const MachineOperand &MO : MI.operands()) {
-          // Treat ADRP def as use, as the goal of the analysis is to find
-          // ADRP defs reached by other ADRP defs.
-          if (!MO.isReg() || (!ADRPMode && !MO.isUse()) ||
-              (ADRPMode && (!IsADRP || !MO.isDef())))
-            continue;
-          unsigned CurReg = MO.getReg();
-          MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
-          if (ItCurRegId == RegToId.end())
-            continue;
-          CurReg = ItCurRegId->second;
-
-          // if CurReg has not been defined, this use is reachable.
-          if (!BBGen[CurReg] && !BBKillSet.test(CurReg))
-            getSet(ReachableUses, MBB, CurReg, NbReg).insert(&MI);
-          // current basic block definition for this color, if any, is in Gen.
-          if (BBGen[CurReg])
-            getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(&MI);
-        }
-
-      // Process clobbers.
-      for (const MachineOperand &MO : MI.operands()) {
-        if (!MO.isRegMask())
-          continue;
-        // Clobbers kill the related colors.
-        const uint32_t *PreservedRegs = MO.getRegMask();
-
-        // Set generated regs.
-        for (const auto &Entry : RegToId) {
-          unsigned Reg = Entry.second;
-          // Use the global register ID when querying APIs external to this
-          // pass.
-          if (MachineOperand::clobbersPhysReg(PreservedRegs, Entry.first)) {
-            // Do not register clobbered definition for no ADRP.
-            // This definition is not used anyway (otherwise register
-            // allocation is wrong).
-            BBGen[Reg] = ADRPMode ? &MI : nullptr;
-            BBKillSet.set(Reg);
-          }
-        }
-      }
-
-      // Process register defs.
-      for (const MachineOperand &MO : MI.operands()) {
-        if (!MO.isReg() || !MO.isDef())
-          continue;
-        unsigned CurReg = MO.getReg();
-        MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
-        if (ItCurRegId == RegToId.end())
-          continue;
-
-        for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) {
-          MapRegToId::const_iterator ItRegId = RegToId.find(*AI);
-          // If this alias has not been recorded, then it is not interesting
-          // for the current analysis.
-          // We can end up in this situation because of tuple registers.
-          // E.g., Let say we are interested in S1. When we register
-          // S1, we will also register its aliases and in particular
-          // the tuple Q1_Q2.
-          // Now, when we encounter Q1_Q2, we will look through its aliases
-          // and will find that S2 is not registered.
-          if (ItRegId == RegToId.end())
-            continue;
-
-          BBKillSet.set(ItRegId->second);
-          BBGen[ItRegId->second] = &MI;
-        }
-        BBGen[ItCurRegId->second] = &MI;
-      }
-    }
-
-    // If we restrict our analysis to basic block scope, conservatively add a
-    // dummy
-    // use for each generated value.
-    if (!ADRPMode && DummyOp && !MBB.succ_empty())
-      for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg)
-        if (BBGen[CurReg])
-          getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(DummyOp);
+static bool canAddBePartOfLOH(const MachineInstr &MI) {
+  // Check immediate to see if the immediate is an address.
+  switch (MI.getOperand(2).getType()) {
+  default:
+    return false;
+  case MachineOperand::MO_GlobalAddress:
+  case MachineOperand::MO_JumpTableIndex:
+  case MachineOperand::MO_ConstantPoolIndex:
+  case MachineOperand::MO_BlockAddress:
+    return true;
   }
 }
 
-/// Reaching def core algorithm:
-/// while an Out has changed
-///    for each bb
-///       for each color
-///           In[bb][color] = U Out[bb.predecessors][color]
-///           insert reachableUses[bb][color] in each in[bb][color]
-///                 op.reachedUses
-///
-///           Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
-static void reachingDefAlgorithm(const MachineFunction &MF,
-                                 InstrToInstrs *ColorOpToReachedUses,
-                                 BlockToSetOfInstrsPerColor &In,
-                                 BlockToSetOfInstrsPerColor &Out,
-                                 BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
-                                 BlockToSetOfInstrsPerColor &ReachableUses,
-                                 unsigned NbReg) {
-  bool HasChanged;
-  do {
-    HasChanged = false;
-    for (const MachineBasicBlock &MBB : MF) {
-      unsigned CurReg;
-      for (CurReg = 0; CurReg < NbReg; ++CurReg) {
-        SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg);
-        SetOfMachineInstr &BBReachableUses =
-            getSet(ReachableUses, MBB, CurReg, NbReg);
-        SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg);
-        unsigned Size = BBOutSet.size();
-        //   In[bb][color] = U Out[bb.predecessors][color]
-        for (const MachineBasicBlock *PredMBB : MBB.predecessors()) {
-          SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg);
-          BBInSet.insert(PredOutSet.begin(), PredOutSet.end());
-        }
-        //   insert reachableUses[bb][color] in each in[bb][color] op.reachedses
-        for (const MachineInstr *MI : BBInSet) {
-          SetOfMachineInstr &OpReachedUses =
-              getUses(ColorOpToReachedUses, CurReg, *MI);
-          OpReachedUses.insert(BBReachableUses.begin(), BBReachableUses.end());
-        }
-        //           Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
-        if (!Kill[&MBB].test(CurReg))
-          BBOutSet.insert(BBInSet.begin(), BBInSet.end());
-        if (Gen[&MBB][CurReg])
-          BBOutSet.insert(Gen[&MBB][CurReg]);
-        HasChanged |= BBOutSet.size() != Size;
-      }
-    }
-  } while (HasChanged);
-}
-
-/// Reaching definition algorithm.
-/// \param MF function on which the algorithm will operate.
-/// \param[out] ColorOpToReachedUses will contain the result of the reaching
-/// def algorithm.
-/// \param ADRPMode specify whether the reaching def algorithm should be tuned
-/// for ADRP optimization. \see initReachingDef for more details.
-/// \param DummyOp if not NULL, the algorithm will work at
-/// basic block scope and will set for every exposed definition a use to
-/// @p DummyOp.
-/// \pre ColorOpToReachedUses is an array of at least number of registers of
-/// InstrToInstrs.
-static void reachingDef(const MachineFunction &MF,
-                        InstrToInstrs *ColorOpToReachedUses,
-                        const MapRegToId &RegToId, bool ADRPMode = false,
-                        const MachineInstr *DummyOp = nullptr) {
-  // structures:
-  // For each basic block.
-  // Out: a set per color of definitions that reach the
-  //      out boundary of this block.
-  // In: Same as Out but for in boundary.
-  // Gen: generated color in this block (one operation per color).
-  // Kill: register set of killed color in this block.
-  // ReachableUses: a set per color of uses (operation) reachable
-  //                for "In" definitions.
-  BlockToSetOfInstrsPerColor Out, In, ReachableUses;
-  BlockToInstrPerColor Gen;
-  BlockToRegSet Kill;
-
-  // Initialize Gen, kill and reachableUses.
-  initReachingDef(MF, ColorOpToReachedUses, Gen, Kill, ReachableUses, RegToId,
-                  DummyOp, ADRPMode);
-
-  // Algo.
-  if (!DummyOp)
-    reachingDefAlgorithm(MF, ColorOpToReachedUses, In, Out, Gen, Kill,
-                         ReachableUses, RegToId.size());
-}
-
-#ifndef NDEBUG
-/// print the result of the reaching definition algorithm.
-static void printReachingDef(const InstrToInstrs *ColorOpToReachedUses,
-                             unsigned NbReg, const TargetRegisterInfo *TRI,
-                             const MapIdToReg &IdToReg) {
-  unsigned CurReg;
-  for (CurReg = 0; CurReg < NbReg; ++CurReg) {
-    if (ColorOpToReachedUses[CurReg].empty())
-      continue;
-    DEBUG(dbgs() << "*** Reg " << PrintReg(IdToReg[CurReg], TRI) << " ***\n");
-
-    for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
-      DEBUG(dbgs() << "Def:\n");
-      DEBUG(DefsIt.first->print(dbgs()));
-      DEBUG(dbgs() << "Reachable uses:\n");
-      for (const MachineInstr *MI : DefsIt.second) {
-        DEBUG(MI->print(dbgs()));
-      }
-    }
-  }
-}
-#endif // NDEBUG
-
 /// Answer the following question: Can Def be one of the definition
 /// involved in a part of a LOH?
-static bool canDefBePartOfLOH(const MachineInstr *Def) {
-  unsigned Opc = Def->getOpcode();
+static bool canDefBePartOfLOH(const MachineInstr &MI) {
   // Accept ADRP, ADDLow and LOADGot.
-  switch (Opc) {
+  switch (MI.getOpcode()) {
   default:
     return false;
   case AArch64::ADRP:
     return true;
   case AArch64::ADDXri:
-    // Check immediate to see if the immediate is an address.
-    switch (Def->getOperand(2).getType()) {
-    default:
-      return false;
-    case MachineOperand::MO_GlobalAddress:
-    case MachineOperand::MO_JumpTableIndex:
-    case MachineOperand::MO_ConstantPoolIndex:
-    case MachineOperand::MO_BlockAddress:
-      return true;
-    }
+    return canAddBePartOfLOH(MI);
   case AArch64::LDRXui:
     // Check immediate to see if the immediate is an address.
-    switch (Def->getOperand(2).getType()) {
+    switch (MI.getOperand(2).getType()) {
     default:
       return false;
     case MachineOperand::MO_GlobalAddress:
-      return true;
+      return MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT;
     }
   }
-  // Unreachable.
-  return false;
 }
 
 /// Check whether the given instruction can the end of a LOH chain involving a
 /// store.
-static bool isCandidateStore(const MachineInstr *Instr) {
-  switch (Instr->getOpcode()) {
+static bool isCandidateStore(const MachineInstr &MI, const MachineOperand &MO) {
+  switch (MI.getOpcode()) {
   default:
     return false;
   case AArch64::STRBBui:
@@ -543,109 +211,19 @@
   case AArch64::STRSui:
   case AArch64::STRDui:
   case AArch64::STRQui:
+    // We can only optimize the index operand.
     // In case we have str xA, [xA, #imm], this is two different uses
     // of xA and we cannot fold, otherwise the xA stored may be wrong,
     // even if #imm == 0.
-    if (Instr->getOperand(0).getReg() != Instr->getOperand(1).getReg())
-      return true;
-  }
-  return false;
-}
-
-/// Given the result of a reaching definition algorithm in ColorOpToReachedUses,
-/// Build the Use to Defs information and filter out obvious non-LOH candidates.
-/// In ADRPMode, non-LOH candidates are "uses" with non-ADRP definitions.
-/// In non-ADRPMode, non-LOH candidates are "uses" with several definition,
-/// i.e., no simple chain.
-/// \param ADRPMode -- \see initReachingDef.
-static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs,
-                              const InstrToInstrs *ColorOpToReachedUses,
-                              const MapRegToId &RegToId,
-                              bool ADRPMode = false) {
-
-  SetOfMachineInstr NotCandidate;
-  unsigned NbReg = RegToId.size();
-  MapRegToId::const_iterator EndIt = RegToId.end();
-  for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) {
-    // If this color is never defined, continue.
-    if (ColorOpToReachedUses[CurReg].empty())
-      continue;
-
-    for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
-      for (const MachineInstr *MI : DefsIt.second) {
-        const MachineInstr *Def = DefsIt.first;
-        MapRegToId::const_iterator It;
-        // if all the reaching defs are not adrp, this use will not be
-        // simplifiable.
-        if ((ADRPMode && Def->getOpcode() != AArch64::ADRP) ||
-            (!ADRPMode && !canDefBePartOfLOH(Def)) ||
-            (!ADRPMode && isCandidateStore(MI) &&
-             // store are LOH candidate iff the end of the chain is used as
-             // base.
-             ((It = RegToId.find((MI)->getOperand(1).getReg())) == EndIt ||
-              It->second != CurReg))) {
-          NotCandidate.insert(MI);
-          continue;
-        }
-        // Do not consider self reaching as a simplifiable case for ADRP.
-        if (!ADRPMode || MI != DefsIt.first) {
-          UseToReachingDefs[MI].insert(DefsIt.first);
-          // If UsesIt has several reaching definitions, it is not
-          // candidate for simplificaton in non-ADRPMode.
-          if (!ADRPMode && UseToReachingDefs[MI].size() > 1)
-            NotCandidate.insert(MI);
-        }
-      }
-    }
-  }
-  for (const MachineInstr *Elem : NotCandidate) {
-    DEBUG(dbgs() << "Too many reaching defs: " << *Elem << "\n");
-    // It would have been better if we could just remove the entry
-    // from the map.  Because of that, we have to filter the garbage
-    // (second.empty) in the subsequence analysis.
-    UseToReachingDefs[Elem].clear();
-  }
-}
-
-/// Based on the use to defs information (in ADRPMode), compute the
-/// opportunities of LOH ADRP-related.
-static void computeADRP(const InstrToInstrs &UseToDefs,
-                        AArch64FunctionInfo &AArch64FI,
-                        const MachineDominatorTree *MDT) {
-  DEBUG(dbgs() << "*** Compute LOH for ADRP\n");
-  for (const auto &Entry : UseToDefs) {
-    unsigned Size = Entry.second.size();
-    if (Size == 0)
-      continue;
-    if (Size == 1) {
-      const MachineInstr *L2 = *Entry.second.begin();
-      const MachineInstr *L1 = Entry.first;
-      if (!MDT->dominates(L2, L1)) {
-        DEBUG(dbgs() << "Dominance check failed:\n" << *L2 << '\n' << *L1
-                     << '\n');
-        continue;
-      }
-      DEBUG(dbgs() << "Record AdrpAdrp:\n" << *L2 << '\n' << *L1 << '\n');
-      AArch64FI.addLOHDirective(MCLOH_AdrpAdrp, {L2, L1});
-      ++NumADRPSimpleCandidate;
-    }
-#ifndef NDEBUG
-    else if (Size == 2)
-      ++NumADRPComplexCandidate2;
-    else if (Size == 3)
-      ++NumADRPComplexCandidate3;
-    else
-      ++NumADRPComplexCandidateOther;
-#endif
-    // if Size < 1, the use should have been removed from the candidates
-    assert(Size >= 1 && "No reaching defs for that use!");
+    return MI.getOperandNo(&MO) == 1 &&
+           MI.getOperand(0).getReg() != MI.getOperand(1).getReg();
   }
 }
 
 /// Check whether the given instruction can be the end of a LOH chain
 /// involving a load.
-static bool isCandidateLoad(const MachineInstr *Instr) {
-  switch (Instr->getOpcode()) {
+static bool isCandidateLoad(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
   default:
     return false;
   case AArch64::LDRSBWui:
@@ -660,17 +238,13 @@
   case AArch64::LDRSui:
   case AArch64::LDRDui:
   case AArch64::LDRQui:
-    if (Instr->getOperand(2).getTargetFlags() & AArch64II::MO_GOT)
-      return false;
-    return true;
+    return !(MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT);
   }
-  // Unreachable.
-  return false;
 }
 
 /// Check whether the given instruction can load a litteral.
-static bool supportLoadFromLiteral(const MachineInstr *Instr) {
-  switch (Instr->getOpcode()) {
+static bool supportLoadFromLiteral(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
   default:
     return false;
   case AArch64::LDRSWui:
@@ -681,353 +255,233 @@
   case AArch64::LDRQui:
     return true;
   }
-  // Unreachable.
-  return false;
 }
 
-/// Check whether the given instruction is a LOH candidate.
-/// \param UseToDefs is used to check that Instr is at the end of LOH supported
-/// chain.
-/// \pre UseToDefs contains only on def per use, i.e., obvious non candidate are
-/// already been filtered out.
-static bool isCandidate(const MachineInstr *Instr,
-                        const InstrToInstrs &UseToDefs,
-                        const MachineDominatorTree *MDT) {
-  if (!isCandidateLoad(Instr) && !isCandidateStore(Instr))
-    return false;
-
-  const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin();
-  if (Def->getOpcode() != AArch64::ADRP) {
-    // At this point, Def is ADDXri or LDRXui of the right type of
-    // symbol, because we filtered out the uses that were not defined
-    // by these kind of instructions (+ ADRP).
-
-    // Check if this forms a simple chain: each intermediate node must
-    // dominates the next one.
-    if (!MDT->dominates(Def, Instr))
-      return false;
-    // Move one node up in the simple chain.
-    if (UseToDefs.find(Def) ==
-            UseToDefs.end()
-            // The map may contain garbage we have to ignore.
-        ||
-        UseToDefs.find(Def)->second.empty())
-      return false;
-    Instr = Def;
-    Def = *UseToDefs.find(Def)->second.begin();
-  }
-  // Check if we reached the top of the simple chain:
-  // - top is ADRP.
-  // - check the simple chain property: each intermediate node must
-  // dominates the next one.
-  if (Def->getOpcode() == AArch64::ADRP)
-    return MDT->dominates(Def, Instr);
-  return false;
+/// Number of GPR registers traked by mapRegToGPRIndex()
+static const unsigned N_GPR_REGS = 31;
+/// Map register number to index from 0-30.
+static int mapRegToGPRIndex(MCPhysReg Reg) {
+  static_assert(AArch64::X28 - AArch64::X0 + 3 == N_GPR_REGS, "Number of GPRs");
+  static_assert(AArch64::W30 - AArch64::W0 + 1 == N_GPR_REGS, "Number of GPRs");
+  if (AArch64::X0 <= Reg && Reg <= AArch64::X28)
+    return Reg - AArch64::X0;
+  if (AArch64::W0 <= Reg && Reg <= AArch64::W30)
+    return Reg - AArch64::W0;
+  // TableGen gives "FP" and "LR" an index not adjacent to X28 so we have to
+  // handle them as special cases.
+  if (Reg == AArch64::FP)
+    return 29;
+  if (Reg == AArch64::LR)
+    return 30;
+  return -1;
 }
 
-static bool registerADRCandidate(const MachineInstr &Use,
-                                 const InstrToInstrs &UseToDefs,
-                                 const InstrToInstrs *DefsPerColorToUses,
-                                 AArch64FunctionInfo &AArch64FI,
-                                 SetOfMachineInstr *InvolvedInLOHs,
-                                 const MapRegToId &RegToId) {
-  // Look for opportunities to turn ADRP -> ADD or
-  // ADRP -> LDR GOTPAGEOFF into ADR.
-  // If ADRP has more than one use. Give up.
-  if (Use.getOpcode() != AArch64::ADDXri &&
-      (Use.getOpcode() != AArch64::LDRXui ||
-       !(Use.getOperand(2).getTargetFlags() & AArch64II::MO_GOT)))
-    return false;
-  InstrToInstrs::const_iterator It = UseToDefs.find(&Use);
-  // The map may contain garbage that we need to ignore.
-  if (It == UseToDefs.end() || It->second.empty())
-    return false;
-  const MachineInstr &Def = **It->second.begin();
-  if (Def.getOpcode() != AArch64::ADRP)
-    return false;
-  // Check the number of users of ADRP.
-  const SetOfMachineInstr *Users =
-      getUses(DefsPerColorToUses,
-              RegToId.find(Def.getOperand(0).getReg())->second, Def);
-  if (Users->size() > 1) {
-    ++NumADRComplexCandidate;
-    return false;
-  }
-  ++NumADRSimpleCandidate;
-  assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Def)) &&
-         "ADRP already involved in LOH.");
-  assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Use)) &&
-         "ADD already involved in LOH.");
-  DEBUG(dbgs() << "Record AdrpAdd\n" << Def << '\n' << Use << '\n');
+/// State tracked per register.
+/// The main algorithm walks backwards over a basic block maintaining this
+/// datastructure for each tracked general purpose register.
+struct LOHInfo {
+  MCLOHType Type : 8;           ///< "Best" type of LOH possible.
+  bool IsCandidate : 1;         ///< Possible LOH candidate.
+  bool OneUser : 1;             ///< Found exactly one user (yet).
+  bool MultiUsers : 1;          ///< Found multiple users.
+  const MachineInstr *MI0;      ///< First instruction involved in the LOH.
+  const MachineInstr *MI1;      ///< Second instruction involved in the LOH
+                                ///  (if any).
+  const MachineInstr *LastADRP; ///< Last ADRP in same register.
+};
 
-  AArch64FI.addLOHDirective(
-      Use.getOpcode() == AArch64::ADDXri ? MCLOH_AdrpAdd : MCLOH_AdrpLdrGot,
-      {&Def, &Use});
-  return true;
-}
-
-/// Based on the use to defs information (in non-ADRPMode), compute the
-/// opportunities of LOH non-ADRP-related
-static void computeOthers(const InstrToInstrs &UseToDefs,
-                          const InstrToInstrs *DefsPerColorToUses,
-                          AArch64FunctionInfo &AArch64FI, const MapRegToId &RegToId,
-                          const MachineDominatorTree *MDT) {
-  SetOfMachineInstr *InvolvedInLOHs = nullptr;
-#ifndef NDEBUG
-  SetOfMachineInstr InvolvedInLOHsStorage;
-  InvolvedInLOHs = &InvolvedInLOHsStorage;
-#endif // NDEBUG
-  DEBUG(dbgs() << "*** Compute LOH for Others\n");
-  // ADRP -> ADD/LDR -> LDR/STR pattern.
-  // Fall back to ADRP -> ADD pattern if we fail to catch the bigger pattern.
-
-  // FIXME: When the statistics are not important,
-  // This initial filtering loop can be merged into the next loop.
-  // Currently, we didn't do it to have the same code for both DEBUG and
-  // NDEBUG builds. Indeed, the iterator of the second loop would need
-  // to be changed.
-  SetOfMachineInstr PotentialCandidates;
-  SetOfMachineInstr PotentialADROpportunities;
-  for (auto &Use : UseToDefs) {
-    // If no definition is available, this is a non candidate.
-    if (Use.second.empty())
-      continue;
-    // Keep only instructions that are load or store and at the end of
-    // a ADRP -> ADD/LDR/Nothing chain.
-    // We already filtered out the no-chain cases.
-    if (!isCandidate(Use.first, UseToDefs, MDT)) {
-      PotentialADROpportunities.insert(Use.first);
-      continue;
-    }
-    PotentialCandidates.insert(Use.first);
-  }
-
-  // Make the following distinctions for statistics as the linker does
-  // know how to decode instructions:
-  // - ADD/LDR/Nothing make there different patterns.
-  // - LDR/STR make two different patterns.
-  // Hence, 6 - 1 base patterns.
-  // (because ADRP-> Nothing -> STR is not simplifiable)
-
-  // The linker is only able to have a simple semantic, i.e., if pattern A
-  // do B.
-  // However, we want to see the opportunity we may miss if we were able to
-  // catch more complex cases.
-
-  // PotentialCandidates are result of a chain ADRP -> ADD/LDR ->
-  // A potential candidate becomes a candidate, if its current immediate
-  // operand is zero and all nodes of the chain have respectively only one user
-#ifndef NDEBUG
-  SetOfMachineInstr DefsOfPotentialCandidates;
-#endif
-  for (const MachineInstr *Candidate : PotentialCandidates) {
-    // Get the definition of the candidate i.e., ADD or LDR.
-    const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin();
-    // Record the elements of the chain.
-    const MachineInstr *L1 = Def;
-    const MachineInstr *L2 = nullptr;
-    unsigned ImmediateDefOpc = Def->getOpcode();
-    if (Def->getOpcode() != AArch64::ADRP) {
-      // Check the number of users of this node.
-      const SetOfMachineInstr *Users =
-          getUses(DefsPerColorToUses,
-                  RegToId.find(Def->getOperand(0).getReg())->second, *Def);
-      if (Users->size() > 1) {
-#ifndef NDEBUG
-        // if all the uses of this def are in potential candidate, this is
-        // a complex candidate of level 2.
-        bool IsLevel2 = true;
-        for (const MachineInstr *MI : *Users) {
-          if (!PotentialCandidates.count(MI)) {
-            ++NumTooCplxLvl2;
-            IsLevel2 = false;
-            break;
-          }
-        }
-        if (IsLevel2)
-          ++NumCplxLvl2;
-#endif // NDEBUG
-        PotentialADROpportunities.insert(Def);
-        continue;
-      }
-      L2 = Def;
-      Def = *UseToDefs.find(Def)->second.begin();
-      L1 = Def;
-    } // else the element in the middle of the chain is nothing, thus
-      // Def already contains the first element of the chain.
-
-    // Check the number of users of the first node in the chain, i.e., ADRP
-    const SetOfMachineInstr *Users =
-        getUses(DefsPerColorToUses,
-                RegToId.find(Def->getOperand(0).getReg())->second, *Def);
-    if (Users->size() > 1) {
-#ifndef NDEBUG
-      // if all the uses of this def are in the defs of the potential candidate,
-      // this is a complex candidate of level 1
-      if (DefsOfPotentialCandidates.empty()) {
-        // lazy init
-        DefsOfPotentialCandidates = PotentialCandidates;
-        for (const MachineInstr *Candidate : PotentialCandidates) {
-          if (!UseToDefs.find(Candidate)->second.empty())
-            DefsOfPotentialCandidates.insert(
-                *UseToDefs.find(Candidate)->second.begin());
-        }
-      }
-      bool Found = false;
-      for (auto &Use : *Users) {
-        if (!DefsOfPotentialCandidates.count(Use)) {
-          ++NumTooCplxLvl1;
-          Found = true;
-          break;
-        }
-      }
-      if (!Found)
-        ++NumCplxLvl1;
-#endif // NDEBUG
-      continue;
-    }
-
-    bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri);
-    // If the chain is three instructions long and ldr is the second element,
-    // then this ldr must load form GOT, otherwise this is not a correct chain.
-    if (L2 && !IsL2Add &&
-        !(L2->getOperand(2).getTargetFlags() & AArch64II::MO_GOT))
-      continue;
-    SmallVector<const MachineInstr *, 3> Args;
-    MCLOHType Kind;
-    if (isCandidateLoad(Candidate)) {
-      if (!L2) {
-        // At this point, the candidate LOH indicates that the ldr instruction
-        // may use a direct access to the symbol. There is not such encoding
-        // for loads of byte and half.
-        if (!supportLoadFromLiteral(Candidate))
-          continue;
-
-        DEBUG(dbgs() << "Record AdrpLdr:\n" << *L1 << '\n' << *Candidate
-                     << '\n');
-        Kind = MCLOH_AdrpLdr;
-        Args.push_back(L1);
-        Args.push_back(Candidate);
-        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
-               "L1 already involved in LOH.");
-        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
-               "Candidate already involved in LOH.");
-        ++NumADRPToLDR;
-      } else {
-        DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
-                     << "Ldr:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
-                     << '\n');
-
-        Kind = IsL2Add ? MCLOH_AdrpAddLdr : MCLOH_AdrpLdrGotLdr;
-        Args.push_back(L1);
-        Args.push_back(L2);
-        Args.push_back(Candidate);
-
-        PotentialADROpportunities.remove(L2);
-        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
-               "L1 already involved in LOH.");
-        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
-               "L2 already involved in LOH.");
-        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
-               "Candidate already involved in LOH.");
-#ifndef NDEBUG
-        // get the immediate of the load
-        if (Candidate->getOperand(2).getImm() == 0)
-          if (ImmediateDefOpc == AArch64::ADDXri)
-            ++NumADDToLDR;
-          else
-            ++NumLDRToLDR;
-        else if (ImmediateDefOpc == AArch64::ADDXri)
-          ++NumADDToLDRWithImm;
-        else
-          ++NumLDRToLDRWithImm;
-#endif // NDEBUG
-      }
-    } else {
-      if (ImmediateDefOpc == AArch64::ADRP)
-        continue;
-      else {
-
-        DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
-                     << "Str:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
-                     << '\n');
-
-        Kind = IsL2Add ? MCLOH_AdrpAddStr : MCLOH_AdrpLdrGotStr;
-        Args.push_back(L1);
-        Args.push_back(L2);
-        Args.push_back(Candidate);
-
-        PotentialADROpportunities.remove(L2);
-        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
-               "L1 already involved in LOH.");
-        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
-               "L2 already involved in LOH.");
-        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
-               "Candidate already involved in LOH.");
-#ifndef NDEBUG
-        // get the immediate of the store
-        if (Candidate->getOperand(2).getImm() == 0)
-          if (ImmediateDefOpc == AArch64::ADDXri)
-            ++NumADDToSTR;
-          else
-            ++NumLDRToSTR;
-        else if (ImmediateDefOpc == AArch64::ADDXri)
-          ++NumADDToSTRWithImm;
-        else
-          ++NumLDRToSTRWithImm;
-#endif // DEBUG
-      }
-    }
-    AArch64FI.addLOHDirective(Kind, Args);
-  }
-
-  // Now, we grabbed all the big patterns, check ADR opportunities.
-  for (const MachineInstr *Candidate : PotentialADROpportunities)
-    registerADRCandidate(*Candidate, UseToDefs, DefsPerColorToUses, AArch64FI,
-                         InvolvedInLOHs, RegToId);
-}
-
-/// Look for every register defined by potential LOHs candidates.
-/// Map these registers with dense id in @p RegToId and vice-versa in
-/// @p IdToReg. @p IdToReg is populated only in DEBUG mode.
-static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId,
-                               MapIdToReg &IdToReg,
-                               const TargetRegisterInfo *TRI) {
-  unsigned CurRegId = 0;
-  if (!PreCollectRegister) {
-    unsigned NbReg = TRI->getNumRegs();
-    for (; CurRegId < NbReg; ++CurRegId) {
-      RegToId[CurRegId] = CurRegId;
-      DEBUG(IdToReg.push_back(CurRegId));
-      DEBUG(assert(IdToReg[CurRegId] == CurRegId && "Reg index mismatches"));
-    }
+/// Update state \p Info given \p MI uses the tracked register.
+static void handleUse(const MachineInstr &MI, const MachineOperand &MO,
+                      LOHInfo &Info) {
+  // We have multiple uses if we already found one before.
+  if (Info.MultiUsers || Info.OneUser) {
+    Info.IsCandidate = false;
+    Info.MultiUsers = true;
     return;
   }
+  Info.OneUser = true;
 
-  DEBUG(dbgs() << "** Collect Involved Register\n");
-  for (const auto &MBB : MF) {
-    for (const MachineInstr &MI : MBB) {
-      if (!canDefBePartOfLOH(&MI) &&
-          !isCandidateLoad(&MI) && !isCandidateStore(&MI))
-        continue;
+  // Start new LOHInfo if applicable.
+  if (isCandidateLoad(MI)) {
+    Info.Type = MCLOH_AdrpLdr;
+    Info.IsCandidate = true;
+    Info.MI0 = &MI;
+    // Note that even this is AdrpLdr now, we can switch to a Ldr variant
+    // later.
+  } else if (isCandidateStore(MI, MO)) {
+    Info.Type = MCLOH_AdrpAddStr;
+    Info.IsCandidate = true;
+    Info.MI0 = &MI;
+    Info.MI1 = nullptr;
+  } else if (MI.getOpcode() == AArch64::ADDXri) {
+    Info.Type = MCLOH_AdrpAdd;
+    Info.IsCandidate = true;
+    Info.MI0 = &MI;
+  } else if (MI.getOpcode() == AArch64::LDRXui &&
+             MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) {
+    Info.Type = MCLOH_AdrpLdrGot;
+    Info.IsCandidate = true;
+    Info.MI0 = &MI;
+  }
+}
 
-      // Process defs
-      for (MachineInstr::const_mop_iterator IO = MI.operands_begin(),
-                                            IOEnd = MI.operands_end();
-           IO != IOEnd; ++IO) {
-        if (!IO->isReg() || !IO->isDef())
-          continue;
-        unsigned CurReg = IO->getReg();
-        for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI)
-          if (RegToId.find(*AI) == RegToId.end()) {
-            DEBUG(IdToReg.push_back(*AI);
-                  assert(IdToReg[CurRegId] == *AI &&
-                         "Reg index mismatches insertion index."));
-            RegToId[*AI] = CurRegId++;
-            DEBUG(dbgs() << "Register: " << PrintReg(*AI, TRI) << '\n');
-          }
-      }
+/// Update state \p Info given the tracked register is clobbered.
+static void handleClobber(LOHInfo &Info) {
+  Info.IsCandidate = false;
+  Info.OneUser = false;
+  Info.MultiUsers = false;
+  Info.LastADRP = nullptr;
+}
+
+/// Update state \p Info given that \p MI is possibly the middle instruction
+/// of an LOH involving 3 instructions.
+static bool handleMiddleInst(const MachineInstr &MI, LOHInfo &DefInfo,
+                             LOHInfo &OpInfo) {
+  if (!DefInfo.IsCandidate || (&DefInfo != &OpInfo && OpInfo.OneUser))
+    return false;
+  // Copy LOHInfo for dest register to LOHInfo for source register.
+  if (&DefInfo != &OpInfo) {
+    OpInfo = DefInfo;
+    // Invalidate \p DefInfo because we track it in \p OpInfo now.
+    handleClobber(DefInfo);
+  } else
+    DefInfo.LastADRP = nullptr;
+
+  // Advance state machine.
+  assert(OpInfo.IsCandidate && "Expect valid state");
+  if (MI.getOpcode() == AArch64::ADDXri && canAddBePartOfLOH(MI)) {
+    if (OpInfo.Type == MCLOH_AdrpLdr) {
+      OpInfo.Type = MCLOH_AdrpAddLdr;
+      OpInfo.IsCandidate = true;
+      OpInfo.MI1 = &MI;
+      return true;
+    } else if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) {
+      OpInfo.Type = MCLOH_AdrpAddStr;
+      OpInfo.IsCandidate = true;
+      OpInfo.MI1 = &MI;
+      return true;
     }
+  } else {
+    assert(MI.getOpcode() == AArch64::LDRXui && "Expect LDRXui");
+    assert((MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) &&
+           "Expected GOT relocation");
+    if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) {
+      OpInfo.Type = MCLOH_AdrpLdrGotStr;
+      OpInfo.IsCandidate = true;
+      OpInfo.MI1 = &MI;
+      return true;
+    } else if (OpInfo.Type == MCLOH_AdrpLdr) {
+      OpInfo.Type = MCLOH_AdrpLdrGotLdr;
+      OpInfo.IsCandidate = true;
+      OpInfo.MI1 = &MI;
+      return true;
+    }
+  }
+  return false;
+}
+
+/// Update state when seeing and ADRP instruction.
+static void handleADRP(const MachineInstr &MI, AArch64FunctionInfo &AFI,
+                       LOHInfo &Info) {
+  if (Info.LastADRP != nullptr) {
+    DEBUG(dbgs() << "Adding MCLOH_AdrpAdrp:\n" << '\t' << MI << '\t'
+                 << *Info.LastADRP);
+    AFI.addLOHDirective(MCLOH_AdrpAdrp, {&MI, Info.LastADRP});
+    ++NumADRPSimpleCandidate;
+  }
+
+  // Produce LOH directive if possible.
+  if (Info.IsCandidate) {
+    switch (Info.Type) {
+    case MCLOH_AdrpAdd:
+      DEBUG(dbgs() << "Adding MCLOH_AdrpAdd:\n" << '\t' << MI << '\t'
+                   << *Info.MI0);
+      AFI.addLOHDirective(MCLOH_AdrpAdd, {&MI, Info.MI0});
+      ++NumADRSimpleCandidate;
+      break;
+    case MCLOH_AdrpLdr:
+      if (supportLoadFromLiteral(*Info.MI0)) {
+        DEBUG(dbgs() << "Adding MCLOH_AdrpLdr:\n" << '\t' << MI << '\t'
+                     << *Info.MI0);
+        AFI.addLOHDirective(MCLOH_AdrpLdr, {&MI, Info.MI0});
+        ++NumADRPToLDR;
+      }
+      break;
+    case MCLOH_AdrpAddLdr:
+      DEBUG(dbgs() << "Adding MCLOH_AdrpAddLdr:\n" << '\t' << MI << '\t'
+                   << *Info.MI1 << '\t' << *Info.MI0);
+      AFI.addLOHDirective(MCLOH_AdrpAddLdr, {&MI, Info.MI1, Info.MI0});
+      ++NumADDToLDR;
+      break;
+    case MCLOH_AdrpAddStr:
+      if (Info.MI1 != nullptr) {
+        DEBUG(dbgs() << "Adding MCLOH_AdrpAddStr:\n" << '\t' << MI << '\t'
+                     << *Info.MI1 << '\t' << *Info.MI0);
+        AFI.addLOHDirective(MCLOH_AdrpAddStr, {&MI, Info.MI1, Info.MI0});
+        ++NumADDToSTR;
+      }
+      break;
+    case MCLOH_AdrpLdrGotLdr:
+      DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotLdr:\n" << '\t' << MI << '\t'
+                   << *Info.MI1 << '\t' << *Info.MI0);
+      AFI.addLOHDirective(MCLOH_AdrpLdrGotLdr, {&MI, Info.MI1, Info.MI0});
+      ++NumLDRToLDR;
+      break;
+    case MCLOH_AdrpLdrGotStr:
+      DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotStr:\n" << '\t' << MI << '\t'
+                   << *Info.MI1 << '\t' << *Info.MI0);
+      AFI.addLOHDirective(MCLOH_AdrpLdrGotStr, {&MI, Info.MI1, Info.MI0});
+      ++NumLDRToSTR;
+      break;
+    case MCLOH_AdrpLdrGot:
+      DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGot:\n" << '\t' << MI << '\t'
+                   << *Info.MI0);
+      AFI.addLOHDirective(MCLOH_AdrpLdrGot, {&MI, Info.MI0});
+      break;
+    case MCLOH_AdrpAdrp:
+      llvm_unreachable("MCLOH_AdrpAdrp not used in state machine");
+    }
+  }
+
+  handleClobber(Info);
+  Info.LastADRP = &MI;
+}
+
+static void handleRegMaskClobber(const uint32_t *RegMask, MCPhysReg Reg,
+                                 LOHInfo *LOHInfos) {
+  if (!MachineOperand::clobbersPhysReg(RegMask, Reg))
+    return;
+  int Idx = mapRegToGPRIndex(Reg);
+  if (Idx >= 0)
+    handleClobber(LOHInfos[Idx]);
+}
+
+static void handleNormalInst(const MachineInstr &MI, LOHInfo *LOHInfos) {
+  // Handle defs and regmasks.
+  for (const MachineOperand &MO : MI.operands()) {
+    if (MO.isRegMask()) {
+      const uint32_t *RegMask = MO.getRegMask();
+      for (MCPhysReg Reg : AArch64::GPR32RegClass)
+        handleRegMaskClobber(RegMask, Reg, LOHInfos);
+      for (MCPhysReg Reg : AArch64::GPR64RegClass)
+        handleRegMaskClobber(RegMask, Reg, LOHInfos);
+      continue;
+    }
+    if (!MO.isReg() || !MO.isDef())
+      continue;
+    int Idx = mapRegToGPRIndex(MO.getReg());
+    if (Idx < 0)
+      continue;
+    handleClobber(LOHInfos[Idx]);
+  }
+  // Handle uses.
+  for (const MachineOperand &MO : MI.uses()) {
+    if (!MO.isReg() || !MO.readsReg())
+      continue;
+    int Idx = mapRegToGPRIndex(MO.getReg());
+    if (Idx < 0)
+      continue;
+    handleUse(MI, MO, LOHInfos[Idx]);
   }
 }
 
@@ -1035,74 +489,59 @@
   if (skipFunction(*MF.getFunction()))
     return false;
 
-  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
-  const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
+  DEBUG(dbgs() << "********** AArch64 Collect LOH **********\n"
+               << "Looking in function " << MF.getName() << '\n');
 
-  MapRegToId RegToId;
-  MapIdToReg IdToReg;
-  AArch64FunctionInfo *AArch64FI = MF.getInfo<AArch64FunctionInfo>();
-  assert(AArch64FI && "No MachineFunctionInfo for this function!");
+  LOHInfo LOHInfos[N_GPR_REGS];
+  AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
+  for (const MachineBasicBlock &MBB : MF) {
+    // Reset register tracking state.
+    memset(LOHInfos, 0, sizeof(LOHInfos));
+    // Live-out registers are used.
+    for (const MachineBasicBlock *Succ : MBB.successors()) {
+      for (const auto &LI : Succ->liveins()) {
+        int RegIdx = mapRegToGPRIndex(LI.PhysReg);
+        if (RegIdx >= 0)
+          LOHInfos[RegIdx].OneUser = true;
+      }
+    }
 
-  DEBUG(dbgs() << "Looking for LOH in " << MF.getName() << '\n');
-
-  collectInvolvedReg(MF, RegToId, IdToReg, TRI);
-  if (RegToId.empty())
-    return false;
-
-  MachineInstr *DummyOp = nullptr;
-  if (BasicBlockScopeOnly) {
-    const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
-    // For local analysis, create a dummy operation to record uses that are not
-    // local.
-    DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc());
+    // Walk the basic block backwards and update the per register state machine
+    // in the process.
+    for (const MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
+      unsigned Opcode = MI.getOpcode();
+      switch (Opcode) {
+      case AArch64::ADDXri:
+      case AArch64::LDRXui:
+        if (canDefBePartOfLOH(MI)) {
+          const MachineOperand &Def = MI.getOperand(0);
+          const MachineOperand &Op = MI.getOperand(1);
+          assert(Def.isReg() && Def.isDef() && "Expected reg def");
+          assert(Op.isReg() && Op.isUse() && "Expected reg use");
+          int DefIdx = mapRegToGPRIndex(Def.getReg());
+          int OpIdx = mapRegToGPRIndex(Op.getReg());
+          if (DefIdx >= 0 && OpIdx >= 0 &&
+              handleMiddleInst(MI, LOHInfos[DefIdx], LOHInfos[OpIdx]))
+            continue;
+        }
+        break;
+      case AArch64::ADRP:
+        const MachineOperand &Op0 = MI.getOperand(0);
+        int Idx = mapRegToGPRIndex(Op0.getReg());
+        if (Idx >= 0) {
+          handleADRP(MI, AFI, LOHInfos[Idx]);
+          continue;
+        }
+        break;
+      }
+      handleNormalInst(MI, LOHInfos);
+    }
   }
 
-  unsigned NbReg = RegToId.size();
-  bool Modified = false;
-
-  // Start with ADRP.
-  InstrToInstrs *ColorOpToReachedUses = new InstrToInstrs[NbReg];
-
-  // Compute the reaching def in ADRP mode, meaning ADRP definitions
-  // are first considered as uses.
-  reachingDef(MF, ColorOpToReachedUses, RegToId, true, DummyOp);
-  DEBUG(dbgs() << "ADRP reaching defs\n");
-  DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
-
-  // Translate the definition to uses map into a use to definitions map to ease
-  // statistic computation.
-  InstrToInstrs ADRPToReachingDefs;
-  reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true);
-
-  // Compute LOH for ADRP.
-  computeADRP(ADRPToReachingDefs, *AArch64FI, MDT);
-  delete[] ColorOpToReachedUses;
-
-  // Continue with general ADRP -> ADD/LDR -> LDR/STR pattern.
-  ColorOpToReachedUses = new InstrToInstrs[NbReg];
-
-  // first perform a regular reaching def analysis.
-  reachingDef(MF, ColorOpToReachedUses, RegToId, false, DummyOp);
-  DEBUG(dbgs() << "All reaching defs\n");
-  DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
-
-  // Turn that into a use to defs to ease statistic computation.
-  InstrToInstrs UsesToReachingDefs;
-  reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false);
-
-  // Compute other than AdrpAdrp LOH.
-  computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *AArch64FI, RegToId,
-                MDT);
-  delete[] ColorOpToReachedUses;
-
-  if (BasicBlockScopeOnly)
-    MF.DeleteMachineInstr(DummyOp);
-
-  return Modified;
+  // Return "no change": The pass only collects information.
+  return false;
 }
 
-/// createAArch64CollectLOHPass - returns an instance of the Statistic for
-/// linker optimization pass.
 FunctionPass *llvm::createAArch64CollectLOHPass() {
   return new AArch64CollectLOH();
 }