[X86] Improvement in CodeGen instruction selection for LEAs.

Summary:
1/  Operand folding during complex pattern matching for LEAs has been extended so that it promotes the Scale to
     accommodate an operand that appears more than once in the DAG, e.g.
                 T1 = A + B
                 T2 = T1 + 10
                 T3 = T2 + A
    For the above DAG rooted at T3, the X86AddressMode will now look like
                Base = B, Index = A, Scale = 2, Disp = 10
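    A minimal source-level sketch of such a DAG (illustrative only; the register assignment
    below assumes the SysV x86-64 calling convention with A in %rdi and B in %rsi):
                 long f(long A, long B) { return A + B + 10 + A; }
    could now be selected as a single LEA such as
                 leaq 10(%rsi,%rdi,2), %rax      # Base = B, Index = A, Scale = 2, Disp = 10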

2/  Later in the pipeline, during the OptimizeLEAPass, factorization is now performed over LEAs so that, when there is
     an opportunity, complex LEAs (having 3 operands) can be factored out, e.g.
                 leal 1(%rax,%rcx,1), %rdx
                 leal 1(%rax,%rcx,2), %rcx
      will be factored as follows
                 leal 1(%rax,%rcx,1), %rdx
                 leal (%rdx,%rcx), %rcx
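     The rewrite is arithmetically sound: 1 + %rax + 2*%rcx = (1 + %rax + 1*%rcx) + 1*%rcx, so
     the higher-scale LEA can reuse the lower-scale LEA's result (%rdx) as its base, with the
     new scale being the difference of the two original scales and a zero displacement.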

3/ Aggressive operand folding during addressing-mode (AM) based selection for LEAs is now sensitive to loops, so it avoids creating any complex LEAs within a loop.

4/ Simplify LEA converts lea (BASE, 1, INDEX, 0) --> add (BASE, INDEX), which offers better throughput.
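     For example (shown for the case where the LEA's destination coincides with its base
     register; in general the two-address pass may insert a register copy first):
                 leal (%rax,%rcx), %eax
     becomes
                 addl %ecx, %eax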

PR32755 will be addressed by this patch.

Previous patch revisions: r313343, r314886

Reviewers: lsaba, RKSimon, craig.topper, qcolombet, jmolloy, jbhateja

Reviewed By: lsaba, RKSimon, jbhateja

Subscribers: jmolloy, spatel, igorb, llvm-commits

Differential Revision: https://reviews.llvm.org/D35014

llvm-svn: 319543
diff --git a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
index cc13686..7783c11 100644
--- a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -27,6 +27,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -58,6 +59,7 @@
                      cl::init(false));
 
 STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions");
+STATISTIC(NumFactoredLEAs, "Number of LEAs factorized");
 STATISTIC(NumRedundantLEAs, "Number of redundant LEA instructions removed");
 
 /// \brief Returns true if two machine operands are identical and they are not
@@ -65,6 +67,10 @@
 static inline bool isIdenticalOp(const MachineOperand &MO1,
                                  const MachineOperand &MO2);
 
+/// \brief Returns true if two machine instructions have identical operands.
+static bool isIdenticalMI(MachineRegisterInfo *MRI, const MachineOperand &MO1,
+                          const MachineOperand &MO2);
+
 /// \brief Returns true if two address displacement operands are of the same
 /// type and use the same symbol/index/address regardless of the offset.
 static bool isSimilarDispOp(const MachineOperand &MO1,
@@ -73,6 +79,9 @@
 /// \brief Returns true if the instruction is LEA.
 static inline bool isLEA(const MachineInstr &MI);
 
+/// \brief Returns true if the definition of the operand is a copy-like instruction.
+static bool isDefCopyLike(MachineRegisterInfo *MRI, const MachineOperand &Opr);
+
 namespace {
 
 /// A key based on instruction's memory operands.
@@ -80,15 +89,35 @@
 public:
   MemOpKey(const MachineOperand *Base, const MachineOperand *Scale,
            const MachineOperand *Index, const MachineOperand *Segment,
-           const MachineOperand *Disp)
-      : Disp(Disp) {
+           const MachineOperand *Disp, bool DispCheck = false)
+      : Disp(Disp), DeepCheck(DispCheck) {
     Operands[0] = Base;
     Operands[1] = Scale;
     Operands[2] = Index;
     Operands[3] = Segment;
   }
 
+  /// Checks whether the operands of this MemOpKey and Other are identical;
+  /// if a Base or Index operand is defined by a copy-like instruction
+  /// (e.g. SUBREG_TO_REG), the operands of the defining MI are compared.
+  bool performDeepCheck(const MemOpKey &Other) const {
+    MachineInstr *MI = const_cast<MachineInstr *>(Operands[0]->getParent());
+    MachineRegisterInfo *MRI = MI->getRegInfo();
+
+    for (int i = 0; i < 4; i++) {
+      bool CopyLike = isDefCopyLike(MRI, *Operands[i]);
+      if (CopyLike && !isIdenticalMI(MRI, *Operands[i], *Other.Operands[i]))
+        return false;
+      else if (!CopyLike && !isIdenticalOp(*Operands[i], *Other.Operands[i]))
+        return false;
+    }
+    return isIdenticalOp(*Disp, *Other.Disp);
+  }
+
   bool operator==(const MemOpKey &Other) const {
+    if (DeepCheck)
+      return performDeepCheck(Other);
+
     // Addresses' bases, scales, indices and segments must be identical.
     for (int i = 0; i < 4; ++i)
       if (!isIdenticalOp(*Operands[i], *Other.Operands[i]))
@@ -106,6 +135,12 @@
 
   // Address' displacement operand.
   const MachineOperand *Disp;
+
+  // If true, checks that the address's base, index, segment and
+  // displacement are identical; in addition, if the base/index
+  // is defined by a copy-like instruction, it further compares
+  // the operands of the defining instruction.
+  bool DeepCheck;
 };
 
 } // end anonymous namespace
@@ -131,12 +166,34 @@
   static unsigned getHashValue(const MemOpKey &Val) {
     // Checking any field of MemOpKey is enough to determine if the key is
     // empty or tombstone.
+    hash_code Hash(0);
     assert(Val.Disp != PtrInfo::getEmptyKey() && "Cannot hash the empty key");
     assert(Val.Disp != PtrInfo::getTombstoneKey() &&
            "Cannot hash the tombstone key");
 
-    hash_code Hash = hash_combine(*Val.Operands[0], *Val.Operands[1],
-                                  *Val.Operands[2], *Val.Operands[3]);
+    auto getMIHash = [](MachineInstr *MI) -> hash_code {
+      hash_code h(0);
+      for (unsigned i = 1, e = MI->getNumOperands(); i < e; i++)
+        h = hash_combine(h, MI->getOperand(i));
+      return h;
+    };
+
+    const MachineOperand &Base = *Val.Operands[0];
+    const MachineOperand &Index = *Val.Operands[2];
+    MachineInstr *MI = const_cast<MachineInstr *>(Base.getParent());
+    MachineRegisterInfo *MRI = MI->getRegInfo();
+
+    if (isDefCopyLike(MRI, Base))
+      Hash = getMIHash(MRI->getVRegDef(Base.getReg()));
+    else
+      Hash = hash_combine(Hash, Base);
+
+    if (isDefCopyLike(MRI, Index))
+      Hash = getMIHash(MRI->getVRegDef(Index.getReg()));
+    else
+      Hash = hash_combine(Hash, Index);
+
+    Hash = hash_combine(Hash, *Val.Operands[1], *Val.Operands[3]);
 
     // If the address displacement is an immediate, it should not affect the
     // hash so that memory operands which differ only be immediate displacement
@@ -196,6 +253,16 @@
                   &MI.getOperand(N + X86::AddrDisp));
 }
 
+static inline MemOpKey getMemOpCSEKey(const MachineInstr &MI, unsigned N) {
+  static MachineOperand DummyScale = MachineOperand::CreateImm(1);
+  assert((isLEA(MI) || MI.mayLoadOrStore()) &&
+         "The instruction must be a LEA, a load or a store");
+  return MemOpKey(&MI.getOperand(N + X86::AddrBaseReg), &DummyScale,
+                  &MI.getOperand(N + X86::AddrIndexReg),
+                  &MI.getOperand(N + X86::AddrSegmentReg),
+                  &MI.getOperand(N + X86::AddrDisp), true);
+}
+
 static inline bool isIdenticalOp(const MachineOperand &MO1,
                                  const MachineOperand &MO2) {
   return MO1.isIdenticalTo(MO2) &&
@@ -203,6 +270,27 @@
           !TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
 }
 
+static bool isIdenticalMI(MachineRegisterInfo *MRI, const MachineOperand &MO1,
+                          const MachineOperand &MO2) {
+  MachineInstr *MI1 = nullptr;
+  MachineInstr *MI2 = nullptr;
+  if (!MO1.isReg() || !MO2.isReg())
+    return false;
+
+  MI1 = MRI->getVRegDef(MO1.getReg());
+  MI2 = MRI->getVRegDef(MO2.getReg());
+  if (!MI1 || !MI2)
+    return false;
+  if (MI1->getOpcode() != MI2->getOpcode())
+    return false;
+  if (MI1->getNumOperands() != MI2->getNumOperands())
+    return false;
+  for (unsigned i = 1, e = MI1->getNumOperands(); i < e; ++i)
+    if (!isIdenticalOp(MI1->getOperand(i), MI2->getOperand(i)))
+      return false;
+  return true;
+}
+
 #ifndef NDEBUG
 static bool isValidDispOp(const MachineOperand &MO) {
   return MO.isImm() || MO.isCPI() || MO.isJTI() || MO.isSymbol() ||
@@ -234,8 +322,150 @@
          Opcode == X86::LEA64r || Opcode == X86::LEA64_32r;
 }
 
+static bool isDefCopyLike(MachineRegisterInfo *MRI, const MachineOperand &Opr) {
+  bool isInstrErased = !(Opr.isReg() && Opr.getParent()->getParent());
+  if (!Opr.isReg() || isInstrErased ||
+      TargetRegisterInfo::isPhysicalRegister(Opr.getReg()))
+    return false;
+  MachineInstr *MI = MRI->getVRegDef(Opr.getReg());
+  return MI && MI->isCopyLike();
+}
+
 namespace {
 
+/// This class captures the functions and attributes
+/// needed to factorize LEAs within and across basic
+/// blocks. LEA instructions with the same BASE, OFFSET
+/// and INDEX are candidates for factorization.
+class FactorizeLEAOpt {
+public:
+  using LEAListT = std::list<MachineInstr *>;
+  using LEAMapT = DenseMap<MemOpKey, LEAListT>;
+  using ValueT = DenseMap<MemOpKey, unsigned>;
+  using ScopeEntryT = std::pair<MachineBasicBlock *, ValueT>;
+  using ScopeStackT = std::vector<ScopeEntryT>;
+
+  FactorizeLEAOpt() = default;
+  FactorizeLEAOpt(const FactorizeLEAOpt &) = delete;
+  FactorizeLEAOpt &operator=(const FactorizeLEAOpt &) = delete;
+
+  void performCleanup() {
+    for (auto LEA : removedLEAs)
+      LEA->eraseFromParent();
+    LEAs.clear();
+    Stack.clear();
+    removedLEAs.clear();
+  }
+
+  LEAMapT &getLEAMap() { return LEAs; }
+  ScopeEntryT *getTopScope() { return &Stack.back(); }
+
+  void addForLazyRemoval(MachineInstr *Instr) { removedLEAs.insert(Instr); }
+
+  bool checkIfScheduledForRemoval(MachineInstr *Instr) {
+    return removedLEAs.find(Instr) != removedLEAs.end();
+  }
+
+  /// Pushes the ScopeEntry for the BasicBlock onto the Stack.
+  /// Also traverses the block's instruction list and updates
+  /// the LEAs map and the ScopeEntry for each LEA instruction
+  /// found, using insertLEA().
+  void collectDataForBasicBlock(MachineBasicBlock *MBB);
+
+  /// Stores the size of the MachineInstr list corresponding
+  /// to the LEA's key from the LEAs map into the ScopeEntry of
+  /// the basic block, then inserts the LEA at the beginning
+  /// of the list.
+  void insertLEA(MachineInstr *MI);
+
+  /// Pops the ScopeEntry of the topmost BasicBlock off the stack
+  /// and removes the LEA instructions contained in that scope
+  /// from the LEAs map.
+  void removeDataForBasicBlock();
+
+  /// If the LEA contains physical registers then it is not a candidate
+  /// for factorization, since physical registers may violate the SSA
+  /// semantics of MI.
+  bool containsPhyReg(MachineInstr *MI, unsigned RecLevel);
+
+private:
+  ScopeStackT Stack;
+  LEAMapT LEAs;
+  std::set<MachineInstr *> removedLEAs;
+};
+
+void FactorizeLEAOpt::collectDataForBasicBlock(MachineBasicBlock *MBB) {
+  ValueT EmptyMap;
+  ScopeEntryT SE = std::make_pair(MBB, EmptyMap);
+  Stack.push_back(SE);
+  for (auto &MI : *MBB) {
+    if (isLEA(MI))
+      insertLEA(&MI);
+  }
+}
+
+void FactorizeLEAOpt::removeDataForBasicBlock() {
+  ScopeEntryT &SE = Stack.back();
+  for (auto MapEntry : SE.second) {
+    LEAMapT::iterator Itr = LEAs.find(MapEntry.first);
+    assert((Itr != LEAs.end()) &&
+           "LEAs map must have a node corresponding to ScopeEntry's Key.");
+
+    while (((*Itr).second.size() > MapEntry.second))
+      (*Itr).second.pop_front();
+    // If the list becomes empty, remove the entry from the LEAs map.
+    if ((*Itr).second.empty())
+      LEAs.erase(Itr);
+  }
+  Stack.pop_back();
+}
+
+bool FactorizeLEAOpt::containsPhyReg(MachineInstr *MI, unsigned RecLevel) {
+  if (!MI || !RecLevel)
+    return false;
+
+  MachineRegisterInfo *MRI = MI->getRegInfo();
+  for (auto Operand : MI->operands()) {
+    if (!Operand.isReg())
+      continue;
+    if (TargetRegisterInfo::isPhysicalRegister(Operand.getReg()))
+      return true;
+    MachineInstr *OperDefMI = MRI->getVRegDef(Operand.getReg());
+    if (OperDefMI && (MI != OperDefMI) && OperDefMI->isCopyLike() &&
+        containsPhyReg(OperDefMI, RecLevel - 1))
+      return true;
+  }
+  return false;
+}
+
+void FactorizeLEAOpt::insertLEA(MachineInstr *MI) {
+  unsigned lsize;
+  if (containsPhyReg(MI, 2))
+    return;
+
+  // Factorization is beneficial only for complex LEAs.
+  MachineOperand &Base = MI->getOperand(1);
+  MachineOperand &Index = MI->getOperand(3);
+  MachineOperand &Offset = MI->getOperand(4);
+  if ((Offset.isImm() && !Offset.getImm()) ||
+      (!Base.isReg() || !Base.getReg()) || (!Index.isReg() || !Index.getReg()))
+    return;
+
+  MemOpKey Key = getMemOpCSEKey(*MI, 1);
+  ScopeEntryT *TopScope = getTopScope();
+
+  LEAMapT::iterator Itr = LEAs.find(Key);
+  if (Itr == LEAs.end()) {
+    lsize = 0;
+    LEAs[Key].push_front(MI);
+  } else {
+    lsize = (*Itr).second.size();
+    (*Itr).second.push_front(MI);
+  }
+  if (TopScope->second.find(Key) == TopScope->second.end())
+    TopScope->second[Key] = lsize;
+}
+
 class OptimizeLEAPass : public MachineFunctionPass {
 public:
   OptimizeLEAPass() : MachineFunctionPass(ID) {}
@@ -247,6 +477,12 @@
   /// been calculated by LEA. Also, remove redundant LEAs.
   bool runOnMachineFunction(MachineFunction &MF) override;
 
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+    AU.addRequired<MachineDominatorTree>();
+  }
+
 private:
   using MemOpMap = DenseMap<MemOpKey, SmallVector<MachineInstr *, 16>>;
 
@@ -292,8 +528,24 @@
   /// \brief Removes LEAs which calculate similar addresses.
   bool removeRedundantLEAs(MemOpMap &LEAs);
 
+  /// \brief Visit basic blocks, collect LEAs in a scoped
+  /// hash map (FactorizeLEAOpt::LEAs) and try to factor them out.
+  bool FactorizeLEAsAllBasicBlocks(MachineFunction &MF);
+
+  bool FactorizeLEAsBasicBlock(MachineDomTreeNode *DN);
+
+  /// \brief Factor out LEAs which share Base, Index, Offset and Segment.
+  bool processBasicBlock(const MachineBasicBlock &MBB);
+
+  /// \brief Try to replace an LEA with a lower-strength instruction
+  /// to improve latency and throughput.
+  bool strengthReduceLEAs(MemOpMap &LEAs, const MachineBasicBlock &MBB);
+
   DenseMap<const MachineInstr *, unsigned> InstrPos;
 
+  FactorizeLEAOpt FactorOpt;
+
+  MachineDominatorTree *DT;
   MachineRegisterInfo *MRI;
   const X86InstrInfo *TII;
   const X86RegisterInfo *TRI;
@@ -489,7 +741,9 @@
 bool OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) {
   bool Changed = false;
 
-  assert(!LEAs.empty());
+  if (LEAs.empty())
+    return Changed;
+
   MachineBasicBlock *MBB = (*LEAs.begin()->second.begin())->getParent();
 
   // Process all instructions in basic block.
@@ -659,6 +913,10 @@
         // Erase removed LEA from the list.
         I2 = List.erase(I2);
 
+        // If List becomes empty remove it from LEAs map.
+        if (List.empty())
+          LEAs.erase(E.first);
+
         Changed = true;
       }
       ++I1;
@@ -668,6 +926,217 @@
   return Changed;
 }
 
+static inline int getADDrrFromLEA(int LEAOpcode) {
+  switch (LEAOpcode) {
+  default:
+    llvm_unreachable("Unexpected LEA instruction");
+  case X86::LEA16r:
+    return X86::ADD16rr;
+  case X86::LEA32r:
+    return X86::ADD32rr;
+  case X86::LEA64_32r:
+  case X86::LEA64r:
+    return X86::ADD64rr;
+  }
+}
+
+bool OptimizeLEAPass::strengthReduceLEAs(MemOpMap &LEAs,
+                                         const MachineBasicBlock &BB) {
+  bool Changed = false;
+
+  // Loop over all entries in the table.
+  for (auto &E : LEAs) {
+    auto &List = E.second;
+
+    // Loop over all LEA pairs.
+    auto I1 = List.begin();
+    while (I1 != List.end()) {
+      MachineInstrBuilder NewMI;
+      MachineInstr &First = **I1;
+      MachineOperand &Res = First.getOperand(0);
+      MachineOperand &Base = First.getOperand(1);
+      MachineOperand &Scale = First.getOperand(2);
+      MachineOperand &Index = First.getOperand(3);
+      MachineOperand &Offset = First.getOperand(4);
+
+      const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(First.getOpcode()));
+      const DebugLoc DL = First.getDebugLoc();
+
+      if (!Base.isReg() || !Index.isReg() || !Index.getReg()) {
+        I1++;
+        continue;
+      }
+
+      if (TargetRegisterInfo::isPhysicalRegister(Res.getReg()) ||
+          TargetRegisterInfo::isPhysicalRegister(Base.getReg()) ||
+          TargetRegisterInfo::isPhysicalRegister(Index.getReg())) {
+        I1++;
+        continue;
+      }
+
+      // Check for register class compatibility between Result and
+      // Index operands.
+      const TargetRegisterClass *ResRC = MRI->getRegClass(Res.getReg());
+      const TargetRegisterClass *IndexRC = MRI->getRegClass(Index.getReg());
+      if (TRI->getRegSizeInBits(*ResRC) != TRI->getRegSizeInBits(*IndexRC)) {
+        I1++;
+        continue;
+      }
+
+      MachineBasicBlock &MBB = *(const_cast<MachineBasicBlock *>(&BB));
+      // R = B + I
+      if (Scale.isImm() && Scale.getImm() == 1 && Offset.isImm() &&
+          !Offset.getImm()) {
+        NewMI = BuildMI(MBB, &First, DL, ADDrr)
+                    .addDef(Res.getReg())
+                    .addUse(Base.getReg())
+                    .addUse(Index.getReg());
+        Changed = NewMI.getInstr() != nullptr;
+        First.eraseFromParent();
+        I1 = List.erase(I1);
+
+        // If List becomes empty remove it from LEAs map.
+        if (List.empty())
+          LEAs.erase(E.first);
+      } else
+        I1++;
+    }
+  }
+  return Changed;
+}
+
+bool OptimizeLEAPass::processBasicBlock(const MachineBasicBlock &MBB) {
+  bool cseDone = false;
+
+  // Legal scale values are 1, 2, 4 and 8.
+  auto LegalScale = [](int scale) {
+    return scale == 1 || scale == 2 || scale == 4 || scale == 8;
+  };
+
+  auto CompareFn = [](const MachineInstr *Arg1,
+                      const MachineInstr *Arg2) -> bool {
+    return Arg1->getOperand(2).getImm() >= Arg2->getOperand(2).getImm();
+  };
+
+  // Loop over all entries in the table.
+  for (auto &E : FactorOpt.getLEAMap()) {
+    auto &List = E.second;
+    if (List.size() > 1)
+      List.sort(CompareFn);
+
+    // Loop over all LEA pairs.
+    for (auto Iter1 = List.begin(); Iter1 != List.end(); Iter1++) {
+      for (auto Iter2 = std::next(Iter1); Iter2 != List.end(); Iter2++) {
+        MachineInstr &LI1 = **Iter1;
+        MachineInstr &LI2 = **Iter2;
+
+        if (!DT->dominates(&LI2, &LI1))
+          continue;
+
+        int Scale1 = LI1.getOperand(2).getImm();
+        int Scale2 = LI2.getOperand(2).getImm();
+        assert(LI2.getOperand(0).isReg() && "Result is a VirtualReg");
+        DebugLoc DL = LI1.getDebugLoc();
+
+        // Continue if instruction has already been factorized.
+        if (FactorOpt.checkIfScheduledForRemoval(&LI1))
+          continue;
+
+        int Factor = Scale1 - Scale2;
+        if (Factor > 0 && LegalScale(Factor)) {
+          MachineOperand NewBase = LI2.getOperand(0);
+          MachineOperand NewIndex = LI1.getOperand(3);
+
+          const TargetRegisterClass *LI2ResRC =
+              MRI->getRegClass(LI2.getOperand(0).getReg());
+          const TargetRegisterClass *LI1BaseRC =
+              MRI->getRegClass(LI1.getOperand(1).getReg());
+
+          if (TRI->getRegSizeInBits(*LI1BaseRC) >
+              TRI->getRegSizeInBits(*LI2ResRC)) {
+            MachineInstr *LI1IndexDef =
+                MRI->getVRegDef(LI1.getOperand(3).getReg());
+            if (LI1IndexDef->getOpcode() != TargetOpcode::SUBREG_TO_REG)
+              continue;
+            MachineOperand &SubReg = LI1IndexDef->getOperand(2);
+            const TargetRegisterClass *SubRegRC =
+                MRI->getRegClass(SubReg.getReg());
+            if (TRI->getRegSizeInBits(*SubRegRC) !=
+                TRI->getRegSizeInBits(*LI2ResRC))
+              continue;
+            NewIndex = SubReg;
+          }
+
+          DEBUG(dbgs() << "CSE LEAs: Candidate to replace: "; LI1.dump(););
+          MachineInstrBuilder NewMI =
+              BuildMI(*(const_cast<MachineBasicBlock *>(&MBB)), &LI1, DL,
+                      TII->get(LI1.getOpcode()))
+                  .addDef(LI1.getOperand(0).getReg()) // Dst   = Dst of LI1.
+                    .addUse(NewBase.getReg())           // Base  = Dst of LI2.
+                  .addImm(Factor)            // Scale = Diff b/w scales.
+                  .addUse(NewIndex.getReg()) // Index = Index of LI1.
+                  .addImm(0)                 // Disp  = 0
+                  .addUse(
+                      LI1.getOperand(5).getReg()); // Segment = Segment of LI1.
+
+          cseDone = NewMI.getInstr() != nullptr;
+
+          /// To preserve SSA form we need to remove the Def flag
+          /// from the old result.
+          LI1.getOperand(0).setIsDef(false);
+
+          /// Lazy removal ensures that the replaced LEA remains until
+          /// we finish processing all the basic blocks. This provides
+          /// an opportunity for further factorization based on the
+          /// replaced LEA, which is legal since it has the same
+          /// destination as the newly formed LEA.
+          FactorOpt.addForLazyRemoval(&LI1);
+
+          NumFactoredLEAs++;
+          DEBUG(dbgs() << "CSE LEAs: Replaced by: "; NewMI->dump(););
+        }
+      }
+    }
+  }
+  return cseDone;
+}
+
+bool OptimizeLEAPass::FactorizeLEAsBasicBlock(MachineDomTreeNode *DN) {
+  bool Changed = false;
+  using StackT = SmallVector<MachineDomTreeNode *, 16>;
+  using VisitedNodeMapT = SmallSet<MachineDomTreeNode *, 16>;
+
+  StackT WorkList;
+  VisitedNodeMapT DomNodeMap;
+
+  WorkList.push_back(DN);
+  while (!WorkList.empty()) {
+    MachineDomTreeNode *MDN = WorkList.back();
+    FactorOpt.collectDataForBasicBlock(MDN->getBlock());
+    Changed |= processBasicBlock(*MDN->getBlock());
+
+    if (DomNodeMap.find(MDN) == DomNodeMap.end()) {
+      DomNodeMap.insert(MDN);
+      for (auto Child : MDN->getChildren())
+        WorkList.push_back(Child);
+    }
+
+    MachineDomTreeNode *TDM = WorkList.back();
+    if (MDN->getLevel() == TDM->getLevel()) {
+      FactorOpt.removeDataForBasicBlock();
+      DomNodeMap.erase(MDN);
+      WorkList.pop_back();
+    }
+  }
+  return Changed;
+}
+
+bool OptimizeLEAPass::FactorizeLEAsAllBasicBlocks(MachineFunction &MF) {
+  bool Changed = FactorizeLEAsBasicBlock(DT->getRootNode());
+  FactorOpt.performCleanup();
+  return Changed;
+}
+
 bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
   bool Changed = false;
 
@@ -677,6 +1146,10 @@
   MRI = &MF.getRegInfo();
   TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
   TRI = MF.getSubtarget<X86Subtarget>().getRegisterInfo();
+  DT = &getAnalysis<MachineDominatorTree>();
+
+  // Attempt to factorize LEAs.
+  Changed |= FactorizeLEAsAllBasicBlocks(MF);
 
   // Process all basic blocks.
   for (auto &MBB : MF) {
@@ -693,6 +1166,9 @@
     // Remove redundant LEA instructions.
     Changed |= removeRedundantLEAs(LEAs);
 
+    // Strength reduce LEA instructions.
+    Changed |= strengthReduceLEAs(LEAs, MBB);
+
     // Remove redundant address calculations. Do it only for -Os/-Oz since only
     // a code size gain is expected from this part of the pass.
     if (MF.getFunction()->optForSize())