[LSR] Allow giving priority to post-incrementing addressing modes

Implement a TTI interface, shouldFavorPostInc(), for targets to indicate
that LSR should give priority to post-incrementing addressing modes.

Combination of patches by Sebastian Pop and Brendon Cahoon.
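
The new checks are all guarded by TTI.shouldFavorPostInc(), so targets
that do not opt in keep the current LSR behavior. As an illustrative
sketch only (not part of this patch; the class name and elided
boilerplate are placeholders), a target whose loads and stores support
post-indexed forms could opt in from its TTI implementation:

  class MyTargetTTIImpl : public BasicTTIImplBase<MyTargetTTIImpl> {
    // ... usual target TTI boilerplate elided ...
  public:
    // Ask LSR's cost model to prefer formulae that can be lowered to
    // post-increment (post-indexed) loads and stores.
    bool shouldFavorPostInc() const { return true; }
  };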

Differential Revision: https://reviews.llvm.org/D44758

llvm-svn: 328490
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 5f7aaa6..45d9058 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -185,6 +185,8 @@
                                 unsigned AS = UnknownAddressSpace) {
     return MemAccessTy(Type::getVoidTy(Ctx), AS);
   }
+
+  Type *getType() { return MemTy; }
 };
 
 /// This class holds data which is used to order reuse candidates.
@@ -1040,12 +1042,14 @@
   void RateRegister(const SCEV *Reg,
                     SmallPtrSetImpl<const SCEV *> &Regs,
                     const Loop *L,
-                    ScalarEvolution &SE, DominatorTree &DT);
+                    ScalarEvolution &SE, DominatorTree &DT,
+                    const TargetTransformInfo &TTI);
   void RatePrimaryRegister(const SCEV *Reg,
                            SmallPtrSetImpl<const SCEV *> &Regs,
                            const Loop *L,
                            ScalarEvolution &SE, DominatorTree &DT,
-                           SmallPtrSetImpl<const SCEV *> *LoserRegs);
+                           SmallPtrSetImpl<const SCEV *> *LoserRegs,
+                           const TargetTransformInfo &TTI);
 };
 
 /// An operand value in an instruction which is to be replaced with some
@@ -1194,7 +1198,8 @@
 void Cost::RateRegister(const SCEV *Reg,
                         SmallPtrSetImpl<const SCEV *> &Regs,
                         const Loop *L,
-                        ScalarEvolution &SE, DominatorTree &DT) {
+                        ScalarEvolution &SE, DominatorTree &DT,
+                        const TargetTransformInfo &TTI) {
   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
     // If this is an addrec for another loop, it should be an invariant
     // with respect to L since L is the innermost loop (at least
@@ -1215,13 +1220,28 @@
       ++C.NumRegs;
       return;
     }
-    C.AddRecCost += 1; /// TODO: This should be a function of the stride.
+
+    unsigned LoopCost = 1;
+    if (TTI.shouldFavorPostInc()) {
+      const SCEV *LoopStep = AR->getStepRecurrence(SE);
+      if (isa<SCEVConstant>(LoopStep)) {
+        // Check if a post-indexed load/store can be used.
+        if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
+            TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
+          const SCEV *LoopStart = AR->getStart();
+          if (!isa<SCEVConstant>(LoopStart) &&
+              SE.isLoopInvariant(LoopStart, L))
+            LoopCost = 0;
+        }
+      }
+    }
+    C.AddRecCost += LoopCost;
 
     // Add the step value register, if it needs one.
     // TODO: The non-affine case isn't precisely modeled here.
     if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
       if (!Regs.count(AR->getOperand(1))) {
-        RateRegister(AR->getOperand(1), Regs, L, SE, DT);
+        RateRegister(AR->getOperand(1), Regs, L, SE, DT, TTI);
         if (isLoser())
           return;
       }
@@ -1249,13 +1269,14 @@
                                SmallPtrSetImpl<const SCEV *> &Regs,
                                const Loop *L,
                                ScalarEvolution &SE, DominatorTree &DT,
-                               SmallPtrSetImpl<const SCEV *> *LoserRegs) {
+                               SmallPtrSetImpl<const SCEV *> *LoserRegs,
+                               const TargetTransformInfo &TTI) {
   if (LoserRegs && LoserRegs->count(Reg)) {
     Lose();
     return;
   }
   if (Regs.insert(Reg).second) {
-    RateRegister(Reg, Regs, L, SE, DT);
+    RateRegister(Reg, Regs, L, SE, DT, TTI);
     if (LoserRegs && isLoser())
       LoserRegs->insert(Reg);
   }
@@ -1279,7 +1300,7 @@
       Lose();
       return;
     }
-    RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
+    RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs, TTI);
     if (isLoser())
       return;
   }
@@ -1288,7 +1309,7 @@
       Lose();
       return;
     }
-    RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
+    RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs, TTI);
     if (isLoser())
       return;
   }
@@ -3465,12 +3486,45 @@
   return S;
 }
 
+/// Return true if the SCEV represents a value that may end up as a
+/// post-increment operation.
+static bool mayUsePostIncMode(const TargetTransformInfo &TTI,
+                              LSRUse &LU, const SCEV *S, const Loop *L,
+                              ScalarEvolution &SE) {
+  if (LU.Kind != LSRUse::Address ||
+      !LU.AccessTy.getType()->isIntOrIntVectorTy())
+    return false;
+  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
+  if (!AR)
+    return false;
+  const SCEV *LoopStep = AR->getStepRecurrence(SE);
+  if (!isa<SCEVConstant>(LoopStep))
+    return false;
+  if (LU.AccessTy.getType()->getScalarSizeInBits() !=
+      LoopStep->getType()->getScalarSizeInBits())
+    return false;
+  // Check if a post-indexed load/store can be used.
+  if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
+      TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
+    const SCEV *LoopStart = AR->getStart();
+    if (!isa<SCEVConstant>(LoopStart) && SE.isLoopInvariant(LoopStart, L))
+      return true;
+  }
+  return false;
+}
+
 /// \brief Helper function for LSRInstance::GenerateReassociations.
 void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                              const Formula &Base,
                                              unsigned Depth, size_t Idx,
                                              bool IsScaledReg) {
   const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+  // Don't generate reassociations for the base register of a value that
+  // may generate a post-increment operator. The reason is that the
+  // reassociations cause extra base+register formulas to be created,
+  // and possibly chosen, but the post-increment is more efficient.
+  if (TTI.shouldFavorPostInc() && mayUsePostIncMode(TTI, LU, BaseReg, L, SE))
+    return;
   SmallVector<const SCEV *, 8> AddOps;
   const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
   if (Remainder)
@@ -4039,6 +4093,9 @@
           NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
           if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
                           LU.Kind, LU.AccessTy, NewF)) {
+            if (TTI.shouldFavorPostInc() &&
+                mayUsePostIncMode(TTI, LU, OrigReg, this->L, SE))
+              continue;
             if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
               continue;
             NewF = F;