Generalize IVUsers to track arbitrary expressions rather than expressions
explicitly split into stride-and-offset pairs. Also, add the
ability to track multiple post-increment loops on the same expression.
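
For illustration only (a minimal sketch with placeholder types, not the
actual IVUsers interface), the representational change is roughly:

  // Conceptual sketch only -- placeholder types stand in for the real LLVM
  // classes (llvm::SCEV, llvm::Loop); this is not the actual IVUsers code.
  #include <set>

  struct SCEV {};
  struct Loop {};
  typedef std::set<const Loop *> PostIncLoopSet;

  // Before: each IV use carried an explicit (stride, offset) pair and a
  // single post-increment flag, so only one loop's post-increment state
  // could be represented per use.
  struct OldUse {
    const SCEV *Stride;
    const SCEV *Offset;
    bool IsUseOfPostIncrementedValue;
  };

  // After: each IV use carries one arbitrary SCEV expression plus the set
  // of loops for which it uses the post-incremented value.
  struct NewUse {
    const SCEV *Expr;
    PostIncLoopSet PostIncLoops;
  };

  int main() { return 0; }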

This refines the concept of "normalizing" SCEV expressions used for
post-increment uses, and introduces a dedicated utility routine for
normalizing and denormalizing expressions.
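
As a rough illustration of what "normalizing" means here (plain integers
standing in for SCEV expressions; the real routine, TransformForPostIncUse
in the patch below, works on SCEVs): a post-increment user of an affine
addrec {Start,+,Step} sees the value one step later than a pre-increment
user, and denormalizing/normalizing adds or subtracts that step:

  // Illustrative sketch only; not code from this patch.
  #include <cassert>

  struct AddRec { long Start, Step; };  // models the affine addrec {Start,+,Step}

  // Value of the addrec at iteration i (the normalized, pre-increment form).
  long evaluate(const AddRec &AR, long i) { return AR.Start + AR.Step * i; }

  // Denormalize: a post-increment user at iteration i sees one extra step.
  long denormalize(const AddRec &AR, long i) { return evaluate(AR, i) + AR.Step; }

  // Normalize: subtract the step again so all users share a canonical form.
  long normalize(const AddRec &AR, long i) { return denormalize(AR, i) - AR.Step; }

  int main() {
    AddRec AR = {0, 4};                            // e.g. a byte offset of 4*i
    assert(denormalize(AR, 3) == evaluate(AR, 4)); // post-inc value at i==3
    assert(normalize(AR, 3) == evaluate(AR, 3));   // round-trips to the pre-inc value
    return 0;
  }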

This fixes the expansion of expressions which are post-increment users
of more than one loop at a time. More broadly, this takes LSR another
step closer to being able to reason about more than one loop at a time.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@100699 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 6605666..1a58b66 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -454,6 +454,46 @@
   return Changed;
 }
 
+// FIXME: It is an extremely bad idea to indvar substitute anything more
+// complex than affine induction variables.  Doing so will put expensive
+// polynomial evaluations inside of the loop, and the strength reduction pass
+// currently can only reduce affine polynomials.  For now just disable
+// indvar subst on anything more complex than an affine addrec, unless
+// it can be expanded to a trivial value.
+static bool isSafe(const SCEV *S, const Loop *L) {
+  // Loop-invariant values are safe.
+  if (S->isLoopInvariant(L)) return true;
+
+  // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
+  // to transform them into efficient code.
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+    return AR->isAffine();
+
+  // An add is safe if all its operands are safe.
+  if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
+    for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
+         E = Commutative->op_end(); I != E; ++I)
+      if (!isSafe(*I, L)) return false;
+    return true;
+  }
+
+  // A cast is safe if its operand is.
+  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
+    return isSafe(C->getOperand(), L);
+
+  // A udiv is safe if its operands are.
+  if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
+    return isSafe(UD->getLHS(), L) &&
+           isSafe(UD->getRHS(), L);
+
+  // SCEVUnknown is always safe.
+  if (isa<SCEVUnknown>(S))
+    return true;
+
+  // Nothing else is safe.
+  return false;
+}
+
 void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
   SmallVector<WeakVH, 16> DeadInsts;
 
@@ -465,7 +505,6 @@
   // the need for the code evaluation methods to insert induction variables
   // of different sizes.
   for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
-    const SCEV *Stride = UI->getStride();
     Value *Op = UI->getOperandValToReplace();
     const Type *UseTy = Op->getType();
     Instruction *User = UI->getUser();
@@ -486,7 +525,7 @@
     // currently can only reduce affine polynomials.  For now just disable
     // indvar subst on anything more complex than an affine addrec, unless
     // it can be expanded to a trivial value.
-    if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L))
+    if (!isSafe(AR, L))
       continue;
 
     // Determine the insertion point for this user. By default, insert
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 625a75d..631092b 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -781,10 +781,10 @@
   /// will be replaced.
   Value *OperandValToReplace;
 
-  /// PostIncLoop - If this user is to use the post-incremented value of an
+  /// PostIncLoops - If this user is to use the post-incremented value of an
   /// induction variable, this variable is non-null and holds the loop
   /// associated with the induction variable.
-  const Loop *PostIncLoop;
+  PostIncLoopSet PostIncLoops;
 
   /// LUIdx - The index of the LSRUse describing the expression which
   /// this fixup needs, minus an offset (below).
@@ -795,6 +795,8 @@
   /// offsets, for example in an unrolled loop.
   int64_t Offset;
 
+  bool isUseFullyOutsideLoop(const Loop *L) const;
+
   LSRFixup();
 
   void print(raw_ostream &OS) const;
@@ -804,9 +806,24 @@
 }
 
 LSRFixup::LSRFixup()
-  : UserInst(0), OperandValToReplace(0), PostIncLoop(0),
+  : UserInst(0), OperandValToReplace(0),
     LUIdx(~size_t(0)), Offset(0) {}
 
+/// isUseFullyOutsideLoop - Test whether this fixup always uses its
+/// value outside of the given loop.
+bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
+  // PHI nodes use their value in their incoming blocks.
+  if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (PN->getIncomingValue(i) == OperandValToReplace &&
+          L->contains(PN->getIncomingBlock(i)))
+        return false;
+    return true;
+  }
+
+  return !L->contains(UserInst);
+}
+
 void LSRFixup::print(raw_ostream &OS) const {
   OS << "UserInst=";
   // Store is common and interesting enough to be worth special-casing.
@@ -821,9 +838,10 @@
   OS << ", OperandValToReplace=";
   WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false);
 
-  if (PostIncLoop) {
+  for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(),
+       E = PostIncLoops.end(); I != E; ++I) {
     OS << ", PostIncLoop=";
-    WriteAsOperand(OS, PostIncLoop->getHeader(), /*PrintType=*/false);
+    WriteAsOperand(OS, (*I)->getHeader(), /*PrintType=*/false);
   }
 
   if (LUIdx != ~size_t(0))
@@ -1545,8 +1563,9 @@
             !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
           // Conservatively assume there may be reuse if the quotient of their
           // strides could be a legal scale.
-          const SCEV *A = CondUse->getStride();
-          const SCEV *B = UI->getStride();
+          const SCEV *A = CondUse->getStride(L);
+          const SCEV *B = UI->getStride(L);
+          if (!A || !B) continue;
           if (SE.getTypeSizeInBits(A->getType()) !=
               SE.getTypeSizeInBits(B->getType())) {
             if (SE.getTypeSizeInBits(A->getType()) >
@@ -1598,7 +1617,7 @@
         ExitingBlock->getInstList().insert(TermBr, Cond);
 
         // Clone the IVUse, as the old use still exists!
-        CondUse = &IU.AddUser(CondUse->getStride(), CondUse->getOffset(),
+        CondUse = &IU.AddUser(CondUse->getExpr(),
                               Cond, CondUse->getOperandValToReplace());
         TermBr->replaceUsesOfWith(OldCond, Cond);
       }
@@ -1607,9 +1626,7 @@
     // If we get to here, we know that we can transform the setcc instruction to
     // use the post-incremented version of the IV, allowing us to coalesce the
     // live ranges for the IV correctly.
-    CondUse->setOffset(SE.getMinusSCEV(CondUse->getOffset(),
-                                       CondUse->getStride()));
-    CondUse->setIsUseOfPostIncrementedValue(true);
+    CondUse->transformToPostInc(L);
     Changed = true;
 
     PostIncs.insert(Cond);
@@ -1717,19 +1734,24 @@
   SmallSetVector<const SCEV *, 4> Strides;
 
   // Collect interesting types and strides.
+  SmallVector<const SCEV *, 4> Worklist;
   for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
-    const SCEV *Stride = UI->getStride();
+    const SCEV *Expr = UI->getExpr();
 
     // Collect interesting types.
-    Types.insert(SE.getEffectiveSCEVType(Stride->getType()));
+    Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
 
-    // Add the stride for this loop.
-    Strides.insert(Stride);
-
-    // Add strides for other mentioned loops.
-    for (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(UI->getOffset());
-         AR; AR = dyn_cast<SCEVAddRecExpr>(AR->getStart()))
-      Strides.insert(AR->getStepRecurrence(SE));
+    // Add strides for mentioned loops.
+    Worklist.push_back(Expr);
+    do {
+      const SCEV *S = Worklist.pop_back_val();
+      if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+        Strides.insert(AR->getStepRecurrence(SE));
+        Worklist.push_back(AR->getStart());
+      } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+        Worklist.insert(Worklist.end(), Add->op_begin(), Add->op_end());
+      }
+    } while (!Worklist.empty());
   }
 
   // Compute interesting factors from the set of interesting strides.
@@ -1776,8 +1798,7 @@
     LSRFixup &LF = getNewFixup();
     LF.UserInst = UI->getUser();
     LF.OperandValToReplace = UI->getOperandValToReplace();
-    if (UI->isUseOfPostIncrementedValue())
-      LF.PostIncLoop = L;
+    LF.PostIncLoops = UI->getPostIncLoops();
 
     LSRUse::KindType Kind = LSRUse::Basic;
     const Type *AccessTy = 0;
@@ -1786,7 +1807,7 @@
       AccessTy = getAccessType(LF.UserInst);
     }
 
-    const SCEV *S = IU.getCanonicalExpr(*UI);
+    const SCEV *S = UI->getExpr();
 
     // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
     // (N - i == 0), and this allows (N - i) to be the expression that we work
@@ -1824,7 +1845,7 @@
     LF.LUIdx = P.first;
     LF.Offset = P.second;
     LSRUse &LU = Uses[LF.LUIdx];
-    LU.AllFixupsOutsideLoop &= !L->contains(LF.UserInst);
+    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
 
     // If this is the first use of this LSRUse, give it a formula.
     if (LU.Formulae.empty()) {
@@ -1936,7 +1957,7 @@
         LF.LUIdx = P.first;
         LF.Offset = P.second;
         LSRUse &LU = Uses[LF.LUIdx];
-        LU.AllFixupsOutsideLoop &= L->contains(LF.UserInst);
+        LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
         InsertSupplementalFormula(U, LU, LF.LUIdx);
         CountRegisters(LU.Formulae.back(), Uses.size() - 1);
         break;
@@ -2783,8 +2804,8 @@
                            SmallVectorImpl<WeakVH> &DeadInsts) const {
   const LSRUse &LU = Uses[LF.LUIdx];
 
-  // Then, collect some instructions which we will remain dominated by when
-  // expanding the replacement. These must be dominated by any operands that
+  // Then, collect some instructions which must be dominated by the
+  // expanding replacement. These must be dominated by any operands that
   // will be required in the expansion.
   SmallVector<Instruction *, 4> Inputs;
   if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
@@ -2793,8 +2814,8 @@
     if (Instruction *I =
           dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
       Inputs.push_back(I);
-  if (LF.PostIncLoop) {
-    if (!L->contains(LF.UserInst))
+  if (LF.PostIncLoops.count(L)) {
+    if (LF.isUseFullyOutsideLoop(L))
       Inputs.push_back(L->getLoopLatch()->getTerminator());
     else
       Inputs.push_back(IVIncInsertPos);
@@ -2831,7 +2852,7 @@
 
   // Inform the Rewriter if we have a post-increment use, so that it can
   // perform an advantageous expansion.
-  Rewriter.setPostInc(LF.PostIncLoop);
+  Rewriter.setPostInc(LF.PostIncLoops);
 
   // This is the type that the user actually needs.
   const Type *OpTy = LF.OperandValToReplace->getType();
@@ -2855,24 +2876,11 @@
     const SCEV *Reg = *I;
     assert(!Reg->isZero() && "Zero allocated in a base register!");
 
-    // If we're expanding for a post-inc user for the add-rec's loop, make the
-    // post-inc adjustment.
-    const SCEV *Start = Reg;
-    while (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Start)) {
-      if (AR->getLoop() == LF.PostIncLoop) {
-        Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE));
-        // If the user is inside the loop, insert the code after the increment
-        // so that it is dominated by its operand. If the original insert point
-        // was already dominated by the increment, keep it, because there may
-        // be loop-variant operands that need to be respected also.
-        if (L->contains(LF.UserInst) && !DT.dominates(IVIncInsertPos, IP)) {
-          IP = IVIncInsertPos;
-          while (isa<DbgInfoIntrinsic>(IP)) ++IP;
-        }
-        break;
-      }
-      Start = AR->getStart();
-    }
+    // If we're expanding for a post-inc user, make the post-inc adjustment.
+    PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
+    Reg = TransformForPostIncUse(Denormalize, Reg,
+                                 LF.UserInst, LF.OperandValToReplace,
+                                 Loops, SE, DT);
 
     Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
   }
@@ -2889,11 +2897,11 @@
   if (F.AM.Scale != 0) {
     const SCEV *ScaledS = F.ScaledReg;
 
-    // If we're expanding for a post-inc user for the add-rec's loop, make the
-    // post-inc adjustment.
-    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ScaledS))
-      if (AR->getLoop() == LF.PostIncLoop)
-        ScaledS = SE.getAddExpr(ScaledS, AR->getStepRecurrence(SE));
+    // If we're expanding for a post-inc user, make the post-inc adjustment.
+    PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
+    ScaledS = TransformForPostIncUse(Denormalize, ScaledS,
+                                     LF.UserInst, LF.OperandValToReplace,
+                                     Loops, SE, DT);
 
     if (LU.Kind == LSRUse::ICmpZero) {
       // An interesting way of "folding" with an icmp is to use a negated
@@ -2954,7 +2962,7 @@
   Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);
 
   // We're done expanding now, so reset the rewriter.
-  Rewriter.setPostInc(0);
+  Rewriter.clearPostInc();
 
   // An ICmpZero Formula represents an ICmp which we're handling as a
   // comparison against zero. Now that we've expanded an expression for that