Extend ScalarEvolution's multiple-exit support to compute exact
trip counts in more cases.
Generalize ScalarEvolution's isLoopGuardedByCond code to recognize
And and Or conditions, splitting the code out into an
isNecessaryCond helper function so that it can evaluate Ands and Ors
recursively. Also make SCEVExpander much more aggressive about
hoisting instructions out of loops.
test/CodeGen/X86/pr3495.ll has an additional instruction now, but
it appears to be due to an arbitrary register allocation difference.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@74048 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 436b79d..d1f6679 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -2813,7 +2813,6 @@
const SCEV* BECount = CouldNotCompute;
const SCEV* MaxBECount = CouldNotCompute;
bool CouldNotComputeBECount = false;
- bool CouldNotComputeMaxBECount = false;
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
BackedgeTakenInfo NewBTI =
ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]);
@@ -2826,25 +2825,13 @@
} else if (!CouldNotComputeBECount) {
if (BECount == CouldNotCompute)
BECount = NewBTI.Exact;
- else {
- // TODO: More analysis could be done here. For example, a
- // loop with a short-circuiting && operator has an exact count
- // of the min of both sides.
- CouldNotComputeBECount = true;
- BECount = CouldNotCompute;
- }
- }
- if (NewBTI.Max == CouldNotCompute) {
- // We couldn't compute an maximum value for this exit, so
- // we won't be able to compute an maximum value for the loop.
- CouldNotComputeMaxBECount = true;
- MaxBECount = CouldNotCompute;
- } else if (!CouldNotComputeMaxBECount) {
- if (MaxBECount == CouldNotCompute)
- MaxBECount = NewBTI.Max;
else
- MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, NewBTI.Max);
+ BECount = getUMinFromMismatchedTypes(BECount, NewBTI.Exact);
}
+ if (MaxBECount == CouldNotCompute)
+ MaxBECount = NewBTI.Max;
+ else if (NewBTI.Max != CouldNotCompute)
+ MaxBECount = getUMinFromMismatchedTypes(MaxBECount, NewBTI.Max);
}
return BackedgeTakenInfo(BECount, MaxBECount);
@@ -2925,9 +2912,7 @@
Value *ExitCond,
BasicBlock *TBB,
BasicBlock *FBB) {
- // Check if the controlling expression for this loop is an and or or. In
- // such cases, an exact backedge-taken count may be infeasible, but a
- // maximum count may still be feasible.
+ // Check if the controlling expression for this loop is an And or Or.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
if (BO->getOpcode() == Instruction::And) {
// Recurse on the operands of the and.
@@ -3899,90 +3884,113 @@
LoopEntryPredicate->isUnconditional())
continue;
- ICmpInst *ICI = dyn_cast<ICmpInst>(LoopEntryPredicate->getCondition());
- if (!ICI) continue;
-
- // Now that we found a conditional branch that dominates the loop, check to
- // see if it is the comparison we are looking for.
- Value *PreCondLHS = ICI->getOperand(0);
- Value *PreCondRHS = ICI->getOperand(1);
- ICmpInst::Predicate Cond;
- if (LoopEntryPredicate->getSuccessor(0) == PredecessorDest)
- Cond = ICI->getPredicate();
- else
- Cond = ICI->getInversePredicate();
-
- if (Cond == Pred)
- ; // An exact match.
- else if (!ICmpInst::isTrueWhenEqual(Cond) && Pred == ICmpInst::ICMP_NE)
- ; // The actual condition is beyond sufficient.
- else
- // Check a few special cases.
- switch (Cond) {
- case ICmpInst::ICMP_UGT:
- if (Pred == ICmpInst::ICMP_ULT) {
- std::swap(PreCondLHS, PreCondRHS);
- Cond = ICmpInst::ICMP_ULT;
- break;
- }
- continue;
- case ICmpInst::ICMP_SGT:
- if (Pred == ICmpInst::ICMP_SLT) {
- std::swap(PreCondLHS, PreCondRHS);
- Cond = ICmpInst::ICMP_SLT;
- break;
- }
- continue;
- case ICmpInst::ICMP_NE:
- // Expressions like (x >u 0) are often canonicalized to (x != 0),
- // so check for this case by checking if the NE is comparing against
- // a minimum or maximum constant.
- if (!ICmpInst::isTrueWhenEqual(Pred))
- if (ConstantInt *CI = dyn_cast<ConstantInt>(PreCondRHS)) {
- const APInt &A = CI->getValue();
- switch (Pred) {
- case ICmpInst::ICMP_SLT:
- if (A.isMaxSignedValue()) break;
- continue;
- case ICmpInst::ICMP_SGT:
- if (A.isMinSignedValue()) break;
- continue;
- case ICmpInst::ICMP_ULT:
- if (A.isMaxValue()) break;
- continue;
- case ICmpInst::ICMP_UGT:
- if (A.isMinValue()) break;
- continue;
- default:
- continue;
- }
- Cond = ICmpInst::ICMP_NE;
- // NE is symmetric but the original comparison may not be. Swap
- // the operands if necessary so that they match below.
- if (isa<SCEVConstant>(LHS))
- std::swap(PreCondLHS, PreCondRHS);
- break;
- }
- continue;
- default:
- // We weren't able to reconcile the condition.
- continue;
- }
-
- if (!PreCondLHS->getType()->isInteger()) continue;
-
- const SCEV* PreCondLHSSCEV = getSCEV(PreCondLHS);
- const SCEV* PreCondRHSSCEV = getSCEV(PreCondRHS);
- if ((HasSameValue(LHS, PreCondLHSSCEV) &&
- HasSameValue(RHS, PreCondRHSSCEV)) ||
- (HasSameValue(LHS, getNotSCEV(PreCondRHSSCEV)) &&
- HasSameValue(RHS, getNotSCEV(PreCondLHSSCEV))))
+ if (isNecessaryCond(LoopEntryPredicate->getCondition(), Pred, LHS, RHS,
+ LoopEntryPredicate->getSuccessor(0) != PredecessorDest))
return true;
}
return false;
}
+/// isNecessaryCond - Test whether CondValue (negated if Inverse is set) is a
+/// condition at least as strict as the one described by Pred, LHS, and RHS.
+bool ScalarEvolution::isNecessaryCond(Value *CondValue,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ bool Inverse) {
+ // Recursively handle And and Or conditions.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CondValue)) {
+ if (BO->getOpcode() == Instruction::And) {
+ if (!Inverse)
+ return isNecessaryCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) ||
+ isNecessaryCond(BO->getOperand(1), Pred, LHS, RHS, Inverse);
+ } else if (BO->getOpcode() == Instruction::Or) {
+ if (Inverse)
+ return isNecessaryCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) ||
+ isNecessaryCond(BO->getOperand(1), Pred, LHS, RHS, Inverse);
+ }
+ }
+
+ ICmpInst *ICI = dyn_cast<ICmpInst>(CondValue);
+ if (!ICI) return false;
+
+ // Now that we found a conditional branch that dominates the loop, check to
+ // see if it is the comparison we are looking for.
+ Value *PreCondLHS = ICI->getOperand(0);
+ Value *PreCondRHS = ICI->getOperand(1);
+ ICmpInst::Predicate Cond;
+ if (Inverse)
+ Cond = ICI->getInversePredicate();
+ else
+ Cond = ICI->getPredicate();
+
+ if (Cond == Pred)
+ ; // An exact match.
+ else if (!ICmpInst::isTrueWhenEqual(Cond) && Pred == ICmpInst::ICMP_NE)
+ ; // The actual condition is beyond sufficient.
+ else
+ // Check a few special cases.
+ switch (Cond) {
+ case ICmpInst::ICMP_UGT:
+ if (Pred == ICmpInst::ICMP_ULT) {
+ std::swap(PreCondLHS, PreCondRHS);
+ Cond = ICmpInst::ICMP_ULT;
+ break;
+ }
+ return false;
+ case ICmpInst::ICMP_SGT:
+ if (Pred == ICmpInst::ICMP_SLT) {
+ std::swap(PreCondLHS, PreCondRHS);
+ Cond = ICmpInst::ICMP_SLT;
+ break;
+ }
+ return false;
+ case ICmpInst::ICMP_NE:
+ // Expressions like (x >u 0) are often canonicalized to (x != 0),
+ // so check for this case by checking if the NE is comparing against
+ // a minimum or maximum constant.
+ if (!ICmpInst::isTrueWhenEqual(Pred))
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(PreCondRHS)) {
+ const APInt &A = CI->getValue();
+ switch (Pred) {
+ case ICmpInst::ICMP_SLT:
+ if (A.isMaxSignedValue()) break;
+ return false;
+ case ICmpInst::ICMP_SGT:
+ if (A.isMinSignedValue()) break;
+ return false;
+ case ICmpInst::ICMP_ULT:
+ if (A.isMaxValue()) break;
+ return false;
+ case ICmpInst::ICMP_UGT:
+ if (A.isMinValue()) break;
+ return false;
+ default:
+ return false;
+ }
+ Cond = ICmpInst::ICMP_NE;
+ // NE is symmetric but the original comparison may not be. Swap
+ // the operands if necessary so that they match below.
+ if (isa<SCEVConstant>(LHS))
+ std::swap(PreCondLHS, PreCondRHS);
+ break;
+ }
+ return false;
+ default:
+ // We weren't able to reconcile the condition.
+ return false;
+ }
+
+ if (!PreCondLHS->getType()->isInteger()) return false;
+
+ const SCEV *PreCondLHSSCEV = getSCEV(PreCondLHS);
+ const SCEV *PreCondRHSSCEV = getSCEV(PreCondRHS);
+ return (HasSameValue(LHS, PreCondLHSSCEV) &&
+ HasSameValue(RHS, PreCondRHSSCEV)) ||
+ (HasSameValue(LHS, getNotSCEV(PreCondRHSSCEV)) &&
+ HasSameValue(RHS, getNotSCEV(PreCondLHSSCEV)));
+}
+
/// getBECount - Subtract the end and start values and divide by the step,
/// rounding up, to get the number of times the backedge is executed. Return
/// CouldNotCompute if an intermediate computation overflows.
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index c5591d7..6d7abc0 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -51,21 +51,26 @@
if (Argument *A = dyn_cast<Argument>(V)) {
// Check to see if there is already a cast!
for (Value::use_iterator UI = A->use_begin(), E = A->use_end();
- UI != E; ++UI) {
+ UI != E; ++UI)
if ((*UI)->getType() == Ty)
if (CastInst *CI = dyn_cast<CastInst>(cast<Instruction>(*UI)))
if (CI->getOpcode() == opcode) {
// If the cast isn't the first instruction of the function, move it.
- if (BasicBlock::iterator(CI) !=
+ if (BasicBlock::iterator(CI) !=
A->getParent()->getEntryBlock().begin()) {
- // If the CastInst is the insert point, change the insert point.
- if (CI == InsertPt) ++InsertPt;
- // Splice the cast at the beginning of the entry block.
- CI->moveBefore(A->getParent()->getEntryBlock().begin());
+ // Recreate the cast at the beginning of the entry block.
+ // The old cast is left in place in case it is being used
+ // as an insert point.
+ Instruction *NewCI =
+ CastInst::Create(opcode, V, Ty, "",
+ A->getParent()->getEntryBlock().begin());
+ NewCI->takeName(CI);
+ CI->replaceAllUsesWith(NewCI);
+ return NewCI;
}
return CI;
}
- }
+
Instruction *I = CastInst::Create(opcode, V, Ty, V->getName(),
A->getParent()->getEntryBlock().begin());
InsertedValues.insert(I);
@@ -85,10 +90,13 @@
It = cast<InvokeInst>(I)->getNormalDest()->begin();
while (isa<PHINode>(It)) ++It;
if (It != BasicBlock::iterator(CI)) {
- // If the CastInst is the insert point, change the insert point.
- if (CI == InsertPt) ++InsertPt;
- // Splice the cast immediately after the operand in question.
- CI->moveBefore(It);
+ // Recreate the cast immediately after the operand in question.
+ // The old cast is left in place in case it is being used
+ // as an insert point.
+ Instruction *NewCI = CastInst::Create(opcode, V, Ty, "", It);
+ NewCI->takeName(CI);
+ CI->replaceAllUsesWith(NewCI);
+ return NewCI;
}
return CI;
}
@@ -497,8 +505,9 @@
}
}
- Value *RestV = expand(Rest);
- return expand(SE.getAddExpr(S->getStart(), SE.getUnknown(RestV)));
+ // Just do a normal add. Pre-expand the operands to suppress folding.
+ return expand(SE.getAddExpr(SE.getUnknown(expand(S->getStart())),
+ SE.getUnknown(expand(Rest))));
}
// {0,+,1} --> Insert a canonical induction variable into the loop!
@@ -546,36 +555,13 @@
getOrInsertCanonicalInductionVariable(L, Ty);
// If this is a simple linear addrec, emit it now as a special case.
- if (S->isAffine()) { // {0,+,F} --> i*F
- Value *F = expandCodeFor(S->getOperand(1), Ty);
-
- // If the insert point is directly inside of the loop, emit the multiply at
- // the insert point. Otherwise, L is a loop that is a parent of the insert
- // point loop. If we can, move the multiply to the outer most loop that it
- // is safe to be in.
- BasicBlock::iterator MulInsertPt = getInsertionPoint();
- Loop *InsertPtLoop = SE.LI->getLoopFor(MulInsertPt->getParent());
- if (InsertPtLoop != L && InsertPtLoop &&
- L->contains(InsertPtLoop->getHeader())) {
- do {
- // If we cannot hoist the multiply out of this loop, don't.
- if (!InsertPtLoop->isLoopInvariant(F)) break;
-
- BasicBlock *InsertPtLoopPH = InsertPtLoop->getLoopPreheader();
-
- // If this loop hasn't got a preheader, we aren't able to hoist the
- // multiply.
- if (!InsertPtLoopPH)
- break;
-
- // Otherwise, move the insert point to the preheader.
- MulInsertPt = InsertPtLoopPH->getTerminator();
- InsertPtLoop = InsertPtLoop->getParentLoop();
- } while (InsertPtLoop != L);
- }
-
- return InsertBinop(Instruction::Mul, I, F, MulInsertPt);
- }
+ if (S->isAffine()) // {0,+,F} --> i*F
+ return
+ expand(SE.getTruncateOrNoop(
+ SE.getMulExpr(SE.getUnknown(I),
+ SE.getNoopOrAnyExtend(S->getOperand(1),
+ I->getType())),
+ Ty));
// If this is a chain of recurrences, turn it into a closed form, using the
// folders, then expandCodeFor the closed form. This allows the folders to
@@ -671,8 +657,31 @@
InsertedExpressions.find(S);
if (I != InsertedExpressions.end())
return I->second;
-
+
+ // Compute an insertion point for this SCEV object. Hoist the instructions
+ // as far out in the loop nest as possible.
+ BasicBlock::iterator InsertPt = getInsertionPoint();
+ BasicBlock::iterator SaveInsertPt = InsertPt;
+ for (Loop *L = SE.LI->getLoopFor(InsertPt->getParent()); ;
+ L = L->getParentLoop())
+ if (S->isLoopInvariant(L)) {
+ if (!L) break;
+ if (BasicBlock *Preheader = L->getLoopPreheader())
+ InsertPt = Preheader->getTerminator();
+ } else {
+ // If the SCEV is computable at this level, insert it into the header
+ // after the PHIs (and after any other instructions that we've inserted
+ // there) so that it is guaranteed to dominate any user inside the loop.
+ if (L && S->hasComputableLoopEvolution(L))
+ InsertPt = L->getHeader()->getFirstNonPHI();
+ while (isInsertedInstruction(InsertPt)) ++InsertPt;
+ break;
+ }
+ setInsertionPoint(InsertPt);
+
Value *V = visit(S);
+
+ setInsertionPoint(SaveInsertPt);
InsertedExpressions[S] = V;
return V;
}
@@ -686,6 +695,9 @@
const Type *Ty) {
assert(Ty->isInteger() && "Can only insert integer induction variables!");
const SCEV* H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty),
- SE.getIntegerSCEV(1, Ty), L);
- return expand(H);
+ SE.getIntegerSCEV(1, Ty), L);
+ BasicBlock::iterator SaveInsertPt = getInsertionPoint();
+ Value *V = expandCodeFor(H, 0, L->getHeader()->begin());
+ setInsertionPoint(SaveInsertPt);
+ return V;
}