[LV] Fold tail by masking to vectorize loops of arbitrary trip count under opt for size
When optimizing for size, a loop is vectorized only if the resulting vector loop
completely replaces the original scalar loop. This holds if no runtime guards
are needed, if the original trip-count TC does not overflow, and if TC is a
known constant that is a multiple of the VF. The last two TC-related conditions
can be overcome by:
1. rounding the trip-count of the vector loop up from TC to a multiple of VF;
2. masking the vector body under a newly introduced "if (i <= TC-1)" condition.
The patch allows loops with arbitrary trip counts to be vectorized under -Os,
subject to the existing cost model considerations. It also applies to loops with
small trip counts (under -O2), which are currently handled as if compiled under -Os.
The patch does not handle loops with reductions or live-outs, or loops without a
primary induction variable, and it disallows interleave groups.
(Third, final and main part of the revision referenced below.)
Differential Revision: https://reviews.llvm.org/D50480
llvm-svn: 344743
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 39cb4e9..a3c15a3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -303,6 +303,13 @@
State.set(this, V, Part);
break;
}
+ case VPInstruction::ICmpULE: {
+ Value *IV = State.get(getOperand(0), Part);
+ Value *TC = State.get(getOperand(1), Part);
+ Value *V = Builder.CreateICmpULE(IV, TC);
+ State.set(this, V, Part);
+ break;
+ }
default:
llvm_unreachable("Unsupported opcode for instruction");
}
@@ -328,6 +335,9 @@
case VPInstruction::Not:
O << "not";
break;
+ case VPInstruction::ICmpULE:
+ O << "icmp ule";
+ break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
@@ -342,6 +352,15 @@
/// LoopVectorBody basic-block was created for this. Introduce additional
/// basic-blocks as needed, and fill them all.
void VPlan::execute(VPTransformState *State) {
+ // -1. Check if the backedge taken count is needed, and if so build it.
+ if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
+ Value *TC = State->TripCount;
+ IRBuilder<> Builder(State->CFG.PrevBB->getTerminator());
+ auto *TCMO = Builder.CreateSub(TC, ConstantInt::get(TC->getType(), 1),
+ "trip.count.minus.1");
+ Value2VPValue[TCMO] = BackedgeTakenCount;
+ }
+
// 0. Set the reverse mapping from VPValues to Values for code generation.
for (auto &Entry : Value2VPValue)
State->VPValue2Value[Entry.second] = Entry.first;
@@ -469,8 +488,11 @@
OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
if (!Plan.getName().empty())
OS << "\\n" << DOT::EscapeString(Plan.getName());
- if (!Plan.Value2VPValue.empty()) {
+ if (!Plan.Value2VPValue.empty() || Plan.BackedgeTakenCount) {
OS << ", where:";
+ if (Plan.BackedgeTakenCount)
+ OS << "\\n"
+ << *Plan.getOrCreateBackedgeTakenCount() << " := BackedgeTakenCount";
for (auto Entry : Plan.Value2VPValue) {
OS << "\\n" << *Entry.second;
OS << DOT::EscapeString(" := ");