[PGO] Enhance pgo counter promotion
This is an incremental change to the promotion feature.
There are two problems with the current behavior:
1) loops with multiple exiting blocks are totally disabled
2) a counter update can only be promoted one level up in
the loop nest -- which does help much for short trip
count inner loops inside a high trip-count outer loops.
Due to this limitation, we still saw very large profile
count fluctuations from run to run for the affected loops
which are usually very hot.
This patch adds the support for promotion counters iteratively
across the loop nest. It also turns on the promotion for
loops with multiple exiting blocks (with a limit).
For single-threaded applications, the performance impact is flat
on average. For instance, dealII improves, but povray regresses.
llvm-svn: 307863
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 9c14b01..db8fa89 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -112,7 +112,7 @@
cl::desc("Do counter register promotion"),
cl::init(false));
cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
- cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(10),
+ cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(20),
cl::desc("Max number counter promotions per loop to avoid"
" increasing register pressure too much"));
@@ -121,10 +121,21 @@
MaxNumOfPromotions(cl::ZeroOrMore, "max-counter-promotions", cl::init(-1),
cl::desc("Max number of allowed counter promotions"));
-cl::opt<bool> SpeculativeCounterPromotion(
- cl::ZeroOrMore, "speculative-counter-promotion", cl::init(false),
- cl::desc("Allow counter promotion for loops with multiple exiting blocks "
- " or top-tested loops. "));
+cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
+ cl::ZeroOrMore, "speculative-counter-promotion-max-exiting", cl::init(3),
+ cl::desc("The max number of exiting blocks of a loop to allow "
+ " speculative counter promotion"));
+
+cl::opt<bool> SpeculativeCounterPromotionToLoop(
+ cl::ZeroOrMore, "speculative-counter-promotion-to-loop", cl::init(false),
+ cl::desc("When the option is false, if the target block is in a loop, "
+ "the promotion will be disallowed unless the promoted counter "
+ " update can be further/iteratively promoted into an acyclic "
+ " region."));
+
+cl::opt<bool> IterativeCounterPromotion(
+ cl::ZeroOrMore, "iterative-counter-promotion", cl::init(true),
+ cl::desc("Allow counter promotion across the whole loop nest."));
class InstrProfilingLegacyPass : public ModulePass {
InstrProfiling InstrProf;
@@ -150,6 +161,7 @@
}
};
+///
/// A helper class to promote one counter RMW operation in the loop
/// into register update.
///
@@ -158,16 +170,19 @@
///
class PGOCounterPromoterHelper : public LoadAndStorePromoter {
public:
- PGOCounterPromoterHelper(Instruction *L, Instruction *S, SSAUpdater &SSA,
- Value *Init, BasicBlock *PH,
- ArrayRef<BasicBlock *> ExitBlocks,
- ArrayRef<Instruction *> InsertPts)
+ PGOCounterPromoterHelper(
+ Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init,
+ BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
+ ArrayRef<Instruction *> InsertPts,
+ DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
+ LoopInfo &LI)
: LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
- InsertPts(InsertPts) {
+ InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) {
assert(isa<LoadInst>(L));
assert(isa<StoreInst>(S));
SSA.AddAvailableValue(PH, Init);
}
+
void doExtraRewritesBeforeFinalDeletion() const override {
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBlock = ExitBlocks[i];
@@ -179,12 +194,21 @@
Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
IRBuilder<> Builder(InsertPos);
if (AtomicCounterUpdatePromoted)
+ // automic update currently can only be promoted across the current
+ // loop, not the whole loop nest.
Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
AtomicOrdering::SequentiallyConsistent);
else {
LoadInst *OldVal = Builder.CreateLoad(Addr, "pgocount.promoted");
auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
- Builder.CreateStore(NewVal, Addr);
+ auto *NewStore = Builder.CreateStore(NewVal, Addr);
+
+ // Now update the parent loop's candidate list:
+ if (IterativeCounterPromotion) {
+ auto *TargetLoop = LI.getLoopFor(ExitBlock);
+ if (TargetLoop)
+ LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore);
+ }
}
}
}
@@ -193,6 +217,8 @@
Instruction *Store;
ArrayRef<BasicBlock *> ExitBlocks;
ArrayRef<Instruction *> InsertPts;
+ DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
+ LoopInfo &LI;
};
/// A helper class to do register promotion for all profile counter
@@ -200,12 +226,15 @@
///
class PGOCounterPromoter {
public:
- PGOCounterPromoter(ArrayRef<LoadStorePair> Cands, Loop &Loop)
- : Candidates(Cands), ExitBlocks(), InsertPts(), ParentLoop(Loop) {
+ PGOCounterPromoter(
+ DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
+ Loop &CurLoop, LoopInfo &LI)
+ : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop),
+ LI(LI) {
SmallVector<BasicBlock *, 8> LoopExitBlocks;
SmallPtrSet<BasicBlock *, 8> BlockSet;
- ParentLoop.getExitBlocks(LoopExitBlocks);
+ L.getExitBlocks(LoopExitBlocks);
for (BasicBlock *ExitBlock : LoopExitBlocks) {
if (BlockSet.insert(ExitBlock).second) {
@@ -216,55 +245,97 @@
}
bool run(int64_t *NumPromoted) {
- // We can't insert into a catchswitch.
- bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
- return isa<CatchSwitchInst>(Exit->getTerminator());
- });
-
- if (HasCatchSwitch)
- return false;
-
- if (!ParentLoop.hasDedicatedExits())
- return false;
-
- BasicBlock *PH = ParentLoop.getLoopPreheader();
- if (!PH)
- return false;
-
- BasicBlock *H = ParentLoop.getHeader();
- bool TopTested =
- ((ParentLoop.getBlocks().size() > 1) && ParentLoop.isLoopExiting(H));
- if (!SpeculativeCounterPromotion &&
- (TopTested || ParentLoop.getExitingBlock() == nullptr))
+ unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L);
+ if (MaxProm == 0)
return false;
unsigned Promoted = 0;
- for (auto &Cand : Candidates) {
+ for (auto &Cand : LoopToCandidates[&L]) {
SmallVector<PHINode *, 4> NewPHIs;
SSAUpdater SSA(&NewPHIs);
Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
+
PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
- PH, ExitBlocks, InsertPts);
+ L.getLoopPreheader(), ExitBlocks,
+ InsertPts, LoopToCandidates, LI);
Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
Promoted++;
- if (Promoted >= MaxNumOfPromotionsPerLoop)
+ if (Promoted >= MaxProm)
break;
+
(*NumPromoted)++;
if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
break;
}
DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
- << ParentLoop.getLoopDepth() << ")\n");
+ << L.getLoopDepth() << ")\n");
return Promoted != 0;
}
private:
- ArrayRef<LoadStorePair> Candidates;
+ bool allowSpeculativeCounterPromotion(Loop *LP) {
+ SmallVector<BasicBlock *, 8> ExitingBlocks;
+ L.getExitingBlocks(ExitingBlocks);
+ // Not considierered speculative.
+ if (ExitingBlocks.size() == 1)
+ return true;
+ if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
+ return false;
+ return true;
+ }
+
+ // Returns the max number of Counter Promotions for LP.
+ unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {
+ // We can't insert into a catchswitch.
+ SmallVector<BasicBlock *, 8> LoopExitBlocks;
+ LP->getExitBlocks(LoopExitBlocks);
+ if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) {
+ return isa<CatchSwitchInst>(Exit->getTerminator());
+ }))
+ return 0;
+
+ if (!LP->hasDedicatedExits())
+ return 0;
+
+ BasicBlock *PH = LP->getLoopPreheader();
+ if (!PH)
+ return 0;
+
+ SmallVector<BasicBlock *, 8> ExitingBlocks;
+ LP->getExitingBlocks(ExitingBlocks);
+ // Not considierered speculative.
+ if (ExitingBlocks.size() == 1)
+ return MaxNumOfPromotionsPerLoop;
+
+ if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
+ return 0;
+
+ // Whether the target block is in a loop does not matter:
+ if (SpeculativeCounterPromotionToLoop)
+ return MaxNumOfPromotionsPerLoop;
+
+ // Now check the target block:
+ unsigned MaxProm = MaxNumOfPromotionsPerLoop;
+ for (auto *TargetBlock : LoopExitBlocks) {
+ auto *TargetLoop = LI.getLoopFor(TargetBlock);
+ if (!TargetLoop)
+ continue;
+ unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop);
+ unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();
+ MaxProm =
+ std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) -
+ PendingCandsInTarget);
+ }
+ return MaxProm;
+ }
+
+ DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
SmallVector<BasicBlock *, 8> ExitBlocks;
SmallVector<Instruction *, 8> InsertPts;
- Loop &ParentLoop;
+ Loop &L;
+ LoopInfo &LI;
};
} // end anonymous namespace
@@ -349,8 +420,10 @@
SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder();
- for (auto *Loop : Loops) {
- PGOCounterPromoter Promoter(LoopPromotionCandidates[Loop], *Loop);
+ // Do a post-order traversal of the loops so that counter updates can be
+ // iteratively hoisted outside the loop nest.
+ for (auto *Loop : llvm::reverse(Loops)) {
+ PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI);
Promoter.run(&TotalCountersPromoted);
}
}