//===- HotColdSplitting.cpp -- Outline Cold Regions -------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Outline cold regions to a separate function.
// TODO: Update BFI and BPI
// TODO: Add all the outlined functions to a separate section.
//
//===----------------------------------------------------------------------===//
15
16#include "llvm/ADT/SmallVector.h"
17#include "llvm/ADT/Statistic.h"
18#include "llvm/Analysis/AliasAnalysis.h"
19#include "llvm/Analysis/BlockFrequencyInfo.h"
20#include "llvm/Analysis/BranchProbabilityInfo.h"
21#include "llvm/Analysis/CFG.h"
22#include "llvm/Analysis/OptimizationRemarkEmitter.h"
23#include "llvm/Analysis/PostDominators.h"
24#include "llvm/Analysis/ProfileSummaryInfo.h"
Sebastian Popa1f20fc2018-09-10 15:08:02 +000025#include "llvm/Analysis/TargetTransformInfo.h"
Aditya Kumar801394a2018-09-07 15:03:49 +000026#include "llvm/IR/BasicBlock.h"
27#include "llvm/IR/CFG.h"
28#include "llvm/IR/DataLayout.h"
29#include "llvm/IR/DiagnosticInfo.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/Function.h"
32#include "llvm/IR/Instruction.h"
33#include "llvm/IR/Instructions.h"
34#include "llvm/IR/Metadata.h"
35#include "llvm/IR/Module.h"
36#include "llvm/IR/PassManager.h"
37#include "llvm/IR/Type.h"
38#include "llvm/IR/Use.h"
39#include "llvm/IR/User.h"
40#include "llvm/IR/Value.h"
41#include "llvm/Pass.h"
42#include "llvm/Support/BlockFrequency.h"
43#include "llvm/Support/BranchProbability.h"
44#include "llvm/Support/Debug.h"
45#include "llvm/Support/raw_ostream.h"
46#include "llvm/Transforms/IPO.h"
Aditya Kumar9e20ade2018-10-03 05:55:20 +000047#include "llvm/Transforms/IPO/HotColdSplitting.h"
Aditya Kumar801394a2018-09-07 15:03:49 +000048#include "llvm/Transforms/Scalar.h"
49#include "llvm/Transforms/Utils/BasicBlockUtils.h"
50#include "llvm/Transforms/Utils/Cloning.h"
51#include "llvm/Transforms/Utils/CodeExtractor.h"
52#include "llvm/Transforms/Utils/Local.h"
53#include "llvm/Transforms/Utils/SSAUpdater.h"
54#include "llvm/Transforms/Utils/ValueMapper.h"
55#include <algorithm>
56#include <cassert>
57
58#define DEBUG_TYPE "hotcoldsplit"
59
60STATISTIC(NumColdSESEFound,
61 "Number of cold single entry single exit (SESE) regions found.");
62STATISTIC(NumColdSESEOutlined,
63 "Number of cold single entry single exit (SESE) regions outlined.");
64
65using namespace llvm;
66
67static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
68 cl::init(true), cl::Hidden);
69
70
71namespace {
72
73struct PostDomTree : PostDomTreeBase<BasicBlock> {
74 PostDomTree(Function &F) { recalculate(F); }
75};
76
77typedef DenseSet<const BasicBlock *> DenseSetBB;
Sebastian Pop12171602018-09-14 20:36:10 +000078typedef DenseMap<const BasicBlock *, uint64_t> DenseMapBBInt;
Aditya Kumar801394a2018-09-07 15:03:49 +000079
80// From: https://reviews.llvm.org/D22558
81// Exit is not part of the region.
82static bool isSingleEntrySingleExit(BasicBlock *Entry, const BasicBlock *Exit,
83 DominatorTree *DT, PostDomTree *PDT,
84 SmallVectorImpl<BasicBlock *> &Region) {
85 if (!DT->dominates(Entry, Exit))
86 return false;
87
88 if (!PDT->dominates(Exit, Entry))
89 return false;
90
Aditya Kumar801394a2018-09-07 15:03:49 +000091 for (auto I = df_begin(Entry), E = df_end(Entry); I != E;) {
92 if (*I == Exit) {
93 I.skipChildren();
94 continue;
95 }
96 if (!DT->dominates(Entry, *I))
97 return false;
98 Region.push_back(*I);
99 ++I;
100 }
101 return true;
102}
103
104bool blockEndsInUnreachable(const BasicBlock &BB) {
105 if (BB.empty())
106 return true;
Chandler Carruthedb12a82018-10-15 10:04:59 +0000107 const Instruction *I = BB.getTerminator();
Aditya Kumar801394a2018-09-07 15:03:49 +0000108 if (isa<ReturnInst>(I) || isa<IndirectBrInst>(I))
109 return true;
110 // Unreachable blocks do not have any successor.
111 return succ_empty(&BB);
112}
113
Aditya Kumara27014b2018-10-03 06:21:05 +0000114static bool exceptionHandlingFunctions(const CallInst *CI) {
115 auto F = CI->getCalledFunction();
116 if (!F)
117 return false;
118 auto FName = F->getName();
119 return FName == "__cxa_begin_catch" ||
120 FName == "__cxa_free_exception" ||
121 FName == "__cxa_allocate_exception" ||
122 FName == "__cxa_begin_catch" ||
123 FName == "__cxa_end_catch";
124}
125
Aditya Kumar801394a2018-09-07 15:03:49 +0000126static
127bool unlikelyExecuted(const BasicBlock &BB) {
128 if (blockEndsInUnreachable(BB))
129 return true;
130 // Exception handling blocks are unlikely executed.
131 if (BB.isEHPad())
132 return true;
133 for (const Instruction &I : BB)
134 if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
135 // The block is cold if it calls functions tagged as cold or noreturn.
136 if (CI->hasFnAttr(Attribute::Cold) ||
Aditya Kumara27014b2018-10-03 06:21:05 +0000137 CI->hasFnAttr(Attribute::NoReturn) ||
138 exceptionHandlingFunctions(CI))
Aditya Kumar801394a2018-09-07 15:03:49 +0000139 return true;
140
141 // Assume that inline assembly is hot code.
142 if (isa<InlineAsm>(CI->getCalledValue()))
143 return false;
144 }
145 return false;
146}
147
148static DenseSetBB getHotBlocks(Function &F) {
Sebastian Pop12171602018-09-14 20:36:10 +0000149
150 // Mark all cold basic blocks.
151 DenseSetBB ColdBlocks;
Aditya Kumar801394a2018-09-07 15:03:49 +0000152 for (BasicBlock &BB : F)
153 if (unlikelyExecuted(BB))
Sebastian Pop12171602018-09-14 20:36:10 +0000154 ColdBlocks.insert((const BasicBlock *)&BB);
155
156 // Forward propagation: basic blocks are hot when they are reachable from the
157 // beginning of the function through a path that does not contain cold blocks.
Aditya Kumar801394a2018-09-07 15:03:49 +0000158 SmallVector<const BasicBlock *, 8> WL;
Sebastian Pop12171602018-09-14 20:36:10 +0000159 DenseSetBB HotBlocks;
Aditya Kumar801394a2018-09-07 15:03:49 +0000160
161 const BasicBlock *It = &F.front();
Aditya Kumar801394a2018-09-07 15:03:49 +0000162 if (!ColdBlocks.count(It)) {
Sebastian Pop12171602018-09-14 20:36:10 +0000163 HotBlocks.insert(It);
164 // Breadth First Search to mark edges reachable from hot.
Aditya Kumar801394a2018-09-07 15:03:49 +0000165 WL.push_back(It);
166 while (WL.size() > 0) {
167 It = WL.pop_back_val();
Sebastian Pop12171602018-09-14 20:36:10 +0000168
169 for (const BasicBlock *Succ : successors(It)) {
Aditya Kumar801394a2018-09-07 15:03:49 +0000170 // Do not visit blocks that are cold.
Sebastian Pop12171602018-09-14 20:36:10 +0000171 if (!ColdBlocks.count(Succ) && !HotBlocks.count(Succ)) {
172 HotBlocks.insert(Succ);
Aditya Kumar801394a2018-09-07 15:03:49 +0000173 WL.push_back(Succ);
174 }
175 }
176 }
177 }
178
Sebastian Pop12171602018-09-14 20:36:10 +0000179 assert(WL.empty() && "work list should be empty");
180
181 DenseMapBBInt NumHotSuccessors;
182 // Back propagation: when all successors of a basic block are cold, the
183 // basic block is cold as well.
184 for (BasicBlock &BBRef : F) {
185 const BasicBlock *BB = &BBRef;
186 if (HotBlocks.count(BB)) {
187 // Keep a count of hot successors for every hot block.
188 NumHotSuccessors[BB] = 0;
189 for (const BasicBlock *Succ : successors(BB))
190 if (!ColdBlocks.count(Succ))
191 NumHotSuccessors[BB] += 1;
192
193 // Add to work list the blocks with all successors cold. Those are the
194 // root nodes in the next loop, where we will move those blocks from
195 // HotBlocks to ColdBlocks and iterate over their predecessors.
196 if (NumHotSuccessors[BB] == 0)
197 WL.push_back(BB);
198 }
199 }
200
201 while (WL.size() > 0) {
202 It = WL.pop_back_val();
203 if (ColdBlocks.count(It))
204 continue;
205
206 // Move the block from HotBlocks to ColdBlocks.
207 HotBlocks.erase(It);
208 ColdBlocks.insert(It);
209
210 // Iterate over the predecessors.
211 for (const BasicBlock *Pred : predecessors(It)) {
212 if (HotBlocks.count(Pred)) {
213 NumHotSuccessors[Pred] -= 1;
214
215 // If Pred has no more hot successors, add it to the work list.
216 if (NumHotSuccessors[Pred] == 0)
217 WL.push_back(Pred);
218 }
219 }
220 }
221
222 return HotBlocks;
Aditya Kumar801394a2018-09-07 15:03:49 +0000223}
224
225class HotColdSplitting {
226public:
227 HotColdSplitting(ProfileSummaryInfo *ProfSI,
228 function_ref<BlockFrequencyInfo *(Function &)> GBFI,
Sebastian Popa1f20fc2018-09-10 15:08:02 +0000229 function_ref<TargetTransformInfo &(Function &)> GTTI,
Aditya Kumar801394a2018-09-07 15:03:49 +0000230 std::function<OptimizationRemarkEmitter &(Function &)> *GORE)
Sebastian Popa1f20fc2018-09-10 15:08:02 +0000231 : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE) {}
Aditya Kumar801394a2018-09-07 15:03:49 +0000232 bool run(Module &M);
233
234private:
235 bool shouldOutlineFrom(const Function &F) const;
Sebastian Pop0f30f082018-09-14 20:36:19 +0000236 const Function *outlineColdBlocks(Function &F, const DenseSetBB &ColdBlock,
237 DominatorTree *DT, PostDomTree *PDT);
Aditya Kumar801394a2018-09-07 15:03:49 +0000238 Function *extractColdRegion(const SmallVectorImpl<BasicBlock *> &Region,
239 DominatorTree *DT, BlockFrequencyInfo *BFI,
240 OptimizationRemarkEmitter &ORE);
241 bool isOutlineCandidate(const SmallVectorImpl<BasicBlock *> &Region,
242 const BasicBlock *Exit) const {
243 if (!Exit)
244 return false;
Sebastian Pop3abcf692018-09-14 20:36:14 +0000245
Aditya Kumar801394a2018-09-07 15:03:49 +0000246 // Regions with landing pads etc.
247 for (const BasicBlock *BB : Region) {
248 if (BB->isEHPad() || BB->hasAddressTaken())
249 return false;
250 }
251 return true;
252 }
253 SmallPtrSet<const Function *, 2> OutlinedFunctions;
254 ProfileSummaryInfo *PSI;
255 function_ref<BlockFrequencyInfo *(Function &)> GetBFI;
Sebastian Popa1f20fc2018-09-10 15:08:02 +0000256 function_ref<TargetTransformInfo &(Function &)> GetTTI;
Aditya Kumar801394a2018-09-07 15:03:49 +0000257 std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
258};
259
260class HotColdSplittingLegacyPass : public ModulePass {
261public:
262 static char ID;
263 HotColdSplittingLegacyPass() : ModulePass(ID) {
264 initializeHotColdSplittingLegacyPassPass(*PassRegistry::getPassRegistry());
265 }
266
267 void getAnalysisUsage(AnalysisUsage &AU) const override {
268 AU.addRequired<AssumptionCacheTracker>();
269 AU.addRequired<BlockFrequencyInfoWrapperPass>();
270 AU.addRequired<ProfileSummaryInfoWrapperPass>();
Sebastian Popa1f20fc2018-09-10 15:08:02 +0000271 AU.addRequired<TargetTransformInfoWrapperPass>();
Aditya Kumar801394a2018-09-07 15:03:49 +0000272 }
273
274 bool runOnModule(Module &M) override;
275};
276
277} // end anonymous namespace
278
279// Returns false if the function should not be considered for hot-cold split
Sebastian Pop0f30f082018-09-14 20:36:19 +0000280// optimization.
Aditya Kumar801394a2018-09-07 15:03:49 +0000281bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
Sebastian Pop0f30f082018-09-14 20:36:19 +0000282 // Do not try to outline again from an already outlined cold function.
283 if (OutlinedFunctions.count(&F))
284 return false;
285
Aditya Kumar801394a2018-09-07 15:03:49 +0000286 if (F.size() <= 2)
287 return false;
288
289 if (F.hasAddressTaken())
290 return false;
291
292 if (F.hasFnAttribute(Attribute::AlwaysInline))
293 return false;
294
295 if (F.hasFnAttribute(Attribute::NoInline))
296 return false;
297
298 if (F.getCallingConv() == CallingConv::Cold)
299 return false;
300
301 if (PSI->isFunctionEntryCold(&F))
302 return false;
303 return true;
304}
305
306Function *
307HotColdSplitting::extractColdRegion(const SmallVectorImpl<BasicBlock *> &Region,
308 DominatorTree *DT, BlockFrequencyInfo *BFI,
309 OptimizationRemarkEmitter &ORE) {
310 LLVM_DEBUG(for (auto *BB : Region)
311 llvm::dbgs() << "\nExtracting: " << *BB;);
Sebastian Pop12171602018-09-14 20:36:10 +0000312
Aditya Kumar801394a2018-09-07 15:03:49 +0000313 // TODO: Pass BFI and BPI to update profile information.
Sebastian Pop12171602018-09-14 20:36:10 +0000314 CodeExtractor CE(Region, DT);
Aditya Kumar801394a2018-09-07 15:03:49 +0000315
316 SetVector<Value *> Inputs, Outputs, Sinks;
317 CE.findInputsOutputs(Inputs, Outputs, Sinks);
318
319 // Do not extract regions that have live exit variables.
320 if (Outputs.size() > 0)
321 return nullptr;
322
323 if (Function *OutF = CE.extractCodeRegion()) {
324 User *U = *OutF->user_begin();
325 CallInst *CI = cast<CallInst>(U);
326 CallSite CS(CI);
327 NumColdSESEOutlined++;
Sebastian Popa1f20fc2018-09-10 15:08:02 +0000328 if (GetTTI(*OutF).useColdCCForColdCall(*OutF)) {
329 OutF->setCallingConv(CallingConv::Cold);
330 CS.setCallingConv(CallingConv::Cold);
331 }
Aditya Kumar801394a2018-09-07 15:03:49 +0000332 CI->setIsNoInline();
Sebastian Pop0f30f082018-09-14 20:36:19 +0000333 LLVM_DEBUG(llvm::dbgs() << "Outlined Region: " << *OutF);
Aditya Kumar801394a2018-09-07 15:03:49 +0000334 return OutF;
335 }
336
337 ORE.emit([&]() {
338 return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
339 &*Region[0]->begin())
340 << "Failed to extract region at block "
341 << ore::NV("Block", Region.front());
342 });
343 return nullptr;
344}
345
346// Return the function created after outlining, nullptr otherwise.
Sebastian Pop0f30f082018-09-14 20:36:19 +0000347const Function *HotColdSplitting::outlineColdBlocks(Function &F,
348 const DenseSetBB &HotBlocks,
349 DominatorTree *DT,
350 PostDomTree *PDT) {
Aditya Kumar801394a2018-09-07 15:03:49 +0000351 auto BFI = GetBFI(F);
352 auto &ORE = (*GetORE)(F);
353 // Walking the dominator tree allows us to find the largest
354 // cold region.
355 BasicBlock *Begin = DT->getRootNode()->getBlock();
356 for (auto I = df_begin(Begin), E = df_end(Begin); I != E; ++I) {
357 BasicBlock *BB = *I;
Sebastian Pop12171602018-09-14 20:36:10 +0000358 if (PSI->isColdBB(BB, BFI) || !HotBlocks.count(BB)) {
Aditya Kumar801394a2018-09-07 15:03:49 +0000359 SmallVector<BasicBlock *, 4> ValidColdRegion, Region;
Sebastian Pop3abcf692018-09-14 20:36:14 +0000360 BasicBlock *Exit = (*PDT)[BB]->getIDom()->getBlock();
Aditya Kumar801394a2018-09-07 15:03:49 +0000361 BasicBlock *ExitColdRegion = nullptr;
Sebastian Pop0f30f082018-09-14 20:36:19 +0000362
Aditya Kumar801394a2018-09-07 15:03:49 +0000363 // Estimated cold region between a BB and its dom-frontier.
Sebastian Pop0f30f082018-09-14 20:36:19 +0000364 while (Exit && isSingleEntrySingleExit(BB, Exit, DT, PDT, Region) &&
Aditya Kumar801394a2018-09-07 15:03:49 +0000365 isOutlineCandidate(Region, Exit)) {
366 ExitColdRegion = Exit;
367 ValidColdRegion = Region;
368 Region.clear();
369 // Update Exit recursively to its dom-frontier.
370 Exit = (*PDT)[Exit]->getIDom()->getBlock();
371 }
372 if (ExitColdRegion) {
Sebastian Pop3abcf692018-09-14 20:36:14 +0000373 // Do not outline a region with only one block.
374 if (ValidColdRegion.size() == 1)
375 continue;
376
Aditya Kumar801394a2018-09-07 15:03:49 +0000377 ++NumColdSESEFound;
Sebastian Pop0f30f082018-09-14 20:36:19 +0000378 ValidColdRegion.push_back(ExitColdRegion);
Aditya Kumar801394a2018-09-07 15:03:49 +0000379 // Candidate for outlining. FIXME: Continue outlining.
Aditya Kumar801394a2018-09-07 15:03:49 +0000380 return extractColdRegion(ValidColdRegion, DT, BFI, ORE);
381 }
382 }
383 }
384 return nullptr;
385}
386
387bool HotColdSplitting::run(Module &M) {
388 for (auto &F : M) {
389 if (!shouldOutlineFrom(F))
390 continue;
391 DominatorTree DT(F);
392 PostDomTree PDT(F);
393 PDT.recalculate(F);
394 DenseSetBB HotBlocks;
395 if (EnableStaticAnalyis) // Static analysis of cold blocks.
396 HotBlocks = getHotBlocks(F);
397
Sebastian Pop0f30f082018-09-14 20:36:19 +0000398 const Function *Outlined = outlineColdBlocks(F, HotBlocks, &DT, &PDT);
Aditya Kumar801394a2018-09-07 15:03:49 +0000399 if (Outlined)
400 OutlinedFunctions.insert(Outlined);
401 }
402 return true;
403}
404
405bool HotColdSplittingLegacyPass::runOnModule(Module &M) {
406 if (skipModule(M))
407 return false;
408 ProfileSummaryInfo *PSI =
409 getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
Sebastian Popa1f20fc2018-09-10 15:08:02 +0000410 auto GTTI = [this](Function &F) -> TargetTransformInfo & {
411 return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
412 };
Aditya Kumar801394a2018-09-07 15:03:49 +0000413 auto GBFI = [this](Function &F) {
414 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
415 };
416 std::unique_ptr<OptimizationRemarkEmitter> ORE;
417 std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
418 [&ORE](Function &F) -> OptimizationRemarkEmitter & {
419 ORE.reset(new OptimizationRemarkEmitter(&F));
420 return *ORE.get();
421 };
422
Sebastian Popa1f20fc2018-09-10 15:08:02 +0000423 return HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M);
Aditya Kumar801394a2018-09-07 15:03:49 +0000424}
425
Aditya Kumar9e20ade2018-10-03 05:55:20 +0000426PreservedAnalyses
427HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) {
428 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
429
430 std::function<AssumptionCache &(Function &)> GetAssumptionCache =
431 [&FAM](Function &F) -> AssumptionCache & {
432 return FAM.getResult<AssumptionAnalysis>(F);
433 };
434
435 auto GBFI = [&FAM](Function &F) {
436 return &FAM.getResult<BlockFrequencyAnalysis>(F);
437 };
438
439 std::function<TargetTransformInfo &(Function &)> GTTI =
440 [&FAM](Function &F) -> TargetTransformInfo & {
441 return FAM.getResult<TargetIRAnalysis>(F);
442 };
443
444 std::unique_ptr<OptimizationRemarkEmitter> ORE;
445 std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
446 [&ORE](Function &F) -> OptimizationRemarkEmitter & {
447 ORE.reset(new OptimizationRemarkEmitter(&F));
448 return *ORE.get();
449 };
450
451 ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
452
453 if (HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M))
454 return PreservedAnalyses::none();
455 return PreservedAnalyses::all();
456}
457
Aditya Kumar801394a2018-09-07 15:03:49 +0000458char HotColdSplittingLegacyPass::ID = 0;
459INITIALIZE_PASS_BEGIN(HotColdSplittingLegacyPass, "hotcoldsplit",
460 "Hot Cold Splitting", false, false)
461INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
462INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
463INITIALIZE_PASS_END(HotColdSplittingLegacyPass, "hotcoldsplit",
464 "Hot Cold Splitting", false, false)
465
466ModulePass *llvm::createHotColdSplittingPass() {
467 return new HotColdSplittingLegacyPass();
468}