blob: be64da5d8087fb059ad5b893ea46fb11500be717 [file] [log] [blame]
Aditya Kumar801394a2018-09-07 15:03:49 +00001//===- HotColdSplitting.cpp -- Outline Cold Regions -------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Outline cold regions to a separate function.
11// TODO: Update BFI and BPI
12// TODO: Add all the outlined functions to a separate section.
13//
14//===----------------------------------------------------------------------===//
15
16#include "llvm/ADT/SmallVector.h"
17#include "llvm/ADT/Statistic.h"
18#include "llvm/Analysis/AliasAnalysis.h"
19#include "llvm/Analysis/BlockFrequencyInfo.h"
20#include "llvm/Analysis/BranchProbabilityInfo.h"
21#include "llvm/Analysis/CFG.h"
22#include "llvm/Analysis/OptimizationRemarkEmitter.h"
23#include "llvm/Analysis/PostDominators.h"
24#include "llvm/Analysis/ProfileSummaryInfo.h"
Sebastian Popa1f20fc2018-09-10 15:08:02 +000025#include "llvm/Analysis/TargetTransformInfo.h"
Aditya Kumar801394a2018-09-07 15:03:49 +000026#include "llvm/IR/BasicBlock.h"
27#include "llvm/IR/CFG.h"
28#include "llvm/IR/DataLayout.h"
29#include "llvm/IR/DiagnosticInfo.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/Function.h"
32#include "llvm/IR/Instruction.h"
33#include "llvm/IR/Instructions.h"
34#include "llvm/IR/Metadata.h"
35#include "llvm/IR/Module.h"
36#include "llvm/IR/PassManager.h"
37#include "llvm/IR/Type.h"
38#include "llvm/IR/Use.h"
39#include "llvm/IR/User.h"
40#include "llvm/IR/Value.h"
41#include "llvm/Pass.h"
42#include "llvm/Support/BlockFrequency.h"
43#include "llvm/Support/BranchProbability.h"
44#include "llvm/Support/Debug.h"
45#include "llvm/Support/raw_ostream.h"
46#include "llvm/Transforms/IPO.h"
47#include "llvm/Transforms/Scalar.h"
48#include "llvm/Transforms/Utils/BasicBlockUtils.h"
49#include "llvm/Transforms/Utils/Cloning.h"
50#include "llvm/Transforms/Utils/CodeExtractor.h"
51#include "llvm/Transforms/Utils/Local.h"
52#include "llvm/Transforms/Utils/SSAUpdater.h"
53#include "llvm/Transforms/Utils/ValueMapper.h"
54#include <algorithm>
55#include <cassert>
56
57#define DEBUG_TYPE "hotcoldsplit"
58
59STATISTIC(NumColdSESEFound,
60 "Number of cold single entry single exit (SESE) regions found.");
61STATISTIC(NumColdSESEOutlined,
62 "Number of cold single entry single exit (SESE) regions outlined.");
63
64using namespace llvm;
65
66static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
67 cl::init(true), cl::Hidden);
68
69
70namespace {
71
72struct PostDomTree : PostDomTreeBase<BasicBlock> {
73 PostDomTree(Function &F) { recalculate(F); }
74};
75
76typedef DenseSet<const BasicBlock *> DenseSetBB;
Sebastian Pop12171602018-09-14 20:36:10 +000077typedef DenseMap<const BasicBlock *, uint64_t> DenseMapBBInt;
Aditya Kumar801394a2018-09-07 15:03:49 +000078
79// From: https://reviews.llvm.org/D22558
80// Exit is not part of the region.
81static bool isSingleEntrySingleExit(BasicBlock *Entry, const BasicBlock *Exit,
82 DominatorTree *DT, PostDomTree *PDT,
83 SmallVectorImpl<BasicBlock *> &Region) {
84 if (!DT->dominates(Entry, Exit))
85 return false;
86
87 if (!PDT->dominates(Exit, Entry))
88 return false;
89
Aditya Kumar801394a2018-09-07 15:03:49 +000090 for (auto I = df_begin(Entry), E = df_end(Entry); I != E;) {
91 if (*I == Exit) {
92 I.skipChildren();
93 continue;
94 }
95 if (!DT->dominates(Entry, *I))
96 return false;
97 Region.push_back(*I);
98 ++I;
99 }
100 return true;
101}
102
103bool blockEndsInUnreachable(const BasicBlock &BB) {
104 if (BB.empty())
105 return true;
106 const TerminatorInst *I = BB.getTerminator();
107 if (isa<ReturnInst>(I) || isa<IndirectBrInst>(I))
108 return true;
109 // Unreachable blocks do not have any successor.
110 return succ_empty(&BB);
111}
112
113static
114bool unlikelyExecuted(const BasicBlock &BB) {
115 if (blockEndsInUnreachable(BB))
116 return true;
117 // Exception handling blocks are unlikely executed.
118 if (BB.isEHPad())
119 return true;
120 for (const Instruction &I : BB)
121 if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
122 // The block is cold if it calls functions tagged as cold or noreturn.
123 if (CI->hasFnAttr(Attribute::Cold) ||
124 CI->hasFnAttr(Attribute::NoReturn))
125 return true;
126
127 // Assume that inline assembly is hot code.
128 if (isa<InlineAsm>(CI->getCalledValue()))
129 return false;
130 }
131 return false;
132}
133
134static DenseSetBB getHotBlocks(Function &F) {
Sebastian Pop12171602018-09-14 20:36:10 +0000135
136 // Mark all cold basic blocks.
137 DenseSetBB ColdBlocks;
Aditya Kumar801394a2018-09-07 15:03:49 +0000138 for (BasicBlock &BB : F)
139 if (unlikelyExecuted(BB))
Sebastian Pop12171602018-09-14 20:36:10 +0000140 ColdBlocks.insert((const BasicBlock *)&BB);
141
142 // Forward propagation: basic blocks are hot when they are reachable from the
143 // beginning of the function through a path that does not contain cold blocks.
Aditya Kumar801394a2018-09-07 15:03:49 +0000144 SmallVector<const BasicBlock *, 8> WL;
Sebastian Pop12171602018-09-14 20:36:10 +0000145 DenseSetBB HotBlocks;
Aditya Kumar801394a2018-09-07 15:03:49 +0000146
147 const BasicBlock *It = &F.front();
Aditya Kumar801394a2018-09-07 15:03:49 +0000148 if (!ColdBlocks.count(It)) {
Sebastian Pop12171602018-09-14 20:36:10 +0000149 HotBlocks.insert(It);
150 // Breadth First Search to mark edges reachable from hot.
Aditya Kumar801394a2018-09-07 15:03:49 +0000151 WL.push_back(It);
152 while (WL.size() > 0) {
153 It = WL.pop_back_val();
Sebastian Pop12171602018-09-14 20:36:10 +0000154
155 for (const BasicBlock *Succ : successors(It)) {
Aditya Kumar801394a2018-09-07 15:03:49 +0000156 // Do not visit blocks that are cold.
Sebastian Pop12171602018-09-14 20:36:10 +0000157 if (!ColdBlocks.count(Succ) && !HotBlocks.count(Succ)) {
158 HotBlocks.insert(Succ);
Aditya Kumar801394a2018-09-07 15:03:49 +0000159 WL.push_back(Succ);
160 }
161 }
162 }
163 }
164
Sebastian Pop12171602018-09-14 20:36:10 +0000165 assert(WL.empty() && "work list should be empty");
166
167 DenseMapBBInt NumHotSuccessors;
168 // Back propagation: when all successors of a basic block are cold, the
169 // basic block is cold as well.
170 for (BasicBlock &BBRef : F) {
171 const BasicBlock *BB = &BBRef;
172 if (HotBlocks.count(BB)) {
173 // Keep a count of hot successors for every hot block.
174 NumHotSuccessors[BB] = 0;
175 for (const BasicBlock *Succ : successors(BB))
176 if (!ColdBlocks.count(Succ))
177 NumHotSuccessors[BB] += 1;
178
179 // Add to work list the blocks with all successors cold. Those are the
180 // root nodes in the next loop, where we will move those blocks from
181 // HotBlocks to ColdBlocks and iterate over their predecessors.
182 if (NumHotSuccessors[BB] == 0)
183 WL.push_back(BB);
184 }
185 }
186
187 while (WL.size() > 0) {
188 It = WL.pop_back_val();
189 if (ColdBlocks.count(It))
190 continue;
191
192 // Move the block from HotBlocks to ColdBlocks.
193 HotBlocks.erase(It);
194 ColdBlocks.insert(It);
195
196 // Iterate over the predecessors.
197 for (const BasicBlock *Pred : predecessors(It)) {
198 if (HotBlocks.count(Pred)) {
199 NumHotSuccessors[Pred] -= 1;
200
201 // If Pred has no more hot successors, add it to the work list.
202 if (NumHotSuccessors[Pred] == 0)
203 WL.push_back(Pred);
204 }
205 }
206 }
207
208 return HotBlocks;
Aditya Kumar801394a2018-09-07 15:03:49 +0000209}
210
211class HotColdSplitting {
212public:
213 HotColdSplitting(ProfileSummaryInfo *ProfSI,
214 function_ref<BlockFrequencyInfo *(Function &)> GBFI,
Sebastian Popa1f20fc2018-09-10 15:08:02 +0000215 function_ref<TargetTransformInfo &(Function &)> GTTI,
Aditya Kumar801394a2018-09-07 15:03:49 +0000216 std::function<OptimizationRemarkEmitter &(Function &)> *GORE)
Sebastian Popa1f20fc2018-09-10 15:08:02 +0000217 : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE) {}
Aditya Kumar801394a2018-09-07 15:03:49 +0000218 bool run(Module &M);
219
220private:
221 bool shouldOutlineFrom(const Function &F) const;
Sebastian Pop0f30f082018-09-14 20:36:19 +0000222 const Function *outlineColdBlocks(Function &F, const DenseSetBB &ColdBlock,
223 DominatorTree *DT, PostDomTree *PDT);
Aditya Kumar801394a2018-09-07 15:03:49 +0000224 Function *extractColdRegion(const SmallVectorImpl<BasicBlock *> &Region,
225 DominatorTree *DT, BlockFrequencyInfo *BFI,
226 OptimizationRemarkEmitter &ORE);
227 bool isOutlineCandidate(const SmallVectorImpl<BasicBlock *> &Region,
228 const BasicBlock *Exit) const {
229 if (!Exit)
230 return false;
Sebastian Pop3abcf692018-09-14 20:36:14 +0000231
Aditya Kumar801394a2018-09-07 15:03:49 +0000232 // Regions with landing pads etc.
233 for (const BasicBlock *BB : Region) {
234 if (BB->isEHPad() || BB->hasAddressTaken())
235 return false;
236 }
237 return true;
238 }
239 SmallPtrSet<const Function *, 2> OutlinedFunctions;
240 ProfileSummaryInfo *PSI;
241 function_ref<BlockFrequencyInfo *(Function &)> GetBFI;
Sebastian Popa1f20fc2018-09-10 15:08:02 +0000242 function_ref<TargetTransformInfo &(Function &)> GetTTI;
Aditya Kumar801394a2018-09-07 15:03:49 +0000243 std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
244};
245
246class HotColdSplittingLegacyPass : public ModulePass {
247public:
248 static char ID;
249 HotColdSplittingLegacyPass() : ModulePass(ID) {
250 initializeHotColdSplittingLegacyPassPass(*PassRegistry::getPassRegistry());
251 }
252
253 void getAnalysisUsage(AnalysisUsage &AU) const override {
254 AU.addRequired<AssumptionCacheTracker>();
255 AU.addRequired<BlockFrequencyInfoWrapperPass>();
256 AU.addRequired<ProfileSummaryInfoWrapperPass>();
Sebastian Popa1f20fc2018-09-10 15:08:02 +0000257 AU.addRequired<TargetTransformInfoWrapperPass>();
Aditya Kumar801394a2018-09-07 15:03:49 +0000258 }
259
260 bool runOnModule(Module &M) override;
261};
262
263} // end anonymous namespace
264
265// Returns false if the function should not be considered for hot-cold split
Sebastian Pop0f30f082018-09-14 20:36:19 +0000266// optimization.
Aditya Kumar801394a2018-09-07 15:03:49 +0000267bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
Sebastian Pop0f30f082018-09-14 20:36:19 +0000268 // Do not try to outline again from an already outlined cold function.
269 if (OutlinedFunctions.count(&F))
270 return false;
271
Aditya Kumar801394a2018-09-07 15:03:49 +0000272 if (F.size() <= 2)
273 return false;
274
275 if (F.hasAddressTaken())
276 return false;
277
278 if (F.hasFnAttribute(Attribute::AlwaysInline))
279 return false;
280
281 if (F.hasFnAttribute(Attribute::NoInline))
282 return false;
283
284 if (F.getCallingConv() == CallingConv::Cold)
285 return false;
286
287 if (PSI->isFunctionEntryCold(&F))
288 return false;
289 return true;
290}
291
292Function *
293HotColdSplitting::extractColdRegion(const SmallVectorImpl<BasicBlock *> &Region,
294 DominatorTree *DT, BlockFrequencyInfo *BFI,
295 OptimizationRemarkEmitter &ORE) {
296 LLVM_DEBUG(for (auto *BB : Region)
297 llvm::dbgs() << "\nExtracting: " << *BB;);
Sebastian Pop12171602018-09-14 20:36:10 +0000298
Aditya Kumar801394a2018-09-07 15:03:49 +0000299 // TODO: Pass BFI and BPI to update profile information.
Sebastian Pop12171602018-09-14 20:36:10 +0000300 CodeExtractor CE(Region, DT);
Aditya Kumar801394a2018-09-07 15:03:49 +0000301
302 SetVector<Value *> Inputs, Outputs, Sinks;
303 CE.findInputsOutputs(Inputs, Outputs, Sinks);
304
305 // Do not extract regions that have live exit variables.
306 if (Outputs.size() > 0)
307 return nullptr;
308
309 if (Function *OutF = CE.extractCodeRegion()) {
310 User *U = *OutF->user_begin();
311 CallInst *CI = cast<CallInst>(U);
312 CallSite CS(CI);
313 NumColdSESEOutlined++;
Sebastian Popa1f20fc2018-09-10 15:08:02 +0000314 if (GetTTI(*OutF).useColdCCForColdCall(*OutF)) {
315 OutF->setCallingConv(CallingConv::Cold);
316 CS.setCallingConv(CallingConv::Cold);
317 }
Aditya Kumar801394a2018-09-07 15:03:49 +0000318 CI->setIsNoInline();
Sebastian Pop0f30f082018-09-14 20:36:19 +0000319 LLVM_DEBUG(llvm::dbgs() << "Outlined Region: " << *OutF);
Aditya Kumar801394a2018-09-07 15:03:49 +0000320 return OutF;
321 }
322
323 ORE.emit([&]() {
324 return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
325 &*Region[0]->begin())
326 << "Failed to extract region at block "
327 << ore::NV("Block", Region.front());
328 });
329 return nullptr;
330}
331
332// Return the function created after outlining, nullptr otherwise.
Sebastian Pop0f30f082018-09-14 20:36:19 +0000333const Function *HotColdSplitting::outlineColdBlocks(Function &F,
334 const DenseSetBB &HotBlocks,
335 DominatorTree *DT,
336 PostDomTree *PDT) {
Aditya Kumar801394a2018-09-07 15:03:49 +0000337 auto BFI = GetBFI(F);
338 auto &ORE = (*GetORE)(F);
339 // Walking the dominator tree allows us to find the largest
340 // cold region.
341 BasicBlock *Begin = DT->getRootNode()->getBlock();
342 for (auto I = df_begin(Begin), E = df_end(Begin); I != E; ++I) {
343 BasicBlock *BB = *I;
Sebastian Pop12171602018-09-14 20:36:10 +0000344 if (PSI->isColdBB(BB, BFI) || !HotBlocks.count(BB)) {
Aditya Kumar801394a2018-09-07 15:03:49 +0000345 SmallVector<BasicBlock *, 4> ValidColdRegion, Region;
Sebastian Pop3abcf692018-09-14 20:36:14 +0000346 BasicBlock *Exit = (*PDT)[BB]->getIDom()->getBlock();
Aditya Kumar801394a2018-09-07 15:03:49 +0000347 BasicBlock *ExitColdRegion = nullptr;
Sebastian Pop0f30f082018-09-14 20:36:19 +0000348
Aditya Kumar801394a2018-09-07 15:03:49 +0000349 // Estimated cold region between a BB and its dom-frontier.
Sebastian Pop0f30f082018-09-14 20:36:19 +0000350 while (Exit && isSingleEntrySingleExit(BB, Exit, DT, PDT, Region) &&
Aditya Kumar801394a2018-09-07 15:03:49 +0000351 isOutlineCandidate(Region, Exit)) {
352 ExitColdRegion = Exit;
353 ValidColdRegion = Region;
354 Region.clear();
355 // Update Exit recursively to its dom-frontier.
356 Exit = (*PDT)[Exit]->getIDom()->getBlock();
357 }
358 if (ExitColdRegion) {
Sebastian Pop3abcf692018-09-14 20:36:14 +0000359 // Do not outline a region with only one block.
360 if (ValidColdRegion.size() == 1)
361 continue;
362
Aditya Kumar801394a2018-09-07 15:03:49 +0000363 ++NumColdSESEFound;
Sebastian Pop0f30f082018-09-14 20:36:19 +0000364 ValidColdRegion.push_back(ExitColdRegion);
Aditya Kumar801394a2018-09-07 15:03:49 +0000365 // Candidate for outlining. FIXME: Continue outlining.
Aditya Kumar801394a2018-09-07 15:03:49 +0000366 return extractColdRegion(ValidColdRegion, DT, BFI, ORE);
367 }
368 }
369 }
370 return nullptr;
371}
372
373bool HotColdSplitting::run(Module &M) {
374 for (auto &F : M) {
375 if (!shouldOutlineFrom(F))
376 continue;
377 DominatorTree DT(F);
378 PostDomTree PDT(F);
379 PDT.recalculate(F);
380 DenseSetBB HotBlocks;
381 if (EnableStaticAnalyis) // Static analysis of cold blocks.
382 HotBlocks = getHotBlocks(F);
383
Sebastian Pop0f30f082018-09-14 20:36:19 +0000384 const Function *Outlined = outlineColdBlocks(F, HotBlocks, &DT, &PDT);
Aditya Kumar801394a2018-09-07 15:03:49 +0000385 if (Outlined)
386 OutlinedFunctions.insert(Outlined);
387 }
388 return true;
389}
390
391bool HotColdSplittingLegacyPass::runOnModule(Module &M) {
392 if (skipModule(M))
393 return false;
394 ProfileSummaryInfo *PSI =
395 getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
Sebastian Popa1f20fc2018-09-10 15:08:02 +0000396 auto GTTI = [this](Function &F) -> TargetTransformInfo & {
397 return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
398 };
Aditya Kumar801394a2018-09-07 15:03:49 +0000399 auto GBFI = [this](Function &F) {
400 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
401 };
402 std::unique_ptr<OptimizationRemarkEmitter> ORE;
403 std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
404 [&ORE](Function &F) -> OptimizationRemarkEmitter & {
405 ORE.reset(new OptimizationRemarkEmitter(&F));
406 return *ORE.get();
407 };
408
Sebastian Popa1f20fc2018-09-10 15:08:02 +0000409 return HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M);
Aditya Kumar801394a2018-09-07 15:03:49 +0000410}
411
412char HotColdSplittingLegacyPass::ID = 0;
413INITIALIZE_PASS_BEGIN(HotColdSplittingLegacyPass, "hotcoldsplit",
414 "Hot Cold Splitting", false, false)
415INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
416INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
417INITIALIZE_PASS_END(HotColdSplittingLegacyPass, "hotcoldsplit",
418 "Hot Cold Splitting", false, false)
419
420ModulePass *llvm::createHotColdSplittingPass() {
421 return new HotColdSplittingLegacyPass();
422}