blob: 8542f56bbd02eb69d454c3e9e0e374845f577627 [file] [log] [blame]
Aditya Kumar801394a2018-09-07 15:03:49 +00001//===- HotColdSplitting.cpp -- Outline Cold Regions -------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Outline cold regions to a separate function.
11// TODO: Update BFI and BPI
12// TODO: Add all the outlined functions to a separate section.
13//
14//===----------------------------------------------------------------------===//
15
16#include "llvm/ADT/SmallVector.h"
17#include "llvm/ADT/Statistic.h"
18#include "llvm/Analysis/AliasAnalysis.h"
19#include "llvm/Analysis/BlockFrequencyInfo.h"
20#include "llvm/Analysis/BranchProbabilityInfo.h"
21#include "llvm/Analysis/CFG.h"
22#include "llvm/Analysis/OptimizationRemarkEmitter.h"
23#include "llvm/Analysis/PostDominators.h"
24#include "llvm/Analysis/ProfileSummaryInfo.h"
Sebastian Popa1f20fc2018-09-10 15:08:02 +000025#include "llvm/Analysis/TargetTransformInfo.h"
Aditya Kumar801394a2018-09-07 15:03:49 +000026#include "llvm/IR/BasicBlock.h"
27#include "llvm/IR/CFG.h"
28#include "llvm/IR/DataLayout.h"
29#include "llvm/IR/DiagnosticInfo.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/Function.h"
32#include "llvm/IR/Instruction.h"
33#include "llvm/IR/Instructions.h"
34#include "llvm/IR/Metadata.h"
35#include "llvm/IR/Module.h"
36#include "llvm/IR/PassManager.h"
37#include "llvm/IR/Type.h"
38#include "llvm/IR/Use.h"
39#include "llvm/IR/User.h"
40#include "llvm/IR/Value.h"
41#include "llvm/Pass.h"
42#include "llvm/Support/BlockFrequency.h"
43#include "llvm/Support/BranchProbability.h"
44#include "llvm/Support/Debug.h"
45#include "llvm/Support/raw_ostream.h"
46#include "llvm/Transforms/IPO.h"
47#include "llvm/Transforms/Scalar.h"
48#include "llvm/Transforms/Utils/BasicBlockUtils.h"
49#include "llvm/Transforms/Utils/Cloning.h"
50#include "llvm/Transforms/Utils/CodeExtractor.h"
51#include "llvm/Transforms/Utils/Local.h"
52#include "llvm/Transforms/Utils/SSAUpdater.h"
53#include "llvm/Transforms/Utils/ValueMapper.h"
54#include <algorithm>
55#include <cassert>
56
57#define DEBUG_TYPE "hotcoldsplit"
58
59STATISTIC(NumColdSESEFound,
60 "Number of cold single entry single exit (SESE) regions found.");
61STATISTIC(NumColdSESEOutlined,
62 "Number of cold single entry single exit (SESE) regions outlined.");
63
64using namespace llvm;
65
66static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
67 cl::init(true), cl::Hidden);
68
69
70namespace {
71
72struct PostDomTree : PostDomTreeBase<BasicBlock> {
73 PostDomTree(Function &F) { recalculate(F); }
74};
75
76typedef DenseSet<const BasicBlock *> DenseSetBB;
77
78// From: https://reviews.llvm.org/D22558
79// Exit is not part of the region.
80static bool isSingleEntrySingleExit(BasicBlock *Entry, const BasicBlock *Exit,
81 DominatorTree *DT, PostDomTree *PDT,
82 SmallVectorImpl<BasicBlock *> &Region) {
83 if (!DT->dominates(Entry, Exit))
84 return false;
85
86 if (!PDT->dominates(Exit, Entry))
87 return false;
88
89 Region.push_back(Entry);
90 for (auto I = df_begin(Entry), E = df_end(Entry); I != E;) {
91 if (*I == Exit) {
92 I.skipChildren();
93 continue;
94 }
95 if (!DT->dominates(Entry, *I))
96 return false;
97 Region.push_back(*I);
98 ++I;
99 }
100 return true;
101}
102
103bool blockEndsInUnreachable(const BasicBlock &BB) {
104 if (BB.empty())
105 return true;
106 const TerminatorInst *I = BB.getTerminator();
107 if (isa<ReturnInst>(I) || isa<IndirectBrInst>(I))
108 return true;
109 // Unreachable blocks do not have any successor.
110 return succ_empty(&BB);
111}
112
113static
114bool unlikelyExecuted(const BasicBlock &BB) {
115 if (blockEndsInUnreachable(BB))
116 return true;
117 // Exception handling blocks are unlikely executed.
118 if (BB.isEHPad())
119 return true;
120 for (const Instruction &I : BB)
121 if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
122 // The block is cold if it calls functions tagged as cold or noreturn.
123 if (CI->hasFnAttr(Attribute::Cold) ||
124 CI->hasFnAttr(Attribute::NoReturn))
125 return true;
126
127 // Assume that inline assembly is hot code.
128 if (isa<InlineAsm>(CI->getCalledValue()))
129 return false;
130 }
131 return false;
132}
133
134static DenseSetBB getHotBlocks(Function &F) {
135 // First mark all function basic blocks as hot or cold.
136 DenseSet<const BasicBlock *> ColdBlocks;
137 for (BasicBlock &BB : F)
138 if (unlikelyExecuted(BB))
139 ColdBlocks.insert(&BB);
140 // Forward propagation.
141 DenseSetBB AllColdBlocks;
142 SmallVector<const BasicBlock *, 8> WL;
143 DenseSetBB Visited; // Track hot blocks.
144
145 const BasicBlock *It = &F.front();
146 const TerminatorInst *TI = It->getTerminator();
147 if (!ColdBlocks.count(It)) {
148 Visited.insert(It);
149 // Breadth First Search to mark edges not reachable from cold.
150 WL.push_back(It);
151 while (WL.size() > 0) {
152 It = WL.pop_back_val();
153 for (const BasicBlock *Succ : successors(TI)) {
154 // Do not visit blocks that are cold.
155 if (!ColdBlocks.count(Succ) && !Visited.count(Succ)) {
156 Visited.insert(Succ);
157 WL.push_back(Succ);
158 }
159 }
160 }
161 }
162
163 return Visited;
164}
165
166class HotColdSplitting {
167public:
168 HotColdSplitting(ProfileSummaryInfo *ProfSI,
169 function_ref<BlockFrequencyInfo *(Function &)> GBFI,
Sebastian Popa1f20fc2018-09-10 15:08:02 +0000170 function_ref<TargetTransformInfo &(Function &)> GTTI,
Aditya Kumar801394a2018-09-07 15:03:49 +0000171 std::function<OptimizationRemarkEmitter &(Function &)> *GORE)
Sebastian Popa1f20fc2018-09-10 15:08:02 +0000172 : PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE) {}
Aditya Kumar801394a2018-09-07 15:03:49 +0000173 bool run(Module &M);
174
175private:
176 bool shouldOutlineFrom(const Function &F) const;
177 Function *outlineColdBlocks(Function &F,
178 const DenseSet<const BasicBlock *> &ColdBlock,
179 DominatorTree *DT, PostDomTree *PDT);
180 Function *extractColdRegion(const SmallVectorImpl<BasicBlock *> &Region,
181 DominatorTree *DT, BlockFrequencyInfo *BFI,
182 OptimizationRemarkEmitter &ORE);
183 bool isOutlineCandidate(const SmallVectorImpl<BasicBlock *> &Region,
184 const BasicBlock *Exit) const {
185 if (!Exit)
186 return false;
187 // TODO: Find a better metric to compute the size of region being outlined.
188 if (Region.size() == 1)
189 return false;
190 // Regions with landing pads etc.
191 for (const BasicBlock *BB : Region) {
192 if (BB->isEHPad() || BB->hasAddressTaken())
193 return false;
194 }
195 return true;
196 }
197 SmallPtrSet<const Function *, 2> OutlinedFunctions;
198 ProfileSummaryInfo *PSI;
199 function_ref<BlockFrequencyInfo *(Function &)> GetBFI;
Sebastian Popa1f20fc2018-09-10 15:08:02 +0000200 function_ref<TargetTransformInfo &(Function &)> GetTTI;
Aditya Kumar801394a2018-09-07 15:03:49 +0000201 std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
202};
203
204class HotColdSplittingLegacyPass : public ModulePass {
205public:
206 static char ID;
207 HotColdSplittingLegacyPass() : ModulePass(ID) {
208 initializeHotColdSplittingLegacyPassPass(*PassRegistry::getPassRegistry());
209 }
210
211 void getAnalysisUsage(AnalysisUsage &AU) const override {
212 AU.addRequired<AssumptionCacheTracker>();
213 AU.addRequired<BlockFrequencyInfoWrapperPass>();
214 AU.addRequired<ProfileSummaryInfoWrapperPass>();
Sebastian Popa1f20fc2018-09-10 15:08:02 +0000215 AU.addRequired<TargetTransformInfoWrapperPass>();
Aditya Kumar801394a2018-09-07 15:03:49 +0000216 }
217
218 bool runOnModule(Module &M) override;
219};
220
221} // end anonymous namespace
222
223// Returns false if the function should not be considered for hot-cold split
224// optimization. Already outlined functions have coldcc so no need to check
225// for them here.
226bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
227 if (F.size() <= 2)
228 return false;
229
230 if (F.hasAddressTaken())
231 return false;
232
233 if (F.hasFnAttribute(Attribute::AlwaysInline))
234 return false;
235
236 if (F.hasFnAttribute(Attribute::NoInline))
237 return false;
238
239 if (F.getCallingConv() == CallingConv::Cold)
240 return false;
241
242 if (PSI->isFunctionEntryCold(&F))
243 return false;
244 return true;
245}
246
247Function *
248HotColdSplitting::extractColdRegion(const SmallVectorImpl<BasicBlock *> &Region,
249 DominatorTree *DT, BlockFrequencyInfo *BFI,
250 OptimizationRemarkEmitter &ORE) {
251 LLVM_DEBUG(for (auto *BB : Region)
252 llvm::dbgs() << "\nExtracting: " << *BB;);
253 // TODO: Pass BFI and BPI to update profile information.
254 CodeExtractor CE(Region, DT, /*AggregateArgs*/ false, nullptr, nullptr,
255 /* AllowVarargs */ false);
256
257 SetVector<Value *> Inputs, Outputs, Sinks;
258 CE.findInputsOutputs(Inputs, Outputs, Sinks);
259
260 // Do not extract regions that have live exit variables.
261 if (Outputs.size() > 0)
262 return nullptr;
263
264 if (Function *OutF = CE.extractCodeRegion()) {
265 User *U = *OutF->user_begin();
266 CallInst *CI = cast<CallInst>(U);
267 CallSite CS(CI);
268 NumColdSESEOutlined++;
Sebastian Popa1f20fc2018-09-10 15:08:02 +0000269 if (GetTTI(*OutF).useColdCCForColdCall(*OutF)) {
270 OutF->setCallingConv(CallingConv::Cold);
271 CS.setCallingConv(CallingConv::Cold);
272 }
Aditya Kumar801394a2018-09-07 15:03:49 +0000273 CI->setIsNoInline();
274 LLVM_DEBUG(llvm::dbgs() << "Outlined Region at block: " << Region.front());
275 return OutF;
276 }
277
278 ORE.emit([&]() {
279 return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
280 &*Region[0]->begin())
281 << "Failed to extract region at block "
282 << ore::NV("Block", Region.front());
283 });
284 return nullptr;
285}
286
287// Return the function created after outlining, nullptr otherwise.
288Function *HotColdSplitting::outlineColdBlocks(Function &F,
289 const DenseSetBB &HotBlock,
290 DominatorTree *DT,
291 PostDomTree *PDT) {
292 auto BFI = GetBFI(F);
293 auto &ORE = (*GetORE)(F);
294 // Walking the dominator tree allows us to find the largest
295 // cold region.
296 BasicBlock *Begin = DT->getRootNode()->getBlock();
297 for (auto I = df_begin(Begin), E = df_end(Begin); I != E; ++I) {
298 BasicBlock *BB = *I;
299 if (PSI->isColdBB(BB, BFI) || !HotBlock.count(BB)) {
300 SmallVector<BasicBlock *, 4> ValidColdRegion, Region;
301 auto *BBNode = (*PDT)[BB];
302 auto Exit = BBNode->getIDom()->getBlock();
303 // We might need a virtual exit which post-dominates all basic blocks.
304 if (!Exit)
305 continue;
306 BasicBlock *ExitColdRegion = nullptr;
307 // Estimated cold region between a BB and its dom-frontier.
308 while (isSingleEntrySingleExit(BB, Exit, DT, PDT, Region) &&
309 isOutlineCandidate(Region, Exit)) {
310 ExitColdRegion = Exit;
311 ValidColdRegion = Region;
312 Region.clear();
313 // Update Exit recursively to its dom-frontier.
314 Exit = (*PDT)[Exit]->getIDom()->getBlock();
315 }
316 if (ExitColdRegion) {
317 ++NumColdSESEFound;
318 // Candidate for outlining. FIXME: Continue outlining.
319 // FIXME: Shouldn't need uniquing, debug isSingleEntrySingleExit
320 //std::sort(ValidColdRegion.begin(), ValidColdRegion.end());
321 auto last = std::unique(ValidColdRegion.begin(), ValidColdRegion.end());
322 ValidColdRegion.erase(last, ValidColdRegion.end());
323 return extractColdRegion(ValidColdRegion, DT, BFI, ORE);
324 }
325 }
326 }
327 return nullptr;
328}
329
330bool HotColdSplitting::run(Module &M) {
331 for (auto &F : M) {
332 if (!shouldOutlineFrom(F))
333 continue;
334 DominatorTree DT(F);
335 PostDomTree PDT(F);
336 PDT.recalculate(F);
337 DenseSetBB HotBlocks;
338 if (EnableStaticAnalyis) // Static analysis of cold blocks.
339 HotBlocks = getHotBlocks(F);
340
341 auto Outlined = outlineColdBlocks(F, HotBlocks, &DT, &PDT);
342 if (Outlined)
343 OutlinedFunctions.insert(Outlined);
344 }
345 return true;
346}
347
348bool HotColdSplittingLegacyPass::runOnModule(Module &M) {
349 if (skipModule(M))
350 return false;
351 ProfileSummaryInfo *PSI =
352 getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
Sebastian Popa1f20fc2018-09-10 15:08:02 +0000353 auto GTTI = [this](Function &F) -> TargetTransformInfo & {
354 return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
355 };
Aditya Kumar801394a2018-09-07 15:03:49 +0000356 auto GBFI = [this](Function &F) {
357 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
358 };
359 std::unique_ptr<OptimizationRemarkEmitter> ORE;
360 std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
361 [&ORE](Function &F) -> OptimizationRemarkEmitter & {
362 ORE.reset(new OptimizationRemarkEmitter(&F));
363 return *ORE.get();
364 };
365
Sebastian Popa1f20fc2018-09-10 15:08:02 +0000366 return HotColdSplitting(PSI, GBFI, GTTI, &GetORE).run(M);
Aditya Kumar801394a2018-09-07 15:03:49 +0000367}
368
369char HotColdSplittingLegacyPass::ID = 0;
370INITIALIZE_PASS_BEGIN(HotColdSplittingLegacyPass, "hotcoldsplit",
371 "Hot Cold Splitting", false, false)
372INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
373INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
374INITIALIZE_PASS_END(HotColdSplittingLegacyPass, "hotcoldsplit",
375 "Hot Cold Splitting", false, false)
376
377ModulePass *llvm::createHotColdSplittingPass() {
378 return new HotColdSplittingLegacyPass();
379}