//===- PartialInlining.cpp - Inline parts of functions --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs partial inlining, typically by inlining an if statement
// that surrounds the body of the function.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/PartialInlining.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
#include <algorithm>
#include <cstdint>
#include <functional>
#include <memory>
#include <tuple>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "partial-inlining"

STATISTIC(NumPartialInlined,
          "Number of callsites functions partially inlined into.");

// Command line option to disable partial-inlining. The default is false:
static cl::opt<bool>
    DisablePartialInlining("disable-partial-inlining", cl::init(false),
                           cl::Hidden, cl::desc("Disable partial inlining"));

// This is an option used by testing:
static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
                                      cl::init(false), cl::ZeroOrMore,
                                      cl::ReallyHidden,
                                      cl::desc("Skip Cost Analysis"));

static cl::opt<unsigned> MaxNumInlineBlocks(
    "max-num-inline-blocks", cl::init(5), cl::Hidden,
    cl::desc("Max number of blocks to be partially inlined"));

// Command line option to set the maximum number of partial inlining allowed
// for the module. The default value of -1 means no limit.
static cl::opt<int> MaxNumPartialInlining(
    "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
    cl::desc("Max number of partial inlining. The default is unlimited"));

// Used only when PGO or user annotated branch data is absent. It is the
// minimum value that is used to weight the outline region. If BFI
// produces a larger value, the BFI value will be used.
static cl::opt<int>
    OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
                             cl::Hidden, cl::ZeroOrMore,
                             cl::desc("Relative frequency of outline region to "
                                      "the entry block"));

static cl::opt<unsigned> ExtraOutliningPenalty(
    "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
    cl::desc("A debug option to add additional penalty to the computed one."));

namespace {

struct FunctionOutliningInfo {
  FunctionOutliningInfo()
      : Entries(), ReturnBlock(nullptr), NonReturnBlock(nullptr),
        ReturnBlockPreds() {}

  // Returns the number of blocks to be inlined, including all blocks
  // in Entries and one return block.
  unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; }

  // A set of blocks including the function entry that guard
  // the region to be outlined.
  SmallVector<BasicBlock *, 4> Entries;
  // The return block that is not included in the outlined region.
  BasicBlock *ReturnBlock;
  // The dominating block of the region to be outlined.
  BasicBlock *NonReturnBlock;
  // The set of blocks in Entries that are predecessors to ReturnBlock.
  SmallVector<BasicBlock *, 4> ReturnBlockPreds;
};

struct PartialInlinerImpl {
  PartialInlinerImpl(
      std::function<AssumptionCache &(Function &)> *GetAC,
      std::function<TargetTransformInfo &(Function &)> *GTTI,
      Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI,
      ProfileSummaryInfo *ProfSI)
      : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
  bool run(Module &M);
  Function *unswitchFunction(Function *F);

  // This class speculatively clones the function to be partially inlined.
  // At the end of partial inlining, the remaining callsites to the cloned
  // function that are not partially inlined will be fixed up to reference
  // the original function, and the cloned function will be erased.
  struct FunctionCloner {
    FunctionCloner(Function *F, FunctionOutliningInfo *OI);
    ~FunctionCloner();

    // Prepare for function outlining: make sure there is only
    // one incoming edge from the extracted/outlined region to
    // the return block.
    void NormalizeReturnBlock();

    // Do function outlining.
    Function *doFunctionOutlining();

    Function *OrigFunc = nullptr;
    Function *ClonedFunc = nullptr;
    Function *OutlinedFunc = nullptr;
    BasicBlock *OutliningCallBB = nullptr;
    // ClonedFunc is inlined in one of its callers after function
    // outlining.
    bool IsFunctionInlined = false;
    // The cost of the region to be outlined.
    int OutlinedRegionCost = 0;
    std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
    std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
  };

private:
  int NumPartialInlining = 0;
  std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
  std::function<TargetTransformInfo &(Function &)> *GetTTI;
  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
  ProfileSummaryInfo *PSI;

  // Return the frequency of the OutliningBB relative to F's entry point.
  // The result is no larger than 1 and is represented using BP.
  // (Note that the outlined region's 'head' block can only have incoming
  // edges from the guarding entry blocks.)
  BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner);

  // Return true if the callee of CS should be partially inlined with
  // profit.
  bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
                           BlockFrequency WeightedOutliningRcost,
                           OptimizationRemarkEmitter &ORE);

  // Try to inline DuplicateFunction (cloned from F with a call to
  // the OutlinedFunction) into its callers. Return true
  // if there is any successful inlining.
  bool tryPartialInline(FunctionCloner &Cloner);

  // Compute the mapping from use sites of DuplicateFunction to the enclosing
  // BB's profile count.
  void computeCallsiteToProfCountMap(Function *DuplicateFunction,
                                     DenseMap<User *, uint64_t> &SiteCountMap);

  bool IsLimitReached() {
    return (MaxNumPartialInlining != -1 &&
            NumPartialInlining >= MaxNumPartialInlining);
  }

  static CallSite getCallSite(User *U) {
    CallSite CS;
    if (CallInst *CI = dyn_cast<CallInst>(U))
      CS = CallSite(CI);
    else if (InvokeInst *II = dyn_cast<InvokeInst>(U))
      CS = CallSite(II);
    else
      llvm_unreachable("All uses must be calls");
    return CS;
  }

  static CallSite getOneCallSiteTo(Function *F) {
    User *User = *F->user_begin();
    return getCallSite(User);
  }

  std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function *F) {
    CallSite CS = getOneCallSiteTo(F);
    DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
    BasicBlock *Block = CS.getParent();
    return std::make_tuple(DLoc, Block);
  }

  // Returns the costs associated with function outlining:
  // - The first value is the estimated size of the new call sequence in
  //   basic block Cloner.OutliningCallBB;
  // - The second value is the non-weighted runtime overhead of making the
  //   call to the outlined function, including the additional setup cost
  //   in the outlined function itself.
  std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner);

  // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
  // approximate both the size and runtime cost (note that in the current
  // inline cost analysis, there is no clear distinction there either).
  static int computeBBInlineCost(BasicBlock *BB);

  std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
};

struct PartialInlinerLegacyPass : public ModulePass {
  static char ID; // Pass identification, replacement for typeid
  PartialInlinerLegacyPass() : ModulePass(ID) {
    initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
  }

  bool runOnModule(Module &M) override {
    if (skipModule(M))
      return false;

    AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
    TargetTransformInfoWrapperPass *TTIWP =
        &getAnalysis<TargetTransformInfoWrapperPass>();
    ProfileSummaryInfo *PSI =
        getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();

    std::function<AssumptionCache &(Function &)> GetAssumptionCache =
        [&ACT](Function &F) -> AssumptionCache & {
      return ACT->getAssumptionCache(F);
    };

    std::function<TargetTransformInfo &(Function &)> GetTTI =
        [&TTIWP](Function &F) -> TargetTransformInfo & {
      return TTIWP->getTTI(F);
    };

    return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, None, PSI).run(M);
  }
};
} // end anonymous namespace

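// For illustration (the block names below are made up, not from this file),
// the candidate detection in computeOutliningInfo looks for an entry region
// of one or more "guard" blocks, each ending in a conditional branch, where
// the last guard branches to a return block:
//
//   entry:                           ; guard #1
//     br i1 %c1, label %guard2, label %region
//   guard2:                          ; guard #2
//     br i1 %c2, label %return, label %region
//   region:                          ; dominating block of the outlined region
//     ...
//   return:
//     ret ...
//
// The guard blocks plus the return block are kept for inlining into callers;
// the remaining blocks (%region and the rest of the body) are extracted into
// a separate function.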
std::unique_ptr<FunctionOutliningInfo>
PartialInlinerImpl::computeOutliningInfo(Function *F) {
  BasicBlock *EntryBlock = &F->front();
  BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
  if (!BR || BR->isUnconditional())
    return std::unique_ptr<FunctionOutliningInfo>();

  // Returns true if Succ is BB's successor.
  auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
    return is_contained(successors(BB), Succ);
  };

  auto SuccSize = [](BasicBlock *BB) {
    return std::distance(succ_begin(BB), succ_end(BB));
  };

  auto IsReturnBlock = [](BasicBlock *BB) {
    TerminatorInst *TI = BB->getTerminator();
    return isa<ReturnInst>(TI);
  };

  auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
    if (IsReturnBlock(Succ1))
      return std::make_tuple(Succ1, Succ2);
    if (IsReturnBlock(Succ2))
      return std::make_tuple(Succ2, Succ1);

    return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
  };

  // Detect a triangular shape:
  auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
    if (IsSuccessor(Succ1, Succ2))
      return std::make_tuple(Succ1, Succ2);
    if (IsSuccessor(Succ2, Succ1))
      return std::make_tuple(Succ2, Succ1);

    return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
  };

  std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
      llvm::make_unique<FunctionOutliningInfo>();

  BasicBlock *CurrEntry = EntryBlock;
  bool CandidateFound = false;
  do {
    // The number of blocks to be inlined has already reached
    // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
    // disables partial inlining for the function.
    if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks)
      break;

    if (SuccSize(CurrEntry) != 2)
      break;

    BasicBlock *Succ1 = *succ_begin(CurrEntry);
    BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);

    BasicBlock *ReturnBlock, *NonReturnBlock;
    std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);

    if (ReturnBlock) {
      OutliningInfo->Entries.push_back(CurrEntry);
      OutliningInfo->ReturnBlock = ReturnBlock;
      OutliningInfo->NonReturnBlock = NonReturnBlock;
      CandidateFound = true;
      break;
    }

    BasicBlock *CommSucc;
    BasicBlock *OtherSucc;
    std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);

    if (!CommSucc)
      break;

    OutliningInfo->Entries.push_back(CurrEntry);
    CurrEntry = OtherSucc;
  } while (true);

  if (!CandidateFound)
    return std::unique_ptr<FunctionOutliningInfo>();

  // Do a sanity check of the entries: there should not
  // be any successors (not in the entry set) other than
  // {ReturnBlock, NonReturnBlock}.
  assert(OutliningInfo->Entries[0] == &F->front() &&
         "Function Entry must be the first in Entries vector");
  DenseSet<BasicBlock *> Entries;
  for (BasicBlock *E : OutliningInfo->Entries)
    Entries.insert(E);

  // Returns true if BB has a predecessor that is not
  // in the Entries set.
  auto HasNonEntryPred = [Entries](BasicBlock *BB) {
    for (auto Pred : predecessors(BB)) {
      if (!Entries.count(Pred))
        return true;
    }
    return false;
  };
  auto CheckAndNormalizeCandidate =
      [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
        for (BasicBlock *E : OutliningInfo->Entries) {
          for (auto Succ : successors(E)) {
            if (Entries.count(Succ))
              continue;
            if (Succ == OutliningInfo->ReturnBlock)
              OutliningInfo->ReturnBlockPreds.push_back(E);
            else if (Succ != OutliningInfo->NonReturnBlock)
              return false;
          }
          // There should not be any outside incoming edges either:
          if (HasNonEntryPred(E))
            return false;
        }
        return true;
      };

  if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
    return std::unique_ptr<FunctionOutliningInfo>();

  // Now further grow the candidate's inlining region by
  // peeling off dominating blocks from the outlining region:
  while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) {
    BasicBlock *Cand = OutliningInfo->NonReturnBlock;
    if (SuccSize(Cand) != 2)
      break;

    if (HasNonEntryPred(Cand))
      break;

    BasicBlock *Succ1 = *succ_begin(Cand);
    BasicBlock *Succ2 = *(succ_begin(Cand) + 1);

    BasicBlock *ReturnBlock, *NonReturnBlock;
    std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
    if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
      break;

    if (NonReturnBlock->getSinglePredecessor() != Cand)
      break;

    // Now grow and update OutliningInfo:
    OutliningInfo->Entries.push_back(Cand);
    OutliningInfo->NonReturnBlock = NonReturnBlock;
    OutliningInfo->ReturnBlockPreds.push_back(Cand);
    Entries.insert(Cand);
  }

  return OutliningInfo;
}

// Check if there is PGO data or user annotated branch data:
static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
  if (F->getEntryCount())
    return true;
  // Now check if any of the entry blocks have MD_prof data:
  for (auto *E : OI->Entries) {
    BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
    if (!BR || BR->isUnconditional())
      continue;
    uint64_t T, F;
    if (BR->extractProfMetadata(T, F))
      return true;
  }
  return false;
}

BranchProbability
PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
  auto EntryFreq =
      Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
  auto OutliningCallFreq =
      Cloner.ClonedFuncBFI->getBlockFreq(Cloner.OutliningCallBB);

  auto OutlineRegionRelFreq =
      BranchProbability::getBranchProbability(OutliningCallFreq.getFrequency(),
                                              EntryFreq.getFrequency());

  if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get()))
    return OutlineRegionRelFreq;

  // When profile data is not available, we need to be conservative in
  // estimating the overall savings. Static branch prediction can usually
  // guess the branch direction right (taken/non-taken), but the guessed
  // branch probability is usually not biased enough. In the case when the
  // outlined region is predicted to be likely, its probability needs
  // to be made higher (more biased) to not under-estimate the cost of
  // function outlining. On the other hand, if the outlined region
  // is predicted to be less likely, the predicted probability is usually
  // higher than the actual. For instance, the actual probability of the
  // less likely target may be only 5%, but the guessed probability can be
  // 40%. In the latter case, there is no need for further adjustment.
  // FIXME: add an option for this.
  if (OutlineRegionRelFreq < BranchProbability(45, 100))
    return OutlineRegionRelFreq;

  OutlineRegionRelFreq = std::max(
      OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));

  return OutlineRegionRelFreq;
}

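// The profitability check below compares, for each candidate call site,
//
//   Savings  = getCallsiteCost(CS)        // the cost of the call being removed
//   Overhead = WeightedOutliningRcost
//            = NonWeightedRcost * RelativeFreq(OutliningCallBB)
//
// and gives up when the savings from removing the original call do not
// cover the (frequency-weighted) overhead of calling the outlined function.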
bool PartialInlinerImpl::shouldPartialInline(
    CallSite CS, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
    OptimizationRemarkEmitter &ORE) {
  using namespace ore;

  if (SkipCostAnalysis)
    return true;

  Instruction *Call = CS.getInstruction();
  Function *Callee = CS.getCalledFunction();
  assert(Callee == Cloner.ClonedFunc);

  Function *Caller = CS.getCaller();
  auto &CalleeTTI = (*GetTTI)(*Callee);
  InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
                                *GetAssumptionCache, GetBFI, PSI, &ORE);

  if (IC.isAlways()) {
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
             << NV("Callee", Cloner.OrigFunc)
             << " should always be fully inlined, not partially");
    return false;
  }

  if (IC.isNever()) {
    ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
             << NV("Caller", Caller)
             << " because it should never be inlined (cost=never)");
    return false;
  }

  if (!IC) {
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
             << NV("Caller", Caller) << " because too costly to inline (cost="
             << NV("Cost", IC.getCost()) << ", threshold="
             << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
    return false;
  }

  const DataLayout &DL = Caller->getParent()->getDataLayout();

  // The savings of eliminating the call:
  int NonWeightedSavings = getCallsiteCost(CS, DL);
  BlockFrequency NormWeightedSavings(NonWeightedSavings);

  // If the weighted saving is smaller than the weighted cost, return false.
  if (NormWeightedSavings < WeightedOutliningRcost) {
    ORE.emit(
        OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", Call)
        << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
        << NV("Caller", Caller) << " runtime overhead (overhead="
        << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
        << ", savings="
        << NV("Savings", (unsigned)NormWeightedSavings.getFrequency()) << ")"
        << " of making the outlined call is too high");

    return false;
  }

  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
           << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
           << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
           << " (threshold="
           << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
  return true;
}

// TODO: Ideally we should share Inliner's InlineCost Analysis code.
// For now use a simplified version. The returned 'InlineCost' will be used
// to estimate the size cost as well as runtime cost of the BB.
int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
  int InlineCost = 0;
  const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    switch (I->getOpcode()) {
    case Instruction::BitCast:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::Alloca:
      continue;
    case Instruction::GetElementPtr:
      if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
        continue;
      break;
    default:
      break;
    }

    IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(I);
    if (IntrInst) {
      if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
          IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
        continue;
    }

    if (CallInst *CI = dyn_cast<CallInst>(I)) {
      InlineCost += getCallsiteCost(CallSite(CI), DL);
      continue;
    }

    if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
      InlineCost += getCallsiteCost(CallSite(II), DL);
      continue;
    }

    if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
      InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
      continue;
    }
    InlineCost += InlineConstants::InstrCost;
  }
  return InlineCost;
}

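// The two values returned by computeOutliningCosts below boil down (roughly,
// modulo a small adjustment for the extractor's stub branches) to:
//
//   SizeCost        = computeBBInlineCost(OutliningCallBB)
//   RuntimeOverhead = SizeCost + (OutlinedFunctionCost - OutlinedRegionCost)
//                     + ExtraOutliningPenalty
//
// i.e. the size of the new call sequence, plus the overhead of that call
// together with any extra setup code the extractor adds to the outlined
// function beyond the original region.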
std::tuple<int, int>
PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
  // Compute the cost of the call sequence to the outlined function
  // 'Cloner.OutlinedFunc' in basic block 'Cloner.OutliningCallBB':
  int OutliningFuncCallCost = computeBBInlineCost(Cloner.OutliningCallBB);

  // Now compute the cost of the extracted/outlined function itself:
  int OutlinedFunctionCost = 0;
  for (BasicBlock &BB : *Cloner.OutlinedFunc) {
    OutlinedFunctionCost += computeBBInlineCost(&BB);
  }

  assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
         "Outlined function cost should be no less than the outlined region");
  // The code extractor introduces a new root and exit stub blocks with
  // additional unconditional branches. Those branches will be eliminated
  // later with bb layout. The cost should be adjusted accordingly:
  OutlinedFunctionCost -= 2 * InlineConstants::InstrCost;

  int OutliningRuntimeOverhead =
      OutliningFuncCallCost +
      (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
      ExtraOutliningPenalty;

  return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
}

// Create the callsite to profile count map which is
// used to update the original function's entry count,
// after the function is partially inlined into the callsite.
void PartialInlinerImpl::computeCallsiteToProfCountMap(
    Function *DuplicateFunction,
    DenseMap<User *, uint64_t> &CallSiteToProfCountMap) {
  std::vector<User *> Users(DuplicateFunction->user_begin(),
                            DuplicateFunction->user_end());
  Function *CurrentCaller = nullptr;
  std::unique_ptr<BlockFrequencyInfo> TempBFI;
  BlockFrequencyInfo *CurrentCallerBFI = nullptr;

  auto ComputeCurrBFI = [&, this](Function *Caller) {
    // For the old pass manager:
    if (!GetBFI) {
      DominatorTree DT(*Caller);
      LoopInfo LI(DT);
      BranchProbabilityInfo BPI(*Caller, LI);
      TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
      CurrentCallerBFI = TempBFI.get();
    } else {
      // New pass manager:
      CurrentCallerBFI = &(*GetBFI)(*Caller);
    }
  };

  for (User *User : Users) {
    CallSite CS = getCallSite(User);
    Function *Caller = CS.getCaller();
    if (CurrentCaller != Caller) {
      CurrentCaller = Caller;
      ComputeCurrBFI(Caller);
    } else {
      assert(CurrentCallerBFI && "CallerBFI is not set");
    }
    BasicBlock *CallBB = CS.getInstruction()->getParent();
    auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
    if (Count)
      CallSiteToProfCountMap[User] = *Count;
    else
      CallSiteToProfCountMap[User] = 0;
  }
}

PartialInlinerImpl::FunctionCloner::FunctionCloner(Function *F,
                                                   FunctionOutliningInfo *OI)
    : OrigFunc(F) {
  ClonedOI = llvm::make_unique<FunctionOutliningInfo>();

  // Clone the function, so that we can hack away on it.
  ValueToValueMapTy VMap;
  ClonedFunc = CloneFunction(F, VMap);

  ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
  ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
  for (BasicBlock *BB : OI->Entries) {
    ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
  }
  for (BasicBlock *E : OI->ReturnBlockPreds) {
    BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
    ClonedOI->ReturnBlockPreds.push_back(NewE);
  }
  // Go ahead and update all uses to the duplicate, so that we can just
  // use the inliner functionality when we're done hacking.
  F->replaceAllUsesWith(ClonedFunc);
}

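// An illustrative sketch of the PHI splitting done below (the value and
// block names are made up): a return-block PHI such as
//
//   %r = phi i32 [ %a, %entry ], [ %v1, %out.bb1 ], [ %v2, %out.bb2 ]
//
// is split so that the incoming values from the outlined region stay in the
// pre-return block (which gets extracted), and a new PHI in the split-off
// return block merges that result with the values coming directly from the
// entry/guard blocks:
//
//   pre_return:                       ; extracted with the outlined region
//     %r.old = phi i32 [ %v1, %out.bb1 ], [ %v2, %out.bb2 ]
//     br label %return
//   return:                           ; stays with the inlined code
//     %r = phi i32 [ %r.old, %pre_return ], [ %a, %entry ]
//
// This leaves a single value per PHI live across the outlining boundary.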
void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
  auto getFirstPHI = [](BasicBlock *BB) {
    BasicBlock::iterator I = BB->begin();
    PHINode *FirstPhi = nullptr;
    while (I != BB->end()) {
      PHINode *Phi = dyn_cast<PHINode>(I);
      if (!Phi)
        break;
      if (!FirstPhi) {
        FirstPhi = Phi;
        break;
      }
    }
    return FirstPhi;
  };

  // Special hackery is needed with PHI nodes that have inputs from more than
  // one extracted block. For simplicity, just split the PHIs into a two-level
  // sequence of PHIs, some of which will go in the extracted region, and some
  // of which will go outside.
  BasicBlock *PreReturn = ClonedOI->ReturnBlock;
  // Only split the block when necessary:
  PHINode *FirstPhi = getFirstPHI(PreReturn);
  unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();

  if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
    return;

  auto IsTrivialPhi = [](PHINode *PN) -> Value * {
    Value *CommonValue = PN->getIncomingValue(0);
    if (all_of(PN->incoming_values(),
               [&](Value *V) { return V == CommonValue; }))
      return CommonValue;
    return nullptr;
  };

  ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
      ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
  BasicBlock::iterator I = PreReturn->begin();
  Instruction *Ins = &ClonedOI->ReturnBlock->front();
  SmallVector<Instruction *, 4> DeadPhis;
  while (I != PreReturn->end()) {
    PHINode *OldPhi = dyn_cast<PHINode>(I);
    if (!OldPhi)
      break;

    PHINode *RetPhi =
        PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
    OldPhi->replaceAllUsesWith(RetPhi);
    Ins = ClonedOI->ReturnBlock->getFirstNonPHI();

    RetPhi->addIncoming(&*I, PreReturn);
    for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
      RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
      OldPhi->removeIncomingValue(E);
    }

    // After incoming value splitting, the old phi may become trivial.
    // Keeping the trivial phi can introduce definitions inside the outlined
    // region which are live-out, causing unnecessary overhead (load, store,
    // arg passing etc).
    if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
      OldPhi->replaceAllUsesWith(OldPhiVal);
      DeadPhis.push_back(OldPhi);
    }
    ++I;
  }
  for (auto *DP : DeadPhis)
    DP->eraseFromParent();

  for (auto E : ClonedOI->ReturnBlockPreds) {
    E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
  }
}

Function *PartialInlinerImpl::FunctionCloner::doFunctionOutlining() {
  // Returns true if the block is to be partially inlined into the caller
  // (i.e. not to be extracted to the out-of-line function).
  auto ToBeInlined = [&, this](BasicBlock *BB) {
    return BB == ClonedOI->ReturnBlock ||
           (std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) !=
            ClonedOI->Entries.end());
  };

  // Gather up the blocks that we're going to extract.
  std::vector<BasicBlock *> ToExtract;
  ToExtract.push_back(ClonedOI->NonReturnBlock);
  OutlinedRegionCost +=
      PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
  for (BasicBlock &BB : *ClonedFunc)
    if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
      ToExtract.push_back(&BB);
      // FIXME: the code extractor may hoist/sink more code
      // into the outlined function which may make the outlining
      // overhead (the difference of the outlined function cost
      // and OutliningRegionCost) look larger.
      OutlinedRegionCost += computeBBInlineCost(&BB);
    }

  // The CodeExtractor needs a dominator tree.
  DominatorTree DT;
  DT.recalculate(*ClonedFunc);

  // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
  LoopInfo LI(DT);
  BranchProbabilityInfo BPI(*ClonedFunc, LI);
  ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));

  // Extract the body of the if.
  OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
                               ClonedFuncBFI.get(), &BPI)
                     .extractCodeRegion();

  if (OutlinedFunc) {
    OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
                          .getInstruction()
                          ->getParent();
    assert(OutliningCallBB->getParent() == ClonedFunc);
  }

  return OutlinedFunc;
}

PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
  // Ditch the duplicate, since we're done with it, and rewrite all remaining
  // users (function pointers, etc.) back to the original function.
  ClonedFunc->replaceAllUsesWith(OrigFunc);
  ClonedFunc->eraseFromParent();
  if (!IsFunctionInlined) {
    // Remove the function that is speculatively created if there is no
    // reference.
    if (OutlinedFunc)
      OutlinedFunc->eraseFromParent();
  }
}

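// A hypothetical example of the end-to-end transformation: given
//
//   void foo(int *p) {
//     if (!p)            // entry/guard block: kept for inlining
//       return;          // return block: kept for inlining
//     ...heavy work...   // non-return region: outlined
//   }
//
// the heavy region is extracted into a new outlined function, and each
// profitable caller of foo then inlines just the cheap guard plus a call to
// that outlined function, while the remaining callers keep calling foo as is.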
Function *PartialInlinerImpl::unswitchFunction(Function *F) {
  if (F->hasAddressTaken())
    return nullptr;

  // Let the inliner handle it.
  if (F->hasFnAttribute(Attribute::AlwaysInline))
    return nullptr;

  if (F->hasFnAttribute(Attribute::NoInline))
    return nullptr;

  if (PSI->isFunctionEntryCold(F))
    return nullptr;

  if (F->user_begin() == F->user_end())
    return nullptr;

  std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);

  if (!OI)
    return nullptr;

  FunctionCloner Cloner(F, OI.get());
  Cloner.NormalizeReturnBlock();
  Function *OutlinedFunction = Cloner.doFunctionOutlining();

  bool AnyInline = tryPartialInline(Cloner);

  if (AnyInline)
    return OutlinedFunction;

  return nullptr;
}

bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
  int NonWeightedRcost;
  int SizeCost;

  if (Cloner.OutlinedFunc == nullptr)
    return false;

  std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);

  auto RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
  auto WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;

  // If the call sequence to the outlined function is larger than the original
  // outlined region size, it does not increase the chances of inlining
  // the function with outlining (the inliner uses the size increase to
  // model the cost of inlining a callee).
  if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
    OptimizationRemarkEmitter ORE(Cloner.OrigFunc);
    DebugLoc DLoc;
    BasicBlock *Block;
    std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
                                        DLoc, Block)
             << ore::NV("Function", Cloner.OrigFunc)
             << " not partially inlined into callers (Original Size = "
             << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
             << ", Size of call sequence to outlined function = "
             << ore::NV("NewSize", SizeCost) << ")");
    return false;
  }

  assert(Cloner.OrigFunc->user_begin() == Cloner.OrigFunc->user_end() &&
         "F's users should all be replaced!");

  std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
                            Cloner.ClonedFunc->user_end());

  DenseMap<User *, uint64_t> CallSiteToProfCountMap;
  if (Cloner.OrigFunc->getEntryCount())
    computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);

  auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
  uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0);

  bool AnyInline = false;
  for (User *User : Users) {
    CallSite CS = getCallSite(User);

    if (IsLimitReached())
      continue;

    OptimizationRemarkEmitter ORE(CS.getCaller());

    if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE))
      continue;

    ORE.emit(
        OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction())
        << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
        << ore::NV("Caller", CS.getCaller()));

    InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
    InlineFunction(CS, IFI);

    // Now update the entry count:
    if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
      uint64_t CallSiteCount = CallSiteToProfCountMap[User];
      CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
    }

    AnyInline = true;
    NumPartialInlining++;
    // Update the stats:
    NumPartialInlined++;
  }

  if (AnyInline) {
    Cloner.IsFunctionInlined = true;
    if (CalleeEntryCount)
      Cloner.OrigFunc->setEntryCount(CalleeEntryCountV);
  }

  return AnyInline;
}

bool PartialInlinerImpl::run(Module &M) {
  if (DisablePartialInlining)
    return false;

  std::vector<Function *> Worklist;
  Worklist.reserve(M.size());
  for (Function &F : M)
    if (!F.use_empty() && !F.isDeclaration())
      Worklist.push_back(&F);

  bool Changed = false;
  while (!Worklist.empty()) {
    Function *CurrFunc = Worklist.back();
    Worklist.pop_back();

    if (CurrFunc->use_empty())
      continue;

    bool Recursive = false;
    for (User *U : CurrFunc->users())
      if (Instruction *I = dyn_cast<Instruction>(U))
        if (I->getParent()->getParent() == CurrFunc) {
          Recursive = true;
          break;
        }
    if (Recursive)
      continue;

    if (Function *NewFunc = unswitchFunction(CurrFunc)) {
      Worklist.push_back(NewFunc);
      Changed = true;
    }
  }

  return Changed;
}

char PartialInlinerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
                      "Partial Inliner", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
                    "Partial Inliner", false, false)

ModulePass *llvm::createPartialInliningPass() {
  return new PartialInlinerLegacyPass();
}

PreservedAnalyses PartialInlinerPass::run(Module &M,
                                          ModuleAnalysisManager &AM) {
  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
      [&FAM](Function &F) -> AssumptionCache & {
    return FAM.getResult<AssumptionAnalysis>(F);
  };

  std::function<BlockFrequencyInfo &(Function &)> GetBFI =
      [&FAM](Function &F) -> BlockFrequencyInfo & {
    return FAM.getResult<BlockFrequencyAnalysis>(F);
  };

  std::function<TargetTransformInfo &(Function &)> GetTTI =
      [&FAM](Function &F) -> TargetTransformInfo & {
    return FAM.getResult<TargetIRAnalysis>(F);
  };

  ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);

  if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI).run(M))
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}