blob: 257ca157ab6e2f5260def67ca4956498ef8f13b8 [file] [log] [blame]
Owen Anderson2f82e272009-06-14 08:26:32 +00001//===- PartialInlining.cpp - Inline parts of functions --------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass performs partial inlining, typically by inlining an if statement
11// that surrounds the body of the function.
12//
13//===----------------------------------------------------------------------===//
14
Easwaran Raman1832bf62016-06-27 16:50:18 +000015#include "llvm/Transforms/IPO/PartialInlining.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000016#include "llvm/ADT/Statistic.h"
Sean Silvaf8015752016-08-02 02:15:45 +000017#include "llvm/Analysis/BlockFrequencyInfo.h"
18#include "llvm/Analysis/BranchProbabilityInfo.h"
Xinliang David Li66bdfca2017-05-12 23:41:43 +000019#include "llvm/Analysis/CodeMetrics.h"
Xinliang David Li61338462017-05-02 02:44:14 +000020#include "llvm/Analysis/InlineCost.h"
Sean Silvaf8015752016-08-02 02:15:45 +000021#include "llvm/Analysis/LoopInfo.h"
Adam Nemet0965da22017-10-09 23:19:02 +000022#include "llvm/Analysis/OptimizationRemarkEmitter.h"
Xinliang David Li61338462017-05-02 02:44:14 +000023#include "llvm/Analysis/ProfileSummaryInfo.h"
24#include "llvm/Analysis/TargetLibraryInfo.h"
25#include "llvm/Analysis/TargetTransformInfo.h"
Chandler Carruth1305dc32014-03-04 11:45:46 +000026#include "llvm/IR/CFG.h"
Xinliang David Li15744ad2017-04-23 21:40:58 +000027#include "llvm/IR/DiagnosticInfo.h"
Chandler Carruth5ad5f152014-01-13 09:26:24 +000028#include "llvm/IR/Dominators.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000029#include "llvm/IR/Instructions.h"
Reid Kleckner0e8c4bb2017-09-07 23:27:44 +000030#include "llvm/IR/IntrinsicInst.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000031#include "llvm/IR/Module.h"
Owen Anderson2f82e272009-06-14 08:26:32 +000032#include "llvm/Pass.h"
Easwaran Raman1832bf62016-06-27 16:50:18 +000033#include "llvm/Transforms/IPO.h"
Owen Anderson2f82e272009-06-14 08:26:32 +000034#include "llvm/Transforms/Utils/Cloning.h"
Chandler Carruth0fde0012012-05-04 10:18:49 +000035#include "llvm/Transforms/Utils/CodeExtractor.h"
Owen Anderson2f82e272009-06-14 08:26:32 +000036using namespace llvm;
37
Xinliang David Li15744ad2017-04-23 21:40:58 +000038#define DEBUG_TYPE "partial-inlining"
Chandler Carruth964daaa2014-04-22 02:55:47 +000039
Xinliang David Li61338462017-05-02 02:44:14 +000040STATISTIC(NumPartialInlined,
41 "Number of callsites functions partially inlined into.");
Owen Andersonbd6a2132009-06-15 20:50:26 +000042
Xinliang David Lidb8d09b2017-04-23 23:39:04 +000043// Command line option to disable partial-inlining. The default is false:
44static cl::opt<bool>
45 DisablePartialInlining("disable-partial-inlining", cl::init(false),
46 cl::Hidden, cl::desc("Disable partial ininling"));
Xinliang David Li66bdfca2017-05-12 23:41:43 +000047// This is an option used by testing:
48static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
49 cl::init(false), cl::ZeroOrMore,
50 cl::ReallyHidden,
51 cl::desc("Skip Cost Analysis"));
Xinliang David Lidb8d09b2017-04-23 23:39:04 +000052
Xinliang David Lid21601a2017-04-27 16:34:00 +000053static cl::opt<unsigned> MaxNumInlineBlocks(
54 "max-num-inline-blocks", cl::init(5), cl::Hidden,
Chad Rosierf98335e2017-08-24 21:21:09 +000055 cl::desc("Max number of blocks to be partially inlined"));
Xinliang David Lid21601a2017-04-27 16:34:00 +000056
Xinliang David Lidb8d09b2017-04-23 23:39:04 +000057// Command line option to set the maximum number of partial inlining allowed
58// for the module. The default value of -1 means no limit.
59static cl::opt<int> MaxNumPartialInlining(
60 "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
61 cl::desc("Max number of partial inlining. The default is unlimited"));
62
Xinliang David Li66bdfca2017-05-12 23:41:43 +000063// Used only when PGO or user annotated branch data is absent. It is
64// the least value that is used to weigh the outline region. If BFI
65// produces larger value, the BFI value will be used.
66static cl::opt<int>
67 OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
68 cl::Hidden, cl::ZeroOrMore,
69 cl::desc("Relative frequency of outline region to "
70 "the entry block"));
71
Xinliang David Li0b7d8582017-06-02 22:08:04 +000072static cl::opt<unsigned> ExtraOutliningPenalty(
73 "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
74 cl::desc("A debug option to add additional penalty to the computed one."));
75
Owen Anderson2f82e272009-06-14 08:26:32 +000076namespace {
Xinliang David Lid21601a2017-04-27 16:34:00 +000077
78struct FunctionOutliningInfo {
79 FunctionOutliningInfo()
80 : Entries(), ReturnBlock(nullptr), NonReturnBlock(nullptr),
81 ReturnBlockPreds() {}
82 // Returns the number of blocks to be inlined including all blocks
83 // in Entries and one return block.
84 unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; }
85
86 // A set of blocks including the function entry that guard
87 // the region to be outlined.
88 SmallVector<BasicBlock *, 4> Entries;
89 // The return block that is not included in the outlined region.
90 BasicBlock *ReturnBlock;
Xinliang David Li0b7d8582017-06-02 22:08:04 +000091 // The dominating block of the region to be outlined.
Xinliang David Lid21601a2017-04-27 16:34:00 +000092 BasicBlock *NonReturnBlock;
93 // The set of blocks in Entries that that are predecessors to ReturnBlock
94 SmallVector<BasicBlock *, 4> ReturnBlockPreds;
95};
96
// Implementation of the partial inliner, shared by the legacy and new pass
// managers. Analysis accessors are injected via the constructor so the same
// code runs under either manager.
struct PartialInlinerImpl {
  PartialInlinerImpl(
      std::function<AssumptionCache &(Function &)> *GetAC,
      std::function<TargetTransformInfo &(Function &)> *GTTI,
      Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI,
      ProfileSummaryInfo *ProfSI)
      : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
  // Process every candidate function in M; returns true if the module changed.
  bool run(Module &M);
  // Attempt partial inlining of F; returns the outlined function on success.
  Function *unswitchFunction(Function *F);

  // This class speculatively clones the function to be partial inlined.
  // At the end of partial inlining, the remaining callsites to the cloned
  // function that are not partially inlined will be fixed up to reference
  // the original function, and the cloned function will be erased.
  struct FunctionCloner {
    FunctionCloner(Function *F, FunctionOutliningInfo *OI);
    ~FunctionCloner();

    // Prepare for function outlining: making sure there is only
    // one incoming edge from the extracted/outlined region to
    // the return block.
    void NormalizeReturnBlock();

    // Do function outlining:
    Function *doFunctionOutlining();

    Function *OrigFunc = nullptr;
    Function *ClonedFunc = nullptr;
    Function *OutlinedFunc = nullptr;
    BasicBlock *OutliningCallBB = nullptr;
    // ClonedFunc is inlined in one of its callers after function
    // outlining.
    bool IsFunctionInlined = false;
    // The cost of the region to be outlined.
    int OutlinedRegionCost = 0;
    std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
    std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
  };

private:
  // Number of partial inlinings performed so far, checked against
  // MaxNumPartialInlining.
  int NumPartialInlining = 0;
  std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
  std::function<TargetTransformInfo &(Function &)> *GetTTI;
  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
  ProfileSummaryInfo *PSI;

  // Return the frequency of the OutlininingBB relative to F's entry point.
  // The result is no larger than 1 and is represented using BP.
  // (Note that the outlined region's 'head' block can only have incoming
  // edges from the guarding entry blocks).
  BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner);

  // Return true if the callee of CS should be partially inlined with
  // profit.
  bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
                           BlockFrequency WeightedOutliningRcost,
                           OptimizationRemarkEmitter &ORE);

  // Try to inline DuplicateFunction (cloned from F with call to
  // the OutlinedFunction into its callers. Return true
  // if there is any successful inlining.
  bool tryPartialInline(FunctionCloner &Cloner);

  // Compute the mapping from use site of DuplicationFunction to the enclosing
  // BB's profile count.
  void computeCallsiteToProfCountMap(Function *DuplicateFunction,
                                     DenseMap<User *, uint64_t> &SiteCountMap);

  // True once the per-module budget (MaxNumPartialInlining) is exhausted.
  bool IsLimitReached() {
    return (MaxNumPartialInlining != -1 &&
            NumPartialInlining >= MaxNumPartialInlining);
  }

  // Wrap a known call/invoke user in a CallSite; aborts on any other user.
  static CallSite getCallSite(User *U) {
    CallSite CS;
    if (CallInst *CI = dyn_cast<CallInst>(U))
      CS = CallSite(CI);
    else if (InvokeInst *II = dyn_cast<InvokeInst>(U))
      CS = CallSite(II);
    else
      llvm_unreachable("All uses must be calls");
    return CS;
  }

  // Return the call site of F's first user (F must have at least one user).
  static CallSite getOneCallSiteTo(Function *F) {
    User *User = *F->user_begin();
    return getCallSite(User);
  }

  // Debug location and enclosing block of one call site to F, used for
  // emitting optimization remarks.
  std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function *F) {
    CallSite CS = getOneCallSiteTo(F);
    DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
    BasicBlock *Block = CS.getParent();
    return std::make_tuple(DLoc, Block);
  }

  // Returns the costs associated with function outlining:
  // - The first value is the non-weighted runtime cost for making the call
  //   to the outlined function, including the additional setup cost in the
  //   outlined function itself;
  // - The second value is the estimated size of the new call sequence in
  //   basic block Cloner.OutliningCallBB;
  std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner);

  // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
  // approximate both the size and runtime cost (Note that in the current
  // inline cost analysis, there is no clear distinction there either).
  static int computeBBInlineCost(BasicBlock *BB);

  // Analyze F's CFG; returns null if F is not a partial-inlining candidate.
  std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);

};
Xinliang David Lid21601a2017-04-27 16:34:00 +0000208
Easwaran Raman1832bf62016-06-27 16:50:18 +0000209struct PartialInlinerLegacyPass : public ModulePass {
210 static char ID; // Pass identification, replacement for typeid
211 PartialInlinerLegacyPass() : ModulePass(ID) {
212 initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
213 }
Craig Topper3e4c6972014-03-05 09:10:37 +0000214
Daniel Jasperaec2fa32016-12-19 08:22:17 +0000215 void getAnalysisUsage(AnalysisUsage &AU) const override {
216 AU.addRequired<AssumptionCacheTracker>();
Xinliang David Li61338462017-05-02 02:44:14 +0000217 AU.addRequired<ProfileSummaryInfoWrapperPass>();
218 AU.addRequired<TargetTransformInfoWrapperPass>();
Daniel Jasperaec2fa32016-12-19 08:22:17 +0000219 }
Easwaran Raman1832bf62016-06-27 16:50:18 +0000220 bool runOnModule(Module &M) override {
221 if (skipModule(M))
222 return false;
Craig Topper3e4c6972014-03-05 09:10:37 +0000223
Daniel Jasperaec2fa32016-12-19 08:22:17 +0000224 AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
Xinliang David Li61338462017-05-02 02:44:14 +0000225 TargetTransformInfoWrapperPass *TTIWP =
226 &getAnalysis<TargetTransformInfoWrapperPass>();
227 ProfileSummaryInfo *PSI =
228 getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
229
Daniel Jasperaec2fa32016-12-19 08:22:17 +0000230 std::function<AssumptionCache &(Function &)> GetAssumptionCache =
231 [&ACT](Function &F) -> AssumptionCache & {
232 return ACT->getAssumptionCache(F);
233 };
Xinliang David Li61338462017-05-02 02:44:14 +0000234
235 std::function<TargetTransformInfo &(Function &)> GetTTI =
236 [&TTIWP](Function &F) -> TargetTransformInfo & {
237 return TTIWP->getTTI(F);
238 };
239
240 return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, None, PSI).run(M);
Sean Silvafe5abd52016-07-25 05:00:00 +0000241 }
Sean Silva519323d2016-07-25 05:57:59 +0000242};
Alexander Kornienkof00654e2015-06-23 09:49:53 +0000243}
Owen Anderson2f82e272009-06-14 08:26:32 +0000244
// Analyze F's CFG and decide whether it is a partial-inlining candidate.
// The recognized shape is a chain of conditional "guard" blocks starting at
// the entry, where one side eventually reaches a return block (kept inline)
// and the other side forms a single-entry region that can be outlined.
// Returns null when no candidate shape is found.
std::unique_ptr<FunctionOutliningInfo>
PartialInlinerImpl::computeOutliningInfo(Function *F) {
  BasicBlock *EntryBlock = &F->front();
  BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
  if (!BR || BR->isUnconditional())
    return std::unique_ptr<FunctionOutliningInfo>();

  // Returns true if Succ is BB's successor
  auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
    return is_contained(successors(BB), Succ);
  };

  auto SuccSize = [](BasicBlock *BB) {
    return std::distance(succ_begin(BB), succ_end(BB));
  };

  auto IsReturnBlock = [](BasicBlock *BB) {
    TerminatorInst *TI = BB->getTerminator();
    return isa<ReturnInst>(TI);
  };

  // Order a successor pair as (return block, other block), or (null, null)
  // if neither successor returns.
  auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
    if (IsReturnBlock(Succ1))
      return std::make_tuple(Succ1, Succ2);
    if (IsReturnBlock(Succ2))
      return std::make_tuple(Succ2, Succ1);

    return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
  };

  // Detect a triangular shape:
  auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
    if (IsSuccessor(Succ1, Succ2))
      return std::make_tuple(Succ1, Succ2);
    if (IsSuccessor(Succ2, Succ1))
      return std::make_tuple(Succ2, Succ1);

    return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
  };

  std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
      llvm::make_unique<FunctionOutliningInfo>();

  // Phase 1: walk the chain of guard blocks from the entry, collecting them
  // into Entries until a block with a returning successor is found.
  BasicBlock *CurrEntry = EntryBlock;
  bool CandidateFound = false;
  do {
    // The number of blocks to be inlined has already reached
    // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
    // disables partial inlining for the function.
    if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks)
      break;

    if (SuccSize(CurrEntry) != 2)
      break;

    BasicBlock *Succ1 = *succ_begin(CurrEntry);
    BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);

    BasicBlock *ReturnBlock, *NonReturnBlock;
    std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);

    if (ReturnBlock) {
      OutliningInfo->Entries.push_back(CurrEntry);
      OutliningInfo->ReturnBlock = ReturnBlock;
      OutliningInfo->NonReturnBlock = NonReturnBlock;
      CandidateFound = true;
      break;
    }

    BasicBlock *CommSucc;
    BasicBlock *OtherSucc;
    std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);

    if (!CommSucc)
      break;

    OutliningInfo->Entries.push_back(CurrEntry);
    CurrEntry = OtherSucc;

  } while (true);

  if (!CandidateFound)
    return std::unique_ptr<FunctionOutliningInfo>();

  // Do sanity check of the entries: there should not
  // be any successors (not in the entry set) other than
  // {ReturnBlock, NonReturnBlock}
  assert(OutliningInfo->Entries[0] == &F->front() &&
         "Function Entry must be the first in Entries vector");
  DenseSet<BasicBlock *> Entries;
  for (BasicBlock *E : OutliningInfo->Entries)
    Entries.insert(E);

  // Returns true if BB has a predecessor which is not
  // in the Entries set.
  auto HasNonEntryPred = [Entries](BasicBlock *BB) {
    for (auto Pred : predecessors(BB)) {
      if (!Entries.count(Pred))
        return true;
    }
    return false;
  };
  // Phase 2: validate the candidate (entry blocks may only branch to other
  // entries, ReturnBlock, or NonReturnBlock, and may not be entered from
  // outside) while recording which entries feed ReturnBlock.
  auto CheckAndNormalizeCandidate =
      [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
        for (BasicBlock *E : OutliningInfo->Entries) {
          for (auto Succ : successors(E)) {
            if (Entries.count(Succ))
              continue;
            if (Succ == OutliningInfo->ReturnBlock)
              OutliningInfo->ReturnBlockPreds.push_back(E);
            else if (Succ != OutliningInfo->NonReturnBlock)
              return false;
          }
          // There should not be any outside incoming edges either:
          if (HasNonEntryPred(E))
            return false;
        }
        return true;
      };

  if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
    return std::unique_ptr<FunctionOutliningInfo>();

  // Now further growing the candidate's inlining region by
  // peeling off dominating blocks from the outlining region:
  while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) {
    BasicBlock *Cand = OutliningInfo->NonReturnBlock;
    if (SuccSize(Cand) != 2)
      break;

    if (HasNonEntryPred(Cand))
      break;

    BasicBlock *Succ1 = *succ_begin(Cand);
    BasicBlock *Succ2 = *(succ_begin(Cand) + 1);

    BasicBlock *ReturnBlock, *NonReturnBlock;
    std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
    if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
      break;

    if (NonReturnBlock->getSinglePredecessor() != Cand)
      break;

    // Now grow and update OutlininigInfo:
    OutliningInfo->Entries.push_back(Cand);
    OutliningInfo->NonReturnBlock = NonReturnBlock;
    OutliningInfo->ReturnBlockPreds.push_back(Cand);
    Entries.insert(Cand);
  }

  return OutliningInfo;
}
398
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000399// Check if there is PGO data or user annoated branch data:
400static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
401 if (F->getEntryCount())
402 return true;
403 // Now check if any of the entry block has MD_prof data:
404 for (auto *E : OI->Entries) {
405 BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
406 if (!BR || BR->isUnconditional())
407 continue;
408 uint64_t T, F;
409 if (BR->extractProfMetadata(T, F))
410 return true;
411 }
412 return false;
413}
414
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000415BranchProbability
416PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000417
418 auto EntryFreq =
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000419 Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
420 auto OutliningCallFreq =
421 Cloner.ClonedFuncBFI->getBlockFreq(Cloner.OutliningCallBB);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000422
423 auto OutlineRegionRelFreq =
424 BranchProbability::getBranchProbability(OutliningCallFreq.getFrequency(),
425 EntryFreq.getFrequency());
426
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000427 if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get()))
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000428 return OutlineRegionRelFreq;
429
Xinliang David Li0b7d8582017-06-02 22:08:04 +0000430 // When profile data is not available, we need to be conservative in
431 // estimating the overall savings. Static branch prediction can usually
432 // guess the branch direction right (taken/non-taken), but the guessed
433 // branch probability is usually not biased enough. In case when the
434 // outlined region is predicted to be likely, its probability needs
435 // to be made higher (more biased) to not under-estimate the cost of
436 // function outlining. On the other hand, if the outlined region
437 // is predicted to be less likely, the predicted probablity is usually
438 // higher than the actual. For instance, the actual probability of the
439 // less likely target is only 5%, but the guessed probablity can be
440 // 40%. In the latter case, there is no need for further adjustement.
441 // FIXME: add an option for this.
442 if (OutlineRegionRelFreq < BranchProbability(45, 100))
443 return OutlineRegionRelFreq;
444
445 OutlineRegionRelFreq = std::max(
446 OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000447
448 return OutlineRegionRelFreq;
449}
450
// Decide whether the call site CS (whose callee is the cloned function held
// by Cloner) is profitable to partially inline, emitting optimization
// remarks explaining each decision via ORE.
bool PartialInlinerImpl::shouldPartialInline(
    CallSite CS, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
    OptimizationRemarkEmitter &ORE) {

  using namespace ore;
  // Testing-only escape hatch: treat every call site as profitable.
  if (SkipCostAnalysis)
    return true;

  Instruction *Call = CS.getInstruction();
  Function *Callee = CS.getCalledFunction();
  assert(Callee == Cloner.ClonedFunc);

  Function *Caller = CS.getCaller();
  auto &CalleeTTI = (*GetTTI)(*Callee);
  InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
                                *GetAssumptionCache, GetBFI, PSI, &ORE);

  // Always-inline callees should be fully inlined by the regular inliner.
  if (IC.isAlways()) {
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
             << NV("Callee", Cloner.OrigFunc)
             << " should always be fully inlined, not partially");
    return false;
  }

  if (IC.isNever()) {
    ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
             << NV("Caller", Caller)
             << " because it should never be inlined (cost=never)");
    return false;
  }

  if (!IC) {
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
             << NV("Caller", Caller) << " because too costly to inline (cost="
             << NV("Cost", IC.getCost()) << ", threshold="
             << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
    return false;
  }
  const DataLayout &DL = Caller->getParent()->getDataLayout();

  // The savings of eliminating the call:
  int NonWeightedSavings = getCallsiteCost(CS, DL);
  BlockFrequency NormWeightedSavings(NonWeightedSavings);

  // Weighted saving is smaller than weighted cost, return false
  if (NormWeightedSavings < WeightedOutliningRcost) {
    ORE.emit(
        OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", Call)
        << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
        << NV("Caller", Caller) << " runtime overhead (overhead="
        << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
        << ", savings="
        << NV("Savings", (unsigned)NormWeightedSavings.getFrequency()) << ")"
        << " of making the outlined call is too high");

    return false;
  }

  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
           << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
           << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
           << " (threshold="
           << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
  return true;
}
518
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000519// TODO: Ideally we should share Inliner's InlineCost Analysis code.
520// For now use a simplified version. The returned 'InlineCost' will be used
521// to esimate the size cost as well as runtime cost of the BB.
522int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
523 int InlineCost = 0;
524 const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
525 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
526 if (isa<DbgInfoIntrinsic>(I))
527 continue;
528
Xinliang David Li0b7d8582017-06-02 22:08:04 +0000529 switch (I->getOpcode()) {
530 case Instruction::BitCast:
531 case Instruction::PtrToInt:
532 case Instruction::IntToPtr:
533 case Instruction::Alloca:
534 continue;
535 case Instruction::GetElementPtr:
536 if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
537 continue;
538 default:
539 break;
540 }
541
542 IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(I);
543 if (IntrInst) {
544 if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
545 IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
546 continue;
547 }
548
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000549 if (CallInst *CI = dyn_cast<CallInst>(I)) {
550 InlineCost += getCallsiteCost(CallSite(CI), DL);
551 continue;
552 }
553
554 if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
555 InlineCost += getCallsiteCost(CallSite(II), DL);
556 continue;
557 }
558
559 if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
560 InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
561 continue;
562 }
563 InlineCost += InlineConstants::InstrCost;
564 }
565 return InlineCost;
566}
567
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000568std::tuple<int, int>
569PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000570
571 // Now compute the cost of the call sequence to the outlined function
572 // 'OutlinedFunction' in BB 'OutliningCallBB':
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000573 int OutliningFuncCallCost = computeBBInlineCost(Cloner.OutliningCallBB);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000574
575 // Now compute the cost of the extracted/outlined function itself:
576 int OutlinedFunctionCost = 0;
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000577 for (BasicBlock &BB : *Cloner.OutlinedFunc) {
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000578 OutlinedFunctionCost += computeBBInlineCost(&BB);
579 }
Xinliang David Li5fdc75a2017-06-02 22:38:48 +0000580
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000581 assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
Xinliang David Li5fdc75a2017-06-02 22:38:48 +0000582 "Outlined function cost should be no less than the outlined region");
Xinliang David Li0b7d8582017-06-02 22:08:04 +0000583 // The code extractor introduces a new root and exit stub blocks with
584 // additional unconditional branches. Those branches will be eliminated
585 // later with bb layout. The cost should be adjusted accordingly:
586 OutlinedFunctionCost -= 2 * InlineConstants::InstrCost;
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000587
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000588 int OutliningRuntimeOverhead =
589 OutliningFuncCallCost +
590 (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
591 ExtraOutliningPenalty;
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000592
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000593 return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000594}
595
596// Create the callsite to profile count map which is
597// used to update the original function's entry count,
598// after the function is partially inlined into the callsite.
599void PartialInlinerImpl::computeCallsiteToProfCountMap(
600 Function *DuplicateFunction,
601 DenseMap<User *, uint64_t> &CallSiteToProfCountMap) {
602 std::vector<User *> Users(DuplicateFunction->user_begin(),
603 DuplicateFunction->user_end());
604 Function *CurrentCaller = nullptr;
Vitaly Bukaa6374892017-05-27 05:32:09 +0000605 std::unique_ptr<BlockFrequencyInfo> TempBFI;
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000606 BlockFrequencyInfo *CurrentCallerBFI = nullptr;
607
608 auto ComputeCurrBFI = [&,this](Function *Caller) {
609 // For the old pass manager:
610 if (!GetBFI) {
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000611 DominatorTree DT(*Caller);
612 LoopInfo LI(DT);
613 BranchProbabilityInfo BPI(*Caller, LI);
Vitaly Bukaa6374892017-05-27 05:32:09 +0000614 TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
615 CurrentCallerBFI = TempBFI.get();
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000616 } else {
617 // New pass manager:
618 CurrentCallerBFI = &(*GetBFI)(*Caller);
619 }
620 };
621
622 for (User *User : Users) {
623 CallSite CS = getCallSite(User);
624 Function *Caller = CS.getCaller();
625 if (CurrentCaller != Caller) {
626 CurrentCaller = Caller;
627 ComputeCurrBFI(Caller);
628 } else {
629 assert(CurrentCallerBFI && "CallerBFI is not set");
630 }
631 BasicBlock *CallBB = CS.getInstruction()->getParent();
632 auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
633 if (Count)
634 CallSiteToProfCountMap[User] = *Count;
635 else
636 CallSiteToProfCountMap[User] = 0;
637 }
638}
639
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000640PartialInlinerImpl::FunctionCloner::FunctionCloner(Function *F,
641 FunctionOutliningInfo *OI)
642 : OrigFunc(F) {
643 ClonedOI = llvm::make_unique<FunctionOutliningInfo>();
644
645 // Clone the function, so that we can hack away on it.
646 ValueToValueMapTy VMap;
647 ClonedFunc = CloneFunction(F, VMap);
648
649 ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
650 ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
651 for (BasicBlock *BB : OI->Entries) {
652 ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
653 }
654 for (BasicBlock *E : OI->ReturnBlockPreds) {
655 BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
656 ClonedOI->ReturnBlockPreds.push_back(NewE);
657 }
658 // Go ahead and update all uses to the duplicate, so that we can just
659 // use the inliner functionality when we're done hacking.
660 F->replaceAllUsesWith(ClonedFunc);
661}
662
663void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
664
665 auto getFirstPHI = [](BasicBlock *BB) {
666 BasicBlock::iterator I = BB->begin();
667 PHINode *FirstPhi = nullptr;
668 while (I != BB->end()) {
669 PHINode *Phi = dyn_cast<PHINode>(I);
670 if (!Phi)
671 break;
672 if (!FirstPhi) {
673 FirstPhi = Phi;
674 break;
675 }
676 }
677 return FirstPhi;
678 };
679
680 // Special hackery is needed with PHI nodes that have inputs from more than
681 // one extracted block. For simplicity, just split the PHIs into a two-level
682 // sequence of PHIs, some of which will go in the extracted region, and some
683 // of which will go outside.
684 BasicBlock *PreReturn = ClonedOI->ReturnBlock;
685 // only split block when necessary:
686 PHINode *FirstPhi = getFirstPHI(PreReturn);
687 unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
688
689 if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
690 return;
691
692 auto IsTrivialPhi = [](PHINode *PN) -> Value * {
693 Value *CommonValue = PN->getIncomingValue(0);
694 if (all_of(PN->incoming_values(),
695 [&](Value *V) { return V == CommonValue; }))
696 return CommonValue;
697 return nullptr;
698 };
699
700 ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
701 ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
702 BasicBlock::iterator I = PreReturn->begin();
703 Instruction *Ins = &ClonedOI->ReturnBlock->front();
704 SmallVector<Instruction *, 4> DeadPhis;
705 while (I != PreReturn->end()) {
706 PHINode *OldPhi = dyn_cast<PHINode>(I);
707 if (!OldPhi)
708 break;
709
710 PHINode *RetPhi =
711 PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
712 OldPhi->replaceAllUsesWith(RetPhi);
713 Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
714
715 RetPhi->addIncoming(&*I, PreReturn);
716 for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
717 RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
718 OldPhi->removeIncomingValue(E);
719 }
720
721 // After incoming values splitting, the old phi may become trivial.
722 // Keeping the trivial phi can introduce definition inside the outline
723 // region which is live-out, causing necessary overhead (load, store
724 // arg passing etc).
725 if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
726 OldPhi->replaceAllUsesWith(OldPhiVal);
727 DeadPhis.push_back(OldPhi);
728 }
729 ++I;
730 }
731 for (auto *DP : DeadPhis)
732 DP->eraseFromParent();
733
734 for (auto E : ClonedOI->ReturnBlockPreds) {
735 E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
736 }
737}
738
Xinliang David Lic3f8e832017-06-16 16:54:13 +0000739Function *PartialInlinerImpl::FunctionCloner::doFunctionOutlining() {
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000740 // Returns true if the block is to be partial inlined into the caller
741 // (i.e. not to be extracted to the out of line function)
742 auto ToBeInlined = [&, this](BasicBlock *BB) {
743 return BB == ClonedOI->ReturnBlock ||
744 (std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) !=
745 ClonedOI->Entries.end());
746 };
747
748 // Gather up the blocks that we're going to extract.
749 std::vector<BasicBlock *> ToExtract;
750 ToExtract.push_back(ClonedOI->NonReturnBlock);
751 OutlinedRegionCost +=
752 PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
753 for (BasicBlock &BB : *ClonedFunc)
754 if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
755 ToExtract.push_back(&BB);
756 // FIXME: the code extractor may hoist/sink more code
757 // into the outlined function which may make the outlining
758 // overhead (the difference of the outlined function cost
759 // and OutliningRegionCost) look larger.
760 OutlinedRegionCost += computeBBInlineCost(&BB);
761 }
762
763 // The CodeExtractor needs a dominator tree.
764 DominatorTree DT;
765 DT.recalculate(*ClonedFunc);
766
767 // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
768 LoopInfo LI(DT);
769 BranchProbabilityInfo BPI(*ClonedFunc, LI);
770 ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
771
772 // Extract the body of the if.
773 OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
774 ClonedFuncBFI.get(), &BPI)
775 .extractCodeRegion();
776
777 if (OutlinedFunc) {
778 OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
779 .getInstruction()
780 ->getParent();
781 assert(OutliningCallBB->getParent() == ClonedFunc);
782 }
783
784 return OutlinedFunc;
785}
786
787PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
788 // Ditch the duplicate, since we're done with it, and rewrite all remaining
789 // users (function pointers, etc.) back to the original function.
790 ClonedFunc->replaceAllUsesWith(OrigFunc);
791 ClonedFunc->eraseFromParent();
792 if (!IsFunctionInlined) {
793 // Remove the function that is speculatively created if there is no
794 // reference.
795 if (OutlinedFunc)
796 OutlinedFunc->eraseFromParent();
797 }
798}
799
Xinliang David Lid21601a2017-04-27 16:34:00 +0000800Function *PartialInlinerImpl::unswitchFunction(Function *F) {
801
802 if (F->hasAddressTaken())
803 return nullptr;
804
Xinliang David Liab8722f2017-05-02 18:43:21 +0000805 // Let inliner handle it
806 if (F->hasFnAttribute(Attribute::AlwaysInline))
807 return nullptr;
808
809 if (F->hasFnAttribute(Attribute::NoInline))
810 return nullptr;
811
812 if (PSI->isFunctionEntryCold(F))
813 return nullptr;
814
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000815 if (F->user_begin() == F->user_end())
816 return nullptr;
Xinliang David Lid21601a2017-04-27 16:34:00 +0000817
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000818 std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
819
820 if (!OI)
Craig Topperf40110f2014-04-25 05:29:35 +0000821 return nullptr;
Sean Silvafe5abd52016-07-25 05:00:00 +0000822
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000823 FunctionCloner Cloner(F, OI.get());
824 Cloner.NormalizeReturnBlock();
Xinliang David Lic3f8e832017-06-16 16:54:13 +0000825 Function *OutlinedFunction = Cloner.doFunctionOutlining();
Sean Silvafe5abd52016-07-25 05:00:00 +0000826
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000827 bool AnyInline = tryPartialInline(Cloner);
Xinliang David Li392e9752017-05-14 02:54:02 +0000828
829 if (AnyInline)
830 return OutlinedFunction;
831
Xinliang David Li392e9752017-05-14 02:54:02 +0000832 return nullptr;
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000833}
Sean Silvafe5abd52016-07-25 05:00:00 +0000834
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000835bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000836 int NonWeightedRcost;
837 int SizeCost;
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000838
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000839 if (Cloner.OutlinedFunc == nullptr)
840 return false;
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000841
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000842 std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);
843
844 auto RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
845 auto WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000846
847 // The call sequence to the outlined function is larger than the original
848 // outlined region size, it does not increase the chances of inlining
Chad Rosier4cb2e822017-08-24 20:29:02 +0000849 // the function with outlining (The inliner uses the size increase to
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000850 // model the cost of inlining a callee).
851 if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
852 OptimizationRemarkEmitter ORE(Cloner.OrigFunc);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000853 DebugLoc DLoc;
854 BasicBlock *Block;
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000855 std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000856 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
857 DLoc, Block)
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000858 << ore::NV("Function", Cloner.OrigFunc)
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000859 << " not partially inlined into callers (Original Size = "
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000860 << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000861 << ", Size of call sequence to outlined function = "
862 << ore::NV("NewSize", SizeCost) << ")");
863 return false;
864 }
865
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000866 assert(Cloner.OrigFunc->user_begin() == Cloner.OrigFunc->user_end() &&
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000867 "F's users should all be replaced!");
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000868
869 std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
870 Cloner.ClonedFunc->user_end());
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000871
872 DenseMap<User *, uint64_t> CallSiteToProfCountMap;
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000873 if (Cloner.OrigFunc->getEntryCount())
874 computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000875
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000876 auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000877 uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0);
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000878
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000879 bool AnyInline = false;
880 for (User *User : Users) {
881 CallSite CS = getCallSite(User);
882
883 if (IsLimitReached())
884 continue;
885
886 OptimizationRemarkEmitter ORE(CS.getCaller());
887
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000888 if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE))
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000889 continue;
890
891 ORE.emit(
892 OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction())
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000893 << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000894 << ore::NV("Caller", CS.getCaller()));
895
896 InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
897 InlineFunction(CS, IFI);
898
899 // Now update the entry count:
900 if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
901 uint64_t CallSiteCount = CallSiteToProfCountMap[User];
902 CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
903 }
904
905 AnyInline = true;
906 NumPartialInlining++;
907 // Update the stats
908 NumPartialInlined++;
909 }
910
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000911 if (AnyInline) {
912 Cloner.IsFunctionInlined = true;
913 if (CalleeEntryCount)
914 Cloner.OrigFunc->setEntryCount(CalleeEntryCountV);
915 }
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000916
917 return AnyInline;
Owen Anderson2f82e272009-06-14 08:26:32 +0000918}
919
Sean Silvafe5abd52016-07-25 05:00:00 +0000920bool PartialInlinerImpl::run(Module &M) {
Xinliang David Lidb8d09b2017-04-23 23:39:04 +0000921 if (DisablePartialInlining)
922 return false;
923
Sean Silva519323d2016-07-25 05:57:59 +0000924 std::vector<Function *> Worklist;
925 Worklist.reserve(M.size());
Benjamin Kramer135f7352016-06-26 12:28:59 +0000926 for (Function &F : M)
927 if (!F.use_empty() && !F.isDeclaration())
Sean Silva519323d2016-07-25 05:57:59 +0000928 Worklist.push_back(&F);
Benjamin Kramer135f7352016-06-26 12:28:59 +0000929
Sean Silva519323d2016-07-25 05:57:59 +0000930 bool Changed = false;
931 while (!Worklist.empty()) {
932 Function *CurrFunc = Worklist.back();
933 Worklist.pop_back();
Sean Silvafe5abd52016-07-25 05:00:00 +0000934
Sean Silva519323d2016-07-25 05:57:59 +0000935 if (CurrFunc->use_empty())
936 continue;
Sean Silvafe5abd52016-07-25 05:00:00 +0000937
Sean Silva519323d2016-07-25 05:57:59 +0000938 bool Recursive = false;
939 for (User *U : CurrFunc->users())
940 if (Instruction *I = dyn_cast<Instruction>(U))
941 if (I->getParent()->getParent() == CurrFunc) {
942 Recursive = true;
Owen Anderson2f82e272009-06-14 08:26:32 +0000943 break;
944 }
Sean Silva519323d2016-07-25 05:57:59 +0000945 if (Recursive)
946 continue;
Sean Silvafe5abd52016-07-25 05:00:00 +0000947
Sean Silvaf8015752016-08-02 02:15:45 +0000948 if (Function *NewFunc = unswitchFunction(CurrFunc)) {
949 Worklist.push_back(NewFunc);
Sean Silva519323d2016-07-25 05:57:59 +0000950 Changed = true;
Owen Anderson2f82e272009-06-14 08:26:32 +0000951 }
Owen Anderson2f82e272009-06-14 08:26:32 +0000952 }
Easwaran Raman1832bf62016-06-27 16:50:18 +0000953
Sean Silva519323d2016-07-25 05:57:59 +0000954 return Changed;
Sean Silvafe5abd52016-07-25 05:00:00 +0000955}
956
char PartialInlinerLegacyPass::ID = 0;

// Legacy pass-manager registration, declaring the analyses this pass
// depends on (assumption cache, profile summary, and TTI).
INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
                      "Partial Inliner", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
                    "Partial Inliner", false, false)
Sean Silvafe5abd52016-07-25 05:00:00 +0000965
966ModulePass *llvm::createPartialInliningPass() {
967 return new PartialInlinerLegacyPass();
968}
969
970PreservedAnalyses PartialInlinerPass::run(Module &M,
971 ModuleAnalysisManager &AM) {
Daniel Jasperaec2fa32016-12-19 08:22:17 +0000972 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
Xinliang David Li61338462017-05-02 02:44:14 +0000973
Daniel Jasperaec2fa32016-12-19 08:22:17 +0000974 std::function<AssumptionCache &(Function &)> GetAssumptionCache =
975 [&FAM](Function &F) -> AssumptionCache & {
976 return FAM.getResult<AssumptionAnalysis>(F);
977 };
Xinliang David Li61338462017-05-02 02:44:14 +0000978
979 std::function<BlockFrequencyInfo &(Function &)> GetBFI =
980 [&FAM](Function &F) -> BlockFrequencyInfo & {
981 return FAM.getResult<BlockFrequencyAnalysis>(F);
982 };
983
984 std::function<TargetTransformInfo &(Function &)> GetTTI =
985 [&FAM](Function &F) -> TargetTransformInfo & {
986 return FAM.getResult<TargetIRAnalysis>(F);
987 };
988
989 ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
990
991 if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI).run(M))
Easwaran Raman1832bf62016-06-27 16:50:18 +0000992 return PreservedAnalyses::none();
993 return PreservedAnalyses::all();
Duncan Sands29c8efc2009-07-03 15:30:58 +0000994}