blob: ccaeed34ed87dd119f3fe380f8ffbeb8320e726b [file] [log] [blame]
Owen Anderson2f82e272009-06-14 08:26:32 +00001//===- PartialInlining.cpp - Inline parts of functions --------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass performs partial inlining, typically by inlining an if statement
11// that surrounds the body of the function.
12//
13//===----------------------------------------------------------------------===//
14
Easwaran Raman1832bf62016-06-27 16:50:18 +000015#include "llvm/Transforms/IPO/PartialInlining.h"
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000016#include "llvm/ADT/DenseMap.h"
17#include "llvm/ADT/DenseSet.h"
18#include "llvm/ADT/None.h"
19#include "llvm/ADT/Optional.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SmallVector.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000022#include "llvm/ADT/Statistic.h"
Sean Silvaf8015752016-08-02 02:15:45 +000023#include "llvm/Analysis/BlockFrequencyInfo.h"
24#include "llvm/Analysis/BranchProbabilityInfo.h"
Xinliang David Li61338462017-05-02 02:44:14 +000025#include "llvm/Analysis/InlineCost.h"
Sean Silvaf8015752016-08-02 02:15:45 +000026#include "llvm/Analysis/LoopInfo.h"
Adam Nemet0965da22017-10-09 23:19:02 +000027#include "llvm/Analysis/OptimizationRemarkEmitter.h"
Xinliang David Li61338462017-05-02 02:44:14 +000028#include "llvm/Analysis/ProfileSummaryInfo.h"
Xinliang David Li61338462017-05-02 02:44:14 +000029#include "llvm/Analysis/TargetTransformInfo.h"
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000030#include "llvm/IR/Attributes.h"
31#include "llvm/IR/BasicBlock.h"
Chandler Carruth1305dc32014-03-04 11:45:46 +000032#include "llvm/IR/CFG.h"
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000033#include "llvm/IR/CallSite.h"
34#include "llvm/IR/DebugLoc.h"
Xinliang David Li15744ad2017-04-23 21:40:58 +000035#include "llvm/IR/DiagnosticInfo.h"
Chandler Carruth5ad5f152014-01-13 09:26:24 +000036#include "llvm/IR/Dominators.h"
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000037#include "llvm/IR/Function.h"
38#include "llvm/IR/InstrTypes.h"
39#include "llvm/IR/Instruction.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000040#include "llvm/IR/Instructions.h"
Reid Kleckner0e8c4bb2017-09-07 23:27:44 +000041#include "llvm/IR/IntrinsicInst.h"
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000042#include "llvm/IR/Intrinsics.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000043#include "llvm/IR/Module.h"
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000044#include "llvm/IR/User.h"
Owen Anderson2f82e272009-06-14 08:26:32 +000045#include "llvm/Pass.h"
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000046#include "llvm/Support/BlockFrequency.h"
47#include "llvm/Support/BranchProbability.h"
48#include "llvm/Support/Casting.h"
49#include "llvm/Support/CommandLine.h"
50#include "llvm/Support/ErrorHandling.h"
Easwaran Raman1832bf62016-06-27 16:50:18 +000051#include "llvm/Transforms/IPO.h"
Owen Anderson2f82e272009-06-14 08:26:32 +000052#include "llvm/Transforms/Utils/Cloning.h"
Chandler Carruth0fde0012012-05-04 10:18:49 +000053#include "llvm/Transforms/Utils/CodeExtractor.h"
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000054#include "llvm/Transforms/Utils/ValueMapper.h"
55#include <algorithm>
56#include <cassert>
57#include <cstdint>
58#include <functional>
59#include <iterator>
60#include <memory>
61#include <tuple>
62#include <vector>
63
Owen Anderson2f82e272009-06-14 08:26:32 +000064using namespace llvm;
65
Xinliang David Li15744ad2017-04-23 21:40:58 +000066#define DEBUG_TYPE "partial-inlining"
Chandler Carruth964daaa2014-04-22 02:55:47 +000067
Xinliang David Li61338462017-05-02 02:44:14 +000068STATISTIC(NumPartialInlined,
69 "Number of callsites functions partially inlined into.");
Owen Andersonbd6a2132009-06-15 20:50:26 +000070
Xinliang David Lidb8d09b2017-04-23 23:39:04 +000071// Command line option to disable partial-inlining. The default is false:
72static cl::opt<bool>
73 DisablePartialInlining("disable-partial-inlining", cl::init(false),
74 cl::Hidden, cl::desc("Disable partial ininling"));
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000075
Xinliang David Li66bdfca2017-05-12 23:41:43 +000076// This is an option used by testing:
77static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
78 cl::init(false), cl::ZeroOrMore,
79 cl::ReallyHidden,
80 cl::desc("Skip Cost Analysis"));
Xinliang David Lidb8d09b2017-04-23 23:39:04 +000081
Xinliang David Lid21601a2017-04-27 16:34:00 +000082static cl::opt<unsigned> MaxNumInlineBlocks(
83 "max-num-inline-blocks", cl::init(5), cl::Hidden,
Chad Rosierf98335e2017-08-24 21:21:09 +000084 cl::desc("Max number of blocks to be partially inlined"));
Xinliang David Lid21601a2017-04-27 16:34:00 +000085
Xinliang David Lidb8d09b2017-04-23 23:39:04 +000086// Command line option to set the maximum number of partial inlining allowed
87// for the module. The default value of -1 means no limit.
88static cl::opt<int> MaxNumPartialInlining(
89 "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
90 cl::desc("Max number of partial inlining. The default is unlimited"));
91
Xinliang David Li66bdfca2017-05-12 23:41:43 +000092// Used only when PGO or user annotated branch data is absent. It is
93// the least value that is used to weigh the outline region. If BFI
94// produces larger value, the BFI value will be used.
95static cl::opt<int>
96 OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
97 cl::Hidden, cl::ZeroOrMore,
98 cl::desc("Relative frequency of outline region to "
99 "the entry block"));
100
Xinliang David Li0b7d8582017-06-02 22:08:04 +0000101static cl::opt<unsigned> ExtraOutliningPenalty(
102 "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
103 cl::desc("A debug option to add additional penalty to the computed one."));
104
Owen Anderson2f82e272009-06-14 08:26:32 +0000105namespace {
Xinliang David Lid21601a2017-04-27 16:34:00 +0000106
// Describes the single-region outlining candidate discovered for a
// function: the entry blocks that stay inlined and the boundary blocks
// of the region to be extracted.
struct FunctionOutliningInfo {
  FunctionOutliningInfo() = default;

  // Returns the number of blocks to be inlined including all blocks
  // in Entries and one return block.
  unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; }

  // A set of blocks including the function entry that guard
  // the region to be outlined.
  SmallVector<BasicBlock *, 4> Entries;

  // The return block that is not included in the outlined region.
  BasicBlock *ReturnBlock = nullptr;

  // The dominating block of the region to be outlined.
  BasicBlock *NonReturnBlock = nullptr;

  // The set of blocks in Entries that are predecessors to ReturnBlock
  SmallVector<BasicBlock *, 4> ReturnBlockPreds;
};
127
Sean Silvafe5abd52016-07-25 05:00:00 +0000128struct PartialInlinerImpl {
Xinliang David Li61338462017-05-02 02:44:14 +0000129 PartialInlinerImpl(
130 std::function<AssumptionCache &(Function &)> *GetAC,
131 std::function<TargetTransformInfo &(Function &)> *GTTI,
132 Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI,
133 ProfileSummaryInfo *ProfSI)
134 : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000135
Sean Silvafe5abd52016-07-25 05:00:00 +0000136 bool run(Module &M);
137 Function *unswitchFunction(Function *F);
138
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000139 // This class speculatively clones the the function to be partial inlined.
140 // At the end of partial inlining, the remaining callsites to the cloned
141 // function that are not partially inlined will be fixed up to reference
142 // the original function, and the cloned function will be erased.
143 struct FunctionCloner {
144 FunctionCloner(Function *F, FunctionOutliningInfo *OI);
145 ~FunctionCloner();
146
147 // Prepare for function outlining: making sure there is only
148 // one incoming edge from the extracted/outlined region to
149 // the return block.
150 void NormalizeReturnBlock();
151
152 // Do function outlining:
Xinliang David Lic3f8e832017-06-16 16:54:13 +0000153 Function *doFunctionOutlining();
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000154
155 Function *OrigFunc = nullptr;
156 Function *ClonedFunc = nullptr;
157 Function *OutlinedFunc = nullptr;
158 BasicBlock *OutliningCallBB = nullptr;
159 // ClonedFunc is inlined in one of its callers after function
160 // outlining.
161 bool IsFunctionInlined = false;
162 // The cost of the region to be outlined.
163 int OutlinedRegionCost = 0;
164 std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
165 std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
166 };
167
Sean Silvafe5abd52016-07-25 05:00:00 +0000168private:
Xinliang David Lidb8d09b2017-04-23 23:39:04 +0000169 int NumPartialInlining = 0;
Xinliang David Li61338462017-05-02 02:44:14 +0000170 std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
171 std::function<TargetTransformInfo &(Function &)> *GetTTI;
172 Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
173 ProfileSummaryInfo *PSI;
Xinliang David Lidb8d09b2017-04-23 23:39:04 +0000174
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000175 // Return the frequency of the OutlininingBB relative to F's entry point.
176 // The result is no larger than 1 and is represented using BP.
177 // (Note that the outlined region's 'head' block can only have incoming
178 // edges from the guarding entry blocks).
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000179 BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000180
181 // Return true if the callee of CS should be partially inlined with
182 // profit.
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000183 bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
184 BlockFrequency WeightedOutliningRcost,
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000185 OptimizationRemarkEmitter &ORE);
186
187 // Try to inline DuplicateFunction (cloned from F with call to
188 // the OutlinedFunction into its callers. Return true
189 // if there is any successful inlining.
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000190 bool tryPartialInline(FunctionCloner &Cloner);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000191
192 // Compute the mapping from use site of DuplicationFunction to the enclosing
193 // BB's profile count.
194 void computeCallsiteToProfCountMap(Function *DuplicateFunction,
195 DenseMap<User *, uint64_t> &SiteCountMap);
196
Xinliang David Lidb8d09b2017-04-23 23:39:04 +0000197 bool IsLimitReached() {
198 return (MaxNumPartialInlining != -1 &&
199 NumPartialInlining >= MaxNumPartialInlining);
200 }
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000201
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000202 static CallSite getCallSite(User *U) {
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000203 CallSite CS;
204 if (CallInst *CI = dyn_cast<CallInst>(U))
205 CS = CallSite(CI);
206 else if (InvokeInst *II = dyn_cast<InvokeInst>(U))
207 CS = CallSite(II);
208 else
209 llvm_unreachable("All uses must be calls");
210 return CS;
211 }
212
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000213 static CallSite getOneCallSiteTo(Function *F) {
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000214 User *User = *F->user_begin();
215 return getCallSite(User);
216 }
217
218 std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function *F) {
219 CallSite CS = getOneCallSiteTo(F);
220 DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
221 BasicBlock *Block = CS.getParent();
222 return std::make_tuple(DLoc, Block);
223 }
224
225 // Returns the costs associated with function outlining:
226 // - The first value is the non-weighted runtime cost for making the call
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000227 // to the outlined function, including the addtional setup cost in the
228 // outlined function itself;
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000229 // - The second value is the estimated size of the new call sequence in
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000230 // basic block Cloner.OutliningCallBB;
231 std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner);
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000232
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000233 // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
234 // approximate both the size and runtime cost (Note that in the current
235 // inline cost analysis, there is no clear distinction there either).
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000236 static int computeBBInlineCost(BasicBlock *BB);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000237
238 std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
Sean Silvafe5abd52016-07-25 05:00:00 +0000239};
Xinliang David Lid21601a2017-04-27 16:34:00 +0000240
Easwaran Raman1832bf62016-06-27 16:50:18 +0000241struct PartialInlinerLegacyPass : public ModulePass {
242 static char ID; // Pass identification, replacement for typeid
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000243
Easwaran Raman1832bf62016-06-27 16:50:18 +0000244 PartialInlinerLegacyPass() : ModulePass(ID) {
245 initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
246 }
Craig Topper3e4c6972014-03-05 09:10:37 +0000247
Daniel Jasperaec2fa32016-12-19 08:22:17 +0000248 void getAnalysisUsage(AnalysisUsage &AU) const override {
249 AU.addRequired<AssumptionCacheTracker>();
Xinliang David Li61338462017-05-02 02:44:14 +0000250 AU.addRequired<ProfileSummaryInfoWrapperPass>();
251 AU.addRequired<TargetTransformInfoWrapperPass>();
Daniel Jasperaec2fa32016-12-19 08:22:17 +0000252 }
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000253
Easwaran Raman1832bf62016-06-27 16:50:18 +0000254 bool runOnModule(Module &M) override {
255 if (skipModule(M))
256 return false;
Craig Topper3e4c6972014-03-05 09:10:37 +0000257
Daniel Jasperaec2fa32016-12-19 08:22:17 +0000258 AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
Xinliang David Li61338462017-05-02 02:44:14 +0000259 TargetTransformInfoWrapperPass *TTIWP =
260 &getAnalysis<TargetTransformInfoWrapperPass>();
261 ProfileSummaryInfo *PSI =
262 getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
263
Daniel Jasperaec2fa32016-12-19 08:22:17 +0000264 std::function<AssumptionCache &(Function &)> GetAssumptionCache =
265 [&ACT](Function &F) -> AssumptionCache & {
266 return ACT->getAssumptionCache(F);
267 };
Xinliang David Li61338462017-05-02 02:44:14 +0000268
269 std::function<TargetTransformInfo &(Function &)> GetTTI =
270 [&TTIWP](Function &F) -> TargetTransformInfo & {
271 return TTIWP->getTTI(F);
272 };
273
274 return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, None, PSI).run(M);
Sean Silvafe5abd52016-07-25 05:00:00 +0000275 }
Sean Silva519323d2016-07-25 05:57:59 +0000276};
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000277
278} // end anonymous namespace
Owen Anderson2f82e272009-06-14 08:26:32 +0000279
// Analyze F's CFG for the pattern "chain of two-way entry blocks guarding a
// cold region, with an early-return block". Returns the candidate's
// FunctionOutliningInfo, or a null unique_ptr if F does not match.
std::unique_ptr<FunctionOutliningInfo>
PartialInlinerImpl::computeOutliningInfo(Function *F) {
  BasicBlock *EntryBlock = &F->front();
  BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
  // Only functions whose entry ends in a conditional branch can match.
  if (!BR || BR->isUnconditional())
    return std::unique_ptr<FunctionOutliningInfo>();

  // Returns true if Succ is BB's successor
  auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
    return is_contained(successors(BB), Succ);
  };

  auto SuccSize = [](BasicBlock *BB) {
    return std::distance(succ_begin(BB), succ_end(BB));
  };

  auto IsReturnBlock = [](BasicBlock *BB) {
    TerminatorInst *TI = BB->getTerminator();
    return isa<ReturnInst>(TI);
  };

  // Orders the pair so the returning block (if any) comes first; both
  // elements are null when neither successor returns.
  auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
    if (IsReturnBlock(Succ1))
      return std::make_tuple(Succ1, Succ2);
    if (IsReturnBlock(Succ2))
      return std::make_tuple(Succ2, Succ1);

    return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
  };

  // Detect a triangular shape:
  auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
    if (IsSuccessor(Succ1, Succ2))
      return std::make_tuple(Succ1, Succ2);
    if (IsSuccessor(Succ2, Succ1))
      return std::make_tuple(Succ2, Succ1);

    return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
  };

  std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
      llvm::make_unique<FunctionOutliningInfo>();

  // Phase 1: walk forward from the entry, collecting guard blocks until we
  // find one whose branch separates a return block from the cold region.
  BasicBlock *CurrEntry = EntryBlock;
  bool CandidateFound = false;
  do {
    // The number of blocks to be inlined has already reached
    // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
    // disables partial inlining for the function.
    if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks)
      break;

    if (SuccSize(CurrEntry) != 2)
      break;

    BasicBlock *Succ1 = *succ_begin(CurrEntry);
    BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);

    BasicBlock *ReturnBlock, *NonReturnBlock;
    std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);

    if (ReturnBlock) {
      // Found the guarding shape: record the boundary and stop.
      OutliningInfo->Entries.push_back(CurrEntry);
      OutliningInfo->ReturnBlock = ReturnBlock;
      OutliningInfo->NonReturnBlock = NonReturnBlock;
      CandidateFound = true;
      break;
    }

    // Otherwise only continue through a triangle: one successor must
    // dominate the merge so the chain of guards stays linear.
    BasicBlock *CommSucc;
    BasicBlock *OtherSucc;
    std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);

    if (!CommSucc)
      break;

    OutliningInfo->Entries.push_back(CurrEntry);
    CurrEntry = OtherSucc;
  } while (true);

  if (!CandidateFound)
    return std::unique_ptr<FunctionOutliningInfo>();

  // Do sanity check of the entries: there should not
  // be any successors (not in the entry set) other than
  // {ReturnBlock, NonReturnBlock}
  assert(OutliningInfo->Entries[0] == &F->front() &&
         "Function Entry must be the first in Entries vector");
  DenseSet<BasicBlock *> Entries;
  for (BasicBlock *E : OutliningInfo->Entries)
    Entries.insert(E);

  // Returns true if BB has a predecessor which is not
  // in Entries set.
  auto HasNonEntryPred = [Entries](BasicBlock *BB) {
    for (auto Pred : predecessors(BB)) {
      if (!Entries.count(Pred))
        return true;
    }
    return false;
  };
  // Verifies the entry chain is self-contained and, as a side effect,
  // records which entries branch directly to the return block.
  auto CheckAndNormalizeCandidate =
      [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
        for (BasicBlock *E : OutliningInfo->Entries) {
          for (auto Succ : successors(E)) {
            if (Entries.count(Succ))
              continue;
            if (Succ == OutliningInfo->ReturnBlock)
              OutliningInfo->ReturnBlockPreds.push_back(E);
            else if (Succ != OutliningInfo->NonReturnBlock)
              return false;
          }
          // There should not be any outside incoming edges either:
          if (HasNonEntryPred(E))
            return false;
        }
        return true;
      };

  if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
    return std::unique_ptr<FunctionOutliningInfo>();

  // Now further growing the candidate's inlining region by
  // peeling off dominating blocks from the outlining region:
  while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) {
    BasicBlock *Cand = OutliningInfo->NonReturnBlock;
    if (SuccSize(Cand) != 2)
      break;

    if (HasNonEntryPred(Cand))
      break;

    BasicBlock *Succ1 = *succ_begin(Cand);
    BasicBlock *Succ2 = *(succ_begin(Cand) + 1);

    // The peeled block must keep the same shape: one successor is the
    // existing return block, the other is dominated solely by Cand.
    BasicBlock *ReturnBlock, *NonReturnBlock;
    std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
    if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
      break;

    if (NonReturnBlock->getSinglePredecessor() != Cand)
      break;

    // Now grow and update OutliningInfo:
    OutliningInfo->Entries.push_back(Cand);
    OutliningInfo->NonReturnBlock = NonReturnBlock;
    OutliningInfo->ReturnBlockPreds.push_back(Cand);
    Entries.insert(Cand);
  }

  return OutliningInfo;
}
432
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000433// Check if there is PGO data or user annoated branch data:
434static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
435 if (F->getEntryCount())
436 return true;
437 // Now check if any of the entry block has MD_prof data:
438 for (auto *E : OI->Entries) {
439 BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
440 if (!BR || BR->isUnconditional())
441 continue;
442 uint64_t T, F;
443 if (BR->extractProfMetadata(T, F))
444 return true;
445 }
446 return false;
447}
448
// Returns the frequency of the block calling the outlined function,
// expressed as a branch probability relative to the cloned function's
// entry frequency (capped at 1). Conservatively biased upward when no
// real profile data is available.
BranchProbability
PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
  auto EntryFreq =
      Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
  auto OutliningCallFreq =
      Cloner.ClonedFuncBFI->getBlockFreq(Cloner.OutliningCallBB);

  auto OutlineRegionRelFreq =
      BranchProbability::getBranchProbability(OutliningCallFreq.getFrequency(),
                                              EntryFreq.getFrequency());

  // With real profile data the BFI-derived ratio is trusted as is.
  if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get()))
    return OutlineRegionRelFreq;

  // When profile data is not available, we need to be conservative in
  // estimating the overall savings. Static branch prediction can usually
  // guess the branch direction right (taken/non-taken), but the guessed
  // branch probability is usually not biased enough. In case when the
  // outlined region is predicted to be likely, its probability needs
  // to be made higher (more biased) to not under-estimate the cost of
  // function outlining. On the other hand, if the outlined region
  // is predicted to be less likely, the predicted probability is usually
  // higher than the actual. For instance, the actual probability of the
  // less likely target is only 5%, but the guessed probability can be
  // 40%. In the latter case, there is no need for further adjustment.
  // FIXME: add an option for this.
  if (OutlineRegionRelFreq < BranchProbability(45, 100))
    return OutlineRegionRelFreq;

  OutlineRegionRelFreq = std::max(
      OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));

  return OutlineRegionRelFreq;
}
483
// Decide whether partially inlining the (cloned) callee of CS is
// profitable, emitting an optimization remark either way. Returns false
// when the callee should be fully inlined, never inlined, is too costly,
// or when the weighted outlining overhead exceeds the callsite savings.
bool PartialInlinerImpl::shouldPartialInline(
    CallSite CS, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
    OptimizationRemarkEmitter &ORE) {
  using namespace ore;

  // Testing hook: bypass all cost modeling.
  if (SkipCostAnalysis)
    return true;

  Instruction *Call = CS.getInstruction();
  Function *Callee = CS.getCalledFunction();
  assert(Callee == Cloner.ClonedFunc);

  Function *Caller = CS.getCaller();
  auto &CalleeTTI = (*GetTTI)(*Callee);
  InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
                                *GetAssumptionCache, GetBFI, PSI, &ORE);

  // If the regular inliner would always inline this call, partial
  // inlining would only get in the way.
  if (IC.isAlways()) {
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
             << NV("Callee", Cloner.OrigFunc)
             << " should always be fully inlined, not partially");
    return false;
  }

  if (IC.isNever()) {
    ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
             << NV("Caller", Caller)
             << " because it should never be inlined (cost=never)");
    return false;
  }

  if (!IC) {
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
             << NV("Caller", Caller) << " because too costly to inline (cost="
             << NV("Cost", IC.getCost()) << ", threshold="
             << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
    return false;
  }
  const DataLayout &DL = Caller->getParent()->getDataLayout();

  // The savings of eliminating the call:
  int NonWeightedSavings = getCallsiteCost(CS, DL);
  BlockFrequency NormWeightedSavings(NonWeightedSavings);

  // Weighted saving is smaller than weighted cost, return false
  if (NormWeightedSavings < WeightedOutliningRcost) {
    ORE.emit(
        OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", Call)
        << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
        << NV("Caller", Caller) << " runtime overhead (overhead="
        << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
        << ", savings="
        << NV("Savings", (unsigned)NormWeightedSavings.getFrequency()) << ")"
        << " of making the outlined call is too high");

    return false;
  }

  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
           << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
           << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
           << " (threshold="
           << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
  return true;
}
551
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000552// TODO: Ideally we should share Inliner's InlineCost Analysis code.
553// For now use a simplified version. The returned 'InlineCost' will be used
554// to esimate the size cost as well as runtime cost of the BB.
555int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
556 int InlineCost = 0;
557 const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
558 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
559 if (isa<DbgInfoIntrinsic>(I))
560 continue;
561
Xinliang David Li0b7d8582017-06-02 22:08:04 +0000562 switch (I->getOpcode()) {
563 case Instruction::BitCast:
564 case Instruction::PtrToInt:
565 case Instruction::IntToPtr:
566 case Instruction::Alloca:
567 continue;
568 case Instruction::GetElementPtr:
569 if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
570 continue;
571 default:
572 break;
573 }
574
575 IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(I);
576 if (IntrInst) {
577 if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
578 IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
579 continue;
580 }
581
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000582 if (CallInst *CI = dyn_cast<CallInst>(I)) {
583 InlineCost += getCallsiteCost(CallSite(CI), DL);
584 continue;
585 }
586
587 if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
588 InlineCost += getCallsiteCost(CallSite(II), DL);
589 continue;
590 }
591
592 if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
593 InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
594 continue;
595 }
596 InlineCost += InlineConstants::InstrCost;
597 }
598 return InlineCost;
599}
600
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000601std::tuple<int, int>
602PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000603 // Now compute the cost of the call sequence to the outlined function
604 // 'OutlinedFunction' in BB 'OutliningCallBB':
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000605 int OutliningFuncCallCost = computeBBInlineCost(Cloner.OutliningCallBB);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000606
607 // Now compute the cost of the extracted/outlined function itself:
608 int OutlinedFunctionCost = 0;
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000609 for (BasicBlock &BB : *Cloner.OutlinedFunc) {
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000610 OutlinedFunctionCost += computeBBInlineCost(&BB);
611 }
Xinliang David Li5fdc75a2017-06-02 22:38:48 +0000612
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000613 assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
Xinliang David Li5fdc75a2017-06-02 22:38:48 +0000614 "Outlined function cost should be no less than the outlined region");
Xinliang David Li0b7d8582017-06-02 22:08:04 +0000615 // The code extractor introduces a new root and exit stub blocks with
616 // additional unconditional branches. Those branches will be eliminated
617 // later with bb layout. The cost should be adjusted accordingly:
618 OutlinedFunctionCost -= 2 * InlineConstants::InstrCost;
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000619
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000620 int OutliningRuntimeOverhead =
621 OutliningFuncCallCost +
622 (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
623 ExtraOutliningPenalty;
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000624
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000625 return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000626}
627
// Create the callsite to profile count map which is
// used to update the original function's entry count,
// after the function is partially inlined into the callsite.
void PartialInlinerImpl::computeCallsiteToProfCountMap(
    Function *DuplicateFunction,
    DenseMap<User *, uint64_t> &CallSiteToProfCountMap) {
  // Snapshot the users first: the map is filled while walking them.
  std::vector<User *> Users(DuplicateFunction->user_begin(),
                            DuplicateFunction->user_end());
  Function *CurrentCaller = nullptr;
  // Owns the BFI built on demand under the old pass manager.
  std::unique_ptr<BlockFrequencyInfo> TempBFI;
  BlockFrequencyInfo *CurrentCallerBFI = nullptr;

  // Point CurrentCallerBFI at a BlockFrequencyInfo for Caller, building
  // one locally when no BFI accessor was provided.
  auto ComputeCurrBFI = [&,this](Function *Caller) {
    // For the old pass manager:
    if (!GetBFI) {
      DominatorTree DT(*Caller);
      LoopInfo LI(DT);
      BranchProbabilityInfo BPI(*Caller, LI);
      TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
      CurrentCallerBFI = TempBFI.get();
    } else {
      // New pass manager:
      CurrentCallerBFI = &(*GetBFI)(*Caller);
    }
  };

  for (User *User : Users) {
    CallSite CS = getCallSite(User);
    Function *Caller = CS.getCaller();
    // Recompute BFI only when the caller changes (users of the same
    // caller are expected to be adjacent in the use list).
    if (CurrentCaller != Caller) {
      CurrentCaller = Caller;
      ComputeCurrBFI(Caller);
    } else {
      assert(CurrentCallerBFI && "CallerBFI is not set");
    }
    BasicBlock *CallBB = CS.getInstruction()->getParent();
    auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
    // Callsites without profile information get an explicit zero count.
    if (Count)
      CallSiteToProfCountMap[User] = *Count;
    else
      CallSiteToProfCountMap[User] = 0;
  }
}
671
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000672PartialInlinerImpl::FunctionCloner::FunctionCloner(Function *F,
673 FunctionOutliningInfo *OI)
674 : OrigFunc(F) {
675 ClonedOI = llvm::make_unique<FunctionOutliningInfo>();
676
677 // Clone the function, so that we can hack away on it.
678 ValueToValueMapTy VMap;
679 ClonedFunc = CloneFunction(F, VMap);
680
681 ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
682 ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
683 for (BasicBlock *BB : OI->Entries) {
684 ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
685 }
686 for (BasicBlock *E : OI->ReturnBlockPreds) {
687 BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
688 ClonedOI->ReturnBlockPreds.push_back(NewE);
689 }
690 // Go ahead and update all uses to the duplicate, so that we can just
691 // use the inliner functionality when we're done hacking.
692 F->replaceAllUsesWith(ClonedFunc);
693}
694
// Prepare the cloned function's return block for extraction: PHIs in the
// return block that merge values from both extracted and non-extracted
// predecessors are split into a two-level PHI sequence so that the code
// extractor sees a clean boundary.
void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
  // Returns the PHI node at the very top of BB, or nullptr if BB does not
  // start with a PHI. NOTE(review): despite the while-loop shape, both
  // branches break on the first iteration (I is never advanced), so only
  // the first instruction is ever inspected — which is sufficient, since
  // PHIs can only appear at the start of a block.
  auto getFirstPHI = [](BasicBlock *BB) {
    BasicBlock::iterator I = BB->begin();
    PHINode *FirstPhi = nullptr;
    while (I != BB->end()) {
      PHINode *Phi = dyn_cast<PHINode>(I);
      if (!Phi)
        break;
      if (!FirstPhi) {
        FirstPhi = Phi;
        break;
      }
    }
    return FirstPhi;
  };

  // Special hackery is needed with PHI nodes that have inputs from more than
  // one extracted block. For simplicity, just split the PHIs into a two-level
  // sequence of PHIs, some of which will go in the extracted region, and some
  // of which will go outside.
  BasicBlock *PreReturn = ClonedOI->ReturnBlock;
  // only split block when necessary:
  PHINode *FirstPhi = getFirstPHI(PreReturn);
  unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();

  // If there is no PHI, or the PHI merges at most one value beyond the
  // entry predecessors, no splitting is required.
  if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
    return;

  // A PHI is trivial when every incoming value is the same; return that
  // common value, or nullptr otherwise.
  auto IsTrivialPhi = [](PHINode *PN) -> Value * {
    Value *CommonValue = PN->getIncomingValue(0);
    if (all_of(PN->incoming_values(),
               [&](Value *V) { return V == CommonValue; }))
      return CommonValue;
    return nullptr;
  };

  // Split off everything after the PHIs into the new ReturnBlock;
  // PreReturn keeps the original PHIs and falls through to it.
  ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
      ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
  BasicBlock::iterator I = PreReturn->begin();
  Instruction *Ins = &ClonedOI->ReturnBlock->front();
  SmallVector<Instruction *, 4> DeadPhis;
  while (I != PreReturn->end()) {
    PHINode *OldPhi = dyn_cast<PHINode>(I);
    if (!OldPhi)
      break;

    // Second-level PHI in the new return block: it merges the old PHI's
    // value (via the fall-through edge) with the values coming directly
    // from the entry predecessors.
    PHINode *RetPhi =
        PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
    OldPhi->replaceAllUsesWith(RetPhi);
    // Re-fetch the insertion point: the RAUW above may have changed the
    // block's leading PHI layout.
    Ins = ClonedOI->ReturnBlock->getFirstNonPHI();

    RetPhi->addIncoming(&*I, PreReturn);
    // Move the entry-predecessor incoming values up to the new PHI and
    // strip them from the old one, which stays in the extracted region.
    for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
      RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
      OldPhi->removeIncomingValue(E);
    }

    // After incoming values splitting, the old phi may become trivial.
    // Keeping the trivial phi can introduce definition inside the outline
    // region which is live-out, causing necessary overhead (load, store
    // arg passing etc).
    if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
      OldPhi->replaceAllUsesWith(OldPhiVal);
      DeadPhis.push_back(OldPhi);
    }
    ++I;
  }
  // Erase trivial PHIs only after the iteration so the iterator I above
  // is never invalidated mid-walk.
  for (auto *DP : DeadPhis)
    DP->eraseFromParent();

  // Finally, rewire the entry predecessors to branch straight to the new
  // return block instead of PreReturn.
  for (auto E : ClonedOI->ReturnBlockPreds) {
    E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
  }
}
769
Xinliang David Lic3f8e832017-06-16 16:54:13 +0000770Function *PartialInlinerImpl::FunctionCloner::doFunctionOutlining() {
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000771 // Returns true if the block is to be partial inlined into the caller
772 // (i.e. not to be extracted to the out of line function)
773 auto ToBeInlined = [&, this](BasicBlock *BB) {
774 return BB == ClonedOI->ReturnBlock ||
775 (std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) !=
776 ClonedOI->Entries.end());
777 };
778
779 // Gather up the blocks that we're going to extract.
780 std::vector<BasicBlock *> ToExtract;
781 ToExtract.push_back(ClonedOI->NonReturnBlock);
782 OutlinedRegionCost +=
783 PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
784 for (BasicBlock &BB : *ClonedFunc)
785 if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
786 ToExtract.push_back(&BB);
787 // FIXME: the code extractor may hoist/sink more code
788 // into the outlined function which may make the outlining
789 // overhead (the difference of the outlined function cost
790 // and OutliningRegionCost) look larger.
791 OutlinedRegionCost += computeBBInlineCost(&BB);
792 }
793
794 // The CodeExtractor needs a dominator tree.
795 DominatorTree DT;
796 DT.recalculate(*ClonedFunc);
797
798 // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
799 LoopInfo LI(DT);
800 BranchProbabilityInfo BPI(*ClonedFunc, LI);
801 ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
802
803 // Extract the body of the if.
804 OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
805 ClonedFuncBFI.get(), &BPI)
806 .extractCodeRegion();
807
808 if (OutlinedFunc) {
809 OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
810 .getInstruction()
811 ->getParent();
812 assert(OutliningCallBB->getParent() == ClonedFunc);
813 }
814
815 return OutlinedFunc;
816}
817
818PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
819 // Ditch the duplicate, since we're done with it, and rewrite all remaining
820 // users (function pointers, etc.) back to the original function.
821 ClonedFunc->replaceAllUsesWith(OrigFunc);
822 ClonedFunc->eraseFromParent();
823 if (!IsFunctionInlined) {
824 // Remove the function that is speculatively created if there is no
825 // reference.
826 if (OutlinedFunc)
827 OutlinedFunc->eraseFromParent();
828 }
829}
830
Xinliang David Lid21601a2017-04-27 16:34:00 +0000831Function *PartialInlinerImpl::unswitchFunction(Function *F) {
Xinliang David Lid21601a2017-04-27 16:34:00 +0000832 if (F->hasAddressTaken())
833 return nullptr;
834
Xinliang David Liab8722f2017-05-02 18:43:21 +0000835 // Let inliner handle it
836 if (F->hasFnAttribute(Attribute::AlwaysInline))
837 return nullptr;
838
839 if (F->hasFnAttribute(Attribute::NoInline))
840 return nullptr;
841
842 if (PSI->isFunctionEntryCold(F))
843 return nullptr;
844
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000845 if (F->user_begin() == F->user_end())
846 return nullptr;
Xinliang David Lid21601a2017-04-27 16:34:00 +0000847
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000848 std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
849
850 if (!OI)
Craig Topperf40110f2014-04-25 05:29:35 +0000851 return nullptr;
Sean Silvafe5abd52016-07-25 05:00:00 +0000852
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000853 FunctionCloner Cloner(F, OI.get());
854 Cloner.NormalizeReturnBlock();
Xinliang David Lic3f8e832017-06-16 16:54:13 +0000855 Function *OutlinedFunction = Cloner.doFunctionOutlining();
Sean Silvafe5abd52016-07-25 05:00:00 +0000856
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000857 bool AnyInline = tryPartialInline(Cloner);
Xinliang David Li392e9752017-05-14 02:54:02 +0000858
859 if (AnyInline)
860 return OutlinedFunction;
861
Xinliang David Li392e9752017-05-14 02:54:02 +0000862 return nullptr;
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000863}
Sean Silvafe5abd52016-07-25 05:00:00 +0000864
// Decide whether outlining is profitable and, if so, inline the reduced
// clone into each suitable call site. Returns true if at least one call
// site was partially inlined.
bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
  int NonWeightedRcost;
  int SizeCost;

  // Outlining may have failed; nothing to inline in that case.
  if (Cloner.OutlinedFunc == nullptr)
    return false;

  std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);

  // Scale the runtime cost by how often the outlining call block executes
  // relative to the function entry.
  auto RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
  auto WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;

  // The call sequence to the outlined function is larger than the original
  // outlined region size, it does not increase the chances of inlining
  // the function with outlining (The inliner uses the size increase to
  // model the cost of inlining a callee).
  if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
    OptimizationRemarkEmitter ORE(Cloner.OrigFunc);
    DebugLoc DLoc;
    BasicBlock *Block;
    std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
                                        DLoc, Block)
             << ore::NV("Function", Cloner.OrigFunc)
             << " not partially inlined into callers (Original Size = "
             << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
             << ", Size of call sequence to outlined function = "
             << ore::NV("NewSize", SizeCost) << ")");
    return false;
  }

  // The FunctionCloner constructor redirected every user of the original
  // function to the clone, so the original must have no users left.
  assert(Cloner.OrigFunc->user_begin() == Cloner.OrigFunc->user_end() &&
         "F's users should all be replaced!");

  // Snapshot the clone's users: inlining below mutates the use list.
  std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
                            Cloner.ClonedFunc->user_end());

  // Per-call-site profile counts, used to decay the callee's entry count
  // as call sites get absorbed into their callers.
  DenseMap<User *, uint64_t> CallSiteToProfCountMap;
  if (Cloner.OrigFunc->getEntryCount())
    computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);

  auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
  uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0);

  bool AnyInline = false;
  for (User *User : Users) {
    CallSite CS = getCallSite(User);

    // Respect the global partial-inlining count limit.
    if (IsLimitReached())
      continue;

    OptimizationRemarkEmitter ORE(CS.getCaller());

    if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE))
      continue;

    ORE.emit(
        OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction())
        << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
        << ore::NV("Caller", CS.getCaller()));

    InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
    InlineFunction(CS, IFI);

    // Now update the entry count:
    if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
      uint64_t CallSiteCount = CallSiteToProfCountMap[User];
      // Saturating subtraction: never let the entry count go negative.
      CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
    }

    AnyInline = true;
    NumPartialInlining++;
    // Update the stats
    NumPartialInlined++;
  }

  // Mark success on the cloner (so its destructor keeps the outlined
  // function) and write back the decayed entry count.
  if (AnyInline) {
    Cloner.IsFunctionInlined = true;
    if (CalleeEntryCount)
      Cloner.OrigFunc->setEntryCount(CalleeEntryCountV);
  }

  return AnyInline;
}
949
Sean Silvafe5abd52016-07-25 05:00:00 +0000950bool PartialInlinerImpl::run(Module &M) {
Xinliang David Lidb8d09b2017-04-23 23:39:04 +0000951 if (DisablePartialInlining)
952 return false;
953
Sean Silva519323d2016-07-25 05:57:59 +0000954 std::vector<Function *> Worklist;
955 Worklist.reserve(M.size());
Benjamin Kramer135f7352016-06-26 12:28:59 +0000956 for (Function &F : M)
957 if (!F.use_empty() && !F.isDeclaration())
Sean Silva519323d2016-07-25 05:57:59 +0000958 Worklist.push_back(&F);
Benjamin Kramer135f7352016-06-26 12:28:59 +0000959
Sean Silva519323d2016-07-25 05:57:59 +0000960 bool Changed = false;
961 while (!Worklist.empty()) {
962 Function *CurrFunc = Worklist.back();
963 Worklist.pop_back();
Sean Silvafe5abd52016-07-25 05:00:00 +0000964
Sean Silva519323d2016-07-25 05:57:59 +0000965 if (CurrFunc->use_empty())
966 continue;
Sean Silvafe5abd52016-07-25 05:00:00 +0000967
Sean Silva519323d2016-07-25 05:57:59 +0000968 bool Recursive = false;
969 for (User *U : CurrFunc->users())
970 if (Instruction *I = dyn_cast<Instruction>(U))
971 if (I->getParent()->getParent() == CurrFunc) {
972 Recursive = true;
Owen Anderson2f82e272009-06-14 08:26:32 +0000973 break;
974 }
Sean Silva519323d2016-07-25 05:57:59 +0000975 if (Recursive)
976 continue;
Sean Silvafe5abd52016-07-25 05:00:00 +0000977
Sean Silvaf8015752016-08-02 02:15:45 +0000978 if (Function *NewFunc = unswitchFunction(CurrFunc)) {
979 Worklist.push_back(NewFunc);
Sean Silva519323d2016-07-25 05:57:59 +0000980 Changed = true;
Owen Anderson2f82e272009-06-14 08:26:32 +0000981 }
Owen Anderson2f82e272009-06-14 08:26:32 +0000982 }
Easwaran Raman1832bf62016-06-27 16:50:18 +0000983
Sean Silva519323d2016-07-25 05:57:59 +0000984 return Changed;
Sean Silvafe5abd52016-07-25 05:00:00 +0000985}
986
char PartialInlinerLegacyPass::ID = 0;

// Register the legacy pass and declare the analyses it depends on with the
// legacy pass manager.
INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
                      "Partial Inliner", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
                    "Partial Inliner", false, false)
Sean Silvafe5abd52016-07-25 05:00:00 +0000996
// Factory used by the legacy pass manager to instantiate this pass.
ModulePass *llvm::createPartialInliningPass() {
  return new PartialInlinerLegacyPass();
}
1000
1001PreservedAnalyses PartialInlinerPass::run(Module &M,
1002 ModuleAnalysisManager &AM) {
Daniel Jasperaec2fa32016-12-19 08:22:17 +00001003 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
Xinliang David Li61338462017-05-02 02:44:14 +00001004
Daniel Jasperaec2fa32016-12-19 08:22:17 +00001005 std::function<AssumptionCache &(Function &)> GetAssumptionCache =
1006 [&FAM](Function &F) -> AssumptionCache & {
1007 return FAM.getResult<AssumptionAnalysis>(F);
1008 };
Xinliang David Li61338462017-05-02 02:44:14 +00001009
1010 std::function<BlockFrequencyInfo &(Function &)> GetBFI =
1011 [&FAM](Function &F) -> BlockFrequencyInfo & {
1012 return FAM.getResult<BlockFrequencyAnalysis>(F);
1013 };
1014
1015 std::function<TargetTransformInfo &(Function &)> GetTTI =
1016 [&FAM](Function &F) -> TargetTransformInfo & {
1017 return FAM.getResult<TargetIRAnalysis>(F);
1018 };
1019
1020 ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
1021
1022 if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI).run(M))
Easwaran Raman1832bf62016-06-27 16:50:18 +00001023 return PreservedAnalyses::none();
1024 return PreservedAnalyses::all();
Duncan Sands29c8efc2009-07-03 15:30:58 +00001025}