//===- PartialInlining.cpp - Inline parts of functions -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs partial inlining, typically by inlining an if statement
// that surrounds the body of the function.
//
//===----------------------------------------------------------------------===//
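
// As a concrete illustration (the function and names below are hypothetical,
// not taken from this file): given
//
//   int foo(int X) {
//     if (X < 0)       // cheap guard in the entry block
//       return -1;     // early return
//     ...              // large body that is rarely executed
//   }
//
// the pass clones foo, extracts the large body into a new function (named
// here, for illustration only, foo.1_body), and inlines the remaining guard
// plus early return into foo's callers, so that callers only pay for a call
// when the guard's non-returning path is actually taken.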

#include "llvm/Transforms/IPO/PartialInlining.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/User.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <memory>
#include <tuple>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "partial-inlining"

STATISTIC(NumPartialInlined,
          "Number of callsites functions partially inlined into.");

// Command line option to disable partial-inlining. The default is false:
static cl::opt<bool>
    DisablePartialInlining("disable-partial-inlining", cl::init(false),
                           cl::Hidden, cl::desc("Disable partial inlining"));

// This is an option used by testing:
static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
                                      cl::init(false), cl::ZeroOrMore,
                                      cl::ReallyHidden,
                                      cl::desc("Skip Cost Analysis"));

static cl::opt<unsigned> MaxNumInlineBlocks(
    "max-num-inline-blocks", cl::init(5), cl::Hidden,
    cl::desc("Max number of blocks to be partially inlined"));

// Command line option to set the maximum number of partial inlining allowed
// for the module. The default value of -1 means no limit.
static cl::opt<int> MaxNumPartialInlining(
    "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
    cl::desc("Max number of partial inlining. The default is unlimited"));

// Used only when PGO or user annotated branch data is absent. It is
// the least value that is used to weigh the outline region. If BFI
// produces a larger value, the BFI value will be used.
static cl::opt<int>
    OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
                             cl::Hidden, cl::ZeroOrMore,
                             cl::desc("Relative frequency of outline region to "
                                      "the entry block"));

static cl::opt<unsigned> ExtraOutliningPenalty(
    "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
    cl::desc("A debug option to add additional penalty to the computed one."));

namespace {

struct FunctionOutliningInfo {
  FunctionOutliningInfo() = default;

  // Returns the number of blocks to be inlined including all blocks
  // in Entries and one return block.
  unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; }

  // A set of blocks including the function entry that guard
  // the region to be outlined.
  SmallVector<BasicBlock *, 4> Entries;

  // The return block that is not included in the outlined region.
  BasicBlock *ReturnBlock = nullptr;

  // The dominating block of the region to be outlined.
  BasicBlock *NonReturnBlock = nullptr;

  // The set of blocks in Entries that are predecessors to ReturnBlock.
  SmallVector<BasicBlock *, 4> ReturnBlockPreds;
};

struct PartialInlinerImpl {
  PartialInlinerImpl(
      std::function<AssumptionCache &(Function &)> *GetAC,
      std::function<TargetTransformInfo &(Function &)> *GTTI,
      Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI,
      ProfileSummaryInfo *ProfSI)
      : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}

  bool run(Module &M);
  Function *unswitchFunction(Function *F);

  // This class speculatively clones the function to be partially inlined.
  // At the end of partial inlining, the remaining callsites to the cloned
  // function that are not partially inlined will be fixed up to reference
  // the original function, and the cloned function will be erased.
  struct FunctionCloner {
    FunctionCloner(Function *F, FunctionOutliningInfo *OI);
    ~FunctionCloner();

    // Prepare for function outlining: make sure there is only
    // one incoming edge from the extracted/outlined region to
    // the return block.
    void NormalizeReturnBlock();

    // Do function outlining:
    Function *doFunctionOutlining();

    Function *OrigFunc = nullptr;
    Function *ClonedFunc = nullptr;
    Function *OutlinedFunc = nullptr;
    BasicBlock *OutliningCallBB = nullptr;
    // ClonedFunc is inlined in one of its callers after function
    // outlining.
    bool IsFunctionInlined = false;
    // The cost of the region to be outlined.
    int OutlinedRegionCost = 0;
    std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
    std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
  };

private:
  int NumPartialInlining = 0;
  std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
  std::function<TargetTransformInfo &(Function &)> *GetTTI;
  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
  ProfileSummaryInfo *PSI;

  // Return the frequency of OutliningCallBB relative to F's entry point.
  // The result is no larger than 1 and is represented using BP.
  // (Note that the outlined region's 'head' block can only have incoming
  // edges from the guarding entry blocks.)
  BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner);

  // Return true if the callee of CS should be partially inlined with
  // profit.
  bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
                           BlockFrequency WeightedOutliningRcost,
                           OptimizationRemarkEmitter &ORE);

  // Try to inline DuplicateFunction (cloned from F with a call to
  // the OutlinedFunction) into its callers. Return true
  // if there is any successful inlining.
  bool tryPartialInline(FunctionCloner &Cloner);

  // Compute the mapping from use sites of DuplicateFunction to the enclosing
  // BB's profile count.
  void computeCallsiteToProfCountMap(Function *DuplicateFunction,
                                     DenseMap<User *, uint64_t> &SiteCountMap);

  bool IsLimitReached() {
    return (MaxNumPartialInlining != -1 &&
            NumPartialInlining >= MaxNumPartialInlining);
  }

  static CallSite getCallSite(User *U) {
    CallSite CS;
    if (CallInst *CI = dyn_cast<CallInst>(U))
      CS = CallSite(CI);
    else if (InvokeInst *II = dyn_cast<InvokeInst>(U))
      CS = CallSite(II);
    else
      llvm_unreachable("All uses must be calls");
    return CS;
  }

  static CallSite getOneCallSiteTo(Function *F) {
    User *User = *F->user_begin();
    return getCallSite(User);
  }

  std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function *F) {
    CallSite CS = getOneCallSiteTo(F);
    DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
    BasicBlock *Block = CS.getParent();
    return std::make_tuple(DLoc, Block);
  }

  // Returns the costs associated with function outlining:
  // - The first value is the estimated size of the new call sequence in
  //   basic block Cloner.OutliningCallBB;
  // - The second value is the non-weighted runtime overhead of making the
  //   call to the outlined function, including the additional setup cost in
  //   the outlined function itself.
  std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner);

  // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
  // approximate both the size and runtime cost (note that in the current
  // inline cost analysis, there is no clear distinction there either).
  static int computeBBInlineCost(BasicBlock *BB);

  std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
};

struct PartialInlinerLegacyPass : public ModulePass {
  static char ID; // Pass identification, replacement for typeid

  PartialInlinerLegacyPass() : ModulePass(ID) {
    initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
  }

  bool runOnModule(Module &M) override {
    if (skipModule(M))
      return false;

    AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
    TargetTransformInfoWrapperPass *TTIWP =
        &getAnalysis<TargetTransformInfoWrapperPass>();
    ProfileSummaryInfo *PSI =
        getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();

    std::function<AssumptionCache &(Function &)> GetAssumptionCache =
        [&ACT](Function &F) -> AssumptionCache & {
      return ACT->getAssumptionCache(F);
    };

    std::function<TargetTransformInfo &(Function &)> GetTTI =
        [&TTIWP](Function &F) -> TargetTransformInfo & {
      return TTIWP->getTTI(F);
    };

    return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, None, PSI).run(M);
  }
};

} // end anonymous namespace

std::unique_ptr<FunctionOutliningInfo>
PartialInlinerImpl::computeOutliningInfo(Function *F) {
  BasicBlock *EntryBlock = &F->front();
  BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
  if (!BR || BR->isUnconditional())
    return std::unique_ptr<FunctionOutliningInfo>();

  // Returns true if Succ is BB's successor.
  auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
    return is_contained(successors(BB), Succ);
  };

  auto SuccSize = [](BasicBlock *BB) {
    return std::distance(succ_begin(BB), succ_end(BB));
  };

  auto IsReturnBlock = [](BasicBlock *BB) {
    TerminatorInst *TI = BB->getTerminator();
    return isa<ReturnInst>(TI);
  };

  auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
    if (IsReturnBlock(Succ1))
      return std::make_tuple(Succ1, Succ2);
    if (IsReturnBlock(Succ2))
      return std::make_tuple(Succ2, Succ1);

    return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
  };

  // Detect a triangular shape:
  auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
    if (IsSuccessor(Succ1, Succ2))
      return std::make_tuple(Succ1, Succ2);
    if (IsSuccessor(Succ2, Succ1))
      return std::make_tuple(Succ2, Succ1);

    return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
  };
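
  // A sketch of the two CFG shapes the walk below looks for (block names are
  // illustrative only). Either the current entry candidate branches directly
  // to a return block:
  //
  //        CurrEntry
  //         /      \
  //   ReturnBlock  NonReturnBlock
  //
  // or it forms a triangle with another guard block, in which case the walk
  // continues from that guard (OtherSucc):
  //
  //        CurrEntry
  //         /      \
  //    OtherSucc --> CommSucc
  //
  // Each guard visited this way is recorded in Entries and stays inlined;
  // the blocks outside Entries and ReturnBlock (the region headed by
  // NonReturnBlock) are extracted later by the FunctionCloner.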

  std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
      llvm::make_unique<FunctionOutliningInfo>();

  BasicBlock *CurrEntry = EntryBlock;
  bool CandidateFound = false;
  do {
    // The number of blocks to be inlined has already reached
    // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
    // disables partial inlining for the function.
    if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks)
      break;

    if (SuccSize(CurrEntry) != 2)
      break;

    BasicBlock *Succ1 = *succ_begin(CurrEntry);
    BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);

    BasicBlock *ReturnBlock, *NonReturnBlock;
    std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);

    if (ReturnBlock) {
      OutliningInfo->Entries.push_back(CurrEntry);
      OutliningInfo->ReturnBlock = ReturnBlock;
      OutliningInfo->NonReturnBlock = NonReturnBlock;
      CandidateFound = true;
      break;
    }

    BasicBlock *CommSucc;
    BasicBlock *OtherSucc;
    std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);

    if (!CommSucc)
      break;

    OutliningInfo->Entries.push_back(CurrEntry);
    CurrEntry = OtherSucc;
  } while (true);

  if (!CandidateFound)
    return std::unique_ptr<FunctionOutliningInfo>();

  // Do sanity check of the entries: there should not
  // be any successors (not in the entry set) other than
  // {ReturnBlock, NonReturnBlock}.
  assert(OutliningInfo->Entries[0] == &F->front() &&
         "Function Entry must be the first in Entries vector");
  DenseSet<BasicBlock *> Entries;
  for (BasicBlock *E : OutliningInfo->Entries)
    Entries.insert(E);

  // Returns true if BB has a predecessor which is not
  // in the Entries set.
  auto HasNonEntryPred = [Entries](BasicBlock *BB) {
    for (auto Pred : predecessors(BB)) {
      if (!Entries.count(Pred))
        return true;
    }
    return false;
  };
  auto CheckAndNormalizeCandidate =
      [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
        for (BasicBlock *E : OutliningInfo->Entries) {
          for (auto Succ : successors(E)) {
            if (Entries.count(Succ))
              continue;
            if (Succ == OutliningInfo->ReturnBlock)
              OutliningInfo->ReturnBlockPreds.push_back(E);
            else if (Succ != OutliningInfo->NonReturnBlock)
              return false;
          }
          // There should not be any outside incoming edges either:
          if (HasNonEntryPred(E))
            return false;
        }
        return true;
      };

  if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
    return std::unique_ptr<FunctionOutliningInfo>();

  // Now further grow the candidate's inlining region by
  // peeling off dominating blocks from the outlining region:
  while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) {
    BasicBlock *Cand = OutliningInfo->NonReturnBlock;
    if (SuccSize(Cand) != 2)
      break;

    if (HasNonEntryPred(Cand))
      break;

    BasicBlock *Succ1 = *succ_begin(Cand);
    BasicBlock *Succ2 = *(succ_begin(Cand) + 1);

    BasicBlock *ReturnBlock, *NonReturnBlock;
    std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
    if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
      break;

    if (NonReturnBlock->getSinglePredecessor() != Cand)
      break;

    // Now grow and update OutliningInfo:
    OutliningInfo->Entries.push_back(Cand);
    OutliningInfo->NonReturnBlock = NonReturnBlock;
    OutliningInfo->ReturnBlockPreds.push_back(Cand);
    Entries.insert(Cand);
  }

  return OutliningInfo;
}

// Check if there is PGO data or user annotated branch data:
static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
  if (F->getEntryCount())
    return true;
  // Now check if any of the entry blocks has MD_prof data:
  for (auto *E : OI->Entries) {
    BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
    if (!BR || BR->isUnconditional())
      continue;
    uint64_t T, F;
    if (BR->extractProfMetadata(T, F))
      return true;
  }
  return false;
}

BranchProbability
PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
  auto EntryFreq =
      Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
  auto OutliningCallFreq =
      Cloner.ClonedFuncBFI->getBlockFreq(Cloner.OutliningCallBB);

  auto OutlineRegionRelFreq =
      BranchProbability::getBranchProbability(OutliningCallFreq.getFrequency(),
                                              EntryFreq.getFrequency());

  if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get()))
    return OutlineRegionRelFreq;

  // When profile data is not available, we need to be conservative in
  // estimating the overall savings. Static branch prediction can usually
  // guess the branch direction right (taken/non-taken), but the guessed
  // branch probability is usually not biased enough. In case the
  // outlined region is predicted to be likely, its probability needs
  // to be made higher (more biased) to not under-estimate the cost of
  // function outlining. On the other hand, if the outlined region
  // is predicted to be less likely, the predicted probability is usually
  // higher than the actual. For instance, the actual probability of the
  // less likely target is only 5%, but the guessed probability can be
  // 40%. In the latter case, there is no need for further adjustment.
  // FIXME: add an option for this.
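  // For example (illustrative numbers only): without profile data, a guessed
  // relative frequency of 60% for the outlined call block is raised below to
  // max(60%, OutlineRegionFreqPercent) = 75%, while a guessed 30% is left
  // unchanged because it is already below the 45% cut-off.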
  if (OutlineRegionRelFreq < BranchProbability(45, 100))
    return OutlineRegionRelFreq;

  OutlineRegionRelFreq = std::max(
      OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));

  return OutlineRegionRelFreq;
}

bool PartialInlinerImpl::shouldPartialInline(
    CallSite CS, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
    OptimizationRemarkEmitter &ORE) {
  using namespace ore;

  if (SkipCostAnalysis)
    return true;

  Instruction *Call = CS.getInstruction();
  Function *Callee = CS.getCalledFunction();
  assert(Callee == Cloner.ClonedFunc);

  Function *Caller = CS.getCaller();
  auto &CalleeTTI = (*GetTTI)(*Callee);
  InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
                                *GetAssumptionCache, GetBFI, PSI, &ORE);

  if (IC.isAlways()) {
    ORE.emit([&]() {
      return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
             << NV("Callee", Cloner.OrigFunc)
             << " should always be fully inlined, not partially";
    });
    return false;
  }

  if (IC.isNever()) {
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
             << NV("Caller", Caller)
             << " because it should never be inlined (cost=never)";
    });
    return false;
  }

  if (!IC) {
    ORE.emit([&]() {
      return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
             << NV("Caller", Caller) << " because too costly to inline (cost="
             << NV("Cost", IC.getCost()) << ", threshold="
             << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
    });
    return false;
  }
  const DataLayout &DL = Caller->getParent()->getDataLayout();

  // The savings of eliminating the call:
  int NonWeightedSavings = getCallsiteCost(CS, DL);
  BlockFrequency NormWeightedSavings(NonWeightedSavings);

  // If the weighted savings is smaller than the weighted cost, return false.
  if (NormWeightedSavings < WeightedOutliningRcost) {
    ORE.emit([&]() {
      return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
                                        Call)
             << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
             << NV("Caller", Caller) << " runtime overhead (overhead="
             << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
             << ", savings="
             << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
             << ")"
             << " of making the outlined call is too high";
    });

    return false;
  }

  ORE.emit([&]() {
    return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
           << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
           << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
           << " (threshold="
           << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
  });
  return true;
}

// TODO: Ideally we should share the inliner's InlineCost analysis code.
// For now use a simplified version. The returned 'InlineCost' will be used
// to estimate the size cost as well as runtime cost of the BB.
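// As a rough illustration of the accounting below (numbers assumed, not
// measured): a block containing a bitcast (free), an ordinary arithmetic
// instruction (InlineConstants::InstrCost), a call (getCallsiteCost of that
// call site), and a 3-case switch terminator ((3 + 1) * InstrCost) is charged
// the sum of those terms; debug intrinsics and lifetime markers are skipped.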
int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
  int InlineCost = 0;
  const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    switch (I->getOpcode()) {
    case Instruction::BitCast:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::Alloca:
      continue;
    case Instruction::GetElementPtr:
      if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
        continue;
    default:
      break;
    }

    IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(I);
    if (IntrInst) {
      if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
          IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
        continue;
    }

    if (CallInst *CI = dyn_cast<CallInst>(I)) {
      InlineCost += getCallsiteCost(CallSite(CI), DL);
      continue;
    }

    if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
      InlineCost += getCallsiteCost(CallSite(II), DL);
      continue;
    }

    if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
      InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
      continue;
    }
    InlineCost += InlineConstants::InstrCost;
  }
  return InlineCost;
}

std::tuple<int, int>
PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
  // Now compute the cost of the call sequence to the outlined function
  // 'OutlinedFunction' in BB 'OutliningCallBB':
  int OutliningFuncCallCost = computeBBInlineCost(Cloner.OutliningCallBB);

  // Now compute the cost of the extracted/outlined function itself:
  int OutlinedFunctionCost = 0;
  for (BasicBlock &BB : *Cloner.OutlinedFunc) {
    OutlinedFunctionCost += computeBBInlineCost(&BB);
  }

  assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
         "Outlined function cost should be no less than the outlined region");
  // The code extractor introduces a new root and exit stub blocks with
  // additional unconditional branches. Those branches will be eliminated
  // later with bb layout. The cost should be adjusted accordingly:
  OutlinedFunctionCost -= 2 * InlineConstants::InstrCost;

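  // The runtime overhead below is the cost of the new call sequence plus
  // whatever code the extractor added beyond the original region. For example
  // (illustrative numbers): with a call-sequence cost of 25, an outlined
  // function cost of 40, and an original region cost of 30, the overhead is
  // 25 + (40 - 30) = 35, plus any -partial-inlining-extra-penalty.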
  int OutliningRuntimeOverhead =
      OutliningFuncCallCost +
      (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
      ExtraOutliningPenalty;

  return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
}

// Create the callsite-to-profile-count map, which is used to update the
// original function's entry count after the function is partially inlined
// into the callsite.
void PartialInlinerImpl::computeCallsiteToProfCountMap(
    Function *DuplicateFunction,
    DenseMap<User *, uint64_t> &CallSiteToProfCountMap) {
  std::vector<User *> Users(DuplicateFunction->user_begin(),
                            DuplicateFunction->user_end());
  Function *CurrentCaller = nullptr;
  std::unique_ptr<BlockFrequencyInfo> TempBFI;
  BlockFrequencyInfo *CurrentCallerBFI = nullptr;

  auto ComputeCurrBFI = [&,this](Function *Caller) {
    // For the old pass manager:
    if (!GetBFI) {
      DominatorTree DT(*Caller);
      LoopInfo LI(DT);
      BranchProbabilityInfo BPI(*Caller, LI);
      TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
      CurrentCallerBFI = TempBFI.get();
    } else {
      // New pass manager:
      CurrentCallerBFI = &(*GetBFI)(*Caller);
    }
  };

  for (User *User : Users) {
    CallSite CS = getCallSite(User);
    Function *Caller = CS.getCaller();
    if (CurrentCaller != Caller) {
      CurrentCaller = Caller;
      ComputeCurrBFI(Caller);
    } else {
      assert(CurrentCallerBFI && "CallerBFI is not set");
    }
    BasicBlock *CallBB = CS.getInstruction()->getParent();
    auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
    if (Count)
      CallSiteToProfCountMap[User] = *Count;
    else
      CallSiteToProfCountMap[User] = 0;
  }
}

PartialInlinerImpl::FunctionCloner::FunctionCloner(Function *F,
                                                   FunctionOutliningInfo *OI)
    : OrigFunc(F) {
  ClonedOI = llvm::make_unique<FunctionOutliningInfo>();

  // Clone the function, so that we can hack away on it.
  ValueToValueMapTy VMap;
  ClonedFunc = CloneFunction(F, VMap);

  ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
  ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
  for (BasicBlock *BB : OI->Entries) {
    ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
  }
  for (BasicBlock *E : OI->ReturnBlockPreds) {
    BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
    ClonedOI->ReturnBlockPreds.push_back(NewE);
  }
  // Go ahead and update all uses to the duplicate, so that we can just
  // use the inliner functionality when we're done hacking.
  F->replaceAllUsesWith(ClonedFunc);
}

void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
  auto getFirstPHI = [](BasicBlock *BB) {
    BasicBlock::iterator I = BB->begin();
    PHINode *FirstPhi = nullptr;
    while (I != BB->end()) {
      PHINode *Phi = dyn_cast<PHINode>(I);
      if (!Phi)
        break;
      if (!FirstPhi) {
        FirstPhi = Phi;
        break;
      }
    }
    return FirstPhi;
  };

  // Special hackery is needed with PHI nodes that have inputs from more than
  // one extracted block. For simplicity, just split the PHIs into a two-level
  // sequence of PHIs, some of which will go in the extracted region, and some
  // of which will go outside.
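  // Illustrative sketch (hypothetical IR, not taken from a test): a
  // return-block phi such as
  //
  //   %r = phi i32 [ 0, %entry ], [ 1, %guard ], [ %v, %body ]
  //
  // where %body is about to be outlined, is split so that a phi merging the
  // extracted-region values stays in the pre-return (extracted) block, while
  // a new phi in the split-off return block combines that result with the
  // values arriving directly from the entry/guard blocks.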
  BasicBlock *PreReturn = ClonedOI->ReturnBlock;
  // Only split the block when necessary:
  PHINode *FirstPhi = getFirstPHI(PreReturn);
  unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();

  if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
    return;

  auto IsTrivialPhi = [](PHINode *PN) -> Value * {
    Value *CommonValue = PN->getIncomingValue(0);
    if (all_of(PN->incoming_values(),
               [&](Value *V) { return V == CommonValue; }))
      return CommonValue;
    return nullptr;
  };

  ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
      ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
  BasicBlock::iterator I = PreReturn->begin();
  Instruction *Ins = &ClonedOI->ReturnBlock->front();
  SmallVector<Instruction *, 4> DeadPhis;
  while (I != PreReturn->end()) {
    PHINode *OldPhi = dyn_cast<PHINode>(I);
    if (!OldPhi)
      break;

    PHINode *RetPhi =
        PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
    OldPhi->replaceAllUsesWith(RetPhi);
    Ins = ClonedOI->ReturnBlock->getFirstNonPHI();

    RetPhi->addIncoming(&*I, PreReturn);
    for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
      RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
      OldPhi->removeIncomingValue(E);
    }

    // After splitting the incoming values, the old phi may become trivial.
    // Keeping the trivial phi would introduce a definition inside the
    // outlined region that is live-out, causing extra overhead (load, store,
    // arg passing, etc.).
    if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
      OldPhi->replaceAllUsesWith(OldPhiVal);
      DeadPhis.push_back(OldPhi);
    }
    ++I;
  }
  for (auto *DP : DeadPhis)
    DP->eraseFromParent();

  for (auto E : ClonedOI->ReturnBlockPreds) {
    E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
  }
}

Function *PartialInlinerImpl::FunctionCloner::doFunctionOutlining() {
  // Returns true if the block is to be partially inlined into the caller
  // (i.e. not to be extracted to the out-of-line function).
  auto ToBeInlined = [&, this](BasicBlock *BB) {
    return BB == ClonedOI->ReturnBlock ||
           (std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) !=
            ClonedOI->Entries.end());
  };

  // Gather up the blocks that we're going to extract.
  std::vector<BasicBlock *> ToExtract;
  ToExtract.push_back(ClonedOI->NonReturnBlock);
  OutlinedRegionCost +=
      PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
  for (BasicBlock &BB : *ClonedFunc)
    if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
      ToExtract.push_back(&BB);
      // FIXME: the code extractor may hoist/sink more code
      // into the outlined function which may make the outlining
      // overhead (the difference between the outlined function cost
      // and OutlinedRegionCost) look larger.
      OutlinedRegionCost += computeBBInlineCost(&BB);
    }

  // The CodeExtractor needs a dominator tree.
  DominatorTree DT;
  DT.recalculate(*ClonedFunc);

  // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
  LoopInfo LI(DT);
  BranchProbabilityInfo BPI(*ClonedFunc, LI);
  ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));

  // Extract the body of the if.
  OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
                               ClonedFuncBFI.get(), &BPI)
                     .extractCodeRegion();

  if (OutlinedFunc) {
    OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
                          .getInstruction()
                          ->getParent();
    assert(OutliningCallBB->getParent() == ClonedFunc);
  }

  return OutlinedFunc;
}

PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
  // Ditch the duplicate, since we're done with it, and rewrite all remaining
  // users (function pointers, etc.) back to the original function.
  ClonedFunc->replaceAllUsesWith(OrigFunc);
  ClonedFunc->eraseFromParent();
  if (!IsFunctionInlined) {
    // Remove the function that is speculatively created if there is no
    // reference.
    if (OutlinedFunc)
      OutlinedFunc->eraseFromParent();
  }
}

Function *PartialInlinerImpl::unswitchFunction(Function *F) {
  if (F->hasAddressTaken())
    return nullptr;

  // Let the inliner handle it.
  if (F->hasFnAttribute(Attribute::AlwaysInline))
    return nullptr;

  if (F->hasFnAttribute(Attribute::NoInline))
    return nullptr;

  if (PSI->isFunctionEntryCold(F))
    return nullptr;

  if (F->user_begin() == F->user_end())
    return nullptr;

  std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);

  if (!OI)
    return nullptr;

  FunctionCloner Cloner(F, OI.get());
  Cloner.NormalizeReturnBlock();
  Function *OutlinedFunction = Cloner.doFunctionOutlining();

  bool AnyInline = tryPartialInline(Cloner);

  if (AnyInline)
    return OutlinedFunction;

  return nullptr;
}

bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
  int NonWeightedRcost;
  int SizeCost;

  if (Cloner.OutlinedFunc == nullptr)
    return false;

  std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);

  auto RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
  auto WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;

  // If the call sequence to the outlined function is larger than the original
  // outlined region, outlining does not increase the chances of inlining the
  // function (the inliner uses the size increase to model the cost of
  // inlining a callee).
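  // For example (illustrative numbers): if the original region cost is 20 but
  // the new call sequence in OutliningCallBB costs 30, partially inlined
  // callers would end up larger than before, so the candidate is rejected
  // below unless -skip-partial-inlining-cost-analysis is set.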
  if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
    OptimizationRemarkEmitter ORE(Cloner.OrigFunc);
    DebugLoc DLoc;
    BasicBlock *Block;
    std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
    ORE.emit([&]() {
      return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
                                        DLoc, Block)
             << ore::NV("Function", Cloner.OrigFunc)
             << " not partially inlined into callers (Original Size = "
             << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
             << ", Size of call sequence to outlined function = "
             << ore::NV("NewSize", SizeCost) << ")";
    });
    return false;
  }

  assert(Cloner.OrigFunc->user_begin() == Cloner.OrigFunc->user_end() &&
         "F's users should all be replaced!");

  std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
                            Cloner.ClonedFunc->user_end());

  DenseMap<User *, uint64_t> CallSiteToProfCountMap;
  if (Cloner.OrigFunc->getEntryCount())
    computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);

  auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
  uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0);

  bool AnyInline = false;
  for (User *User : Users) {
    CallSite CS = getCallSite(User);

    if (IsLimitReached())
      continue;

    OptimizationRemarkEmitter ORE(CS.getCaller());

    if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE))
      continue;

    ORE.emit([&]() {
      return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined",
                                CS.getInstruction())
             << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
             << ore::NV("Caller", CS.getCaller());
    });

    InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
    InlineFunction(CS, IFI);

    // Now update the entry count:
    if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
      uint64_t CallSiteCount = CallSiteToProfCountMap[User];
      CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
    }

    AnyInline = true;
    NumPartialInlining++;
    // Update the stats.
    NumPartialInlined++;
  }

  if (AnyInline) {
    Cloner.IsFunctionInlined = true;
    if (CalleeEntryCount)
      Cloner.OrigFunc->setEntryCount(CalleeEntryCountV);
  }

  return AnyInline;
}

bool PartialInlinerImpl::run(Module &M) {
  if (DisablePartialInlining)
    return false;

  std::vector<Function *> Worklist;
  Worklist.reserve(M.size());
  for (Function &F : M)
    if (!F.use_empty() && !F.isDeclaration())
      Worklist.push_back(&F);

  bool Changed = false;
  while (!Worklist.empty()) {
    Function *CurrFunc = Worklist.back();
    Worklist.pop_back();

    if (CurrFunc->use_empty())
      continue;

    bool Recursive = false;
    for (User *U : CurrFunc->users())
      if (Instruction *I = dyn_cast<Instruction>(U))
        if (I->getParent()->getParent() == CurrFunc) {
          Recursive = true;
          break;
        }
    if (Recursive)
      continue;

    if (Function *NewFunc = unswitchFunction(CurrFunc)) {
      Worklist.push_back(NewFunc);
      Changed = true;
    }
  }

  return Changed;
}

char PartialInlinerLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
                      "Partial Inliner", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
                    "Partial Inliner", false, false)

ModulePass *llvm::createPartialInliningPass() {
  return new PartialInlinerLegacyPass();
}

PreservedAnalyses PartialInlinerPass::run(Module &M,
                                          ModuleAnalysisManager &AM) {
  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
      [&FAM](Function &F) -> AssumptionCache & {
    return FAM.getResult<AssumptionAnalysis>(F);
  };

  std::function<BlockFrequencyInfo &(Function &)> GetBFI =
      [&FAM](Function &F) -> BlockFrequencyInfo & {
    return FAM.getResult<BlockFrequencyAnalysis>(F);
  };

  std::function<TargetTransformInfo &(Function &)> GetTTI =
      [&FAM](Function &F) -> TargetTransformInfo & {
    return FAM.getResult<TargetIRAnalysis>(F);
  };

  ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);

  if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI).run(M))
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}