blob: 9b1a61ffb3dc527435523c99eb9e5baf70cc64c8 [file] [log] [blame]
Owen Anderson2f82e272009-06-14 08:26:32 +00001//===- PartialInlining.cpp - Inline parts of functions --------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass performs partial inlining, typically by inlining an if statement
11// that surrounds the body of the function.
12//
13//===----------------------------------------------------------------------===//
14
Easwaran Raman1832bf62016-06-27 16:50:18 +000015#include "llvm/Transforms/IPO/PartialInlining.h"
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000016#include "llvm/ADT/DenseMap.h"
17#include "llvm/ADT/DenseSet.h"
18#include "llvm/ADT/None.h"
19#include "llvm/ADT/Optional.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SmallVector.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000022#include "llvm/ADT/Statistic.h"
Sean Silvaf8015752016-08-02 02:15:45 +000023#include "llvm/Analysis/BlockFrequencyInfo.h"
24#include "llvm/Analysis/BranchProbabilityInfo.h"
Xinliang David Li61338462017-05-02 02:44:14 +000025#include "llvm/Analysis/InlineCost.h"
Sean Silvaf8015752016-08-02 02:15:45 +000026#include "llvm/Analysis/LoopInfo.h"
Adam Nemet0965da22017-10-09 23:19:02 +000027#include "llvm/Analysis/OptimizationRemarkEmitter.h"
Xinliang David Li61338462017-05-02 02:44:14 +000028#include "llvm/Analysis/ProfileSummaryInfo.h"
Graham Yiu8b1882c2017-11-30 02:41:36 +000029#include "llvm/Analysis/TargetLibraryInfo.h"
Xinliang David Li61338462017-05-02 02:44:14 +000030#include "llvm/Analysis/TargetTransformInfo.h"
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000031#include "llvm/IR/Attributes.h"
32#include "llvm/IR/BasicBlock.h"
Chandler Carruth1305dc32014-03-04 11:45:46 +000033#include "llvm/IR/CFG.h"
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000034#include "llvm/IR/CallSite.h"
35#include "llvm/IR/DebugLoc.h"
Xinliang David Li15744ad2017-04-23 21:40:58 +000036#include "llvm/IR/DiagnosticInfo.h"
Chandler Carruth5ad5f152014-01-13 09:26:24 +000037#include "llvm/IR/Dominators.h"
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000038#include "llvm/IR/Function.h"
39#include "llvm/IR/InstrTypes.h"
40#include "llvm/IR/Instruction.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000041#include "llvm/IR/Instructions.h"
Reid Kleckner0e8c4bb2017-09-07 23:27:44 +000042#include "llvm/IR/IntrinsicInst.h"
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000043#include "llvm/IR/Intrinsics.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000044#include "llvm/IR/Module.h"
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000045#include "llvm/IR/User.h"
Owen Anderson2f82e272009-06-14 08:26:32 +000046#include "llvm/Pass.h"
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000047#include "llvm/Support/BlockFrequency.h"
48#include "llvm/Support/BranchProbability.h"
49#include "llvm/Support/Casting.h"
50#include "llvm/Support/CommandLine.h"
51#include "llvm/Support/ErrorHandling.h"
Easwaran Raman1832bf62016-06-27 16:50:18 +000052#include "llvm/Transforms/IPO.h"
Owen Anderson2f82e272009-06-14 08:26:32 +000053#include "llvm/Transforms/Utils/Cloning.h"
Chandler Carruth0fde0012012-05-04 10:18:49 +000054#include "llvm/Transforms/Utils/CodeExtractor.h"
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000055#include "llvm/Transforms/Utils/ValueMapper.h"
56#include <algorithm>
57#include <cassert>
58#include <cstdint>
59#include <functional>
60#include <iterator>
61#include <memory>
62#include <tuple>
63#include <vector>
64
Owen Anderson2f82e272009-06-14 08:26:32 +000065using namespace llvm;
66
Xinliang David Li15744ad2017-04-23 21:40:58 +000067#define DEBUG_TYPE "partial-inlining"
Chandler Carruth964daaa2014-04-22 02:55:47 +000068
Xinliang David Li61338462017-05-02 02:44:14 +000069STATISTIC(NumPartialInlined,
70 "Number of callsites functions partially inlined into.");
Graham Yiu8b1882c2017-11-30 02:41:36 +000071STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with "
72 "cold outlined regions were partially "
73 "inlined into its caller(s).");
74STATISTIC(NumColdRegionsFound,
75 "Number of cold single entry/exit regions found.");
76STATISTIC(NumColdRegionsOutlined,
77 "Number of cold single entry/exit regions outlined.");
Owen Andersonbd6a2132009-06-15 20:50:26 +000078
Xinliang David Lidb8d09b2017-04-23 23:39:04 +000079// Command line option to disable partial-inlining. The default is false:
80static cl::opt<bool>
81 DisablePartialInlining("disable-partial-inlining", cl::init(false),
Graham Yiu8b1882c2017-11-30 02:41:36 +000082 cl::Hidden, cl::desc("Disable partial inlining"));
83// Command line option to disable multi-region partial-inlining. The default is
84// false:
85static cl::opt<bool> DisableMultiRegionPartialInline(
86 "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
87 cl::desc("Disable multi-region partial inlining"));
88
89// Command line option to force outlining in regions with live exit variables.
90// The default is false:
91static cl::opt<bool>
92 ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
93 cl::desc("Force outline regions with live exits"));
94
95// Command line option to enable marking outline functions with Cold Calling
96// Convention. The default is false:
97static cl::opt<bool>
98 MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
99 cl::desc("Mark outline function calls with ColdCC"));
100
101#ifndef NDEBUG
102// Command line option to debug partial-inlining. The default is none:
103static cl::opt<bool> TracePartialInlining("trace-partial-inlining",
104 cl::init(false), cl::Hidden,
105 cl::desc("Trace partial inlining."));
106#endif
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000107
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000108// This is an option used by testing:
109static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
110 cl::init(false), cl::ZeroOrMore,
111 cl::ReallyHidden,
112 cl::desc("Skip Cost Analysis"));
Graham Yiu8b1882c2017-11-30 02:41:36 +0000113// Used to determine if a cold region is worth outlining based on
114// its inlining cost compared to the original function. Default is set at 10%.
115// ie. if the cold region reduces the inlining cost of the original function by
116// at least 10%.
117static cl::opt<float> MinRegionSizeRatio(
118 "min-region-size-ratio", cl::init(0.1), cl::Hidden,
119 cl::desc("Minimum ratio comparing relative sizes of each "
120 "outline candidate and original function"));
121// Used to tune the minimum number of execution counts needed in the predecessor
122// block to the cold edge. ie. confidence interval.
123static cl::opt<unsigned>
124 MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
125 cl::desc("Minimum block executions to consider "
126 "its BranchProbabilityInfo valid"));
127// Used to determine when an edge is considered cold. Default is set to 10%. ie.
128// if the branch probability is 10% or less, then it is deemed as 'cold'.
129static cl::opt<float> ColdBranchRatio(
130 "cold-branch-ratio", cl::init(0.1), cl::Hidden,
131 cl::desc("Minimum BranchProbability to consider a region cold."));
Xinliang David Lidb8d09b2017-04-23 23:39:04 +0000132
Xinliang David Lid21601a2017-04-27 16:34:00 +0000133static cl::opt<unsigned> MaxNumInlineBlocks(
134 "max-num-inline-blocks", cl::init(5), cl::Hidden,
Chad Rosierf98335e2017-08-24 21:21:09 +0000135 cl::desc("Max number of blocks to be partially inlined"));
Xinliang David Lid21601a2017-04-27 16:34:00 +0000136
Xinliang David Lidb8d09b2017-04-23 23:39:04 +0000137// Command line option to set the maximum number of partial inlining allowed
138// for the module. The default value of -1 means no limit.
139static cl::opt<int> MaxNumPartialInlining(
140 "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
141 cl::desc("Max number of partial inlining. The default is unlimited"));
142
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000143// Used only when PGO or user annotated branch data is absent. It is
144// the least value that is used to weigh the outline region. If BFI
145// produces larger value, the BFI value will be used.
146static cl::opt<int>
147 OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
148 cl::Hidden, cl::ZeroOrMore,
149 cl::desc("Relative frequency of outline region to "
150 "the entry block"));
151
Xinliang David Li0b7d8582017-06-02 22:08:04 +0000152static cl::opt<unsigned> ExtraOutliningPenalty(
153 "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
154 cl::desc("A debug option to add additional penalty to the computed one."));
155
Owen Anderson2f82e272009-06-14 08:26:32 +0000156namespace {
Xinliang David Lid21601a2017-04-27 16:34:00 +0000157
158struct FunctionOutliningInfo {
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000159 FunctionOutliningInfo() = default;
160
Xinliang David Lid21601a2017-04-27 16:34:00 +0000161 // Returns the number of blocks to be inlined including all blocks
162 // in Entries and one return block.
163 unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; }
164
165 // A set of blocks including the function entry that guard
166 // the region to be outlined.
167 SmallVector<BasicBlock *, 4> Entries;
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000168
Xinliang David Lid21601a2017-04-27 16:34:00 +0000169 // The return block that is not included in the outlined region.
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000170 BasicBlock *ReturnBlock = nullptr;
171
Xinliang David Li0b7d8582017-06-02 22:08:04 +0000172 // The dominating block of the region to be outlined.
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000173 BasicBlock *NonReturnBlock = nullptr;
174
Xinliang David Lid21601a2017-04-27 16:34:00 +0000175 // The set of blocks in Entries that that are predecessors to ReturnBlock
176 SmallVector<BasicBlock *, 4> ReturnBlockPreds;
177};
178
Graham Yiu8b1882c2017-11-30 02:41:36 +0000179struct FunctionOutliningMultiRegionInfo {
180 FunctionOutliningMultiRegionInfo()
181 : ORI() {}
182
183 // Container for outline regions
184 struct OutlineRegionInfo {
185 OutlineRegionInfo(SmallVector<BasicBlock *, 8> Region,
186 BasicBlock *EntryBlock, BasicBlock *ExitBlock,
187 BasicBlock *ReturnBlock)
188 : Region(Region), EntryBlock(EntryBlock), ExitBlock(ExitBlock),
189 ReturnBlock(ReturnBlock) {}
190 SmallVector<BasicBlock *, 8> Region;
191 BasicBlock *EntryBlock;
192 BasicBlock *ExitBlock;
193 BasicBlock *ReturnBlock;
194 };
195
196 SmallVector<OutlineRegionInfo, 4> ORI;
197};
198
Sean Silvafe5abd52016-07-25 05:00:00 +0000199struct PartialInlinerImpl {
Graham Yiu8b1882c2017-11-30 02:41:36 +0000200
Xinliang David Li61338462017-05-02 02:44:14 +0000201 PartialInlinerImpl(
202 std::function<AssumptionCache &(Function &)> *GetAC,
203 std::function<TargetTransformInfo &(Function &)> *GTTI,
204 Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI,
Graham Yiu8b1882c2017-11-30 02:41:36 +0000205 ProfileSummaryInfo *ProfSI,
206 std::function<OptimizationRemarkEmitter &(Function &)> *GORE)
207 : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI),
208 GetORE(GORE) {}
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000209
Sean Silvafe5abd52016-07-25 05:00:00 +0000210 bool run(Module &M);
Graham Yiu8b1882c2017-11-30 02:41:36 +0000211 // Main part of the transformation that calls helper functions to find
212 // outlining candidates, clone & outline the function, and attempt to
213 // partially inline the resulting function. Returns true if
214 // inlining was successful, false otherwise. Also returns the outline
215 // function (only if we partially inlined early returns) as there is a
216 // possibility to further "peel" early return statements that were left in the
217 // outline function due to code size.
218 std::pair<bool, Function *> unswitchFunction(Function *F);
Sean Silvafe5abd52016-07-25 05:00:00 +0000219
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000220 // This class speculatively clones the the function to be partial inlined.
221 // At the end of partial inlining, the remaining callsites to the cloned
222 // function that are not partially inlined will be fixed up to reference
223 // the original function, and the cloned function will be erased.
224 struct FunctionCloner {
Graham Yiu8b1882c2017-11-30 02:41:36 +0000225 // Two constructors, one for single region outlining, the other for
226 // multi-region outlining.
227 FunctionCloner(Function *F, FunctionOutliningInfo *OI,
228 OptimizationRemarkEmitter &ORE);
229 FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
230 OptimizationRemarkEmitter &ORE);
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000231 ~FunctionCloner();
232
233 // Prepare for function outlining: making sure there is only
234 // one incoming edge from the extracted/outlined region to
235 // the return block.
236 void NormalizeReturnBlock();
237
Graham Yiu8b1882c2017-11-30 02:41:36 +0000238 // Do function outlining for cold regions.
239 bool doMultiRegionFunctionOutlining();
240 // Do function outlining for region after early return block(s).
Florian Hahn0e9dec62017-11-13 10:35:52 +0000241 // NOTE: For vararg functions that do the vararg handling in the outlined
242 // function, we temporarily generate IR that does not properly
243 // forward varargs to the outlined function. Calling InlineFunction
244 // will update calls to the outlined functions to properly forward
245 // the varargs.
Graham Yiu8b1882c2017-11-30 02:41:36 +0000246 Function *doSingleRegionFunctionOutlining();
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000247
248 Function *OrigFunc = nullptr;
249 Function *ClonedFunc = nullptr;
Graham Yiu8b1882c2017-11-30 02:41:36 +0000250
251 typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
252 // Keep track of Outlined Functions and the basic block they're called from.
253 SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;
254
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000255 // ClonedFunc is inlined in one of its callers after function
256 // outlining.
257 bool IsFunctionInlined = false;
258 // The cost of the region to be outlined.
259 int OutlinedRegionCost = 0;
Graham Yiu8b1882c2017-11-30 02:41:36 +0000260 // ClonedOI is specific to outlining non-early return blocks.
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000261 std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
Graham Yiu8b1882c2017-11-30 02:41:36 +0000262 // ClonedOMRI is specific to outlining cold regions.
263 std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000264 std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
Graham Yiu8b1882c2017-11-30 02:41:36 +0000265 OptimizationRemarkEmitter &ORE;
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000266 };
267
Sean Silvafe5abd52016-07-25 05:00:00 +0000268private:
Xinliang David Lidb8d09b2017-04-23 23:39:04 +0000269 int NumPartialInlining = 0;
Xinliang David Li61338462017-05-02 02:44:14 +0000270 std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
271 std::function<TargetTransformInfo &(Function &)> *GetTTI;
272 Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
273 ProfileSummaryInfo *PSI;
Graham Yiu8b1882c2017-11-30 02:41:36 +0000274 std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
Xinliang David Lidb8d09b2017-04-23 23:39:04 +0000275
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000276 // Return the frequency of the OutlininingBB relative to F's entry point.
277 // The result is no larger than 1 and is represented using BP.
278 // (Note that the outlined region's 'head' block can only have incoming
279 // edges from the guarding entry blocks).
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000280 BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000281
282 // Return true if the callee of CS should be partially inlined with
283 // profit.
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000284 bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
Graham Yiu8b1882c2017-11-30 02:41:36 +0000285 BlockFrequency WeightedOutliningRcost);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000286
287 // Try to inline DuplicateFunction (cloned from F with call to
288 // the OutlinedFunction into its callers. Return true
289 // if there is any successful inlining.
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000290 bool tryPartialInline(FunctionCloner &Cloner);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000291
292 // Compute the mapping from use site of DuplicationFunction to the enclosing
293 // BB's profile count.
294 void computeCallsiteToProfCountMap(Function *DuplicateFunction,
295 DenseMap<User *, uint64_t> &SiteCountMap);
296
Xinliang David Lidb8d09b2017-04-23 23:39:04 +0000297 bool IsLimitReached() {
298 return (MaxNumPartialInlining != -1 &&
299 NumPartialInlining >= MaxNumPartialInlining);
300 }
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000301
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000302 static CallSite getCallSite(User *U) {
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000303 CallSite CS;
304 if (CallInst *CI = dyn_cast<CallInst>(U))
305 CS = CallSite(CI);
306 else if (InvokeInst *II = dyn_cast<InvokeInst>(U))
307 CS = CallSite(II);
308 else
309 llvm_unreachable("All uses must be calls");
310 return CS;
311 }
312
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000313 static CallSite getOneCallSiteTo(Function *F) {
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000314 User *User = *F->user_begin();
315 return getCallSite(User);
316 }
317
318 std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function *F) {
319 CallSite CS = getOneCallSiteTo(F);
320 DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
321 BasicBlock *Block = CS.getParent();
322 return std::make_tuple(DLoc, Block);
323 }
324
325 // Returns the costs associated with function outlining:
326 // - The first value is the non-weighted runtime cost for making the call
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000327 // to the outlined function, including the addtional setup cost in the
328 // outlined function itself;
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000329 // - The second value is the estimated size of the new call sequence in
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000330 // basic block Cloner.OutliningCallBB;
331 std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner);
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000332
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000333 // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
334 // approximate both the size and runtime cost (Note that in the current
335 // inline cost analysis, there is no clear distinction there either).
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000336 static int computeBBInlineCost(BasicBlock *BB);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000337
338 std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
Graham Yiu8b1882c2017-11-30 02:41:36 +0000339 std::unique_ptr<FunctionOutliningMultiRegionInfo>
340 computeOutliningColdRegionsInfo(Function *F);
Sean Silvafe5abd52016-07-25 05:00:00 +0000341};
Xinliang David Lid21601a2017-04-27 16:34:00 +0000342
Easwaran Raman1832bf62016-06-27 16:50:18 +0000343struct PartialInlinerLegacyPass : public ModulePass {
344 static char ID; // Pass identification, replacement for typeid
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000345
Easwaran Raman1832bf62016-06-27 16:50:18 +0000346 PartialInlinerLegacyPass() : ModulePass(ID) {
347 initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
348 }
Craig Topper3e4c6972014-03-05 09:10:37 +0000349
Daniel Jasperaec2fa32016-12-19 08:22:17 +0000350 void getAnalysisUsage(AnalysisUsage &AU) const override {
351 AU.addRequired<AssumptionCacheTracker>();
Xinliang David Li61338462017-05-02 02:44:14 +0000352 AU.addRequired<ProfileSummaryInfoWrapperPass>();
353 AU.addRequired<TargetTransformInfoWrapperPass>();
Daniel Jasperaec2fa32016-12-19 08:22:17 +0000354 }
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000355
Easwaran Raman1832bf62016-06-27 16:50:18 +0000356 bool runOnModule(Module &M) override {
357 if (skipModule(M))
358 return false;
Craig Topper3e4c6972014-03-05 09:10:37 +0000359
Daniel Jasperaec2fa32016-12-19 08:22:17 +0000360 AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
Xinliang David Li61338462017-05-02 02:44:14 +0000361 TargetTransformInfoWrapperPass *TTIWP =
362 &getAnalysis<TargetTransformInfoWrapperPass>();
363 ProfileSummaryInfo *PSI =
364 getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
Graham Yiu8b1882c2017-11-30 02:41:36 +0000365 std::unique_ptr<OptimizationRemarkEmitter> UPORE;
Xinliang David Li61338462017-05-02 02:44:14 +0000366
Daniel Jasperaec2fa32016-12-19 08:22:17 +0000367 std::function<AssumptionCache &(Function &)> GetAssumptionCache =
368 [&ACT](Function &F) -> AssumptionCache & {
369 return ACT->getAssumptionCache(F);
370 };
Xinliang David Li61338462017-05-02 02:44:14 +0000371
372 std::function<TargetTransformInfo &(Function &)> GetTTI =
373 [&TTIWP](Function &F) -> TargetTransformInfo & {
374 return TTIWP->getTTI(F);
375 };
376
Graham Yiu8b1882c2017-11-30 02:41:36 +0000377 std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
378 [&UPORE](Function &F) -> OptimizationRemarkEmitter & {
379 UPORE.reset(new OptimizationRemarkEmitter(&F));
380 return *UPORE.get();
381 };
382
383 return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, NoneType::None, PSI,
384 &GetORE)
385 .run(M);
Sean Silvafe5abd52016-07-25 05:00:00 +0000386 }
Sean Silva519323d2016-07-25 05:57:59 +0000387};
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000388
389} // end anonymous namespace
Owen Anderson2f82e272009-06-14 08:26:32 +0000390
Graham Yiu8b1882c2017-11-30 02:41:36 +0000391std::unique_ptr<FunctionOutliningMultiRegionInfo>
392PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F) {
393 BasicBlock *EntryBlock = &F->front();
394
395 DominatorTree DT(*F);
Graham Yiu8b1882c2017-11-30 02:41:36 +0000396 LoopInfo LI(DT);
397 BranchProbabilityInfo BPI(*F, LI);
398 std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
399 BlockFrequencyInfo *BFI;
400 if (!GetBFI) {
401 ScopedBFI.reset(new BlockFrequencyInfo(*F, BPI, LI));
402 BFI = ScopedBFI.get();
403 } else
404 BFI = &(*GetBFI)(*F);
405
406 auto &ORE = (*GetORE)(*F);
407
Graham Yiu8b1882c2017-11-30 02:41:36 +0000408 // Return if we don't have profiling information.
409 if (!PSI->hasInstrumentationProfile())
410 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
411
412 std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
413 llvm::make_unique<FunctionOutliningMultiRegionInfo>();
414
415 auto IsSingleEntry = [](SmallVectorImpl<BasicBlock *> &BlockList) {
416 BasicBlock *Dom = BlockList.front();
417 return BlockList.size() > 1 &&
418 std::distance(pred_begin(Dom), pred_end(Dom)) == 1;
419 };
420
421 auto IsSingleExit =
Graham Yiu70293fa2017-11-30 03:36:57 +0000422 [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
Graham Yiu8b1882c2017-11-30 02:41:36 +0000423 BasicBlock *ExitBlock = nullptr;
424 for (auto *Block : BlockList) {
425 for (auto SI = succ_begin(Block); SI != succ_end(Block); ++SI) {
426 if (!is_contained(BlockList, *SI)) {
427 if (ExitBlock) {
428 ORE.emit([&]() {
429 return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion",
430 &SI->front())
431 << "Region dominated by "
432 << ore::NV("Block", BlockList.front()->getName())
433 << " has more than one region exit edge.";
434 });
435 return nullptr;
436 } else
437 ExitBlock = Block;
438 }
439 }
440 }
441 return ExitBlock;
442 };
443
444 auto BBProfileCount = [BFI](BasicBlock *BB) {
445 return BFI->getBlockProfileCount(BB)
446 ? BFI->getBlockProfileCount(BB).getValue()
447 : 0;
448 };
449
450 // Use the same computeBBInlineCost function to compute the cost savings of
451 // the outlining the candidate region.
452 int OverallFunctionCost = 0;
453 for (auto &BB : *F)
454 OverallFunctionCost += computeBBInlineCost(&BB);
455
456#ifndef NDEBUG
457 if (TracePartialInlining)
458 dbgs() << "OverallFunctionCost = " << OverallFunctionCost << "\n";
459#endif
460 int MinOutlineRegionCost =
461 static_cast<int>(OverallFunctionCost * MinRegionSizeRatio);
462 BranchProbability MinBranchProbability(
463 static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
464 MinBlockCounterExecution);
465 bool ColdCandidateFound = false;
466 BasicBlock *CurrEntry = EntryBlock;
467 std::vector<BasicBlock *> DFS;
468 DenseMap<BasicBlock *, bool> VisitedMap;
469 DFS.push_back(CurrEntry);
470 VisitedMap[CurrEntry] = true;
471 // Use Depth First Search on the basic blocks to find CFG edges that are
472 // considered cold.
473 // Cold regions considered must also have its inline cost compared to the
474 // overall inline cost of the original function. The region is outlined only
475 // if it reduced the inline cost of the function by 'MinOutlineRegionCost' or
476 // more.
477 while (!DFS.empty()) {
478 auto *thisBB = DFS.back();
479 DFS.pop_back();
480 // Only consider regions with predecessor blocks that are considered
481 // not-cold (default: part of the top 99.99% of all block counters)
482 // AND greater than our minimum block execution count (default: 100).
483 if (PSI->isColdBB(thisBB, BFI) ||
484 BBProfileCount(thisBB) < MinBlockCounterExecution)
485 continue;
486 for (auto SI = succ_begin(thisBB); SI != succ_end(thisBB); ++SI) {
487 if (VisitedMap[*SI])
488 continue;
489 VisitedMap[*SI] = true;
490 DFS.push_back(*SI);
491 // If branch isn't cold, we skip to the next one.
492 BranchProbability SuccProb = BPI.getEdgeProbability(thisBB, *SI);
493 if (SuccProb > MinBranchProbability)
494 continue;
495#ifndef NDEBUG
496 if (TracePartialInlining) {
497 dbgs() << "Found cold edge: " << thisBB->getName() << "->"
498 << (*SI)->getName() << "\nBranch Probability = " << SuccProb
499 << "\n";
500 }
501#endif
502 SmallVector<BasicBlock *, 8> DominateVector;
503 DT.getDescendants(*SI, DominateVector);
504 // We can only outline single entry regions (for now).
505 if (!IsSingleEntry(DominateVector))
506 continue;
507 BasicBlock *ExitBlock = nullptr;
508 // We can only outline single exit regions (for now).
509 if (!(ExitBlock = IsSingleExit(DominateVector)))
510 continue;
511 int OutlineRegionCost = 0;
512 for (auto *BB : DominateVector)
513 OutlineRegionCost += computeBBInlineCost(BB);
514
515#ifndef NDEBUG
516 if (TracePartialInlining)
517 dbgs() << "OutlineRegionCost = " << OutlineRegionCost << "\n";
518#endif
519
520 if (OutlineRegionCost < MinOutlineRegionCost) {
521 ORE.emit([&]() {
522 return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
523 &SI->front())
524 << ore::NV("Callee", F) << " inline cost-savings smaller than "
525 << ore::NV("Cost", MinOutlineRegionCost);
526 });
527 continue;
528 }
529 // For now, ignore blocks that belong to a SISE region that is a
530 // candidate for outlining. In the future, we may want to look
531 // at inner regions because the outer region may have live-exit
532 // variables.
533 for (auto *BB : DominateVector)
534 VisitedMap[BB] = true;
535 // ReturnBlock here means the block after the outline call
536 BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
537 // assert(ReturnBlock && "ReturnBlock is NULL somehow!");
538 FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
539 DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
540 RegInfo.Region = DominateVector;
541 OutliningInfo->ORI.push_back(RegInfo);
542#ifndef NDEBUG
543 if (TracePartialInlining) {
544 dbgs() << "Found Cold Candidate starting at block: "
545 << DominateVector.front()->getName() << "\n";
546 }
547#endif
548 ColdCandidateFound = true;
549 NumColdRegionsFound++;
550 }
551 }
552 if (ColdCandidateFound)
553 return OutliningInfo;
554 else
555 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
556}
557
Xinliang David Lid21601a2017-04-27 16:34:00 +0000558std::unique_ptr<FunctionOutliningInfo>
559PartialInlinerImpl::computeOutliningInfo(Function *F) {
Sean Silva519323d2016-07-25 05:57:59 +0000560 BasicBlock *EntryBlock = &F->front();
561 BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
Owen Andersonf0081db2009-09-08 19:53:15 +0000562 if (!BR || BR->isUnconditional())
Xinliang David Lid21601a2017-04-27 16:34:00 +0000563 return std::unique_ptr<FunctionOutliningInfo>();
Sean Silvafe5abd52016-07-25 05:00:00 +0000564
Xinliang David Lid21601a2017-04-27 16:34:00 +0000565 // Returns true if Succ is BB's successor
566 auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
567 return is_contained(successors(BB), Succ);
568 };
569
570 auto SuccSize = [](BasicBlock *BB) {
571 return std::distance(succ_begin(BB), succ_end(BB));
572 };
573
574 auto IsReturnBlock = [](BasicBlock *BB) {
575 TerminatorInst *TI = BB->getTerminator();
576 return isa<ReturnInst>(TI);
577 };
578
Davide Italianoaa42a102017-05-08 20:44:01 +0000579 auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
Xinliang David Lid21601a2017-04-27 16:34:00 +0000580 if (IsReturnBlock(Succ1))
581 return std::make_tuple(Succ1, Succ2);
582 if (IsReturnBlock(Succ2))
583 return std::make_tuple(Succ2, Succ1);
584
585 return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
586 };
587
588 // Detect a triangular shape:
Davide Italianoaa42a102017-05-08 20:44:01 +0000589 auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
Xinliang David Lid21601a2017-04-27 16:34:00 +0000590 if (IsSuccessor(Succ1, Succ2))
591 return std::make_tuple(Succ1, Succ2);
592 if (IsSuccessor(Succ2, Succ1))
593 return std::make_tuple(Succ2, Succ1);
594
595 return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
596 };
597
598 std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
599 llvm::make_unique<FunctionOutliningInfo>();
600
601 BasicBlock *CurrEntry = EntryBlock;
602 bool CandidateFound = false;
603 do {
604 // The number of blocks to be inlined has already reached
605 // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
606 // disables partial inlining for the function.
607 if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks)
608 break;
609
610 if (SuccSize(CurrEntry) != 2)
611 break;
612
613 BasicBlock *Succ1 = *succ_begin(CurrEntry);
614 BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
615
616 BasicBlock *ReturnBlock, *NonReturnBlock;
617 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
618
619 if (ReturnBlock) {
620 OutliningInfo->Entries.push_back(CurrEntry);
621 OutliningInfo->ReturnBlock = ReturnBlock;
622 OutliningInfo->NonReturnBlock = NonReturnBlock;
623 CandidateFound = true;
624 break;
625 }
626
627 BasicBlock *CommSucc;
628 BasicBlock *OtherSucc;
629 std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
630
631 if (!CommSucc)
632 break;
633
634 OutliningInfo->Entries.push_back(CurrEntry);
635 CurrEntry = OtherSucc;
Xinliang David Lid21601a2017-04-27 16:34:00 +0000636 } while (true);
637
638 if (!CandidateFound)
639 return std::unique_ptr<FunctionOutliningInfo>();
640
641 // Do sanity check of the entries: threre should not
642 // be any successors (not in the entry set) other than
643 // {ReturnBlock, NonReturnBlock}
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000644 assert(OutliningInfo->Entries[0] == &F->front() &&
645 "Function Entry must be the first in Entries vector");
Xinliang David Lid21601a2017-04-27 16:34:00 +0000646 DenseSet<BasicBlock *> Entries;
647 for (BasicBlock *E : OutliningInfo->Entries)
648 Entries.insert(E);
649
650 // Returns true of BB has Predecessor which is not
651 // in Entries set.
652 auto HasNonEntryPred = [Entries](BasicBlock *BB) {
653 for (auto Pred : predecessors(BB)) {
654 if (!Entries.count(Pred))
655 return true;
656 }
657 return false;
658 };
659 auto CheckAndNormalizeCandidate =
660 [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
661 for (BasicBlock *E : OutliningInfo->Entries) {
662 for (auto Succ : successors(E)) {
663 if (Entries.count(Succ))
664 continue;
665 if (Succ == OutliningInfo->ReturnBlock)
666 OutliningInfo->ReturnBlockPreds.push_back(E);
667 else if (Succ != OutliningInfo->NonReturnBlock)
668 return false;
669 }
670 // There should not be any outside incoming edges either:
671 if (HasNonEntryPred(E))
672 return false;
673 }
674 return true;
675 };
676
677 if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
678 return std::unique_ptr<FunctionOutliningInfo>();
679
680 // Now further growing the candidate's inlining region by
681 // peeling off dominating blocks from the outlining region:
682 while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) {
683 BasicBlock *Cand = OutliningInfo->NonReturnBlock;
684 if (SuccSize(Cand) != 2)
685 break;
686
687 if (HasNonEntryPred(Cand))
688 break;
689
690 BasicBlock *Succ1 = *succ_begin(Cand);
691 BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
692
693 BasicBlock *ReturnBlock, *NonReturnBlock;
694 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
695 if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
696 break;
697
698 if (NonReturnBlock->getSinglePredecessor() != Cand)
699 break;
700
701 // Now grow and update OutlininigInfo:
702 OutliningInfo->Entries.push_back(Cand);
703 OutliningInfo->NonReturnBlock = NonReturnBlock;
704 OutliningInfo->ReturnBlockPreds.push_back(Cand);
705 Entries.insert(Cand);
Reid Klecknerc26a17a2015-02-04 19:14:57 +0000706 }
Sean Silvafe5abd52016-07-25 05:00:00 +0000707
Xinliang David Lid21601a2017-04-27 16:34:00 +0000708 return OutliningInfo;
709}
710
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000711// Check if there is PGO data or user annoated branch data:
712static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
Easwaran Ramana17f2202017-12-22 01:33:52 +0000713 if (F->hasProfileData())
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000714 return true;
715 // Now check if any of the entry block has MD_prof data:
716 for (auto *E : OI->Entries) {
717 BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
718 if (!BR || BR->isUnconditional())
719 continue;
720 uint64_t T, F;
721 if (BR->extractProfMetadata(T, F))
722 return true;
723 }
724 return false;
725}
726
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000727BranchProbability
728PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
Graham Yiu8b1882c2017-11-30 02:41:36 +0000729 BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000730 auto EntryFreq =
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000731 Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
732 auto OutliningCallFreq =
Graham Yiu8b1882c2017-11-30 02:41:36 +0000733 Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
734 // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
735 // we outlined any regions, so we may encounter situations where the
736 // OutliningCallFreq is *slightly* bigger than the EntryFreq.
737 if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency()) {
738 OutliningCallFreq = EntryFreq;
739 }
740 auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
741 OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000742
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000743 if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get()))
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000744 return OutlineRegionRelFreq;
745
Xinliang David Li0b7d8582017-06-02 22:08:04 +0000746 // When profile data is not available, we need to be conservative in
747 // estimating the overall savings. Static branch prediction can usually
748 // guess the branch direction right (taken/non-taken), but the guessed
749 // branch probability is usually not biased enough. In case when the
750 // outlined region is predicted to be likely, its probability needs
751 // to be made higher (more biased) to not under-estimate the cost of
752 // function outlining. On the other hand, if the outlined region
753 // is predicted to be less likely, the predicted probablity is usually
754 // higher than the actual. For instance, the actual probability of the
755 // less likely target is only 5%, but the guessed probablity can be
756 // 40%. In the latter case, there is no need for further adjustement.
757 // FIXME: add an option for this.
758 if (OutlineRegionRelFreq < BranchProbability(45, 100))
759 return OutlineRegionRelFreq;
760
761 OutlineRegionRelFreq = std::max(
762 OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000763
764 return OutlineRegionRelFreq;
765}
766
767bool PartialInlinerImpl::shouldPartialInline(
Graham Yiu8b1882c2017-11-30 02:41:36 +0000768 CallSite CS, FunctionCloner &Cloner,
769 BlockFrequency WeightedOutliningRcost) {
Xinliang David Li61338462017-05-02 02:44:14 +0000770 using namespace ore;
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000771
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000772 if (SkipCostAnalysis)
773 return true;
774
Xinliang David Li61338462017-05-02 02:44:14 +0000775 Instruction *Call = CS.getInstruction();
776 Function *Callee = CS.getCalledFunction();
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000777 assert(Callee == Cloner.ClonedFunc);
778
Xinliang David Li61338462017-05-02 02:44:14 +0000779 Function *Caller = CS.getCaller();
780 auto &CalleeTTI = (*GetTTI)(*Callee);
Graham Yiu8b1882c2017-11-30 02:41:36 +0000781 auto &ORE = (*GetORE)(*Caller);
Xinliang David Li61338462017-05-02 02:44:14 +0000782 InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
Haicheng Wu0812c5b2017-08-21 20:00:09 +0000783 *GetAssumptionCache, GetBFI, PSI, &ORE);
Xinliang David Li61338462017-05-02 02:44:14 +0000784
785 if (IC.isAlways()) {
Vivek Pandya95906582017-10-11 17:12:59 +0000786 ORE.emit([&]() {
787 return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000788 << NV("Callee", Cloner.OrigFunc)
Vivek Pandya95906582017-10-11 17:12:59 +0000789 << " should always be fully inlined, not partially";
790 });
Xinliang David Li61338462017-05-02 02:44:14 +0000791 return false;
792 }
793
794 if (IC.isNever()) {
Vivek Pandya95906582017-10-11 17:12:59 +0000795 ORE.emit([&]() {
796 return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000797 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
Xinliang David Li61338462017-05-02 02:44:14 +0000798 << NV("Caller", Caller)
Vivek Pandya95906582017-10-11 17:12:59 +0000799 << " because it should never be inlined (cost=never)";
800 });
Xinliang David Li61338462017-05-02 02:44:14 +0000801 return false;
802 }
803
804 if (!IC) {
Vivek Pandya95906582017-10-11 17:12:59 +0000805 ORE.emit([&]() {
806 return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000807 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
Xinliang David Li61338462017-05-02 02:44:14 +0000808 << NV("Caller", Caller) << " because too costly to inline (cost="
809 << NV("Cost", IC.getCost()) << ", threshold="
Vivek Pandya95906582017-10-11 17:12:59 +0000810 << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
811 });
Xinliang David Li61338462017-05-02 02:44:14 +0000812 return false;
813 }
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000814 const DataLayout &DL = Caller->getParent()->getDataLayout();
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000815
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000816 // The savings of eliminating the call:
817 int NonWeightedSavings = getCallsiteCost(CS, DL);
818 BlockFrequency NormWeightedSavings(NonWeightedSavings);
819
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000820 // Weighted saving is smaller than weighted cost, return false
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000821 if (NormWeightedSavings < WeightedOutliningRcost) {
Vivek Pandya95906582017-10-11 17:12:59 +0000822 ORE.emit([&]() {
823 return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
824 Call)
825 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
826 << NV("Caller", Caller) << " runtime overhead (overhead="
827 << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
828 << ", savings="
829 << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
830 << ")"
831 << " of making the outlined call is too high";
832 });
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000833
834 return false;
835 }
Xinliang David Li61338462017-05-02 02:44:14 +0000836
Vivek Pandya95906582017-10-11 17:12:59 +0000837 ORE.emit([&]() {
838 return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000839 << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
Xinliang David Li61338462017-05-02 02:44:14 +0000840 << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
841 << " (threshold="
Vivek Pandya95906582017-10-11 17:12:59 +0000842 << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
843 });
Xinliang David Li61338462017-05-02 02:44:14 +0000844 return true;
845}
846
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000847// TODO: Ideally we should share Inliner's InlineCost Analysis code.
848// For now use a simplified version. The returned 'InlineCost' will be used
849// to esimate the size cost as well as runtime cost of the BB.
850int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
851 int InlineCost = 0;
852 const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
853 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
854 if (isa<DbgInfoIntrinsic>(I))
855 continue;
856
Xinliang David Li0b7d8582017-06-02 22:08:04 +0000857 switch (I->getOpcode()) {
858 case Instruction::BitCast:
859 case Instruction::PtrToInt:
860 case Instruction::IntToPtr:
861 case Instruction::Alloca:
862 continue;
863 case Instruction::GetElementPtr:
864 if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
865 continue;
Adrian Prantl0e6694d2017-12-19 22:05:25 +0000866 break;
Xinliang David Li0b7d8582017-06-02 22:08:04 +0000867 default:
868 break;
869 }
870
871 IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(I);
872 if (IntrInst) {
873 if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
874 IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
875 continue;
876 }
877
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000878 if (CallInst *CI = dyn_cast<CallInst>(I)) {
879 InlineCost += getCallsiteCost(CallSite(CI), DL);
880 continue;
881 }
882
883 if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
884 InlineCost += getCallsiteCost(CallSite(II), DL);
885 continue;
886 }
887
888 if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
889 InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
890 continue;
891 }
892 InlineCost += InlineConstants::InstrCost;
893 }
894 return InlineCost;
895}
896
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000897std::tuple<int, int>
898PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
Graham Yiu8b1882c2017-11-30 02:41:36 +0000899 int OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
900 for (auto FuncBBPair : Cloner.OutlinedFunctions) {
901 Function *OutlinedFunc = FuncBBPair.first;
902 BasicBlock* OutliningCallBB = FuncBBPair.second;
903 // Now compute the cost of the call sequence to the outlined function
904 // 'OutlinedFunction' in BB 'OutliningCallBB':
905 OutliningFuncCallCost += computeBBInlineCost(OutliningCallBB);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000906
Graham Yiu8b1882c2017-11-30 02:41:36 +0000907 // Now compute the cost of the extracted/outlined function itself:
908 for (BasicBlock &BB : *OutlinedFunc)
909 OutlinedFunctionCost += computeBBInlineCost(&BB);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000910 }
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000911 assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
Xinliang David Li5fdc75a2017-06-02 22:38:48 +0000912 "Outlined function cost should be no less than the outlined region");
Graham Yiu8b1882c2017-11-30 02:41:36 +0000913
Xinliang David Li0b7d8582017-06-02 22:08:04 +0000914 // The code extractor introduces a new root and exit stub blocks with
915 // additional unconditional branches. Those branches will be eliminated
916 // later with bb layout. The cost should be adjusted accordingly:
Graham Yiu8b1882c2017-11-30 02:41:36 +0000917 OutlinedFunctionCost -=
918 2 * InlineConstants::InstrCost * Cloner.OutlinedFunctions.size();
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000919
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000920 int OutliningRuntimeOverhead =
921 OutliningFuncCallCost +
922 (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
923 ExtraOutliningPenalty;
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000924
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000925 return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000926}
927
928// Create the callsite to profile count map which is
929// used to update the original function's entry count,
930// after the function is partially inlined into the callsite.
931void PartialInlinerImpl::computeCallsiteToProfCountMap(
932 Function *DuplicateFunction,
933 DenseMap<User *, uint64_t> &CallSiteToProfCountMap) {
934 std::vector<User *> Users(DuplicateFunction->user_begin(),
935 DuplicateFunction->user_end());
936 Function *CurrentCaller = nullptr;
Vitaly Bukaa6374892017-05-27 05:32:09 +0000937 std::unique_ptr<BlockFrequencyInfo> TempBFI;
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000938 BlockFrequencyInfo *CurrentCallerBFI = nullptr;
939
940 auto ComputeCurrBFI = [&,this](Function *Caller) {
941 // For the old pass manager:
942 if (!GetBFI) {
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000943 DominatorTree DT(*Caller);
944 LoopInfo LI(DT);
945 BranchProbabilityInfo BPI(*Caller, LI);
Vitaly Bukaa6374892017-05-27 05:32:09 +0000946 TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
947 CurrentCallerBFI = TempBFI.get();
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000948 } else {
949 // New pass manager:
950 CurrentCallerBFI = &(*GetBFI)(*Caller);
951 }
952 };
953
954 for (User *User : Users) {
955 CallSite CS = getCallSite(User);
956 Function *Caller = CS.getCaller();
957 if (CurrentCaller != Caller) {
958 CurrentCaller = Caller;
959 ComputeCurrBFI(Caller);
960 } else {
961 assert(CurrentCallerBFI && "CallerBFI is not set");
962 }
963 BasicBlock *CallBB = CS.getInstruction()->getParent();
964 auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
965 if (Count)
966 CallSiteToProfCountMap[User] = *Count;
967 else
968 CallSiteToProfCountMap[User] = 0;
969 }
970}
971
Graham Yiu8b1882c2017-11-30 02:41:36 +0000972PartialInlinerImpl::FunctionCloner::FunctionCloner(
973 Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE)
974 : OrigFunc(F), ORE(ORE) {
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000975 ClonedOI = llvm::make_unique<FunctionOutliningInfo>();
976
977 // Clone the function, so that we can hack away on it.
978 ValueToValueMapTy VMap;
979 ClonedFunc = CloneFunction(F, VMap);
980
981 ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
982 ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
983 for (BasicBlock *BB : OI->Entries) {
984 ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
985 }
986 for (BasicBlock *E : OI->ReturnBlockPreds) {
987 BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
988 ClonedOI->ReturnBlockPreds.push_back(NewE);
989 }
990 // Go ahead and update all uses to the duplicate, so that we can just
991 // use the inliner functionality when we're done hacking.
992 F->replaceAllUsesWith(ClonedFunc);
993}
994
Graham Yiu8b1882c2017-11-30 02:41:36 +0000995PartialInlinerImpl::FunctionCloner::FunctionCloner(
996 Function *F, FunctionOutliningMultiRegionInfo *OI,
997 OptimizationRemarkEmitter &ORE)
998 : OrigFunc(F), ORE(ORE) {
999 ClonedOMRI = llvm::make_unique<FunctionOutliningMultiRegionInfo>();
1000
1001 // Clone the function, so that we can hack away on it.
1002 ValueToValueMapTy VMap;
1003 ClonedFunc = CloneFunction(F, VMap);
1004
1005 // Go through all Outline Candidate Regions and update all BasicBlock
1006 // information.
1007 for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
1008 OI->ORI) {
1009 SmallVector<BasicBlock *, 8> Region;
1010 for (BasicBlock *BB : RegionInfo.Region) {
1011 Region.push_back(cast<BasicBlock>(VMap[BB]));
1012 }
1013 BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
1014 BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
1015 BasicBlock *NewReturnBlock = nullptr;
1016 if (RegionInfo.ReturnBlock)
1017 NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
1018 FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
1019 Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
1020 ClonedOMRI->ORI.push_back(MappedRegionInfo);
1021 }
1022 // Go ahead and update all uses to the duplicate, so that we can just
1023 // use the inliner functionality when we're done hacking.
1024 F->replaceAllUsesWith(ClonedFunc);
1025}
1026
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001027void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001028 auto getFirstPHI = [](BasicBlock *BB) {
1029 BasicBlock::iterator I = BB->begin();
1030 PHINode *FirstPhi = nullptr;
1031 while (I != BB->end()) {
1032 PHINode *Phi = dyn_cast<PHINode>(I);
1033 if (!Phi)
1034 break;
1035 if (!FirstPhi) {
1036 FirstPhi = Phi;
1037 break;
1038 }
1039 }
1040 return FirstPhi;
1041 };
1042
Graham Yiu8b1882c2017-11-30 02:41:36 +00001043 // Shouldn't need to normalize PHIs if we're not outlining non-early return
1044 // blocks.
1045 if (!ClonedOI)
1046 return;
1047
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001048 // Special hackery is needed with PHI nodes that have inputs from more than
1049 // one extracted block. For simplicity, just split the PHIs into a two-level
1050 // sequence of PHIs, some of which will go in the extracted region, and some
1051 // of which will go outside.
1052 BasicBlock *PreReturn = ClonedOI->ReturnBlock;
1053 // only split block when necessary:
1054 PHINode *FirstPhi = getFirstPHI(PreReturn);
1055 unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
1056
1057 if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
1058 return;
1059
1060 auto IsTrivialPhi = [](PHINode *PN) -> Value * {
1061 Value *CommonValue = PN->getIncomingValue(0);
1062 if (all_of(PN->incoming_values(),
1063 [&](Value *V) { return V == CommonValue; }))
1064 return CommonValue;
1065 return nullptr;
1066 };
1067
1068 ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
1069 ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
1070 BasicBlock::iterator I = PreReturn->begin();
1071 Instruction *Ins = &ClonedOI->ReturnBlock->front();
1072 SmallVector<Instruction *, 4> DeadPhis;
1073 while (I != PreReturn->end()) {
1074 PHINode *OldPhi = dyn_cast<PHINode>(I);
1075 if (!OldPhi)
1076 break;
1077
1078 PHINode *RetPhi =
1079 PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
1080 OldPhi->replaceAllUsesWith(RetPhi);
1081 Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
1082
1083 RetPhi->addIncoming(&*I, PreReturn);
1084 for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
1085 RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
1086 OldPhi->removeIncomingValue(E);
1087 }
1088
1089 // After incoming values splitting, the old phi may become trivial.
1090 // Keeping the trivial phi can introduce definition inside the outline
1091 // region which is live-out, causing necessary overhead (load, store
1092 // arg passing etc).
1093 if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
1094 OldPhi->replaceAllUsesWith(OldPhiVal);
1095 DeadPhis.push_back(OldPhi);
1096 }
1097 ++I;
Graham Yiu8b1882c2017-11-30 02:41:36 +00001098 }
1099 for (auto *DP : DeadPhis)
1100 DP->eraseFromParent();
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001101
Graham Yiu8b1882c2017-11-30 02:41:36 +00001102 for (auto E : ClonedOI->ReturnBlockPreds) {
1103 E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
1104 }
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001105}
1106
Graham Yiu8b1882c2017-11-30 02:41:36 +00001107bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
1108
1109 auto ComputeRegionCost = [](SmallVectorImpl<BasicBlock *> &Region) {
1110 int Cost = 0;
1111 for (BasicBlock* BB : Region)
1112 Cost += computeBBInlineCost(BB);
1113 return Cost;
1114 };
1115
1116 assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline");
1117
1118 if (ClonedOMRI->ORI.empty())
1119 return false;
1120
1121 // The CodeExtractor needs a dominator tree.
1122 DominatorTree DT;
1123 DT.recalculate(*ClonedFunc);
1124
1125 // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1126 LoopInfo LI(DT);
1127 BranchProbabilityInfo BPI(*ClonedFunc, LI);
1128 ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1129
1130 SetVector<Value *> Inputs, Outputs, Sinks;
1131 for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
1132 ClonedOMRI->ORI) {
1133 int CurrentOutlinedRegionCost = ComputeRegionCost(RegionInfo.Region);
1134
1135 CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
1136 ClonedFuncBFI.get(), &BPI, /* AllowVarargs */ false);
1137
1138 CE.findInputsOutputs(Inputs, Outputs, Sinks);
1139
1140#ifndef NDEBUG
1141 if (TracePartialInlining) {
1142 dbgs() << "inputs: " << Inputs.size() << "\n";
1143 dbgs() << "outputs: " << Outputs.size() << "\n";
1144 for (Value *value : Inputs)
1145 dbgs() << "value used in func: " << *value << "\n";
1146 for (Value *output : Outputs)
1147 dbgs() << "instr used in func: " << *output << "\n";
1148 }
1149#endif
1150 // Do not extract regions that have live exit variables.
1151 if (Outputs.size() > 0 && !ForceLiveExit)
1152 continue;
1153
1154 Function *OutlinedFunc = CE.extractCodeRegion();
1155
1156 if (OutlinedFunc) {
1157 CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc);
1158 BasicBlock *OutliningCallBB = OCS.getInstruction()->getParent();
1159 assert(OutliningCallBB->getParent() == ClonedFunc);
1160 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
1161 NumColdRegionsOutlined++;
1162 OutlinedRegionCost += CurrentOutlinedRegionCost;
1163
1164 if (MarkOutlinedColdCC) {
1165 OutlinedFunc->setCallingConv(CallingConv::Cold);
1166 OCS.setCallingConv(CallingConv::Cold);
1167 }
1168 } else
1169 ORE.emit([&]() {
1170 return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1171 &RegionInfo.Region.front()->front())
1172 << "Failed to extract region at block "
1173 << ore::NV("Block", RegionInfo.Region.front());
1174 });
1175 }
1176
1177 return !OutlinedFunctions.empty();
1178}
1179
1180Function *
1181PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001182 // Returns true if the block is to be partial inlined into the caller
1183 // (i.e. not to be extracted to the out of line function)
1184 auto ToBeInlined = [&, this](BasicBlock *BB) {
1185 return BB == ClonedOI->ReturnBlock ||
1186 (std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) !=
1187 ClonedOI->Entries.end());
1188 };
1189
Graham Yiu8b1882c2017-11-30 02:41:36 +00001190 assert(ClonedOI && "Expecting OutlineInfo for single region outline");
1191 // The CodeExtractor needs a dominator tree.
1192 DominatorTree DT;
1193 DT.recalculate(*ClonedFunc);
1194
1195 // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1196 LoopInfo LI(DT);
1197 BranchProbabilityInfo BPI(*ClonedFunc, LI);
1198 ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1199
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001200 // Gather up the blocks that we're going to extract.
1201 std::vector<BasicBlock *> ToExtract;
1202 ToExtract.push_back(ClonedOI->NonReturnBlock);
1203 OutlinedRegionCost +=
1204 PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
1205 for (BasicBlock &BB : *ClonedFunc)
1206 if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
1207 ToExtract.push_back(&BB);
1208 // FIXME: the code extractor may hoist/sink more code
1209 // into the outlined function which may make the outlining
1210 // overhead (the difference of the outlined function cost
1211 // and OutliningRegionCost) look larger.
1212 OutlinedRegionCost += computeBBInlineCost(&BB);
1213 }
1214
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001215 // Extract the body of the if.
Graham Yiu8b1882c2017-11-30 02:41:36 +00001216 Function *OutlinedFunc =
1217 CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
1218 ClonedFuncBFI.get(), &BPI,
1219 /* AllowVarargs */ true)
1220 .extractCodeRegion();
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001221
1222 if (OutlinedFunc) {
Graham Yiu8b1882c2017-11-30 02:41:36 +00001223 BasicBlock *OutliningCallBB =
1224 PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
1225 .getInstruction()
1226 ->getParent();
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001227 assert(OutliningCallBB->getParent() == ClonedFunc);
Graham Yiu8b1882c2017-11-30 02:41:36 +00001228 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
1229 } else
1230 ORE.emit([&]() {
1231 return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1232 &ToExtract.front()->front())
1233 << "Failed to extract region at block "
1234 << ore::NV("Block", ToExtract.front());
1235 });
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001236
1237 return OutlinedFunc;
1238}
1239
1240PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
1241 // Ditch the duplicate, since we're done with it, and rewrite all remaining
1242 // users (function pointers, etc.) back to the original function.
1243 ClonedFunc->replaceAllUsesWith(OrigFunc);
1244 ClonedFunc->eraseFromParent();
1245 if (!IsFunctionInlined) {
Graham Yiu8b1882c2017-11-30 02:41:36 +00001246 // Remove each function that was speculatively created if there is no
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001247 // reference.
Graham Yiu8b1882c2017-11-30 02:41:36 +00001248 for (auto FuncBBPair : OutlinedFunctions) {
1249 Function *Func = FuncBBPair.first;
1250 Func->eraseFromParent();
1251 }
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001252 }
1253}
1254
Graham Yiu8b1882c2017-11-30 02:41:36 +00001255std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
1256
Xinliang David Lid21601a2017-04-27 16:34:00 +00001257 if (F->hasAddressTaken())
Graham Yiu8b1882c2017-11-30 02:41:36 +00001258 return {false, nullptr};
Xinliang David Lid21601a2017-04-27 16:34:00 +00001259
Xinliang David Liab8722f2017-05-02 18:43:21 +00001260 // Let inliner handle it
1261 if (F->hasFnAttribute(Attribute::AlwaysInline))
Graham Yiu8b1882c2017-11-30 02:41:36 +00001262 return {false, nullptr};
Xinliang David Liab8722f2017-05-02 18:43:21 +00001263
1264 if (F->hasFnAttribute(Attribute::NoInline))
Graham Yiu8b1882c2017-11-30 02:41:36 +00001265 return {false, nullptr};
Xinliang David Liab8722f2017-05-02 18:43:21 +00001266
1267 if (PSI->isFunctionEntryCold(F))
Graham Yiu8b1882c2017-11-30 02:41:36 +00001268 return {false, nullptr};
Xinliang David Liab8722f2017-05-02 18:43:21 +00001269
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001270 if (F->user_begin() == F->user_end())
Graham Yiu8b1882c2017-11-30 02:41:36 +00001271 return {false, nullptr};
Xinliang David Lid21601a2017-04-27 16:34:00 +00001272
Graham Yiu8b1882c2017-11-30 02:41:36 +00001273 auto &ORE = (*GetORE)(*F);
1274
1275 // Only try to outline cold regions if we have a profile summary, which
1276 // implies we have profiling information.
Easwaran Ramana17f2202017-12-22 01:33:52 +00001277 if (PSI->hasProfileSummary() && F->hasProfileData() &&
Graham Yiu8b1882c2017-11-30 02:41:36 +00001278 !DisableMultiRegionPartialInline) {
1279 std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
1280 computeOutliningColdRegionsInfo(F);
1281 if (OMRI) {
1282 FunctionCloner Cloner(F, OMRI.get(), ORE);
1283
1284#ifndef NDEBUG
1285 if (TracePartialInlining) {
1286 dbgs() << "HotCountThreshold = " << PSI->getHotCountThreshold() << "\n";
1287 dbgs() << "ColdCountThreshold = " << PSI->getColdCountThreshold()
1288 << "\n";
1289 }
1290#endif
1291 bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
1292
1293 if (DidOutline) {
1294#ifndef NDEBUG
1295 if (TracePartialInlining) {
1296 dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
1297 Cloner.ClonedFunc->print(dbgs());
1298 dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
1299 }
1300#endif
1301
1302 if (tryPartialInline(Cloner))
1303 return {true, nullptr};
1304 }
1305 }
1306 }
1307
1308 // Fall-thru to regular partial inlining if we:
1309 // i) can't find any cold regions to outline, or
1310 // ii) can't inline the outlined function anywhere.
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001311 std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001312 if (!OI)
Graham Yiu8b1882c2017-11-30 02:41:36 +00001313 return {false, nullptr};
Sean Silvafe5abd52016-07-25 05:00:00 +00001314
Graham Yiu8b1882c2017-11-30 02:41:36 +00001315 FunctionCloner Cloner(F, OI.get(), ORE);
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001316 Cloner.NormalizeReturnBlock();
Graham Yiu8b1882c2017-11-30 02:41:36 +00001317
1318 Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
1319
1320 if (!OutlinedFunction)
1321 return {false, nullptr};
Sean Silvafe5abd52016-07-25 05:00:00 +00001322
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001323 bool AnyInline = tryPartialInline(Cloner);
Xinliang David Li392e9752017-05-14 02:54:02 +00001324
1325 if (AnyInline)
Graham Yiu8b1882c2017-11-30 02:41:36 +00001326 return {true, OutlinedFunction};
Xinliang David Li392e9752017-05-14 02:54:02 +00001327
Graham Yiu8b1882c2017-11-30 02:41:36 +00001328 return {false, nullptr};
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001329}
Sean Silvafe5abd52016-07-25 05:00:00 +00001330
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001331bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
Graham Yiu8b1882c2017-11-30 02:41:36 +00001332 if (Cloner.OutlinedFunctions.empty())
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001333 return false;
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001334
Graham Yiu8b1882c2017-11-30 02:41:36 +00001335 int SizeCost = 0;
1336 BlockFrequency WeightedRcost;
1337 int NonWeightedRcost;
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001338 std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);
1339
Graham Yiu8b1882c2017-11-30 02:41:36 +00001340 // Only calculate RelativeToEntryFreq when we are doing single region
1341 // outlining.
1342 BranchProbability RelativeToEntryFreq;
1343 if (Cloner.ClonedOI) {
1344 RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
1345 } else
1346 // RelativeToEntryFreq doesn't make sense when we have more than one
1347 // outlined call because each call will have a different relative frequency
1348 // to the entry block. We can consider using the average, but the
1349 // usefulness of that information is questionable. For now, assume we never
1350 // execute the calls to outlined functions.
1351 RelativeToEntryFreq = BranchProbability(0, 1);
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001352
Graham Yiu8b1882c2017-11-30 02:41:36 +00001353 WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
1354
1355 // The call sequence(s) to the outlined function(s) are larger than the sum of
1356 // the original outlined region size(s), it does not increase the chances of
1357 // inlining the function with outlining (The inliner uses the size increase to
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001358 // model the cost of inlining a callee).
1359 if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
Graham Yiu8b1882c2017-11-30 02:41:36 +00001360 auto &ORE = (*GetORE)(*Cloner.OrigFunc);
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001361 DebugLoc DLoc;
1362 BasicBlock *Block;
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001363 std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
Vivek Pandya95906582017-10-11 17:12:59 +00001364 ORE.emit([&]() {
1365 return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001366 DLoc, Block)
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001367 << ore::NV("Function", Cloner.OrigFunc)
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001368 << " not partially inlined into callers (Original Size = "
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001369 << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001370 << ", Size of call sequence to outlined function = "
Vivek Pandya95906582017-10-11 17:12:59 +00001371 << ore::NV("NewSize", SizeCost) << ")";
1372 });
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001373 return false;
1374 }
1375
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001376 assert(Cloner.OrigFunc->user_begin() == Cloner.OrigFunc->user_end() &&
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001377 "F's users should all be replaced!");
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001378
1379 std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
1380 Cloner.ClonedFunc->user_end());
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001381
1382 DenseMap<User *, uint64_t> CallSiteToProfCountMap;
Easwaran Ramana17f2202017-12-22 01:33:52 +00001383 auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1384 if (CalleeEntryCount)
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001385 computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001386
Easwaran Ramane5b8de22018-01-17 22:24:23 +00001387 uint64_t CalleeEntryCountV =
1388 (CalleeEntryCount ? CalleeEntryCount.getCount() : 0);
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001389
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001390 bool AnyInline = false;
1391 for (User *User : Users) {
1392 CallSite CS = getCallSite(User);
1393
1394 if (IsLimitReached())
1395 continue;
1396
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001397
Graham Yiu8b1882c2017-11-30 02:41:36 +00001398 if (!shouldPartialInline(CS, Cloner, WeightedRcost))
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001399 continue;
1400
Graham Yiu8b1882c2017-11-30 02:41:36 +00001401 auto &ORE = (*GetORE)(*CS.getCaller());
Florian Hahn41e32bf2017-11-03 11:29:00 +00001402 // Construct remark before doing the inlining, as after successful inlining
1403 // the callsite is removed.
1404 OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction());
1405 OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
1406 << ore::NV("Caller", CS.getCaller());
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001407
1408 InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
Graham Yiu8b1882c2017-11-30 02:41:36 +00001409 // We can only forward varargs when we outlined a single region, else we
1410 // bail on vararg functions.
1411 if (!InlineFunction(CS, IFI, nullptr, true,
1412 (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1413 : nullptr)))
Florian Hahn41e32bf2017-11-03 11:29:00 +00001414 continue;
1415
1416 ORE.emit(OR);
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001417
1418 // Now update the entry count:
1419 if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
1420 uint64_t CallSiteCount = CallSiteToProfCountMap[User];
1421 CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
1422 }
1423
1424 AnyInline = true;
1425 NumPartialInlining++;
1426 // Update the stats
Graham Yiu8b1882c2017-11-30 02:41:36 +00001427 if (Cloner.ClonedOI)
1428 NumPartialInlined++;
1429 else
1430 NumColdOutlinePartialInlined++;
1431
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001432 }
1433
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001434 if (AnyInline) {
1435 Cloner.IsFunctionInlined = true;
1436 if (CalleeEntryCount)
Easwaran Ramane5b8de22018-01-17 22:24:23 +00001437 Cloner.OrigFunc->setEntryCount(
1438 CalleeEntryCount.setCount(CalleeEntryCountV));
Graham Yiu8b1882c2017-11-30 02:41:36 +00001439 auto &ORE = (*GetORE)(*Cloner.OrigFunc);
1440 ORE.emit([&]() {
1441 return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
1442 << "Partially inlined into at least one caller";
1443 });
1444
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001445 }
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001446
1447 return AnyInline;
Owen Anderson2f82e272009-06-14 08:26:32 +00001448}
1449
Sean Silvafe5abd52016-07-25 05:00:00 +00001450bool PartialInlinerImpl::run(Module &M) {
Xinliang David Lidb8d09b2017-04-23 23:39:04 +00001451 if (DisablePartialInlining)
1452 return false;
1453
Sean Silva519323d2016-07-25 05:57:59 +00001454 std::vector<Function *> Worklist;
1455 Worklist.reserve(M.size());
Benjamin Kramer135f7352016-06-26 12:28:59 +00001456 for (Function &F : M)
1457 if (!F.use_empty() && !F.isDeclaration())
Sean Silva519323d2016-07-25 05:57:59 +00001458 Worklist.push_back(&F);
Benjamin Kramer135f7352016-06-26 12:28:59 +00001459
Sean Silva519323d2016-07-25 05:57:59 +00001460 bool Changed = false;
1461 while (!Worklist.empty()) {
1462 Function *CurrFunc = Worklist.back();
1463 Worklist.pop_back();
Sean Silvafe5abd52016-07-25 05:00:00 +00001464
Sean Silva519323d2016-07-25 05:57:59 +00001465 if (CurrFunc->use_empty())
1466 continue;
Sean Silvafe5abd52016-07-25 05:00:00 +00001467
Sean Silva519323d2016-07-25 05:57:59 +00001468 bool Recursive = false;
1469 for (User *U : CurrFunc->users())
1470 if (Instruction *I = dyn_cast<Instruction>(U))
1471 if (I->getParent()->getParent() == CurrFunc) {
1472 Recursive = true;
Owen Anderson2f82e272009-06-14 08:26:32 +00001473 break;
1474 }
Sean Silva519323d2016-07-25 05:57:59 +00001475 if (Recursive)
1476 continue;
Sean Silvafe5abd52016-07-25 05:00:00 +00001477
Graham Yiu8b1882c2017-11-30 02:41:36 +00001478 std::pair<bool, Function * > Result = unswitchFunction(CurrFunc);
1479 if (Result.second)
1480 Worklist.push_back(Result.second);
1481 if (Result.first) {
Sean Silva519323d2016-07-25 05:57:59 +00001482 Changed = true;
Owen Anderson2f82e272009-06-14 08:26:32 +00001483 }
Owen Anderson2f82e272009-06-14 08:26:32 +00001484 }
Easwaran Raman1832bf62016-06-27 16:50:18 +00001485
Sean Silva519323d2016-07-25 05:57:59 +00001486 return Changed;
Sean Silvafe5abd52016-07-25 05:00:00 +00001487}
1488
1489char PartialInlinerLegacyPass::ID = 0;
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +00001490
Daniel Jasperaec2fa32016-12-19 08:22:17 +00001491INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
1492 "Partial Inliner", false, false)
1493INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
Xinliang David Li61338462017-05-02 02:44:14 +00001494INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
1495INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
Daniel Jasperaec2fa32016-12-19 08:22:17 +00001496INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
1497 "Partial Inliner", false, false)
Sean Silvafe5abd52016-07-25 05:00:00 +00001498
1499ModulePass *llvm::createPartialInliningPass() {
1500 return new PartialInlinerLegacyPass();
1501}
1502
1503PreservedAnalyses PartialInlinerPass::run(Module &M,
1504 ModuleAnalysisManager &AM) {
Daniel Jasperaec2fa32016-12-19 08:22:17 +00001505 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
Xinliang David Li61338462017-05-02 02:44:14 +00001506
Daniel Jasperaec2fa32016-12-19 08:22:17 +00001507 std::function<AssumptionCache &(Function &)> GetAssumptionCache =
1508 [&FAM](Function &F) -> AssumptionCache & {
1509 return FAM.getResult<AssumptionAnalysis>(F);
1510 };
Xinliang David Li61338462017-05-02 02:44:14 +00001511
1512 std::function<BlockFrequencyInfo &(Function &)> GetBFI =
1513 [&FAM](Function &F) -> BlockFrequencyInfo & {
1514 return FAM.getResult<BlockFrequencyAnalysis>(F);
1515 };
1516
1517 std::function<TargetTransformInfo &(Function &)> GetTTI =
1518 [&FAM](Function &F) -> TargetTransformInfo & {
1519 return FAM.getResult<TargetIRAnalysis>(F);
1520 };
1521
Graham Yiu8b1882c2017-11-30 02:41:36 +00001522 std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
1523 [&FAM](Function &F) -> OptimizationRemarkEmitter & {
1524 return FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1525 };
1526
Xinliang David Li61338462017-05-02 02:44:14 +00001527 ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
1528
Graham Yiu8b1882c2017-11-30 02:41:36 +00001529 if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI, &GetORE)
1530 .run(M))
Easwaran Raman1832bf62016-06-27 16:50:18 +00001531 return PreservedAnalyses::none();
1532 return PreservedAnalyses::all();
Duncan Sands29c8efc2009-07-03 15:30:58 +00001533}