//===- PartialInlining.cpp - Inline parts of functions --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs partial inlining, typically by inlining an if statement
// that surrounds the body of the function.
//
//===----------------------------------------------------------------------===//

Easwaran Raman1832bf62016-06-27 16:50:18 +000014#include "llvm/Transforms/IPO/PartialInlining.h"
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000015#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/DenseSet.h"
17#include "llvm/ADT/None.h"
18#include "llvm/ADT/Optional.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/SmallVector.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000021#include "llvm/ADT/Statistic.h"
Sean Silvaf8015752016-08-02 02:15:45 +000022#include "llvm/Analysis/BlockFrequencyInfo.h"
23#include "llvm/Analysis/BranchProbabilityInfo.h"
Xinliang David Li61338462017-05-02 02:44:14 +000024#include "llvm/Analysis/InlineCost.h"
Sean Silvaf8015752016-08-02 02:15:45 +000025#include "llvm/Analysis/LoopInfo.h"
Adam Nemet0965da22017-10-09 23:19:02 +000026#include "llvm/Analysis/OptimizationRemarkEmitter.h"
Xinliang David Li61338462017-05-02 02:44:14 +000027#include "llvm/Analysis/ProfileSummaryInfo.h"
Graham Yiu8b1882c2017-11-30 02:41:36 +000028#include "llvm/Analysis/TargetLibraryInfo.h"
Xinliang David Li61338462017-05-02 02:44:14 +000029#include "llvm/Analysis/TargetTransformInfo.h"
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000030#include "llvm/IR/Attributes.h"
31#include "llvm/IR/BasicBlock.h"
Chandler Carruth1305dc32014-03-04 11:45:46 +000032#include "llvm/IR/CFG.h"
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000033#include "llvm/IR/CallSite.h"
34#include "llvm/IR/DebugLoc.h"
Xinliang David Li15744ad2017-04-23 21:40:58 +000035#include "llvm/IR/DiagnosticInfo.h"
Chandler Carruth5ad5f152014-01-13 09:26:24 +000036#include "llvm/IR/Dominators.h"
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000037#include "llvm/IR/Function.h"
38#include "llvm/IR/InstrTypes.h"
39#include "llvm/IR/Instruction.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000040#include "llvm/IR/Instructions.h"
Reid Kleckner0e8c4bb2017-09-07 23:27:44 +000041#include "llvm/IR/IntrinsicInst.h"
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000042#include "llvm/IR/Intrinsics.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000043#include "llvm/IR/Module.h"
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000044#include "llvm/IR/User.h"
Owen Anderson2f82e272009-06-14 08:26:32 +000045#include "llvm/Pass.h"
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000046#include "llvm/Support/BlockFrequency.h"
47#include "llvm/Support/BranchProbability.h"
48#include "llvm/Support/Casting.h"
49#include "llvm/Support/CommandLine.h"
50#include "llvm/Support/ErrorHandling.h"
Easwaran Raman1832bf62016-06-27 16:50:18 +000051#include "llvm/Transforms/IPO.h"
Owen Anderson2f82e272009-06-14 08:26:32 +000052#include "llvm/Transforms/Utils/Cloning.h"
Chandler Carruth0fde0012012-05-04 10:18:49 +000053#include "llvm/Transforms/Utils/CodeExtractor.h"
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +000054#include "llvm/Transforms/Utils/ValueMapper.h"
55#include <algorithm>
56#include <cassert>
57#include <cstdint>
58#include <functional>
59#include <iterator>
60#include <memory>
61#include <tuple>
62#include <vector>
63
Owen Anderson2f82e272009-06-14 08:26:32 +000064using namespace llvm;
65
Xinliang David Li15744ad2017-04-23 21:40:58 +000066#define DEBUG_TYPE "partial-inlining"
Chandler Carruth964daaa2014-04-22 02:55:47 +000067
Xinliang David Li61338462017-05-02 02:44:14 +000068STATISTIC(NumPartialInlined,
69 "Number of callsites functions partially inlined into.");
Graham Yiu8b1882c2017-11-30 02:41:36 +000070STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with "
71 "cold outlined regions were partially "
72 "inlined into its caller(s).");
73STATISTIC(NumColdRegionsFound,
74 "Number of cold single entry/exit regions found.");
75STATISTIC(NumColdRegionsOutlined,
76 "Number of cold single entry/exit regions outlined.");
Owen Andersonbd6a2132009-06-15 20:50:26 +000077
Xinliang David Lidb8d09b2017-04-23 23:39:04 +000078// Command line option to disable partial-inlining. The default is false:
79static cl::opt<bool>
80 DisablePartialInlining("disable-partial-inlining", cl::init(false),
Graham Yiu8b1882c2017-11-30 02:41:36 +000081 cl::Hidden, cl::desc("Disable partial inlining"));
82// Command line option to disable multi-region partial-inlining. The default is
83// false:
84static cl::opt<bool> DisableMultiRegionPartialInline(
85 "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
86 cl::desc("Disable multi-region partial inlining"));
87
88// Command line option to force outlining in regions with live exit variables.
89// The default is false:
90static cl::opt<bool>
91 ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
92 cl::desc("Force outline regions with live exits"));
93
94// Command line option to enable marking outline functions with Cold Calling
95// Convention. The default is false:
96static cl::opt<bool>
97 MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
98 cl::desc("Mark outline function calls with ColdCC"));
99
100#ifndef NDEBUG
101// Command line option to debug partial-inlining. The default is none:
102static cl::opt<bool> TracePartialInlining("trace-partial-inlining",
103 cl::init(false), cl::Hidden,
104 cl::desc("Trace partial inlining."));
105#endif
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000106
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000107// This is an option used by testing:
108static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
109 cl::init(false), cl::ZeroOrMore,
110 cl::ReallyHidden,
111 cl::desc("Skip Cost Analysis"));
Graham Yiu8b1882c2017-11-30 02:41:36 +0000112// Used to determine if a cold region is worth outlining based on
113// its inlining cost compared to the original function. Default is set at 10%.
114// ie. if the cold region reduces the inlining cost of the original function by
115// at least 10%.
116static cl::opt<float> MinRegionSizeRatio(
117 "min-region-size-ratio", cl::init(0.1), cl::Hidden,
118 cl::desc("Minimum ratio comparing relative sizes of each "
119 "outline candidate and original function"));
120// Used to tune the minimum number of execution counts needed in the predecessor
121// block to the cold edge. ie. confidence interval.
122static cl::opt<unsigned>
123 MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
124 cl::desc("Minimum block executions to consider "
125 "its BranchProbabilityInfo valid"));
126// Used to determine when an edge is considered cold. Default is set to 10%. ie.
127// if the branch probability is 10% or less, then it is deemed as 'cold'.
128static cl::opt<float> ColdBranchRatio(
129 "cold-branch-ratio", cl::init(0.1), cl::Hidden,
130 cl::desc("Minimum BranchProbability to consider a region cold."));
Xinliang David Lidb8d09b2017-04-23 23:39:04 +0000131
Xinliang David Lid21601a2017-04-27 16:34:00 +0000132static cl::opt<unsigned> MaxNumInlineBlocks(
133 "max-num-inline-blocks", cl::init(5), cl::Hidden,
Chad Rosierf98335e2017-08-24 21:21:09 +0000134 cl::desc("Max number of blocks to be partially inlined"));
Xinliang David Lid21601a2017-04-27 16:34:00 +0000135
Xinliang David Lidb8d09b2017-04-23 23:39:04 +0000136// Command line option to set the maximum number of partial inlining allowed
137// for the module. The default value of -1 means no limit.
138static cl::opt<int> MaxNumPartialInlining(
139 "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
140 cl::desc("Max number of partial inlining. The default is unlimited"));
141
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000142// Used only when PGO or user annotated branch data is absent. It is
143// the least value that is used to weigh the outline region. If BFI
144// produces larger value, the BFI value will be used.
145static cl::opt<int>
146 OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
147 cl::Hidden, cl::ZeroOrMore,
148 cl::desc("Relative frequency of outline region to "
149 "the entry block"));
150
Xinliang David Li0b7d8582017-06-02 22:08:04 +0000151static cl::opt<unsigned> ExtraOutliningPenalty(
152 "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
153 cl::desc("A debug option to add additional penalty to the computed one."));
154
Owen Anderson2f82e272009-06-14 08:26:32 +0000155namespace {
Xinliang David Lid21601a2017-04-27 16:34:00 +0000156
157struct FunctionOutliningInfo {
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000158 FunctionOutliningInfo() = default;
159
Xinliang David Lid21601a2017-04-27 16:34:00 +0000160 // Returns the number of blocks to be inlined including all blocks
161 // in Entries and one return block.
162 unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; }
163
164 // A set of blocks including the function entry that guard
165 // the region to be outlined.
166 SmallVector<BasicBlock *, 4> Entries;
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000167
Xinliang David Lid21601a2017-04-27 16:34:00 +0000168 // The return block that is not included in the outlined region.
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000169 BasicBlock *ReturnBlock = nullptr;
170
Xinliang David Li0b7d8582017-06-02 22:08:04 +0000171 // The dominating block of the region to be outlined.
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000172 BasicBlock *NonReturnBlock = nullptr;
173
Xinliang David Lid21601a2017-04-27 16:34:00 +0000174 // The set of blocks in Entries that that are predecessors to ReturnBlock
175 SmallVector<BasicBlock *, 4> ReturnBlockPreds;
176};
177
Graham Yiu8b1882c2017-11-30 02:41:36 +0000178struct FunctionOutliningMultiRegionInfo {
179 FunctionOutliningMultiRegionInfo()
180 : ORI() {}
181
182 // Container for outline regions
183 struct OutlineRegionInfo {
Sergey Dmitrievafd612e2019-02-08 23:52:15 +0000184 OutlineRegionInfo(ArrayRef<BasicBlock *> Region,
Graham Yiu8b1882c2017-11-30 02:41:36 +0000185 BasicBlock *EntryBlock, BasicBlock *ExitBlock,
186 BasicBlock *ReturnBlock)
Sergey Dmitrievafd612e2019-02-08 23:52:15 +0000187 : Region(Region.begin(), Region.end()), EntryBlock(EntryBlock),
188 ExitBlock(ExitBlock), ReturnBlock(ReturnBlock) {}
Graham Yiu8b1882c2017-11-30 02:41:36 +0000189 SmallVector<BasicBlock *, 8> Region;
190 BasicBlock *EntryBlock;
191 BasicBlock *ExitBlock;
192 BasicBlock *ReturnBlock;
193 };
194
195 SmallVector<OutlineRegionInfo, 4> ORI;
196};
197
Sean Silvafe5abd52016-07-25 05:00:00 +0000198struct PartialInlinerImpl {
Graham Yiu8b1882c2017-11-30 02:41:36 +0000199
Xinliang David Li61338462017-05-02 02:44:14 +0000200 PartialInlinerImpl(
201 std::function<AssumptionCache &(Function &)> *GetAC,
Sergey Dmitriev807960e2019-02-08 06:55:18 +0000202 function_ref<AssumptionCache *(Function &)> LookupAC,
Xinliang David Li61338462017-05-02 02:44:14 +0000203 std::function<TargetTransformInfo &(Function &)> *GTTI,
204 Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI,
Sean Fertile18f17332018-04-20 19:56:26 +0000205 ProfileSummaryInfo *ProfSI)
Sergey Dmitriev807960e2019-02-08 06:55:18 +0000206 : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
207 GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000208
Sean Silvafe5abd52016-07-25 05:00:00 +0000209 bool run(Module &M);
Graham Yiu8b1882c2017-11-30 02:41:36 +0000210 // Main part of the transformation that calls helper functions to find
211 // outlining candidates, clone & outline the function, and attempt to
212 // partially inline the resulting function. Returns true if
213 // inlining was successful, false otherwise. Also returns the outline
214 // function (only if we partially inlined early returns) as there is a
215 // possibility to further "peel" early return statements that were left in the
216 // outline function due to code size.
217 std::pair<bool, Function *> unswitchFunction(Function *F);
Sean Silvafe5abd52016-07-25 05:00:00 +0000218
Hiroshi Inoued24ddcd2018-01-19 10:55:29 +0000219 // This class speculatively clones the function to be partial inlined.
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000220 // At the end of partial inlining, the remaining callsites to the cloned
221 // function that are not partially inlined will be fixed up to reference
222 // the original function, and the cloned function will be erased.
223 struct FunctionCloner {
Graham Yiu8b1882c2017-11-30 02:41:36 +0000224 // Two constructors, one for single region outlining, the other for
225 // multi-region outlining.
226 FunctionCloner(Function *F, FunctionOutliningInfo *OI,
Sergey Dmitriev807960e2019-02-08 06:55:18 +0000227 OptimizationRemarkEmitter &ORE,
228 function_ref<AssumptionCache *(Function &)> LookupAC);
Graham Yiu8b1882c2017-11-30 02:41:36 +0000229 FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
Sergey Dmitriev807960e2019-02-08 06:55:18 +0000230 OptimizationRemarkEmitter &ORE,
231 function_ref<AssumptionCache *(Function &)> LookupAC);
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000232 ~FunctionCloner();
233
234 // Prepare for function outlining: making sure there is only
235 // one incoming edge from the extracted/outlined region to
236 // the return block.
237 void NormalizeReturnBlock();
238
Graham Yiu8b1882c2017-11-30 02:41:36 +0000239 // Do function outlining for cold regions.
240 bool doMultiRegionFunctionOutlining();
241 // Do function outlining for region after early return block(s).
Florian Hahn0e9dec62017-11-13 10:35:52 +0000242 // NOTE: For vararg functions that do the vararg handling in the outlined
243 // function, we temporarily generate IR that does not properly
244 // forward varargs to the outlined function. Calling InlineFunction
245 // will update calls to the outlined functions to properly forward
246 // the varargs.
Graham Yiu8b1882c2017-11-30 02:41:36 +0000247 Function *doSingleRegionFunctionOutlining();
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000248
249 Function *OrigFunc = nullptr;
250 Function *ClonedFunc = nullptr;
Graham Yiu8b1882c2017-11-30 02:41:36 +0000251
252 typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
253 // Keep track of Outlined Functions and the basic block they're called from.
254 SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;
255
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000256 // ClonedFunc is inlined in one of its callers after function
257 // outlining.
258 bool IsFunctionInlined = false;
259 // The cost of the region to be outlined.
260 int OutlinedRegionCost = 0;
Graham Yiu8b1882c2017-11-30 02:41:36 +0000261 // ClonedOI is specific to outlining non-early return blocks.
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000262 std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
Graham Yiu8b1882c2017-11-30 02:41:36 +0000263 // ClonedOMRI is specific to outlining cold regions.
264 std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000265 std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
Graham Yiu8b1882c2017-11-30 02:41:36 +0000266 OptimizationRemarkEmitter &ORE;
Sergey Dmitriev807960e2019-02-08 06:55:18 +0000267 function_ref<AssumptionCache *(Function &)> LookupAC;
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000268 };
269
Sean Silvafe5abd52016-07-25 05:00:00 +0000270private:
Xinliang David Lidb8d09b2017-04-23 23:39:04 +0000271 int NumPartialInlining = 0;
Xinliang David Li61338462017-05-02 02:44:14 +0000272 std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
Sergey Dmitriev807960e2019-02-08 06:55:18 +0000273 function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
Xinliang David Li61338462017-05-02 02:44:14 +0000274 std::function<TargetTransformInfo &(Function &)> *GetTTI;
275 Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
276 ProfileSummaryInfo *PSI;
Xinliang David Lidb8d09b2017-04-23 23:39:04 +0000277
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000278 // Return the frequency of the OutlininingBB relative to F's entry point.
279 // The result is no larger than 1 and is represented using BP.
280 // (Note that the outlined region's 'head' block can only have incoming
281 // edges from the guarding entry blocks).
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000282 BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000283
284 // Return true if the callee of CS should be partially inlined with
285 // profit.
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000286 bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner,
Sean Fertile18f17332018-04-20 19:56:26 +0000287 BlockFrequency WeightedOutliningRcost,
288 OptimizationRemarkEmitter &ORE);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000289
290 // Try to inline DuplicateFunction (cloned from F with call to
291 // the OutlinedFunction into its callers. Return true
292 // if there is any successful inlining.
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000293 bool tryPartialInline(FunctionCloner &Cloner);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000294
295 // Compute the mapping from use site of DuplicationFunction to the enclosing
296 // BB's profile count.
297 void computeCallsiteToProfCountMap(Function *DuplicateFunction,
298 DenseMap<User *, uint64_t> &SiteCountMap);
299
Xinliang David Lidb8d09b2017-04-23 23:39:04 +0000300 bool IsLimitReached() {
301 return (MaxNumPartialInlining != -1 &&
302 NumPartialInlining >= MaxNumPartialInlining);
303 }
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000304
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000305 static CallSite getCallSite(User *U) {
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000306 CallSite CS;
307 if (CallInst *CI = dyn_cast<CallInst>(U))
308 CS = CallSite(CI);
309 else if (InvokeInst *II = dyn_cast<InvokeInst>(U))
310 CS = CallSite(II);
311 else
312 llvm_unreachable("All uses must be calls");
313 return CS;
314 }
315
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000316 static CallSite getOneCallSiteTo(Function *F) {
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000317 User *User = *F->user_begin();
318 return getCallSite(User);
319 }
320
321 std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function *F) {
322 CallSite CS = getOneCallSiteTo(F);
323 DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
324 BasicBlock *Block = CS.getParent();
325 return std::make_tuple(DLoc, Block);
326 }
327
328 // Returns the costs associated with function outlining:
329 // - The first value is the non-weighted runtime cost for making the call
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000330 // to the outlined function, including the addtional setup cost in the
331 // outlined function itself;
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000332 // - The second value is the estimated size of the new call sequence in
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000333 // basic block Cloner.OutliningCallBB;
334 std::tuple<int, int> computeOutliningCosts(FunctionCloner &Cloner);
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000335
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000336 // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
337 // approximate both the size and runtime cost (Note that in the current
338 // inline cost analysis, there is no clear distinction there either).
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000339 static int computeBBInlineCost(BasicBlock *BB);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000340
341 std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
Graham Yiu8b1882c2017-11-30 02:41:36 +0000342 std::unique_ptr<FunctionOutliningMultiRegionInfo>
Sean Fertile18f17332018-04-20 19:56:26 +0000343 computeOutliningColdRegionsInfo(Function *F, OptimizationRemarkEmitter &ORE);
Sean Silvafe5abd52016-07-25 05:00:00 +0000344};
Xinliang David Lid21601a2017-04-27 16:34:00 +0000345
Easwaran Raman1832bf62016-06-27 16:50:18 +0000346struct PartialInlinerLegacyPass : public ModulePass {
347 static char ID; // Pass identification, replacement for typeid
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000348
Easwaran Raman1832bf62016-06-27 16:50:18 +0000349 PartialInlinerLegacyPass() : ModulePass(ID) {
350 initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
351 }
Craig Topper3e4c6972014-03-05 09:10:37 +0000352
Daniel Jasperaec2fa32016-12-19 08:22:17 +0000353 void getAnalysisUsage(AnalysisUsage &AU) const override {
354 AU.addRequired<AssumptionCacheTracker>();
Xinliang David Li61338462017-05-02 02:44:14 +0000355 AU.addRequired<ProfileSummaryInfoWrapperPass>();
356 AU.addRequired<TargetTransformInfoWrapperPass>();
Daniel Jasperaec2fa32016-12-19 08:22:17 +0000357 }
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000358
Easwaran Raman1832bf62016-06-27 16:50:18 +0000359 bool runOnModule(Module &M) override {
360 if (skipModule(M))
361 return false;
Craig Topper3e4c6972014-03-05 09:10:37 +0000362
Daniel Jasperaec2fa32016-12-19 08:22:17 +0000363 AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
Xinliang David Li61338462017-05-02 02:44:14 +0000364 TargetTransformInfoWrapperPass *TTIWP =
365 &getAnalysis<TargetTransformInfoWrapperPass>();
366 ProfileSummaryInfo *PSI =
Vedant Kumare7b789b2018-11-19 05:23:16 +0000367 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
Xinliang David Li61338462017-05-02 02:44:14 +0000368
Daniel Jasperaec2fa32016-12-19 08:22:17 +0000369 std::function<AssumptionCache &(Function &)> GetAssumptionCache =
370 [&ACT](Function &F) -> AssumptionCache & {
371 return ACT->getAssumptionCache(F);
372 };
Xinliang David Li61338462017-05-02 02:44:14 +0000373
Sergey Dmitriev807960e2019-02-08 06:55:18 +0000374 auto LookupAssumptionCache = [ACT](Function &F) -> AssumptionCache * {
375 return ACT->lookupAssumptionCache(F);
376 };
377
Xinliang David Li61338462017-05-02 02:44:14 +0000378 std::function<TargetTransformInfo &(Function &)> GetTTI =
379 [&TTIWP](Function &F) -> TargetTransformInfo & {
380 return TTIWP->getTTI(F);
381 };
382
Sergey Dmitriev807960e2019-02-08 06:55:18 +0000383 return PartialInlinerImpl(&GetAssumptionCache, LookupAssumptionCache,
384 &GetTTI, NoneType::None, PSI)
Graham Yiu8b1882c2017-11-30 02:41:36 +0000385 .run(M);
Sean Silvafe5abd52016-07-25 05:00:00 +0000386 }
Sean Silva519323d2016-07-25 05:57:59 +0000387};
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000388
389} // end anonymous namespace
Owen Anderson2f82e272009-06-14 08:26:32 +0000390
Graham Yiu8b1882c2017-11-30 02:41:36 +0000391std::unique_ptr<FunctionOutliningMultiRegionInfo>
Sean Fertile18f17332018-04-20 19:56:26 +0000392PartialInlinerImpl::computeOutliningColdRegionsInfo(Function *F,
393 OptimizationRemarkEmitter &ORE) {
Graham Yiu8b1882c2017-11-30 02:41:36 +0000394 BasicBlock *EntryBlock = &F->front();
395
396 DominatorTree DT(*F);
Graham Yiu8b1882c2017-11-30 02:41:36 +0000397 LoopInfo LI(DT);
398 BranchProbabilityInfo BPI(*F, LI);
399 std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
400 BlockFrequencyInfo *BFI;
401 if (!GetBFI) {
402 ScopedBFI.reset(new BlockFrequencyInfo(*F, BPI, LI));
403 BFI = ScopedBFI.get();
404 } else
405 BFI = &(*GetBFI)(*F);
406
Graham Yiu8b1882c2017-11-30 02:41:36 +0000407 // Return if we don't have profiling information.
408 if (!PSI->hasInstrumentationProfile())
409 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
410
411 std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
412 llvm::make_unique<FunctionOutliningMultiRegionInfo>();
413
414 auto IsSingleEntry = [](SmallVectorImpl<BasicBlock *> &BlockList) {
415 BasicBlock *Dom = BlockList.front();
Vedant Kumar4de31bb2018-11-19 19:54:27 +0000416 return BlockList.size() > 1 && Dom->hasNPredecessors(1);
Graham Yiu8b1882c2017-11-30 02:41:36 +0000417 };
418
419 auto IsSingleExit =
Graham Yiu70293fa2017-11-30 03:36:57 +0000420 [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
Graham Yiu8b1882c2017-11-30 02:41:36 +0000421 BasicBlock *ExitBlock = nullptr;
422 for (auto *Block : BlockList) {
423 for (auto SI = succ_begin(Block); SI != succ_end(Block); ++SI) {
424 if (!is_contained(BlockList, *SI)) {
425 if (ExitBlock) {
426 ORE.emit([&]() {
427 return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion",
428 &SI->front())
429 << "Region dominated by "
430 << ore::NV("Block", BlockList.front()->getName())
431 << " has more than one region exit edge.";
432 });
433 return nullptr;
434 } else
435 ExitBlock = Block;
436 }
437 }
438 }
439 return ExitBlock;
440 };
441
442 auto BBProfileCount = [BFI](BasicBlock *BB) {
443 return BFI->getBlockProfileCount(BB)
444 ? BFI->getBlockProfileCount(BB).getValue()
445 : 0;
446 };
447
448 // Use the same computeBBInlineCost function to compute the cost savings of
449 // the outlining the candidate region.
450 int OverallFunctionCost = 0;
451 for (auto &BB : *F)
452 OverallFunctionCost += computeBBInlineCost(&BB);
453
454#ifndef NDEBUG
455 if (TracePartialInlining)
456 dbgs() << "OverallFunctionCost = " << OverallFunctionCost << "\n";
457#endif
458 int MinOutlineRegionCost =
459 static_cast<int>(OverallFunctionCost * MinRegionSizeRatio);
460 BranchProbability MinBranchProbability(
461 static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
462 MinBlockCounterExecution);
463 bool ColdCandidateFound = false;
464 BasicBlock *CurrEntry = EntryBlock;
465 std::vector<BasicBlock *> DFS;
466 DenseMap<BasicBlock *, bool> VisitedMap;
467 DFS.push_back(CurrEntry);
468 VisitedMap[CurrEntry] = true;
469 // Use Depth First Search on the basic blocks to find CFG edges that are
470 // considered cold.
471 // Cold regions considered must also have its inline cost compared to the
472 // overall inline cost of the original function. The region is outlined only
473 // if it reduced the inline cost of the function by 'MinOutlineRegionCost' or
474 // more.
475 while (!DFS.empty()) {
476 auto *thisBB = DFS.back();
477 DFS.pop_back();
478 // Only consider regions with predecessor blocks that are considered
479 // not-cold (default: part of the top 99.99% of all block counters)
480 // AND greater than our minimum block execution count (default: 100).
Vedant Kumare7b789b2018-11-19 05:23:16 +0000481 if (PSI->isColdBlock(thisBB, BFI) ||
Graham Yiu8b1882c2017-11-30 02:41:36 +0000482 BBProfileCount(thisBB) < MinBlockCounterExecution)
483 continue;
484 for (auto SI = succ_begin(thisBB); SI != succ_end(thisBB); ++SI) {
485 if (VisitedMap[*SI])
486 continue;
487 VisitedMap[*SI] = true;
488 DFS.push_back(*SI);
489 // If branch isn't cold, we skip to the next one.
490 BranchProbability SuccProb = BPI.getEdgeProbability(thisBB, *SI);
491 if (SuccProb > MinBranchProbability)
492 continue;
493#ifndef NDEBUG
494 if (TracePartialInlining) {
495 dbgs() << "Found cold edge: " << thisBB->getName() << "->"
496 << (*SI)->getName() << "\nBranch Probability = " << SuccProb
497 << "\n";
498 }
499#endif
500 SmallVector<BasicBlock *, 8> DominateVector;
501 DT.getDescendants(*SI, DominateVector);
502 // We can only outline single entry regions (for now).
503 if (!IsSingleEntry(DominateVector))
504 continue;
505 BasicBlock *ExitBlock = nullptr;
506 // We can only outline single exit regions (for now).
507 if (!(ExitBlock = IsSingleExit(DominateVector)))
508 continue;
509 int OutlineRegionCost = 0;
510 for (auto *BB : DominateVector)
511 OutlineRegionCost += computeBBInlineCost(BB);
512
513#ifndef NDEBUG
514 if (TracePartialInlining)
515 dbgs() << "OutlineRegionCost = " << OutlineRegionCost << "\n";
516#endif
517
518 if (OutlineRegionCost < MinOutlineRegionCost) {
519 ORE.emit([&]() {
520 return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
521 &SI->front())
522 << ore::NV("Callee", F) << " inline cost-savings smaller than "
523 << ore::NV("Cost", MinOutlineRegionCost);
524 });
525 continue;
526 }
527 // For now, ignore blocks that belong to a SISE region that is a
528 // candidate for outlining. In the future, we may want to look
529 // at inner regions because the outer region may have live-exit
530 // variables.
531 for (auto *BB : DominateVector)
532 VisitedMap[BB] = true;
533 // ReturnBlock here means the block after the outline call
534 BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
535 // assert(ReturnBlock && "ReturnBlock is NULL somehow!");
536 FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
537 DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
Graham Yiu8b1882c2017-11-30 02:41:36 +0000538 OutliningInfo->ORI.push_back(RegInfo);
539#ifndef NDEBUG
540 if (TracePartialInlining) {
541 dbgs() << "Found Cold Candidate starting at block: "
542 << DominateVector.front()->getName() << "\n";
543 }
544#endif
545 ColdCandidateFound = true;
546 NumColdRegionsFound++;
547 }
548 }
549 if (ColdCandidateFound)
550 return OutliningInfo;
551 else
552 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
553}
554
Xinliang David Lid21601a2017-04-27 16:34:00 +0000555std::unique_ptr<FunctionOutliningInfo>
556PartialInlinerImpl::computeOutliningInfo(Function *F) {
Sean Silva519323d2016-07-25 05:57:59 +0000557 BasicBlock *EntryBlock = &F->front();
558 BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
Owen Andersonf0081db2009-09-08 19:53:15 +0000559 if (!BR || BR->isUnconditional())
Xinliang David Lid21601a2017-04-27 16:34:00 +0000560 return std::unique_ptr<FunctionOutliningInfo>();
Sean Silvafe5abd52016-07-25 05:00:00 +0000561
Xinliang David Lid21601a2017-04-27 16:34:00 +0000562 // Returns true if Succ is BB's successor
563 auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
564 return is_contained(successors(BB), Succ);
565 };
566
Xinliang David Lid21601a2017-04-27 16:34:00 +0000567 auto IsReturnBlock = [](BasicBlock *BB) {
Chandler Carruthedb12a82018-10-15 10:04:59 +0000568 Instruction *TI = BB->getTerminator();
Xinliang David Lid21601a2017-04-27 16:34:00 +0000569 return isa<ReturnInst>(TI);
570 };
571
Davide Italianoaa42a102017-05-08 20:44:01 +0000572 auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
Xinliang David Lid21601a2017-04-27 16:34:00 +0000573 if (IsReturnBlock(Succ1))
574 return std::make_tuple(Succ1, Succ2);
575 if (IsReturnBlock(Succ2))
576 return std::make_tuple(Succ2, Succ1);
577
578 return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
579 };
580
581 // Detect a triangular shape:
Davide Italianoaa42a102017-05-08 20:44:01 +0000582 auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
Xinliang David Lid21601a2017-04-27 16:34:00 +0000583 if (IsSuccessor(Succ1, Succ2))
584 return std::make_tuple(Succ1, Succ2);
585 if (IsSuccessor(Succ2, Succ1))
586 return std::make_tuple(Succ2, Succ1);
587
588 return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
589 };
590
591 std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
592 llvm::make_unique<FunctionOutliningInfo>();
593
594 BasicBlock *CurrEntry = EntryBlock;
595 bool CandidateFound = false;
596 do {
597 // The number of blocks to be inlined has already reached
598 // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
599 // disables partial inlining for the function.
600 if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks)
601 break;
602
Vedant Kumare0b5f862018-05-10 23:01:54 +0000603 if (succ_size(CurrEntry) != 2)
Xinliang David Lid21601a2017-04-27 16:34:00 +0000604 break;
605
606 BasicBlock *Succ1 = *succ_begin(CurrEntry);
607 BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
608
609 BasicBlock *ReturnBlock, *NonReturnBlock;
610 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
611
612 if (ReturnBlock) {
613 OutliningInfo->Entries.push_back(CurrEntry);
614 OutliningInfo->ReturnBlock = ReturnBlock;
615 OutliningInfo->NonReturnBlock = NonReturnBlock;
616 CandidateFound = true;
617 break;
618 }
619
620 BasicBlock *CommSucc;
621 BasicBlock *OtherSucc;
622 std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
623
624 if (!CommSucc)
625 break;
626
627 OutliningInfo->Entries.push_back(CurrEntry);
628 CurrEntry = OtherSucc;
Xinliang David Lid21601a2017-04-27 16:34:00 +0000629 } while (true);
630
631 if (!CandidateFound)
632 return std::unique_ptr<FunctionOutliningInfo>();
633
  // Sanity-check the entries: there should not be any successors
  // (outside the entry set) other than {ReturnBlock, NonReturnBlock}.
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000637 assert(OutliningInfo->Entries[0] == &F->front() &&
638 "Function Entry must be the first in Entries vector");
Xinliang David Lid21601a2017-04-27 16:34:00 +0000639 DenseSet<BasicBlock *> Entries;
640 for (BasicBlock *E : OutliningInfo->Entries)
641 Entries.insert(E);
642
  // Returns true if BB has a predecessor which is not
  // in the Entries set.
645 auto HasNonEntryPred = [Entries](BasicBlock *BB) {
646 for (auto Pred : predecessors(BB)) {
647 if (!Entries.count(Pred))
648 return true;
649 }
650 return false;
651 };
652 auto CheckAndNormalizeCandidate =
653 [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
654 for (BasicBlock *E : OutliningInfo->Entries) {
655 for (auto Succ : successors(E)) {
656 if (Entries.count(Succ))
657 continue;
658 if (Succ == OutliningInfo->ReturnBlock)
659 OutliningInfo->ReturnBlockPreds.push_back(E);
660 else if (Succ != OutliningInfo->NonReturnBlock)
661 return false;
662 }
663 // There should not be any outside incoming edges either:
664 if (HasNonEntryPred(E))
665 return false;
666 }
667 return true;
668 };
669
670 if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
671 return std::unique_ptr<FunctionOutliningInfo>();
672
673 // Now further growing the candidate's inlining region by
674 // peeling off dominating blocks from the outlining region:
675 while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) {
676 BasicBlock *Cand = OutliningInfo->NonReturnBlock;
Vedant Kumare0b5f862018-05-10 23:01:54 +0000677 if (succ_size(Cand) != 2)
Xinliang David Lid21601a2017-04-27 16:34:00 +0000678 break;
679
680 if (HasNonEntryPred(Cand))
681 break;
682
683 BasicBlock *Succ1 = *succ_begin(Cand);
684 BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
685
686 BasicBlock *ReturnBlock, *NonReturnBlock;
687 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
688 if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
689 break;
690
691 if (NonReturnBlock->getSinglePredecessor() != Cand)
692 break;
693
694 // Now grow and update OutlininigInfo:
695 OutliningInfo->Entries.push_back(Cand);
696 OutliningInfo->NonReturnBlock = NonReturnBlock;
697 OutliningInfo->ReturnBlockPreds.push_back(Cand);
698 Entries.insert(Cand);
Reid Klecknerc26a17a2015-02-04 19:14:57 +0000699 }
Sean Silvafe5abd52016-07-25 05:00:00 +0000700
Xinliang David Lid21601a2017-04-27 16:34:00 +0000701 return OutliningInfo;
702}
703
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000704// Check if there is PGO data or user annoated branch data:
705static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
Easwaran Ramana17f2202017-12-22 01:33:52 +0000706 if (F->hasProfileData())
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000707 return true;
708 // Now check if any of the entry block has MD_prof data:
709 for (auto *E : OI->Entries) {
710 BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
711 if (!BR || BR->isUnconditional())
712 continue;
713 uint64_t T, F;
714 if (BR->extractProfMetadata(T, F))
715 return true;
716 }
717 return false;
718}
719
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000720BranchProbability
721PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) {
Graham Yiu8b1882c2017-11-30 02:41:36 +0000722 BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000723 auto EntryFreq =
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000724 Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
725 auto OutliningCallFreq =
Graham Yiu8b1882c2017-11-30 02:41:36 +0000726 Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
727 // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
728 // we outlined any regions, so we may encounter situations where the
729 // OutliningCallFreq is *slightly* bigger than the EntryFreq.
730 if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency()) {
731 OutliningCallFreq = EntryFreq;
732 }
733 auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
734 OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000735
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000736 if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get()))
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000737 return OutlineRegionRelFreq;
738
Xinliang David Li0b7d8582017-06-02 22:08:04 +0000739 // When profile data is not available, we need to be conservative in
740 // estimating the overall savings. Static branch prediction can usually
741 // guess the branch direction right (taken/non-taken), but the guessed
742 // branch probability is usually not biased enough. In case when the
743 // outlined region is predicted to be likely, its probability needs
744 // to be made higher (more biased) to not under-estimate the cost of
745 // function outlining. On the other hand, if the outlined region
746 // is predicted to be less likely, the predicted probablity is usually
747 // higher than the actual. For instance, the actual probability of the
748 // less likely target is only 5%, but the guessed probablity can be
749 // 40%. In the latter case, there is no need for further adjustement.
750 // FIXME: add an option for this.
751 if (OutlineRegionRelFreq < BranchProbability(45, 100))
752 return OutlineRegionRelFreq;
753
754 OutlineRegionRelFreq = std::max(
755 OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000756
757 return OutlineRegionRelFreq;
758}
759
760bool PartialInlinerImpl::shouldPartialInline(
Graham Yiu8b1882c2017-11-30 02:41:36 +0000761 CallSite CS, FunctionCloner &Cloner,
Sean Fertile18f17332018-04-20 19:56:26 +0000762 BlockFrequency WeightedOutliningRcost,
763 OptimizationRemarkEmitter &ORE) {
Xinliang David Li61338462017-05-02 02:44:14 +0000764 using namespace ore;
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +0000765
Xinliang David Li61338462017-05-02 02:44:14 +0000766 Instruction *Call = CS.getInstruction();
767 Function *Callee = CS.getCalledFunction();
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000768 assert(Callee == Cloner.ClonedFunc);
769
Florian Hahna7dcfa72018-03-10 14:53:44 +0000770 if (SkipCostAnalysis)
771 return isInlineViable(*Callee);
772
Xinliang David Li61338462017-05-02 02:44:14 +0000773 Function *Caller = CS.getCaller();
774 auto &CalleeTTI = (*GetTTI)(*Callee);
Wei Mi500606f2019-02-21 02:57:52 +0000775 bool RemarksEnabled =
776 Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
777 DEBUG_TYPE);
Fedor Sergeev652168a2019-04-23 12:43:27 +0000778 assert(Call && "invalid callsite for partial inline");
779 InlineCost IC = getInlineCost(cast<CallBase>(*Call), getInlineParams(),
780 CalleeTTI, *GetAssumptionCache, GetBFI, PSI,
781 RemarksEnabled ? &ORE : nullptr);
Xinliang David Li61338462017-05-02 02:44:14 +0000782
783 if (IC.isAlways()) {
Vivek Pandya95906582017-10-11 17:12:59 +0000784 ORE.emit([&]() {
785 return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000786 << NV("Callee", Cloner.OrigFunc)
Vivek Pandya95906582017-10-11 17:12:59 +0000787 << " should always be fully inlined, not partially";
788 });
Xinliang David Li61338462017-05-02 02:44:14 +0000789 return false;
790 }
791
792 if (IC.isNever()) {
Vivek Pandya95906582017-10-11 17:12:59 +0000793 ORE.emit([&]() {
794 return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000795 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
Xinliang David Li61338462017-05-02 02:44:14 +0000796 << NV("Caller", Caller)
Vivek Pandya95906582017-10-11 17:12:59 +0000797 << " because it should never be inlined (cost=never)";
798 });
Xinliang David Li61338462017-05-02 02:44:14 +0000799 return false;
800 }
801
802 if (!IC) {
Vivek Pandya95906582017-10-11 17:12:59 +0000803 ORE.emit([&]() {
804 return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000805 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
Xinliang David Li61338462017-05-02 02:44:14 +0000806 << NV("Caller", Caller) << " because too costly to inline (cost="
807 << NV("Cost", IC.getCost()) << ", threshold="
Vivek Pandya95906582017-10-11 17:12:59 +0000808 << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
809 });
Xinliang David Li61338462017-05-02 02:44:14 +0000810 return false;
811 }
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000812 const DataLayout &DL = Caller->getParent()->getDataLayout();
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000813
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000814 // The savings of eliminating the call:
Fedor Sergeev652168a2019-04-23 12:43:27 +0000815 int NonWeightedSavings = getCallsiteCost(cast<CallBase>(*Call), DL);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000816 BlockFrequency NormWeightedSavings(NonWeightedSavings);
817
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000818 // Weighted saving is smaller than weighted cost, return false
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000819 if (NormWeightedSavings < WeightedOutliningRcost) {
Vivek Pandya95906582017-10-11 17:12:59 +0000820 ORE.emit([&]() {
821 return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
822 Call)
823 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
824 << NV("Caller", Caller) << " runtime overhead (overhead="
825 << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
826 << ", savings="
827 << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
828 << ")"
829 << " of making the outlined call is too high";
830 });
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000831
832 return false;
833 }
Xinliang David Li61338462017-05-02 02:44:14 +0000834
Vivek Pandya95906582017-10-11 17:12:59 +0000835 ORE.emit([&]() {
836 return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000837 << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
Xinliang David Li61338462017-05-02 02:44:14 +0000838 << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
839 << " (threshold="
Vivek Pandya95906582017-10-11 17:12:59 +0000840 << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
841 });
Xinliang David Li61338462017-05-02 02:44:14 +0000842 return true;
843}
844
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000845// TODO: Ideally we should share Inliner's InlineCost Analysis code.
846// For now use a simplified version. The returned 'InlineCost' will be used
847// to esimate the size cost as well as runtime cost of the BB.
848int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
849 int InlineCost = 0;
850 const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
Florian Hahnfd6ea132018-11-27 18:17:27 +0000851 for (Instruction &I : BB->instructionsWithoutDebug()) {
852 // Skip free instructions.
853 switch (I.getOpcode()) {
Xinliang David Li0b7d8582017-06-02 22:08:04 +0000854 case Instruction::BitCast:
855 case Instruction::PtrToInt:
856 case Instruction::IntToPtr:
857 case Instruction::Alloca:
Florian Hahnfd6ea132018-11-27 18:17:27 +0000858 case Instruction::PHI:
Xinliang David Li0b7d8582017-06-02 22:08:04 +0000859 continue;
860 case Instruction::GetElementPtr:
Florian Hahnfd6ea132018-11-27 18:17:27 +0000861 if (cast<GetElementPtrInst>(&I)->hasAllZeroIndices())
Xinliang David Li0b7d8582017-06-02 22:08:04 +0000862 continue;
Adrian Prantl0e6694d2017-12-19 22:05:25 +0000863 break;
Xinliang David Li0b7d8582017-06-02 22:08:04 +0000864 default:
865 break;
866 }
867
Vedant Kumarb264d692018-12-21 21:49:40 +0000868 if (I.isLifetimeStartOrEnd())
869 continue;
Xinliang David Li0b7d8582017-06-02 22:08:04 +0000870
Florian Hahnfd6ea132018-11-27 18:17:27 +0000871 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
Fedor Sergeev652168a2019-04-23 12:43:27 +0000872 InlineCost += getCallsiteCost(*CI, DL);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000873 continue;
874 }
875
Florian Hahnfd6ea132018-11-27 18:17:27 +0000876 if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
Fedor Sergeev652168a2019-04-23 12:43:27 +0000877 InlineCost += getCallsiteCost(*II, DL);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000878 continue;
879 }
880
Florian Hahnfd6ea132018-11-27 18:17:27 +0000881 if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000882 InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
883 continue;
884 }
885 InlineCost += InlineConstants::InstrCost;
886 }
887 return InlineCost;
888}
889
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000890std::tuple<int, int>
891PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) {
Graham Yiu8b1882c2017-11-30 02:41:36 +0000892 int OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
893 for (auto FuncBBPair : Cloner.OutlinedFunctions) {
894 Function *OutlinedFunc = FuncBBPair.first;
895 BasicBlock* OutliningCallBB = FuncBBPair.second;
896 // Now compute the cost of the call sequence to the outlined function
897 // 'OutlinedFunction' in BB 'OutliningCallBB':
898 OutliningFuncCallCost += computeBBInlineCost(OutliningCallBB);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000899
Graham Yiu8b1882c2017-11-30 02:41:36 +0000900 // Now compute the cost of the extracted/outlined function itself:
901 for (BasicBlock &BB : *OutlinedFunc)
902 OutlinedFunctionCost += computeBBInlineCost(&BB);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000903 }
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000904 assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
Xinliang David Li5fdc75a2017-06-02 22:38:48 +0000905 "Outlined function cost should be no less than the outlined region");
Graham Yiu8b1882c2017-11-30 02:41:36 +0000906
Xinliang David Li0b7d8582017-06-02 22:08:04 +0000907 // The code extractor introduces a new root and exit stub blocks with
908 // additional unconditional branches. Those branches will be eliminated
909 // later with bb layout. The cost should be adjusted accordingly:
Graham Yiu8b1882c2017-11-30 02:41:36 +0000910 OutlinedFunctionCost -=
911 2 * InlineConstants::InstrCost * Cloner.OutlinedFunctions.size();
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000912
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000913 int OutliningRuntimeOverhead =
914 OutliningFuncCallCost +
915 (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
916 ExtraOutliningPenalty;
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000917
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000918 return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000919}
920
921// Create the callsite to profile count map which is
922// used to update the original function's entry count,
923// after the function is partially inlined into the callsite.
924void PartialInlinerImpl::computeCallsiteToProfCountMap(
925 Function *DuplicateFunction,
926 DenseMap<User *, uint64_t> &CallSiteToProfCountMap) {
927 std::vector<User *> Users(DuplicateFunction->user_begin(),
928 DuplicateFunction->user_end());
929 Function *CurrentCaller = nullptr;
Vitaly Bukaa6374892017-05-27 05:32:09 +0000930 std::unique_ptr<BlockFrequencyInfo> TempBFI;
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000931 BlockFrequencyInfo *CurrentCallerBFI = nullptr;
932
933 auto ComputeCurrBFI = [&,this](Function *Caller) {
934 // For the old pass manager:
935 if (!GetBFI) {
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000936 DominatorTree DT(*Caller);
937 LoopInfo LI(DT);
938 BranchProbabilityInfo BPI(*Caller, LI);
Vitaly Bukaa6374892017-05-27 05:32:09 +0000939 TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
940 CurrentCallerBFI = TempBFI.get();
Xinliang David Li66bdfca2017-05-12 23:41:43 +0000941 } else {
942 // New pass manager:
943 CurrentCallerBFI = &(*GetBFI)(*Caller);
944 }
945 };
946
947 for (User *User : Users) {
948 CallSite CS = getCallSite(User);
949 Function *Caller = CS.getCaller();
950 if (CurrentCaller != Caller) {
951 CurrentCaller = Caller;
952 ComputeCurrBFI(Caller);
953 } else {
954 assert(CurrentCallerBFI && "CallerBFI is not set");
955 }
956 BasicBlock *CallBB = CS.getInstruction()->getParent();
957 auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
958 if (Count)
959 CallSiteToProfCountMap[User] = *Count;
960 else
961 CallSiteToProfCountMap[User] = 0;
962 }
963}
964
Graham Yiu8b1882c2017-11-30 02:41:36 +0000965PartialInlinerImpl::FunctionCloner::FunctionCloner(
Sergey Dmitriev807960e2019-02-08 06:55:18 +0000966 Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
967 function_ref<AssumptionCache *(Function &)> LookupAC)
968 : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
Xinliang David Lieea0ade2017-06-15 23:56:59 +0000969 ClonedOI = llvm::make_unique<FunctionOutliningInfo>();
970
971 // Clone the function, so that we can hack away on it.
972 ValueToValueMapTy VMap;
973 ClonedFunc = CloneFunction(F, VMap);
974
975 ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
976 ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
977 for (BasicBlock *BB : OI->Entries) {
978 ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
979 }
980 for (BasicBlock *E : OI->ReturnBlockPreds) {
981 BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
982 ClonedOI->ReturnBlockPreds.push_back(NewE);
983 }
984 // Go ahead and update all uses to the duplicate, so that we can just
985 // use the inliner functionality when we're done hacking.
986 F->replaceAllUsesWith(ClonedFunc);
987}
988
Graham Yiu8b1882c2017-11-30 02:41:36 +0000989PartialInlinerImpl::FunctionCloner::FunctionCloner(
990 Function *F, FunctionOutliningMultiRegionInfo *OI,
Sergey Dmitriev807960e2019-02-08 06:55:18 +0000991 OptimizationRemarkEmitter &ORE,
992 function_ref<AssumptionCache *(Function &)> LookupAC)
993 : OrigFunc(F), ORE(ORE), LookupAC(LookupAC) {
Graham Yiu8b1882c2017-11-30 02:41:36 +0000994 ClonedOMRI = llvm::make_unique<FunctionOutliningMultiRegionInfo>();
995
996 // Clone the function, so that we can hack away on it.
997 ValueToValueMapTy VMap;
998 ClonedFunc = CloneFunction(F, VMap);
999
1000 // Go through all Outline Candidate Regions and update all BasicBlock
1001 // information.
1002 for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
1003 OI->ORI) {
1004 SmallVector<BasicBlock *, 8> Region;
1005 for (BasicBlock *BB : RegionInfo.Region) {
1006 Region.push_back(cast<BasicBlock>(VMap[BB]));
1007 }
1008 BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
1009 BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
1010 BasicBlock *NewReturnBlock = nullptr;
1011 if (RegionInfo.ReturnBlock)
1012 NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
1013 FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
1014 Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
1015 ClonedOMRI->ORI.push_back(MappedRegionInfo);
1016 }
1017 // Go ahead and update all uses to the duplicate, so that we can just
1018 // use the inliner functionality when we're done hacking.
1019 F->replaceAllUsesWith(ClonedFunc);
1020}
1021
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001022void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() {
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001023 auto getFirstPHI = [](BasicBlock *BB) {
1024 BasicBlock::iterator I = BB->begin();
1025 PHINode *FirstPhi = nullptr;
1026 while (I != BB->end()) {
1027 PHINode *Phi = dyn_cast<PHINode>(I);
1028 if (!Phi)
1029 break;
1030 if (!FirstPhi) {
1031 FirstPhi = Phi;
1032 break;
1033 }
1034 }
1035 return FirstPhi;
1036 };
1037
Graham Yiu8b1882c2017-11-30 02:41:36 +00001038 // Shouldn't need to normalize PHIs if we're not outlining non-early return
1039 // blocks.
1040 if (!ClonedOI)
1041 return;
1042
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001043 // Special hackery is needed with PHI nodes that have inputs from more than
1044 // one extracted block. For simplicity, just split the PHIs into a two-level
1045 // sequence of PHIs, some of which will go in the extracted region, and some
1046 // of which will go outside.
1047 BasicBlock *PreReturn = ClonedOI->ReturnBlock;
1048 // only split block when necessary:
1049 PHINode *FirstPhi = getFirstPHI(PreReturn);
1050 unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
1051
1052 if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
1053 return;
1054
1055 auto IsTrivialPhi = [](PHINode *PN) -> Value * {
1056 Value *CommonValue = PN->getIncomingValue(0);
1057 if (all_of(PN->incoming_values(),
1058 [&](Value *V) { return V == CommonValue; }))
1059 return CommonValue;
1060 return nullptr;
1061 };
1062
1063 ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
1064 ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
1065 BasicBlock::iterator I = PreReturn->begin();
1066 Instruction *Ins = &ClonedOI->ReturnBlock->front();
1067 SmallVector<Instruction *, 4> DeadPhis;
1068 while (I != PreReturn->end()) {
1069 PHINode *OldPhi = dyn_cast<PHINode>(I);
1070 if (!OldPhi)
1071 break;
1072
1073 PHINode *RetPhi =
1074 PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
1075 OldPhi->replaceAllUsesWith(RetPhi);
1076 Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
1077
1078 RetPhi->addIncoming(&*I, PreReturn);
1079 for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
1080 RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
1081 OldPhi->removeIncomingValue(E);
1082 }
1083
1084 // After incoming values splitting, the old phi may become trivial.
1085 // Keeping the trivial phi can introduce definition inside the outline
1086 // region which is live-out, causing necessary overhead (load, store
1087 // arg passing etc).
1088 if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
1089 OldPhi->replaceAllUsesWith(OldPhiVal);
1090 DeadPhis.push_back(OldPhi);
1091 }
1092 ++I;
Graham Yiu8b1882c2017-11-30 02:41:36 +00001093 }
1094 for (auto *DP : DeadPhis)
1095 DP->eraseFromParent();
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001096
Graham Yiu8b1882c2017-11-30 02:41:36 +00001097 for (auto E : ClonedOI->ReturnBlockPreds) {
1098 E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
1099 }
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001100}
1101
Graham Yiu8b1882c2017-11-30 02:41:36 +00001102bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
1103
1104 auto ComputeRegionCost = [](SmallVectorImpl<BasicBlock *> &Region) {
1105 int Cost = 0;
1106 for (BasicBlock* BB : Region)
1107 Cost += computeBBInlineCost(BB);
1108 return Cost;
1109 };
1110
1111 assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline");
1112
1113 if (ClonedOMRI->ORI.empty())
1114 return false;
1115
1116 // The CodeExtractor needs a dominator tree.
1117 DominatorTree DT;
1118 DT.recalculate(*ClonedFunc);
1119
1120 // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1121 LoopInfo LI(DT);
1122 BranchProbabilityInfo BPI(*ClonedFunc, LI);
1123 ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1124
1125 SetVector<Value *> Inputs, Outputs, Sinks;
1126 for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
1127 ClonedOMRI->ORI) {
1128 int CurrentOutlinedRegionCost = ComputeRegionCost(RegionInfo.Region);
1129
1130 CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
Sergey Dmitriev807960e2019-02-08 06:55:18 +00001131 ClonedFuncBFI.get(), &BPI,
1132 LookupAC(*RegionInfo.EntryBlock->getParent()),
1133 /* AllowVarargs */ false);
Graham Yiu8b1882c2017-11-30 02:41:36 +00001134
1135 CE.findInputsOutputs(Inputs, Outputs, Sinks);
1136
1137#ifndef NDEBUG
1138 if (TracePartialInlining) {
1139 dbgs() << "inputs: " << Inputs.size() << "\n";
1140 dbgs() << "outputs: " << Outputs.size() << "\n";
1141 for (Value *value : Inputs)
1142 dbgs() << "value used in func: " << *value << "\n";
1143 for (Value *output : Outputs)
1144 dbgs() << "instr used in func: " << *output << "\n";
1145 }
1146#endif
1147 // Do not extract regions that have live exit variables.
1148 if (Outputs.size() > 0 && !ForceLiveExit)
1149 continue;
1150
1151 Function *OutlinedFunc = CE.extractCodeRegion();
1152
1153 if (OutlinedFunc) {
1154 CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc);
1155 BasicBlock *OutliningCallBB = OCS.getInstruction()->getParent();
1156 assert(OutliningCallBB->getParent() == ClonedFunc);
1157 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
1158 NumColdRegionsOutlined++;
1159 OutlinedRegionCost += CurrentOutlinedRegionCost;
1160
1161 if (MarkOutlinedColdCC) {
1162 OutlinedFunc->setCallingConv(CallingConv::Cold);
1163 OCS.setCallingConv(CallingConv::Cold);
1164 }
1165 } else
1166 ORE.emit([&]() {
1167 return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1168 &RegionInfo.Region.front()->front())
1169 << "Failed to extract region at block "
1170 << ore::NV("Block", RegionInfo.Region.front());
1171 });
1172 }
1173
1174 return !OutlinedFunctions.empty();
1175}
1176
1177Function *
1178PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001179 // Returns true if the block is to be partial inlined into the caller
1180 // (i.e. not to be extracted to the out of line function)
1181 auto ToBeInlined = [&, this](BasicBlock *BB) {
1182 return BB == ClonedOI->ReturnBlock ||
1183 (std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) !=
1184 ClonedOI->Entries.end());
1185 };
1186
Graham Yiu8b1882c2017-11-30 02:41:36 +00001187 assert(ClonedOI && "Expecting OutlineInfo for single region outline");
1188 // The CodeExtractor needs a dominator tree.
1189 DominatorTree DT;
1190 DT.recalculate(*ClonedFunc);
1191
1192 // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1193 LoopInfo LI(DT);
1194 BranchProbabilityInfo BPI(*ClonedFunc, LI);
1195 ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1196
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001197 // Gather up the blocks that we're going to extract.
1198 std::vector<BasicBlock *> ToExtract;
1199 ToExtract.push_back(ClonedOI->NonReturnBlock);
1200 OutlinedRegionCost +=
1201 PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock);
1202 for (BasicBlock &BB : *ClonedFunc)
1203 if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
1204 ToExtract.push_back(&BB);
1205 // FIXME: the code extractor may hoist/sink more code
1206 // into the outlined function which may make the outlining
1207 // overhead (the difference of the outlined function cost
1208 // and OutliningRegionCost) look larger.
1209 OutlinedRegionCost += computeBBInlineCost(&BB);
1210 }
1211
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001212 // Extract the body of the if.
Graham Yiu8b1882c2017-11-30 02:41:36 +00001213 Function *OutlinedFunc =
1214 CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
Sergey Dmitriev807960e2019-02-08 06:55:18 +00001215 ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
Graham Yiu8b1882c2017-11-30 02:41:36 +00001216 /* AllowVarargs */ true)
1217 .extractCodeRegion();
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001218
1219 if (OutlinedFunc) {
Graham Yiu8b1882c2017-11-30 02:41:36 +00001220 BasicBlock *OutliningCallBB =
1221 PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc)
1222 .getInstruction()
1223 ->getParent();
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001224 assert(OutliningCallBB->getParent() == ClonedFunc);
Graham Yiu8b1882c2017-11-30 02:41:36 +00001225 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
1226 } else
1227 ORE.emit([&]() {
1228 return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
1229 &ToExtract.front()->front())
1230 << "Failed to extract region at block "
1231 << ore::NV("Block", ToExtract.front());
1232 });
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001233
1234 return OutlinedFunc;
1235}
1236
1237PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
1238 // Ditch the duplicate, since we're done with it, and rewrite all remaining
1239 // users (function pointers, etc.) back to the original function.
1240 ClonedFunc->replaceAllUsesWith(OrigFunc);
1241 ClonedFunc->eraseFromParent();
1242 if (!IsFunctionInlined) {
Graham Yiu8b1882c2017-11-30 02:41:36 +00001243 // Remove each function that was speculatively created if there is no
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001244 // reference.
Graham Yiu8b1882c2017-11-30 02:41:36 +00001245 for (auto FuncBBPair : OutlinedFunctions) {
1246 Function *Func = FuncBBPair.first;
1247 Func->eraseFromParent();
1248 }
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001249 }
1250}
1251
Graham Yiu8b1882c2017-11-30 02:41:36 +00001252std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function *F) {
1253
Xinliang David Lid21601a2017-04-27 16:34:00 +00001254 if (F->hasAddressTaken())
Graham Yiu8b1882c2017-11-30 02:41:36 +00001255 return {false, nullptr};
Xinliang David Lid21601a2017-04-27 16:34:00 +00001256
Xinliang David Liab8722f2017-05-02 18:43:21 +00001257 // Let inliner handle it
1258 if (F->hasFnAttribute(Attribute::AlwaysInline))
Graham Yiu8b1882c2017-11-30 02:41:36 +00001259 return {false, nullptr};
Xinliang David Liab8722f2017-05-02 18:43:21 +00001260
1261 if (F->hasFnAttribute(Attribute::NoInline))
Graham Yiu8b1882c2017-11-30 02:41:36 +00001262 return {false, nullptr};
Xinliang David Liab8722f2017-05-02 18:43:21 +00001263
1264 if (PSI->isFunctionEntryCold(F))
Graham Yiu8b1882c2017-11-30 02:41:36 +00001265 return {false, nullptr};
Xinliang David Liab8722f2017-05-02 18:43:21 +00001266
Matthias Braun9fd397b2018-10-31 00:23:23 +00001267 if (empty(F->users()))
Graham Yiu8b1882c2017-11-30 02:41:36 +00001268 return {false, nullptr};
Xinliang David Lid21601a2017-04-27 16:34:00 +00001269
Sean Fertile18f17332018-04-20 19:56:26 +00001270 OptimizationRemarkEmitter ORE(F);
Graham Yiu8b1882c2017-11-30 02:41:36 +00001271
1272 // Only try to outline cold regions if we have a profile summary, which
1273 // implies we have profiling information.
Easwaran Ramana17f2202017-12-22 01:33:52 +00001274 if (PSI->hasProfileSummary() && F->hasProfileData() &&
Graham Yiu8b1882c2017-11-30 02:41:36 +00001275 !DisableMultiRegionPartialInline) {
1276 std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
Sean Fertile18f17332018-04-20 19:56:26 +00001277 computeOutliningColdRegionsInfo(F, ORE);
Graham Yiu8b1882c2017-11-30 02:41:36 +00001278 if (OMRI) {
Sergey Dmitriev807960e2019-02-08 06:55:18 +00001279 FunctionCloner Cloner(F, OMRI.get(), ORE, LookupAssumptionCache);
Graham Yiu8b1882c2017-11-30 02:41:36 +00001280
1281#ifndef NDEBUG
1282 if (TracePartialInlining) {
1283 dbgs() << "HotCountThreshold = " << PSI->getHotCountThreshold() << "\n";
1284 dbgs() << "ColdCountThreshold = " << PSI->getColdCountThreshold()
1285 << "\n";
1286 }
1287#endif
1288 bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
1289
1290 if (DidOutline) {
1291#ifndef NDEBUG
1292 if (TracePartialInlining) {
1293 dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
1294 Cloner.ClonedFunc->print(dbgs());
1295 dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
1296 }
1297#endif
1298
1299 if (tryPartialInline(Cloner))
1300 return {true, nullptr};
1301 }
1302 }
1303 }
1304
1305 // Fall-thru to regular partial inlining if we:
1306 // i) can't find any cold regions to outline, or
1307 // ii) can't inline the outlined function anywhere.
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001308 std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001309 if (!OI)
Graham Yiu8b1882c2017-11-30 02:41:36 +00001310 return {false, nullptr};
Sean Silvafe5abd52016-07-25 05:00:00 +00001311
Sergey Dmitriev807960e2019-02-08 06:55:18 +00001312 FunctionCloner Cloner(F, OI.get(), ORE, LookupAssumptionCache);
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001313 Cloner.NormalizeReturnBlock();
Graham Yiu8b1882c2017-11-30 02:41:36 +00001314
1315 Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
1316
1317 if (!OutlinedFunction)
1318 return {false, nullptr};
Sean Silvafe5abd52016-07-25 05:00:00 +00001319
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001320 bool AnyInline = tryPartialInline(Cloner);
Xinliang David Li392e9752017-05-14 02:54:02 +00001321
1322 if (AnyInline)
Graham Yiu8b1882c2017-11-30 02:41:36 +00001323 return {true, OutlinedFunction};
Xinliang David Li392e9752017-05-14 02:54:02 +00001324
Graham Yiu8b1882c2017-11-30 02:41:36 +00001325 return {false, nullptr};
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001326}
Sean Silvafe5abd52016-07-25 05:00:00 +00001327
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001328bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
Graham Yiu8b1882c2017-11-30 02:41:36 +00001329 if (Cloner.OutlinedFunctions.empty())
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001330 return false;
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001331
Graham Yiu8b1882c2017-11-30 02:41:36 +00001332 int SizeCost = 0;
1333 BlockFrequency WeightedRcost;
1334 int NonWeightedRcost;
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001335 std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner);
1336
Graham Yiu8b1882c2017-11-30 02:41:36 +00001337 // Only calculate RelativeToEntryFreq when we are doing single region
1338 // outlining.
1339 BranchProbability RelativeToEntryFreq;
1340 if (Cloner.ClonedOI) {
1341 RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
1342 } else
1343 // RelativeToEntryFreq doesn't make sense when we have more than one
1344 // outlined call because each call will have a different relative frequency
1345 // to the entry block. We can consider using the average, but the
1346 // usefulness of that information is questionable. For now, assume we never
1347 // execute the calls to outlined functions.
1348 RelativeToEntryFreq = BranchProbability(0, 1);
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001349
Graham Yiu8b1882c2017-11-30 02:41:36 +00001350 WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
1351
1352 // The call sequence(s) to the outlined function(s) are larger than the sum of
1353 // the original outlined region size(s), it does not increase the chances of
1354 // inlining the function with outlining (The inliner uses the size increase to
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001355 // model the cost of inlining a callee).
1356 if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
Sean Fertile18f17332018-04-20 19:56:26 +00001357 OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001358 DebugLoc DLoc;
1359 BasicBlock *Block;
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001360 std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc);
Sean Fertile18f17332018-04-20 19:56:26 +00001361 OrigFuncORE.emit([&]() {
Vivek Pandya95906582017-10-11 17:12:59 +00001362 return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001363 DLoc, Block)
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001364 << ore::NV("Function", Cloner.OrigFunc)
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001365 << " not partially inlined into callers (Original Size = "
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001366 << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001367 << ", Size of call sequence to outlined function = "
Vivek Pandya95906582017-10-11 17:12:59 +00001368 << ore::NV("NewSize", SizeCost) << ")";
1369 });
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001370 return false;
1371 }
1372
Matthias Braun9fd397b2018-10-31 00:23:23 +00001373 assert(empty(Cloner.OrigFunc->users()) &&
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001374 "F's users should all be replaced!");
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001375
1376 std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
1377 Cloner.ClonedFunc->user_end());
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001378
1379 DenseMap<User *, uint64_t> CallSiteToProfCountMap;
Easwaran Ramana17f2202017-12-22 01:33:52 +00001380 auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1381 if (CalleeEntryCount)
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001382 computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001383
Easwaran Ramane5b8de22018-01-17 22:24:23 +00001384 uint64_t CalleeEntryCountV =
1385 (CalleeEntryCount ? CalleeEntryCount.getCount() : 0);
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001386
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001387 bool AnyInline = false;
1388 for (User *User : Users) {
1389 CallSite CS = getCallSite(User);
1390
1391 if (IsLimitReached())
1392 continue;
1393
Sean Fertile18f17332018-04-20 19:56:26 +00001394 OptimizationRemarkEmitter CallerORE(CS.getCaller());
1395 if (!shouldPartialInline(CS, Cloner, WeightedRcost, CallerORE))
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001396 continue;
1397
Florian Hahn41e32bf2017-11-03 11:29:00 +00001398 // Construct remark before doing the inlining, as after successful inlining
1399 // the callsite is removed.
1400 OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction());
1401 OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
1402 << ore::NV("Caller", CS.getCaller());
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001403
1404 InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
Graham Yiu8b1882c2017-11-30 02:41:36 +00001405 // We can only forward varargs when we outlined a single region, else we
1406 // bail on vararg functions.
1407 if (!InlineFunction(CS, IFI, nullptr, true,
1408 (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1409 : nullptr)))
Florian Hahn41e32bf2017-11-03 11:29:00 +00001410 continue;
1411
Sean Fertile18f17332018-04-20 19:56:26 +00001412 CallerORE.emit(OR);
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001413
1414 // Now update the entry count:
1415 if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
1416 uint64_t CallSiteCount = CallSiteToProfCountMap[User];
1417 CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
1418 }
1419
1420 AnyInline = true;
1421 NumPartialInlining++;
1422 // Update the stats
Graham Yiu8b1882c2017-11-30 02:41:36 +00001423 if (Cloner.ClonedOI)
1424 NumPartialInlined++;
1425 else
1426 NumColdOutlinePartialInlined++;
1427
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001428 }
1429
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001430 if (AnyInline) {
1431 Cloner.IsFunctionInlined = true;
1432 if (CalleeEntryCount)
Easwaran Ramane5b8de22018-01-17 22:24:23 +00001433 Cloner.OrigFunc->setEntryCount(
1434 CalleeEntryCount.setCount(CalleeEntryCountV));
Sean Fertile18f17332018-04-20 19:56:26 +00001435 OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1436 OrigFuncORE.emit([&]() {
Graham Yiu8b1882c2017-11-30 02:41:36 +00001437 return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
1438 << "Partially inlined into at least one caller";
1439 });
1440
Xinliang David Lieea0ade2017-06-15 23:56:59 +00001441 }
Xinliang David Li66bdfca2017-05-12 23:41:43 +00001442
1443 return AnyInline;
Owen Anderson2f82e272009-06-14 08:26:32 +00001444}
1445
Sean Silvafe5abd52016-07-25 05:00:00 +00001446bool PartialInlinerImpl::run(Module &M) {
Xinliang David Lidb8d09b2017-04-23 23:39:04 +00001447 if (DisablePartialInlining)
1448 return false;
1449
Sean Silva519323d2016-07-25 05:57:59 +00001450 std::vector<Function *> Worklist;
1451 Worklist.reserve(M.size());
Benjamin Kramer135f7352016-06-26 12:28:59 +00001452 for (Function &F : M)
1453 if (!F.use_empty() && !F.isDeclaration())
Sean Silva519323d2016-07-25 05:57:59 +00001454 Worklist.push_back(&F);
Benjamin Kramer135f7352016-06-26 12:28:59 +00001455
Sean Silva519323d2016-07-25 05:57:59 +00001456 bool Changed = false;
1457 while (!Worklist.empty()) {
1458 Function *CurrFunc = Worklist.back();
1459 Worklist.pop_back();
Sean Silvafe5abd52016-07-25 05:00:00 +00001460
Sean Silva519323d2016-07-25 05:57:59 +00001461 if (CurrFunc->use_empty())
1462 continue;
Sean Silvafe5abd52016-07-25 05:00:00 +00001463
Sean Silva519323d2016-07-25 05:57:59 +00001464 bool Recursive = false;
1465 for (User *U : CurrFunc->users())
1466 if (Instruction *I = dyn_cast<Instruction>(U))
1467 if (I->getParent()->getParent() == CurrFunc) {
1468 Recursive = true;
Owen Anderson2f82e272009-06-14 08:26:32 +00001469 break;
1470 }
Sean Silva519323d2016-07-25 05:57:59 +00001471 if (Recursive)
1472 continue;
Sean Silvafe5abd52016-07-25 05:00:00 +00001473
Graham Yiu8b1882c2017-11-30 02:41:36 +00001474 std::pair<bool, Function * > Result = unswitchFunction(CurrFunc);
1475 if (Result.second)
1476 Worklist.push_back(Result.second);
Graham Yiu58dbc002018-08-03 14:42:53 +00001477 Changed |= Result.first;
Owen Anderson2f82e272009-06-14 08:26:32 +00001478 }
Easwaran Raman1832bf62016-06-27 16:50:18 +00001479
Sean Silva519323d2016-07-25 05:57:59 +00001480 return Changed;
Sean Silvafe5abd52016-07-25 05:00:00 +00001481}
1482
// Definition of the legacy pass's static identifier member, initialized to 0.
1483char PartialInlinerLegacyPass::ID = 0;
Eugene Zelenkoe9ea08a2017-10-10 22:49:55 +00001484
// Legacy pass registration: PartialInlinerLegacyPass is registered under the
// argument "partial-inliner" with the description "Partial Inliner", and
// declares dependencies on AssumptionCacheTracker,
// ProfileSummaryInfoWrapperPass and TargetTransformInfoWrapperPass.
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags -- confirm against PassSupport.h.
Daniel Jasperaec2fa32016-12-19 08:22:17 +00001485INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
1486 "Partial Inliner", false, false)
1487INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
Xinliang David Li61338462017-05-02 02:44:14 +00001488INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
1489INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
Daniel Jasperaec2fa32016-12-19 08:22:17 +00001490INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
1491 "Partial Inliner", false, false)
Sean Silvafe5abd52016-07-25 05:00:00 +00001492
/// Creates a new, heap-allocated PartialInlinerLegacyPass and returns it as a
/// ModulePass pointer.
/// NOTE(review): the caller presumably takes ownership of the returned pass
/// (e.g. a legacy pass manager) -- confirm at the call sites.
1493ModulePass *llvm::createPartialInliningPass() {
1494 return new PartialInlinerLegacyPass();
1495}
1496
1497PreservedAnalyses PartialInlinerPass::run(Module &M,
1498 ModuleAnalysisManager &AM) {
Daniel Jasperaec2fa32016-12-19 08:22:17 +00001499 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
Xinliang David Li61338462017-05-02 02:44:14 +00001500
Daniel Jasperaec2fa32016-12-19 08:22:17 +00001501 std::function<AssumptionCache &(Function &)> GetAssumptionCache =
1502 [&FAM](Function &F) -> AssumptionCache & {
1503 return FAM.getResult<AssumptionAnalysis>(F);
1504 };
Xinliang David Li61338462017-05-02 02:44:14 +00001505
Sergey Dmitriev807960e2019-02-08 06:55:18 +00001506 auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * {
1507 return FAM.getCachedResult<AssumptionAnalysis>(F);
1508 };
1509
Xinliang David Li61338462017-05-02 02:44:14 +00001510 std::function<BlockFrequencyInfo &(Function &)> GetBFI =
1511 [&FAM](Function &F) -> BlockFrequencyInfo & {
1512 return FAM.getResult<BlockFrequencyAnalysis>(F);
1513 };
1514
1515 std::function<TargetTransformInfo &(Function &)> GetTTI =
1516 [&FAM](Function &F) -> TargetTransformInfo & {
1517 return FAM.getResult<TargetIRAnalysis>(F);
1518 };
1519
1520 ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
1521
Sergey Dmitriev807960e2019-02-08 06:55:18 +00001522 if (PartialInlinerImpl(&GetAssumptionCache, LookupAssumptionCache, &GetTTI,
1523 {GetBFI}, PSI)
Graham Yiu8b1882c2017-11-30 02:41:36 +00001524 .run(M))
Easwaran Raman1832bf62016-06-27 16:50:18 +00001525 return PreservedAnalyses::none();
1526 return PreservedAnalyses::all();
Duncan Sands29c8efc2009-07-03 15:30:58 +00001527}