//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements inline cost analysis.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/InlineCost.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "inline-cost"

STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");

// Threshold to use when optsize is specified (and there is no
// -inline-threshold).
const int OptSizeThreshold = 75;

// Threshold to use when -Oz is specified (and there is no -inline-threshold).
const int OptMinSizeThreshold = 25;

// Threshold to use when -O[34] is specified (and there is no
// -inline-threshold).
const int OptAggressiveThreshold = 275;

static cl::opt<int> DefaultInlineThreshold(
    "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
    cl::desc("Control the amount of inlining to perform (default = 225)"));

static cl::opt<int> HintThreshold(
    "inlinehint-threshold", cl::Hidden, cl::init(325),
    cl::desc("Threshold for inlining functions with inline hint"));

// We introduce this threshold to help performance of instrumentation-based
// PGO before we actually hook up the inliner with analysis passes such as
// BPI and BFI.
static cl::opt<int> ColdThreshold(
    "inlinecold-threshold", cl::Hidden, cl::init(225),
    cl::desc("Threshold for inlining functions with cold attribute"));

namespace {

class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  typedef InstVisitor<CallAnalyzer, bool> Base;
  friend class InstVisitor<CallAnalyzer, bool>;

  /// The TargetTransformInfo available for this compilation.
  const TargetTransformInfo &TTI;

  /// The cache of @llvm.assume intrinsics.
  AssumptionCacheTracker *ACT;

  // The called function.
  Function &F;

  // The candidate callsite being analyzed. Please do not use this to do
  // analysis in the caller function; we want the inline cost query to be
  // easily cacheable. Instead, use the cover function paramHasAttr.
  CallSite CandidateCS;

  int Threshold;
  int Cost;

  bool IsCallerRecursive;
  bool IsRecursiveCall;
  bool ExposesReturnsTwice;
  bool HasDynamicAlloca;
  bool ContainsNoDuplicateCall;
  bool HasReturn;
  bool HasIndirectBr;
  bool HasFrameEscape;

  /// Number of bytes allocated statically by the callee.
  uint64_t AllocatedSize;
  unsigned NumInstructions, NumVectorInstructions;
  int FiftyPercentVectorBonus, TenPercentVectorBonus;
  int VectorBonus;

  // While we walk the potentially-inlined instructions, we build up and
  // maintain a mapping of simplified values specific to this callsite. The
  // idea is to propagate any special information we have about arguments to
  // this call through the inlinable section of the function, and account for
  // likely simplifications post-inlining. The most important aspect we track
  // is CFG altering simplifications -- when we prove a basic block dead, that
  // can cause dramatic shifts in the cost of inlining a function.
  DenseMap<Value *, Constant *> SimplifiedValues;

  // Keep track of the values which map back (through function arguments) to
  // allocas on the caller stack which could be simplified through SROA.
  DenseMap<Value *, Value *> SROAArgValues;

  // The mapping of caller Alloca values to their accumulated cost savings. If
  // we have to disable SROA for one of the allocas, this tells us how much
  // cost must be added.
  DenseMap<Value *, int> SROAArgCosts;

  // Keep track of values which map to a pointer base and constant offset.
  DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs;

  // Custom simplification helper routines.
  bool isAllocaDerivedArg(Value *V);
  bool lookupSROAArgAndCost(Value *V, Value *&Arg,
                            DenseMap<Value *, int>::iterator &CostIt);
  void disableSROA(DenseMap<Value *, int>::iterator CostIt);
  void disableSROA(Value *V);
  void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                          int InstructionCost);
  bool isGEPOffsetConstant(GetElementPtrInst &GEP);
  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
  bool simplifyCallSite(Function *F, CallSite CS);
  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);

  /// Return true if the given argument to the function being considered for
  /// inlining has the given attribute set either at the call site or the
  /// function declaration. Primarily used to inspect call site specific
  /// attributes since these can be more precise than the ones on the callee
  /// itself.
  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);

  /// Return true if the given value is known non null within the callee if
  /// inlined through this particular callsite.
  bool isKnownNonNullInCallee(Value *V);

  /// Update Threshold based on callsite properties such as callee
  /// attributes and callee hotness for PGO builds. The Callee is explicitly
  /// passed to support analyzing indirect calls whose target is inferred by
  /// analysis.
  void updateThreshold(CallSite CS, Function &Callee);

  // Custom analysis routines.
  bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);

  // Disable several entry points to the visitor so we don't accidentally use
  // them by declaring but not defining them here.
  void visit(Module *); void visit(Module &);
  void visit(Function *); void visit(Function &);
  void visit(BasicBlock *); void visit(BasicBlock &);

  // Provide base case for our instruction visit.
  bool visitInstruction(Instruction &I);

  // Our visit overrides.
  bool visitAlloca(AllocaInst &I);
  bool visitPHI(PHINode &I);
  bool visitGetElementPtr(GetElementPtrInst &I);
  bool visitBitCast(BitCastInst &I);
  bool visitPtrToInt(PtrToIntInst &I);
  bool visitIntToPtr(IntToPtrInst &I);
  bool visitCastInst(CastInst &I);
  bool visitUnaryInstruction(UnaryInstruction &I);
  bool visitCmpInst(CmpInst &I);
  bool visitSub(BinaryOperator &I);
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitLoad(LoadInst &I);
  bool visitStore(StoreInst &I);
  bool visitExtractValue(ExtractValueInst &I);
  bool visitInsertValue(InsertValueInst &I);
  bool visitCallSite(CallSite CS);
  bool visitReturnInst(ReturnInst &RI);
  bool visitBranchInst(BranchInst &BI);
  bool visitSwitchInst(SwitchInst &SI);
  bool visitIndirectBrInst(IndirectBrInst &IBI);
  bool visitResumeInst(ResumeInst &RI);
  bool visitCleanupReturnInst(CleanupReturnInst &RI);
  bool visitCatchReturnInst(CatchReturnInst &RI);
  bool visitUnreachableInst(UnreachableInst &I);

public:
  CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT,
               Function &Callee, int Threshold, CallSite CSArg)
      : TTI(TTI), ACT(ACT), F(Callee), CandidateCS(CSArg), Threshold(Threshold),
        Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
        ExposesReturnsTwice(false), HasDynamicAlloca(false),
        ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
        HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
        NumVectorInstructions(0), FiftyPercentVectorBonus(0),
        TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
        NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
        NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
        SROACostSavings(0), SROACostSavingsLost(0) {}

  bool analyzeCall(CallSite CS);

  int getThreshold() { return Threshold; }
  int getCost() { return Cost; }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs;
  unsigned NumConstantOffsetPtrArgs;
  unsigned NumAllocaArgs;
  unsigned NumConstantPtrCmps;
  unsigned NumConstantPtrDiffs;
  unsigned NumInstructionsSimplified;
  unsigned SROACostSavings;
  unsigned SROACostSavingsLost;

  void dump();
};

} // namespace

/// \brief Test whether the given value is an Alloca-derived function argument.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  return SROAArgValues.count(V);
}

/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
/// Returns false if V does not map to a SROA-candidate.
bool CallAnalyzer::lookupSROAArgAndCost(
    Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
  if (SROAArgValues.empty() || SROAArgCosts.empty())
    return false;

  DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
  if (ArgIt == SROAArgValues.end())
    return false;

  Arg = ArgIt->second;
  CostIt = SROAArgCosts.find(Arg);
  return CostIt != SROAArgCosts.end();
}

/// \brief Disable SROA for the candidate marked by this cost iterator.
///
/// This marks the candidate as no longer viable for SROA, and adds the cost
/// savings associated with it back into the inline cost measurement.
void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
  // If we're no longer able to perform SROA we need to undo its cost savings
  // and prevent subsequent analysis.
  Cost += CostIt->second;
  SROACostSavings -= CostIt->second;
  SROACostSavingsLost += CostIt->second;
  SROAArgCosts.erase(CostIt);
}

/// \brief If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(V, SROAArg, CostIt))
    disableSROA(CostIt);
}

/// \brief Accumulate the given cost for a particular SROA candidate.
void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                                      int InstructionCost) {
  CostIt->second += InstructionCost;
  SROACostSavings += InstructionCost;
}

/// \brief Check whether a GEP's indices are all constant.
///
/// Respects any simplified values known during the analysis of this callsite.
bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
    if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
      return false;

  return true;
}

/// \brief Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  const DataLayout &DL = F.getParent()->getDataLayout();
  unsigned IntPtrWidth = DL.getPointerSizeInBits();
  assert(IntPtrWidth == Offset.getBitWidth());

  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
        OpC = dyn_cast<ConstantInt>(SimpleOp);
    if (!OpC)
      return false;
    if (OpC->isZero()) continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = dyn_cast<StructType>(*GTI)) {
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = DL.getStructLayout(STy);
      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
      continue;
    }

    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  }
  return true;
}

bool CallAnalyzer::visitAlloca(AllocaInst &I) {
  // Check whether inlining will turn a dynamic alloca into a static
  // alloca, and handle that case.
  if (I.isArrayAllocation()) {
    if (Constant *Size = SimplifiedValues.lookup(I.getArraySize())) {
      ConstantInt *AllocSize = dyn_cast<ConstantInt>(Size);
      assert(AllocSize && "Allocation size not a constant int?");
      Type *Ty = I.getAllocatedType();
      AllocatedSize += Ty->getPrimitiveSizeInBits() * AllocSize->getZExtValue();
      return Base::visitAlloca(I);
    }
  }

  // Accumulate the allocated size.
  if (I.isStaticAlloca()) {
    const DataLayout &DL = F.getParent()->getDataLayout();
    Type *Ty = I.getAllocatedType();
    AllocatedSize += DL.getTypeAllocSize(Ty);
  }

  // We will happily inline static alloca instructions.
  if (I.isStaticAlloca())
    return Base::visitAlloca(I);

  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
  // a variety of reasons, and so we would like to not inline them into
  // functions which don't currently have a dynamic alloca. This simply
  // disables inlining altogether in the presence of a dynamic alloca.
  HasDynamicAlloca = true;
  return false;
}

bool CallAnalyzer::visitPHI(PHINode &I) {
  // FIXME: We should potentially be tracking values through phi nodes,
  // especially when they collapse to a single value due to deleted CFG edges
  // during inlining.

  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate its bonuses. The idea is to disable
  // SROA if it *might* be used in an inappropriate manner.

  // Phi nodes are always zero-cost.
  return true;
}

bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(),
                                            SROAArg, CostIt);

  // Try to fold GEPs of constant-offset call site argument pointers. This
  // requires target data and inbounds GEPs.
  if (I.isInBounds()) {
    // Check if we have a base + offset for the pointer.
    Value *Ptr = I.getPointerOperand();
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
    if (BaseAndOffset.first) {
      // Check if the offset of this GEP is constant, and if so accumulate it
      // into Offset.
      if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
        // Non-constant GEPs aren't folded, and disable SROA.
        if (SROACandidate)
          disableSROA(CostIt);
        return false;
      }

      // Add the result as a new mapping to Base + Offset.
      ConstantOffsetPtrs[&I] = BaseAndOffset;

      // Also handle SROA candidates here, we already know that the GEP is
      // all-constant indexed.
      if (SROACandidate)
        SROAArgValues[&I] = SROAArg;

      return true;
    }
  }

  if (isGEPOffsetConstant(I)) {
    if (SROACandidate)
      SROAArgValues[&I] = SROAArg;

    // Constant GEPs are modeled as free.
    return true;
  }

  // Variable GEPs will require math and will disable SROA.
  if (SROACandidate)
    disableSROA(CostIt);
  return false;
}

bool CallAnalyzer::visitBitCast(BitCastInst &I) {
  // Propagate constants through bitcasts.
  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
  if (!COp)
    COp = SimplifiedValues.lookup(I.getOperand(0));
  if (COp)
    if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
      SimplifiedValues[&I] = C;
      return true;
    }

  // Track base/offsets through casts.
  std::pair<Value *, APInt> BaseAndOffset
    = ConstantOffsetPtrs.lookup(I.getOperand(0));
  // Casts don't change the offset, just wrap it up.
  if (BaseAndOffset.first)
    ConstantOffsetPtrs[&I] = BaseAndOffset;

  // Also look for SROA candidates here.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  // Bitcasts are always zero cost.
  return true;
}

bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
  // Propagate constants through ptrtoint.
  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
  if (!COp)
    COp = SimplifiedValues.lookup(I.getOperand(0));
  if (COp)
    if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
      SimplifiedValues[&I] = C;
      return true;
    }

  // Track base/offset pairs when converted to a plain integer provided the
  // integer is large enough to represent the pointer.
  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
  const DataLayout &DL = F.getParent()->getDataLayout();
  if (IntegerSize >= DL.getPointerSizeInBits()) {
    std::pair<Value *, APInt> BaseAndOffset
      = ConstantOffsetPtrs.lookup(I.getOperand(0));
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // This is really weird. Technically, ptrtoint will disable SROA. However,
  // unless that ptrtoint is *used* somewhere in the live basic blocks after
  // inlining, it will be nuked, and SROA should proceed. All of the uses which
  // would block SROA would also block SROA if applied directly to a pointer,
  // and so we can just add the integer in here. The only places where SROA is
  // preserved either cannot fire on an integer, or won't in-and-of themselves
  // disable SROA (ext) w/o some later use that we would see and disable.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

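  // The conversion itself is modeled as free only when the target reports
  // zero cost for it; otherwise analyzeBlock charges it as an ordinary
  // instruction.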
  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
  // Propagate constants through inttoptr.
  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
  if (!COp)
    COp = SimplifiedValues.lookup(I.getOperand(0));
  if (COp)
    if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
      SimplifiedValues[&I] = C;
      return true;
    }

  // Track base/offset pairs when round-tripped through a pointer without
  // modifications provided the integer is not too large.
  Value *Op = I.getOperand(0);
  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
  const DataLayout &DL = F.getParent()->getDataLayout();
  if (IntegerSize <= DL.getPointerSizeInBits()) {
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

513 // Propagate constants through ptrtoint.
Chandler Carruth86ed5302012-12-28 14:43:42 +0000514 Constant *COp = dyn_cast<Constant>(I.getOperand(0));
515 if (!COp)
516 COp = SimplifiedValues.lookup(I.getOperand(0));
517 if (COp)
Chandler Carruth0539c072012-03-31 12:42:41 +0000518 if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
519 SimplifiedValues[&I] = C;
520 return true;
521 }
522
523 // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
524 disableSROA(I.getOperand(0));
525
Chandler Carruthb8cf5102013-01-21 12:05:16 +0000526 return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
Chandler Carruth0539c072012-03-31 12:42:41 +0000527}
528
bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
  Value *Operand = I.getOperand(0);
  Constant *COp = dyn_cast<Constant>(Operand);
  if (!COp)
    COp = SimplifiedValues.lookup(Operand);
  if (COp) {
    const DataLayout &DL = F.getParent()->getDataLayout();
    if (Constant *C = ConstantFoldInstOperands(&I, COp, DL)) {
      SimplifiedValues[&I] = C;
      return true;
    }
  }

  // Disable any SROA on the argument to arbitrary unary operators.
  disableSROA(Operand);

  return false;
}

bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
  unsigned ArgNo = A->getArgNo();
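  // Attribute indices on a call site are 1-based; index 0 refers to the
  // return value, so argument N is queried at index N+1.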
  return CandidateCS.paramHasAttr(ArgNo+1, Attr);
}

bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
  // Does the *call site* have the NonNull attribute set on an argument? We
  // use the attribute on the call site to memoize any analysis done in the
  // caller. This will also trip if the callee function has a non-null
  // parameter attribute, but that's a less interesting case because hopefully
  // the callee would already have been simplified based on that.
  if (Argument *A = dyn_cast<Argument>(V))
    if (paramHasAttr(A, Attribute::NonNull))
      return true;

  // Is this an alloca in the caller? This is distinct from the attribute case
  // above because attributes aren't updated within the inliner itself and we
  // always want to catch the alloca derived case.
  if (isAllocaDerivedArg(V))
    // We can actually predict the result of comparisons between an
    // alloca-derived value and null. Note that this fires regardless of
    // SROA firing.
    return true;

  return false;
}

void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
  // If -inline-threshold is not given, listen to the optsize and minsize
  // attributes when they would decrease the threshold.
  Function *Caller = CS.getCaller();

  if (DefaultInlineThreshold.getNumOccurrences() == 0) {
    if (Caller->optForMinSize() && OptMinSizeThreshold < Threshold)
      Threshold = OptMinSizeThreshold;
    else if (Caller->optForSize() && OptSizeThreshold < Threshold)
      Threshold = OptSizeThreshold;
  }

  // If profile information is available, use it to adjust the threshold for
  // hot and cold functions.
  // FIXME: The heuristics used below for determining hotness and coldness are
  // based on preliminary SPEC tuning and may not be optimal. Replace this with
  // a well-tuned heuristic based on *callsite* hotness and not callee hotness.
  uint64_t FunctionCount = 0, MaxFunctionCount = 0;
  bool HasPGOCounts = false;
  if (Callee.getEntryCount() && Callee.getParent()->getMaximumFunctionCount()) {
    HasPGOCounts = true;
    FunctionCount = Callee.getEntryCount().getValue();
    MaxFunctionCount = Callee.getParent()->getMaximumFunctionCount().getValue();
  }

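  // The cutoffs below treat the callee as hot when its entry count is at
  // least 30% of the module's maximum function count, and as cold when it is
  // at most 1% of it.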
  // Listen to the inlinehint attribute or profile-based hotness information
  // when it would increase the threshold and the caller does not need to
  // minimize its size.
  bool InlineHint =
      Callee.hasFnAttribute(Attribute::InlineHint) ||
      (HasPGOCounts &&
       FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount));
  if (InlineHint && HintThreshold > Threshold && !Caller->optForMinSize())
    Threshold = HintThreshold;

  // Listen to the cold attribute or profile-based coldness information
  // when it would decrease the threshold.
  bool ColdCallee =
      Callee.hasFnAttribute(Attribute::Cold) ||
      (HasPGOCounts &&
       FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount));
  // Command line argument for DefaultInlineThreshold will override the default
  // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
  // do not use the default cold threshold even if it is smaller.
  if ((DefaultInlineThreshold.getNumOccurrences() == 0 ||
       ColdThreshold.getNumOccurrences() > 0) &&
      ColdCallee && ColdThreshold < Threshold)
    Threshold = ColdThreshold;
}

bool CallAnalyzer::visitCmpInst(CmpInst &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  // First try to handle simplified comparisons.
  if (!isa<Constant>(LHS))
    if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
      LHS = SimpleLHS;
  if (!isa<Constant>(RHS))
    if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
      RHS = SimpleRHS;
  if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
    if (Constant *CRHS = dyn_cast<Constant>(RHS))
      if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        return true;
      }
  }

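  // The remaining folds reason about pointer bases and null pointers, which
  // only applies to integer and pointer compares, so there is nothing more to
  // do for floating point comparisons.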
  if (I.getOpcode() == Instruction::FCmp)
    return false;

  // Otherwise look for a comparison between constant offset pointers with
  // a common base.
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the icmp to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrCmps;
        return true;
      }
    }
  }

  // If the comparison is an equality comparison with null, we can simplify it
  // if we know the value (argument) can't be null.
  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
      isKnownNonNullInCallee(I.getOperand(0))) {
    bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
    SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
                                      : ConstantInt::getFalse(I.getType());
    return true;
  }
  // Finally check for SROA candidates in comparisons.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
    if (isa<ConstantPointerNull>(I.getOperand(1))) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}

bool CallAnalyzer::visitSub(BinaryOperator &I) {
  // Try to handle a special case: we can fold computing the difference of two
  // constant-related pointers.
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the subtract to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrDiffs;
        return true;
      }
    }
  }

  // Otherwise, fall back to the generic logic for simplifying and handling
  // instructions.
  return Base::visitSub(I);
}

bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  const DataLayout &DL = F.getParent()->getDataLayout();
  if (!isa<Constant>(LHS))
    if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
      LHS = SimpleLHS;
  if (!isa<Constant>(RHS))
    if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
      RHS = SimpleRHS;
  Value *SimpleV = nullptr;
  if (auto FI = dyn_cast<FPMathOperator>(&I))
    SimpleV =
        SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
  else
    SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);

  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
    SimplifiedValues[&I] = C;
    return true;
  }

  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
  disableSROA(LHS);
  disableSROA(RHS);

  return false;
}

bool CallAnalyzer::visitLoad(LoadInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}

bool CallAnalyzer::visitStore(StoreInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}

bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
  // Constant folding for extract value is trivial.
  Constant *C = dyn_cast<Constant>(I.getAggregateOperand());
  if (!C)
    C = SimplifiedValues.lookup(I.getAggregateOperand());
  if (C) {
    SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices());
    return true;
  }

  // SROA can look through these but give them a cost.
  return false;
}

bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
  // Constant folding for insert value is trivial.
  Constant *AggC = dyn_cast<Constant>(I.getAggregateOperand());
  if (!AggC)
    AggC = SimplifiedValues.lookup(I.getAggregateOperand());
  Constant *InsertedC = dyn_cast<Constant>(I.getInsertedValueOperand());
  if (!InsertedC)
    InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand());
  if (AggC && InsertedC) {
    SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC,
                                                        I.getIndices());
    return true;
  }

  // SROA can look through these but give them a cost.
  return false;
}

/// \brief Try to simplify a call site.
///
/// Takes a concrete function and callsite and tries to actually simplify it by
/// analyzing the arguments and call itself with instsimplify. Returns true if
/// it has simplified the callsite to some other entity (a constant), making it
/// free.
bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
  // FIXME: Using the instsimplify logic directly for this is inefficient
  // because we have to continually rebuild the argument list even when no
  // simplifications can be performed. Until that is fixed with remapping
  // inside of instsimplify, directly constant fold calls here.
  if (!canConstantFoldCallTo(F))
    return false;

  // Try to re-map the arguments to constants.
  SmallVector<Constant *, 4> ConstantArgs;
  ConstantArgs.reserve(CS.arg_size());
  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
       I != E; ++I) {
    Constant *C = dyn_cast<Constant>(*I);
    if (!C)
      C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
    if (!C)
      return false; // This argument doesn't map to a constant.

    ConstantArgs.push_back(C);
  }
  if (Constant *C = ConstantFoldCall(F, ConstantArgs)) {
    SimplifiedValues[CS.getInstruction()] = C;
    return true;
  }

  return false;
}

bool CallAnalyzer::visitCallSite(CallSite CS) {
  if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
      !F.hasFnAttribute(Attribute::ReturnsTwice)) {
    // This aborts the entire analysis.
    ExposesReturnsTwice = true;
    return false;
  }
  if (CS.isCall() &&
      cast<CallInst>(CS.getInstruction())->cannotDuplicate())
    ContainsNoDuplicateCall = true;

  if (Function *F = CS.getCalledFunction()) {
    // When we have a concrete function, first try to simplify it directly.
    if (simplifyCallSite(F, CS))
      return true;

    // Next check if it is an intrinsic we know about.
    // FIXME: Lift this into part of the InstVisitor.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
      switch (II->getIntrinsicID()) {
      default:
        return Base::visitCallSite(CS);

      case Intrinsic::memset:
      case Intrinsic::memcpy:
      case Intrinsic::memmove:
        // SROA can usually chew through these intrinsics, but they aren't free.
        return false;
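      // Inlining a function that publishes frame locals via localescape is
      // not supported, so flag it and let the analysis abort.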
      case Intrinsic::localescape:
        HasFrameEscape = true;
        return false;
      }
    }

    if (F == CS.getInstruction()->getParent()->getParent()) {
      // This flag will fully abort the analysis, so don't bother with anything
      // else.
      IsRecursiveCall = true;
      return false;
    }

    if (TTI.isLoweredToCall(F)) {
      // We account for the average 1 instruction per call argument setup
      // here.
      Cost += CS.arg_size() * InlineConstants::InstrCost;

      // Everything other than inline ASM will also have a significant cost
      // merely from making the call.
      if (!isa<InlineAsm>(CS.getCalledValue()))
        Cost += InlineConstants::CallPenalty;
    }

    return Base::visitCallSite(CS);
  }

  // Otherwise we're in a very special case -- an indirect function call. See
  // if we can be particularly clever about this.
  Value *Callee = CS.getCalledValue();

  // First, pay the price of the argument setup. We account for the average
  // 1 instruction per call argument setup here.
  Cost += CS.arg_size() * InlineConstants::InstrCost;

  // Next, check if this happens to be an indirect function call to a known
  // function in this inline context. If not, we've done all we can.
  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
  if (!F)
    return Base::visitCallSite(CS);

  // If we have a constant that we are calling as a function, we can peer
  // through it and see the function target. This happens not infrequently
  // during devirtualization and so we want to give it a hefty bonus for
  // inlining, but cap that bonus in the event that inlining wouldn't pan
  // out. Pretend to inline the function, with a custom threshold.
  CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS);
  if (CA.analyzeCall(CS)) {
    // We were able to inline the indirect call! Subtract the cost from the
    // threshold to get the bonus we want to apply, but don't go below zero.
    Cost -= std::max(0, CA.getThreshold() - CA.getCost());
  }

  return Base::visitCallSite(CS);
}

bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
  // At least one return instruction will be free after inlining.
  bool Free = !HasReturn;
  HasReturn = true;
  return Free;
}

bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
  // We model unconditional branches as essentially free -- they really
  // shouldn't exist at all, but handling them makes the behavior of the
  // inliner more regular and predictable. Interestingly, conditional branches
  // which will fold away are also free.
  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
         dyn_cast_or_null<ConstantInt>(
             SimplifiedValues.lookup(BI.getCondition()));
}

bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
  // We model unconditional switches as free, see the comments on handling
  // branches.
  if (isa<ConstantInt>(SI.getCondition()))
    return true;
  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
    if (isa<ConstantInt>(V))
      return true;

  // Otherwise, we need to accumulate a cost proportional to the number of
  // distinct successor blocks. This fan-out in the CFG cannot be represented
  // for free even if we can represent the core switch as a jumptable that
  // takes a single instruction.
  //
  // NB: We convert large switches which are just used to initialize large phi
  // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
  // inlining those. It will prevent inlining in cases where the optimization
  // does not (yet) fire.
  SmallPtrSet<BasicBlock *, 8> SuccessorBlocks;
  SuccessorBlocks.insert(SI.getDefaultDest());
  for (auto I = SI.case_begin(), E = SI.case_end(); I != E; ++I)
    SuccessorBlocks.insert(I.getCaseSuccessor());
  // Add cost corresponding to the number of distinct destinations. The first
  // we model as free because of fallthrough.
  Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost;
  return false;
}

bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
  // We never want to inline functions that contain an indirectbr. This is
  // incorrect because all the blockaddress's (in static global initializers
  // for example) would be referring to the original function, and this
  // indirect jump would jump from the inlined copy of the function into the
  // original function which is extremely undefined behavior.
  // FIXME: This logic isn't really right; we can safely inline functions with
  // indirectbr's as long as no other function or global references the
  // blockaddress of a block within the current function.
  HasIndirectBr = true;
  return false;
}

bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a resume instruction.
  return false;
}

bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a cleanupret instruction.
  return false;
}

bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a catchret instruction.
  return false;
}

bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
  // FIXME: It might be reasonable to discount the cost of instructions leading
  // to unreachable as they have the lowest possible impact on both runtime and
  // code size.
  return true; // No actual code is needed for unreachable.
}

bool CallAnalyzer::visitInstruction(Instruction &I) {
  // Some instructions are free. All of the free intrinsics can also be
  // handled by SROA, etc.
  if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
    return true;

  // We found something we don't understand or can't handle. Mark any SROA-able
  // values in the operand list as no longer viable.
  for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
    disableSROA(*OI);

  return false;
}


/// \brief Analyze a basic block for its contribution to the inline cost.
///
/// This method walks the analyzer over every instruction in the given basic
/// block and accounts for their cost during inlining at this callsite. It
/// aborts early if the threshold has been exceeded or an impossible to inline
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
                                SmallPtrSetImpl<const Value *> &EphValues) {
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
    // FIXME: Currently, the number of instructions in a function, regardless
    // of our ability to simplify them during inlining to constants or dead
    // code, is actually used by the vector bonus heuristic. As long as that's
    // true, we have to special case debug intrinsics here to prevent
    // differences in inlining due to debug symbols. Eventually, the number of
    // unsimplified instructions shouldn't factor into the cost computation,
    // but until then, hack around it here.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip ephemeral values.
    if (EphValues.count(&*I))
      continue;

    ++NumInstructions;
    if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
      ++NumVectorInstructions;

    // If the instruction is floating point, and the target says this operation
    // is expensive or the function has the "use-soft-float" attribute, this may
    // eventually become a library call. Treat the cost as such.
    if (I->getType()->isFloatingPointTy()) {
      bool hasSoftFloatAttr = false;

      // If the function has the "use-soft-float" attribute, mark it as
      // expensive.
      if (F.hasFnAttribute("use-soft-float")) {
        Attribute Attr = F.getFnAttribute("use-soft-float");
        StringRef Val = Attr.getValueAsString();
        if (Val == "true")
          hasSoftFloatAttr = true;
      }

      if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
          hasSoftFloatAttr)
        Cost += InlineConstants::CallPenalty;
    }

    // If the instruction simplified to a constant, there is no cost to this
    // instruction. Visit the instructions using our InstVisitor to account for
    // all of the per-instruction logic. The visit tree returns true if we
    // consumed the instruction in any way, and false if the instruction's base
    // cost should count against inlining.
    if (Base::visit(&*I))
      ++NumInstructionsSimplified;
    else
      Cost += InlineConstants::InstrCost;

    // If the visit of this instruction detected an uninlinable pattern, abort.
    if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
        HasIndirectBr || HasFrameEscape)
      return false;

    // If the caller is a recursive function then we don't want to inline
    // functions which allocate a lot of stack space because it would increase
    // the caller stack usage dramatically.
    if (IsCallerRecursive &&
        AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
      return false;

    // Check if we've passed the maximum possible threshold so we don't spin in
    // huge basic blocks that will never inline.
    if (Cost > Threshold)
      return false;
  }

  return true;
}

/// \brief Compute the base pointer and cumulative constant offsets for V.
///
/// This strips all constant offsets off of V, leaving it the base pointer, and
/// accumulates the total constant offset applied in the returned constant. It
/// returns 0 if V is not a pointer, and returns the constant '0' if there are
/// no constant offsets applied.
ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
  if (!V->getType()->isPointerTy())
    return nullptr;

  const DataLayout &DL = F.getParent()->getDataLayout();
  unsigned IntPtrWidth = DL.getPointerSizeInBits();
  APInt Offset = APInt::getNullValue(IntPtrWidth);

  // Even though we don't look through PHI nodes, we could be called on an
  // instruction in an unreachable block, which may be on a cycle.
  SmallPtrSet<Value *, 4> Visited;
  Visited.insert(V);
  do {
    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
      if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
        return nullptr;
      V = GEP->getPointerOperand();
    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
      V = cast<Operator>(V)->getOperand(0);
    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
      if (GA->isInterposable())
        break;
      V = GA->getAliasee();
    } else {
      break;
    }
    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
  } while (Visited.insert(V).second);

  Type *IntPtrTy = DL.getIntPtrType(V->getContext());
  return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
}

1141/// \brief Analyze a call site for potential inlining.
1142///
1143/// Returns true if inlining this call is viable, and false if it is not
1144/// viable. It computes the cost and adjusts the threshold based on numerous
1145/// factors and heuristics. If this method returns false but the computed cost
1146/// is below the computed threshold, then inlining was forcibly disabled by
Bob Wilson266802d2012-11-19 07:04:30 +00001147/// some artifact of the routine.
Chandler Carruth0539c072012-03-31 12:42:41 +00001148bool CallAnalyzer::analyzeCall(CallSite CS) {
  ++NumCallsAnalyzed;

  // Perform some tweaks to the cost and threshold based on the direct
  // callsite information.

  // We want to more aggressively inline vector-dense kernels, so up the
  // threshold, and we'll lower it if the % of vector instructions gets too
  // low. Note that these bonuses are somewhat arbitrary and evolved over time
  // by accident as much as because they are principled bonuses.
  //
  // FIXME: It would be nice to remove all such bonuses. At least it would be
  // nice to base the bonus values on something more scientific.
  assert(NumInstructions == 0);
  assert(NumVectorInstructions == 0);

  // Update the threshold based on callsite properties.
  updateThreshold(CS, F);

  FiftyPercentVectorBonus = 3 * Threshold / 2;
  TenPercentVectorBonus = 3 * Threshold / 4;
  const DataLayout &DL = F.getParent()->getDataLayout();

  // Track whether the post-inlining function would have more than one basic
  // block. A single basic block is often intended for inlining. Balloon the
  // threshold by 50% until we pass the single-BB phase.
  bool SingleBB = true;
  int SingleBBBonus = Threshold / 2;

  // Speculatively apply all possible bonuses to Threshold. If cost exceeds
  // this Threshold any time, and cost cannot decrease, we can stop processing
  // the rest of the function body.
  Threshold += (SingleBBBonus + FiftyPercentVectorBonus);
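
  // For illustration (not normative; assumes updateThreshold left the
  // default threshold of 225 in place): SingleBBBonus = 112 and
  // FiftyPercentVectorBonus = 337, so the analysis below runs against a
  // speculative threshold of 674 until the unearned bonuses are retracted.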

  // Give out bonuses per argument, as the instructions setting them up will
  // be gone after inlining.
  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
    if (CS.isByValArgument(I)) {
      // We approximate the number of loads and stores needed by dividing the
      // size of the byval type by the target's pointer size.
      PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
      unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
      unsigned PointerSize = DL.getPointerSizeInBits();
      // Ceiling division.
      unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;

      // If it generates more than 8 stores it is likely to be expanded as an
      // inline memcpy so we take that as an upper bound. Otherwise we assume
      // one load and one store per word copied.
      // FIXME: The maxStoresPerMemcpy setting from the target should be used
      // here instead of a magic number of 8, but it's not available via
      // DataLayout.
      NumStores = std::min(NumStores, 8U);

      Cost -= 2 * NumStores * InlineConstants::InstrCost;
    } else {
      // For non-byval arguments subtract off one instruction per call
      // argument.
      Cost -= InlineConstants::InstrCost;
    }
  }
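
  // For illustration (hypothetical argument, not from this file): a byval
  // struct of 192 bits on a target with 64-bit pointers yields NumStores =
  // ceil(192 / 64) = 3, for a credit of 2 * 3 * InlineConstants::InstrCost;
  // a 1024-bit struct would be capped at the 8-store memcpy bound.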

  // If there is only one call of the function, and it has internal linkage,
  // the cost of inlining it drops dramatically.
  bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
                                    &F == CS.getCalledFunction();
  if (OnlyOneCallAndLocalLinkage)
    Cost += InlineConstants::LastCallToStaticBonus;
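
  // For illustration: a file-local helper whose single use is this call site
  // is very likely to be inlined, since its definition can be deleted
  // afterwards; LastCallToStaticBonus is a large negative adjustment to Cost.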

  // If the normal destination of the invoke or the parent block of the call
  // site is unreachable-terminated, there is little point in inlining this
  // unless there is literally zero cost.
  // FIXME: Note that it is possible that an unreachable-terminated block has a
  // hot entry. For example, in the scenario below, inlining hot_call_X() may
  // be beneficial:
  // main() {
  //   hot_call_1();
  //   ...
  //   hot_call_N();
  //   exit(0);
  // }
  // For now, we are not handling this corner case here as it is rare in real
  // code. In the future, we should elaborate on this based on BPI and BFI in
  // more general threshold adjusting heuristics in updateThreshold().
  Instruction *Instr = CS.getInstruction();
  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
    if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
      Threshold = 0;
  } else if (isa<UnreachableInst>(Instr->getParent()->getTerminator()))
    Threshold = 0;

  // If this function uses the coldcc calling convention, prefer not to inline
  // it.
  if (F.getCallingConv() == CallingConv::Cold)
    Cost += InlineConstants::ColdccPenalty;

  // Check if we're done. This can happen due to bonuses and penalties.
  if (Cost > Threshold)
    return false;

  if (F.empty())
    return true;

  Function *Caller = CS.getInstruction()->getParent()->getParent();
  // Check if the caller function is recursive itself.
  for (User *U : Caller->users()) {
    CallSite Site(U);
    if (!Site)
      continue;
    Instruction *I = Site.getInstruction();
    if (I->getParent()->getParent() == Caller) {
      IsCallerRecursive = true;
      break;
    }
  }

  // Populate our simplified values by mapping from function arguments to call
  // arguments with known important simplifications.
  CallSite::arg_iterator CAI = CS.arg_begin();
  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
       FAI != FAE; ++FAI, ++CAI) {
    assert(CAI != CS.arg_end());
    if (Constant *C = dyn_cast<Constant>(CAI))
      SimplifiedValues[&*FAI] = C;

    Value *PtrArg = *CAI;
    if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
      ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());

      // We can SROA any pointer arguments derived from alloca instructions.
      if (isa<AllocaInst>(PtrArg)) {
        SROAArgValues[&*FAI] = PtrArg;
        SROAArgCosts[PtrArg] = 0;
      }
    }
  }
  NumConstantArgs = SimplifiedValues.size();
  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
  NumAllocaArgs = SROAArgValues.size();
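
  // For illustration (hypothetical call, not from this file): given a call
  // 'callee(42, &buf[1])' where 'buf' is a local alloca, the constant 42 is
  // seeded into SimplifiedValues for the first formal argument, while the
  // second argument lands in ConstantOffsetPtrs and SROAArgValues so later
  // accesses through it can be simplified and credited as SROA savings.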

  // FIXME: If a caller has multiple calls to a callee, we end up recomputing
  // the ephemeral values multiple times (and they're completely determined by
  // the callee, so this is purely duplicate work).
  SmallPtrSet<const Value *, 32> EphValues;
  CodeMetrics::collectEphemeralValues(&F, &ACT->getAssumptionCache(F),
                                      EphValues);
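  // (Ephemeral values are values that exist only to feed @llvm.assume;
  // analyzeBlock skips them, since they emit no real code after inlining.)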

  // The worklist of live basic blocks in the callee *after* inlining. We avoid
  // adding basic blocks of the callee which can be proven to be dead for this
  // particular call site in order to get more accurate cost estimates. This
  // requires a somewhat heavyweight iteration pattern: we need to walk the
  // basic blocks in a breadth-first order as we insert live successors. To
  // accomplish this, prioritizing for small iterations because we exit after
  // crossing our threshold, we use a small-size optimized SetVector.
  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
                    SmallPtrSet<BasicBlock *, 16> > BBSetVector;
  BBSetVector BBWorklist;
  BBWorklist.insert(&F.getEntryBlock());
  // Note that we *must not* cache the size: this loop grows the worklist.
  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
    // Bail out the moment we cross the threshold. This means we'll under-count
    // the cost, but only when undercounting doesn't matter.
    if (Cost > Threshold)
      break;

    BasicBlock *BB = BBWorklist[Idx];
    if (BB->empty())
      continue;

    // Disallow inlining a blockaddress. A blockaddress only has defined
    // behavior for an indirect branch in the same function, and we do not
    // currently support inlining indirect branches. But the inliner may not
    // see an indirect branch that ends up being dead code at a particular call
    // site. If the blockaddress escapes the function, e.g., via a global
    // variable, inlining may lead to an invalid cross-function reference.
    if (BB->hasAddressTaken())
      return false;

    // Analyze the cost of this block. If we blow through the threshold, this
    // returns false, and we can bail out.
    if (!analyzeBlock(BB, EphValues)) {
      if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
          HasIndirectBr || HasFrameEscape)
        return false;

      // If the caller is a recursive function then we don't want to inline
      // functions which allocate a lot of stack space because it would
      // increase the caller stack usage dramatically.
      if (IsCallerRecursive &&
          AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
        return false;

      break;
    }

    TerminatorInst *TI = BB->getTerminator();

    // Add in the live successors by first checking whether we have a
    // terminator that may be simplified based on the values simplified by
    // this call.
    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (BI->isConditional()) {
        Value *Cond = BI->getCondition();
        if (ConstantInt *SimpleCond
              = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
          BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
          continue;
        }
      }
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
      Value *Cond = SI->getCondition();
      if (ConstantInt *SimpleCond
            = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
        BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
        continue;
      }
    }
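
    // For illustration (hypothetical callee, not from this file): if a block
    // ends in 'br i1 %flag, label %then, label %else' and this call site
    // passes a constant false for %flag, only %else joins the worklist and
    // the %then subgraph contributes nothing to Cost.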

    // If we're unable to select a particular successor, just count all of
    // them.
    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
         ++TIdx)
      BBWorklist.insert(TI->getSuccessor(TIdx));

    // If we had any successors at this point, then post-inlining is likely to
    // have them as well. Note that we assume any basic blocks which existed
    // due to branches or switches which folded above will also fold after
    // inlining.
    if (SingleBB && TI->getNumSuccessors() > 1) {
      // Take off the bonus we applied to the threshold.
      Threshold -= SingleBBBonus;
      SingleBB = false;
    }
  }

  // If this is a noduplicate call, we can still inline as long as
  // inlining this would cause the removal of the callee (so the instruction
  // is not actually duplicated, just moved).
  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
    return false;

  // We applied the maximum possible vector bonus at the beginning. Now,
  // subtract the excess bonus, if any, from the Threshold before
  // comparing against Cost.
  if (NumVectorInstructions <= NumInstructions / 10)
    Threshold -= FiftyPercentVectorBonus;
  else if (NumVectorInstructions <= NumInstructions / 2)
    Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus);
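
  // For illustration (again assuming the default threshold of 225): a callee
  // with under 10% vector instructions gives back the full bonus of 337,
  // while one between 10% and 50% keeps TenPercentVectorBonus = 168.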

  return Cost < std::max(1, Threshold);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// \brief Dump stats about this call's analysis.
LLVM_DUMP_METHOD void CallAnalyzer::dump() {
#define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
  DEBUG_PRINT_STAT(NumConstantArgs);
  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
  DEBUG_PRINT_STAT(NumAllocaArgs);
  DEBUG_PRINT_STAT(NumConstantPtrCmps);
  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
  DEBUG_PRINT_STAT(NumInstructionsSimplified);
  DEBUG_PRINT_STAT(NumInstructions);
  DEBUG_PRINT_STAT(SROACostSavings);
  DEBUG_PRINT_STAT(SROACostSavingsLost);
  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
  DEBUG_PRINT_STAT(Cost);
  DEBUG_PRINT_STAT(Threshold);
#undef DEBUG_PRINT_STAT
}
#endif

/// \brief Test whether two functions either both have or both lack the given
/// attribute.
template <typename AttrKind>
static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) {
  return F1->getFnAttribute(Attr) == F2->getFnAttribute(Attr);
}

/// \brief Test that there are no attribute conflicts between Caller and Callee
/// that prevent inlining.
static bool functionsHaveCompatibleAttributes(Function *Caller,
                                              Function *Callee,
                                              TargetTransformInfo &TTI) {
  return TTI.areInlineCompatible(Caller, Callee) &&
         AttributeFuncs::areInlineCompatible(*Caller, *Callee);
}

InlineCost llvm::getInlineCost(CallSite CS, int DefaultThreshold,
                               TargetTransformInfo &CalleeTTI,
                               AssumptionCacheTracker *ACT) {
  return getInlineCost(CS, CS.getCalledFunction(), DefaultThreshold, CalleeTTI,
                       ACT);
}

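/// Map the optimization and size-optimization levels to an inlining
/// threshold. (Editorial summary of the logic below: -O3 and above select
/// OptAggressiveThreshold, -Os selects OptSizeThreshold, -Oz selects
/// OptMinSizeThreshold, and everything else falls back to
/// DefaultInlineThreshold.)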
int llvm::computeThresholdFromOptLevels(unsigned OptLevel,
                                        unsigned SizeOptLevel) {
  if (OptLevel > 2)
    return OptAggressiveThreshold;
  if (SizeOptLevel == 1) // -Os
    return OptSizeThreshold;
  if (SizeOptLevel == 2) // -Oz
    return OptMinSizeThreshold;
  return DefaultInlineThreshold;
}

int llvm::getDefaultInlineThreshold() { return DefaultInlineThreshold; }

InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
                               int DefaultThreshold,
                               TargetTransformInfo &CalleeTTI,
                               AssumptionCacheTracker *ACT) {

  // Cannot inline indirect calls.
  if (!Callee)
    return llvm::InlineCost::getNever();

  // Calls to functions with always-inline attributes should be inlined
  // whenever possible.
  if (CS.hasFnAttr(Attribute::AlwaysInline)) {
    if (isInlineViable(*Callee))
      return llvm::InlineCost::getAlways();
    return llvm::InlineCost::getNever();
  }

  // Never inline functions with conflicting attributes (unless callee has
  // always-inline attribute).
  if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee, CalleeTTI))
    return llvm::InlineCost::getNever();

  // Don't inline this call if the caller has the optnone attribute.
  if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone))
    return llvm::InlineCost::getNever();
1479
Sanjoy Das5ce32722016-04-08 00:48:30 +00001480 // Don't inline functions which can be interposed at link-time. Don't inline
1481 // functions marked noinline or call sites marked noinline.
1482 // Note: inlining non-exact non-interposable fucntions is fine, since we know
1483 // we have *a* correct implementation of the source level function.
1484 if (Callee->isInterposable() ||
Evgeniy Stepanov2ad36982013-08-08 08:22:39 +00001485 Callee->hasFnAttribute(Attribute::NoInline) || CS.isNoInline())
Dan Gohman4552e3c2009-10-13 18:30:07 +00001486 return llvm::InlineCost::getNever();

  DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
                     << "...\n");

  CallAnalyzer CA(CalleeTTI, ACT, *Callee, DefaultThreshold, CS);
  bool ShouldInline = CA.analyzeCall(CS);

  DEBUG(CA.dump());

  // Check if there was a reason to force inlining or no inlining.
  if (!ShouldInline && CA.getCost() < CA.getThreshold())
    return InlineCost::getNever();
  if (ShouldInline && CA.getCost() >= CA.getThreshold())
    return InlineCost::getAlways();

  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
}

bool llvm::isInlineViable(Function &F) {
  bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
    // Disallow inlining of functions which contain indirect branches or
    // blockaddresses.
    if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
      return false;

    for (auto &II : *BI) {
      CallSite CS(&II);
      if (!CS)
        continue;

      // Disallow recursive calls.
      if (&F == CS.getCalledFunction())
        return false;

      // Disallow calls which expose returns-twice to a function not previously
      // attributed as such.
      if (!ReturnsTwice && CS.isCall() &&
          cast<CallInst>(CS.getInstruction())->canReturnTwice())
        return false;

      // Disallow inlining functions that call @llvm.localescape. Doing this
      // correctly would require major changes to the inliner.
      if (CS.getCalledFunction() &&
          CS.getCalledFunction()->getIntrinsicID() ==
              llvm::Intrinsic::localescape)
        return false;
    }
  }

  return true;
}