//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements inline cost analysis.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/InlineCost.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "inline-cost"

STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");

// Threshold to use when optsize is specified (and there is no
// -inline-threshold).
const int OptSizeThreshold = 75;

// Threshold to use when -Oz is specified (and there is no -inline-threshold).
const int OptMinSizeThreshold = 25;

// Threshold to use when -O[34] is specified (and there is no
// -inline-threshold).
const int OptAggressiveThreshold = 275;

static cl::opt<int> DefaultInlineThreshold(
    "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
    cl::desc("Control the amount of inlining to perform (default = 225)"));

static cl::opt<int> HintThreshold(
    "inlinehint-threshold", cl::Hidden, cl::init(325),
    cl::desc("Threshold for inlining functions with inline hint"));

// This threshold is introduced to help the performance of instrumentation-
// based PGO before we actually hook up the inliner with analysis passes such
// as BPI and BFI.
static cl::opt<int> ColdThreshold(
    "inlinecold-threshold", cl::Hidden, cl::init(225),
    cl::desc("Threshold for inlining functions with cold attribute"));

namespace {

class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  typedef InstVisitor<CallAnalyzer, bool> Base;
  friend class InstVisitor<CallAnalyzer, bool>;

  /// The TargetTransformInfo available for this compilation.
  const TargetTransformInfo &TTI;

  /// The cache of @llvm.assume intrinsics.
  AssumptionCacheTracker *ACT;

  // The called function.
  Function &F;

  // The candidate callsite being analyzed. Please do not use this to do
  // analysis in the caller function; we want the inline cost query to be
  // easily cacheable. Instead, use the cover function paramHasAttr.
  CallSite CandidateCS;

  int Threshold;
  int Cost;

  bool IsCallerRecursive;
  bool IsRecursiveCall;
  bool ExposesReturnsTwice;
  bool HasDynamicAlloca;
  bool ContainsNoDuplicateCall;
  bool HasReturn;
  bool HasIndirectBr;
  bool HasFrameEscape;

  /// Number of bytes allocated statically by the callee.
  uint64_t AllocatedSize;
  unsigned NumInstructions, NumVectorInstructions;
  int FiftyPercentVectorBonus, TenPercentVectorBonus;
  int VectorBonus;

  // While we walk the potentially-inlined instructions, we build up and
  // maintain a mapping of simplified values specific to this callsite. The
  // idea is to propagate any special information we have about arguments to
  // this call through the inlinable section of the function, and account for
  // likely simplifications post-inlining. The most important aspect we track
  // is CFG altering simplifications -- when we prove a basic block dead, that
  // can cause dramatic shifts in the cost of inlining a function.
  DenseMap<Value *, Constant *> SimplifiedValues;

  // Keep track of the values which map back (through function arguments) to
  // allocas on the caller stack which could be simplified through SROA.
  DenseMap<Value *, Value *> SROAArgValues;

  // The mapping of caller Alloca values to their accumulated cost savings. If
  // we have to disable SROA for one of the allocas, this tells us how much
  // cost must be added.
  DenseMap<Value *, int> SROAArgCosts;

  // Keep track of values which map to a pointer base and constant offset.
  DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;

127 // Custom simplification helper routines.
128 bool isAllocaDerivedArg(Value *V);
129 bool lookupSROAArgAndCost(Value *V, Value *&Arg,
130 DenseMap<Value *, int>::iterator &CostIt);
131 void disableSROA(DenseMap<Value *, int>::iterator CostIt);
132 void disableSROA(Value *V);
133 void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
134 int InstructionCost);
Chandler Carruth0539c072012-03-31 12:42:41 +0000135 bool isGEPOffsetConstant(GetElementPtrInst &GEP);
136 bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
Chandler Carruth753e21d2012-12-28 14:23:32 +0000137 bool simplifyCallSite(Function *F, CallSite CS);
Chandler Carruth0539c072012-03-31 12:42:41 +0000138 ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
139
Philip Reames9b5c9582015-06-26 20:51:17 +0000140 /// Return true if the given argument to the function being considered for
141 /// inlining has the given attribute set either at the call site or the
142 /// function declaration. Primarily used to inspect call site specific
143 /// attributes since these can be more precise than the ones on the callee
Easwaran Raman3676da42015-12-03 19:03:20 +0000144 /// itself.
Philip Reames9b5c9582015-06-26 20:51:17 +0000145 bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);
Chad Rosier567556a2016-04-28 14:47:23 +0000146
Philip Reames9b5c9582015-06-26 20:51:17 +0000147 /// Return true if the given value is known non null within the callee if
Easwaran Raman3676da42015-12-03 19:03:20 +0000148 /// inlined through this particular callsite.
Philip Reames9b5c9582015-06-26 20:51:17 +0000149 bool isKnownNonNullInCallee(Value *V);
150
Easwaran Ramanf4bb2f02016-01-14 23:16:29 +0000151 /// Update Threshold based on callsite properties such as callee
152 /// attributes and callee hotness for PGO builds. The Callee is explicitly
153 /// passed to support analyzing indirect calls whose target is inferred by
154 /// analysis.
155 void updateThreshold(CallSite CS, Function &Callee);
156
Easwaran Raman9a3fc172016-04-08 21:28:02 +0000157 /// Return true if size growth is allowed when inlining the callee at CS.
158 bool allowSizeGrowth(CallSite CS);
159
Chandler Carruth0539c072012-03-31 12:42:41 +0000160 // Custom analysis routines.
Hal Finkel57f03dd2014-09-07 13:49:57 +0000161 bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
Chandler Carruth0539c072012-03-31 12:42:41 +0000162
163 // Disable several entry points to the visitor so we don't accidentally use
164 // them by declaring but not defining them here.
Chad Rosier567556a2016-04-28 14:47:23 +0000165 void visit(Module *);
166 void visit(Module &);
167 void visit(Function *);
168 void visit(Function &);
169 void visit(BasicBlock *);
170 void visit(BasicBlock &);
Chandler Carruth0539c072012-03-31 12:42:41 +0000171
172 // Provide base case for our instruction visit.
173 bool visitInstruction(Instruction &I);
174
175 // Our visit overrides.
176 bool visitAlloca(AllocaInst &I);
177 bool visitPHI(PHINode &I);
178 bool visitGetElementPtr(GetElementPtrInst &I);
179 bool visitBitCast(BitCastInst &I);
180 bool visitPtrToInt(PtrToIntInst &I);
181 bool visitIntToPtr(IntToPtrInst &I);
182 bool visitCastInst(CastInst &I);
183 bool visitUnaryInstruction(UnaryInstruction &I);
Matt Arsenault727aa342013-07-20 04:09:00 +0000184 bool visitCmpInst(CmpInst &I);
Chandler Carruth0539c072012-03-31 12:42:41 +0000185 bool visitSub(BinaryOperator &I);
186 bool visitBinaryOperator(BinaryOperator &I);
187 bool visitLoad(LoadInst &I);
188 bool visitStore(StoreInst &I);
Chandler Carruth753e21d2012-12-28 14:23:32 +0000189 bool visitExtractValue(ExtractValueInst &I);
190 bool visitInsertValue(InsertValueInst &I);
Chandler Carruth0539c072012-03-31 12:42:41 +0000191 bool visitCallSite(CallSite CS);
Chandler Carruth0814d2a2013-12-13 07:59:56 +0000192 bool visitReturnInst(ReturnInst &RI);
193 bool visitBranchInst(BranchInst &BI);
194 bool visitSwitchInst(SwitchInst &SI);
195 bool visitIndirectBrInst(IndirectBrInst &IBI);
196 bool visitResumeInst(ResumeInst &RI);
David Majnemer654e1302015-07-31 17:58:14 +0000197 bool visitCleanupReturnInst(CleanupReturnInst &RI);
198 bool visitCatchReturnInst(CatchReturnInst &RI);
Chandler Carruth0814d2a2013-12-13 07:59:56 +0000199 bool visitUnreachableInst(UnreachableInst &I);
Chandler Carruth0539c072012-03-31 12:42:41 +0000200
public:
  CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT,
               Function &Callee, int Threshold, CallSite CSArg)
      : TTI(TTI), ACT(ACT), F(Callee), CandidateCS(CSArg), Threshold(Threshold),
        Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
        ExposesReturnsTwice(false), HasDynamicAlloca(false),
        ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
        HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
        NumVectorInstructions(0), FiftyPercentVectorBonus(0),
        TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
        NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
        NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
        SROACostSavings(0), SROACostSavingsLost(0) {}

  bool analyzeCall(CallSite CS);

  int getThreshold() { return Threshold; }
  int getCost() { return Cost; }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs;
  unsigned NumConstantOffsetPtrArgs;
  unsigned NumAllocaArgs;
  unsigned NumConstantPtrCmps;
  unsigned NumConstantPtrDiffs;
  unsigned NumInstructionsSimplified;
  unsigned SROACostSavings;
  unsigned SROACostSavingsLost;

  void dump();
};

} // namespace

/// \brief Test whether the given value is an Alloca-derived function argument.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  return SROAArgValues.count(V);
}

/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
/// Returns false if V does not map to a SROA-candidate.
bool CallAnalyzer::lookupSROAArgAndCost(
    Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
  if (SROAArgValues.empty() || SROAArgCosts.empty())
    return false;

  DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
  if (ArgIt == SROAArgValues.end())
    return false;

  Arg = ArgIt->second;
  CostIt = SROAArgCosts.find(Arg);
  return CostIt != SROAArgCosts.end();
}

/// \brief Disable SROA for the candidate marked by this cost iterator.
///
/// This marks the candidate as no longer viable for SROA, and adds the cost
/// savings associated with it back into the inline cost measurement.
void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
  // If we're no longer able to perform SROA we need to undo its cost savings
  // and prevent subsequent analysis.
  Cost += CostIt->second;
  SROACostSavings -= CostIt->second;
  SROACostSavingsLost += CostIt->second;
  SROAArgCosts.erase(CostIt);
}

/// \brief If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(V, SROAArg, CostIt))
    disableSROA(CostIt);
}

/// \brief Accumulate the given cost for a particular SROA candidate.
void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                                      int InstructionCost) {
  CostIt->second += InstructionCost;
  SROACostSavings += InstructionCost;
}

/// \brief Check whether a GEP's indices are all constant.
///
/// Respects any simplified values known during the analysis of this callsite.
bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
    if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
      return false;

  return true;
}

/// \brief Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  const DataLayout &DL = F.getParent()->getDataLayout();
  unsigned IntPtrWidth = DL.getPointerSizeInBits();
  assert(IntPtrWidth == Offset.getBitWidth());

  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
        OpC = dyn_cast<ConstantInt>(SimpleOp);
    if (!OpC)
      return false;
    if (OpC->isZero())
      continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = dyn_cast<StructType>(*GTI)) {
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = DL.getStructLayout(STy);
      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
      continue;
    }

    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  }
  return true;
}

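/// \brief Model an alloca instruction.
///
/// Accumulates the size of static allocas into AllocatedSize and defers to
/// the generic visitor for their cost. A dynamic alloca (one whose size is
/// not a known constant here) sets HasDynamicAlloca, which aborts the
/// analysis.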
bool CallAnalyzer::visitAlloca(AllocaInst &I) {
  // Check whether inlining will turn a dynamic alloca into a static
  // alloca, and handle that case.
  if (I.isArrayAllocation()) {
    if (Constant *Size = SimplifiedValues.lookup(I.getArraySize())) {
      ConstantInt *AllocSize = dyn_cast<ConstantInt>(Size);
      assert(AllocSize && "Allocation size not a constant int?");
      Type *Ty = I.getAllocatedType();
      AllocatedSize += Ty->getPrimitiveSizeInBits() * AllocSize->getZExtValue();
      return Base::visitAlloca(I);
    }
  }

  // Accumulate the allocated size.
  if (I.isStaticAlloca()) {
    const DataLayout &DL = F.getParent()->getDataLayout();
    Type *Ty = I.getAllocatedType();
    AllocatedSize += DL.getTypeAllocSize(Ty);
  }

  // We will happily inline static alloca instructions.
  if (I.isStaticAlloca())
    return Base::visitAlloca(I);

  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
  // a variety of reasons, and so we would like to not inline them into
  // functions which don't currently have a dynamic alloca. This simply
  // disables inlining altogether in the presence of a dynamic alloca.
  HasDynamicAlloca = true;
  return false;
}

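/// \brief Model a PHI node, which is currently always treated as zero-cost.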
bool CallAnalyzer::visitPHI(PHINode &I) {
  // FIXME: We should potentially be tracking values through phi nodes,
  // especially when they collapse to a single value due to deleted CFG edges
  // during inlining.

  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate its bonuses. The idea is to disable
  // SROA if it *might* be used in an inappropriate manner.

  // Phi nodes are always zero-cost.
  return true;
}

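/// \brief Model a GEP instruction.
///
/// GEPs with all-constant indices are modeled as free and keep any SROA
/// candidacy of the pointer operand; variable-index GEPs cost an instruction
/// and disable SROA for the underlying argument.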
bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  bool SROACandidate =
      lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt);

  // Try to fold GEPs of constant-offset call site argument pointers. This
  // requires target data and inbounds GEPs.
  if (I.isInBounds()) {
    // Check if we have a base + offset for the pointer.
    Value *Ptr = I.getPointerOperand();
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
    if (BaseAndOffset.first) {
      // Check if the offset of this GEP is constant, and if so accumulate it
      // into Offset.
      if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
        // Non-constant GEPs aren't folded, and disable SROA.
        if (SROACandidate)
          disableSROA(CostIt);
        return false;
      }

      // Add the result as a new mapping to Base + Offset.
      ConstantOffsetPtrs[&I] = BaseAndOffset;

      // Also handle SROA candidates here, we already know that the GEP is
      // all-constant indexed.
      if (SROACandidate)
        SROAArgValues[&I] = SROAArg;

      return true;
    }
  }

  if (isGEPOffsetConstant(I)) {
    if (SROACandidate)
      SROAArgValues[&I] = SROAArg;

    // Constant GEPs are modeled as free.
    return true;
  }

  // Variable GEPs will require math and will disable SROA.
  if (SROACandidate)
    disableSROA(CostIt);
  return false;
}

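/// \brief Model a bitcast, which is always free; constants, base/offset
/// pairs, and SROA candidacy are propagated through it.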
bool CallAnalyzer::visitBitCast(BitCastInst &I) {
  // Propagate constants through bitcasts.
  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
  if (!COp)
    COp = SimplifiedValues.lookup(I.getOperand(0));
  if (COp)
    if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
      SimplifiedValues[&I] = C;
      return true;
    }

  // Track base/offsets through casts.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getOperand(0));
  // Casts don't change the offset, just wrap it up.
  if (BaseAndOffset.first)
    ConstantOffsetPtrs[&I] = BaseAndOffset;

  // Also look for SROA candidates here.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  // Bitcasts are always zero cost.
  return true;
}

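/// \brief Model a ptrtoint cast, propagating constants, base/offset pairs
/// (when the result integer is wide enough to hold the pointer), and SROA
/// candidacy; free only if TTI considers the cast free.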
bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
  // Propagate constants through ptrtoint.
  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
  if (!COp)
    COp = SimplifiedValues.lookup(I.getOperand(0));
  if (COp)
    if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
      SimplifiedValues[&I] = C;
      return true;
    }

  // Track base/offset pairs when converted to a plain integer provided the
  // integer is large enough to represent the pointer.
  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
  const DataLayout &DL = F.getParent()->getDataLayout();
  if (IntegerSize >= DL.getPointerSizeInBits()) {
    std::pair<Value *, APInt> BaseAndOffset =
        ConstantOffsetPtrs.lookup(I.getOperand(0));
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // This is really weird. Technically, ptrtoint will disable SROA. However,
  // unless that ptrtoint is *used* somewhere in the live basic blocks after
  // inlining, it will be nuked, and SROA should proceed. All of the uses which
  // would block SROA would also block SROA if applied directly to a pointer,
  // and so we can just add the integer in here. The only places where SROA is
  // preserved either cannot fire on an integer, or won't in-and-of themselves
  // disable SROA (ext) w/o some later use that we would see and disable.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

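/// \brief Model an inttoptr cast, mirroring the ptrtoint handling above;
/// free only if TTI considers the cast free.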
bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
  // Propagate constants through inttoptr.
  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
  if (!COp)
    COp = SimplifiedValues.lookup(I.getOperand(0));
  if (COp)
    if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
      SimplifiedValues[&I] = C;
      return true;
    }

  // Track base/offset pairs when round-tripped through a pointer without
  // modifications provided the integer is not too large.
  Value *Op = I.getOperand(0);
  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
  const DataLayout &DL = F.getParent()->getDataLayout();
  if (IntegerSize <= DL.getPointerSizeInBits()) {
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

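/// \brief Fallback for all other casts: constant fold if possible, otherwise
/// conservatively disable SROA on the operand; free only if TTI says so.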
bool CallAnalyzer::visitCastInst(CastInst &I) {
  // Propagate constants through casts.
  Constant *COp = dyn_cast<Constant>(I.getOperand(0));
  if (!COp)
    COp = SimplifiedValues.lookup(I.getOperand(0));
  if (COp)
    if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
      SimplifiedValues[&I] = C;
      return true;
    }

  // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
  disableSROA(I.getOperand(0));

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

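/// \brief Try to constant fold a unary instruction using any simplified
/// operand value; on failure, disable SROA on the operand and charge for the
/// instruction.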
bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
  Value *Operand = I.getOperand(0);
  Constant *COp = dyn_cast<Constant>(Operand);
  if (!COp)
    COp = SimplifiedValues.lookup(Operand);
  if (COp) {
    const DataLayout &DL = F.getParent()->getDataLayout();
    if (Constant *C = ConstantFoldInstOperands(&I, COp, DL)) {
      SimplifiedValues[&I] = C;
      return true;
    }
  }

  // Disable any SROA on the argument to arbitrary unary operators.
  disableSROA(Operand);

  return false;
}

bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
  unsigned ArgNo = A->getArgNo();
  return CandidateCS.paramHasAttr(ArgNo + 1, Attr);
}

bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
  // Does the *call site* have the NonNull attribute set on an argument? We
  // use the attribute on the call site to memoize any analysis done in the
  // caller. This will also trip if the callee function has a non-null
  // parameter attribute, but that's a less interesting case because hopefully
  // the callee would already have been simplified based on that.
  if (Argument *A = dyn_cast<Argument>(V))
    if (paramHasAttr(A, Attribute::NonNull))
      return true;

  // Is this an alloca in the caller? This is distinct from the attribute case
  // above because attributes aren't updated within the inliner itself and we
  // always want to catch the alloca derived case.
  if (isAllocaDerivedArg(V))
    // We can actually predict the result of comparisons between an
    // alloca-derived value and null. Note that this fires regardless of
    // SROA firing.
    return true;

  return false;
}

bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
  // If the normal destination of the invoke or the parent block of the call
  // site is unreachable-terminated, there is little point in inlining this
  // unless there is literally zero cost.
  // FIXME: Note that it is possible that an unreachable-terminated block has a
  // hot entry. For example, in the scenario below, inlining hot_call_X() may
  // be beneficial:
  // main() {
  //   hot_call_1();
  //   ...
  //   hot_call_N()
  //   exit(0);
  // }
  // For now, we are not handling this corner case here as it is rare in real
  // code. In the future, we should elaborate this based on BPI and BFI in more
  // general threshold adjusting heuristics in updateThreshold().
  Instruction *Instr = CS.getInstruction();
  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
    if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
      return false;
  } else if (isa<UnreachableInst>(Instr->getParent()->getTerminator()))
    return false;

  return true;
}

void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
  // If no size growth is allowed for this inlining, set Threshold to 0.
  if (!allowSizeGrowth(CS)) {
    Threshold = 0;
    return;
  }

  // If -inline-threshold is not given, listen to the optsize and minsize
  // attributes when they would decrease the threshold.
  Function *Caller = CS.getCaller();

  if (!(DefaultInlineThreshold.getNumOccurrences() > 0)) {
    if (Caller->optForMinSize() && OptMinSizeThreshold < Threshold)
      Threshold = OptMinSizeThreshold;
    else if (Caller->optForSize() && OptSizeThreshold < Threshold)
      Threshold = OptSizeThreshold;
  }

  // If profile information is available, use it to adjust the threshold of
  // hot and cold functions.
  // FIXME: The heuristics used below for determining hotness and coldness are
  // based on preliminary SPEC tuning and may not be optimal. Replace this with
  // a well-tuned heuristic based on *callsite* hotness and not callee hotness.
  uint64_t FunctionCount = 0, MaxFunctionCount = 0;
  bool HasPGOCounts = false;
  if (Callee.getEntryCount() && Callee.getParent()->getMaximumFunctionCount()) {
    HasPGOCounts = true;
    FunctionCount = Callee.getEntryCount().getValue();
    MaxFunctionCount = Callee.getParent()->getMaximumFunctionCount().getValue();
  }

  // Listen to the inlinehint attribute or profile based hotness information
  // when it would increase the threshold and the caller does not need to
  // minimize its size.
  bool InlineHint =
      Callee.hasFnAttribute(Attribute::InlineHint) ||
      (HasPGOCounts &&
       FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount));
  if (InlineHint && HintThreshold > Threshold && !Caller->optForMinSize())
    Threshold = HintThreshold;

  // Listen to the cold attribute or profile based coldness information
  // when it would decrease the threshold.
  bool ColdCallee =
      Callee.hasFnAttribute(Attribute::Cold) ||
      (HasPGOCounts &&
       FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount));
  // Command line argument for DefaultInlineThreshold will override the default
  // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
  // do not use the default cold threshold even if it is smaller.
  if ((DefaultInlineThreshold.getNumOccurrences() == 0 ||
       ColdThreshold.getNumOccurrences() > 0) &&
      ColdCallee && ColdThreshold < Threshold)
    Threshold = ColdThreshold;

  // Finally, take the target-specific inlining threshold multiplier into
  // account.
  Threshold *= TTI.getInliningThresholdMultiplier();
}

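/// \brief Try to fold a comparison to a constant.
///
/// Handles already-simplified operands, comparisons of constant-offset
/// pointers with a common base, and equality comparisons against null for
/// values known to be non-null in the callee.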
bool CallAnalyzer::visitCmpInst(CmpInst &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  // First try to handle simplified comparisons.
  if (!isa<Constant>(LHS))
    if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
      LHS = SimpleLHS;
  if (!isa<Constant>(RHS))
    if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
      RHS = SimpleRHS;
  if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
    if (Constant *CRHS = dyn_cast<Constant>(RHS))
      if (Constant *C =
              ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        return true;
      }
  }

  if (I.getOpcode() == Instruction::FCmp)
    return false;

  // Otherwise look for a comparison between constant offset pointers with
  // a common base.
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the icmp to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrCmps;
        return true;
      }
    }
  }

  // If the comparison is an equality comparison with null, we can simplify it
  // if we know the value (argument) can't be null.
  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
      isKnownNonNullInCallee(I.getOperand(0))) {
    bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
    SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
                                      : ConstantInt::getFalse(I.getType());
    return true;
  }
  // Finally check for SROA candidates in comparisons.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
    if (isa<ConstantPointerNull>(I.getOperand(1))) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}

bool CallAnalyzer::visitSub(BinaryOperator &I) {
  // Try to handle a special case: we can fold computing the difference of two
  // constant-related pointers.
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the subtract to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrDiffs;
        return true;
      }
    }
  }

  // Otherwise, fall back to the generic logic for simplifying and handling
  // instructions.
  return Base::visitSub(I);
}

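/// \brief Try to simplify a binary operator with instsimplify; operands of
/// an unsimplified operator lose any SROA cost savings.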
bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  const DataLayout &DL = F.getParent()->getDataLayout();
  if (!isa<Constant>(LHS))
    if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
      LHS = SimpleLHS;
  if (!isa<Constant>(RHS))
    if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
      RHS = SimpleRHS;
  Value *SimpleV = nullptr;
  if (auto FI = dyn_cast<FPMathOperator>(&I))
    SimpleV =
        SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
  else
    SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);

  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
    SimplifiedValues[&I] = C;
    return true;
  }

  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
  disableSROA(LHS);
  disableSROA(RHS);

  return false;
}

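/// \brief Model a load: simple loads from an SROA candidate accrue SROA
/// savings, while volatile or atomic loads disable SROA.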
bool CallAnalyzer::visitLoad(LoadInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}

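/// \brief Model a store, with the same SROA treatment as loads.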
bool CallAnalyzer::visitStore(StoreInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
    if (I.isSimple()) {
      accumulateSROACost(CostIt, InlineConstants::InstrCost);
      return true;
    }

    disableSROA(CostIt);
  }

  return false;
}

bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
  // Constant folding for extract value is trivial.
  Constant *C = dyn_cast<Constant>(I.getAggregateOperand());
  if (!C)
    C = SimplifiedValues.lookup(I.getAggregateOperand());
  if (C) {
    SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices());
    return true;
  }

  // SROA can look through these but give them a cost.
  return false;
}

bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
  // Constant folding for insert value is trivial.
  Constant *AggC = dyn_cast<Constant>(I.getAggregateOperand());
  if (!AggC)
    AggC = SimplifiedValues.lookup(I.getAggregateOperand());
  Constant *InsertedC = dyn_cast<Constant>(I.getInsertedValueOperand());
  if (!InsertedC)
    InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand());
  if (AggC && InsertedC) {
    SimplifiedValues[&I] =
        ConstantExpr::getInsertValue(AggC, InsertedC, I.getIndices());
    return true;
  }

  // SROA can look through these but give them a cost.
  return false;
}

/// \brief Try to simplify a call site.
///
/// Takes a concrete function and callsite and tries to actually simplify it by
/// analyzing the arguments and call itself with instsimplify. Returns true if
/// it has simplified the callsite to some other entity (a constant), making it
/// free.
bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
  // FIXME: Using the instsimplify logic directly for this is inefficient
  // because we have to continually rebuild the argument list even when no
  // simplifications can be performed. Until that is fixed with remapping
  // inside of instsimplify, directly constant fold calls here.
  if (!canConstantFoldCallTo(F))
    return false;

  // Try to re-map the arguments to constants.
  SmallVector<Constant *, 4> ConstantArgs;
  ConstantArgs.reserve(CS.arg_size());
  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E;
       ++I) {
    Constant *C = dyn_cast<Constant>(*I);
    if (!C)
      C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
    if (!C)
      return false; // This argument doesn't map to a constant.

    ConstantArgs.push_back(C);
  }
  if (Constant *C = ConstantFoldCall(F, ConstantArgs)) {
    SimplifiedValues[CS.getInstruction()] = C;
    return true;
  }

  return false;
}

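/// \brief Model the cost of a call site.
///
/// Handles returns-twice and noduplicate markers, constant folds calls with
/// known constant arguments, special-cases a few intrinsics, detects direct
/// recursion, and recursively analyzes indirect calls whose target has been
/// simplified to a known function.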
Chandler Carruth0539c072012-03-31 12:42:41 +0000885bool CallAnalyzer::visitCallSite(CallSite CS) {
Chandler Carruth37d25de2013-12-13 08:00:01 +0000886 if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
Duncan P. N. Exon Smithb3fc83c2015-02-14 00:12:15 +0000887 !F.hasFnAttribute(Attribute::ReturnsTwice)) {
Chandler Carruth0539c072012-03-31 12:42:41 +0000888 // This aborts the entire analysis.
889 ExposesReturnsTwice = true;
890 return false;
891 }
Chad Rosier567556a2016-04-28 14:47:23 +0000892 if (CS.isCall() && cast<CallInst>(CS.getInstruction())->cannotDuplicate())
James Molloy4f6fb952012-12-20 16:04:27 +0000893 ContainsNoDuplicateCall = true;
Chandler Carruth0539c072012-03-31 12:42:41 +0000894
Chandler Carruth0539c072012-03-31 12:42:41 +0000895 if (Function *F = CS.getCalledFunction()) {
Chandler Carruth753e21d2012-12-28 14:23:32 +0000896 // When we have a concrete function, first try to simplify it directly.
897 if (simplifyCallSite(F, CS))
898 return true;
899
900 // Next check if it is an intrinsic we know about.
901 // FIXME: Lift this into part of the InstVisitor.
902 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
903 switch (II->getIntrinsicID()) {
904 default:
905 return Base::visitCallSite(CS);
906
Peter Collingbourne7dd8dbf2016-04-22 21:18:02 +0000907 case Intrinsic::load_relative:
908 // This is normally lowered to 4 LLVM instructions.
909 Cost += 3 * InlineConstants::InstrCost;
910 return false;
911
Chandler Carruth753e21d2012-12-28 14:23:32 +0000912 case Intrinsic::memset:
913 case Intrinsic::memcpy:
914 case Intrinsic::memmove:
915 // SROA can usually chew through these intrinsics, but they aren't free.
916 return false;
Reid Kleckner60381792015-07-07 22:25:32 +0000917 case Intrinsic::localescape:
Reid Kleckner223de262015-04-14 20:38:14 +0000918 HasFrameEscape = true;
919 return false;
Chandler Carruth753e21d2012-12-28 14:23:32 +0000920 }
921 }
922
Chandler Carruth0539c072012-03-31 12:42:41 +0000923 if (F == CS.getInstruction()->getParent()->getParent()) {
924 // This flag will fully abort the analysis, so don't bother with anything
925 // else.
Nadav Rotem4eb3d4b2012-09-19 08:08:04 +0000926 IsRecursiveCall = true;
Chandler Carruth0539c072012-03-31 12:42:41 +0000927 return false;
928 }
929
Chandler Carruth0ba8db42013-01-22 11:26:02 +0000930 if (TTI.isLoweredToCall(F)) {
Chandler Carruth0539c072012-03-31 12:42:41 +0000931 // We account for the average 1 instruction per call argument setup
932 // here.
933 Cost += CS.arg_size() * InlineConstants::InstrCost;
934
935 // Everything other than inline ASM will also have a significant cost
936 // merely from making the call.
937 if (!isa<InlineAsm>(CS.getCalledValue()))
938 Cost += InlineConstants::CallPenalty;
939 }
940
941 return Base::visitCallSite(CS);
942 }
943
944 // Otherwise we're in a very special case -- an indirect function call. See
945 // if we can be particularly clever about this.
946 Value *Callee = CS.getCalledValue();
947
948 // First, pay the price of the argument setup. We account for the average
949 // 1 instruction per call argument setup here.
950 Cost += CS.arg_size() * InlineConstants::InstrCost;
951
952 // Next, check if this happens to be an indirect function call to a known
953 // function in this inline context. If not, we've done all we can.
954 Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
955 if (!F)
956 return Base::visitCallSite(CS);
957
958 // If we have a constant that we are calling as a function, we can peer
959 // through it and see the function target. This happens not infrequently
960 // during devirtualization and so we want to give it a hefty bonus for
961 // inlining, but cap that bonus in the event that inlining wouldn't pan
962 // out. Pretend to inline the function, with a custom threshold.
Easwaran Ramanb1bd3982016-03-08 00:36:35 +0000963 CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS);
Chandler Carruth0539c072012-03-31 12:42:41 +0000964 if (CA.analyzeCall(CS)) {
965 // We were able to inline the indirect call! Subtract the cost from the
Easwaran Raman6d90d9f2015-12-07 21:21:20 +0000966 // threshold to get the bonus we want to apply, but don't go below zero.
967 Cost -= std::max(0, CA.getThreshold() - CA.getCost());
Chandler Carruth0539c072012-03-31 12:42:41 +0000968 }
969
970 return Base::visitCallSite(CS);
971}
972
bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
  // At least one return instruction will be free after inlining.
  bool Free = !HasReturn;
  HasReturn = true;
  return Free;
}

bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
  // We model unconditional branches as essentially free -- they really
  // shouldn't exist at all, but handling them makes the behavior of the
  // inliner more regular and predictable. Interestingly, conditional branches
  // which will fold away are also free.
  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
         dyn_cast_or_null<ConstantInt>(
             SimplifiedValues.lookup(BI.getCondition()));
}

bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
  // We model unconditional switches as free, see the comments on handling
  // branches.
  if (isa<ConstantInt>(SI.getCondition()))
    return true;
  if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
    if (isa<ConstantInt>(V))
      return true;

  // Otherwise, we need to accumulate a cost proportional to the number of
  // distinct successor blocks. This fan-out in the CFG cannot be represented
  // for free even if we can represent the core switch as a jumptable that
  // takes a single instruction.
  //
  // NB: We convert large switches which are just used to initialize large phi
  // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
  // inlining those. It will prevent inlining in cases where the optimization
  // does not (yet) fire.
  SmallPtrSet<BasicBlock *, 8> SuccessorBlocks;
  SuccessorBlocks.insert(SI.getDefaultDest());
  for (auto I = SI.case_begin(), E = SI.case_end(); I != E; ++I)
    SuccessorBlocks.insert(I.getCaseSuccessor());
  // Add cost corresponding to the number of distinct destinations. The first
  // we model as free because of fallthrough.
  Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost;
  return false;
}

bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
  // We never want to inline functions that contain an indirectbr. Inlining
  // one would be incorrect because all the blockaddresses (in static global
  // initializers for example) would be referring to the original function,
  // and this indirect jump would jump from the inlined copy of the function
  // into the original function which is extremely undefined behavior.
  // FIXME: This logic isn't really right; we can safely inline functions with
  // indirectbr's as long as no other function or global references the
  // blockaddress of a block within the current function.
  HasIndirectBr = true;
  return false;
}

bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a resume instruction.
  return false;
}

bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a cleanupret instruction.
  return false;
}

bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
  // FIXME: It's not clear that a single instruction is an accurate model for
  // the inline cost of a catchret instruction.
  return false;
}

bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
  // FIXME: It might be reasonable to discount the cost of instructions leading
  // to unreachable as they have the lowest possible impact on both runtime and
  // code size.
  return true; // No actual code is needed for unreachable.
}

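/// \brief Fallback for instructions without a dedicated visitor: free if TTI
/// says so, otherwise any SROA-able operands become non-viable.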
bool CallAnalyzer::visitInstruction(Instruction &I) {
  // Some instructions are free. All of the free intrinsics can also be
  // handled by SROA, etc.
  if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
    return true;

  // We found something we don't understand or can't handle. Mark any SROA-able
  // values in the operand list as no longer viable.
  for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
    disableSROA(*OI);

  return false;
}

/// \brief Analyze a basic block for its contribution to the inline cost.
///
/// This method walks the analyzer over every instruction in the given basic
/// block and accounts for their cost during inlining at this callsite. It
/// aborts early if the threshold has been exceeded or an impossible to inline
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
                                SmallPtrSetImpl<const Value *> &EphValues) {
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
    // FIXME: Currently, the number of instructions in a function, regardless
    // of our ability to simplify them during inlining to constants or dead
    // code, is actually used by the vector bonus heuristic. As long as that's
    // true, we have to special case debug intrinsics here to prevent
    // differences in inlining due to debug symbols. Eventually, the number of
    // unsimplified instructions shouldn't factor into the cost computation,
    // but until then, hack around it here.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip ephemeral values.
    if (EphValues.count(&*I))
      continue;

    ++NumInstructions;
    if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
      ++NumVectorInstructions;

    // If the instruction is floating point, and the target says this operation
    // is expensive or the function has the "use-soft-float" attribute, this may
    // eventually become a library call. Treat the cost as such.
    if (I->getType()->isFloatingPointTy()) {
      bool hasSoftFloatAttr = false;

      // If the function has the "use-soft-float" attribute, mark it as
      // expensive.
      if (F.hasFnAttribute("use-soft-float")) {
        Attribute Attr = F.getFnAttribute("use-soft-float");
        StringRef Val = Attr.getValueAsString();
        if (Val == "true")
          hasSoftFloatAttr = true;
      }

      if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
          hasSoftFloatAttr)
        Cost += InlineConstants::CallPenalty;
    }

    // If the instruction simplified to a constant, there is no cost to this
    // instruction. Visit the instructions using our InstVisitor to account for
    // all of the per-instruction logic. The visit tree returns true if we
    // consumed the instruction in any way, and false if the instruction's base
    // cost should count against inlining.
    if (Base::visit(&*I))
      ++NumInstructionsSimplified;
    else
      Cost += InlineConstants::InstrCost;

    // If the visit of this instruction detected an uninlinable pattern, abort.
    if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
        HasIndirectBr || HasFrameEscape)
      return false;

    // If the caller is a recursive function then we don't want to inline
    // functions which allocate a lot of stack space because it would increase
    // the caller stack usage dramatically.
    if (IsCallerRecursive &&
        AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
      return false;

    // Check if we've passed the maximum possible threshold so we don't spin in
    // huge basic blocks that will never inline.
    if (Cost > Threshold)
      return false;
  }

  return true;
}

1149/// \brief Compute the base pointer and cumulative constant offsets for V.
1150///
1151/// This strips all constant offsets off of V, leaving it the base pointer, and
1152/// accumulates the total constant offset applied in the returned constant. It
1153/// returns 0 if V is not a pointer, and returns the constant '0' if there are
1154/// no constant offsets applied.
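///
/// For example (illustrative): given
///   %p = getelementptr inbounds [4 x i32], [4 x i32]* %a, i32 0, i32 2
/// V is rewritten to %a and the returned constant is 8 (two 4-byte i32
/// elements), as an IntPtr-typed ConstantInt.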
ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
  if (!V->getType()->isPointerTy())
    return nullptr;

  const DataLayout &DL = F.getParent()->getDataLayout();
  unsigned IntPtrWidth = DL.getPointerSizeInBits();
  APInt Offset = APInt::getNullValue(IntPtrWidth);

  // Even though we don't look through PHI nodes, we could be called on an
  // instruction in an unreachable block, which may be on a cycle.
  SmallPtrSet<Value *, 4> Visited;
  Visited.insert(V);
  do {
    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
      if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
        return nullptr;
      V = GEP->getPointerOperand();
    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
      V = cast<Operator>(V)->getOperand(0);
    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
      if (GA->isInterposable())
        break;
      V = GA->getAliasee();
    } else {
      break;
    }
    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
  } while (Visited.insert(V).second);

  Type *IntPtrTy = DL.getIntPtrType(V->getContext());
  return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
}

/// \brief Analyze a call site for potential inlining.
///
/// Returns true if inlining this call is viable, and false if it is not
/// viable. It computes the cost and adjusts the threshold based on numerous
/// factors and heuristics. If this method returns false but the computed cost
/// is below the computed threshold, then inlining was forcibly disabled by
/// some artifact of the routine.
bool CallAnalyzer::analyzeCall(CallSite CS) {
  ++NumCallsAnalyzed;

  // Perform some tweaks to the cost and threshold based on the direct
  // callsite information.

  // We want to more aggressively inline vector-dense kernels, so up the
  // threshold, and we'll lower it if the % of vector instructions gets too
  // low. Note that these bonuses are somewhat arbitrary and evolved over time
  // by accident as much as because they are principled bonuses.
  //
  // FIXME: It would be nice to remove all such bonuses. At least it would be
  // nice to base the bonus values on something more scientific.
  assert(NumInstructions == 0);
  assert(NumVectorInstructions == 0);

  // Update the threshold based on call site properties.
  updateThreshold(CS, F);

  FiftyPercentVectorBonus = 3 * Threshold / 2;
  TenPercentVectorBonus = 3 * Threshold / 4;
  const DataLayout &DL = F.getParent()->getDataLayout();

  // Track whether the post-inlining function would have more than one basic
  // block. A single basic block is often intended for inlining. Balloon the
  // threshold by 50% until we pass the single-BB phase.
  bool SingleBB = true;
  int SingleBBBonus = Threshold / 2;

  // Speculatively apply all possible bonuses to Threshold. If Cost ever
  // exceeds this Threshold, then since Cost cannot decrease while we process
  // the body, we can stop processing the rest of the function.
  Threshold += (SingleBBBonus + FiftyPercentVectorBonus);
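  // Worked example (illustrative, assuming updateThreshold left the default
  // -inline-threshold of 225 in place): SingleBBBonus = 112 and
  // FiftyPercentVectorBonus = 337, so the speculative Threshold becomes
  // 225 + 112 + 337 = 674.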

  // Give out bonuses per argument, as the instructions setting them up will
  // be gone after inlining.
  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
    if (CS.isByValArgument(I)) {
      // We approximate the number of loads and stores needed by dividing the
      // size of the byval type by the target's pointer size.
      PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
      unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
      unsigned PointerSize = DL.getPointerSizeInBits();
      // Ceiling division.
      unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;

      // If it generates more than 8 stores it is likely to be expanded as an
      // inline memcpy so we take that as an upper bound. Otherwise we assume
      // one load and one store per word copied.
      // FIXME: The maxStoresPerMemcpy setting from the target should be used
      // here instead of a magic number of 8, but it's not available via
      // DataLayout.
      NumStores = std::min(NumStores, 8U);

      Cost -= 2 * NumStores * InlineConstants::InstrCost;
    } else {
      // For non-byval arguments subtract off one instruction per call
      // argument.
      Cost -= InlineConstants::InstrCost;
    }
  }
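  // Worked example (illustrative): a byval argument of a 512-bit struct with
  // 64-bit pointers gives NumStores = (512 + 63) / 64 = 8, which is within
  // the cap, so Cost is credited 2 * 8 * InstrCost for that argument.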

  // If there is only one call of the function, and it has internal linkage,
  // the cost of inlining it drops dramatically.
  bool OnlyOneCallAndLocalLinkage =
      F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
  if (OnlyOneCallAndLocalLinkage)
    Cost += InlineConstants::LastCallToStaticBonus;

  // If this function uses the coldcc calling convention, prefer not to inline
  // it.
  if (F.getCallingConv() == CallingConv::Cold)
    Cost += InlineConstants::ColdccPenalty;

  // Check if we're done. This can happen due to bonuses and penalties.
  if (Cost > Threshold)
    return false;

  if (F.empty())
    return true;

  Function *Caller = CS.getInstruction()->getParent()->getParent();
  // Check if the caller function is recursive itself.
  for (User *U : Caller->users()) {
    CallSite Site(U);
    if (!Site)
      continue;
    Instruction *I = Site.getInstruction();
    if (I->getParent()->getParent() == Caller) {
      IsCallerRecursive = true;
      break;
    }
  }

  // Populate our simplified values by mapping from function arguments to call
  // arguments with known important simplifications.
  CallSite::arg_iterator CAI = CS.arg_begin();
  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
       FAI != FAE; ++FAI, ++CAI) {
    assert(CAI != CS.arg_end());
    if (Constant *C = dyn_cast<Constant>(CAI))
      SimplifiedValues[&*FAI] = C;

    Value *PtrArg = *CAI;
    if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
      ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());

      // We can SROA any pointer arguments derived from alloca instructions.
      if (isa<AllocaInst>(PtrArg)) {
        SROAArgValues[&*FAI] = PtrArg;
        SROAArgCosts[PtrArg] = 0;
      }
    }
  }
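  // Example (illustrative): if this call site passes i32 42 for parameter %x,
  // SimplifiedValues maps %x to 42, which lets compares and branches on %x
  // fold to constants while the callee's blocks are analyzed below.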
  NumConstantArgs = SimplifiedValues.size();
  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
  NumAllocaArgs = SROAArgValues.size();

  // FIXME: If a caller has multiple calls to a callee, we end up recomputing
  // the ephemeral values multiple times (and they're completely determined by
  // the callee, so this is purely duplicate work).
  SmallPtrSet<const Value *, 32> EphValues;
  CodeMetrics::collectEphemeralValues(&F, &ACT->getAssumptionCache(F),
                                      EphValues);
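  // Ephemeral values are those that exist only to feed @llvm.assume calls;
  // they vanish after inlining, so analyzeBlock charges nothing for them.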

  // The worklist of live basic blocks in the callee *after* inlining. We avoid
  // adding basic blocks of the callee which can be proven to be dead for this
  // particular call site in order to get more accurate cost estimates. This
  // requires a somewhat heavyweight iteration pattern: we need to walk the
  // basic blocks in a breadth-first order as we insert live successors. To
  // accomplish this, we use a small-size optimized SetVector, sized for few
  // iterations because we expect to exit early once we cross our threshold.
  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
                    SmallPtrSet<BasicBlock *, 16>>
      BBSetVector;
  BBSetVector BBWorklist;
  BBWorklist.insert(&F.getEntryBlock());
  // Note that we *must not* cache the size: this loop grows the worklist.
  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
    // Bail out the moment we cross the threshold. This means we'll under-count
    // the cost, but only when undercounting doesn't matter.
    if (Cost > Threshold)
      break;

    BasicBlock *BB = BBWorklist[Idx];
    if (BB->empty())
      continue;

    // Disallow inlining a blockaddress. A blockaddress only has defined
    // behavior for an indirect branch in the same function, and we do not
    // currently support inlining indirect branches. But, the inliner may not
    // see an indirect branch that ends up being dead code at a particular call
    // site. If the blockaddress escapes the function, e.g., via a global
    // variable, inlining may lead to an invalid cross-function reference.
    if (BB->hasAddressTaken())
      return false;

    // Analyze the cost of this block. If we blow through the threshold, this
    // returns false, and we can bail out.
    if (!analyzeBlock(BB, EphValues))
      return false;

    TerminatorInst *TI = BB->getTerminator();

    // Add in the live successors by first checking whether we have a
    // terminator that may be simplified based on the values simplified by
    // this call.
    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (BI->isConditional()) {
        Value *Cond = BI->getCondition();
        if (ConstantInt *SimpleCond =
                dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
          BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
          continue;
        }
      }
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
      Value *Cond = SI->getCondition();
      if (ConstantInt *SimpleCond =
              dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
        BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
        continue;
      }
    }
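    // For instance (illustrative): if the callee branches on an i1 parameter
    // and this call site passes true, only the taken successor is enqueued;
    // the dead arm is never visited and never charged to Cost.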

    // If we're unable to select a particular successor, just count all of
    // them.
    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
         ++TIdx)
      BBWorklist.insert(TI->getSuccessor(TIdx));

    // If we had any successors at this point, then post-inlining is likely to
    // have them as well. Note that we assume any basic blocks which existed
    // due to branches or switches which folded above will also fold after
    // inlining.
    if (SingleBB && TI->getNumSuccessors() > 1) {
      // Take off the bonus we applied to the threshold.
      Threshold -= SingleBBBonus;
      SingleBB = false;
    }
  }

  // If the callee contains a noduplicate call, we can still inline as long as
  // inlining this call site would cause the removal of the function being
  // inlined (so the noduplicate call is moved, not duplicated).
  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
    return false;

  // We applied the maximum possible vector bonus at the beginning. Now,
  // subtract the excess bonus, if any, from the Threshold before
  // comparing against Cost.
  if (NumVectorInstructions <= NumInstructions / 10)
    Threshold -= FiftyPercentVectorBonus;
  else if (NumVectorInstructions <= NumInstructions / 2)
    Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus);

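  // Taking std::max with 1 below means a call whose Cost simplified to zero
  // or below is still inlined even when the threshold has been driven to zero
  // or negative.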
  return Cost < std::max(1, Threshold);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// \brief Dump stats about this call's analysis.
LLVM_DUMP_METHOD void CallAnalyzer::dump() {
#define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
  DEBUG_PRINT_STAT(NumConstantArgs);
  DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
  DEBUG_PRINT_STAT(NumAllocaArgs);
  DEBUG_PRINT_STAT(NumConstantPtrCmps);
  DEBUG_PRINT_STAT(NumConstantPtrDiffs);
  DEBUG_PRINT_STAT(NumInstructionsSimplified);
  DEBUG_PRINT_STAT(NumInstructions);
  DEBUG_PRINT_STAT(SROACostSavings);
  DEBUG_PRINT_STAT(SROACostSavingsLost);
  DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
  DEBUG_PRINT_STAT(Cost);
  DEBUG_PRINT_STAT(Threshold);
#undef DEBUG_PRINT_STAT
}
#endif

/// \brief Test that two functions either both have or both lack the given
/// attribute.
template <typename AttrKind>
static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) {
  return F1->getFnAttribute(Attr) == F2->getFnAttribute(Attr);
}

/// \brief Test that there are no attribute conflicts between Caller and Callee
/// that prevent inlining.
static bool functionsHaveCompatibleAttributes(Function *Caller,
                                              Function *Callee,
                                              TargetTransformInfo &TTI) {
  return TTI.areInlineCompatible(Caller, Callee) &&
         AttributeFuncs::areInlineCompatible(*Caller, *Callee);
}

InlineCost llvm::getInlineCost(CallSite CS, int DefaultThreshold,
                               TargetTransformInfo &CalleeTTI,
                               AssumptionCacheTracker *ACT) {
  return getInlineCost(CS, CS.getCalledFunction(), DefaultThreshold, CalleeTTI,
                       ACT);
}

int llvm::computeThresholdFromOptLevels(unsigned OptLevel,
                                        unsigned SizeOptLevel) {
  if (OptLevel > 2)
    return OptAggressiveThreshold;
  if (SizeOptLevel == 1) // -Os
    return OptSizeThreshold;
  if (SizeOptLevel == 2) // -Oz
    return OptMinSizeThreshold;
  return DefaultInlineThreshold;
}
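// For example: -O3 yields OptAggressiveThreshold (275), -Os yields
// OptSizeThreshold (75), -Oz yields OptMinSizeThreshold (25), and anything
// else falls back to DefaultInlineThreshold (225 unless -inline-threshold
// overrides it).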

int llvm::getDefaultInlineThreshold() { return DefaultInlineThreshold; }

InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
                               int DefaultThreshold,
                               TargetTransformInfo &CalleeTTI,
                               AssumptionCacheTracker *ACT) {

  // Cannot inline indirect calls.
  if (!Callee)
    return llvm::InlineCost::getNever();

  // Calls to functions with always-inline attributes should be inlined
  // whenever possible.
  if (CS.hasFnAttr(Attribute::AlwaysInline)) {
    if (isInlineViable(*Callee))
      return llvm::InlineCost::getAlways();
    return llvm::InlineCost::getNever();
  }

  // Never inline functions with conflicting attributes (unless callee has
  // always-inline attribute).
  if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee, CalleeTTI))
    return llvm::InlineCost::getNever();

  // Don't inline this call if the caller has the optnone attribute.
  if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone))
    return llvm::InlineCost::getNever();

  // Don't inline functions which can be interposed at link-time. Don't inline
  // functions marked noinline or call sites marked noinline.
  // Note: inlining non-exact non-interposable functions is fine, since we know
  // we have *a* correct implementation of the source level function.
  if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) ||
      CS.isNoInline())
    return llvm::InlineCost::getNever();

  DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
                     << "...\n");

  CallAnalyzer CA(CalleeTTI, ACT, *Callee, DefaultThreshold, CS);
  bool ShouldInline = CA.analyzeCall(CS);

  DEBUG(CA.dump());

  // Check if there was a reason to force inlining or no inlining.
  if (!ShouldInline && CA.getCost() < CA.getThreshold())
    return InlineCost::getNever();
  if (ShouldInline && CA.getCost() >= CA.getThreshold())
    return InlineCost::getAlways();

  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
}

bool llvm::isInlineViable(Function &F) {
  bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
    // Disallow inlining of functions which contain indirect branches or
    // blockaddresses.
    if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
      return false;

    for (auto &II : *BI) {
      CallSite CS(&II);
      if (!CS)
        continue;

      // Disallow recursive calls.
      if (&F == CS.getCalledFunction())
        return false;

      // Disallow calls which expose returns-twice to a function not previously
      // attributed as such.
      if (!ReturnsTwice && CS.isCall() &&
          cast<CallInst>(CS.getInstruction())->canReturnTwice())
        return false;
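      // (Illustrative: setjmp is the classic returns-twice callee; duplicating
      // such a call into a caller not marked returns_twice could miscompile.)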

      // Disallow inlining functions that call @llvm.localescape. Doing this
      // correctly would require major changes to the inliner.
      if (CS.getCalledFunction() &&
          CS.getCalledFunction()->getIntrinsicID() ==
              llvm::Intrinsic::localescape)
        return false;
    }
  }

  return true;
}