//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements inline cost analysis.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/InlineCost.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "inline-cost"

STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");

static cl::opt<int> InlineThreshold(
    "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
    cl::desc("Control the amount of inlining to perform (default = 225)"));

static cl::opt<int> HintThreshold(
    "inlinehint-threshold", cl::Hidden, cl::init(325),
    cl::desc("Threshold for inlining functions with inline hint"));

static cl::opt<int>
    ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
                          cl::init(45),
                          cl::desc("Threshold for inlining cold callsites"));

// This threshold helps the performance of instrumentation-based PGO before
// we actually hook up the inliner with analysis passes such as BPI and BFI.
static cl::opt<int> ColdThreshold(
    "inlinecold-threshold", cl::Hidden, cl::init(45),
    cl::desc("Threshold for inlining functions with cold attribute"));

static cl::opt<int>
    HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
                         cl::ZeroOrMore,
                         cl::desc("Threshold for hot callsites"));

static cl::opt<int> LocallyHotCallSiteThreshold(
    "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore,
    cl::desc("Threshold for locally hot callsites"));

static cl::opt<int> ColdCallSiteRelFreq(
    "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
    cl::desc("Maximum block frequency, expressed as a percentage of caller's "
             "entry frequency, for a callsite to be cold in the absence of "
             "profile information."));

static cl::opt<int> HotCallSiteRelFreq(
    "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore,
    cl::desc("Minimum block frequency, expressed as a multiple of caller's "
             "entry frequency, for a callsite to be hot in the absence of "
             "profile information."));

static cl::opt<bool> OptComputeFullInlineCost(
    "inline-cost-full", cl::Hidden, cl::init(false),
    cl::desc("Compute the full inline cost of a call site even when the cost "
             "exceeds the threshold."));

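// All of the thresholds above are exposed as 'opt' flags. As a sketch (flag
// names as defined here, assuming the legacy -inline pass is used), one could
// force more aggressive inlining on a test input with:
//   opt -inline -inline-threshold=500 -S input.ll -o inlined.ll
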
namespace {

class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  typedef InstVisitor<CallAnalyzer, bool> Base;
  friend class InstVisitor<CallAnalyzer, bool>;

  /// The TargetTransformInfo available for this compilation.
  const TargetTransformInfo &TTI;

  /// Getter for the cache of @llvm.assume intrinsics.
  std::function<AssumptionCache &(Function &)> &GetAssumptionCache;

  /// Getter for BlockFrequencyInfo.
  Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI;

  /// Profile summary information.
  ProfileSummaryInfo *PSI;

  /// The called function.
  Function &F;

  /// Cache the DataLayout since we use it a lot.
  const DataLayout &DL;

  /// The OptimizationRemarkEmitter available for this compilation.
  OptimizationRemarkEmitter *ORE;

  /// The candidate callsite being analyzed. Please do not use this to do
  /// analysis in the caller function; we want the inline cost query to be
  /// easily cacheable. Instead, use the cover function paramHasAttr.
  CallSite CandidateCS;

  /// Tunable parameters that control the analysis.
  const InlineParams &Params;

  int Threshold;
  int Cost;
  bool ComputeFullInlineCost;

  bool IsCallerRecursive;
  bool IsRecursiveCall;
  bool ExposesReturnsTwice;
  bool HasDynamicAlloca;
  bool ContainsNoDuplicateCall;
  bool HasReturn;
  bool HasIndirectBr;
  bool HasUninlineableIntrinsic;
  bool InitsVargArgs;

  /// Number of bytes allocated statically by the callee.
  uint64_t AllocatedSize;
  unsigned NumInstructions, NumVectorInstructions;
  int VectorBonus, TenPercentVectorBonus;
  // Bonus to be applied when the callee has only one reachable basic block.
  int SingleBBBonus;

  /// While we walk the potentially-inlined instructions, we build up and
  /// maintain a mapping of simplified values specific to this callsite. The
  /// idea is to propagate any special information we have about arguments to
  /// this call through the inlinable section of the function, and account for
  /// likely simplifications post-inlining. The most important aspect we track
  /// is CFG altering simplifications -- when we prove a basic block dead, that
  /// can cause dramatic shifts in the cost of inlining a function.
  DenseMap<Value *, Constant *> SimplifiedValues;

  /// Keep track of the values which map back (through function arguments) to
  /// allocas on the caller stack which could be simplified through SROA.
  DenseMap<Value *, Value *> SROAArgValues;

  /// The mapping of caller Alloca values to their accumulated cost savings. If
  /// we have to disable SROA for one of the allocas, this tells us how much
  /// cost must be added.
  DenseMap<Value *, int> SROAArgCosts;

  /// Keep track of values which map to a pointer base and constant offset.
  DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;

  /// Keep track of dead blocks due to the constant arguments.
  SetVector<BasicBlock *> DeadBlocks;

  /// The mapping of the blocks to their known unique successors due to the
  /// constant arguments.
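  /// For example, when a conditional branch is proven one-sided by a constant
  /// argument, the surviving successor is recorded here, and blocks reachable
  /// only through the other edge end up in DeadBlocks.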
  DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;

  /// Model the elimination of repeated loads that is expected to happen
  /// whenever we simplify away the stores that would otherwise clobber them.
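  /// For example, while no intervening store or call disables it, a repeated
  /// load of the same address,
  ///   %v1 = load i32, i32* %a
  ///   %v2 = load i32, i32* %a
  /// is modeled as free, with the savings tracked in LoadEliminationCost.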
  bool EnableLoadElimination;
  SmallPtrSet<Value *, 16> LoadAddrSet;
  int LoadEliminationCost;

  // Custom simplification helper routines.
  bool isAllocaDerivedArg(Value *V);
  bool lookupSROAArgAndCost(Value *V, Value *&Arg,
                            DenseMap<Value *, int>::iterator &CostIt);
  void disableSROA(DenseMap<Value *, int>::iterator CostIt);
  void disableSROA(Value *V);
  void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
  void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                          int InstructionCost);
  void disableLoadElimination();
  bool isGEPFree(GetElementPtrInst &GEP);
  bool canFoldInboundsGEP(GetElementPtrInst &I);
  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
  bool simplifyCallSite(Function *F, CallSite CS);
  template <typename Callable>
  bool simplifyInstruction(Instruction &I, Callable Evaluate);
  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);

  /// Return true if the given argument to the function being considered for
  /// inlining has the given attribute set either at the call site or the
  /// function declaration. Primarily used to inspect call site specific
  /// attributes since these can be more precise than the ones on the callee
  /// itself.
  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);

  /// Return true if the given value is known non-null within the callee if
  /// inlined through this particular callsite.
  bool isKnownNonNullInCallee(Value *V);

  /// Update Threshold based on callsite properties such as callee
  /// attributes and callee hotness for PGO builds. The Callee is explicitly
  /// passed to support analyzing indirect calls whose target is inferred by
  /// analysis.
  void updateThreshold(CallSite CS, Function &Callee);

  /// Return true if size growth is allowed when inlining the callee at CS.
  bool allowSizeGrowth(CallSite CS);

  /// Return true if \p CS is a cold callsite.
  bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);

  /// Return a higher threshold if \p CS is a hot callsite.
  Optional<int> getHotCallSiteThreshold(CallSite CS,
                                        BlockFrequencyInfo *CallerBFI);

  // Custom analysis routines.
  InlineResult analyzeBlock(BasicBlock *BB,
                            SmallPtrSetImpl<const Value *> &EphValues);

  // Disable several entry points to the visitor so we don't accidentally use
  // them by declaring but not defining them here.
  void visit(Module *);
  void visit(Module &);
  void visit(Function *);
  void visit(Function &);
  void visit(BasicBlock *);
  void visit(BasicBlock &);

  // Provide base case for our instruction visit.
  bool visitInstruction(Instruction &I);

  // Our visit overrides.
  bool visitAlloca(AllocaInst &I);
  bool visitPHI(PHINode &I);
  bool visitGetElementPtr(GetElementPtrInst &I);
  bool visitBitCast(BitCastInst &I);
  bool visitPtrToInt(PtrToIntInst &I);
  bool visitIntToPtr(IntToPtrInst &I);
  bool visitCastInst(CastInst &I);
  bool visitUnaryInstruction(UnaryInstruction &I);
  bool visitCmpInst(CmpInst &I);
  bool visitSub(BinaryOperator &I);
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitLoad(LoadInst &I);
  bool visitStore(StoreInst &I);
  bool visitExtractValue(ExtractValueInst &I);
  bool visitInsertValue(InsertValueInst &I);
  bool visitCallSite(CallSite CS);
  bool visitReturnInst(ReturnInst &RI);
  bool visitBranchInst(BranchInst &BI);
  bool visitSelectInst(SelectInst &SI);
  bool visitSwitchInst(SwitchInst &SI);
  bool visitIndirectBrInst(IndirectBrInst &IBI);
  bool visitResumeInst(ResumeInst &RI);
  bool visitCleanupReturnInst(CleanupReturnInst &RI);
  bool visitCatchReturnInst(CatchReturnInst &RI);
  bool visitUnreachableInst(UnreachableInst &I);

public:
  CallAnalyzer(const TargetTransformInfo &TTI,
               std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
               Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
               ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
               Function &Callee, CallSite CSArg, const InlineParams &Params)
      : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
        PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
        CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
        Cost(0), ComputeFullInlineCost(OptComputeFullInlineCost ||
                                       Params.ComputeFullInlineCost || ORE),
        IsCallerRecursive(false), IsRecursiveCall(false),
        ExposesReturnsTwice(false), HasDynamicAlloca(false),
        ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
        HasUninlineableIntrinsic(false), InitsVargArgs(false), AllocatedSize(0),
        NumInstructions(0), NumVectorInstructions(0), VectorBonus(0),
        SingleBBBonus(0), EnableLoadElimination(true), LoadEliminationCost(0),
        NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
        NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
        NumInstructionsSimplified(0), SROACostSavings(0),
        SROACostSavingsLost(0) {}

  InlineResult analyzeCall(CallSite CS);

  int getThreshold() { return Threshold; }
  int getCost() { return Cost; }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs;
  unsigned NumConstantOffsetPtrArgs;
  unsigned NumAllocaArgs;
  unsigned NumConstantPtrCmps;
  unsigned NumConstantPtrDiffs;
  unsigned NumInstructionsSimplified;
  unsigned SROACostSavings;
  unsigned SROACostSavingsLost;

  void dump();
};

} // namespace

/// Test whether the given value is an Alloca-derived function argument.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  return SROAArgValues.count(V);
}

/// Lookup the SROA-candidate argument and cost iterator which V maps to.
/// Returns false if V does not map to a SROA-candidate.
bool CallAnalyzer::lookupSROAArgAndCost(
    Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
  if (SROAArgValues.empty() || SROAArgCosts.empty())
    return false;

  DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
  if (ArgIt == SROAArgValues.end())
    return false;

  Arg = ArgIt->second;
  CostIt = SROAArgCosts.find(Arg);
  return CostIt != SROAArgCosts.end();
}

/// Disable SROA for the candidate marked by this cost iterator.
///
/// This marks the candidate as no longer viable for SROA, and adds the cost
/// savings associated with it back into the inline cost measurement.
void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
  // If we're no longer able to perform SROA we need to undo its cost savings
  // and prevent subsequent analysis.
  Cost += CostIt->second;
  SROACostSavings -= CostIt->second;
  SROACostSavingsLost += CostIt->second;
  SROAArgCosts.erase(CostIt);
  disableLoadElimination();
}

/// If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(V, SROAArg, CostIt))
    disableSROA(CostIt);
}

/// Accumulate the given cost for a particular SROA candidate.
void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                                      int InstructionCost) {
  CostIt->second += InstructionCost;
  SROACostSavings += InstructionCost;
}

void CallAnalyzer::disableLoadElimination() {
  if (EnableLoadElimination) {
    Cost += LoadEliminationCost;
    LoadEliminationCost = 0;
    EnableLoadElimination = false;
  }
}

/// Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
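/// For example, under a typical data layout, the inbounds GEP
///   getelementptr inbounds { i32, i32 }, { i32, i32 }* %p, i32 0, i32 1
/// contributes a constant 4-byte offset for field 1.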
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
  assert(IntPtrWidth == Offset.getBitWidth());

  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
        OpC = dyn_cast<ConstantInt>(SimpleOp);
    if (!OpC)
      return false;
    if (OpC->isZero())
      continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = DL.getStructLayout(STy);
      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
      continue;
    }

    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  }
  return true;
}

/// Use TTI to check whether a GEP is free.
///
/// Respects any simplified values known during the analysis of this callsite.
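/// For example, a target that can fold a small constant offset into its
/// load/store addressing modes may report such a GEP as TCC_Free.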
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
  SmallVector<Value *, 4> Operands;
  Operands.push_back(GEP.getOperand(0));
  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
    if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
      Operands.push_back(SimpleOp);
    else
      Operands.push_back(*I);
  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands);
}

bool CallAnalyzer::visitAlloca(AllocaInst &I) {
  // Check whether inlining will turn a dynamic alloca into a static
  // alloca and handle that case.
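  // For example, 'alloca i32, i32 %n' becomes static here when %n has
  // simplified to a constant for this particular call site.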
  if (I.isArrayAllocation()) {
    Constant *Size = SimplifiedValues.lookup(I.getArraySize());
    if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
      Type *Ty = I.getAllocatedType();
      AllocatedSize = SaturatingMultiplyAdd(
          AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize);
      return Base::visitAlloca(I);
    }
  }

  // Accumulate the allocated size.
  if (I.isStaticAlloca()) {
    Type *Ty = I.getAllocatedType();
    AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize);
  }

  // We will happily inline static alloca instructions.
  if (I.isStaticAlloca())
    return Base::visitAlloca(I);

  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
  // a variety of reasons, and so we would like to not inline them into
  // functions which don't currently have a dynamic alloca. This simply
  // disables inlining altogether in the presence of a dynamic alloca.
  HasDynamicAlloca = true;
  return false;
}

bool CallAnalyzer::visitPHI(PHINode &I) {
  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate its bonuses. The idea is to disable
  // SROA if it *might* be used in an inappropriate manner.

  // Phi nodes are always zero-cost.
  // FIXME: Pointer sizes may differ between different address spaces, so do we
  // need to use correct address space in the call to getPointerSizeInBits here?
  // Or could we skip the getPointerSizeInBits call completely? As far as I can
  // see the ZeroOffset is used as a dummy value, so we can probably use any
  // bit width for the ZeroOffset?
  APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits(0));
  bool CheckSROA = I.getType()->isPointerTy();

  // Track the constant or pointer with constant offset we've seen so far.
  Constant *FirstC = nullptr;
  std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
  Value *FirstV = nullptr;

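  // For example, a phi whose live incoming values all simplify to the same
  // constant,
  //   %p = phi i32 [ 4, %bb0 ], [ 4, %bb1 ]
  // is recorded in SimplifiedValues as that constant below.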
  for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = I.getIncomingBlock(i);
    // If the incoming block is dead, skip the incoming block.
    if (DeadBlocks.count(Pred))
      continue;
    // If the parent block of phi is not the known successor of the incoming
    // block, skip the incoming block.
    BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
    if (KnownSuccessor && KnownSuccessor != I.getParent())
      continue;

    Value *V = I.getIncomingValue(i);
    // If the incoming value is this phi itself, skip the incoming value.
    if (&I == V)
      continue;

    Constant *C = dyn_cast<Constant>(V);
    if (!C)
      C = SimplifiedValues.lookup(V);

    std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
    if (!C && CheckSROA)
      BaseAndOffset = ConstantOffsetPtrs.lookup(V);

    if (!C && !BaseAndOffset.first)
      // The incoming value is neither a constant nor a pointer with constant
      // offset, exit early.
      return true;

    if (FirstC) {
      if (FirstC == C)
        // If we've seen a constant incoming value before and it is the same
        // constant we see this time, continue checking the next incoming value.
        continue;
      // Otherwise early exit because we either see a different constant or saw
      // a constant before but we have a pointer with constant offset this time.
      return true;
    }

    if (FirstV) {
      // The same logic as above, but check pointer with constant offset here.
      if (FirstBaseAndOffset == BaseAndOffset)
        continue;
      return true;
    }

    if (C) {
      // This is the 1st time we've seen a constant, record it.
      FirstC = C;
      continue;
    }

    // The remaining case is that this is the 1st time we've seen a pointer
    // with constant offset, record it.
    FirstV = V;
    FirstBaseAndOffset = BaseAndOffset;
  }

  // Check if we can map phi to a constant.
  if (FirstC) {
    SimplifiedValues[&I] = FirstC;
    return true;
  }

  // Check if we can map phi to a pointer with constant offset.
  if (FirstBaseAndOffset.first) {
    ConstantOffsetPtrs[&I] = FirstBaseAndOffset;

    Value *SROAArg;
    DenseMap<Value *, int>::iterator CostIt;
    if (lookupSROAArgAndCost(FirstV, SROAArg, CostIt))
      SROAArgValues[&I] = SROAArg;
  }

  return true;
}

/// Check whether we can fold GEPs of constant-offset call site argument
/// pointers. This requires target data and inbounds GEPs.
///
/// \return true if the specified GEP can be folded.
bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
  // Check if we have a base + offset for the pointer.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getPointerOperand());
  if (!BaseAndOffset.first)
    return false;

  // Check if the offset of this GEP is constant, and if so accumulate it
  // into Offset.
  if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
    return false;

  // Add the result as a new mapping to Base + Offset.
  ConstantOffsetPtrs[&I] = BaseAndOffset;

  return true;
}

bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  bool SROACandidate =
      lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt);

  // Lambda to check whether a GEP's indices are all constant.
  auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
    for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
      if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
        return false;
    return true;
  };

  if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
    if (SROACandidate)
      SROAArgValues[&I] = SROAArg;

    // Constant GEPs are modeled as free.
    return true;
  }

  // Variable GEPs will require math and will disable SROA.
  if (SROACandidate)
    disableSROA(CostIt);
  return isGEPFree(I);
}

/// Simplify \p I if its operands are constants and update SimplifiedValues.
/// \p Evaluate is a callable specific to instruction type that evaluates the
/// instruction when all the operands are constants.
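/// For example, visitBitCast below passes a callable that folds the single
/// operand with ConstantExpr::getBitCast.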
template <typename Callable>
bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
  SmallVector<Constant *, 2> COps;
  for (Value *Op : I.operands()) {
    Constant *COp = dyn_cast<Constant>(Op);
    if (!COp)
      COp = SimplifiedValues.lookup(Op);
    if (!COp)
      return false;
    COps.push_back(COp);
  }
  auto *C = Evaluate(COps);
  if (!C)
    return false;
  SimplifiedValues[&I] = C;
  return true;
}

bool CallAnalyzer::visitBitCast(BitCastInst &I) {
  // Propagate constants through bitcasts.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getBitCast(COps[0], I.getType());
      }))
    return true;

  // Track base/offsets through casts.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getOperand(0));
  // Casts don't change the offset, just wrap it up.
  if (BaseAndOffset.first)
    ConstantOffsetPtrs[&I] = BaseAndOffset;

  // Also look for SROA candidates here.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  // Bitcasts are always zero cost.
  return true;
}

bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
  // Propagate constants through ptrtoint.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getPtrToInt(COps[0], I.getType());
      }))
    return true;

  // Track base/offset pairs when converted to a plain integer provided the
  // integer is large enough to represent the pointer.
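  // For example, with 64-bit pointers, 'ptrtoint i8* %p to i64' is wide
  // enough that a base/offset pair known for %p carries over to the result.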
  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
  unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
  if (IntegerSize >= DL.getPointerSizeInBits(AS)) {
    std::pair<Value *, APInt> BaseAndOffset =
        ConstantOffsetPtrs.lookup(I.getOperand(0));
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // This is really weird. Technically, ptrtoint will disable SROA. However,
  // unless that ptrtoint is *used* somewhere in the live basic blocks after
  // inlining, it will be nuked, and SROA should proceed. All of the uses which
  // would block SROA would also block SROA if applied directly to a pointer,
  // and so we can just add the integer in here. The only places where SROA is
  // preserved either cannot fire on an integer, or won't in-and-of themselves
  // disable SROA (ext) w/o some later use that we would see and disable.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
  // Propagate constants through inttoptr.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getIntToPtr(COps[0], I.getType());
      }))
    return true;

  // Track base/offset pairs when round-tripped through a pointer without
  // modifications provided the integer is not too large.
  Value *Op = I.getOperand(0);
  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
  if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitCastInst(CastInst &I) {
  // Propagate constants through casts.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
      }))
    return true;

  // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
  disableSROA(I.getOperand(0));

  // If this is a floating-point cast, and the target says this operation
  // is expensive, this may eventually become a library call. Treat the cost
  // as such.
  switch (I.getOpcode()) {
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
    if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
      Cost += InlineConstants::CallPenalty;
    break;
  default:
    break;
  }

  return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}

bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
  Value *Operand = I.getOperand(0);
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantFoldInstOperands(&I, COps[0], DL);
      }))
    return true;

  // Disable any SROA on the argument to arbitrary unary operators.
  disableSROA(Operand);

  return false;
}

bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
  return CandidateCS.paramHasAttr(A->getArgNo(), Attr);
}

bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
  // Does the *call site* have the NonNull attribute set on an argument? We
  // use the attribute on the call site to memoize any analysis done in the
  // caller. This will also trip if the callee function has a non-null
  // parameter attribute, but that's a less interesting case because hopefully
  // the callee would already have been simplified based on that.
  if (Argument *A = dyn_cast<Argument>(V))
    if (paramHasAttr(A, Attribute::NonNull))
      return true;

  // Is this an alloca in the caller? This is distinct from the attribute case
  // above because attributes aren't updated within the inliner itself and we
  // always want to catch the alloca derived case.
  if (isAllocaDerivedArg(V))
    // We can actually predict the result of comparisons between an
    // alloca-derived value and null. Note that this fires regardless of
    // SROA firing.
    return true;

  return false;
}

bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
  // If the normal destination of the invoke or the parent block of the call
  // site is unreachable-terminated, there is little point in inlining this
  // unless there is literally zero cost.
  // FIXME: Note that it is possible that an unreachable-terminated block has a
  // hot entry. For example, in the scenario below, inlining hot_call_X() may
  // be beneficial:
  // main() {
  //   hot_call_1();
  //   ...
  //   hot_call_N()
  //   exit(0);
  // }
  // For now, we are not handling this corner case here as it is rare in real
  // code. In the future, we should elaborate on this based on BPI and BFI in
  // more general threshold adjusting heuristics in updateThreshold().
  Instruction *Instr = CS.getInstruction();
  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
    if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
      return false;
  } else if (isa<UnreachableInst>(Instr->getParent()->getTerminator()))
    return false;

  return true;
}

bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
  // If global profile summary is available, then callsite's coldness is
  // determined based on that.
  if (PSI && PSI->hasProfileSummary())
    return PSI->isColdCallSite(CS, CallerBFI);

  // Otherwise we need BFI to be available.
  if (!CallerBFI)
    return false;

  // Determine if the callsite is cold relative to caller's entry. We could
  // potentially cache the computation of scaled entry frequency, but the added
  // complexity is not worth it unless this scaling shows up high in the
  // profiles.
| Easwaran Raman | c5fa635 | 2017-06-27 23:11:18 +0000 | [diff] [blame] | 809 | const BranchProbability ColdProb(ColdCallSiteRelFreq, 100); | 
|  | 810 | auto CallSiteBB = CS.getInstruction()->getParent(); | 
|  | 811 | auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB); | 
|  | 812 | auto CallerEntryFreq = | 
|  | 813 | CallerBFI->getBlockFreq(&(CS.getCaller()->getEntryBlock())); | 
|  | 814 | return CallSiteFreq < CallerEntryFreq * ColdProb; | 
|  | 815 | } | 
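To make the frequency comparison above concrete, here is a minimal arithmetic sketch. It is an illustration only: the real code uses LLVM's `BranchProbability` and `BlockFrequency` fixed-point types, and the `ColdCallSiteRelFreq` default (assumed to be 2 percent here) comes from a command-line flag.

```cpp
#include <cstdint>
#include <iostream>

// A callsite is "cold" when its block frequency is below RelFreqPercent% of
// the caller's entry frequency; mirrors CallSiteFreq < CallerEntryFreq * ColdProb.
bool isColdCallSiteSketch(uint64_t CallSiteFreq, uint64_t CallerEntryFreq,
                          unsigned RelFreqPercent = 2 /* assumed default */) {
  return CallSiteFreq * 100 < CallerEntryFreq * RelFreqPercent;
}

int main() {
  // Caller entry executes 1000 times; a block reached 15 times is cold.
  std::cout << isColdCallSiteSketch(15, 1000) << '\n'; // 1 (cold)
  std::cout << isColdCallSiteSketch(50, 1000) << '\n'; // 0 (not cold)
}
```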
|  | 816 |  | 
| Easwaran Raman | 974d4ee | 2017-08-03 22:23:33 +0000 | [diff] [blame] | 817 | Optional<int> | 
|  | 818 | CallAnalyzer::getHotCallSiteThreshold(CallSite CS, | 
|  | 819 | BlockFrequencyInfo *CallerBFI) { | 
| Chandler Carruth | bba762a | 2017-08-14 21:25:00 +0000 | [diff] [blame] | 820 |  | 
| Easwaran Raman | 974d4ee | 2017-08-03 22:23:33 +0000 | [diff] [blame] | 821 | // If a global profile summary is available, the callsite's hotness is | 
|  | 822 | // determined based on that. | 
| Chandler Carruth | bba762a | 2017-08-14 21:25:00 +0000 | [diff] [blame] | 823 | if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(CS, CallerBFI)) | 
|  | 824 | return Params.HotCallSiteThreshold; | 
| Easwaran Raman | 974d4ee | 2017-08-03 22:23:33 +0000 | [diff] [blame] | 825 |  | 
| Chandler Carruth | bba762a | 2017-08-14 21:25:00 +0000 | [diff] [blame] | 826 | // Otherwise we need BFI to be available and to have a locally hot callsite | 
|  | 827 | // threshold. | 
|  | 828 | if (!CallerBFI || !Params.LocallyHotCallSiteThreshold) | 
| Easwaran Raman | 974d4ee | 2017-08-03 22:23:33 +0000 | [diff] [blame] | 829 | return None; | 
|  | 830 |  | 
| Chandler Carruth | bba762a | 2017-08-14 21:25:00 +0000 | [diff] [blame] | 831 | // Determine if the callsite is hot relative to the caller's entry. We could | 
|  | 832 | // potentially cache the computation of scaled entry frequency, but the added | 
|  | 833 | // complexity is not worth it unless this scaling shows up high in the | 
|  | 834 | // profiles. | 
| Easwaran Raman | 974d4ee | 2017-08-03 22:23:33 +0000 | [diff] [blame] | 835 | auto CallSiteBB = CS.getInstruction()->getParent(); | 
|  | 836 | auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency(); | 
|  | 837 | auto CallerEntryFreq = CallerBFI->getEntryFreq(); | 
|  | 838 | if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq) | 
| Chandler Carruth | bba762a | 2017-08-14 21:25:00 +0000 | [diff] [blame] | 839 | return Params.LocallyHotCallSiteThreshold; | 
|  | 840 |  | 
|  | 841 | // Otherwise treat it normally. | 
| Easwaran Raman | 974d4ee | 2017-08-03 22:23:33 +0000 | [diff] [blame] | 842 | return None; | 
|  | 843 | } | 
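The hot case is the mirror image of the cold check, except that it scales by `HotCallSiteRelFreq` and yields a threshold rather than a bool. A hedged sketch, with both constants assumed for illustration rather than taken from the source:

```cpp
#include <cstdint>
#include <optional>

constexpr uint64_t HotCallSiteRelFreq = 60;      // assumed flag default
constexpr int LocallyHotCallSiteThreshold = 525; // assumed flag default

// Mirrors the tail of getHotCallSiteThreshold: a callsite whose frequency is
// at least HotCallSiteRelFreq times the entry frequency gets the hot threshold.
std::optional<int> locallyHotThresholdSketch(uint64_t CallSiteFreq,
                                             uint64_t CallerEntryFreq) {
  if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
    return LocallyHotCallSiteThreshold;
  return std::nullopt; // treat the callsite normally
}
```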
|  | 844 |  | 
| Easwaran Raman | f4bb2f0 | 2016-01-14 23:16:29 +0000 | [diff] [blame] | 845 | void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { | 
| Easwaran Raman | 9a3fc17 | 2016-04-08 21:28:02 +0000 | [diff] [blame] | 846 | // If no size growth is allowed for this inlining, set Threshold to 0. | 
|  | 847 | if (!allowSizeGrowth(CS)) { | 
|  | 848 | Threshold = 0; | 
|  | 849 | return; | 
|  | 850 | } | 
|  | 851 |  | 
| Easwaran Raman | f4bb2f0 | 2016-01-14 23:16:29 +0000 | [diff] [blame] | 852 | Function *Caller = CS.getCaller(); | 
| Easwaran Raman | 1c57cc2 | 2016-08-10 00:48:04 +0000 | [diff] [blame] | 853 |  | 
|  | 854 | // Return min(A, B) if B is valid. | 
|  | 855 | auto MinIfValid = [](int A, Optional<int> B) { | 
|  | 856 | return B ? std::min(A, B.getValue()) : A; | 
|  | 857 | }; | 
|  | 858 |  | 
| Easwaran Raman | 0d58fca | 2016-08-11 03:58:05 +0000 | [diff] [blame] | 859 | // Return max(A, B) if B is valid. | 
|  | 860 | auto MaxIfValid = [](int A, Optional<int> B) { | 
|  | 861 | return B ? std::max(A, B.getValue()) : A; | 
|  | 862 | }; | 
|  | 863 |  | 
| Easwaran Raman | 51b809b | 2017-07-28 21:47:36 +0000 | [diff] [blame] | 864 | // Various bonus percentages. These are multiplied by Threshold to get the | 
|  | 865 | // bonus values. | 
|  | 866 | // SingleBBBonus: This bonus is applied if the callee has a single reachable | 
|  | 867 | // basic block at the given callsite context. This is speculatively applied | 
|  | 868 | // and withdrawn if more than one basic block is seen. | 
|  | 869 | // | 
|  | 870 | // Vector bonuses: We want to more aggressively inline vector-dense kernels | 
|  | 871 | // and apply this bonus based on the percentage of vector instructions. A | 
|  | 872 | // bonus is applied if the vector instructions exceed 50% and half that amount | 
|  | 873 | // is applied if it exceeds 10%. Note that these bonuses are somewhat | 
|  | 874 | // arbitrary and evolved over time by accident as much as because they are | 
|  | 875 | // principled bonuses. | 
|  | 876 | // FIXME: It would be nice to base the bonus values on something more | 
|  | 877 | // scientific. | 
|  | 878 | // | 
|  | 879 | // LastCallToStaticBonus: This large bonus is applied to ensure the inlining | 
|  | 880 | // of the last call to a static function as inlining such functions is | 
|  | 881 | // guaranteed to reduce code size. | 
|  | 882 | // | 
|  | 883 | // These bonus percentages may be set to 0 based on properties of the caller | 
|  | 884 | // and the callsite. | 
|  | 885 | int SingleBBBonusPercent = 50; | 
|  | 886 | int VectorBonusPercent = 150; | 
|  | 887 | int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus; | 
|  | 888 |  | 
|  | 889 | // Lambda to set all the above bonus and bonus percentages to 0. | 
|  | 890 | auto DisallowAllBonuses = [&]() { | 
|  | 891 | SingleBBBonusPercent = 0; | 
|  | 892 | VectorBonusPercent = 0; | 
|  | 893 | LastCallToStaticBonus = 0; | 
|  | 894 | }; | 
|  | 895 |  | 
| Easwaran Raman | 1c57cc2 | 2016-08-10 00:48:04 +0000 | [diff] [blame] | 896 | // Use the OptMinSizeThreshold or OptSizeThreshold knobs if they are available | 
|  | 897 | // and reduce the threshold if the caller has the necessary attribute. | 
| Easwaran Raman | 51b809b | 2017-07-28 21:47:36 +0000 | [diff] [blame] | 898 | if (Caller->optForMinSize()) { | 
| Easwaran Raman | 1c57cc2 | 2016-08-10 00:48:04 +0000 | [diff] [blame] | 899 | Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold); | 
| Easwaran Raman | 51b809b | 2017-07-28 21:47:36 +0000 | [diff] [blame] | 900 | // For minsize, we want to disable the single BB bonus and the vector | 
|  | 901 | // bonuses, but not the last-call-to-static bonus. Inlining the last call to | 
|  | 902 | // a static function will, at the minimum, eliminate the parameter setup and | 
|  | 903 | // call/return instructions. | 
|  | 904 | SingleBBBonusPercent = 0; | 
|  | 905 | VectorBonusPercent = 0; | 
|  | 906 | } else if (Caller->optForSize()) | 
| Easwaran Raman | 1c57cc2 | 2016-08-10 00:48:04 +0000 | [diff] [blame] | 907 | Threshold = MinIfValid(Threshold, Params.OptSizeThreshold); | 
| Easwaran Raman | f4bb2f0 | 2016-01-14 23:16:29 +0000 | [diff] [blame] | 908 |  | 
| Easwaran Raman | e08b139 | 2017-01-09 21:56:26 +0000 | [diff] [blame] | 909 | // Adjust the threshold based on the inlinehint attribute and profile-based | 
|  | 910 | // hotness information if the caller does not have the MinSize attribute. | 
|  | 911 | if (!Caller->optForMinSize()) { | 
|  | 912 | if (Callee.hasFnAttribute(Attribute::InlineHint)) | 
|  | 913 | Threshold = MaxIfValid(Threshold, Params.HintThreshold); | 
| Chandler Carruth | bba762a | 2017-08-14 21:25:00 +0000 | [diff] [blame] | 914 |  | 
|  | 915 | // FIXME: After switching to the new passmanager, simplify the logic below | 
|  | 916 | // by checking only the callsite hotness/coldness as we will reliably | 
|  | 917 | // have local profile information. | 
|  | 918 | // | 
|  | 919 | // Callsite hotness and coldness can be determined if sample profile is | 
|  | 920 | // used (which adds hotness metadata to calls) or if caller's | 
|  | 921 | // BlockFrequencyInfo is available. | 
|  | 922 | BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr; | 
|  | 923 | auto HotCallSiteThreshold = getHotCallSiteThreshold(CS, CallerBFI); | 
|  | 924 | if (!Caller->optForSize() && HotCallSiteThreshold) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 925 | LLVM_DEBUG(dbgs() << "Hot callsite.\n"); | 
| Chandler Carruth | bba762a | 2017-08-14 21:25:00 +0000 | [diff] [blame] | 926 | // FIXME: This should update the threshold only if it exceeds the | 
|  | 927 | // current threshold, but AutoFDO + ThinLTO currently relies on this | 
|  | 928 | // behavior to prevent inlining of hot callsites during ThinLTO | 
|  | 929 | // compile phase. | 
|  | 930 | Threshold = HotCallSiteThreshold.getValue(); | 
|  | 931 | } else if (isColdCallSite(CS, CallerBFI)) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 932 | LLVM_DEBUG(dbgs() << "Cold callsite.\n"); | 
| Chandler Carruth | bba762a | 2017-08-14 21:25:00 +0000 | [diff] [blame] | 933 | // Do not apply bonuses for a cold callsite, including the | 
|  | 934 | // LastCallToStatic bonus. While this bonus might result in code size | 
|  | 935 | // reduction, it can cause the size of a non-cold caller to increase | 
|  | 936 | // preventing it from being inlined. | 
|  | 937 | DisallowAllBonuses(); | 
|  | 938 | Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold); | 
|  | 939 | } else if (PSI) { | 
|  | 940 | // Use callee's global profile information only if we have no way of | 
|  | 941 | // determining this via callsite information. | 
|  | 942 | if (PSI->isFunctionEntryHot(&Callee)) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 943 | LLVM_DEBUG(dbgs() << "Hot callee.\n"); | 
| Chandler Carruth | bba762a | 2017-08-14 21:25:00 +0000 | [diff] [blame] | 944 | // If callsite hotness cannot be determined, we may still know | 
|  | 945 | // that the callee is hot and treat it as a weaker hint for threshold | 
|  | 946 | // increase. | 
|  | 947 | Threshold = MaxIfValid(Threshold, Params.HintThreshold); | 
|  | 948 | } else if (PSI->isFunctionEntryCold(&Callee)) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 949 | LLVM_DEBUG(dbgs() << "Cold callee.\n"); | 
| Chandler Carruth | bba762a | 2017-08-14 21:25:00 +0000 | [diff] [blame] | 950 | // Do not apply bonuses for a cold callee, including the | 
|  | 951 | // LastCallToStatic bonus. While this bonus might result in code size | 
|  | 952 | // reduction, it can cause the size of a non-cold caller to increase | 
|  | 953 | // preventing it from being inlined. | 
|  | 954 | DisallowAllBonuses(); | 
|  | 955 | Threshold = MinIfValid(Threshold, Params.ColdThreshold); | 
| Easwaran Raman | e08b139 | 2017-01-09 21:56:26 +0000 | [diff] [blame] | 956 | } | 
|  | 957 | } | 
| Dehao Chen | e1c7c57 | 2016-08-05 20:49:04 +0000 | [diff] [blame] | 958 | } | 
| Dehao Chen | 9232f98 | 2016-07-11 16:48:54 +0000 | [diff] [blame] | 959 |  | 
| Justin Lebar | 8650a4d | 2016-04-15 01:38:48 +0000 | [diff] [blame] | 960 | // Finally, take the target-specific inlining threshold multiplier into | 
|  | 961 | // account. | 
|  | 962 | Threshold *= TTI.getInliningThresholdMultiplier(); | 
| Easwaran Raman | 51b809b | 2017-07-28 21:47:36 +0000 | [diff] [blame] | 963 |  | 
|  | 964 | SingleBBBonus = Threshold * SingleBBBonusPercent / 100; | 
|  | 965 | VectorBonus = Threshold * VectorBonusPercent / 100; | 
|  | 966 |  | 
|  | 967 | bool OnlyOneCallAndLocalLinkage = | 
|  | 968 | F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction(); | 
|  | 969 | // If there is only one call to the function and it has internal linkage, | 
|  | 970 | // the cost of inlining it drops dramatically. It may seem odd to update | 
|  | 971 | // Cost in updateThreshold, but the bonus depends on the logic in this method. | 
|  | 972 | if (OnlyOneCallAndLocalLinkage) | 
|  | 973 | Cost -= LastCallToStaticBonus; | 
| Easwaran Raman | f4bb2f0 | 2016-01-14 23:16:29 +0000 | [diff] [blame] | 974 | } | 
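As a worked example of the bonus arithmetic at the end of this function, assume the commonly used default threshold of 225, no minsize/optsize attributes, and a TTI multiplier of 1 (all assumptions for illustration):

```cpp
#include <iostream>

int main() {
  int Threshold = 225;            // assumed default inline threshold
  int SingleBBBonusPercent = 50;  // values from updateThreshold above
  int VectorBonusPercent = 150;

  std::cout << Threshold * SingleBBBonusPercent / 100 << '\n'; // 112
  std::cout << Threshold * VectorBonusPercent / 100 << '\n';   // 337
  // A sole call to a local function would additionally subtract
  // InlineConstants::LastCallToStaticBonus from Cost before analysis.
}
```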
|  | 975 |  | 
| Matt Arsenault | 727aa34 | 2013-07-20 04:09:00 +0000 | [diff] [blame] | 976 | bool CallAnalyzer::visitCmpInst(CmpInst &I) { | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 977 | Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); | 
|  | 978 | // First try to handle simplified comparisons. | 
| Easwaran Raman | 617f636 | 2017-02-18 17:22:52 +0000 | [diff] [blame] | 979 | if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) { | 
|  | 980 | return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]); | 
|  | 981 | })) | 
|  | 982 | return true; | 
| Matt Arsenault | 727aa34 | 2013-07-20 04:09:00 +0000 | [diff] [blame] | 983 |  | 
|  | 984 | if (I.getOpcode() == Instruction::FCmp) | 
|  | 985 | return false; | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 986 |  | 
|  | 987 | // Otherwise look for a comparison between constant offset pointers with | 
|  | 988 | // a common base. | 
|  | 989 | Value *LHSBase, *RHSBase; | 
|  | 990 | APInt LHSOffset, RHSOffset; | 
| Benjamin Kramer | d6f1f84 | 2014-03-02 13:30:33 +0000 | [diff] [blame] | 991 | std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 992 | if (LHSBase) { | 
| Benjamin Kramer | d6f1f84 | 2014-03-02 13:30:33 +0000 | [diff] [blame] | 993 | std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 994 | if (RHSBase && LHSBase == RHSBase) { | 
|  | 995 | // We have common bases, fold the icmp to a constant based on the | 
|  | 996 | // offsets. | 
|  | 997 | Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); | 
|  | 998 | Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); | 
|  | 999 | if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { | 
|  | 1000 | SimplifiedValues[&I] = C; | 
|  | 1001 | ++NumConstantPtrCmps; | 
|  | 1002 | return true; | 
|  | 1003 | } | 
|  | 1004 | } | 
|  | 1005 | } | 
|  | 1006 |  | 
|  | 1007 | // If the comparison is an equality comparison with null, we can simplify it | 
| Philip Reames | 9b5c958 | 2015-06-26 20:51:17 +0000 | [diff] [blame] | 1008 | // if we know the value (argument) can't be null. | 
|  | 1009 | if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) && | 
|  | 1010 | isKnownNonNullInCallee(I.getOperand(0))) { | 
|  | 1011 | bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE; | 
|  | 1012 | SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType()) | 
|  | 1013 | : ConstantInt::getFalse(I.getType()); | 
|  | 1014 | return true; | 
|  | 1015 | } | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1016 | // Finally check for SROA candidates in comparisons. | 
|  | 1017 | Value *SROAArg; | 
|  | 1018 | DenseMap<Value *, int>::iterator CostIt; | 
|  | 1019 | if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { | 
|  | 1020 | if (isa<ConstantPointerNull>(I.getOperand(1))) { | 
|  | 1021 | accumulateSROACost(CostIt, InlineConstants::InstrCost); | 
|  | 1022 | return true; | 
|  | 1023 | } | 
|  | 1024 |  | 
|  | 1025 | disableSROA(CostIt); | 
|  | 1026 | } | 
|  | 1027 |  | 
|  | 1028 | return false; | 
|  | 1029 | } | 
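For intuition, this is the sort of source pattern where the null-comparison folding fires. A C++-level sketch (the analysis itself operates on LLVM IR after argument substitution):

```cpp
// The analyzer sees that 'p' is derived from an alloca in the caller, so the
// equality comparison against null folds to false and the branch is free.
static int callee(int *p) {
  if (p == nullptr) // folded: alloca-derived pointers cannot be null
    return -1;
  return *p;
}

int caller() {
  int local = 42;        // becomes an alloca in the caller
  return callee(&local); // inline candidate; the null check costs nothing
}
```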
|  | 1030 |  | 
|  | 1031 | bool CallAnalyzer::visitSub(BinaryOperator &I) { | 
|  | 1032 | // Try to handle a special case: we can fold computing the difference of two | 
|  | 1033 | // constant-related pointers. | 
|  | 1034 | Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); | 
|  | 1035 | Value *LHSBase, *RHSBase; | 
|  | 1036 | APInt LHSOffset, RHSOffset; | 
| Benjamin Kramer | d6f1f84 | 2014-03-02 13:30:33 +0000 | [diff] [blame] | 1037 | std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1038 | if (LHSBase) { | 
| Benjamin Kramer | d6f1f84 | 2014-03-02 13:30:33 +0000 | [diff] [blame] | 1039 | std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1040 | if (RHSBase && LHSBase == RHSBase) { | 
|  | 1041 | // We have common bases, fold the subtract to a constant based on the | 
|  | 1042 | // offsets. | 
|  | 1043 | Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); | 
|  | 1044 | Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); | 
|  | 1045 | if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) { | 
|  | 1046 | SimplifiedValues[&I] = C; | 
|  | 1047 | ++NumConstantPtrDiffs; | 
|  | 1048 | return true; | 
|  | 1049 | } | 
|  | 1050 | } | 
|  | 1051 | } | 
|  | 1052 |  | 
|  | 1053 | // Otherwise, fall back to the generic logic for simplifying and handling | 
|  | 1054 | // instructions. | 
|  | 1055 | return Base::visitSub(I); | 
|  | 1056 | } | 
|  | 1057 |  | 
|  | 1058 | bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { | 
|  | 1059 | Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); | 
| Haicheng Wu | 6d14dfe | 2017-12-22 17:09:09 +0000 | [diff] [blame] | 1060 | Constant *CLHS = dyn_cast<Constant>(LHS); | 
|  | 1061 | if (!CLHS) | 
|  | 1062 | CLHS = SimplifiedValues.lookup(LHS); | 
|  | 1063 | Constant *CRHS = dyn_cast<Constant>(RHS); | 
|  | 1064 | if (!CRHS) | 
|  | 1065 | CRHS = SimplifiedValues.lookup(RHS); | 
| Michael Zolotukhin | 4e8598e | 2015-02-06 20:02:51 +0000 | [diff] [blame] | 1066 |  | 
| Haicheng Wu | 6d14dfe | 2017-12-22 17:09:09 +0000 | [diff] [blame] | 1067 | Value *SimpleV = nullptr; | 
|  | 1068 | if (auto FI = dyn_cast<FPMathOperator>(&I)) | 
|  | 1069 | SimpleV = SimplifyFPBinOp(I.getOpcode(), CLHS ? CLHS : LHS, | 
|  | 1070 | CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL); | 
|  | 1071 | else | 
|  | 1072 | SimpleV = | 
|  | 1073 | SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL); | 
|  | 1074 |  | 
|  | 1075 | if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) | 
|  | 1076 | SimplifiedValues[&I] = C; | 
|  | 1077 |  | 
|  | 1078 | if (SimpleV) | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1079 | return true; | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1080 |  | 
|  | 1081 | // Disable any SROA on arguments to arbitrary, unsimplified binary operators. | 
|  | 1082 | disableSROA(LHS); | 
|  | 1083 | disableSROA(RHS); | 
|  | 1084 |  | 
| Eli Friedman | 39ed9a6 | 2017-12-22 02:08:08 +0000 | [diff] [blame] | 1085 | // If the instruction is floating point, and the target says this operation | 
|  | 1086 | // is expensive, this may eventually become a library call. Treat the cost | 
|  | 1087 | // as such. | 
|  | 1088 | if (I.getType()->isFloatingPointTy() && | 
|  | 1089 | TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive) | 
|  | 1090 | Cost += InlineConstants::CallPenalty; | 
|  | 1091 |  | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1092 | return false; | 
|  | 1093 | } | 
|  | 1094 |  | 
|  | 1095 | bool CallAnalyzer::visitLoad(LoadInst &I) { | 
|  | 1096 | Value *SROAArg; | 
|  | 1097 | DenseMap<Value *, int>::iterator CostIt; | 
| Wei Mi | 6c428d6 | 2015-03-20 18:33:12 +0000 | [diff] [blame] | 1098 | if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) { | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1099 | if (I.isSimple()) { | 
|  | 1100 | accumulateSROACost(CostIt, InlineConstants::InstrCost); | 
|  | 1101 | return true; | 
|  | 1102 | } | 
|  | 1103 |  | 
|  | 1104 | disableSROA(CostIt); | 
|  | 1105 | } | 
|  | 1106 |  | 
| Haicheng Wu | a446151 | 2017-12-15 14:34:41 +0000 | [diff] [blame] | 1107 | // If the data is already loaded from this address and hasn't been clobbered | 
|  | 1108 | // by any stores or calls, this load is likely to be redundant and can be | 
|  | 1109 | // eliminated. | 
|  | 1110 | if (EnableLoadElimination && | 
| Haicheng Wu | b3689ca | 2017-12-19 13:42:58 +0000 | [diff] [blame] | 1111 | !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) { | 
| Haicheng Wu | a446151 | 2017-12-15 14:34:41 +0000 | [diff] [blame] | 1112 | LoadEliminationCost += InlineConstants::InstrCost; | 
|  | 1113 | return true; | 
|  | 1114 | } | 
|  | 1115 |  | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1116 | return false; | 
|  | 1117 | } | 
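A sketch of the pattern the `LoadAddrSet` bookkeeping rewards, and of the clobber that later revokes the credit (see visitStore below); both functions are illustrative inline candidates, not LLVM API usage:

```cpp
int repeatedLoad(int *p) {
  int a = *p; // first load: pays InlineConstants::InstrCost
  int b = *p; // repeated load: credited to LoadEliminationCost, treated free
  return a + b;
}

int clobberedLoad(int *p, int *q) {
  int a = *p;
  *q = 0;     // a store disables further elimination and re-adds any credit
  int b = *p; // no longer assumed redundant; pays full cost
  return a + b;
}
```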
|  | 1118 |  | 
|  | 1119 | bool CallAnalyzer::visitStore(StoreInst &I) { | 
|  | 1120 | Value *SROAArg; | 
|  | 1121 | DenseMap<Value *, int>::iterator CostIt; | 
| Wei Mi | 6c428d6 | 2015-03-20 18:33:12 +0000 | [diff] [blame] | 1122 | if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) { | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1123 | if (I.isSimple()) { | 
|  | 1124 | accumulateSROACost(CostIt, InlineConstants::InstrCost); | 
|  | 1125 | return true; | 
|  | 1126 | } | 
|  | 1127 |  | 
|  | 1128 | disableSROA(CostIt); | 
|  | 1129 | } | 
|  | 1130 |  | 
| Haicheng Wu | a446151 | 2017-12-15 14:34:41 +0000 | [diff] [blame] | 1131 | // The store can potentially clobber loads and prevent repeated loads from | 
|  | 1132 | // being eliminated. | 
|  | 1133 | // FIXME: | 
|  | 1134 | // 1. We can probably keep an initial set of eliminable loads subtracted | 
|  | 1135 | // from the cost even when we finally see a store. We just need to disable | 
|  | 1136 | // *further* accumulation of elimination savings. | 
|  | 1137 | // 2. We should probably at some point thread MemorySSA for the callee into | 
|  | 1138 | // this and then use that to actually compute *really* precise savings. | 
|  | 1139 | disableLoadElimination(); | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1140 | return false; | 
|  | 1141 | } | 
|  | 1142 |  | 
| Chandler Carruth | 753e21d | 2012-12-28 14:23:32 +0000 | [diff] [blame] | 1143 | bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) { | 
|  | 1144 | // Constant folding for extract value is trivial. | 
| Easwaran Raman | 617f636 | 2017-02-18 17:22:52 +0000 | [diff] [blame] | 1145 | if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) { | 
|  | 1146 | return ConstantExpr::getExtractValue(COps[0], I.getIndices()); | 
|  | 1147 | })) | 
| Chandler Carruth | 753e21d | 2012-12-28 14:23:32 +0000 | [diff] [blame] | 1148 | return true; | 
| Chandler Carruth | 753e21d | 2012-12-28 14:23:32 +0000 | [diff] [blame] | 1149 |  | 
|  | 1150 | // SROA can look through these but give them a cost. | 
|  | 1151 | return false; | 
|  | 1152 | } | 
|  | 1153 |  | 
|  | 1154 | bool CallAnalyzer::visitInsertValue(InsertValueInst &I) { | 
|  | 1155 | // Constant folding for insert value is trivial. | 
| Easwaran Raman | 617f636 | 2017-02-18 17:22:52 +0000 | [diff] [blame] | 1156 | if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) { | 
|  | 1157 | return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0], | 
|  | 1158 | /*InsertedValueOperand*/ COps[1], | 
|  | 1159 | I.getIndices()); | 
|  | 1160 | })) | 
| Chandler Carruth | 753e21d | 2012-12-28 14:23:32 +0000 | [diff] [blame] | 1161 | return true; | 
| Chandler Carruth | 753e21d | 2012-12-28 14:23:32 +0000 | [diff] [blame] | 1162 |  | 
|  | 1163 | // SROA can look through these but give them a cost. | 
|  | 1164 | return false; | 
|  | 1165 | } | 
|  | 1166 |  | 
| Adrian Prantl | 5f8f34e4 | 2018-05-01 15:54:18 +0000 | [diff] [blame] | 1167 | /// Try to simplify a call site. | 
| Chandler Carruth | 753e21d | 2012-12-28 14:23:32 +0000 | [diff] [blame] | 1168 | /// | 
|  | 1169 | /// Takes a concrete function and callsite and tries to actually simplify it by | 
|  | 1170 | // analyzing the arguments and the call itself with instsimplify. Returns true | 
|  | 1171 | // if it has simplified the callsite to some other entity (a constant), making | 
|  | 1172 | // it free. | 
|  | 1173 | bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) { | 
|  | 1174 | // FIXME: Using the instsimplify logic directly for this is inefficient | 
|  | 1175 | // because we have to continually rebuild the argument list even when no | 
|  | 1176 | // simplifications can be performed. Until that is fixed with remapping | 
|  | 1177 | // inside of instsimplify, directly constant fold calls here. | 
| Andrew Kaylor | 647025f | 2017-06-09 23:18:11 +0000 | [diff] [blame] | 1178 | if (!canConstantFoldCallTo(CS, F)) | 
| Chandler Carruth | 753e21d | 2012-12-28 14:23:32 +0000 | [diff] [blame] | 1179 | return false; | 
|  | 1180 |  | 
|  | 1181 | // Try to re-map the arguments to constants. | 
|  | 1182 | SmallVector<Constant *, 4> ConstantArgs; | 
|  | 1183 | ConstantArgs.reserve(CS.arg_size()); | 
| Chad Rosier | 567556a | 2016-04-28 14:47:23 +0000 | [diff] [blame] | 1184 | for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; | 
|  | 1185 | ++I) { | 
| Chandler Carruth | 753e21d | 2012-12-28 14:23:32 +0000 | [diff] [blame] | 1186 | Constant *C = dyn_cast<Constant>(*I); | 
|  | 1187 | if (!C) | 
|  | 1188 | C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I)); | 
|  | 1189 | if (!C) | 
|  | 1190 | return false; // This argument doesn't map to a constant. | 
|  | 1191 |  | 
|  | 1192 | ConstantArgs.push_back(C); | 
|  | 1193 | } | 
| Andrew Kaylor | 647025f | 2017-06-09 23:18:11 +0000 | [diff] [blame] | 1194 | if (Constant *C = ConstantFoldCall(CS, F, ConstantArgs)) { | 
| Chandler Carruth | 753e21d | 2012-12-28 14:23:32 +0000 | [diff] [blame] | 1195 | SimplifiedValues[CS.getInstruction()] = C; | 
|  | 1196 | return true; | 
|  | 1197 | } | 
|  | 1198 |  | 
|  | 1199 | return false; | 
|  | 1200 | } | 
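Concretely, a call becomes free when every argument maps to a constant through `SimplifiedValues` and the target is a foldable function. A hedged example of the situation this targets (whether a given libm call actually folds depends on the host's constant-folding support):

```cpp
#include <cmath>

static double callee(double x) {
  // When callee(2.0) is analyzed, 'x' maps to the constant 2.0, so this libm
  // call can be constant-folded to 8.0 and the callsite costs nothing.
  return std::pow(x, 3.0);
}

double caller() { return callee(2.0); }
```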
|  | 1201 |  | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1202 | bool CallAnalyzer::visitCallSite(CallSite CS) { | 
| Chandler Carruth | 37d25de | 2013-12-13 08:00:01 +0000 | [diff] [blame] | 1203 | if (CS.hasFnAttr(Attribute::ReturnsTwice) && | 
| Duncan P. N. Exon Smith | b3fc83c | 2015-02-14 00:12:15 +0000 | [diff] [blame] | 1204 | !F.hasFnAttribute(Attribute::ReturnsTwice)) { | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1205 | // This aborts the entire analysis. | 
|  | 1206 | ExposesReturnsTwice = true; | 
|  | 1207 | return false; | 
|  | 1208 | } | 
| Chad Rosier | 567556a | 2016-04-28 14:47:23 +0000 | [diff] [blame] | 1209 | if (CS.isCall() && cast<CallInst>(CS.getInstruction())->cannotDuplicate()) | 
| James Molloy | 4f6fb95 | 2012-12-20 16:04:27 +0000 | [diff] [blame] | 1210 | ContainsNoDuplicateCall = true; | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1211 |  | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1212 | if (Function *F = CS.getCalledFunction()) { | 
| Chandler Carruth | 753e21d | 2012-12-28 14:23:32 +0000 | [diff] [blame] | 1213 | // When we have a concrete function, first try to simplify it directly. | 
|  | 1214 | if (simplifyCallSite(F, CS)) | 
|  | 1215 | return true; | 
|  | 1216 |  | 
|  | 1217 | // Next check if it is an intrinsic we know about. | 
|  | 1218 | // FIXME: Lift this into part of the InstVisitor. | 
|  | 1219 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) { | 
|  | 1220 | switch (II->getIntrinsicID()) { | 
|  | 1221 | default: | 
| Haicheng Wu | a446151 | 2017-12-15 14:34:41 +0000 | [diff] [blame] | 1222 | if (!CS.onlyReadsMemory() && !isAssumeLikeIntrinsic(II)) | 
|  | 1223 | disableLoadElimination(); | 
| Chandler Carruth | 753e21d | 2012-12-28 14:23:32 +0000 | [diff] [blame] | 1224 | return Base::visitCallSite(CS); | 
|  | 1225 |  | 
| Peter Collingbourne | 7dd8dbf | 2016-04-22 21:18:02 +0000 | [diff] [blame] | 1226 | case Intrinsic::load_relative: | 
|  | 1227 | // This is normally lowered to 4 LLVM instructions. | 
|  | 1228 | Cost += 3 * InlineConstants::InstrCost; | 
|  | 1229 | return false; | 
|  | 1230 |  | 
| Chandler Carruth | 753e21d | 2012-12-28 14:23:32 +0000 | [diff] [blame] | 1231 | case Intrinsic::memset: | 
|  | 1232 | case Intrinsic::memcpy: | 
|  | 1233 | case Intrinsic::memmove: | 
| Haicheng Wu | a446151 | 2017-12-15 14:34:41 +0000 | [diff] [blame] | 1234 | disableLoadElimination(); | 
| Chandler Carruth | 753e21d | 2012-12-28 14:23:32 +0000 | [diff] [blame] | 1235 | // SROA can usually chew through these intrinsics, but they aren't free. | 
|  | 1236 | return false; | 
| Vitaly Buka | 4296ea7 | 2018-04-04 21:46:27 +0000 | [diff] [blame] | 1237 | case Intrinsic::icall_branch_funnel: | 
| Reid Kleckner | 6038179 | 2015-07-07 22:25:32 +0000 | [diff] [blame] | 1238 | case Intrinsic::localescape: | 
| Vitaly Buka | 4296ea7 | 2018-04-04 21:46:27 +0000 | [diff] [blame] | 1239 | HasUninlineableIntrinsic = true; | 
| Reid Kleckner | 223de26 | 2015-04-14 20:38:14 +0000 | [diff] [blame] | 1240 | return false; | 
| Florian Hahn | 80788d8 | 2018-01-06 19:45:40 +0000 | [diff] [blame] | 1241 | case Intrinsic::vastart: | 
| Sameer AbuAsal | 77beee4 | 2018-09-20 18:39:34 +0000 | [diff] [blame] | 1242 | InitsVargArgs = true; | 
| Florian Hahn | 80788d8 | 2018-01-06 19:45:40 +0000 | [diff] [blame] | 1243 | return false; | 
| Chandler Carruth | 753e21d | 2012-12-28 14:23:32 +0000 | [diff] [blame] | 1244 | } | 
|  | 1245 | } | 
|  | 1246 |  | 
| Davide Italiano | 9d939c8 | 2017-11-30 22:10:35 +0000 | [diff] [blame] | 1247 | if (F == CS.getInstruction()->getFunction()) { | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1248 | // This flag will fully abort the analysis, so don't bother with anything | 
|  | 1249 | // else. | 
| Nadav Rotem | 4eb3d4b | 2012-09-19 08:08:04 +0000 | [diff] [blame] | 1250 | IsRecursiveCall = true; | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1251 | return false; | 
|  | 1252 | } | 
|  | 1253 |  | 
| Chandler Carruth | 0ba8db4 | 2013-01-22 11:26:02 +0000 | [diff] [blame] | 1254 | if (TTI.isLoweredToCall(F)) { | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1255 | // We account for the average 1 instruction per call argument setup | 
|  | 1256 | // here. | 
|  | 1257 | Cost += CS.arg_size() * InlineConstants::InstrCost; | 
|  | 1258 |  | 
|  | 1259 | // Everything other than inline ASM will also have a significant cost | 
|  | 1260 | // merely from making the call. | 
|  | 1261 | if (!isa<InlineAsm>(CS.getCalledValue())) | 
|  | 1262 | Cost += InlineConstants::CallPenalty; | 
|  | 1263 | } | 
|  | 1264 |  | 
| Haicheng Wu | a446151 | 2017-12-15 14:34:41 +0000 | [diff] [blame] | 1265 | if (!CS.onlyReadsMemory()) | 
|  | 1266 | disableLoadElimination(); | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1267 | return Base::visitCallSite(CS); | 
|  | 1268 | } | 
|  | 1269 |  | 
|  | 1270 | // Otherwise we're in a very special case -- an indirect function call. See | 
|  | 1271 | // if we can be particularly clever about this. | 
|  | 1272 | Value *Callee = CS.getCalledValue(); | 
|  | 1273 |  | 
|  | 1274 | // First, pay the price of the argument setup. We account for the average | 
|  | 1275 | // 1 instruction per call argument setup here. | 
|  | 1276 | Cost += CS.arg_size() * InlineConstants::InstrCost; | 
|  | 1277 |  | 
|  | 1278 | // Next, check if this happens to be an indirect function call to a known | 
|  | 1279 | // function in this inline context. If not, we've done all we can. | 
|  | 1280 | Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee)); | 
| Haicheng Wu | a446151 | 2017-12-15 14:34:41 +0000 | [diff] [blame] | 1281 | if (!F) { | 
|  | 1282 | if (!CS.onlyReadsMemory()) | 
|  | 1283 | disableLoadElimination(); | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1284 | return Base::visitCallSite(CS); | 
| Haicheng Wu | a446151 | 2017-12-15 14:34:41 +0000 | [diff] [blame] | 1285 | } | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1286 |  | 
|  | 1287 | // If we have a constant that we are calling as a function, we can peer | 
|  | 1288 | // through it and see the function target. This happens not infrequently | 
|  | 1289 | // during devirtualization and so we want to give it a hefty bonus for | 
|  | 1290 | // inlining, but cap that bonus in the event that inlining wouldn't pan | 
|  | 1291 | // out. Pretend to inline the function, with a custom threshold. | 
| Easwaran Raman | 1c57cc2 | 2016-08-10 00:48:04 +0000 | [diff] [blame] | 1292 | auto IndirectCallParams = Params; | 
|  | 1293 | IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold; | 
| Haicheng Wu | 0812c5b | 2017-08-21 20:00:09 +0000 | [diff] [blame] | 1294 | CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, CS, | 
| Easwaran Raman | 12585b0 | 2017-01-20 22:44:04 +0000 | [diff] [blame] | 1295 | IndirectCallParams); | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1296 | if (CA.analyzeCall(CS)) { | 
|  | 1297 | // We were able to inline the indirect call! Subtract the cost from the | 
| Easwaran Raman | 6d90d9f | 2015-12-07 21:21:20 +0000 | [diff] [blame] | 1298 | // threshold to get the bonus we want to apply, but don't go below zero. | 
|  | 1299 | Cost -= std::max(0, CA.getThreshold() - CA.getCost()); | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1300 | } | 
|  | 1301 |  | 
| Haicheng Wu | a446151 | 2017-12-15 14:34:41 +0000 | [diff] [blame] | 1302 | if (!F->onlyReadsMemory()) | 
|  | 1303 | disableLoadElimination(); | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1304 | return Base::visitCallSite(CS); | 
|  | 1305 | } | 
|  | 1306 |  | 
| Chandler Carruth | 0814d2a | 2013-12-13 07:59:56 +0000 | [diff] [blame] | 1307 | bool CallAnalyzer::visitReturnInst(ReturnInst &RI) { | 
|  | 1308 | // At least one return instruction will be free after inlining. | 
|  | 1309 | bool Free = !HasReturn; | 
|  | 1310 | HasReturn = true; | 
|  | 1311 | return Free; | 
|  | 1312 | } | 
|  | 1313 |  | 
|  | 1314 | bool CallAnalyzer::visitBranchInst(BranchInst &BI) { | 
|  | 1315 | // We model unconditional branches as essentially free -- they really | 
|  | 1316 | // shouldn't exist at all, but handling them makes the behavior of the | 
|  | 1317 | // inliner more regular and predictable. Interestingly, conditional branches | 
|  | 1318 | // which will fold away are also free. | 
|  | 1319 | return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) || | 
|  | 1320 | dyn_cast_or_null<ConstantInt>( | 
|  | 1321 | SimplifiedValues.lookup(BI.getCondition())); | 
|  | 1322 | } | 
|  | 1323 |  | 
| Haicheng Wu | 3ec848b | 2017-09-27 14:44:56 +0000 | [diff] [blame] | 1324 | bool CallAnalyzer::visitSelectInst(SelectInst &SI) { | 
|  | 1325 | bool CheckSROA = SI.getType()->isPointerTy(); | 
|  | 1326 | Value *TrueVal = SI.getTrueValue(); | 
|  | 1327 | Value *FalseVal = SI.getFalseValue(); | 
|  | 1328 |  | 
|  | 1329 | Constant *TrueC = dyn_cast<Constant>(TrueVal); | 
|  | 1330 | if (!TrueC) | 
|  | 1331 | TrueC = SimplifiedValues.lookup(TrueVal); | 
|  | 1332 | Constant *FalseC = dyn_cast<Constant>(FalseVal); | 
|  | 1333 | if (!FalseC) | 
|  | 1334 | FalseC = SimplifiedValues.lookup(FalseVal); | 
|  | 1335 | Constant *CondC = | 
|  | 1336 | dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition())); | 
|  | 1337 |  | 
|  | 1338 | if (!CondC) { | 
|  | 1339 | // Select C, X, X => X | 
|  | 1340 | if (TrueC == FalseC && TrueC) { | 
|  | 1341 | SimplifiedValues[&SI] = TrueC; | 
|  | 1342 | return true; | 
|  | 1343 | } | 
|  | 1344 |  | 
|  | 1345 | if (!CheckSROA) | 
|  | 1346 | return Base::visitSelectInst(SI); | 
|  | 1347 |  | 
|  | 1348 | std::pair<Value *, APInt> TrueBaseAndOffset = | 
|  | 1349 | ConstantOffsetPtrs.lookup(TrueVal); | 
|  | 1350 | std::pair<Value *, APInt> FalseBaseAndOffset = | 
|  | 1351 | ConstantOffsetPtrs.lookup(FalseVal); | 
|  | 1352 | if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) { | 
|  | 1353 | ConstantOffsetPtrs[&SI] = TrueBaseAndOffset; | 
|  | 1354 |  | 
|  | 1355 | Value *SROAArg; | 
|  | 1356 | DenseMap<Value *, int>::iterator CostIt; | 
|  | 1357 | if (lookupSROAArgAndCost(TrueVal, SROAArg, CostIt)) | 
|  | 1358 | SROAArgValues[&SI] = SROAArg; | 
|  | 1359 | return true; | 
|  | 1360 | } | 
|  | 1361 |  | 
|  | 1362 | return Base::visitSelectInst(SI); | 
|  | 1363 | } | 
|  | 1364 |  | 
|  | 1365 | // Select condition is a constant. | 
|  | 1366 | Value *SelectedV = CondC->isAllOnesValue() | 
|  | 1367 | ? TrueVal | 
|  | 1368 | : (CondC->isNullValue()) ? FalseVal : nullptr; | 
|  | 1369 | if (!SelectedV) { | 
|  | 1370 | // Condition is a vector constant that is not all 1s or all 0s.  If all | 
|  | 1371 | // operands are constants, ConstantExpr::getSelect() can handle cases such | 
|  | 1372 | // as vector selects. | 
|  | 1373 | if (TrueC && FalseC) { | 
|  | 1374 | if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) { | 
|  | 1375 | SimplifiedValues[&SI] = C; | 
|  | 1376 | return true; | 
|  | 1377 | } | 
|  | 1378 | } | 
|  | 1379 | return Base::visitSelectInst(SI); | 
|  | 1380 | } | 
|  | 1381 |  | 
|  | 1382 | // Condition is either all 1s or all 0s. SI can be simplified. | 
|  | 1383 | if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) { | 
|  | 1384 | SimplifiedValues[&SI] = SelectedC; | 
|  | 1385 | return true; | 
|  | 1386 | } | 
|  | 1387 |  | 
|  | 1388 | if (!CheckSROA) | 
|  | 1389 | return true; | 
|  | 1390 |  | 
|  | 1391 | std::pair<Value *, APInt> BaseAndOffset = | 
|  | 1392 | ConstantOffsetPtrs.lookup(SelectedV); | 
|  | 1393 | if (BaseAndOffset.first) { | 
|  | 1394 | ConstantOffsetPtrs[&SI] = BaseAndOffset; | 
|  | 1395 |  | 
|  | 1396 | Value *SROAArg; | 
|  | 1397 | DenseMap<Value *, int>::iterator CostIt; | 
|  | 1398 | if (lookupSROAArgAndCost(SelectedV, SROAArg, CostIt)) | 
|  | 1399 | SROAArgValues[&SI] = SROAArg; | 
|  | 1400 | } | 
|  | 1401 |  | 
|  | 1402 | return true; | 
|  | 1403 | } | 
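A minimal sketch of the constant-condition path: once the condition simplifies to a constant i1, the select collapses to one operand and is free:

```cpp
// After substituting c = true, the select folds to 'a' during analysis.
static int pick(bool c, int a, int b) {
  return c ? a : b; // select i1 true, %a, %b  ->  %a
}

int caller(int a, int b) { return pick(true, a, b); }
```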
|  | 1404 |  | 
| Chandler Carruth | 0814d2a | 2013-12-13 07:59:56 +0000 | [diff] [blame] | 1405 | bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { | 
|  | 1406 | // We model unconditional switches as free, see the comments on handling | 
|  | 1407 | // branches. | 
| Chandler Carruth | e01fd5f | 2014-04-28 08:52:44 +0000 | [diff] [blame] | 1408 | if (isa<ConstantInt>(SI.getCondition())) | 
|  | 1409 | return true; | 
|  | 1410 | if (Value *V = SimplifiedValues.lookup(SI.getCondition())) | 
|  | 1411 | if (isa<ConstantInt>(V)) | 
|  | 1412 | return true; | 
|  | 1413 |  | 
| Eric Christopher | 7ad02ee | 2017-06-28 21:10:31 +0000 | [diff] [blame] | 1414 | // Assume the most general case where the switch is lowered into | 
| Jun Bum Lim | 2960d41 | 2017-06-02 20:42:54 +0000 | [diff] [blame] | 1415 | // either a jump table, bit test, or a balanced binary tree consisting of | 
|  | 1416 | // case clusters without merging adjacent clusters with the same | 
|  | 1417 | // destination. We do not consider switches that are lowered with a mix | 
|  | 1418 | // of jump table/bit test/binary search tree. The cost of the switch is | 
|  | 1419 | // proportional to the size of the tree or the size of jump table range. | 
|  | 1420 | // | 
| Chandler Carruth | e01fd5f | 2014-04-28 08:52:44 +0000 | [diff] [blame] | 1421 | // NB: We convert large switches which are just used to initialize large phi | 
|  | 1422 | // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent | 
|  | 1423 | // inlining those. It will prevent inlining in cases where the optimization | 
|  | 1424 | // does not (yet) fire. | 
| Jun Bum Lim | 2960d41 | 2017-06-02 20:42:54 +0000 | [diff] [blame] | 1425 |  | 
| Jun Bum Lim | 506cfb7 | 2017-06-23 16:12:37 +0000 | [diff] [blame] | 1426 | // The maximum cost that Cost may validly be increased to in this function. | 
|  | 1427 | int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1; | 
|  | 1428 |  | 
| Jun Bum Lim | 2960d41 | 2017-06-02 20:42:54 +0000 | [diff] [blame] | 1429 | // Exit early for a large switch, assuming one case needs at least one | 
|  | 1430 | // instruction. | 
|  | 1431 | // FIXME: This is not true for a bit test, but ignore such case for now to | 
|  | 1432 | // save compile-time. | 
|  | 1433 | int64_t CostLowerBound = | 
| Jun Bum Lim | 506cfb7 | 2017-06-23 16:12:37 +0000 | [diff] [blame] | 1434 | std::min((int64_t)CostUpperBound, | 
| Jun Bum Lim | 2960d41 | 2017-06-02 20:42:54 +0000 | [diff] [blame] | 1435 | (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost); | 
|  | 1436 |  | 
| Haicheng Wu | 0812c5b | 2017-08-21 20:00:09 +0000 | [diff] [blame] | 1437 | if (CostLowerBound > Threshold && !ComputeFullInlineCost) { | 
| Jun Bum Lim | 2960d41 | 2017-06-02 20:42:54 +0000 | [diff] [blame] | 1438 | Cost = CostLowerBound; | 
|  | 1439 | return false; | 
|  | 1440 | } | 
|  | 1441 |  | 
|  | 1442 | unsigned JumpTableSize = 0; | 
|  | 1443 | unsigned NumCaseCluster = | 
|  | 1444 | TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize); | 
|  | 1445 |  | 
|  | 1446 | // If suitable for a jump table, consider the cost for the table size and | 
|  | 1447 | // branch to destination. | 
|  | 1448 | if (JumpTableSize) { | 
|  | 1449 | int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost + | 
|  | 1450 | 4 * InlineConstants::InstrCost; | 
| Jun Bum Lim | 506cfb7 | 2017-06-23 16:12:37 +0000 | [diff] [blame] | 1451 |  | 
|  | 1452 | Cost = std::min((int64_t)CostUpperBound, JTCost + Cost); | 
| Jun Bum Lim | 2960d41 | 2017-06-02 20:42:54 +0000 | [diff] [blame] | 1453 | return false; | 
|  | 1454 | } | 
|  | 1455 |  | 
|  | 1456 | // Considering forming a binary search tree, we should find the number of | 
|  | 1457 | // nodes, which is the same as the number of comparisons when lowered. For a given | 
|  | 1458 | // number of clusters, n, we can define a recursive function, f(n), to find | 
|  | 1459 | // the number of nodes in the tree. The recursion is : | 
|  | 1460 | // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3, | 
|  | 1461 | // and f(n) = n, when n <= 3. | 
|  | 1462 | // This will lead to a binary tree where each leaf is either f(2) or f(3) | 
|  | 1463 | // when n > 3.  So, the number of comparisons from leaves should be n, while | 
|  | 1464 | // the number of non-leaf should be : | 
|  | 1465 | //   2^(log2(n) - 1) - 1 | 
|  | 1466 | //   = 2^log2(n) * 2^-1 - 1 | 
|  | 1467 | //   = n / 2 - 1. | 
|  | 1468 | // Considering comparisons from leaf and non-leaf nodes, we can estimate the | 
|  | 1469 | // number of comparisons in a simple closed form : | 
|  | 1470 | //   n + n / 2 - 1 = n * 3 / 2 - 1 | 
|  | 1471 | if (NumCaseCluster <= 3) { | 
|  | 1472 | // Suppose a comparison includes one compare and one conditional branch. | 
|  | 1473 | Cost += NumCaseCluster * 2 * InlineConstants::InstrCost; | 
|  | 1474 | return false; | 
|  | 1475 | } | 
| Jun Bum Lim | 506cfb7 | 2017-06-23 16:12:37 +0000 | [diff] [blame] | 1476 |  | 
|  | 1477 | int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1; | 
|  | 1478 | int64_t SwitchCost = | 
| Jun Bum Lim | 2960d41 | 2017-06-02 20:42:54 +0000 | [diff] [blame] | 1479 | ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost; | 
| Jun Bum Lim | 506cfb7 | 2017-06-23 16:12:37 +0000 | [diff] [blame] | 1480 |  | 
|  | 1481 | Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost); | 
| Chandler Carruth | e01fd5f | 2014-04-28 08:52:44 +0000 | [diff] [blame] | 1482 | return false; | 
| Chandler Carruth | 0814d2a | 2013-12-13 07:59:56 +0000 | [diff] [blame] | 1483 | } | 
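Plugging numbers into the closed form above: with `InlineConstants::InstrCost` equal to 5 and 10 case clusters, the estimate is 3 * 10 / 2 - 1 = 14 compares, for a cost of 14 * 2 * 5 = 140. A small standalone sketch of both lowering estimates, with the instruction cost restated as a local constant:

```cpp
#include <cstdint>
#include <iostream>

constexpr int InstrCost = 5; // stands in for InlineConstants::InstrCost

// Jump table: one entry per table slot plus ~4 instructions of dispatch.
int64_t jumpTableCost(unsigned JumpTableSize) {
  return (int64_t)JumpTableSize * InstrCost + 4 * InstrCost;
}

// Balanced binary search tree: n * 3/2 - 1 compares, 2 instructions each.
int64_t binarySearchCost(unsigned NumCaseCluster) {
  if (NumCaseCluster <= 3)
    return NumCaseCluster * 2 * InstrCost;
  int64_t Compares = 3 * (int64_t)NumCaseCluster / 2 - 1;
  return Compares * 2 * InstrCost;
}

int main() {
  std::cout << binarySearchCost(10) << '\n'; // 14 compares -> 140
  std::cout << jumpTableCost(16) << '\n';    // 16 entries  -> 100
}
```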
|  | 1484 |  | 
|  | 1485 | bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) { | 
|  | 1486 | // We never want to inline functions that contain an indirectbr.  This is | 
|  | 1487 | // incorrect because all the blockaddresses (in static global initializers | 
|  | 1488 | // for example) would be referring to the original function, and this | 
|  | 1489 | // indirect jump would jump from the inlined copy of the function into the | 
|  | 1490 | // original function which is extremely undefined behavior. | 
|  | 1491 | // FIXME: This logic isn't really right; we can safely inline functions with | 
|  | 1492 | // indirectbr's as long as no other function or global references the | 
| Gerolf Hoflehner | 734f4c8 | 2014-07-01 00:19:34 +0000 | [diff] [blame] | 1493 | // blockaddress of a block within the current function. | 
| Chandler Carruth | 0814d2a | 2013-12-13 07:59:56 +0000 | [diff] [blame] | 1494 | HasIndirectBr = true; | 
|  | 1495 | return false; | 
|  | 1496 | } | 
|  | 1497 |  | 
|  | 1498 | bool CallAnalyzer::visitResumeInst(ResumeInst &RI) { | 
|  | 1499 | // FIXME: It's not clear that a single instruction is an accurate model for | 
|  | 1500 | // the inline cost of a resume instruction. | 
|  | 1501 | return false; | 
|  | 1502 | } | 
|  | 1503 |  | 
| David Majnemer | 654e130 | 2015-07-31 17:58:14 +0000 | [diff] [blame] | 1504 | bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) { | 
|  | 1505 | // FIXME: It's not clear that a single instruction is an accurate model for | 
|  | 1506 | // the inline cost of a cleanupret instruction. | 
|  | 1507 | return false; | 
|  | 1508 | } | 
|  | 1509 |  | 
|  | 1510 | bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) { | 
|  | 1511 | // FIXME: It's not clear that a single instruction is an accurate model for | 
| Joseph Tremoulet | 8220bcc | 2015-08-23 00:26:33 +0000 | [diff] [blame] | 1512 | // the inline cost of a catchret instruction. | 
| David Majnemer | 654e130 | 2015-07-31 17:58:14 +0000 | [diff] [blame] | 1513 | return false; | 
|  | 1514 | } | 
|  | 1515 |  | 
| Chandler Carruth | 0814d2a | 2013-12-13 07:59:56 +0000 | [diff] [blame] | 1516 | bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) { | 
|  | 1517 | // FIXME: It might be reasonable to discount the cost of instructions leading | 
|  | 1518 | // to unreachable as they have the lowest possible impact on both runtime and | 
|  | 1519 | // code size. | 
|  | 1520 | return true; // No actual code is needed for unreachable. | 
|  | 1521 | } | 
|  | 1522 |  | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1523 | bool CallAnalyzer::visitInstruction(Instruction &I) { | 
| Chandler Carruth | da7513a | 2012-05-04 00:58:03 +0000 | [diff] [blame] | 1524 | // Some instructions are free. All of the free intrinsics can also be | 
|  | 1525 | // handled by SROA, etc. | 
| Chandler Carruth | b8cf510 | 2013-01-21 12:05:16 +0000 | [diff] [blame] | 1526 | if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I)) | 
| Chandler Carruth | da7513a | 2012-05-04 00:58:03 +0000 | [diff] [blame] | 1527 | return true; | 
|  | 1528 |  | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1529 | // We found something we don't understand or can't handle. Mark any SROA-able | 
|  | 1530 | // values in the operand list as no longer viable. | 
|  | 1531 | for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI) | 
|  | 1532 | disableSROA(*OI); | 
|  | 1533 |  | 
|  | 1534 | return false; | 
|  | 1535 | } | 
|  | 1536 |  | 
| Adrian Prantl | 5f8f34e4 | 2018-05-01 15:54:18 +0000 | [diff] [blame] | 1537 | /// Analyze a basic block for its contribution to the inline cost. | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1538 | /// | 
|  | 1539 | /// This method walks the analyzer over every instruction in the given basic | 
|  | 1540 | /// block and accounts for their cost during inlining at this callsite. It | 
|  | 1541 | /// aborts early if the threshold has been exceeded or an impossible to inline | 
|  | 1542 | /// construct has been detected. It returns false if inlining is no longer | 
|  | 1543 | /// viable, and true if inlining remains viable. | 
| David Bolvansky | c0aa4b7 | 2018-08-05 14:53:08 +0000 | [diff] [blame] | 1544 | InlineResult | 
|  | 1545 | CallAnalyzer::analyzeBlock(BasicBlock *BB, | 
|  | 1546 | SmallPtrSetImpl<const Value *> &EphValues) { | 
| Chandler Carruth | 0814d2a | 2013-12-13 07:59:56 +0000 | [diff] [blame] | 1547 | for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { | 
| Chandler Carruth | 6b4cc8b | 2014-02-01 10:38:17 +0000 | [diff] [blame] | 1548 | // FIXME: Currently, the number of instructions in a function, regardless of | 
|  | 1549 | // our ability to simplify them to constants or dead code during inlining, | 
|  | 1550 | // is actually used by the vector bonus heuristic. As long as that's true, | 
|  | 1551 | // we have to special case debug intrinsics here to prevent differences in | 
|  | 1552 | // inlining due to debug symbols. Eventually, the number of unsimplified | 
|  | 1553 | // instructions shouldn't factor into the cost computation, but until then, | 
|  | 1554 | // hack around it here. | 
|  | 1555 | if (isa<DbgInfoIntrinsic>(I)) | 
|  | 1556 | continue; | 
|  | 1557 |  | 
| Hal Finkel | 57f03dd | 2014-09-07 13:49:57 +0000 | [diff] [blame] | 1558 | // Skip ephemeral values. | 
| Duncan P. N. Exon Smith | 5a82c91 | 2015-10-10 00:53:03 +0000 | [diff] [blame] | 1559 | if (EphValues.count(&*I)) | 
| Hal Finkel | 57f03dd | 2014-09-07 13:49:57 +0000 | [diff] [blame] | 1560 | continue; | 
|  | 1561 |  | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1562 | ++NumInstructions; | 
|  | 1563 | if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy()) | 
|  | 1564 | ++NumVectorInstructions; | 
|  | 1565 |  | 
|  | 1566 | // If the instruction simplified to a constant, there is no cost to this | 
|  | 1567 | // instruction. Visit the instructions using our InstVisitor to account for | 
|  | 1568 | // all of the per-instruction logic. The visit tree returns true if we | 
|  | 1569 | // consumed the instruction in any way, and false if the instruction's base | 
|  | 1570 | // cost should count against inlining. | 
| Duncan P. N. Exon Smith | 5a82c91 | 2015-10-10 00:53:03 +0000 | [diff] [blame] | 1571 | if (Base::visit(&*I)) | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1572 | ++NumInstructionsSimplified; | 
|  | 1573 | else | 
|  | 1574 | Cost += InlineConstants::InstrCost; | 
|  | 1575 |  | 
| Haicheng Wu | 0812c5b | 2017-08-21 20:00:09 +0000 | [diff] [blame] | 1576 | using namespace ore; | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1577 | // If visiting this instruction detected an uninlinable pattern, abort. | 
| David Bolvansky | c0aa4b7 | 2018-08-05 14:53:08 +0000 | [diff] [blame] | 1578 | InlineResult IR; | 
|  | 1579 | if (IsRecursiveCall) | 
|  | 1580 | IR = "recursive"; | 
|  | 1581 | else if (ExposesReturnsTwice) | 
|  | 1582 | IR = "exposes returns twice"; | 
|  | 1583 | else if (HasDynamicAlloca) | 
|  | 1584 | IR = "dynamic alloca"; | 
|  | 1585 | else if (HasIndirectBr) | 
|  | 1586 | IR = "indirect branch"; | 
|  | 1587 | else if (HasUninlineableIntrinsic) | 
|  | 1588 | IR = "uninlinable intrinsic"; | 
| Sameer AbuAsal | 77beee4 | 2018-09-20 18:39:34 +0000 | [diff] [blame] | 1589 | else if (InitsVargArgs) | 
| David Bolvansky | c0aa4b7 | 2018-08-05 14:53:08 +0000 | [diff] [blame] | 1590 | IR = "varargs"; | 
|  | 1591 | if (!IR) { | 
| Haicheng Wu | 0812c5b | 2017-08-21 20:00:09 +0000 | [diff] [blame] | 1592 | if (ORE) | 
| Vivek Pandya | 9590658 | 2017-10-11 17:12:59 +0000 | [diff] [blame] | 1593 | ORE->emit([&]() { | 
|  | 1594 | return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", | 
|  | 1595 | CandidateCS.getInstruction()) | 
| David Bolvansky | c0aa4b7 | 2018-08-05 14:53:08 +0000 | [diff] [blame] | 1596 | << NV("Callee", &F) << " has uninlinable pattern (" | 
|  | 1597 | << NV("InlineResult", IR.message) | 
|  | 1598 | << ") and cost is not fully computed"; | 
| Vivek Pandya | 9590658 | 2017-10-11 17:12:59 +0000 | [diff] [blame] | 1599 | }); | 
| David Bolvansky | c0aa4b7 | 2018-08-05 14:53:08 +0000 | [diff] [blame] | 1600 | return IR; | 
| Haicheng Wu | 0812c5b | 2017-08-21 20:00:09 +0000 | [diff] [blame] | 1601 | } | 
| Nadav Rotem | 4eb3d4b | 2012-09-19 08:08:04 +0000 | [diff] [blame] | 1602 |  | 
|  | 1603 | // If the caller is a recursive function then we don't want to inline | 
|  | 1604 | // functions which allocate a lot of stack space because it would increase | 
|  | 1605 | // the caller stack usage dramatically. | 
|  | 1606 | if (IsCallerRecursive && | 
| Haicheng Wu | 0812c5b | 2017-08-21 20:00:09 +0000 | [diff] [blame] | 1607 | AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) { | 
| David Bolvansky | c0aa4b7 | 2018-08-05 14:53:08 +0000 | [diff] [blame] | 1608 | InlineResult IR = "recursive and allocates too much stack space"; | 
| Haicheng Wu | 0812c5b | 2017-08-21 20:00:09 +0000 | [diff] [blame] | 1609 | if (ORE) | 
| Vivek Pandya | 9590658 | 2017-10-11 17:12:59 +0000 | [diff] [blame] | 1610 | ORE->emit([&]() { | 
|  | 1611 | return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", | 
|  | 1612 | CandidateCS.getInstruction()) | 
| David Bolvansky | c0aa4b7 | 2018-08-05 14:53:08 +0000 | [diff] [blame] | 1613 | << NV("Callee", &F) << " is " << NV("InlineResult", IR.message) | 
|  | 1614 | << ". Cost is not fully computed"; | 
| Vivek Pandya | 9590658 | 2017-10-11 17:12:59 +0000 | [diff] [blame] | 1615 | }); | 
| David Bolvansky | c0aa4b7 | 2018-08-05 14:53:08 +0000 | [diff] [blame] | 1616 | return IR; | 
| Haicheng Wu | 0812c5b | 2017-08-21 20:00:09 +0000 | [diff] [blame] | 1617 | } | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1618 |  | 
| Chandler Carruth | a004f22 | 2015-05-27 02:49:05 +0000 | [diff] [blame] | 1619 | // Check if we've passed the maximum possible threshold so we don't spin in | 
|  | 1620 | // huge basic blocks that will never inline. | 
| Haicheng Wu | 6199536 | 2017-08-25 19:00:33 +0000 | [diff] [blame] | 1621 | if (Cost >= Threshold && !ComputeFullInlineCost) | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1622 | return false; | 
|  | 1623 | } | 
|  | 1624 |  | 
|  | 1625 | return true; | 
|  | 1626 | } | 
|  | 1627 |  | 
| Adrian Prantl | 5f8f34e4 | 2018-05-01 15:54:18 +0000 | [diff] [blame] | 1628 | /// Compute the base pointer and cumulative constant offsets for V. | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1629 | /// | 
|  | 1630 | /// This strips all constant offsets off of V, leaving it the base pointer, and | 
|  | 1631 | /// accumulates the total constant offset applied in the returned constant. It | 
|  | 1632 | // returns null if V is not a pointer, and returns the constant '0' if there | 
|  | 1633 | // are no constant offsets applied. | 
|  | 1634 | ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { | 
| Mehdi Amini | a28d91d | 2015-03-10 02:37:25 +0000 | [diff] [blame] | 1635 | if (!V->getType()->isPointerTy()) | 
| Craig Topper | 353eda4 | 2014-04-24 06:44:33 +0000 | [diff] [blame] | 1636 | return nullptr; | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1637 |  | 
| Bjorn Pettersson | 77f3299 | 2018-01-04 18:23:40 +0000 | [diff] [blame] | 1638 | unsigned AS = V->getType()->getPointerAddressSpace(); | 
| Elena Demikhovsky | 945b7e5 | 2018-02-14 06:58:08 +0000 | [diff] [blame] | 1639 | unsigned IntPtrWidth = DL.getIndexSizeInBits(AS); | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1640 | APInt Offset = APInt::getNullValue(IntPtrWidth); | 
|  | 1641 |  | 
|  | 1642 | // Even though we don't look through PHI nodes, we could be called on an | 
|  | 1643 | // instruction in an unreachable block, which may be on a cycle. | 
|  | 1644 | SmallPtrSet<Value *, 4> Visited; | 
|  | 1645 | Visited.insert(V); | 
|  | 1646 | do { | 
|  | 1647 | if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { | 
|  | 1648 | if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset)) | 
| Craig Topper | 353eda4 | 2014-04-24 06:44:33 +0000 | [diff] [blame] | 1649 | return nullptr; | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1650 | V = GEP->getPointerOperand(); | 
|  | 1651 | } else if (Operator::getOpcode(V) == Instruction::BitCast) { | 
|  | 1652 | V = cast<Operator>(V)->getOperand(0); | 
|  | 1653 | } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { | 
| Sanjoy Das | 5ce3272 | 2016-04-08 00:48:30 +0000 | [diff] [blame] | 1654 | if (GA->isInterposable()) | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1655 | break; | 
|  | 1656 | V = GA->getAliasee(); | 
|  | 1657 | } else { | 
|  | 1658 | break; | 
|  | 1659 | } | 
|  | 1660 | assert(V->getType()->isPointerTy() && "Unexpected operand type!"); | 
| David Blaikie | 70573dc | 2014-11-19 07:49:26 +0000 | [diff] [blame] | 1661 | } while (Visited.insert(V).second); | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1662 |  | 
| Bjorn Pettersson | 77f3299 | 2018-01-04 18:23:40 +0000 | [diff] [blame] | 1663 | Type *IntPtrTy = DL.getIntPtrType(V->getContext(), AS); | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1664 | return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset)); | 
|  | 1665 | } | 
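|  |  |  | 
|  |  | // Illustrative sketch (not part of the analysis; all names here are | 
|  |  | // hypothetical): the same strip-to-the-base pattern on a toy pointer chain, | 
|  |  | // including the visited set that guards against cycles reachable only from | 
|  |  | // dead code. Assumes <cstdint>, <set>, and <utility>. | 
|  |  | // | 
|  |  | //   struct ToyLink { | 
|  |  | //     ToyLink *Base = nullptr; // next link toward the base pointer | 
|  |  | //     int64_t Offset = 0;      // constant offset this link applies | 
|  |  | //   }; | 
|  |  | // | 
|  |  | //   static std::pair<ToyLink *, int64_t> stripToBase(ToyLink *V) { | 
|  |  | //     std::set<ToyLink *> Visited{V}; | 
|  |  | //     int64_t Offset = 0; | 
|  |  | //     while (V->Base && Visited.insert(V->Base).second) { | 
|  |  | //       Offset += V->Offset; // accumulate this link's constant offset | 
|  |  | //       V = V->Base;         // step toward the base | 
|  |  | //     } | 
|  |  | //     return {V, Offset};    // base pointer plus total constant offset | 
|  |  | //   } | 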
|  | 1666 |  | 
| Adrian Prantl | 5f8f34e4 | 2018-05-01 15:54:18 +0000 | [diff] [blame] | 1667 | /// Find dead blocks due to deleted CFG edges during inlining. | 
| Haicheng Wu | 3739e14 | 2017-12-14 14:36:18 +0000 | [diff] [blame] | 1668 | /// | 
|  | 1669 | /// If we know the successor of the current block, \p CurrBB, has to be \p | 
|  | 1670 | /// NextBB, the other successors of \p CurrBB are dead if these successors have | 
|  | 1671 | /// no live incoming CFG edges.  If one block is found to be dead, we can | 
|  | 1672 | /// continue growing the dead block list by checking the successors of the dead | 
|  | 1673 | /// blocks to see if all their incoming edges are dead or not. | 
|  | 1674 | void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) { | 
|  | 1675 | auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) { | 
|  | 1676 | // A CFG edge is dead if the predecessor is dead or the predecessor has a | 
|  | 1677 | // known successor which is not the one under examination. | 
|  | 1678 | return (DeadBlocks.count(Pred) || | 
|  | 1679 | (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ)); | 
|  | 1680 | }; | 
|  | 1681 |  | 
|  | 1682 | auto IsNewlyDead = [&](BasicBlock *BB) { | 
|  | 1683 | // If all the edges to a block are dead, the block is also dead. | 
|  | 1684 | return (!DeadBlocks.count(BB) && | 
|  | 1685 | llvm::all_of(predecessors(BB), | 
|  | 1686 | [&](BasicBlock *P) { return IsEdgeDead(P, BB); })); | 
|  | 1687 | }; | 
|  | 1688 |  | 
|  | 1689 | for (BasicBlock *Succ : successors(CurrBB)) { | 
|  | 1690 | if (Succ == NextBB || !IsNewlyDead(Succ)) | 
|  | 1691 | continue; | 
|  | 1692 | SmallVector<BasicBlock *, 4> NewDead; | 
|  | 1693 | NewDead.push_back(Succ); | 
|  | 1694 | while (!NewDead.empty()) { | 
|  | 1695 | BasicBlock *Dead = NewDead.pop_back_val(); | 
|  | 1696 | if (DeadBlocks.insert(Dead)) | 
|  | 1697 | // Continue growing the dead block list. | 
|  | 1698 | for (BasicBlock *S : successors(Dead)) | 
|  | 1699 | if (IsNewlyDead(S)) | 
|  | 1700 | NewDead.push_back(S); | 
|  | 1701 | } | 
|  | 1702 | } | 
|  | 1703 | } | 
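|  |  |  | 
|  |  | // A self-contained sketch of the propagation above on a toy CFG (for | 
|  |  | // illustration only; these names and containers are hypothetical, not LLVM | 
|  |  | // API). Once one successor is known to be taken, sibling edges die, and | 
|  |  | // blocks all of whose incoming edges are dead become dead transitively. | 
|  |  | // Assumes <map>, <set>, and <vector>. | 
|  |  | // | 
|  |  | //   using Block = int; // blocks numbered from 1; 0 is a sentinel | 
|  |  | //   std::map<Block, std::vector<Block>> Preds, Succs; | 
|  |  | //   std::map<Block, Block> KnownSucc; // 0 = no successor proven yet | 
|  |  | //   std::set<Block> Dead; | 
|  |  | // | 
|  |  | //   bool edgeDead(Block P, Block S) { | 
|  |  | //     return Dead.count(P) || (KnownSucc[P] && KnownSucc[P] != S); | 
|  |  | //   } | 
|  |  | //   bool newlyDead(Block B) { | 
|  |  | //     if (Dead.count(B)) return false; | 
|  |  | //     for (Block P : Preds[B]) | 
|  |  | //       if (!edgeDead(P, B)) return false; // a live edge keeps B alive | 
|  |  | //     return true; | 
|  |  | //   } | 
|  |  | //   void findDead(Block Curr, Block Next) { | 
|  |  | //     KnownSucc[Curr] = Next; | 
|  |  | //     std::vector<Block> Work; | 
|  |  | //     for (Block S : Succs[Curr]) | 
|  |  | //       if (S != Next && newlyDead(S)) Work.push_back(S); | 
|  |  | //     while (!Work.empty()) { | 
|  |  | //       Block D = Work.back(); Work.pop_back(); | 
|  |  | //       if (Dead.insert(D).second) // grow the dead set transitively | 
|  |  | //         for (Block S : Succs[D]) | 
|  |  | //           if (newlyDead(S)) Work.push_back(S); | 
|  |  | //     } | 
|  |  | //   } | 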
|  | 1704 |  | 
| Adrian Prantl | 5f8f34e4 | 2018-05-01 15:54:18 +0000 | [diff] [blame] | 1705 | /// Analyze a call site for potential inlining. | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1706 | /// | 
|  | 1707 | /// Returns true if inlining this call is viable, and false if it is not | 
|  | 1708 | /// viable. It computes the cost and adjusts the threshold based on numerous | 
|  | 1709 | /// factors and heuristics. If this method returns false but the computed cost | 
|  | 1710 | /// is below the computed threshold, then inlining was forcibly disabled by | 
| Bob Wilson | 266802d | 2012-11-19 07:04:30 +0000 | [diff] [blame] | 1711 | /// some artifact of the routine. | 
| David Bolvansky | c0aa4b7 | 2018-08-05 14:53:08 +0000 | [diff] [blame] | 1712 | InlineResult CallAnalyzer::analyzeCall(CallSite CS) { | 
| Chandler Carruth | 7ae90d4 | 2012-04-11 10:15:10 +0000 | [diff] [blame] | 1713 | ++NumCallsAnalyzed; | 
|  | 1714 |  | 
| Bob Wilson | a5b0dc8 | 2012-11-19 07:04:35 +0000 | [diff] [blame] | 1715 | // Perform some tweaks to the cost and threshold based on the direct | 
|  | 1716 | // callsite information. | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1717 |  | 
| Bob Wilson | a5b0dc8 | 2012-11-19 07:04:35 +0000 | [diff] [blame] | 1718 | // We want to more aggressively inline vector-dense kernels, so up the | 
|  | 1719 | // threshold, and we'll lower it if the % of vector instructions gets too | 
| Chandler Carruth | a004f22 | 2015-05-27 02:49:05 +0000 | [diff] [blame] | 1720 | // low. Note that these bonuses are somewhat arbitrary and evolved over time | 
|  | 1721 | // by accident as much as because they are principled bonuses. | 
|  | 1722 | // | 
|  | 1723 | // FIXME: It would be nice to remove all such bonuses. At least it would be | 
|  | 1724 | // nice to base the bonus values on something more scientific. | 
| Bob Wilson | a5b0dc8 | 2012-11-19 07:04:35 +0000 | [diff] [blame] | 1725 | assert(NumInstructions == 0); | 
|  | 1726 | assert(NumVectorInstructions == 0); | 
| Easwaran Raman | f4bb2f0 | 2016-01-14 23:16:29 +0000 | [diff] [blame] | 1727 |  | 
|  | 1728 | // Update the threshold based on callsite properties | 
|  | 1729 | updateThreshold(CS, F); | 
|  | 1730 |  | 
| Chandler Carruth | a004f22 | 2015-05-27 02:49:05 +0000 | [diff] [blame] | 1731 | // Speculatively apply all possible bonuses to Threshold. If cost exceeds | 
|  | 1732 | // this Threshold at any point (and the cost can never decrease), we can stop | 
|  | 1733 | // processing the rest of the function body. | 
| Easwaran Raman | 51b809b | 2017-07-28 21:47:36 +0000 | [diff] [blame] | 1734 | Threshold += (SingleBBBonus + VectorBonus); | 
| Chandler Carruth | a004f22 | 2015-05-27 02:49:05 +0000 | [diff] [blame] | 1735 |  | 
| Xinliang David Li | 351d9b0 | 2017-05-02 05:38:41 +0000 | [diff] [blame] | 1736 | // Give out bonuses for the callsite, as the instructions setting them up | 
|  | 1737 | // will be gone after inlining. | 
|  | 1738 | Cost -= getCallsiteCost(CS, DL); | 
| Benjamin Kramer | c99d0e9 | 2012-08-07 11:13:19 +0000 | [diff] [blame] | 1739 |  | 
| Bob Wilson | a5b0dc8 | 2012-11-19 07:04:35 +0000 | [diff] [blame] | 1740 | // If this function uses the coldcc calling convention, prefer not to inline | 
|  | 1741 | // it. | 
|  | 1742 | if (F.getCallingConv() == CallingConv::Cold) | 
|  | 1743 | Cost += InlineConstants::ColdccPenalty; | 
|  | 1744 |  | 
|  | 1745 | // Check if we're done. This can happen due to bonuses and penalties. | 
| Haicheng Wu | 6199536 | 2017-08-25 19:00:33 +0000 | [diff] [blame] | 1746 | if (Cost >= Threshold && !ComputeFullInlineCost) | 
| David Bolvansky | c0aa4b7 | 2018-08-05 14:53:08 +0000 | [diff] [blame] | 1747 | return "high cost"; | 
| Bob Wilson | a5b0dc8 | 2012-11-19 07:04:35 +0000 | [diff] [blame] | 1748 |  | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1749 | if (F.empty()) | 
|  | 1750 | return true; | 
|  | 1751 |  | 
| Davide Italiano | 9d939c8 | 2017-11-30 22:10:35 +0000 | [diff] [blame] | 1752 | Function *Caller = CS.getInstruction()->getFunction(); | 
| Nadav Rotem | 4eb3d4b | 2012-09-19 08:08:04 +0000 | [diff] [blame] | 1753 | // Check if the caller function is recursive itself. | 
| Chandler Carruth | cdf4788 | 2014-03-09 03:16:01 +0000 | [diff] [blame] | 1754 | for (User *U : Caller->users()) { | 
|  | 1755 | CallSite Site(U); | 
| Nadav Rotem | 4eb3d4b | 2012-09-19 08:08:04 +0000 | [diff] [blame] | 1756 | if (!Site) | 
|  | 1757 | continue; | 
|  | 1758 | Instruction *I = Site.getInstruction(); | 
| Davide Italiano | 9d939c8 | 2017-11-30 22:10:35 +0000 | [diff] [blame] | 1759 | if (I->getFunction() == Caller) { | 
| Nadav Rotem | 4eb3d4b | 2012-09-19 08:08:04 +0000 | [diff] [blame] | 1760 | IsCallerRecursive = true; | 
|  | 1761 | break; | 
|  | 1762 | } | 
|  | 1763 | } | 
|  | 1764 |  | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1765 | // Populate our simplified values by mapping from function arguments to call | 
|  | 1766 | // arguments with known important simplifications. | 
|  | 1767 | CallSite::arg_iterator CAI = CS.arg_begin(); | 
|  | 1768 | for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end(); | 
|  | 1769 | FAI != FAE; ++FAI, ++CAI) { | 
|  | 1770 | assert(CAI != CS.arg_end()); | 
|  | 1771 | if (Constant *C = dyn_cast<Constant>(CAI)) | 
| Duncan P. N. Exon Smith | 5a82c91 | 2015-10-10 00:53:03 +0000 | [diff] [blame] | 1772 | SimplifiedValues[&*FAI] = C; | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1773 |  | 
|  | 1774 | Value *PtrArg = *CAI; | 
|  | 1775 | if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) { | 
| Duncan P. N. Exon Smith | 5a82c91 | 2015-10-10 00:53:03 +0000 | [diff] [blame] | 1776 | ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue()); | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1777 |  | 
|  | 1778 | // We can SROA any pointer arguments derived from alloca instructions. | 
|  | 1779 | if (isa<AllocaInst>(PtrArg)) { | 
| Duncan P. N. Exon Smith | 5a82c91 | 2015-10-10 00:53:03 +0000 | [diff] [blame] | 1780 | SROAArgValues[&*FAI] = PtrArg; | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1781 | SROAArgCosts[PtrArg] = 0; | 
|  | 1782 | } | 
|  | 1783 | } | 
|  | 1784 | } | 
|  | 1785 | NumConstantArgs = SimplifiedValues.size(); | 
|  | 1786 | NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size(); | 
|  | 1787 | NumAllocaArgs = SROAArgValues.size(); | 
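|  |  |  | 
|  |  | // To illustrate the effect of this seeding (example only, not code in this | 
|  |  | // file; expensiveColdPath is hypothetical): for a call site `f(42)` into | 
|  |  | // | 
|  |  | //   static int f(int x) { | 
|  |  | //     if (x > 0)        // folds once SimplifiedValues maps x -> 42 | 
|  |  | //       return 1;       // only this successor joins the worklist below | 
|  |  | //     return expensiveColdPath(); | 
|  |  | //   } | 
|  |  | // | 
|  |  | // mapping the formal argument to the constant lets the conditional branch | 
|  |  | // fold during the walk below, so the cold block never contributes to Cost. | 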
|  | 1788 |  | 
| Hal Finkel | 57f03dd | 2014-09-07 13:49:57 +0000 | [diff] [blame] | 1789 | // FIXME: If a caller has multiple calls to a callee, we end up recomputing | 
|  | 1790 | // the ephemeral values multiple times (and they're completely determined by | 
|  | 1791 | // the callee, so this is purely duplicate work). | 
|  | 1792 | SmallPtrSet<const Value *, 32> EphValues; | 
| Daniel Jasper | aec2fa3 | 2016-12-19 08:22:17 +0000 | [diff] [blame] | 1793 | CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues); | 
| Hal Finkel | 57f03dd | 2014-09-07 13:49:57 +0000 | [diff] [blame] | 1794 |  | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1795 | // The worklist of live basic blocks in the callee *after* inlining. We avoid | 
|  | 1796 | // adding basic blocks of the callee which can be proven to be dead for this | 
|  | 1797 | // particular call site in order to get more accurate cost estimates. This | 
|  | 1798 | // requires a somewhat heavyweight iteration pattern: we need to walk the | 
|  | 1799 | // basic blocks in a breadth-first order as we insert live successors. To | 
|  | 1800 | // accomplish this, we use a small-size optimized SetVector; because we exit | 
|  | 1801 | // after crossing our threshold, the iteration count tends to stay small. | 
|  | 1802 | typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>, | 
| Chad Rosier | 567556a | 2016-04-28 14:47:23 +0000 | [diff] [blame] | 1803 | SmallPtrSet<BasicBlock *, 16>> | 
|  | 1804 | BBSetVector; | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1805 | BBSetVector BBWorklist; | 
|  | 1806 | BBWorklist.insert(&F.getEntryBlock()); | 
| Easwaran Raman | 51b809b | 2017-07-28 21:47:36 +0000 | [diff] [blame] | 1807 | bool SingleBB = true; | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1808 | // Note that we *must not* cache the size, this loop grows the worklist. | 
|  | 1809 | for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { | 
|  | 1810 | // Bail out the moment we cross the threshold. This means we'll under-count | 
|  | 1811 | // the cost, but only when undercounting doesn't matter. | 
| Haicheng Wu | 6199536 | 2017-08-25 19:00:33 +0000 | [diff] [blame] | 1812 | if (Cost >= Threshold && !ComputeFullInlineCost) | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1813 | break; | 
|  | 1814 |  | 
|  | 1815 | BasicBlock *BB = BBWorklist[Idx]; | 
|  | 1816 | if (BB->empty()) | 
| Chandler Carruth | 4d1d34f | 2012-03-14 23:19:53 +0000 | [diff] [blame] | 1817 | continue; | 
| Dan Gohman | 4552e3c | 2009-10-13 18:30:07 +0000 | [diff] [blame] | 1818 |  | 
| Gerolf Hoflehner | 734f4c8 | 2014-07-01 00:19:34 +0000 | [diff] [blame] | 1819 | // Disallow inlining a blockaddress. A blockaddress only has defined | 
|  | 1820 | // behavior for an indirect branch in the same function, and we do not | 
|  | 1821 | // currently support inlining indirect branches. But, the inliner may not | 
|  | 1822 | // see an indirect branch that ends up being dead code at a particular call | 
|  | 1823 | // site. If the blockaddress escapes the function, e.g., via a global | 
|  | 1824 | // variable, inlining may lead to an invalid cross-function reference. | 
|  | 1825 | if (BB->hasAddressTaken()) | 
| David Bolvansky | c0aa4b7 | 2018-08-05 14:53:08 +0000 | [diff] [blame] | 1826 | return "blockaddress"; | 
| Gerolf Hoflehner | 734f4c8 | 2014-07-01 00:19:34 +0000 | [diff] [blame] | 1827 |  | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1828 | // Analyze the cost of this block. If we blow through the threshold, this | 
|  | 1829 | // returns an unviable InlineResult, and we can bail out. | 
| David Bolvansky | c0aa4b7 | 2018-08-05 14:53:08 +0000 | [diff] [blame] | 1830 | InlineResult IR = analyzeBlock(BB, EphValues); | 
|  | 1831 | if (!IR) | 
|  | 1832 | return IR; | 
| Eric Christopher | 46308e6 | 2011-02-01 01:16:32 +0000 | [diff] [blame] | 1833 |  | 
| Chandler Carruth | 0814d2a | 2013-12-13 07:59:56 +0000 | [diff] [blame] | 1834 | TerminatorInst *TI = BB->getTerminator(); | 
|  | 1835 |  | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1836 | // Add in the live successors by first checking whether we have terminator | 
|  | 1837 | // that may be simplified based on the values simplified by this call. | 
|  | 1838 | if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { | 
|  | 1839 | if (BI->isConditional()) { | 
|  | 1840 | Value *Cond = BI->getCondition(); | 
| Chad Rosier | 567556a | 2016-04-28 14:47:23 +0000 | [diff] [blame] | 1841 | if (ConstantInt *SimpleCond = | 
|  | 1842 | dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) { | 
| Haicheng Wu | 3739e14 | 2017-12-14 14:36:18 +0000 | [diff] [blame] | 1843 | BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0); | 
|  | 1844 | BBWorklist.insert(NextBB); | 
|  | 1845 | KnownSuccessors[BB] = NextBB; | 
|  | 1846 | findDeadBlocks(BB, NextBB); | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1847 | continue; | 
| Eric Christopher | 46308e6 | 2011-02-01 01:16:32 +0000 | [diff] [blame] | 1848 | } | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1849 | } | 
|  | 1850 | } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { | 
|  | 1851 | Value *Cond = SI->getCondition(); | 
| Chad Rosier | 567556a | 2016-04-28 14:47:23 +0000 | [diff] [blame] | 1852 | if (ConstantInt *SimpleCond = | 
|  | 1853 | dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) { | 
| Haicheng Wu | 3739e14 | 2017-12-14 14:36:18 +0000 | [diff] [blame] | 1854 | BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor(); | 
|  | 1855 | BBWorklist.insert(NextBB); | 
|  | 1856 | KnownSuccessors[BB] = NextBB; | 
|  | 1857 | findDeadBlocks(BB, NextBB); | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1858 | continue; | 
|  | 1859 | } | 
|  | 1860 | } | 
| Eric Christopher | 46308e6 | 2011-02-01 01:16:32 +0000 | [diff] [blame] | 1861 |  | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1862 | // If we're unable to select a particular successor, just count all of | 
|  | 1863 | // them. | 
| Nadav Rotem | 4eb3d4b | 2012-09-19 08:08:04 +0000 | [diff] [blame] | 1864 | for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize; | 
|  | 1865 | ++TIdx) | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1866 | BBWorklist.insert(TI->getSuccessor(TIdx)); | 
|  | 1867 |  | 
|  | 1868 | // If we had any successors at this point, then post-inlining is likely to | 
|  | 1869 | // have them as well. Note that we assume any basic blocks which existed | 
|  | 1870 | // due to branches or switches which folded above will also fold after | 
|  | 1871 | // inlining. | 
|  | 1872 | if (SingleBB && TI->getNumSuccessors() > 1) { | 
|  | 1873 | // Take off the bonus we applied to the threshold. | 
|  | 1874 | Threshold -= SingleBBBonus; | 
|  | 1875 | SingleBB = false; | 
| Eric Christopher | 46308e6 | 2011-02-01 01:16:32 +0000 | [diff] [blame] | 1876 | } | 
|  | 1877 | } | 
| Andrew Trick | caa500b | 2011-10-01 01:27:56 +0000 | [diff] [blame] | 1878 |  | 
| Easwaran Raman | 51b809b | 2017-07-28 21:47:36 +0000 | [diff] [blame] | 1879 | bool OnlyOneCallAndLocalLinkage = | 
|  | 1880 | F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction(); | 
| Chandler Carruth | cb5beb3 | 2013-12-12 11:59:26 +0000 | [diff] [blame] | 1881 | // If the callee contains a noduplicate call, we can still inline as long as | 
| James Molloy | 4f6fb95 | 2012-12-20 16:04:27 +0000 | [diff] [blame] | 1882 | // inlining this would cause the removal of the caller (so the instruction | 
|  | 1883 | // is not actually duplicated, just moved). | 
|  | 1884 | if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall) | 
| David Bolvansky | c0aa4b7 | 2018-08-05 14:53:08 +0000 | [diff] [blame] | 1885 | return "noduplicate"; | 
| James Molloy | 4f6fb95 | 2012-12-20 16:04:27 +0000 | [diff] [blame] | 1886 |  | 
| Chandler Carruth | a004f22 | 2015-05-27 02:49:05 +0000 | [diff] [blame] | 1887 | // We applied the maximum possible vector bonus at the beginning. Now, | 
|  | 1888 | // subtract the excess bonus, if any, from the Threshold before | 
|  | 1889 | // comparing against Cost. | 
|  | 1890 | if (NumVectorInstructions <= NumInstructions / 10) | 
| Easwaran Raman | 51b809b | 2017-07-28 21:47:36 +0000 | [diff] [blame] | 1891 | Threshold -= VectorBonus; | 
| Chandler Carruth | a004f22 | 2015-05-27 02:49:05 +0000 | [diff] [blame] | 1892 | else if (NumVectorInstructions <= NumInstructions / 2) | 
| Easwaran Raman | 51b809b | 2017-07-28 21:47:36 +0000 | [diff] [blame] | 1893 | Threshold -= VectorBonus / 2; | 
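|  |  |  | 
|  |  | // Worked example (the numbers are hypothetical, for illustration; the real | 
|  |  | // bonuses are computed in updateThreshold): | 
|  |  | // | 
|  |  | //   int Threshold = 225;                        // hypothetical base value | 
|  |  | //   int SingleBBBonus = 112, VectorBonus = 337; // hypothetical bonuses | 
|  |  | //   Threshold += SingleBBBonus + VectorBonus;   // speculative: 674 | 
|  |  | //   // ... callee turns out to have multiple BBs, <10% vector code ... | 
|  |  | //   Threshold -= SingleBBBonus;                 // 562, taken off in the loop | 
|  |  | //   Threshold -= VectorBonus;                   // 225, taken off just above | 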
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1894 |  | 
| Hans Wennborg | 00ab73d | 2016-02-05 20:32:42 +0000 | [diff] [blame] | 1895 | return Cost < std::max(1, Threshold); | 
| Eric Christopher | 2dfbd7e | 2011-02-05 00:49:15 +0000 | [diff] [blame] | 1896 | } | 
|  | 1897 |  | 
| Aaron Ballman | 615eb47 | 2017-10-15 14:32:27 +0000 | [diff] [blame] | 1898 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | 
| Adrian Prantl | 5f8f34e4 | 2018-05-01 15:54:18 +0000 | [diff] [blame] | 1899 | /// Dump stats about this call's analysis. | 
| Yaron Keren | eb2a254 | 2016-01-29 20:50:44 +0000 | [diff] [blame] | 1900 | LLVM_DUMP_METHOD void CallAnalyzer::dump() { | 
| Eric Christopher | a13839f | 2014-02-26 23:27:16 +0000 | [diff] [blame] | 1901 | #define DEBUG_PRINT_STAT(x) dbgs() << "      " #x ": " << x << "\n" | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1902 | DEBUG_PRINT_STAT(NumConstantArgs); | 
|  | 1903 | DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); | 
|  | 1904 | DEBUG_PRINT_STAT(NumAllocaArgs); | 
|  | 1905 | DEBUG_PRINT_STAT(NumConstantPtrCmps); | 
|  | 1906 | DEBUG_PRINT_STAT(NumConstantPtrDiffs); | 
|  | 1907 | DEBUG_PRINT_STAT(NumInstructionsSimplified); | 
| Chandler Carruth | a004f22 | 2015-05-27 02:49:05 +0000 | [diff] [blame] | 1908 | DEBUG_PRINT_STAT(NumInstructions); | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1909 | DEBUG_PRINT_STAT(SROACostSavings); | 
|  | 1910 | DEBUG_PRINT_STAT(SROACostSavingsLost); | 
| Haicheng Wu | a446151 | 2017-12-15 14:34:41 +0000 | [diff] [blame] | 1911 | DEBUG_PRINT_STAT(LoadEliminationCost); | 
| James Molloy | 4f6fb95 | 2012-12-20 16:04:27 +0000 | [diff] [blame] | 1912 | DEBUG_PRINT_STAT(ContainsNoDuplicateCall); | 
| Chandler Carruth | 394e34f | 2014-01-31 22:32:32 +0000 | [diff] [blame] | 1913 | DEBUG_PRINT_STAT(Cost); | 
|  | 1914 | DEBUG_PRINT_STAT(Threshold); | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 1915 | #undef DEBUG_PRINT_STAT | 
| Eric Christopher | 2dfbd7e | 2011-02-05 00:49:15 +0000 | [diff] [blame] | 1916 | } | 
| Manman Ren | c3366cc | 2012-09-06 19:55:56 +0000 | [diff] [blame] | 1917 | #endif | 
| Eric Christopher | 2dfbd7e | 2011-02-05 00:49:15 +0000 | [diff] [blame] | 1918 |  | 
| Adrian Prantl | 5f8f34e4 | 2018-05-01 15:54:18 +0000 | [diff] [blame] | 1919 | /// Test that there are no attribute conflicts between Caller and Callee | 
| Evgeniy Stepanov | 2ad3698 | 2013-08-08 08:22:39 +0000 | [diff] [blame] | 1920 | /// that prevent inlining. | 
|  | 1921 | static bool functionsHaveCompatibleAttributes(Function *Caller, | 
| Eric Christopher | 4371b13 | 2015-07-02 01:11:47 +0000 | [diff] [blame] | 1922 | Function *Callee, | 
|  | 1923 | TargetTransformInfo &TTI) { | 
| Eric Christopher | d566fb1 | 2015-07-29 22:09:48 +0000 | [diff] [blame] | 1924 | return TTI.areInlineCompatible(Caller, Callee) && | 
| Akira Hatanaka | 1cb242e | 2015-12-22 23:57:37 +0000 | [diff] [blame] | 1925 | AttributeFuncs::areInlineCompatible(*Caller, *Callee); | 
| Evgeniy Stepanov | 2ad3698 | 2013-08-08 08:22:39 +0000 | [diff] [blame] | 1926 | } | 
|  | 1927 |  | 
| Xinliang David Li | 351d9b0 | 2017-05-02 05:38:41 +0000 | [diff] [blame] | 1928 | int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) { | 
|  | 1929 | int Cost = 0; | 
|  | 1930 | for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { | 
|  | 1931 | if (CS.isByValArgument(I)) { | 
|  | 1932 | // We approximate the number of loads and stores needed by dividing the | 
|  | 1933 | // size of the byval type by the target's pointer size. | 
|  | 1934 | PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType()); | 
|  | 1935 | unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType()); | 
| Bjorn Pettersson | 77f3299 | 2018-01-04 18:23:40 +0000 | [diff] [blame] | 1936 | unsigned AS = PTy->getAddressSpace(); | 
|  | 1937 | unsigned PointerSize = DL.getPointerSizeInBits(AS); | 
| Xinliang David Li | 351d9b0 | 2017-05-02 05:38:41 +0000 | [diff] [blame] | 1938 | // Ceiling division. | 
|  | 1939 | unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; | 
|  | 1940 |  | 
|  | 1941 | // If it generates more than 8 stores it is likely to be expanded as an | 
|  | 1942 | // inline memcpy so we take that as an upper bound. Otherwise we assume | 
|  | 1943 | // one load and one store per word copied. | 
|  | 1944 | // FIXME: The maxStoresPerMemcpy setting from the target should be used | 
|  | 1945 | // here instead of a magic number of 8, but it's not available via | 
|  | 1946 | // DataLayout. | 
|  | 1947 | NumStores = std::min(NumStores, 8U); | 
|  | 1948 |  | 
|  | 1949 | Cost += 2 * NumStores * InlineConstants::InstrCost; | 
|  | 1950 | } else { | 
|  | 1951 | // For non-byval arguments subtract off one instruction per call | 
|  | 1952 | // argument. | 
|  | 1953 | Cost += InlineConstants::InstrCost; | 
|  | 1954 | } | 
|  | 1955 | } | 
|  | 1956 | // The call instruction also disappears after inlining. | 
|  | 1957 | Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty; | 
|  | 1958 | return Cost; | 
|  | 1959 | } | 
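|  |  |  | 
|  |  | // Worked example (illustrative): a byval argument of a 256-byte struct on a | 
|  |  | // target with 64-bit pointers: | 
|  |  | // | 
|  |  | //   unsigned TypeSize = 2048, PointerSize = 64;  // sizes in bits | 
|  |  | //   unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; // 32 | 
|  |  | //   NumStores = std::min(NumStores, 8U);         // memcpy expansion cap | 
|  |  | //   // contributes 2 * 8 * InlineConstants::InstrCost to the callsite cost | 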
|  | 1960 |  | 
| Sean Silva | ab6a683 | 2016-07-23 04:22:50 +0000 | [diff] [blame] | 1961 | InlineCost llvm::getInlineCost( | 
| Easwaran Raman | 1c57cc2 | 2016-08-10 00:48:04 +0000 | [diff] [blame] | 1962 | CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, | 
| Daniel Jasper | aec2fa3 | 2016-12-19 08:22:17 +0000 | [diff] [blame] | 1963 | std::function<AssumptionCache &(Function &)> &GetAssumptionCache, | 
| Easwaran Raman | 12585b0 | 2017-01-20 22:44:04 +0000 | [diff] [blame] | 1964 | Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, | 
| Haicheng Wu | 0812c5b | 2017-08-21 20:00:09 +0000 | [diff] [blame] | 1965 | ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { | 
| Daniel Jasper | aec2fa3 | 2016-12-19 08:22:17 +0000 | [diff] [blame] | 1966 | return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI, | 
| Haicheng Wu | 0812c5b | 2017-08-21 20:00:09 +0000 | [diff] [blame] | 1967 | GetAssumptionCache, GetBFI, PSI, ORE); | 
| Easwaran Raman | b9f7120 | 2015-12-28 20:28:19 +0000 | [diff] [blame] | 1968 | } | 
|  | 1969 |  | 
| Sean Silva | ab6a683 | 2016-07-23 04:22:50 +0000 | [diff] [blame] | 1970 | InlineCost llvm::getInlineCost( | 
| Easwaran Raman | 1c57cc2 | 2016-08-10 00:48:04 +0000 | [diff] [blame] | 1971 | CallSite CS, Function *Callee, const InlineParams &Params, | 
| Daniel Jasper | aec2fa3 | 2016-12-19 08:22:17 +0000 | [diff] [blame] | 1972 | TargetTransformInfo &CalleeTTI, | 
|  | 1973 | std::function<AssumptionCache &(Function &)> &GetAssumptionCache, | 
| Easwaran Raman | 12585b0 | 2017-01-20 22:44:04 +0000 | [diff] [blame] | 1974 | Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, | 
| Haicheng Wu | 0812c5b | 2017-08-21 20:00:09 +0000 | [diff] [blame] | 1975 | ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { | 
| Easwaran Raman | f4bb2f0 | 2016-01-14 23:16:29 +0000 | [diff] [blame] | 1976 |  | 
| Bob Wilson | a5b0dc8 | 2012-11-19 07:04:35 +0000 | [diff] [blame] | 1977 | // Cannot inline indirect calls. | 
|  | 1978 | if (!Callee) | 
| David Bolvansky | c0aa4b7 | 2018-08-05 14:53:08 +0000 | [diff] [blame] | 1979 | return llvm::InlineCost::getNever("indirect call"); | 
| Bob Wilson | a5b0dc8 | 2012-11-19 07:04:35 +0000 | [diff] [blame] | 1980 |  | 
| Bjorn Pettersson | 3851496 | 2018-01-10 13:01:18 +0000 | [diff] [blame] | 1981 | // Never inline calls with byval arguments that do not have the alloca | 
|  | 1982 | // address space. Since byval arguments can be replaced with a copy to an | 
|  | 1983 | // alloca, the inlined code would need to be adjusted to handle the fact | 
|  | 1984 | // that the argument is in the alloca address space (which is nontrivial | 
|  | 1985 | // to solve). | 
|  | 1986 | unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace(); | 
|  | 1987 | for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) | 
|  | 1988 | if (CS.isByValArgument(I)) { | 
|  | 1989 | PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType()); | 
|  | 1990 | if (PTy->getAddressSpace() != AllocaAS) | 
| David Bolvansky | c0aa4b7 | 2018-08-05 14:53:08 +0000 | [diff] [blame] | 1991 | return llvm::InlineCost::getNever("byval arguments without alloca" | 
|  | 1992 | " address space"); | 
| Bjorn Pettersson | 3851496 | 2018-01-10 13:01:18 +0000 | [diff] [blame] | 1993 | } | 
|  | 1994 |  | 
| Bob Wilson | a5b0dc8 | 2012-11-19 07:04:35 +0000 | [diff] [blame] | 1995 | // Calls to functions with always-inline attributes should be inlined | 
|  | 1996 | // whenever possible. | 
| Peter Collingbourne | 68a8897 | 2014-05-19 18:25:54 +0000 | [diff] [blame] | 1997 | if (CS.hasFnAttr(Attribute::AlwaysInline)) { | 
| Bob Wilson | a5b0dc8 | 2012-11-19 07:04:35 +0000 | [diff] [blame] | 1998 | if (isInlineViable(*Callee)) | 
| David Bolvansky | c0aa4b7 | 2018-08-05 14:53:08 +0000 | [diff] [blame] | 1999 | return llvm::InlineCost::getAlways("always inline attribute"); | 
|  | 2000 | return llvm::InlineCost::getNever("inapplicable always inline attribute"); | 
| Bob Wilson | a5b0dc8 | 2012-11-19 07:04:35 +0000 | [diff] [blame] | 2001 | } | 
|  | 2002 |  | 
| Evgeniy Stepanov | 2ad3698 | 2013-08-08 08:22:39 +0000 | [diff] [blame] | 2003 | // Never inline functions with conflicting attributes (unless callee has | 
|  | 2004 | // always-inline attribute). | 
| Chad Rosier | 5ce28f4 | 2017-08-02 14:50:27 +0000 | [diff] [blame] | 2005 | Function *Caller = CS.getCaller(); | 
|  | 2006 | if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI)) | 
| David Bolvansky | c0aa4b7 | 2018-08-05 14:53:08 +0000 | [diff] [blame] | 2007 | return llvm::InlineCost::getNever("conflicting attributes"); | 
| Evgeniy Stepanov | 2ad3698 | 2013-08-08 08:22:39 +0000 | [diff] [blame] | 2008 |  | 
| Paul Robinson | dcbe35b | 2013-11-18 21:44:03 +0000 | [diff] [blame] | 2009 | // Don't inline this call if the caller has the optnone attribute. | 
| Chad Rosier | 5ce28f4 | 2017-08-02 14:50:27 +0000 | [diff] [blame] | 2010 | if (Caller->hasFnAttribute(Attribute::OptimizeNone)) | 
| David Bolvansky | c0aa4b7 | 2018-08-05 14:53:08 +0000 | [diff] [blame] | 2011 | return llvm::InlineCost::getNever("optnone attribute"); | 
| Paul Robinson | dcbe35b | 2013-11-18 21:44:03 +0000 | [diff] [blame] | 2012 |  | 
| Manoj Gupta | 77eeac3 | 2018-07-09 22:27:23 +0000 | [diff] [blame] | 2013 | // Don't inline a function that treats null pointer as valid into a caller | 
|  | 2014 | // that does not have this attribute. | 
|  | 2015 | if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined()) | 
| David Bolvansky | c0aa4b7 | 2018-08-05 14:53:08 +0000 | [diff] [blame] | 2016 | return llvm::InlineCost::getNever("nullptr definitions incompatible"); | 
| Manoj Gupta | 77eeac3 | 2018-07-09 22:27:23 +0000 | [diff] [blame] | 2017 |  | 
| David Bolvansky | c0aa4b7 | 2018-08-05 14:53:08 +0000 | [diff] [blame] | 2018 | // Don't inline functions which can be interposed at link-time. | 
|  | 2019 | if (Callee->isInterposable()) | 
|  | 2020 | return llvm::InlineCost::getNever("interposable"); | 
|  | 2021 |  | 
|  | 2022 | // Don't inline functions marked noinline. | 
|  | 2023 | if (Callee->hasFnAttribute(Attribute::NoInline)) | 
|  | 2024 | return llvm::InlineCost::getNever("noinline function attribute"); | 
|  | 2025 |  | 
|  | 2026 | // Don't inline call sites marked noinline. | 
|  | 2027 | if (CS.isNoInline()) | 
|  | 2028 | return llvm::InlineCost::getNever("noinline call site attribute"); | 
| Dan Gohman | 4552e3c | 2009-10-13 18:30:07 +0000 | [diff] [blame] | 2029 |  | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 2030 | LLVM_DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName() | 
|  | 2031 | << "... (caller:" << Caller->getName() << ")\n"); | 
| Andrew Trick | caa500b | 2011-10-01 01:27:56 +0000 | [diff] [blame] | 2032 |  | 
| Haicheng Wu | 0812c5b | 2017-08-21 20:00:09 +0000 | [diff] [blame] | 2033 | CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS, | 
| Easwaran Raman | 12585b0 | 2017-01-20 22:44:04 +0000 | [diff] [blame] | 2034 | Params); | 
| David Bolvansky | c0aa4b7 | 2018-08-05 14:53:08 +0000 | [diff] [blame] | 2035 | InlineResult ShouldInline = CA.analyzeCall(CS); | 
| Dan Gohman | 4552e3c | 2009-10-13 18:30:07 +0000 | [diff] [blame] | 2036 |  | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 2037 | LLVM_DEBUG(CA.dump()); | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 2038 |  | 
|  | 2039 | // Check if there was a reason to force inlining or no inlining. | 
|  | 2040 | if (!ShouldInline && CA.getCost() < CA.getThreshold()) | 
| David Bolvansky | c0aa4b7 | 2018-08-05 14:53:08 +0000 | [diff] [blame] | 2041 | return InlineCost::getNever(ShouldInline.message); | 
| Bob Wilson | a5b0dc8 | 2012-11-19 07:04:35 +0000 | [diff] [blame] | 2042 | if (ShouldInline && CA.getCost() >= CA.getThreshold()) | 
| David Bolvansky | c0aa4b7 | 2018-08-05 14:53:08 +0000 | [diff] [blame] | 2043 | return InlineCost::getAlways("empty function"); | 
| Andrew Trick | caa500b | 2011-10-01 01:27:56 +0000 | [diff] [blame] | 2044 |  | 
| Chandler Carruth | 0539c07 | 2012-03-31 12:42:41 +0000 | [diff] [blame] | 2045 | return llvm::InlineCost::get(CA.getCost(), CA.getThreshold()); | 
| Dan Gohman | 4552e3c | 2009-10-13 18:30:07 +0000 | [diff] [blame] | 2046 | } | 
| Bob Wilson | a5b0dc8 | 2012-11-19 07:04:35 +0000 | [diff] [blame] | 2047 |  | 
| Easwaran Raman | b9f7120 | 2015-12-28 20:28:19 +0000 | [diff] [blame] | 2048 | bool llvm::isInlineViable(Function &F) { | 
| Duncan P. N. Exon Smith | b3fc83c | 2015-02-14 00:12:15 +0000 | [diff] [blame] | 2049 | bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice); | 
| Bob Wilson | a5b0dc8 | 2012-11-19 07:04:35 +0000 | [diff] [blame] | 2050 | for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { | 
| Gerolf Hoflehner | 734f4c8 | 2014-07-01 00:19:34 +0000 | [diff] [blame] | 2051 | // Disallow inlining of functions which contain indirect branches or | 
|  | 2052 | // blockaddresses. | 
|  | 2053 | if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken()) | 
| Bob Wilson | a5b0dc8 | 2012-11-19 07:04:35 +0000 | [diff] [blame] | 2054 | return false; | 
|  | 2055 |  | 
| Duncan P. N. Exon Smith | 5a82c91 | 2015-10-10 00:53:03 +0000 | [diff] [blame] | 2056 | for (auto &II : *BI) { | 
|  | 2057 | CallSite CS(&II); | 
| Bob Wilson | a5b0dc8 | 2012-11-19 07:04:35 +0000 | [diff] [blame] | 2058 | if (!CS) | 
|  | 2059 | continue; | 
|  | 2060 |  | 
|  | 2061 | // Disallow recursive calls. | 
|  | 2062 | if (&F == CS.getCalledFunction()) | 
|  | 2063 | return false; | 
|  | 2064 |  | 
|  | 2065 | // Disallow calls which expose returns-twice to a function not previously | 
|  | 2066 | // attributed as such. | 
|  | 2067 | if (!ReturnsTwice && CS.isCall() && | 
|  | 2068 | cast<CallInst>(CS.getInstruction())->canReturnTwice()) | 
|  | 2069 | return false; | 
| Reid Kleckner | 223de26 | 2015-04-14 20:38:14 +0000 | [diff] [blame] | 2070 |  | 
| Florian Hahn | 1636651 | 2018-01-28 19:11:49 +0000 | [diff] [blame] | 2071 | if (CS.getCalledFunction()) | 
|  | 2072 | switch (CS.getCalledFunction()->getIntrinsicID()) { | 
|  | 2073 | default: | 
|  | 2074 | break; | 
| Vitaly Buka | 4296ea7 | 2018-04-04 21:46:27 +0000 | [diff] [blame] | 2075 | // Disallow inlining of @llvm.icall.branch.funnel because the current | 
|  | 2076 | // backend can't separate call targets from call arguments. | 
|  | 2077 | case llvm::Intrinsic::icall_branch_funnel: | 
| Florian Hahn | 1636651 | 2018-01-28 19:11:49 +0000 | [diff] [blame] | 2078 | // Disallow inlining functions that call @llvm.localescape. Doing this | 
|  | 2079 | // correctly would require major changes to the inliner. | 
|  | 2080 | case llvm::Intrinsic::localescape: | 
| Sameer AbuAsal | 77beee4 | 2018-09-20 18:39:34 +0000 | [diff] [blame] | 2081 | // Disallow inlining of functions that initialize VarArgs with va_start. | 
| Florian Hahn | 1636651 | 2018-01-28 19:11:49 +0000 | [diff] [blame] | 2082 | case llvm::Intrinsic::vastart: | 
| Florian Hahn | 1636651 | 2018-01-28 19:11:49 +0000 | [diff] [blame] | 2083 | return false; | 
|  | 2084 | } | 
| Bob Wilson | a5b0dc8 | 2012-11-19 07:04:35 +0000 | [diff] [blame] | 2085 | } | 
|  | 2086 | } | 
|  | 2087 |  | 
|  | 2088 | return true; | 
|  | 2089 | } | 
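|  |  |  | 
|  |  | // For example (illustrative source, not part of this file), a variadic | 
|  |  | // callee that uses va_start is never viable, since the call lowers to the | 
|  |  | // @llvm.vastart intrinsic rejected in the switch above: | 
|  |  | // | 
|  |  | //   #include <cstdarg> | 
|  |  | //   int sum(int n, ...) { | 
|  |  | //     va_list ap; | 
|  |  | //     va_start(ap, n); // blocks inlining of sum() | 
|  |  | //     int s = 0; | 
|  |  | //     for (int i = 0; i < n; ++i) s += va_arg(ap, int); | 
|  |  | //     va_end(ap); | 
|  |  | //     return s; | 
|  |  | //   } | 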
| Easwaran Raman | 1c57cc2 | 2016-08-10 00:48:04 +0000 | [diff] [blame] | 2090 |  | 
|  | 2091 | // APIs to create InlineParams based on command line flags and/or other | 
|  | 2092 | // parameters. | 
|  | 2093 |  | 
|  | 2094 | InlineParams llvm::getInlineParams(int Threshold) { | 
|  | 2095 | InlineParams Params; | 
|  | 2096 |  | 
|  | 2097 | // This field is the threshold to use for a callee by default. This is | 
|  | 2098 | // derived from one or more of: | 
|  | 2099 | //  * optimization or size-optimization levels, | 
|  | 2100 | //  * a value passed to createFunctionInliningPass function, or | 
|  | 2101 | //  * the -inline-threshold flag. | 
|  | 2102 | //  If the -inline-threshold flag is explicitly specified, that is used | 
|  | 2103 | //  irrespective of anything else. | 
|  | 2104 | if (InlineThreshold.getNumOccurrences() > 0) | 
|  | 2105 | Params.DefaultThreshold = InlineThreshold; | 
|  | 2106 | else | 
|  | 2107 | Params.DefaultThreshold = Threshold; | 
|  | 2108 |  | 
|  | 2109 | // Set the HintThreshold knob from the -inlinehint-threshold. | 
|  | 2110 | Params.HintThreshold = HintThreshold; | 
|  | 2111 |  | 
|  | 2112 | // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold. | 
|  | 2113 | Params.HotCallSiteThreshold = HotCallSiteThreshold; | 
|  | 2114 |  | 
| Easwaran Raman | 974d4ee | 2017-08-03 22:23:33 +0000 | [diff] [blame] | 2115 | // If the -locally-hot-callsite-threshold is explicitly specified, use it to | 
|  | 2116 | // populate LocallyHotCallSiteThreshold. Later, we populate | 
|  | 2117 | // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if | 
|  | 2118 | // we know that optimization level is O3 (in the getInlineParams variant that | 
|  | 2119 | // takes the opt and size levels). | 
|  | 2120 | // FIXME: Remove this check (and make the assignment unconditional) after | 
|  | 2121 | // addressing size regression issues at O2. | 
|  | 2122 | if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0) | 
|  | 2123 | Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold; | 
|  | 2124 |  | 
| Easwaran Raman | 12585b0 | 2017-01-20 22:44:04 +0000 | [diff] [blame] | 2125 | // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold. | 
|  | 2126 | Params.ColdCallSiteThreshold = ColdCallSiteThreshold; | 
|  | 2127 |  | 
| Easwaran Raman | 1c57cc2 | 2016-08-10 00:48:04 +0000 | [diff] [blame] | 2128 | // Set the OptMinSizeThreshold and OptSizeThreshold params only if the | 
| Easwaran Raman | 1c57cc2 | 2016-08-10 00:48:04 +0000 | [diff] [blame] | 2129 | // -inlinehint-threshold commandline option is not explicitly given. If that | 
|  | 2130 | // option is present, then its value applies even for callees with size and | 
|  | 2131 | // minsize attributes. | 
|  | 2132 | // If the -inline-threshold is not specified, set the ColdThreshold from the | 
|  | 2133 | // -inlinecold-threshold even if it is not explicitly passed. If | 
|  | 2134 | // -inline-threshold is specified, then -inlinecold-threshold needs to be | 
|  | 2135 | // explicitly specified to set the ColdThreshold knob. | 
|  | 2136 | if (InlineThreshold.getNumOccurrences() == 0) { | 
|  | 2137 | Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold; | 
|  | 2138 | Params.OptSizeThreshold = InlineConstants::OptSizeThreshold; | 
|  | 2139 | Params.ColdThreshold = ColdThreshold; | 
|  | 2140 | } else if (ColdThreshold.getNumOccurrences() > 0) { | 
|  | 2141 | Params.ColdThreshold = ColdThreshold; | 
|  | 2142 | } | 
|  | 2143 | return Params; | 
|  | 2144 | } | 
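|  |  |  | 
|  |  | // Usage sketch (illustrative): a pass that wants a custom base threshold but | 
|  |  | // still defers to an explicit flag can simply do | 
|  |  | // | 
|  |  | //   InlineParams Params = llvm::getInlineParams(/*Threshold=*/325); | 
|  |  | //   // Params.DefaultThreshold is 325 unless -inline-threshold was passed | 
|  |  | //   // explicitly, in which case the flag value wins. | 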
|  | 2145 |  | 
|  | 2146 | InlineParams llvm::getInlineParams() { | 
|  | 2147 | return getInlineParams(InlineThreshold); | 
|  | 2148 | } | 
|  | 2149 |  | 
|  | 2150 | // Compute the default threshold for inlining based on the opt level and the | 
|  | 2151 | // size opt level. | 
|  | 2152 | static int computeThresholdFromOptLevels(unsigned OptLevel, | 
|  | 2153 | unsigned SizeOptLevel) { | 
|  | 2154 | if (OptLevel > 2) | 
|  | 2155 | return InlineConstants::OptAggressiveThreshold; | 
|  | 2156 | if (SizeOptLevel == 1) // -Os | 
|  | 2157 | return InlineConstants::OptSizeThreshold; | 
|  | 2158 | if (SizeOptLevel == 2) // -Oz | 
|  | 2159 | return InlineConstants::OptMinSizeThreshold; | 
|  | 2160 | return InlineThreshold; | 
|  | 2161 | } | 
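|  |  |  | 
|  |  | // For example, this maps the standard optimization levels as follows: | 
|  |  | // | 
|  |  | //   computeThresholdFromOptLevels(3, 0); // -O3: OptAggressiveThreshold | 
|  |  | //   computeThresholdFromOptLevels(2, 1); // -Os: OptSizeThreshold | 
|  |  | //   computeThresholdFromOptLevels(2, 2); // -Oz: OptMinSizeThreshold | 
|  |  | //   computeThresholdFromOptLevels(2, 0); // -O2: the -inline-threshold default | 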
|  | 2162 |  | 
|  | 2163 | InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) { | 
| Easwaran Raman | 974d4ee | 2017-08-03 22:23:33 +0000 | [diff] [blame] | 2164 | auto Params = | 
|  | 2165 | getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel)); | 
|  | 2166 | // At O3, use the value of -locally-hot-callsite-threshold option to populate | 
|  | 2167 | // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only | 
|  | 2168 | // when it is specified explicitly. | 
|  | 2169 | if (OptLevel > 2) | 
|  | 2170 | Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold; | 
|  | 2171 | return Params; | 
| Easwaran Raman | 1c57cc2 | 2016-08-10 00:48:04 +0000 | [diff] [blame] | 2172 | } |