blob: 3dfafee146e4534b66f8bc706cbc6c3c729189ac [file] [log] [blame]
Rong Xu48596b62017-04-04 16:42:20 +00001//===-- IndirectCallPromotion.cpp - Optimizations based on value profiling ===//
Rong Xu6e34c492016-04-27 23:20:27 +00002//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the transformation that promotes indirect calls to
11// conditional direct calls when the indirect-call value profile metadata is
12// available.
13//
14//===----------------------------------------------------------------------===//
15
Eugene Zelenkocdc71612016-08-11 17:20:18 +000016#include "llvm/ADT/ArrayRef.h"
Rong Xu6e34c492016-04-27 23:20:27 +000017#include "llvm/ADT/Statistic.h"
Eugene Zelenkocdc71612016-08-11 17:20:18 +000018#include "llvm/ADT/StringRef.h"
19#include "llvm/ADT/Twine.h"
Rong Xu48596b62017-04-04 16:42:20 +000020#include "llvm/Analysis/BlockFrequencyInfo.h"
Teresa Johnson1e44b5d2016-07-12 21:13:44 +000021#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
22#include "llvm/Analysis/IndirectCallSiteVisitor.h"
Eugene Zelenkocdc71612016-08-11 17:20:18 +000023#include "llvm/IR/BasicBlock.h"
Rong Xu6e34c492016-04-27 23:20:27 +000024#include "llvm/IR/CallSite.h"
Eugene Zelenkocdc71612016-08-11 17:20:18 +000025#include "llvm/IR/DerivedTypes.h"
Rong Xu6e34c492016-04-27 23:20:27 +000026#include "llvm/IR/DiagnosticInfo.h"
Eugene Zelenkocdc71612016-08-11 17:20:18 +000027#include "llvm/IR/Function.h"
Rong Xu6e34c492016-04-27 23:20:27 +000028#include "llvm/IR/IRBuilder.h"
Eugene Zelenkocdc71612016-08-11 17:20:18 +000029#include "llvm/IR/InstrTypes.h"
30#include "llvm/IR/Instruction.h"
Rong Xu6e34c492016-04-27 23:20:27 +000031#include "llvm/IR/Instructions.h"
Eugene Zelenkocdc71612016-08-11 17:20:18 +000032#include "llvm/IR/LLVMContext.h"
Rong Xu6e34c492016-04-27 23:20:27 +000033#include "llvm/IR/MDBuilder.h"
Eugene Zelenkocdc71612016-08-11 17:20:18 +000034#include "llvm/IR/PassManager.h"
35#include "llvm/IR/Type.h"
Rong Xu6e34c492016-04-27 23:20:27 +000036#include "llvm/Pass.h"
Eugene Zelenkocdc71612016-08-11 17:20:18 +000037#include "llvm/PassRegistry.h"
38#include "llvm/PassSupport.h"
39#include "llvm/ProfileData/InstrProf.h"
40#include "llvm/Support/Casting.h"
41#include "llvm/Support/CommandLine.h"
Rong Xu6e34c492016-04-27 23:20:27 +000042#include "llvm/Support/Debug.h"
Eugene Zelenkocdc71612016-08-11 17:20:18 +000043#include "llvm/Support/ErrorHandling.h"
Rong Xu48596b62017-04-04 16:42:20 +000044#include "llvm/Support/MathExtras.h"
Rong Xu6e34c492016-04-27 23:20:27 +000045#include "llvm/Transforms/Instrumentation.h"
Xinliang David Lif3c7a352016-05-16 16:31:07 +000046#include "llvm/Transforms/PGOInstrumentation.h"
Rong Xu6e34c492016-04-27 23:20:27 +000047#include "llvm/Transforms/Utils/BasicBlockUtils.h"
Eugene Zelenkocdc71612016-08-11 17:20:18 +000048#include <cassert>
49#include <cstdint>
Rong Xu6e34c492016-04-27 23:20:27 +000050#include <vector>
51
52using namespace llvm;
53
Xinliang David Li0b293302016-06-02 01:52:05 +000054#define DEBUG_TYPE "pgo-icall-prom"
Rong Xu6e34c492016-04-27 23:20:27 +000055
56STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
57STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
Rong Xu48596b62017-04-04 16:42:20 +000058STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized.");
59STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated.");
Rong Xu6e34c492016-04-27 23:20:27 +000060
61// Command line option to disable indirect-call promotion with the default as
62// false. This is for debug purpose.
63static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
64 cl::desc("Disable indirect call promotion"));
65
Rong Xu6e34c492016-04-27 23:20:27 +000066// Set the cutoff value for the promotion. If the value is other than 0, we
67// stop the transformation once the total number of promotions equals the cutoff
68// value.
69// For debug use only.
70static cl::opt<unsigned>
71 ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::ZeroOrMore,
72 cl::desc("Max number of promotions for this compilaiton"));
73
74// If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped.
75// For debug use only.
76static cl::opt<unsigned>
77 ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::ZeroOrMore,
78 cl::desc("Skip Callsite up to this number for this compilaiton"));
79
80// Set if the pass is called in LTO optimization. The difference for LTO mode
81// is the pass won't prefix the source module name to the internal linkage
82// symbols.
83static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
84 cl::desc("Run indirect-call promotion in LTO "
85 "mode"));
Teresa Johnson1e44b5d2016-07-12 21:13:44 +000086
Dehao Chencc75d242017-02-23 22:15:18 +000087// Set if the pass is called in SamplePGO mode. The difference for SamplePGO
88// mode is it will add prof metadatato the created direct call.
89static cl::opt<bool>
90 ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden,
91 cl::desc("Run indirect-call promotion in SamplePGO mode"));
92
Rong Xu6e34c492016-04-27 23:20:27 +000093// If the option is set to true, only call instructions will be considered for
94// transformation -- invoke instructions will be ignored.
95static cl::opt<bool>
96 ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden,
97 cl::desc("Run indirect-call promotion for call instructions "
98 "only"));
99
100// If the option is set to true, only invoke instructions will be considered for
101// transformation -- call instructions will be ignored.
102static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false),
103 cl::Hidden,
104 cl::desc("Run indirect-call promotion for "
105 "invoke instruction only"));
106
107// Dump the function level IR if the transformation happened in this
108// function. For debug use only.
109static cl::opt<bool>
110 ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
111 cl::desc("Dump IR after transformation happens"));
112
Rong Xu48596b62017-04-04 16:42:20 +0000113// The minimum call count to optimize memory intrinsic calls.
114static cl::opt<unsigned>
115 MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore,
116 cl::init(1000),
117 cl::desc("The minimum count to optimize memory "
118 "intrinsic calls"));
119
120// Command line option to disable memory intrinsic optimization. The default is
121// false. This is for debug purpose.
122static cl::opt<bool> DisableMemOPOPT("disable-memop-opt", cl::init(false),
123 cl::Hidden, cl::desc("Disable optimize"));
124
125// The percent threshold to optimize memory intrinsic calls.
126static cl::opt<unsigned>
127 MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40),
128 cl::Hidden, cl::ZeroOrMore,
129 cl::desc("The percentage threshold for the "
130 "memory intrinsic calls optimization"));
131
132// Maximum number of versions for optimizing memory intrinsic call.
133static cl::opt<unsigned>
134 MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden,
135 cl::ZeroOrMore,
136 cl::desc("The max version for the optimized memory "
137 " intrinsic calls"));
138
139// Scale the counts from the annotation using the BB count value.
140static cl::opt<bool>
141 MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden,
142 cl::desc("Scale the memop size counts using the basic "
143 " block count value"));
144
145// This option sets the rangge of precise profile memop sizes.
146extern cl::opt<std::string> MemOPSizeRange;
147
148// This option sets the value that groups large memop sizes
149extern cl::opt<unsigned> MemOPSizeLarge;
150
Rong Xu6e34c492016-04-27 23:20:27 +0000151namespace {
Xinliang David Li72616182016-05-15 01:04:24 +0000152class PGOIndirectCallPromotionLegacyPass : public ModulePass {
Rong Xu6e34c492016-04-27 23:20:27 +0000153public:
154 static char ID;
155
Dehao Chencc75d242017-02-23 22:15:18 +0000156 PGOIndirectCallPromotionLegacyPass(bool InLTO = false, bool SamplePGO = false)
157 : ModulePass(ID), InLTO(InLTO), SamplePGO(SamplePGO) {
Xinliang David Li72616182016-05-15 01:04:24 +0000158 initializePGOIndirectCallPromotionLegacyPassPass(
159 *PassRegistry::getPassRegistry());
Rong Xu6e34c492016-04-27 23:20:27 +0000160 }
161
Mehdi Amini117296c2016-10-01 02:56:57 +0000162 StringRef getPassName() const override { return "PGOIndirectCallPromotion"; }
Rong Xu6e34c492016-04-27 23:20:27 +0000163
164private:
165 bool runOnModule(Module &M) override;
166
167 // If this pass is called in LTO. We need to special handling the PGOFuncName
168 // for the static variables due to LTO's internalization.
169 bool InLTO;
Dehao Chencc75d242017-02-23 22:15:18 +0000170
171 // If this pass is called in SamplePGO. We need to add the prof metadata to
172 // the promoted direct call.
173 bool SamplePGO;
Rong Xu6e34c492016-04-27 23:20:27 +0000174};
Rong Xu48596b62017-04-04 16:42:20 +0000175
176class PGOMemOPSizeOptLegacyPass : public FunctionPass {
177public:
178 static char ID;
179
180 PGOMemOPSizeOptLegacyPass() : FunctionPass(ID) {
181 initializePGOMemOPSizeOptLegacyPassPass(*PassRegistry::getPassRegistry());
182 }
183
184 StringRef getPassName() const override { return "PGOMemOPSize"; }
185
186private:
187 bool runOnFunction(Function &F) override;
188 void getAnalysisUsage(AnalysisUsage &AU) const override {
189 AU.addRequired<BlockFrequencyInfoWrapperPass>();
190 }
191};
Rong Xu6e34c492016-04-27 23:20:27 +0000192} // end anonymous namespace
193
Xinliang David Li72616182016-05-15 01:04:24 +0000194char PGOIndirectCallPromotionLegacyPass::ID = 0;
195INITIALIZE_PASS(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom",
Rong Xu6e34c492016-04-27 23:20:27 +0000196 "Use PGO instrumentation profile to promote indirect calls to "
197 "direct calls.",
198 false, false)
199
Dehao Chencc75d242017-02-23 22:15:18 +0000200ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO,
201 bool SamplePGO) {
202 return new PGOIndirectCallPromotionLegacyPass(InLTO, SamplePGO);
Rong Xu6e34c492016-04-27 23:20:27 +0000203}
204
Rong Xu48596b62017-04-04 16:42:20 +0000205char PGOMemOPSizeOptLegacyPass::ID = 0;
206INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
207 "Optimize memory intrinsic using its size value profile",
208 false, false)
209INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
210INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
211 "Optimize memory intrinsic using its size value profile",
212 false, false)
213
214FunctionPass *llvm::createPGOMemOPSizeOptLegacyPass() {
215 return new PGOMemOPSizeOptLegacyPass();
216}
217
Benjamin Kramera65b6102016-05-15 15:18:11 +0000218namespace {
Rong Xu6e34c492016-04-27 23:20:27 +0000219// The class for main data structure to promote indirect calls to conditional
220// direct calls.
221class ICallPromotionFunc {
222private:
223 Function &F;
224 Module *M;
225
226 // Symtab that maps indirect call profile values to function names and
227 // defines.
228 InstrProfSymtab *Symtab;
229
Dehao Chencc75d242017-02-23 22:15:18 +0000230 bool SamplePGO;
231
Rong Xu6e34c492016-04-27 23:20:27 +0000232 // Test if we can legally promote this direct-call of Target.
Dehao Chen6775f5d2017-01-30 22:46:37 +0000233 bool isPromotionLegal(Instruction *Inst, uint64_t Target, Function *&F,
234 const char **Reason = nullptr);
Rong Xu6e34c492016-04-27 23:20:27 +0000235
236 // A struct that records the direct target and it's call count.
237 struct PromotionCandidate {
238 Function *TargetFunction;
239 uint64_t Count;
240 PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
241 };
242
243 // Check if the indirect-call call site should be promoted. Return the number
Teresa Johnson1e44b5d2016-07-12 21:13:44 +0000244 // of promotions. Inst is the candidate indirect call, ValueDataRef
245 // contains the array of value profile data for profiled targets,
246 // TotalCount is the total profiled count of call executions, and
247 // NumCandidates is the number of candidate entries in ValueDataRef.
Rong Xu6e34c492016-04-27 23:20:27 +0000248 std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
249 Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef,
Teresa Johnson1e44b5d2016-07-12 21:13:44 +0000250 uint64_t TotalCount, uint32_t NumCandidates);
Rong Xu6e34c492016-04-27 23:20:27 +0000251
Rong Xu6e34c492016-04-27 23:20:27 +0000252 // Promote a list of targets for one indirect-call callsite. Return
253 // the number of promotions.
254 uint32_t tryToPromote(Instruction *Inst,
255 const std::vector<PromotionCandidate> &Candidates,
256 uint64_t &TotalCount);
257
Rong Xu6e34c492016-04-27 23:20:27 +0000258 // Noncopyable
259 ICallPromotionFunc(const ICallPromotionFunc &other) = delete;
260 ICallPromotionFunc &operator=(const ICallPromotionFunc &other) = delete;
261
262public:
Dehao Chencc75d242017-02-23 22:15:18 +0000263 ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab,
264 bool SamplePGO)
265 : F(Func), M(Modu), Symtab(Symtab), SamplePGO(SamplePGO) {}
Eugene Zelenkocdc71612016-08-11 17:20:18 +0000266
Rong Xu6e34c492016-04-27 23:20:27 +0000267 bool processFunction();
268};
Benjamin Kramera65b6102016-05-15 15:18:11 +0000269} // end anonymous namespace
Rong Xu6e34c492016-04-27 23:20:27 +0000270
Dehao Chen6775f5d2017-01-30 22:46:37 +0000271bool llvm::isLegalToPromote(Instruction *Inst, Function *F,
272 const char **Reason) {
Rong Xu6e34c492016-04-27 23:20:27 +0000273 // Check the return type.
274 Type *CallRetType = Inst->getType();
275 if (!CallRetType->isVoidTy()) {
Dehao Chen6775f5d2017-01-30 22:46:37 +0000276 Type *FuncRetType = F->getReturnType();
Rong Xu6e34c492016-04-27 23:20:27 +0000277 if (FuncRetType != CallRetType &&
Dehao Chen6775f5d2017-01-30 22:46:37 +0000278 !CastInst::isBitCastable(FuncRetType, CallRetType)) {
279 if (Reason)
280 *Reason = "Return type mismatch";
281 return false;
282 }
Rong Xu6e34c492016-04-27 23:20:27 +0000283 }
284
285 // Check if the arguments are compatible with the parameters
Dehao Chen6775f5d2017-01-30 22:46:37 +0000286 FunctionType *DirectCalleeType = F->getFunctionType();
Rong Xu6e34c492016-04-27 23:20:27 +0000287 unsigned ParamNum = DirectCalleeType->getFunctionNumParams();
288 CallSite CS(Inst);
289 unsigned ArgNum = CS.arg_size();
290
Dehao Chen6775f5d2017-01-30 22:46:37 +0000291 if (ParamNum != ArgNum && !DirectCalleeType->isVarArg()) {
292 if (Reason)
293 *Reason = "The number of arguments mismatch";
294 return false;
295 }
Rong Xu6e34c492016-04-27 23:20:27 +0000296
297 for (unsigned I = 0; I < ParamNum; ++I) {
298 Type *PTy = DirectCalleeType->getFunctionParamType(I);
299 Type *ATy = CS.getArgument(I)->getType();
300 if (PTy == ATy)
301 continue;
Dehao Chen6775f5d2017-01-30 22:46:37 +0000302 if (!CastInst::castIsValid(Instruction::BitCast, CS.getArgument(I), PTy)) {
303 if (Reason)
Benjamin Kramer365c9bd2017-01-30 23:11:29 +0000304 *Reason = "Argument type mismatch";
Dehao Chen6775f5d2017-01-30 22:46:37 +0000305 return false;
306 }
Rong Xu6e34c492016-04-27 23:20:27 +0000307 }
308
309 DEBUG(dbgs() << " #" << NumOfPGOICallPromotion << " Promote the icall to "
Dehao Chen6775f5d2017-01-30 22:46:37 +0000310 << F->getName() << "\n");
311 return true;
312}
313
314bool ICallPromotionFunc::isPromotionLegal(Instruction *Inst, uint64_t Target,
315 Function *&TargetFunction,
316 const char **Reason) {
317 TargetFunction = Symtab->getFunction(Target);
318 if (TargetFunction == nullptr) {
319 *Reason = "Cannot find the target";
320 return false;
321 }
322 return isLegalToPromote(Inst, TargetFunction, Reason);
Rong Xu6e34c492016-04-27 23:20:27 +0000323}
324
325// Indirect-call promotion heuristic. The direct targets are sorted based on
326// the count. Stop at the first target that is not promoted.
327std::vector<ICallPromotionFunc::PromotionCandidate>
328ICallPromotionFunc::getPromotionCandidatesForCallSite(
329 Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef,
Teresa Johnson1e44b5d2016-07-12 21:13:44 +0000330 uint64_t TotalCount, uint32_t NumCandidates) {
Rong Xu6e34c492016-04-27 23:20:27 +0000331 std::vector<PromotionCandidate> Ret;
332
333 DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << *Inst
Teresa Johnson8950ad12016-07-12 21:29:05 +0000334 << " Num_targets: " << ValueDataRef.size()
Teresa Johnson1e44b5d2016-07-12 21:13:44 +0000335 << " Num_candidates: " << NumCandidates << "\n");
Rong Xu6e34c492016-04-27 23:20:27 +0000336 NumOfPGOICallsites++;
337 if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
338 DEBUG(dbgs() << " Skip: User options.\n");
339 return Ret;
340 }
341
Teresa Johnson1e44b5d2016-07-12 21:13:44 +0000342 for (uint32_t I = 0; I < NumCandidates; I++) {
Rong Xu6e34c492016-04-27 23:20:27 +0000343 uint64_t Count = ValueDataRef[I].Count;
344 assert(Count <= TotalCount);
345 uint64_t Target = ValueDataRef[I].Value;
346 DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
347 << " Target_func: " << Target << "\n");
348
Teresa Johnsonce7de9b2016-07-17 14:46:58 +0000349 if (ICPInvokeOnly && dyn_cast<CallInst>(Inst)) {
350 DEBUG(dbgs() << " Not promote: User options.\n");
351 break;
352 }
353 if (ICPCallOnly && dyn_cast<InvokeInst>(Inst)) {
354 DEBUG(dbgs() << " Not promote: User option.\n");
355 break;
356 }
Rong Xu6e34c492016-04-27 23:20:27 +0000357 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
358 DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
359 break;
360 }
Rong Xu6e34c492016-04-27 23:20:27 +0000361 Function *TargetFunction = nullptr;
Dehao Chen6775f5d2017-01-30 22:46:37 +0000362 const char *Reason = nullptr;
363 if (!isPromotionLegal(Inst, Target, TargetFunction, &Reason)) {
Rong Xu6e34c492016-04-27 23:20:27 +0000364 StringRef TargetFuncName = Symtab->getFuncName(Target);
Rong Xu6e34c492016-04-27 23:20:27 +0000365 DEBUG(dbgs() << " Not promote: " << Reason << "\n");
Rong Xu62d5e472016-04-28 17:49:56 +0000366 emitOptimizationRemarkMissed(
Xinliang David Li0b293302016-06-02 01:52:05 +0000367 F.getContext(), "pgo-icall-prom", F, Inst->getDebugLoc(),
Rong Xu6e34c492016-04-27 23:20:27 +0000368 Twine("Cannot promote indirect call to ") +
Rong Xu62d5e472016-04-28 17:49:56 +0000369 (TargetFuncName.empty() ? Twine(Target) : Twine(TargetFuncName)) +
370 Twine(" with count of ") + Twine(Count) + ": " + Reason);
Rong Xu6e34c492016-04-27 23:20:27 +0000371 break;
372 }
373 Ret.push_back(PromotionCandidate(TargetFunction, Count));
374 TotalCount -= Count;
375 }
376 return Ret;
377}
378
379// Create a diamond structure for If_Then_Else. Also update the profile
380// count. Do the fix-up for the invoke instruction.
381static void createIfThenElse(Instruction *Inst, Function *DirectCallee,
382 uint64_t Count, uint64_t TotalCount,
383 BasicBlock **DirectCallBB,
384 BasicBlock **IndirectCallBB,
385 BasicBlock **MergeBB) {
386 CallSite CS(Inst);
387 Value *OrigCallee = CS.getCalledValue();
388
389 IRBuilder<> BBBuilder(Inst);
390 LLVMContext &Ctx = Inst->getContext();
391 Value *BCI1 =
392 BBBuilder.CreateBitCast(OrigCallee, Type::getInt8PtrTy(Ctx), "");
393 Value *BCI2 =
394 BBBuilder.CreateBitCast(DirectCallee, Type::getInt8PtrTy(Ctx), "");
395 Value *PtrCmp = BBBuilder.CreateICmpEQ(BCI1, BCI2, "");
396
397 uint64_t ElseCount = TotalCount - Count;
398 uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount);
399 uint64_t Scale = calculateCountScale(MaxCount);
400 MDBuilder MDB(Inst->getContext());
401 MDNode *BranchWeights = MDB.createBranchWeights(
402 scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale));
403 TerminatorInst *ThenTerm, *ElseTerm;
404 SplitBlockAndInsertIfThenElse(PtrCmp, Inst, &ThenTerm, &ElseTerm,
405 BranchWeights);
406 *DirectCallBB = ThenTerm->getParent();
407 (*DirectCallBB)->setName("if.true.direct_targ");
408 *IndirectCallBB = ElseTerm->getParent();
409 (*IndirectCallBB)->setName("if.false.orig_indirect");
410 *MergeBB = Inst->getParent();
411 (*MergeBB)->setName("if.end.icp");
412
413 // Special handing of Invoke instructions.
414 InvokeInst *II = dyn_cast<InvokeInst>(Inst);
415 if (!II)
416 return;
417
418 // We don't need branch instructions for invoke.
419 ThenTerm->eraseFromParent();
420 ElseTerm->eraseFromParent();
421
422 // Add jump from Merge BB to the NormalDest. This is needed for the newly
423 // created direct invoke stmt -- as its NormalDst will be fixed up to MergeBB.
424 BranchInst::Create(II->getNormalDest(), *MergeBB);
425}
426
427// Find the PHI in BB that have the CallResult as the operand.
428static bool getCallRetPHINode(BasicBlock *BB, Instruction *Inst) {
429 BasicBlock *From = Inst->getParent();
430 for (auto &I : *BB) {
431 PHINode *PHI = dyn_cast<PHINode>(&I);
432 if (!PHI)
433 continue;
434 int IX = PHI->getBasicBlockIndex(From);
435 if (IX == -1)
436 continue;
437 Value *V = PHI->getIncomingValue(IX);
438 if (dyn_cast<Instruction>(V) == Inst)
439 return true;
440 }
441 return false;
442}
443
444// This method fixes up PHI nodes in BB where BB is the UnwindDest of an
445// invoke instruction. In BB, there may be PHIs with incoming block being
446// OrigBB (the MergeBB after if-then-else splitting). After moving the invoke
447// instructions to its own BB, OrigBB is no longer the predecessor block of BB.
448// Instead two new predecessors are added: IndirectCallBB and DirectCallBB,
449// so the PHI node's incoming BBs need to be fixed up accordingly.
450static void fixupPHINodeForUnwind(Instruction *Inst, BasicBlock *BB,
451 BasicBlock *OrigBB,
452 BasicBlock *IndirectCallBB,
453 BasicBlock *DirectCallBB) {
454 for (auto &I : *BB) {
455 PHINode *PHI = dyn_cast<PHINode>(&I);
456 if (!PHI)
457 continue;
458 int IX = PHI->getBasicBlockIndex(OrigBB);
459 if (IX == -1)
460 continue;
461 Value *V = PHI->getIncomingValue(IX);
462 PHI->addIncoming(V, IndirectCallBB);
463 PHI->setIncomingBlock(IX, DirectCallBB);
464 }
465}
466
467// This method fixes up PHI nodes in BB where BB is the NormalDest of an
468// invoke instruction. In BB, there may be PHIs with incoming block being
469// OrigBB (the MergeBB after if-then-else splitting). After moving the invoke
470// instructions to its own BB, a new incoming edge will be added to the original
471// NormalDstBB from the IndirectCallBB.
472static void fixupPHINodeForNormalDest(Instruction *Inst, BasicBlock *BB,
473 BasicBlock *OrigBB,
474 BasicBlock *IndirectCallBB,
475 Instruction *NewInst) {
476 for (auto &I : *BB) {
477 PHINode *PHI = dyn_cast<PHINode>(&I);
478 if (!PHI)
479 continue;
480 int IX = PHI->getBasicBlockIndex(OrigBB);
481 if (IX == -1)
482 continue;
483 Value *V = PHI->getIncomingValue(IX);
484 if (dyn_cast<Instruction>(V) == Inst) {
485 PHI->setIncomingBlock(IX, IndirectCallBB);
486 PHI->addIncoming(NewInst, OrigBB);
487 continue;
488 }
489 PHI->addIncoming(V, IndirectCallBB);
490 }
491}
492
493// Add a bitcast instruction to the direct-call return value if needed.
Rong Xu6e34c492016-04-27 23:20:27 +0000494static Instruction *insertCallRetCast(const Instruction *Inst,
495 Instruction *DirectCallInst,
496 Function *DirectCallee) {
497 if (Inst->getType()->isVoidTy())
498 return DirectCallInst;
499
500 Type *CallRetType = Inst->getType();
501 Type *FuncRetType = DirectCallee->getReturnType();
502 if (FuncRetType == CallRetType)
503 return DirectCallInst;
504
505 BasicBlock *InsertionBB;
506 if (CallInst *CI = dyn_cast<CallInst>(DirectCallInst))
507 InsertionBB = CI->getParent();
508 else
509 InsertionBB = (dyn_cast<InvokeInst>(DirectCallInst))->getNormalDest();
510
511 return (new BitCastInst(DirectCallInst, CallRetType, "",
512 InsertionBB->getTerminator()));
513}
514
515// Create a DirectCall instruction in the DirectCallBB.
516// Parameter Inst is the indirect-call (invoke) instruction.
517// DirectCallee is the decl of the direct-call (invoke) target.
518// DirecallBB is the BB that the direct-call (invoke) instruction is inserted.
519// MergeBB is the bottom BB of the if-then-else-diamond after the
520// transformation. For invoke instruction, the edges from DirectCallBB and
521// IndirectCallBB to MergeBB are removed before this call (during
522// createIfThenElse).
523static Instruction *createDirectCallInst(const Instruction *Inst,
524 Function *DirectCallee,
525 BasicBlock *DirectCallBB,
526 BasicBlock *MergeBB) {
527 Instruction *NewInst = Inst->clone();
528 if (CallInst *CI = dyn_cast<CallInst>(NewInst)) {
529 CI->setCalledFunction(DirectCallee);
530 CI->mutateFunctionType(DirectCallee->getFunctionType());
531 } else {
532 // Must be an invoke instruction. Direct invoke's normal destination is
533 // fixed up to MergeBB. MergeBB is the place where return cast is inserted.
534 // Also since IndirectCallBB does not have an edge to MergeBB, there is no
535 // need to insert new PHIs into MergeBB.
536 InvokeInst *II = dyn_cast<InvokeInst>(NewInst);
537 assert(II);
538 II->setCalledFunction(DirectCallee);
539 II->mutateFunctionType(DirectCallee->getFunctionType());
540 II->setNormalDest(MergeBB);
541 }
542
543 DirectCallBB->getInstList().insert(DirectCallBB->getFirstInsertionPt(),
544 NewInst);
545
546 // Clear the value profile data.
Eugene Zelenkocdc71612016-08-11 17:20:18 +0000547 NewInst->setMetadata(LLVMContext::MD_prof, nullptr);
Rong Xu6e34c492016-04-27 23:20:27 +0000548 CallSite NewCS(NewInst);
549 FunctionType *DirectCalleeType = DirectCallee->getFunctionType();
550 unsigned ParamNum = DirectCalleeType->getFunctionNumParams();
551 for (unsigned I = 0; I < ParamNum; ++I) {
552 Type *ATy = NewCS.getArgument(I)->getType();
553 Type *PTy = DirectCalleeType->getParamType(I);
554 if (ATy != PTy) {
555 BitCastInst *BI = new BitCastInst(NewCS.getArgument(I), PTy, "", NewInst);
556 NewCS.setArgument(I, BI);
557 }
558 }
559
560 return insertCallRetCast(Inst, NewInst, DirectCallee);
561}
562
563// Create a PHI to unify the return values of calls.
564static void insertCallRetPHI(Instruction *Inst, Instruction *CallResult,
565 Function *DirectCallee) {
566 if (Inst->getType()->isVoidTy())
567 return;
568
569 BasicBlock *RetValBB = CallResult->getParent();
570
571 BasicBlock *PHIBB;
572 if (InvokeInst *II = dyn_cast<InvokeInst>(CallResult))
573 RetValBB = II->getNormalDest();
574
575 PHIBB = RetValBB->getSingleSuccessor();
576 if (getCallRetPHINode(PHIBB, Inst))
577 return;
578
579 PHINode *CallRetPHI = PHINode::Create(Inst->getType(), 0);
580 PHIBB->getInstList().push_front(CallRetPHI);
581 Inst->replaceAllUsesWith(CallRetPHI);
582 CallRetPHI->addIncoming(Inst, Inst->getParent());
583 CallRetPHI->addIncoming(CallResult, RetValBB);
584}
585
586// This function does the actual indirect-call promotion transformation:
587// For an indirect-call like:
588// Ret = (*Foo)(Args);
589// It transforms to:
590// if (Foo == DirectCallee)
591// Ret1 = DirectCallee(Args);
592// else
593// Ret2 = (*Foo)(Args);
594// Ret = phi(Ret1, Ret2);
595// It adds type casts for the args do not match the parameters and the return
596// value. Branch weights metadata also updated.
Dehao Chencc75d242017-02-23 22:15:18 +0000597// If \p AttachProfToDirectCall is true, a prof metadata is attached to the
598// new direct call to contain \p Count. This is used by SamplePGO inliner to
599// check callsite hotness.
Dehao Chen14bf0292017-01-23 23:18:24 +0000600// Returns the promoted direct call instruction.
601Instruction *llvm::promoteIndirectCall(Instruction *Inst,
602 Function *DirectCallee, uint64_t Count,
Dehao Chencc75d242017-02-23 22:15:18 +0000603 uint64_t TotalCount,
604 bool AttachProfToDirectCall) {
Rong Xu6e34c492016-04-27 23:20:27 +0000605 assert(DirectCallee != nullptr);
606 BasicBlock *BB = Inst->getParent();
607 // Just to suppress the non-debug build warning.
608 (void)BB;
609 DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
610 DEBUG(dbgs() << *BB << "\n");
611
612 BasicBlock *DirectCallBB, *IndirectCallBB, *MergeBB;
613 createIfThenElse(Inst, DirectCallee, Count, TotalCount, &DirectCallBB,
614 &IndirectCallBB, &MergeBB);
615
616 Instruction *NewInst =
617 createDirectCallInst(Inst, DirectCallee, DirectCallBB, MergeBB);
618
Dehao Chencc75d242017-02-23 22:15:18 +0000619 if (AttachProfToDirectCall) {
620 SmallVector<uint32_t, 1> Weights;
621 Weights.push_back(Count);
622 MDBuilder MDB(NewInst->getContext());
623 dyn_cast<Instruction>(NewInst->stripPointerCasts())
624 ->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
625 }
626
Rong Xu6e34c492016-04-27 23:20:27 +0000627 // Move Inst from MergeBB to IndirectCallBB.
628 Inst->removeFromParent();
629 IndirectCallBB->getInstList().insert(IndirectCallBB->getFirstInsertionPt(),
630 Inst);
631
632 if (InvokeInst *II = dyn_cast<InvokeInst>(Inst)) {
633 // At this point, the original indirect invoke instruction has the original
634 // UnwindDest and NormalDest. For the direct invoke instruction, the
635 // NormalDest points to MergeBB, and MergeBB jumps to the original
636 // NormalDest. MergeBB might have a new bitcast instruction for the return
637 // value. The PHIs are with the original NormalDest. Since we now have two
638 // incoming edges to NormalDest and UnwindDest, we have to do some fixups.
639 //
640 // UnwindDest will not use the return value. So pass nullptr here.
641 fixupPHINodeForUnwind(Inst, II->getUnwindDest(), MergeBB, IndirectCallBB,
642 DirectCallBB);
643 // We don't need to update the operand from NormalDest for DirectCallBB.
644 // Pass nullptr here.
645 fixupPHINodeForNormalDest(Inst, II->getNormalDest(), MergeBB,
646 IndirectCallBB, NewInst);
647 }
648
649 insertCallRetPHI(Inst, NewInst, DirectCallee);
650
651 DEBUG(dbgs() << "\n== Basic Blocks After ==\n");
652 DEBUG(dbgs() << *BB << *DirectCallBB << *IndirectCallBB << *MergeBB << "\n");
653
Rong Xu62d5e472016-04-28 17:49:56 +0000654 emitOptimizationRemark(
Dehao Chen14bf0292017-01-23 23:18:24 +0000655 BB->getContext(), "pgo-icall-prom", *BB->getParent(), Inst->getDebugLoc(),
Rong Xu62d5e472016-04-28 17:49:56 +0000656 Twine("Promote indirect call to ") + DirectCallee->getName() +
657 " with count " + Twine(Count) + " out of " + Twine(TotalCount));
Dehao Chen14bf0292017-01-23 23:18:24 +0000658 return NewInst;
Rong Xu6e34c492016-04-27 23:20:27 +0000659}
660
661// Promote indirect-call to conditional direct-call for one callsite.
662uint32_t ICallPromotionFunc::tryToPromote(
663 Instruction *Inst, const std::vector<PromotionCandidate> &Candidates,
664 uint64_t &TotalCount) {
665 uint32_t NumPromoted = 0;
666
667 for (auto &C : Candidates) {
668 uint64_t Count = C.Count;
Dehao Chencc75d242017-02-23 22:15:18 +0000669 promoteIndirectCall(Inst, C.TargetFunction, Count, TotalCount, SamplePGO);
Rong Xu6e34c492016-04-27 23:20:27 +0000670 assert(TotalCount >= Count);
671 TotalCount -= Count;
672 NumOfPGOICallPromotion++;
673 NumPromoted++;
674 }
675 return NumPromoted;
676}
677
678// Traverse all the indirect-call callsite and get the value profile
679// annotation to perform indirect-call promotion.
680bool ICallPromotionFunc::processFunction() {
681 bool Changed = false;
Teresa Johnson1e44b5d2016-07-12 21:13:44 +0000682 ICallPromotionAnalysis ICallAnalysis;
Rong Xu6e34c492016-04-27 23:20:27 +0000683 for (auto &I : findIndirectCallSites(F)) {
Teresa Johnson1e44b5d2016-07-12 21:13:44 +0000684 uint32_t NumVals, NumCandidates;
Rong Xu6e34c492016-04-27 23:20:27 +0000685 uint64_t TotalCount;
Teresa Johnson1e44b5d2016-07-12 21:13:44 +0000686 auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
687 I, NumVals, TotalCount, NumCandidates);
688 if (!NumCandidates)
Rong Xu6e34c492016-04-27 23:20:27 +0000689 continue;
Teresa Johnson1e44b5d2016-07-12 21:13:44 +0000690 auto PromotionCandidates = getPromotionCandidatesForCallSite(
691 I, ICallProfDataRef, TotalCount, NumCandidates);
Rong Xu6e34c492016-04-27 23:20:27 +0000692 uint32_t NumPromoted = tryToPromote(I, PromotionCandidates, TotalCount);
693 if (NumPromoted == 0)
694 continue;
695
696 Changed = true;
697 // Adjust the MD.prof metadata. First delete the old one.
Eugene Zelenkocdc71612016-08-11 17:20:18 +0000698 I->setMetadata(LLVMContext::MD_prof, nullptr);
Rong Xu6e34c492016-04-27 23:20:27 +0000699 // If all promoted, we don't need the MD.prof metadata.
700 if (TotalCount == 0 || NumPromoted == NumVals)
701 continue;
702 // Otherwise we need update with the un-promoted records back.
Teresa Johnson1e44b5d2016-07-12 21:13:44 +0000703 annotateValueSite(*M, *I, ICallProfDataRef.slice(NumPromoted), TotalCount,
704 IPVK_IndirectCallTarget, NumCandidates);
Rong Xu6e34c492016-04-27 23:20:27 +0000705 }
706 return Changed;
707}
708
709// A wrapper function that does the actual work.
Dehao Chencc75d242017-02-23 22:15:18 +0000710static bool promoteIndirectCalls(Module &M, bool InLTO, bool SamplePGO) {
Rong Xu6e34c492016-04-27 23:20:27 +0000711 if (DisableICP)
712 return false;
713 InstrProfSymtab Symtab;
714 Symtab.create(M, InLTO);
715 bool Changed = false;
716 for (auto &F : M) {
717 if (F.isDeclaration())
718 continue;
719 if (F.hasFnAttribute(Attribute::OptimizeNone))
720 continue;
Dehao Chencc75d242017-02-23 22:15:18 +0000721 ICallPromotionFunc ICallPromotion(F, &M, &Symtab, SamplePGO);
Rong Xu6e34c492016-04-27 23:20:27 +0000722 bool FuncChanged = ICallPromotion.processFunction();
723 if (ICPDUMPAFTER && FuncChanged) {
724 DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
725 DEBUG(dbgs() << "\n");
726 }
727 Changed |= FuncChanged;
728 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
729 DEBUG(dbgs() << " Stop: Cutoff reached.\n");
730 break;
731 }
732 }
733 return Changed;
734}
735
Xinliang David Li72616182016-05-15 01:04:24 +0000736bool PGOIndirectCallPromotionLegacyPass::runOnModule(Module &M) {
Rong Xu6e34c492016-04-27 23:20:27 +0000737 // Command-line option has the priority for InLTO.
Dehao Chencc75d242017-02-23 22:15:18 +0000738 return promoteIndirectCalls(M, InLTO | ICPLTOMode,
739 SamplePGO | ICPSamplePGOMode);
Rong Xu6e34c492016-04-27 23:20:27 +0000740}
Xinliang David Lif3c7a352016-05-16 16:31:07 +0000741
Dehao Chencc75d242017-02-23 22:15:18 +0000742PreservedAnalyses PGOIndirectCallPromotion::run(Module &M,
743 ModuleAnalysisManager &AM) {
744 if (!promoteIndirectCalls(M, InLTO | ICPLTOMode,
745 SamplePGO | ICPSamplePGOMode))
Xinliang David Lif3c7a352016-05-16 16:31:07 +0000746 return PreservedAnalyses::all();
747
748 return PreservedAnalyses::none();
749}
Rong Xu48596b62017-04-04 16:42:20 +0000750
751namespace {
752class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
753public:
754 MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI)
755 : Func(Func), BFI(BFI), Changed(false) {
756 ValueDataArray =
757 llvm::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2);
758 // Get the MemOPSize range information from option MemOPSizeRange,
759 getMemOPSizeRangeFromOption(MemOPSizeRange, PreciseRangeStart,
760 PreciseRangeLast);
761 }
762 bool isChanged() const { return Changed; }
763 void perform() {
764 WorkList.clear();
765 visit(Func);
766
767 for (auto &MI : WorkList) {
768 ++NumOfPGOMemOPAnnotate;
769 if (perform(MI)) {
770 Changed = true;
771 ++NumOfPGOMemOPOpt;
772 DEBUG(dbgs() << "MemOP calls: " << MI->getCalledFunction()->getName()
773 << "is Transformed.\n");
774 }
775 }
776 }
777
778 void visitMemIntrinsic(MemIntrinsic &MI) {
779 Value *Length = MI.getLength();
780 // Not perform on constant length calls.
781 if (dyn_cast<ConstantInt>(Length))
782 return;
783 WorkList.push_back(&MI);
784 }
785
786private:
787 Function &Func;
788 BlockFrequencyInfo &BFI;
789 bool Changed;
790 std::vector<MemIntrinsic *> WorkList;
791 // Start of the previse range.
792 int64_t PreciseRangeStart;
793 // Last value of the previse range.
794 int64_t PreciseRangeLast;
795 // The space to read the profile annotation.
796 std::unique_ptr<InstrProfValueData[]> ValueDataArray;
797 bool perform(MemIntrinsic *MI);
798
799 // This kind shows which group the value falls in. For PreciseValue, we have
800 // the profile count for that value. LargeGroup groups the values that are in
801 // range [LargeValue, +inf). NonLargeGroup groups the rest of values.
802 enum MemOPSizeKind { PreciseValue, NonLargeGroup, LargeGroup };
803
804 MemOPSizeKind getMemOPSizeKind(int64_t Value) const {
805 if (Value == MemOPSizeLarge && MemOPSizeLarge != 0)
806 return LargeGroup;
807 if (Value == PreciseRangeLast + 1)
808 return NonLargeGroup;
809 return PreciseValue;
810 }
811};
812
813static const char *getMIName(const MemIntrinsic *MI) {
814 switch (MI->getIntrinsicID()) {
815 case Intrinsic::memcpy:
816 return "memcpy";
817 case Intrinsic::memmove:
818 return "memmove";
819 case Intrinsic::memset:
820 return "memset";
821 default:
822 return "unknown";
823 }
824}
825
826static bool isProfitable(uint64_t Count, uint64_t TotalCount) {
827 assert(Count <= TotalCount);
828 if (Count < MemOPCountThreshold)
829 return false;
830 if (Count < TotalCount * MemOPPercentThreshold / 100)
831 return false;
832 return true;
833}
834
835static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num,
836 uint64_t Denom) {
837 if (!MemOPScaleCount)
838 return Count;
839 bool Overflowed;
840 uint64_t ScaleCount = SaturatingMultiply(Count, Num, &Overflowed);
841 return ScaleCount / Denom;
842}
843
844bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
845 assert(MI);
846 if (MI->getIntrinsicID() == Intrinsic::memmove)
847 return false;
848
849 uint32_t NumVals, MaxNumPromotions = MemOPMaxVersion + 2;
850 uint64_t TotalCount;
851 if (!getValueProfDataFromInst(*MI, IPVK_MemOPSize, MaxNumPromotions,
852 ValueDataArray.get(), NumVals, TotalCount))
853 return false;
854
855 uint64_t ActualCount = TotalCount;
856 uint64_t SavedTotalCount = TotalCount;
857 if (MemOPScaleCount) {
858 auto BBEdgeCount = BFI.getBlockProfileCount(MI->getParent());
859 if (!BBEdgeCount)
860 return false;
861 ActualCount = *BBEdgeCount;
862 }
863
864 if (ActualCount < MemOPCountThreshold)
865 return false;
866
867 ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals);
868 TotalCount = ActualCount;
869 if (MemOPScaleCount)
870 DEBUG(dbgs() << "Scale counts: numberator = " << ActualCount
871 << " denominator = " << SavedTotalCount << "\n");
872
873 // Keeping track of the count of the default case:
874 uint64_t RemainCount = TotalCount;
875 SmallVector<uint64_t, 16> SizeIds;
876 SmallVector<uint64_t, 16> CaseCounts;
877 uint64_t MaxCount = 0;
878 unsigned Version = 0;
879 // Default case is in the front -- save the slot here.
880 CaseCounts.push_back(0);
881 for (auto &VD : VDs) {
882 int64_t V = VD.Value;
883 uint64_t C = VD.Count;
884 if (MemOPScaleCount)
885 C = getScaledCount(C, ActualCount, SavedTotalCount);
886
887 // Only care precise value here.
888 if (getMemOPSizeKind(V) != PreciseValue)
889 continue;
890
891 // ValueCounts are sorted on the count. Break at the first un-profitable
892 // value.
893 if (!isProfitable(C, RemainCount))
894 break;
895
896 SizeIds.push_back(V);
897 CaseCounts.push_back(C);
898 if (C > MaxCount)
899 MaxCount = C;
900
901 assert(RemainCount >= C);
902 RemainCount -= C;
903
904 if (++Version > MemOPMaxVersion && MemOPMaxVersion != 0)
905 break;
906 }
907
908 if (Version == 0)
909 return false;
910
911 CaseCounts[0] = RemainCount;
912 if (RemainCount > MaxCount)
913 MaxCount = RemainCount;
914
915 uint64_t SumForOpt = TotalCount - RemainCount;
916 DEBUG(dbgs() << "Read one memory intrinsic profile: " << SumForOpt << " vs "
917 << TotalCount << "\n");
918 DEBUG(
919 for (auto &VD
920 : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; });
921
922 DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version
923 << " Versions\n");
924
925 // mem_op(..., size)
926 // ==>
927 // switch (size) {
928 // case s1:
929 // mem_op(..., s1);
930 // goto merge_bb;
931 // case s2:
932 // mem_op(..., s2);
933 // goto merge_bb;
934 // ...
935 // default:
936 // mem_op(..., size);
937 // goto merge_bb;
938 // }
939 // merge_bb:
940
941 BasicBlock *BB = MI->getParent();
942 DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
943 DEBUG(dbgs() << *BB << "\n");
944
945 BasicBlock *DefaultBB = SplitBlock(BB, MI);
946 BasicBlock::iterator It(*MI);
947 ++It;
948 assert(It != DefaultBB->end());
949 BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It));
950 DefaultBB->setName("MemOP.Default");
951 MergeBB->setName("MemOP.Merge");
952
953 auto &Ctx = Func.getContext();
954 IRBuilder<> IRB(BB);
955 BB->getTerminator()->eraseFromParent();
956 Value *SizeVar = MI->getLength();
957 SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size());
958
959 // Clear the value profile data.
960 MI->setMetadata(LLVMContext::MD_prof, nullptr);
961
962 DEBUG(dbgs() << "\n\n== Basic Block After==\n");
963
964 for (uint64_t SizeId : SizeIds) {
965 ConstantInt *CaseSizeId = ConstantInt::get(Type::getInt64Ty(Ctx), SizeId);
966 BasicBlock *CaseBB = BasicBlock::Create(
967 Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);
968 Instruction *NewInst = MI->clone();
969 // Fix the argument.
970 dyn_cast<MemIntrinsic>(NewInst)->setLength(CaseSizeId);
971 CaseBB->getInstList().push_back(NewInst);
972 IRBuilder<> IRBCase(CaseBB);
973 IRBCase.CreateBr(MergeBB);
974 SI->addCase(CaseSizeId, CaseBB);
975 DEBUG(dbgs() << *CaseBB << "\n");
976 }
977 setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount);
978
979 DEBUG(dbgs() << *BB << "\n");
980 DEBUG(dbgs() << *DefaultBB << "\n");
981 DEBUG(dbgs() << *MergeBB << "\n");
982
983 emitOptimizationRemark(Func.getContext(), "memop-opt", Func,
984 MI->getDebugLoc(),
985 Twine("optimize ") + getMIName(MI) + " with count " +
986 Twine(SumForOpt) + " out of " + Twine(TotalCount) +
987 " for " + Twine(Version) + " versions");
988
989 return true;
990}
991} // namespace
992
993static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI) {
994 if (DisableMemOPOPT)
995 return false;
996
997 if (F.hasFnAttribute(Attribute::OptimizeForSize))
998 return false;
999 MemOPSizeOpt MemOPSizeOpt(F, BFI);
1000 MemOPSizeOpt.perform();
1001 return MemOPSizeOpt.isChanged();
1002}
1003
1004bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) {
1005 BlockFrequencyInfo &BFI =
1006 getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
1007 return PGOMemOPSizeOptImpl(F, BFI);
1008}
1009
1010namespace llvm {
1011char &PGOMemOPSizeOptID = PGOMemOPSizeOptLegacyPass::ID;
1012
1013PreservedAnalyses PGOMemOPSizeOpt::run(Function &F,
1014 FunctionAnalysisManager &FAM) {
1015 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
1016 bool Changed = PGOMemOPSizeOptImpl(F, BFI);
1017 if (!Changed)
1018 return PreservedAnalyses::all();
1019 return PreservedAnalyses::none();
1020}
1021} // namespace llvm