blob: 96027bc3d0a91c2d8cea13da30593b5a24562267 [file] [log] [blame]
Rong Xu48596b62017-04-04 16:42:20 +00001//===-- IndirectCallPromotion.cpp - Optimizations based on value profiling ===//
Rong Xu6e34c492016-04-27 23:20:27 +00002//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the transformation that promotes indirect calls to
11// conditional direct calls when the indirect-call value profile metadata is
12// available.
13//
14//===----------------------------------------------------------------------===//
15
Eugene Zelenkocdc71612016-08-11 17:20:18 +000016#include "llvm/ADT/ArrayRef.h"
Rong Xu6e34c492016-04-27 23:20:27 +000017#include "llvm/ADT/Statistic.h"
Eugene Zelenkocdc71612016-08-11 17:20:18 +000018#include "llvm/ADT/StringRef.h"
19#include "llvm/ADT/Twine.h"
Rong Xu48596b62017-04-04 16:42:20 +000020#include "llvm/Analysis/BlockFrequencyInfo.h"
Rong Xu2bf4c592017-04-06 20:56:00 +000021#include "llvm/Analysis/GlobalsModRef.h"
Teresa Johnson1e44b5d2016-07-12 21:13:44 +000022#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
23#include "llvm/Analysis/IndirectCallSiteVisitor.h"
Eugene Zelenkocdc71612016-08-11 17:20:18 +000024#include "llvm/IR/BasicBlock.h"
Rong Xu6e34c492016-04-27 23:20:27 +000025#include "llvm/IR/CallSite.h"
Eugene Zelenkocdc71612016-08-11 17:20:18 +000026#include "llvm/IR/DerivedTypes.h"
Rong Xu6e34c492016-04-27 23:20:27 +000027#include "llvm/IR/DiagnosticInfo.h"
Eugene Zelenkocdc71612016-08-11 17:20:18 +000028#include "llvm/IR/Function.h"
Rong Xu6e34c492016-04-27 23:20:27 +000029#include "llvm/IR/IRBuilder.h"
Eugene Zelenkocdc71612016-08-11 17:20:18 +000030#include "llvm/IR/InstrTypes.h"
31#include "llvm/IR/Instruction.h"
Rong Xu6e34c492016-04-27 23:20:27 +000032#include "llvm/IR/Instructions.h"
Eugene Zelenkocdc71612016-08-11 17:20:18 +000033#include "llvm/IR/LLVMContext.h"
Rong Xu6e34c492016-04-27 23:20:27 +000034#include "llvm/IR/MDBuilder.h"
Eugene Zelenkocdc71612016-08-11 17:20:18 +000035#include "llvm/IR/PassManager.h"
36#include "llvm/IR/Type.h"
Rong Xu6e34c492016-04-27 23:20:27 +000037#include "llvm/Pass.h"
Eugene Zelenkocdc71612016-08-11 17:20:18 +000038#include "llvm/PassRegistry.h"
39#include "llvm/PassSupport.h"
40#include "llvm/ProfileData/InstrProf.h"
41#include "llvm/Support/Casting.h"
42#include "llvm/Support/CommandLine.h"
Rong Xu6e34c492016-04-27 23:20:27 +000043#include "llvm/Support/Debug.h"
Eugene Zelenkocdc71612016-08-11 17:20:18 +000044#include "llvm/Support/ErrorHandling.h"
Rong Xu48596b62017-04-04 16:42:20 +000045#include "llvm/Support/MathExtras.h"
Rong Xu6e34c492016-04-27 23:20:27 +000046#include "llvm/Transforms/Instrumentation.h"
Xinliang David Lif3c7a352016-05-16 16:31:07 +000047#include "llvm/Transforms/PGOInstrumentation.h"
Rong Xu6e34c492016-04-27 23:20:27 +000048#include "llvm/Transforms/Utils/BasicBlockUtils.h"
Eugene Zelenkocdc71612016-08-11 17:20:18 +000049#include <cassert>
50#include <cstdint>
Rong Xu6e34c492016-04-27 23:20:27 +000051#include <vector>
52
53using namespace llvm;
54
Xinliang David Li0b293302016-06-02 01:52:05 +000055#define DEBUG_TYPE "pgo-icall-prom"
Rong Xu6e34c492016-04-27 23:20:27 +000056
57STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
58STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
Rong Xu48596b62017-04-04 16:42:20 +000059STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized.");
60STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated.");
Rong Xu6e34c492016-04-27 23:20:27 +000061
62// Command line option to disable indirect-call promotion with the default as
63// false. This is for debug purpose.
64static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
65 cl::desc("Disable indirect call promotion"));
66
Rong Xu6e34c492016-04-27 23:20:27 +000067// Set the cutoff value for the promotion. If the value is other than 0, we
68// stop the transformation once the total number of promotions equals the cutoff
69// value.
70// For debug use only.
71static cl::opt<unsigned>
72 ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::ZeroOrMore,
Craig Toppera49e7682017-05-05 22:31:11 +000073 cl::desc("Max number of promotions for this compilation"));
Rong Xu6e34c492016-04-27 23:20:27 +000074
75// If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped.
76// For debug use only.
77static cl::opt<unsigned>
78 ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::ZeroOrMore,
Craig Toppera49e7682017-05-05 22:31:11 +000079 cl::desc("Skip Callsite up to this number for this compilation"));
Rong Xu6e34c492016-04-27 23:20:27 +000080
81// Set if the pass is called in LTO optimization. The difference for LTO mode
82// is the pass won't prefix the source module name to the internal linkage
83// symbols.
84static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
85 cl::desc("Run indirect-call promotion in LTO "
86 "mode"));
Teresa Johnson1e44b5d2016-07-12 21:13:44 +000087
Dehao Chencc75d242017-02-23 22:15:18 +000088// Set if the pass is called in SamplePGO mode. The difference for SamplePGO
89// mode is it will add prof metadatato the created direct call.
90static cl::opt<bool>
91 ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden,
92 cl::desc("Run indirect-call promotion in SamplePGO mode"));
93
Rong Xu6e34c492016-04-27 23:20:27 +000094// If the option is set to true, only call instructions will be considered for
95// transformation -- invoke instructions will be ignored.
96static cl::opt<bool>
97 ICPCallOnly("icp-call-only", cl::init(false), cl::Hidden,
98 cl::desc("Run indirect-call promotion for call instructions "
99 "only"));
100
101// If the option is set to true, only invoke instructions will be considered for
102// transformation -- call instructions will be ignored.
103static cl::opt<bool> ICPInvokeOnly("icp-invoke-only", cl::init(false),
104 cl::Hidden,
105 cl::desc("Run indirect-call promotion for "
106 "invoke instruction only"));
107
108// Dump the function level IR if the transformation happened in this
109// function. For debug use only.
110static cl::opt<bool>
111 ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
112 cl::desc("Dump IR after transformation happens"));
113
Rong Xu48596b62017-04-04 16:42:20 +0000114// The minimum call count to optimize memory intrinsic calls.
115static cl::opt<unsigned>
116 MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore,
117 cl::init(1000),
118 cl::desc("The minimum count to optimize memory "
119 "intrinsic calls"));
120
121// Command line option to disable memory intrinsic optimization. The default is
122// false. This is for debug purpose.
123static cl::opt<bool> DisableMemOPOPT("disable-memop-opt", cl::init(false),
124 cl::Hidden, cl::desc("Disable optimize"));
125
126// The percent threshold to optimize memory intrinsic calls.
127static cl::opt<unsigned>
128 MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40),
129 cl::Hidden, cl::ZeroOrMore,
130 cl::desc("The percentage threshold for the "
131 "memory intrinsic calls optimization"));
132
133// Maximum number of versions for optimizing memory intrinsic call.
134static cl::opt<unsigned>
135 MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden,
136 cl::ZeroOrMore,
137 cl::desc("The max version for the optimized memory "
138 " intrinsic calls"));
139
140// Scale the counts from the annotation using the BB count value.
141static cl::opt<bool>
142 MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden,
143 cl::desc("Scale the memop size counts using the basic "
144 " block count value"));
145
146// This option sets the rangge of precise profile memop sizes.
147extern cl::opt<std::string> MemOPSizeRange;
148
149// This option sets the value that groups large memop sizes
150extern cl::opt<unsigned> MemOPSizeLarge;
151
Rong Xu6e34c492016-04-27 23:20:27 +0000152namespace {
Xinliang David Li72616182016-05-15 01:04:24 +0000153class PGOIndirectCallPromotionLegacyPass : public ModulePass {
Rong Xu6e34c492016-04-27 23:20:27 +0000154public:
155 static char ID;
156
Dehao Chencc75d242017-02-23 22:15:18 +0000157 PGOIndirectCallPromotionLegacyPass(bool InLTO = false, bool SamplePGO = false)
158 : ModulePass(ID), InLTO(InLTO), SamplePGO(SamplePGO) {
Xinliang David Li72616182016-05-15 01:04:24 +0000159 initializePGOIndirectCallPromotionLegacyPassPass(
160 *PassRegistry::getPassRegistry());
Rong Xu6e34c492016-04-27 23:20:27 +0000161 }
162
Mehdi Amini117296c2016-10-01 02:56:57 +0000163 StringRef getPassName() const override { return "PGOIndirectCallPromotion"; }
Rong Xu6e34c492016-04-27 23:20:27 +0000164
165private:
166 bool runOnModule(Module &M) override;
167
168 // If this pass is called in LTO. We need to special handling the PGOFuncName
169 // for the static variables due to LTO's internalization.
170 bool InLTO;
Dehao Chencc75d242017-02-23 22:15:18 +0000171
172 // If this pass is called in SamplePGO. We need to add the prof metadata to
173 // the promoted direct call.
174 bool SamplePGO;
Rong Xu6e34c492016-04-27 23:20:27 +0000175};
Rong Xu48596b62017-04-04 16:42:20 +0000176
177class PGOMemOPSizeOptLegacyPass : public FunctionPass {
178public:
179 static char ID;
180
181 PGOMemOPSizeOptLegacyPass() : FunctionPass(ID) {
182 initializePGOMemOPSizeOptLegacyPassPass(*PassRegistry::getPassRegistry());
183 }
184
185 StringRef getPassName() const override { return "PGOMemOPSize"; }
186
187private:
188 bool runOnFunction(Function &F) override;
189 void getAnalysisUsage(AnalysisUsage &AU) const override {
190 AU.addRequired<BlockFrequencyInfoWrapperPass>();
Rong Xu2bf4c592017-04-06 20:56:00 +0000191 AU.addPreserved<GlobalsAAWrapperPass>();
Rong Xu48596b62017-04-04 16:42:20 +0000192 }
193};
Rong Xu6e34c492016-04-27 23:20:27 +0000194} // end anonymous namespace
195
Xinliang David Li72616182016-05-15 01:04:24 +0000196char PGOIndirectCallPromotionLegacyPass::ID = 0;
197INITIALIZE_PASS(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom",
Rong Xu6e34c492016-04-27 23:20:27 +0000198 "Use PGO instrumentation profile to promote indirect calls to "
199 "direct calls.",
200 false, false)
201
Dehao Chencc75d242017-02-23 22:15:18 +0000202ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO,
203 bool SamplePGO) {
204 return new PGOIndirectCallPromotionLegacyPass(InLTO, SamplePGO);
Rong Xu6e34c492016-04-27 23:20:27 +0000205}
206
Rong Xu48596b62017-04-04 16:42:20 +0000207char PGOMemOPSizeOptLegacyPass::ID = 0;
208INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
209 "Optimize memory intrinsic using its size value profile",
210 false, false)
211INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
212INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
213 "Optimize memory intrinsic using its size value profile",
214 false, false)
215
216FunctionPass *llvm::createPGOMemOPSizeOptLegacyPass() {
217 return new PGOMemOPSizeOptLegacyPass();
218}
219
Benjamin Kramera65b6102016-05-15 15:18:11 +0000220namespace {
Rong Xu6e34c492016-04-27 23:20:27 +0000221// The class for main data structure to promote indirect calls to conditional
222// direct calls.
223class ICallPromotionFunc {
224private:
225 Function &F;
226 Module *M;
227
228 // Symtab that maps indirect call profile values to function names and
229 // defines.
230 InstrProfSymtab *Symtab;
231
Dehao Chencc75d242017-02-23 22:15:18 +0000232 bool SamplePGO;
233
Rong Xu6e34c492016-04-27 23:20:27 +0000234 // Test if we can legally promote this direct-call of Target.
Dehao Chen6775f5d2017-01-30 22:46:37 +0000235 bool isPromotionLegal(Instruction *Inst, uint64_t Target, Function *&F,
236 const char **Reason = nullptr);
Rong Xu6e34c492016-04-27 23:20:27 +0000237
238 // A struct that records the direct target and it's call count.
239 struct PromotionCandidate {
240 Function *TargetFunction;
241 uint64_t Count;
242 PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
243 };
244
245 // Check if the indirect-call call site should be promoted. Return the number
Teresa Johnson1e44b5d2016-07-12 21:13:44 +0000246 // of promotions. Inst is the candidate indirect call, ValueDataRef
247 // contains the array of value profile data for profiled targets,
248 // TotalCount is the total profiled count of call executions, and
249 // NumCandidates is the number of candidate entries in ValueDataRef.
Rong Xu6e34c492016-04-27 23:20:27 +0000250 std::vector<PromotionCandidate> getPromotionCandidatesForCallSite(
251 Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef,
Teresa Johnson1e44b5d2016-07-12 21:13:44 +0000252 uint64_t TotalCount, uint32_t NumCandidates);
Rong Xu6e34c492016-04-27 23:20:27 +0000253
Rong Xu6e34c492016-04-27 23:20:27 +0000254 // Promote a list of targets for one indirect-call callsite. Return
255 // the number of promotions.
256 uint32_t tryToPromote(Instruction *Inst,
257 const std::vector<PromotionCandidate> &Candidates,
258 uint64_t &TotalCount);
259
Rong Xu6e34c492016-04-27 23:20:27 +0000260 // Noncopyable
261 ICallPromotionFunc(const ICallPromotionFunc &other) = delete;
262 ICallPromotionFunc &operator=(const ICallPromotionFunc &other) = delete;
263
264public:
Dehao Chencc75d242017-02-23 22:15:18 +0000265 ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab,
266 bool SamplePGO)
267 : F(Func), M(Modu), Symtab(Symtab), SamplePGO(SamplePGO) {}
Eugene Zelenkocdc71612016-08-11 17:20:18 +0000268
Rong Xu6e34c492016-04-27 23:20:27 +0000269 bool processFunction();
270};
Benjamin Kramera65b6102016-05-15 15:18:11 +0000271} // end anonymous namespace
Rong Xu6e34c492016-04-27 23:20:27 +0000272
Dehao Chen6775f5d2017-01-30 22:46:37 +0000273bool llvm::isLegalToPromote(Instruction *Inst, Function *F,
274 const char **Reason) {
Rong Xu6e34c492016-04-27 23:20:27 +0000275 // Check the return type.
276 Type *CallRetType = Inst->getType();
277 if (!CallRetType->isVoidTy()) {
Dehao Chen6775f5d2017-01-30 22:46:37 +0000278 Type *FuncRetType = F->getReturnType();
Rong Xu6e34c492016-04-27 23:20:27 +0000279 if (FuncRetType != CallRetType &&
Dehao Chen6775f5d2017-01-30 22:46:37 +0000280 !CastInst::isBitCastable(FuncRetType, CallRetType)) {
281 if (Reason)
282 *Reason = "Return type mismatch";
283 return false;
284 }
Rong Xu6e34c492016-04-27 23:20:27 +0000285 }
286
287 // Check if the arguments are compatible with the parameters
Dehao Chen6775f5d2017-01-30 22:46:37 +0000288 FunctionType *DirectCalleeType = F->getFunctionType();
Rong Xu6e34c492016-04-27 23:20:27 +0000289 unsigned ParamNum = DirectCalleeType->getFunctionNumParams();
290 CallSite CS(Inst);
291 unsigned ArgNum = CS.arg_size();
292
Dehao Chen6775f5d2017-01-30 22:46:37 +0000293 if (ParamNum != ArgNum && !DirectCalleeType->isVarArg()) {
294 if (Reason)
295 *Reason = "The number of arguments mismatch";
296 return false;
297 }
Rong Xu6e34c492016-04-27 23:20:27 +0000298
299 for (unsigned I = 0; I < ParamNum; ++I) {
300 Type *PTy = DirectCalleeType->getFunctionParamType(I);
301 Type *ATy = CS.getArgument(I)->getType();
302 if (PTy == ATy)
303 continue;
Dehao Chen6775f5d2017-01-30 22:46:37 +0000304 if (!CastInst::castIsValid(Instruction::BitCast, CS.getArgument(I), PTy)) {
305 if (Reason)
Benjamin Kramer365c9bd2017-01-30 23:11:29 +0000306 *Reason = "Argument type mismatch";
Dehao Chen6775f5d2017-01-30 22:46:37 +0000307 return false;
308 }
Rong Xu6e34c492016-04-27 23:20:27 +0000309 }
310
311 DEBUG(dbgs() << " #" << NumOfPGOICallPromotion << " Promote the icall to "
Dehao Chen6775f5d2017-01-30 22:46:37 +0000312 << F->getName() << "\n");
313 return true;
314}
315
316bool ICallPromotionFunc::isPromotionLegal(Instruction *Inst, uint64_t Target,
317 Function *&TargetFunction,
318 const char **Reason) {
319 TargetFunction = Symtab->getFunction(Target);
320 if (TargetFunction == nullptr) {
321 *Reason = "Cannot find the target";
322 return false;
323 }
324 return isLegalToPromote(Inst, TargetFunction, Reason);
Rong Xu6e34c492016-04-27 23:20:27 +0000325}
326
327// Indirect-call promotion heuristic. The direct targets are sorted based on
328// the count. Stop at the first target that is not promoted.
329std::vector<ICallPromotionFunc::PromotionCandidate>
330ICallPromotionFunc::getPromotionCandidatesForCallSite(
331 Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef,
Teresa Johnson1e44b5d2016-07-12 21:13:44 +0000332 uint64_t TotalCount, uint32_t NumCandidates) {
Rong Xu6e34c492016-04-27 23:20:27 +0000333 std::vector<PromotionCandidate> Ret;
334
335 DEBUG(dbgs() << " \nWork on callsite #" << NumOfPGOICallsites << *Inst
Teresa Johnson8950ad12016-07-12 21:29:05 +0000336 << " Num_targets: " << ValueDataRef.size()
Teresa Johnson1e44b5d2016-07-12 21:13:44 +0000337 << " Num_candidates: " << NumCandidates << "\n");
Rong Xu6e34c492016-04-27 23:20:27 +0000338 NumOfPGOICallsites++;
339 if (ICPCSSkip != 0 && NumOfPGOICallsites <= ICPCSSkip) {
340 DEBUG(dbgs() << " Skip: User options.\n");
341 return Ret;
342 }
343
Teresa Johnson1e44b5d2016-07-12 21:13:44 +0000344 for (uint32_t I = 0; I < NumCandidates; I++) {
Rong Xu6e34c492016-04-27 23:20:27 +0000345 uint64_t Count = ValueDataRef[I].Count;
346 assert(Count <= TotalCount);
347 uint64_t Target = ValueDataRef[I].Value;
348 DEBUG(dbgs() << " Candidate " << I << " Count=" << Count
349 << " Target_func: " << Target << "\n");
350
Teresa Johnsonce7de9b2016-07-17 14:46:58 +0000351 if (ICPInvokeOnly && dyn_cast<CallInst>(Inst)) {
352 DEBUG(dbgs() << " Not promote: User options.\n");
353 break;
354 }
355 if (ICPCallOnly && dyn_cast<InvokeInst>(Inst)) {
356 DEBUG(dbgs() << " Not promote: User option.\n");
357 break;
358 }
Rong Xu6e34c492016-04-27 23:20:27 +0000359 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
360 DEBUG(dbgs() << " Not promote: Cutoff reached.\n");
361 break;
362 }
Rong Xu6e34c492016-04-27 23:20:27 +0000363 Function *TargetFunction = nullptr;
Dehao Chen6775f5d2017-01-30 22:46:37 +0000364 const char *Reason = nullptr;
365 if (!isPromotionLegal(Inst, Target, TargetFunction, &Reason)) {
Rong Xu6e34c492016-04-27 23:20:27 +0000366 StringRef TargetFuncName = Symtab->getFuncName(Target);
Rong Xu6e34c492016-04-27 23:20:27 +0000367 DEBUG(dbgs() << " Not promote: " << Reason << "\n");
Rong Xu62d5e472016-04-28 17:49:56 +0000368 emitOptimizationRemarkMissed(
Xinliang David Li0b293302016-06-02 01:52:05 +0000369 F.getContext(), "pgo-icall-prom", F, Inst->getDebugLoc(),
Rong Xu6e34c492016-04-27 23:20:27 +0000370 Twine("Cannot promote indirect call to ") +
Rong Xu62d5e472016-04-28 17:49:56 +0000371 (TargetFuncName.empty() ? Twine(Target) : Twine(TargetFuncName)) +
372 Twine(" with count of ") + Twine(Count) + ": " + Reason);
Rong Xu6e34c492016-04-27 23:20:27 +0000373 break;
374 }
375 Ret.push_back(PromotionCandidate(TargetFunction, Count));
376 TotalCount -= Count;
377 }
378 return Ret;
379}
380
381// Create a diamond structure for If_Then_Else. Also update the profile
382// count. Do the fix-up for the invoke instruction.
383static void createIfThenElse(Instruction *Inst, Function *DirectCallee,
384 uint64_t Count, uint64_t TotalCount,
385 BasicBlock **DirectCallBB,
386 BasicBlock **IndirectCallBB,
387 BasicBlock **MergeBB) {
388 CallSite CS(Inst);
389 Value *OrigCallee = CS.getCalledValue();
390
391 IRBuilder<> BBBuilder(Inst);
392 LLVMContext &Ctx = Inst->getContext();
393 Value *BCI1 =
394 BBBuilder.CreateBitCast(OrigCallee, Type::getInt8PtrTy(Ctx), "");
395 Value *BCI2 =
396 BBBuilder.CreateBitCast(DirectCallee, Type::getInt8PtrTy(Ctx), "");
397 Value *PtrCmp = BBBuilder.CreateICmpEQ(BCI1, BCI2, "");
398
399 uint64_t ElseCount = TotalCount - Count;
400 uint64_t MaxCount = (Count >= ElseCount ? Count : ElseCount);
401 uint64_t Scale = calculateCountScale(MaxCount);
402 MDBuilder MDB(Inst->getContext());
403 MDNode *BranchWeights = MDB.createBranchWeights(
404 scaleBranchCount(Count, Scale), scaleBranchCount(ElseCount, Scale));
405 TerminatorInst *ThenTerm, *ElseTerm;
406 SplitBlockAndInsertIfThenElse(PtrCmp, Inst, &ThenTerm, &ElseTerm,
407 BranchWeights);
408 *DirectCallBB = ThenTerm->getParent();
409 (*DirectCallBB)->setName("if.true.direct_targ");
410 *IndirectCallBB = ElseTerm->getParent();
411 (*IndirectCallBB)->setName("if.false.orig_indirect");
412 *MergeBB = Inst->getParent();
413 (*MergeBB)->setName("if.end.icp");
414
415 // Special handing of Invoke instructions.
416 InvokeInst *II = dyn_cast<InvokeInst>(Inst);
417 if (!II)
418 return;
419
420 // We don't need branch instructions for invoke.
421 ThenTerm->eraseFromParent();
422 ElseTerm->eraseFromParent();
423
424 // Add jump from Merge BB to the NormalDest. This is needed for the newly
425 // created direct invoke stmt -- as its NormalDst will be fixed up to MergeBB.
426 BranchInst::Create(II->getNormalDest(), *MergeBB);
427}
428
429// Find the PHI in BB that have the CallResult as the operand.
430static bool getCallRetPHINode(BasicBlock *BB, Instruction *Inst) {
431 BasicBlock *From = Inst->getParent();
432 for (auto &I : *BB) {
433 PHINode *PHI = dyn_cast<PHINode>(&I);
434 if (!PHI)
435 continue;
436 int IX = PHI->getBasicBlockIndex(From);
437 if (IX == -1)
438 continue;
439 Value *V = PHI->getIncomingValue(IX);
440 if (dyn_cast<Instruction>(V) == Inst)
441 return true;
442 }
443 return false;
444}
445
446// This method fixes up PHI nodes in BB where BB is the UnwindDest of an
447// invoke instruction. In BB, there may be PHIs with incoming block being
448// OrigBB (the MergeBB after if-then-else splitting). After moving the invoke
449// instructions to its own BB, OrigBB is no longer the predecessor block of BB.
450// Instead two new predecessors are added: IndirectCallBB and DirectCallBB,
451// so the PHI node's incoming BBs need to be fixed up accordingly.
452static void fixupPHINodeForUnwind(Instruction *Inst, BasicBlock *BB,
453 BasicBlock *OrigBB,
454 BasicBlock *IndirectCallBB,
455 BasicBlock *DirectCallBB) {
456 for (auto &I : *BB) {
457 PHINode *PHI = dyn_cast<PHINode>(&I);
458 if (!PHI)
459 continue;
460 int IX = PHI->getBasicBlockIndex(OrigBB);
461 if (IX == -1)
462 continue;
463 Value *V = PHI->getIncomingValue(IX);
464 PHI->addIncoming(V, IndirectCallBB);
465 PHI->setIncomingBlock(IX, DirectCallBB);
466 }
467}
468
469// This method fixes up PHI nodes in BB where BB is the NormalDest of an
470// invoke instruction. In BB, there may be PHIs with incoming block being
471// OrigBB (the MergeBB after if-then-else splitting). After moving the invoke
472// instructions to its own BB, a new incoming edge will be added to the original
473// NormalDstBB from the IndirectCallBB.
474static void fixupPHINodeForNormalDest(Instruction *Inst, BasicBlock *BB,
475 BasicBlock *OrigBB,
476 BasicBlock *IndirectCallBB,
477 Instruction *NewInst) {
478 for (auto &I : *BB) {
479 PHINode *PHI = dyn_cast<PHINode>(&I);
480 if (!PHI)
481 continue;
482 int IX = PHI->getBasicBlockIndex(OrigBB);
483 if (IX == -1)
484 continue;
485 Value *V = PHI->getIncomingValue(IX);
486 if (dyn_cast<Instruction>(V) == Inst) {
487 PHI->setIncomingBlock(IX, IndirectCallBB);
488 PHI->addIncoming(NewInst, OrigBB);
489 continue;
490 }
491 PHI->addIncoming(V, IndirectCallBB);
492 }
493}
494
495// Add a bitcast instruction to the direct-call return value if needed.
Rong Xu6e34c492016-04-27 23:20:27 +0000496static Instruction *insertCallRetCast(const Instruction *Inst,
497 Instruction *DirectCallInst,
498 Function *DirectCallee) {
499 if (Inst->getType()->isVoidTy())
500 return DirectCallInst;
501
502 Type *CallRetType = Inst->getType();
503 Type *FuncRetType = DirectCallee->getReturnType();
504 if (FuncRetType == CallRetType)
505 return DirectCallInst;
506
507 BasicBlock *InsertionBB;
508 if (CallInst *CI = dyn_cast<CallInst>(DirectCallInst))
509 InsertionBB = CI->getParent();
510 else
511 InsertionBB = (dyn_cast<InvokeInst>(DirectCallInst))->getNormalDest();
512
513 return (new BitCastInst(DirectCallInst, CallRetType, "",
514 InsertionBB->getTerminator()));
515}
516
517// Create a DirectCall instruction in the DirectCallBB.
518// Parameter Inst is the indirect-call (invoke) instruction.
519// DirectCallee is the decl of the direct-call (invoke) target.
520// DirecallBB is the BB that the direct-call (invoke) instruction is inserted.
521// MergeBB is the bottom BB of the if-then-else-diamond after the
522// transformation. For invoke instruction, the edges from DirectCallBB and
523// IndirectCallBB to MergeBB are removed before this call (during
524// createIfThenElse).
525static Instruction *createDirectCallInst(const Instruction *Inst,
526 Function *DirectCallee,
527 BasicBlock *DirectCallBB,
528 BasicBlock *MergeBB) {
529 Instruction *NewInst = Inst->clone();
530 if (CallInst *CI = dyn_cast<CallInst>(NewInst)) {
531 CI->setCalledFunction(DirectCallee);
532 CI->mutateFunctionType(DirectCallee->getFunctionType());
533 } else {
534 // Must be an invoke instruction. Direct invoke's normal destination is
535 // fixed up to MergeBB. MergeBB is the place where return cast is inserted.
536 // Also since IndirectCallBB does not have an edge to MergeBB, there is no
537 // need to insert new PHIs into MergeBB.
538 InvokeInst *II = dyn_cast<InvokeInst>(NewInst);
539 assert(II);
540 II->setCalledFunction(DirectCallee);
541 II->mutateFunctionType(DirectCallee->getFunctionType());
542 II->setNormalDest(MergeBB);
543 }
544
545 DirectCallBB->getInstList().insert(DirectCallBB->getFirstInsertionPt(),
546 NewInst);
547
548 // Clear the value profile data.
Eugene Zelenkocdc71612016-08-11 17:20:18 +0000549 NewInst->setMetadata(LLVMContext::MD_prof, nullptr);
Rong Xu6e34c492016-04-27 23:20:27 +0000550 CallSite NewCS(NewInst);
551 FunctionType *DirectCalleeType = DirectCallee->getFunctionType();
552 unsigned ParamNum = DirectCalleeType->getFunctionNumParams();
553 for (unsigned I = 0; I < ParamNum; ++I) {
554 Type *ATy = NewCS.getArgument(I)->getType();
555 Type *PTy = DirectCalleeType->getParamType(I);
556 if (ATy != PTy) {
557 BitCastInst *BI = new BitCastInst(NewCS.getArgument(I), PTy, "", NewInst);
558 NewCS.setArgument(I, BI);
559 }
560 }
561
562 return insertCallRetCast(Inst, NewInst, DirectCallee);
563}
564
565// Create a PHI to unify the return values of calls.
566static void insertCallRetPHI(Instruction *Inst, Instruction *CallResult,
567 Function *DirectCallee) {
568 if (Inst->getType()->isVoidTy())
569 return;
570
571 BasicBlock *RetValBB = CallResult->getParent();
572
573 BasicBlock *PHIBB;
574 if (InvokeInst *II = dyn_cast<InvokeInst>(CallResult))
575 RetValBB = II->getNormalDest();
576
577 PHIBB = RetValBB->getSingleSuccessor();
578 if (getCallRetPHINode(PHIBB, Inst))
579 return;
580
581 PHINode *CallRetPHI = PHINode::Create(Inst->getType(), 0);
582 PHIBB->getInstList().push_front(CallRetPHI);
583 Inst->replaceAllUsesWith(CallRetPHI);
584 CallRetPHI->addIncoming(Inst, Inst->getParent());
585 CallRetPHI->addIncoming(CallResult, RetValBB);
586}
587
588// This function does the actual indirect-call promotion transformation:
589// For an indirect-call like:
590// Ret = (*Foo)(Args);
591// It transforms to:
592// if (Foo == DirectCallee)
593// Ret1 = DirectCallee(Args);
594// else
595// Ret2 = (*Foo)(Args);
596// Ret = phi(Ret1, Ret2);
597// It adds type casts for the args do not match the parameters and the return
598// value. Branch weights metadata also updated.
Dehao Chencc75d242017-02-23 22:15:18 +0000599// If \p AttachProfToDirectCall is true, a prof metadata is attached to the
600// new direct call to contain \p Count. This is used by SamplePGO inliner to
601// check callsite hotness.
Dehao Chen14bf0292017-01-23 23:18:24 +0000602// Returns the promoted direct call instruction.
603Instruction *llvm::promoteIndirectCall(Instruction *Inst,
604 Function *DirectCallee, uint64_t Count,
Dehao Chencc75d242017-02-23 22:15:18 +0000605 uint64_t TotalCount,
606 bool AttachProfToDirectCall) {
Rong Xu6e34c492016-04-27 23:20:27 +0000607 assert(DirectCallee != nullptr);
608 BasicBlock *BB = Inst->getParent();
609 // Just to suppress the non-debug build warning.
610 (void)BB;
611 DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
612 DEBUG(dbgs() << *BB << "\n");
613
614 BasicBlock *DirectCallBB, *IndirectCallBB, *MergeBB;
615 createIfThenElse(Inst, DirectCallee, Count, TotalCount, &DirectCallBB,
616 &IndirectCallBB, &MergeBB);
617
618 Instruction *NewInst =
619 createDirectCallInst(Inst, DirectCallee, DirectCallBB, MergeBB);
620
Dehao Chencc75d242017-02-23 22:15:18 +0000621 if (AttachProfToDirectCall) {
622 SmallVector<uint32_t, 1> Weights;
623 Weights.push_back(Count);
624 MDBuilder MDB(NewInst->getContext());
625 dyn_cast<Instruction>(NewInst->stripPointerCasts())
626 ->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
627 }
628
Rong Xu6e34c492016-04-27 23:20:27 +0000629 // Move Inst from MergeBB to IndirectCallBB.
630 Inst->removeFromParent();
631 IndirectCallBB->getInstList().insert(IndirectCallBB->getFirstInsertionPt(),
632 Inst);
633
634 if (InvokeInst *II = dyn_cast<InvokeInst>(Inst)) {
635 // At this point, the original indirect invoke instruction has the original
636 // UnwindDest and NormalDest. For the direct invoke instruction, the
637 // NormalDest points to MergeBB, and MergeBB jumps to the original
638 // NormalDest. MergeBB might have a new bitcast instruction for the return
639 // value. The PHIs are with the original NormalDest. Since we now have two
640 // incoming edges to NormalDest and UnwindDest, we have to do some fixups.
641 //
642 // UnwindDest will not use the return value. So pass nullptr here.
643 fixupPHINodeForUnwind(Inst, II->getUnwindDest(), MergeBB, IndirectCallBB,
644 DirectCallBB);
645 // We don't need to update the operand from NormalDest for DirectCallBB.
646 // Pass nullptr here.
647 fixupPHINodeForNormalDest(Inst, II->getNormalDest(), MergeBB,
648 IndirectCallBB, NewInst);
649 }
650
651 insertCallRetPHI(Inst, NewInst, DirectCallee);
652
653 DEBUG(dbgs() << "\n== Basic Blocks After ==\n");
654 DEBUG(dbgs() << *BB << *DirectCallBB << *IndirectCallBB << *MergeBB << "\n");
655
Rong Xu62d5e472016-04-28 17:49:56 +0000656 emitOptimizationRemark(
Dehao Chen14bf0292017-01-23 23:18:24 +0000657 BB->getContext(), "pgo-icall-prom", *BB->getParent(), Inst->getDebugLoc(),
Rong Xu62d5e472016-04-28 17:49:56 +0000658 Twine("Promote indirect call to ") + DirectCallee->getName() +
659 " with count " + Twine(Count) + " out of " + Twine(TotalCount));
Dehao Chen14bf0292017-01-23 23:18:24 +0000660 return NewInst;
Rong Xu6e34c492016-04-27 23:20:27 +0000661}
662
663// Promote indirect-call to conditional direct-call for one callsite.
664uint32_t ICallPromotionFunc::tryToPromote(
665 Instruction *Inst, const std::vector<PromotionCandidate> &Candidates,
666 uint64_t &TotalCount) {
667 uint32_t NumPromoted = 0;
668
669 for (auto &C : Candidates) {
670 uint64_t Count = C.Count;
Dehao Chencc75d242017-02-23 22:15:18 +0000671 promoteIndirectCall(Inst, C.TargetFunction, Count, TotalCount, SamplePGO);
Rong Xu6e34c492016-04-27 23:20:27 +0000672 assert(TotalCount >= Count);
673 TotalCount -= Count;
674 NumOfPGOICallPromotion++;
675 NumPromoted++;
676 }
677 return NumPromoted;
678}
679
680// Traverse all the indirect-call callsite and get the value profile
681// annotation to perform indirect-call promotion.
682bool ICallPromotionFunc::processFunction() {
683 bool Changed = false;
Teresa Johnson1e44b5d2016-07-12 21:13:44 +0000684 ICallPromotionAnalysis ICallAnalysis;
Rong Xu6e34c492016-04-27 23:20:27 +0000685 for (auto &I : findIndirectCallSites(F)) {
Teresa Johnson1e44b5d2016-07-12 21:13:44 +0000686 uint32_t NumVals, NumCandidates;
Rong Xu6e34c492016-04-27 23:20:27 +0000687 uint64_t TotalCount;
Teresa Johnson1e44b5d2016-07-12 21:13:44 +0000688 auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
689 I, NumVals, TotalCount, NumCandidates);
690 if (!NumCandidates)
Rong Xu6e34c492016-04-27 23:20:27 +0000691 continue;
Teresa Johnson1e44b5d2016-07-12 21:13:44 +0000692 auto PromotionCandidates = getPromotionCandidatesForCallSite(
693 I, ICallProfDataRef, TotalCount, NumCandidates);
Rong Xu6e34c492016-04-27 23:20:27 +0000694 uint32_t NumPromoted = tryToPromote(I, PromotionCandidates, TotalCount);
695 if (NumPromoted == 0)
696 continue;
697
698 Changed = true;
699 // Adjust the MD.prof metadata. First delete the old one.
Eugene Zelenkocdc71612016-08-11 17:20:18 +0000700 I->setMetadata(LLVMContext::MD_prof, nullptr);
Rong Xu6e34c492016-04-27 23:20:27 +0000701 // If all promoted, we don't need the MD.prof metadata.
702 if (TotalCount == 0 || NumPromoted == NumVals)
703 continue;
704 // Otherwise we need update with the un-promoted records back.
Teresa Johnson1e44b5d2016-07-12 21:13:44 +0000705 annotateValueSite(*M, *I, ICallProfDataRef.slice(NumPromoted), TotalCount,
706 IPVK_IndirectCallTarget, NumCandidates);
Rong Xu6e34c492016-04-27 23:20:27 +0000707 }
708 return Changed;
709}
710
711// A wrapper function that does the actual work.
Dehao Chencc75d242017-02-23 22:15:18 +0000712static bool promoteIndirectCalls(Module &M, bool InLTO, bool SamplePGO) {
Rong Xu6e34c492016-04-27 23:20:27 +0000713 if (DisableICP)
714 return false;
715 InstrProfSymtab Symtab;
716 Symtab.create(M, InLTO);
717 bool Changed = false;
718 for (auto &F : M) {
719 if (F.isDeclaration())
720 continue;
721 if (F.hasFnAttribute(Attribute::OptimizeNone))
722 continue;
Dehao Chencc75d242017-02-23 22:15:18 +0000723 ICallPromotionFunc ICallPromotion(F, &M, &Symtab, SamplePGO);
Rong Xu6e34c492016-04-27 23:20:27 +0000724 bool FuncChanged = ICallPromotion.processFunction();
725 if (ICPDUMPAFTER && FuncChanged) {
726 DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
727 DEBUG(dbgs() << "\n");
728 }
729 Changed |= FuncChanged;
730 if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) {
731 DEBUG(dbgs() << " Stop: Cutoff reached.\n");
732 break;
733 }
734 }
735 return Changed;
736}
737
Xinliang David Li72616182016-05-15 01:04:24 +0000738bool PGOIndirectCallPromotionLegacyPass::runOnModule(Module &M) {
Rong Xu6e34c492016-04-27 23:20:27 +0000739 // Command-line option has the priority for InLTO.
Dehao Chencc75d242017-02-23 22:15:18 +0000740 return promoteIndirectCalls(M, InLTO | ICPLTOMode,
741 SamplePGO | ICPSamplePGOMode);
Rong Xu6e34c492016-04-27 23:20:27 +0000742}
Xinliang David Lif3c7a352016-05-16 16:31:07 +0000743
Dehao Chencc75d242017-02-23 22:15:18 +0000744PreservedAnalyses PGOIndirectCallPromotion::run(Module &M,
745 ModuleAnalysisManager &AM) {
746 if (!promoteIndirectCalls(M, InLTO | ICPLTOMode,
747 SamplePGO | ICPSamplePGOMode))
Xinliang David Lif3c7a352016-05-16 16:31:07 +0000748 return PreservedAnalyses::all();
749
750 return PreservedAnalyses::none();
751}
Rong Xu48596b62017-04-04 16:42:20 +0000752
753namespace {
754class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
755public:
756 MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI)
757 : Func(Func), BFI(BFI), Changed(false) {
758 ValueDataArray =
759 llvm::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2);
760 // Get the MemOPSize range information from option MemOPSizeRange,
761 getMemOPSizeRangeFromOption(MemOPSizeRange, PreciseRangeStart,
762 PreciseRangeLast);
763 }
764 bool isChanged() const { return Changed; }
765 void perform() {
766 WorkList.clear();
767 visit(Func);
768
769 for (auto &MI : WorkList) {
770 ++NumOfPGOMemOPAnnotate;
771 if (perform(MI)) {
772 Changed = true;
773 ++NumOfPGOMemOPOpt;
Teresa Johnsonf9ea1762017-04-27 18:25:22 +0000774 DEBUG(dbgs() << "MemOP call: " << MI->getCalledFunction()->getName()
Rong Xu48596b62017-04-04 16:42:20 +0000775 << "is Transformed.\n");
776 }
777 }
778 }
779
780 void visitMemIntrinsic(MemIntrinsic &MI) {
781 Value *Length = MI.getLength();
782 // Not perform on constant length calls.
783 if (dyn_cast<ConstantInt>(Length))
784 return;
785 WorkList.push_back(&MI);
786 }
787
788private:
789 Function &Func;
790 BlockFrequencyInfo &BFI;
791 bool Changed;
792 std::vector<MemIntrinsic *> WorkList;
793 // Start of the previse range.
794 int64_t PreciseRangeStart;
795 // Last value of the previse range.
796 int64_t PreciseRangeLast;
797 // The space to read the profile annotation.
798 std::unique_ptr<InstrProfValueData[]> ValueDataArray;
799 bool perform(MemIntrinsic *MI);
800
801 // This kind shows which group the value falls in. For PreciseValue, we have
802 // the profile count for that value. LargeGroup groups the values that are in
803 // range [LargeValue, +inf). NonLargeGroup groups the rest of values.
804 enum MemOPSizeKind { PreciseValue, NonLargeGroup, LargeGroup };
805
806 MemOPSizeKind getMemOPSizeKind(int64_t Value) const {
807 if (Value == MemOPSizeLarge && MemOPSizeLarge != 0)
808 return LargeGroup;
809 if (Value == PreciseRangeLast + 1)
810 return NonLargeGroup;
811 return PreciseValue;
812 }
813};
814
815static const char *getMIName(const MemIntrinsic *MI) {
816 switch (MI->getIntrinsicID()) {
817 case Intrinsic::memcpy:
818 return "memcpy";
819 case Intrinsic::memmove:
820 return "memmove";
821 case Intrinsic::memset:
822 return "memset";
823 default:
824 return "unknown";
825 }
826}
827
828static bool isProfitable(uint64_t Count, uint64_t TotalCount) {
829 assert(Count <= TotalCount);
830 if (Count < MemOPCountThreshold)
831 return false;
832 if (Count < TotalCount * MemOPPercentThreshold / 100)
833 return false;
834 return true;
835}
836
837static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num,
838 uint64_t Denom) {
839 if (!MemOPScaleCount)
840 return Count;
841 bool Overflowed;
842 uint64_t ScaleCount = SaturatingMultiply(Count, Num, &Overflowed);
843 return ScaleCount / Denom;
844}
845
846bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
847 assert(MI);
848 if (MI->getIntrinsicID() == Intrinsic::memmove)
849 return false;
850
851 uint32_t NumVals, MaxNumPromotions = MemOPMaxVersion + 2;
852 uint64_t TotalCount;
853 if (!getValueProfDataFromInst(*MI, IPVK_MemOPSize, MaxNumPromotions,
854 ValueDataArray.get(), NumVals, TotalCount))
855 return false;
856
857 uint64_t ActualCount = TotalCount;
858 uint64_t SavedTotalCount = TotalCount;
859 if (MemOPScaleCount) {
860 auto BBEdgeCount = BFI.getBlockProfileCount(MI->getParent());
861 if (!BBEdgeCount)
862 return false;
863 ActualCount = *BBEdgeCount;
864 }
865
Teresa Johnsonf9ea1762017-04-27 18:25:22 +0000866 ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals);
867 DEBUG(dbgs() << "Read one memory intrinsic profile with count " << ActualCount
868 << "\n");
869 DEBUG(
870 for (auto &VD
871 : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; });
872
Rong Xu48596b62017-04-04 16:42:20 +0000873 if (ActualCount < MemOPCountThreshold)
874 return false;
Teresa Johnson51177292017-04-28 14:30:54 +0000875 // Skip if the total value profiled count is 0, in which case we can't
876 // scale up the counts properly (and there is no profitable transformation).
877 if (TotalCount == 0)
878 return false;
Rong Xu48596b62017-04-04 16:42:20 +0000879
Rong Xu48596b62017-04-04 16:42:20 +0000880 TotalCount = ActualCount;
881 if (MemOPScaleCount)
Teresa Johnsonf9ea1762017-04-27 18:25:22 +0000882 DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount
Rong Xu48596b62017-04-04 16:42:20 +0000883 << " denominator = " << SavedTotalCount << "\n");
884
885 // Keeping track of the count of the default case:
886 uint64_t RemainCount = TotalCount;
887 SmallVector<uint64_t, 16> SizeIds;
888 SmallVector<uint64_t, 16> CaseCounts;
889 uint64_t MaxCount = 0;
890 unsigned Version = 0;
891 // Default case is in the front -- save the slot here.
892 CaseCounts.push_back(0);
893 for (auto &VD : VDs) {
894 int64_t V = VD.Value;
895 uint64_t C = VD.Count;
896 if (MemOPScaleCount)
897 C = getScaledCount(C, ActualCount, SavedTotalCount);
898
899 // Only care precise value here.
900 if (getMemOPSizeKind(V) != PreciseValue)
901 continue;
902
903 // ValueCounts are sorted on the count. Break at the first un-profitable
904 // value.
905 if (!isProfitable(C, RemainCount))
906 break;
907
908 SizeIds.push_back(V);
909 CaseCounts.push_back(C);
910 if (C > MaxCount)
911 MaxCount = C;
912
913 assert(RemainCount >= C);
914 RemainCount -= C;
915
916 if (++Version > MemOPMaxVersion && MemOPMaxVersion != 0)
917 break;
918 }
919
920 if (Version == 0)
921 return false;
922
923 CaseCounts[0] = RemainCount;
924 if (RemainCount > MaxCount)
925 MaxCount = RemainCount;
926
927 uint64_t SumForOpt = TotalCount - RemainCount;
Rong Xu48596b62017-04-04 16:42:20 +0000928
929 DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version
Teresa Johnsonf9ea1762017-04-27 18:25:22 +0000930 << " Versions (covering " << SumForOpt << " out of "
931 << TotalCount << ")\n");
Rong Xu48596b62017-04-04 16:42:20 +0000932
933 // mem_op(..., size)
934 // ==>
935 // switch (size) {
936 // case s1:
937 // mem_op(..., s1);
938 // goto merge_bb;
939 // case s2:
940 // mem_op(..., s2);
941 // goto merge_bb;
942 // ...
943 // default:
944 // mem_op(..., size);
945 // goto merge_bb;
946 // }
947 // merge_bb:
948
949 BasicBlock *BB = MI->getParent();
950 DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
951 DEBUG(dbgs() << *BB << "\n");
Teresa Johnsonb2c390e2017-04-24 20:30:42 +0000952 auto OrigBBFreq = BFI.getBlockFreq(BB);
Rong Xu48596b62017-04-04 16:42:20 +0000953
954 BasicBlock *DefaultBB = SplitBlock(BB, MI);
955 BasicBlock::iterator It(*MI);
956 ++It;
957 assert(It != DefaultBB->end());
958 BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It));
Rong Xu48596b62017-04-04 16:42:20 +0000959 MergeBB->setName("MemOP.Merge");
Teresa Johnsonb2c390e2017-04-24 20:30:42 +0000960 BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency());
961 DefaultBB->setName("MemOP.Default");
Rong Xu48596b62017-04-04 16:42:20 +0000962
963 auto &Ctx = Func.getContext();
964 IRBuilder<> IRB(BB);
965 BB->getTerminator()->eraseFromParent();
966 Value *SizeVar = MI->getLength();
967 SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size());
968
969 // Clear the value profile data.
970 MI->setMetadata(LLVMContext::MD_prof, nullptr);
971
972 DEBUG(dbgs() << "\n\n== Basic Block After==\n");
973
974 for (uint64_t SizeId : SizeIds) {
975 ConstantInt *CaseSizeId = ConstantInt::get(Type::getInt64Ty(Ctx), SizeId);
976 BasicBlock *CaseBB = BasicBlock::Create(
977 Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);
978 Instruction *NewInst = MI->clone();
979 // Fix the argument.
980 dyn_cast<MemIntrinsic>(NewInst)->setLength(CaseSizeId);
981 CaseBB->getInstList().push_back(NewInst);
982 IRBuilder<> IRBCase(CaseBB);
983 IRBCase.CreateBr(MergeBB);
984 SI->addCase(CaseSizeId, CaseBB);
985 DEBUG(dbgs() << *CaseBB << "\n");
986 }
987 setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount);
988
989 DEBUG(dbgs() << *BB << "\n");
990 DEBUG(dbgs() << *DefaultBB << "\n");
991 DEBUG(dbgs() << *MergeBB << "\n");
992
993 emitOptimizationRemark(Func.getContext(), "memop-opt", Func,
994 MI->getDebugLoc(),
995 Twine("optimize ") + getMIName(MI) + " with count " +
996 Twine(SumForOpt) + " out of " + Twine(TotalCount) +
997 " for " + Twine(Version) + " versions");
998
999 return true;
1000}
1001} // namespace
1002
1003static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI) {
1004 if (DisableMemOPOPT)
1005 return false;
1006
1007 if (F.hasFnAttribute(Attribute::OptimizeForSize))
1008 return false;
1009 MemOPSizeOpt MemOPSizeOpt(F, BFI);
1010 MemOPSizeOpt.perform();
1011 return MemOPSizeOpt.isChanged();
1012}
1013
1014bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) {
1015 BlockFrequencyInfo &BFI =
1016 getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
1017 return PGOMemOPSizeOptImpl(F, BFI);
1018}
1019
1020namespace llvm {
1021char &PGOMemOPSizeOptID = PGOMemOPSizeOptLegacyPass::ID;
1022
1023PreservedAnalyses PGOMemOPSizeOpt::run(Function &F,
1024 FunctionAnalysisManager &FAM) {
1025 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
1026 bool Changed = PGOMemOPSizeOptImpl(F, BFI);
1027 if (!Changed)
1028 return PreservedAnalyses::all();
Rong Xu2bf4c592017-04-06 20:56:00 +00001029 auto PA = PreservedAnalyses();
1030 PA.preserve<GlobalsAA>();
1031 return PA;
Rong Xu48596b62017-04-04 16:42:20 +00001032}
1033} // namespace llvm