blob: e3d290194cf25d384fed349bdc2cc85c8514ff55 [file] [log] [blame]
Diego Novillo8d6568b2013-11-13 12:22:21 +00001//===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the SampleProfileLoader transformation. This pass
11// reads a profile file generated by a sampling profiler (e.g. Linux Perf -
12// http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
13// profile information in the given profile.
14//
15// This pass generates branch weight annotations on the IR:
16//
17// - prof: Represents branch weights. This annotation is added to branches
18// to indicate the weights of each edge coming out of the branch.
19// The weight of each edge is the weight of the target block for
20// that edge. The weight of a block B is computed as the maximum
21// number of samples found in B.
22//
23//===----------------------------------------------------------------------===//
24
25#define DEBUG_TYPE "sample-profile"
26
27#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/OwningPtr.h"
Diego Novillo0accb3d2014-01-10 23:23:46 +000029#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/SmallPtrSet.h"
Diego Novillo8d6568b2013-11-13 12:22:21 +000031#include "llvm/ADT/StringMap.h"
32#include "llvm/ADT/StringRef.h"
Diego Novillo0accb3d2014-01-10 23:23:46 +000033#include "llvm/Analysis/Dominators.h"
34#include "llvm/Analysis/PostDominators.h"
35#include "llvm/Analysis/LoopInfo.h"
36#include "llvm/DebugInfo.h"
Diego Novillo8d6568b2013-11-13 12:22:21 +000037#include "llvm/IR/Constants.h"
38#include "llvm/IR/Function.h"
39#include "llvm/IR/Instructions.h"
40#include "llvm/IR/LLVMContext.h"
Diego Novillo8d6568b2013-11-13 12:22:21 +000041#include "llvm/IR/MDBuilder.h"
Chandler Carruth8a8cd2b2014-01-07 11:48:04 +000042#include "llvm/IR/Metadata.h"
Diego Novillo8d6568b2013-11-13 12:22:21 +000043#include "llvm/IR/Module.h"
44#include "llvm/Pass.h"
45#include "llvm/Support/CommandLine.h"
46#include "llvm/Support/Debug.h"
47#include "llvm/Support/InstIterator.h"
48#include "llvm/Support/MemoryBuffer.h"
49#include "llvm/Support/Regex.h"
50#include "llvm/Support/raw_ostream.h"
51#include "llvm/Transforms/Scalar.h"
52
53using namespace llvm;
54
55// Command line option to specify the file to read samples from. This is
56// mainly used for debugging.
57static cl::opt<std::string> SampleProfileFile(
58 "sample-profile-file", cl::init(""), cl::value_desc("filename"),
59 cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
Diego Novillo0accb3d2014-01-10 23:23:46 +000060static cl::opt<unsigned> SampleProfileMaxPropagateIterations(
61 "sample-profile-max-propagate-iterations", cl::init(100),
62 cl::desc("Maximum number of iterations to go through when propagating "
63 "sample block/edge weights through the CFG."));
Diego Novillo8d6568b2013-11-13 12:22:21 +000064
65namespace {
Diego Novilloc0dd1032013-11-26 20:37:33 +000066
67typedef DenseMap<uint32_t, uint32_t> BodySampleMap;
68typedef DenseMap<BasicBlock *, uint32_t> BlockWeightMap;
Diego Novillo0accb3d2014-01-10 23:23:46 +000069typedef DenseMap<BasicBlock *, BasicBlock *> EquivalenceClassMap;
70typedef std::pair<BasicBlock *, BasicBlock *> Edge;
71typedef DenseMap<Edge, uint32_t> EdgeWeightMap;
72typedef DenseMap<BasicBlock *, SmallVector<BasicBlock *, 8> > BlockEdgeMap;
Diego Novilloc0dd1032013-11-26 20:37:33 +000073
74/// \brief Representation of the runtime profile for a function.
75///
76/// This data structure contains the runtime profile for a given
77/// function. It contains the total number of samples collected
78/// in the function and a map of samples collected in every statement.
79class SampleFunctionProfile {
80public:
Diego Novillo0accb3d2014-01-10 23:23:46 +000081 SampleFunctionProfile()
82 : TotalSamples(0), TotalHeadSamples(0), HeaderLineno(0), DT(0), PDT(0),
83 LI(0) {}
Diego Novilloc0dd1032013-11-26 20:37:33 +000084
Diego Novillo0accb3d2014-01-10 23:23:46 +000085 unsigned getFunctionLoc(Function &F);
86 bool emitAnnotations(Function &F, DominatorTree *DomTree,
87 PostDominatorTree *PostDomTree, LoopInfo *Loops);
88 uint32_t getInstWeight(Instruction &I);
89 uint32_t getBlockWeight(BasicBlock *B);
Diego Novilloc0dd1032013-11-26 20:37:33 +000090 void addTotalSamples(unsigned Num) { TotalSamples += Num; }
91 void addHeadSamples(unsigned Num) { TotalHeadSamples += Num; }
92 void addBodySamples(unsigned LineOffset, unsigned Num) {
93 BodySamples[LineOffset] += Num;
94 }
95 void print(raw_ostream &OS);
Diego Novillo0accb3d2014-01-10 23:23:46 +000096 void printEdgeWeight(raw_ostream &OS, Edge E);
97 void printBlockWeight(raw_ostream &OS, BasicBlock *BB);
98 void printBlockEquivalence(raw_ostream &OS, BasicBlock *BB);
99 bool computeBlockWeights(Function &F);
100 void findEquivalenceClasses(Function &F);
101 void findEquivalencesFor(BasicBlock *BB1,
102 SmallVector<BasicBlock *, 8> Descendants,
103 DominatorTreeBase<BasicBlock> *DomTree);
104 void propagateWeights(Function &F);
105 uint32_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
106 void buildEdges(Function &F);
107 bool propagateThroughEdges(Function &F);
108 bool empty() { return BodySamples.empty(); }
Diego Novilloc0dd1032013-11-26 20:37:33 +0000109
110protected:
111 /// \brief Total number of samples collected inside this function.
112 ///
113 /// Samples are cumulative, they include all the samples collected
114 /// inside this function and all its inlined callees.
115 unsigned TotalSamples;
116
Diego Novillo0accb3d2014-01-10 23:23:46 +0000117 /// \brief Total number of samples collected at the head of the function.
Diego Novilloc0dd1032013-11-26 20:37:33 +0000118 unsigned TotalHeadSamples;
119
Diego Novillo0accb3d2014-01-10 23:23:46 +0000120 /// \brief Line number for the function header. Used to compute relative
121 /// line numbers from the absolute line LOCs found in instruction locations.
122 /// The relative line numbers are needed to address the samples from the
123 /// profile file.
124 unsigned HeaderLineno;
125
Diego Novilloc0dd1032013-11-26 20:37:33 +0000126 /// \brief Map line offsets to collected samples.
127 ///
128 /// Each entry in this map contains the number of samples
129 /// collected at the corresponding line offset. All line locations
130 /// are an offset from the start of the function.
131 BodySampleMap BodySamples;
132
133 /// \brief Map basic blocks to their computed weights.
134 ///
135 /// The weight of a basic block is defined to be the maximum
136 /// of all the instruction weights in that block.
137 BlockWeightMap BlockWeights;
Diego Novillo0accb3d2014-01-10 23:23:46 +0000138
139 /// \brief Map edges to their computed weights.
140 ///
141 /// Edge weights are computed by propagating basic block weights in
142 /// SampleProfile::propagateWeights.
143 EdgeWeightMap EdgeWeights;
144
145 /// \brief Set of visited blocks during propagation.
146 SmallPtrSet<BasicBlock *, 128> VisitedBlocks;
147
148 /// \brief Set of visited edges during propagation.
149 SmallSet<Edge, 128> VisitedEdges;
150
151 /// \brief Equivalence classes for block weights.
152 ///
153 /// Two blocks BB1 and BB2 are in the same equivalence class if they
154 /// dominate and post-dominate each other, and they are in the same loop
155 /// nest. When this happens, the two blocks are guaranteed to execute
156 /// the same number of times.
157 EquivalenceClassMap EquivalenceClass;
158
159 /// \brief Dominance, post-dominance and loop information.
160 DominatorTree *DT;
161 PostDominatorTree *PDT;
162 LoopInfo *LI;
163
164 /// \brief Predecessors for each basic block in the CFG.
165 BlockEdgeMap Predecessors;
166
167 /// \brief Successors for each basic block in the CFG.
168 BlockEdgeMap Successors;
Diego Novilloc0dd1032013-11-26 20:37:33 +0000169};
170
Diego Novillo8d6568b2013-11-13 12:22:21 +0000171/// \brief Sample-based profile reader.
172///
173/// Each profile contains sample counts for all the functions
174/// executed. Inside each function, statements are annotated with the
175/// collected samples on all the instructions associated with that
176/// statement.
177///
178/// For this to produce meaningful data, the program needs to be
179/// compiled with some debug information (at minimum, line numbers:
180/// -gline-tables-only). Otherwise, it will be impossible to match IR
181/// instructions to the line numbers collected by the profiler.
182///
183/// From the profile file, we are interested in collecting the
184/// following information:
185///
186/// * A list of functions included in the profile (mangled names).
187///
188/// * For each function F:
189/// 1. The total number of samples collected in F.
190///
191/// 2. The samples collected at each line in F. To provide some
192/// protection against source code shuffling, line numbers should
193/// be relative to the start of the function.
Diego Novilloc0dd1032013-11-26 20:37:33 +0000194class SampleModuleProfile {
Diego Novillo8d6568b2013-11-13 12:22:21 +0000195public:
Diego Novilloc0dd1032013-11-26 20:37:33 +0000196 SampleModuleProfile(StringRef F) : Profiles(0), Filename(F) {}
Diego Novillo8d6568b2013-11-13 12:22:21 +0000197
Alexey Samsonovaa19c0a2013-11-13 13:09:39 +0000198 void dump();
199 void loadText();
200 void loadNative() { llvm_unreachable("not implemented"); }
Diego Novillo8d6568b2013-11-13 12:22:21 +0000201 void printFunctionProfile(raw_ostream &OS, StringRef FName);
202 void dumpFunctionProfile(StringRef FName);
Diego Novilloc0dd1032013-11-26 20:37:33 +0000203 SampleFunctionProfile &getProfile(const Function &F) {
204 return Profiles[F.getName()];
205 }
Diego Novillo8d6568b2013-11-13 12:22:21 +0000206
207protected:
Diego Novillo8d6568b2013-11-13 12:22:21 +0000208 /// \brief Map every function to its associated profile.
209 ///
210 /// The profile of every function executed at runtime is collected
Diego Novilloc0dd1032013-11-26 20:37:33 +0000211 /// in the structure SampleFunctionProfile. This maps function objects
Diego Novillo8d6568b2013-11-13 12:22:21 +0000212 /// to their corresponding profiles.
Diego Novilloc0dd1032013-11-26 20:37:33 +0000213 StringMap<SampleFunctionProfile> Profiles;
Diego Novillo8d6568b2013-11-13 12:22:21 +0000214
215 /// \brief Path name to the file holding the profile data.
216 ///
217 /// The format of this file is defined by each profiler
218 /// independently. If possible, the profiler should have a text
219 /// version of the profile format to be used in constructing test
220 /// cases and debugging.
221 StringRef Filename;
222};
223
224/// \brief Loader class for text-based profiles.
225///
226/// This class defines a simple interface to read text files containing
227/// profiles. It keeps track of line number information and location of
228/// the file pointer. Users of this class are responsible for actually
229/// parsing the lines returned by the readLine function.
230///
231/// TODO - This does not really belong here. It is a generic text file
232/// reader. It should be moved to the Support library and made more general.
233class ExternalProfileTextLoader {
234public:
235 ExternalProfileTextLoader(StringRef F) : Filename(F) {
236 error_code EC;
237 EC = MemoryBuffer::getFile(Filename, Buffer);
238 if (EC)
239 report_fatal_error("Could not open profile file " + Filename + ": " +
240 EC.message());
241 FP = Buffer->getBufferStart();
242 Lineno = 0;
243 }
244
245 /// \brief Read a line from the mapped file.
246 StringRef readLine() {
247 size_t Length = 0;
248 const char *start = FP;
249 while (FP != Buffer->getBufferEnd() && *FP != '\n') {
250 Length++;
251 FP++;
252 }
253 if (FP != Buffer->getBufferEnd())
254 FP++;
255 Lineno++;
256 return StringRef(start, Length);
257 }
258
259 /// \brief Return true, if we've reached EOF.
260 bool atEOF() const { return FP == Buffer->getBufferEnd(); }
261
262 /// \brief Report a parse error message and stop compilation.
263 void reportParseError(Twine Msg) const {
264 report_fatal_error(Filename + ":" + Twine(Lineno) + ": " + Msg + "\n");
265 }
266
267private:
268 /// \brief Memory buffer holding the text file.
269 OwningPtr<MemoryBuffer> Buffer;
270
271 /// \brief Current position into the memory buffer.
272 const char *FP;
273
274 /// \brief Current line number.
275 int64_t Lineno;
276
277 /// \brief Path name where to the profile file.
278 StringRef Filename;
279};
280
281/// \brief Sample profile pass.
282///
283/// This pass reads profile data from the file specified by
284/// -sample-profile-file and annotates every affected function with the
285/// profile information found in that file.
286class SampleProfileLoader : public FunctionPass {
287public:
288 // Class identification, replacement for typeinfo
289 static char ID;
290
291 SampleProfileLoader(StringRef Name = SampleProfileFile)
292 : FunctionPass(ID), Profiler(0), Filename(Name) {
293 initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry());
294 }
295
296 virtual bool doInitialization(Module &M);
297
298 void dump() { Profiler->dump(); }
299
300 virtual const char *getPassName() const { return "Sample profile pass"; }
301
302 virtual bool runOnFunction(Function &F);
303
304 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
305 AU.setPreservesCFG();
Diego Novillo0accb3d2014-01-10 23:23:46 +0000306 AU.addRequired<LoopInfo>();
307 AU.addRequired<DominatorTree>();
308 AU.addRequired<PostDominatorTree>();
Diego Novillo8d6568b2013-11-13 12:22:21 +0000309 }
310
311protected:
312 /// \brief Profile reader object.
Diego Novilloc0dd1032013-11-26 20:37:33 +0000313 OwningPtr<SampleModuleProfile> Profiler;
Diego Novillo8d6568b2013-11-13 12:22:21 +0000314
315 /// \brief Name of the profile file to load.
316 StringRef Filename;
317};
318}
319
Diego Novilloc0dd1032013-11-26 20:37:33 +0000320/// \brief Print this function profile on stream \p OS.
Diego Novillo8d6568b2013-11-13 12:22:21 +0000321///
322/// \param OS Stream to emit the output to.
Diego Novilloc0dd1032013-11-26 20:37:33 +0000323void SampleFunctionProfile::print(raw_ostream &OS) {
324 OS << TotalSamples << ", " << TotalHeadSamples << ", " << BodySamples.size()
Diego Novillo8d6568b2013-11-13 12:22:21 +0000325 << " sampled lines\n";
Diego Novilloc0dd1032013-11-26 20:37:33 +0000326 for (BodySampleMap::const_iterator SI = BodySamples.begin(),
327 SE = BodySamples.end();
Diego Novillo8d6568b2013-11-13 12:22:21 +0000328 SI != SE; ++SI)
329 OS << "\tline offset: " << SI->first
330 << ", number of samples: " << SI->second << "\n";
331 OS << "\n";
332}
333
Diego Novillo0accb3d2014-01-10 23:23:46 +0000334/// \brief Print the weight of edge \p E on stream \p OS.
335///
336/// \param OS Stream to emit the output to.
337/// \param E Edge to print.
338void SampleFunctionProfile::printEdgeWeight(raw_ostream &OS, Edge E) {
339 OS << "weight[" << E.first->getName() << "->" << E.second->getName()
340 << "]: " << EdgeWeights[E] << "\n";
341}
342
343/// \brief Print the equivalence class of block \p BB on stream \p OS.
344///
345/// \param OS Stream to emit the output to.
346/// \param BB Block to print.
347void SampleFunctionProfile::printBlockEquivalence(raw_ostream &OS,
348 BasicBlock *BB) {
349 BasicBlock *Equiv = EquivalenceClass[BB];
350 OS << "equivalence[" << BB->getName()
351 << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
352}
353
354/// \brief Print the weight of block \p BB on stream \p OS.
355///
356/// \param OS Stream to emit the output to.
357/// \param BB Block to print.
358void SampleFunctionProfile::printBlockWeight(raw_ostream &OS, BasicBlock *BB) {
359 OS << "weight[" << BB->getName() << "]: " << BlockWeights[BB] << "\n";
360}
361
Diego Novilloc0dd1032013-11-26 20:37:33 +0000362/// \brief Print the function profile for \p FName on stream \p OS.
363///
364/// \param OS Stream to emit the output to.
365/// \param FName Name of the function to print.
366void SampleModuleProfile::printFunctionProfile(raw_ostream &OS,
367 StringRef FName) {
368 OS << "Function: " << FName << ":\n";
369 Profiles[FName].print(OS);
370}
371
Diego Novillo8d6568b2013-11-13 12:22:21 +0000372/// \brief Dump the function profile for \p FName.
373///
374/// \param FName Name of the function to print.
Diego Novilloc0dd1032013-11-26 20:37:33 +0000375void SampleModuleProfile::dumpFunctionProfile(StringRef FName) {
Diego Novillo8d6568b2013-11-13 12:22:21 +0000376 printFunctionProfile(dbgs(), FName);
377}
378
379/// \brief Dump all the function profiles found.
Diego Novilloc0dd1032013-11-26 20:37:33 +0000380void SampleModuleProfile::dump() {
381 for (StringMap<SampleFunctionProfile>::const_iterator I = Profiles.begin(),
382 E = Profiles.end();
Diego Novillo8d6568b2013-11-13 12:22:21 +0000383 I != E; ++I)
384 dumpFunctionProfile(I->getKey());
385}
386
387/// \brief Load samples from a text file.
388///
389/// The file is divided in two segments:
390///
391/// Symbol table (represented with the string "symbol table")
392/// Number of symbols in the table
393/// symbol 1
394/// symbol 2
395/// ...
396/// symbol N
397///
398/// Function body profiles
399/// function1:total_samples:total_head_samples:number_of_locations
400/// location_offset_1: number_of_samples
401/// location_offset_2: number_of_samples
402/// ...
403/// location_offset_N: number_of_samples
404///
405/// Function names must be mangled in order for the profile loader to
406/// match them in the current translation unit.
407///
408/// Since this is a flat profile, a function that shows up more than
409/// once gets all its samples aggregated across all its instances.
410/// TODO - flat profiles are too imprecise to provide good optimization
411/// opportunities. Convert them to context-sensitive profile.
412///
413/// This textual representation is useful to generate unit tests and
414/// for debugging purposes, but it should not be used to generate
415/// profiles for large programs, as the representation is extremely
416/// inefficient.
Diego Novilloc0dd1032013-11-26 20:37:33 +0000417void SampleModuleProfile::loadText() {
Diego Novillo8d6568b2013-11-13 12:22:21 +0000418 ExternalProfileTextLoader Loader(Filename);
419
420 // Read the symbol table.
421 StringRef Line = Loader.readLine();
422 if (Line != "symbol table")
423 Loader.reportParseError("Expected 'symbol table', found " + Line);
424 int NumSymbols;
425 Line = Loader.readLine();
426 if (Line.getAsInteger(10, NumSymbols))
427 Loader.reportParseError("Expected a number, found " + Line);
Diego Novilloc0dd1032013-11-26 20:37:33 +0000428 for (int I = 0; I < NumSymbols; I++)
429 Profiles[Loader.readLine()] = SampleFunctionProfile();
Diego Novillo8d6568b2013-11-13 12:22:21 +0000430
431 // Read the profile of each function. Since each function may be
432 // mentioned more than once, and we are collecting flat profiles,
433 // accumulate samples as we parse them.
434 Regex HeadRE("^([^:]+):([0-9]+):([0-9]+):([0-9]+)$");
435 Regex LineSample("^([0-9]+): ([0-9]+)$");
436 while (!Loader.atEOF()) {
437 SmallVector<StringRef, 4> Matches;
438 Line = Loader.readLine();
439 if (!HeadRE.match(Line, &Matches))
440 Loader.reportParseError("Expected 'mangled_name:NUM:NUM:NUM', found " +
441 Line);
442 assert(Matches.size() == 5);
443 StringRef FName = Matches[1];
444 unsigned NumSamples, NumHeadSamples, NumSampledLines;
445 Matches[2].getAsInteger(10, NumSamples);
446 Matches[3].getAsInteger(10, NumHeadSamples);
447 Matches[4].getAsInteger(10, NumSampledLines);
Diego Novilloc0dd1032013-11-26 20:37:33 +0000448 SampleFunctionProfile &FProfile = Profiles[FName];
449 FProfile.addTotalSamples(NumSamples);
450 FProfile.addHeadSamples(NumHeadSamples);
Diego Novillo8d6568b2013-11-13 12:22:21 +0000451 unsigned I;
452 for (I = 0; I < NumSampledLines && !Loader.atEOF(); I++) {
453 Line = Loader.readLine();
454 if (!LineSample.match(Line, &Matches))
455 Loader.reportParseError("Expected 'NUM: NUM', found " + Line);
456 assert(Matches.size() == 3);
457 unsigned LineOffset, NumSamples;
458 Matches[1].getAsInteger(10, LineOffset);
459 Matches[2].getAsInteger(10, NumSamples);
Diego Novillo0accb3d2014-01-10 23:23:46 +0000460 // When dealing with instruction weights, we use the value
461 // zero to indicate the absence of a sample. If we read an
462 // actual zero from the profile file, return it as 1 to
463 // avoid the confusion later on.
464 if (NumSamples == 0)
465 NumSamples = 1;
Diego Novilloc0dd1032013-11-26 20:37:33 +0000466 FProfile.addBodySamples(LineOffset, NumSamples);
Diego Novillo8d6568b2013-11-13 12:22:21 +0000467 }
468
469 if (I < NumSampledLines)
470 Loader.reportParseError("Unexpected end of file");
471 }
472}
473
Diego Novillo0accb3d2014-01-10 23:23:46 +0000474/// \brief Get the weight for an instruction.
475///
476/// The "weight" of an instruction \p Inst is the number of samples
477/// collected on that instruction at runtime. To retrieve it, we
478/// need to compute the line number of \p Inst relative to the start of its
479/// function. We use HeaderLineno to compute the offset. We then
480/// look up the samples collected for \p Inst using BodySamples.
481///
482/// \param Inst Instruction to query.
483///
484/// \returns The profiled weight of I.
485uint32_t SampleFunctionProfile::getInstWeight(Instruction &Inst) {
486 unsigned Lineno = Inst.getDebugLoc().getLine();
487 if (Lineno < HeaderLineno)
488 return 0;
489 unsigned LOffset = Lineno - HeaderLineno;
490 uint32_t Weight = BodySamples.lookup(LOffset);
491 DEBUG(dbgs() << " " << Lineno << ":" << Inst.getDebugLoc().getCol() << ":"
492 << Inst << " (line offset: " << LOffset
493 << " - weight: " << Weight << ")\n");
494 return Weight;
495}
496
497/// \brief Compute the weight of a basic block.
498///
499/// The weight of basic block \p B is the maximum weight of all the
500/// instructions in B. The weight of \p B is computed and cached in
501/// the BlockWeights map.
502///
503/// \param B The basic block to query.
504///
505/// \returns The computed weight of B.
506uint32_t SampleFunctionProfile::getBlockWeight(BasicBlock *B) {
507 // If we've computed B's weight before, return it.
508 std::pair<BlockWeightMap::iterator, bool> Entry =
509 BlockWeights.insert(std::make_pair(B, 0));
510 if (!Entry.second)
511 return Entry.first->second;
512
513 // Otherwise, compute and cache B's weight.
514 uint32_t Weight = 0;
515 for (BasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) {
516 uint32_t InstWeight = getInstWeight(*I);
517 if (InstWeight > Weight)
518 Weight = InstWeight;
519 }
520 Entry.first->second = Weight;
521 return Weight;
522}
523
524/// \brief Compute and store the weights of every basic block.
525///
526/// This populates the BlockWeights map by computing
527/// the weights of every basic block in the CFG.
528///
529/// \param F The function to query.
530bool SampleFunctionProfile::computeBlockWeights(Function &F) {
531 bool Changed = false;
532 DEBUG(dbgs() << "Block weights\n");
533 for (Function::iterator B = F.begin(), E = F.end(); B != E; ++B) {
534 uint32_t Weight = getBlockWeight(B);
535 Changed |= (Weight > 0);
536 DEBUG(printBlockWeight(dbgs(), B));
537 }
538
539 return Changed;
540}
541
542/// \brief Find equivalence classes for the given block.
543///
544/// This finds all the blocks that are guaranteed to execute the same
545/// number of times as \p BB1. To do this, it traverses all the the
546/// descendants of \p BB1 in the dominator or post-dominator tree.
547///
548/// A block BB2 will be in the same equivalence class as \p BB1 if
549/// the following holds:
550///
551/// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2
552/// is a descendant of \p BB1 in the dominator tree, then BB2 should
553/// dominate BB1 in the post-dominator tree.
554///
555/// 2- Both BB2 and \p BB1 must be in the same loop.
556///
557/// For every block BB2 that meets those two requirements, we set BB2's
558/// equivalence class to \p BB1.
559///
560/// \param BB1 Block to check.
561/// \param Descendants Descendants of \p BB1 in either the dom or pdom tree.
562/// \param DomTree Opposite dominator tree. If \p Descendants is filled
563/// with blocks from \p BB1's dominator tree, then
564/// this is the post-dominator tree, and vice versa.
565void SampleFunctionProfile::findEquivalencesFor(
566 BasicBlock *BB1, SmallVector<BasicBlock *, 8> Descendants,
567 DominatorTreeBase<BasicBlock> *DomTree) {
568 for (SmallVectorImpl<BasicBlock *>::iterator I = Descendants.begin(),
569 E = Descendants.end();
570 I != E; ++I) {
571 BasicBlock *BB2 = *I;
572 bool IsDomParent = DomTree->dominates(BB2, BB1);
573 bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
574 if (BB1 != BB2 && VisitedBlocks.insert(BB2) && IsDomParent &&
575 IsInSameLoop) {
576 EquivalenceClass[BB2] = BB1;
577
578 // If BB2 is heavier than BB1, make BB2 have the same weight
579 // as BB1.
580 //
581 // Note that we don't worry about the opposite situation here
582 // (when BB2 is lighter than BB1). We will deal with this
583 // during the propagation phase. Right now, we just want to
584 // make sure that BB1 has the largest weight of all the
585 // members of its equivalence set.
586 uint32_t &BB1Weight = BlockWeights[BB1];
587 uint32_t &BB2Weight = BlockWeights[BB2];
588 BB1Weight = std::max(BB1Weight, BB2Weight);
589 }
590 }
591}
592
593/// \brief Find equivalence classes.
594///
595/// Since samples may be missing from blocks, we can fill in the gaps by setting
596/// the weights of all the blocks in the same equivalence class to the same
597/// weight. To compute the concept of equivalence, we use dominance and loop
598/// information. Two blocks B1 and B2 are in the same equivalence class if B1
599/// dominates B2, B2 post-dominates B1 and both are in the same loop.
600///
601/// \param F The function to query.
602void SampleFunctionProfile::findEquivalenceClasses(Function &F) {
603 SmallVector<BasicBlock *, 8> DominatedBBs;
604 DEBUG(dbgs() << "\nBlock equivalence classes\n");
605 // Find equivalence sets based on dominance and post-dominance information.
606 for (Function::iterator B = F.begin(), E = F.end(); B != E; ++B) {
607 BasicBlock *BB1 = B;
608
609 // Compute BB1's equivalence class once.
610 if (EquivalenceClass.count(BB1)) {
611 DEBUG(printBlockEquivalence(dbgs(), BB1));
612 continue;
613 }
614
615 // By default, blocks are in their own equivalence class.
616 EquivalenceClass[BB1] = BB1;
617
618 // Traverse all the blocks dominated by BB1. We are looking for
619 // every basic block BB2 such that:
620 //
621 // 1- BB1 dominates BB2.
622 // 2- BB2 post-dominates BB1.
623 // 3- BB1 and BB2 are in the same loop nest.
624 //
625 // If all those conditions hold, it means that BB2 is executed
626 // as many times as BB1, so they are placed in the same equivalence
627 // class by making BB2's equivalence class be BB1.
628 DominatedBBs.clear();
629 DT->getDescendants(BB1, DominatedBBs);
630 findEquivalencesFor(BB1, DominatedBBs, PDT->DT);
631
632 // Repeat the same logic for all the blocks post-dominated by BB1.
633 // We are looking for every basic block BB2 such that:
634 //
635 // 1- BB1 post-dominates BB2.
636 // 2- BB2 dominates BB1.
637 // 3- BB1 and BB2 are in the same loop nest.
638 //
639 // If all those conditions hold, BB2's equivalence class is BB1.
640 DominatedBBs.clear();
641 PDT->getDescendants(BB1, DominatedBBs);
642 findEquivalencesFor(BB1, DominatedBBs, DT->DT);
643
644 DEBUG(printBlockEquivalence(dbgs(), BB1));
645 }
646
647 // Assign weights to equivalence classes.
648 //
649 // All the basic blocks in the same equivalence class will execute
650 // the same number of times. Since we know that the head block in
651 // each equivalence class has the largest weight, assign that weight
652 // to all the blocks in that equivalence class.
653 DEBUG(dbgs() << "\nAssign the same weight to all blocks in the same class\n");
654 for (Function::iterator B = F.begin(), E = F.end(); B != E; ++B) {
655 BasicBlock *BB = B;
656 BasicBlock *EquivBB = EquivalenceClass[BB];
657 if (BB != EquivBB)
658 BlockWeights[BB] = BlockWeights[EquivBB];
659 DEBUG(printBlockWeight(dbgs(), BB));
660 }
661}
662
663/// \brief Visit the given edge to decide if it has a valid weight.
664///
665/// If \p E has not been visited before, we copy to \p UnknownEdge
666/// and increment the count of unknown edges.
667///
668/// \param E Edge to visit.
669/// \param NumUnknownEdges Current number of unknown edges.
670/// \param UnknownEdge Set if E has not been visited before.
671///
672/// \returns E's weight, if known. Otherwise, return 0.
673uint32_t SampleFunctionProfile::visitEdge(Edge E, unsigned *NumUnknownEdges,
674 Edge *UnknownEdge) {
675 if (!VisitedEdges.count(E)) {
676 (*NumUnknownEdges)++;
677 *UnknownEdge = E;
678 return 0;
679 }
680
681 return EdgeWeights[E];
682}
683
684/// \brief Propagate weights through incoming/outgoing edges.
685///
686/// If the weight of a basic block is known, and there is only one edge
687/// with an unknown weight, we can calculate the weight of that edge.
688///
689/// Similarly, if all the edges have a known count, we can calculate the
690/// count of the basic block, if needed.
691///
692/// \param F Function to process.
693///
694/// \returns True if new weights were assigned to edges or blocks.
695bool SampleFunctionProfile::propagateThroughEdges(Function &F) {
696 bool Changed = false;
697 DEBUG(dbgs() << "\nPropagation through edges\n");
698 for (Function::iterator BI = F.begin(), EI = F.end(); BI != EI; ++BI) {
699 BasicBlock *BB = BI;
700
701 // Visit all the predecessor and successor edges to determine
702 // which ones have a weight assigned already. Note that it doesn't
703 // matter that we only keep track of a single unknown edge. The
704 // only case we are interested in handling is when only a single
705 // edge is unknown (see setEdgeOrBlockWeight).
706 for (unsigned i = 0; i < 2; i++) {
707 uint32_t TotalWeight = 0;
708 unsigned NumUnknownEdges = 0;
709 Edge UnknownEdge, SelfReferentialEdge;
710
711 if (i == 0) {
712 // First, visit all predecessor edges.
713 for (size_t I = 0; I < Predecessors[BB].size(); I++) {
714 Edge E = std::make_pair(Predecessors[BB][I], BB);
715 TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
716 if (E.first == E.second)
717 SelfReferentialEdge = E;
718 }
719 } else {
720 // On the second round, visit all successor edges.
721 for (size_t I = 0; I < Successors[BB].size(); I++) {
722 Edge E = std::make_pair(BB, Successors[BB][I]);
723 TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
724 }
725 }
726
727 // After visiting all the edges, there are three cases that we
728 // can handle immediately:
729 //
730 // - All the edge weights are known (i.e., NumUnknownEdges == 0).
731 // In this case, we simply check that the sum of all the edges
732 // is the same as BB's weight. If not, we change BB's weight
733 // to match. Additionally, if BB had not been visited before,
734 // we mark it visited.
735 //
736 // - Only one edge is unknown and BB has already been visited.
737 // In this case, we can compute the weight of the edge by
738 // subtracting the total block weight from all the known
739 // edge weights. If the edges weight more than BB, then the
740 // edge of the last remaining edge is set to zero.
741 //
742 // - There exists a self-referential edge and the weight of BB is
743 // known. In this case, this edge can be based on BB's weight.
744 // We add up all the other known edges and set the weight on
745 // the self-referential edge as we did in the previous case.
746 //
747 // In any other case, we must continue iterating. Eventually,
748 // all edges will get a weight, or iteration will stop when
749 // it reaches SampleProfileMaxPropagateIterations.
750 if (NumUnknownEdges <= 1) {
751 uint32_t &BBWeight = BlockWeights[BB];
752 if (NumUnknownEdges == 0) {
753 // If we already know the weight of all edges, the weight of the
754 // basic block can be computed. It should be no larger than the sum
755 // of all edge weights.
756 if (TotalWeight > BBWeight) {
757 BBWeight = TotalWeight;
758 Changed = true;
759 DEBUG(dbgs() << "All edge weights for " << BB->getName()
760 << " known. Set weight for block: ";
761 printBlockWeight(dbgs(), BB););
762 }
763 if (VisitedBlocks.insert(BB))
764 Changed = true;
765 } else if (NumUnknownEdges == 1 && VisitedBlocks.count(BB)) {
766 // If there is a single unknown edge and the block has been
767 // visited, then we can compute E's weight.
768 if (BBWeight >= TotalWeight)
769 EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
770 else
771 EdgeWeights[UnknownEdge] = 0;
772 VisitedEdges.insert(UnknownEdge);
773 Changed = true;
774 DEBUG(dbgs() << "Set weight for edge: ";
775 printEdgeWeight(dbgs(), UnknownEdge));
776 }
777 } else if (SelfReferentialEdge.first && VisitedBlocks.count(BB)) {
778 uint32_t &BBWeight = BlockWeights[BB];
779 // We have a self-referential edge and the weight of BB is known.
780 if (BBWeight >= TotalWeight)
781 EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
782 else
783 EdgeWeights[SelfReferentialEdge] = 0;
784 VisitedEdges.insert(SelfReferentialEdge);
785 Changed = true;
786 DEBUG(dbgs() << "Set self-referential edge weight to: ";
787 printEdgeWeight(dbgs(), SelfReferentialEdge));
788 }
789 }
790 }
791
792 return Changed;
793}
794
795/// \brief Build in/out edge lists for each basic block in the CFG.
796///
797/// We are interested in unique edges. If a block B1 has multiple
798/// edges to another block B2, we only add a single B1->B2 edge.
799void SampleFunctionProfile::buildEdges(Function &F) {
800 for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
801 BasicBlock *B1 = I;
802
803 // Add predecessors for B1.
804 SmallPtrSet<BasicBlock *, 16> Visited;
805 if (!Predecessors[B1].empty())
806 llvm_unreachable("Found a stale predecessors list in a basic block.");
807 for (pred_iterator PI = pred_begin(B1), PE = pred_end(B1); PI != PE; ++PI) {
808 BasicBlock *B2 = *PI;
809 if (Visited.insert(B2))
810 Predecessors[B1].push_back(B2);
811 }
812
813 // Add successors for B1.
814 Visited.clear();
815 if (!Successors[B1].empty())
816 llvm_unreachable("Found a stale successors list in a basic block.");
817 for (succ_iterator SI = succ_begin(B1), SE = succ_end(B1); SI != SE; ++SI) {
818 BasicBlock *B2 = *SI;
819 if (Visited.insert(B2))
820 Successors[B1].push_back(B2);
821 }
822 }
823}
824
825/// \brief Propagate weights into edges
826///
827/// The following rules are applied to every block B in the CFG:
828///
829/// - If B has a single predecessor/successor, then the weight
830/// of that edge is the weight of the block.
831///
832/// - If all incoming or outgoing edges are known except one, and the
833/// weight of the block is already known, the weight of the unknown
834/// edge will be the weight of the block minus the sum of all the known
835/// edges. If the sum of all the known edges is larger than B's weight,
836/// we set the unknown edge weight to zero.
837///
838/// - If there is a self-referential edge, and the weight of the block is
839/// known, the weight for that edge is set to the weight of the block
840/// minus the weight of the other incoming edges to that block (if
841/// known).
842void SampleFunctionProfile::propagateWeights(Function &F) {
843 bool Changed = true;
844 unsigned i = 0;
845
846 // Before propagation starts, build, for each block, a list of
847 // unique predecessors and successors. This is necessary to handle
848 // identical edges in multiway branches. Since we visit all blocks and all
849 // edges of the CFG, it is cleaner to build these lists once at the start
850 // of the pass.
851 buildEdges(F);
852
853 // Propagate until we converge or we go past the iteration limit.
854 while (Changed && i++ < SampleProfileMaxPropagateIterations) {
855 Changed = propagateThroughEdges(F);
856 }
857
858 // Generate MD_prof metadata for every branch instruction using the
859 // edge weights computed during propagation.
860 DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
861 MDBuilder MDB(F.getContext());
862 for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
863 BasicBlock *B = I;
864 TerminatorInst *TI = B->getTerminator();
865 if (TI->getNumSuccessors() == 1)
866 continue;
867 if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
868 continue;
869
870 DEBUG(dbgs() << "\nGetting weights for branch at line "
871 << TI->getDebugLoc().getLine() << ":"
872 << TI->getDebugLoc().getCol() << ".\n");
873 SmallVector<uint32_t, 4> Weights;
874 bool AllWeightsZero = true;
875 for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
876 BasicBlock *Succ = TI->getSuccessor(I);
877 Edge E = std::make_pair(B, Succ);
878 uint32_t Weight = EdgeWeights[E];
879 DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
880 Weights.push_back(Weight);
881 if (Weight != 0)
882 AllWeightsZero = false;
883 }
884
885 // Only set weights if there is at least one non-zero weight.
886 // In any other case, let the analyzer set weights.
887 if (!AllWeightsZero) {
888 DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
889 TI->setMetadata(llvm::LLVMContext::MD_prof,
890 MDB.createBranchWeights(Weights));
891 } else {
892 DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
893 }
894 }
895}
896
897/// \brief Get the line number for the function header.
898///
899/// This looks up function \p F in the current compilation unit and
900/// retrieves the line number where the function is defined. This is
901/// line 0 for all the samples read from the profile file. Every line
902/// number is relative to this line.
903///
904/// \param F Function object to query.
905///
906/// \returns the line number where \p F is defined.
907unsigned SampleFunctionProfile::getFunctionLoc(Function &F) {
908 NamedMDNode *CUNodes = F.getParent()->getNamedMetadata("llvm.dbg.cu");
909 if (CUNodes) {
910 for (unsigned I = 0, E1 = CUNodes->getNumOperands(); I != E1; ++I) {
911 DICompileUnit CU(CUNodes->getOperand(I));
912 DIArray Subprograms = CU.getSubprograms();
913 for (unsigned J = 0, E2 = Subprograms.getNumElements(); J != E2; ++J) {
914 DISubprogram Subprogram(Subprograms.getElement(J));
915 if (Subprogram.describes(&F))
916 return Subprogram.getLineNumber();
917 }
918 }
919 }
920
921 report_fatal_error("No debug information found in function " + F.getName() +
922 "\n");
923}
924
925/// \brief Generate branch weight metadata for all branches in \p F.
926///
927/// Branch weights are computed out of instruction samples using a
928/// propagation heuristic. Propagation proceeds in 3 phases:
929///
930/// 1- Assignment of block weights. All the basic blocks in the function
931/// are initial assigned the same weight as their most frequently
932/// executed instruction.
933///
934/// 2- Creation of equivalence classes. Since samples may be missing from
935/// blocks, we can fill in the gaps by setting the weights of all the
936/// blocks in the same equivalence class to the same weight. To compute
937/// the concept of equivalence, we use dominance and loop information.
938/// Two blocks B1 and B2 are in the same equivalence class if B1
939/// dominates B2, B2 post-dominates B1 and both are in the same loop.
940///
941/// 3- Propagation of block weights into edges. This uses a simple
942/// propagation heuristic. The following rules are applied to every
943/// block B in the CFG:
944///
945/// - If B has a single predecessor/successor, then the weight
946/// of that edge is the weight of the block.
947///
948/// - If all the edges are known except one, and the weight of the
949/// block is already known, the weight of the unknown edge will
950/// be the weight of the block minus the sum of all the known
951/// edges. If the sum of all the known edges is larger than B's weight,
952/// we set the unknown edge weight to zero.
953///
954/// - If there is a self-referential edge, and the weight of the block is
955/// known, the weight for that edge is set to the weight of the block
956/// minus the weight of the other incoming edges to that block (if
957/// known).
958///
959/// Since this propagation is not guaranteed to finalize for every CFG, we
960/// only allow it to proceed for a limited number of iterations (controlled
961/// by -sample-profile-max-propagate-iterations).
962///
963/// FIXME: Try to replace this propagation heuristic with a scheme
964/// that is guaranteed to finalize. A work-list approach similar to
965/// the standard value propagation algorithm used by SSA-CCP might
966/// work here.
967///
968/// Once all the branch weights are computed, we emit the MD_prof
969/// metadata on B using the computed values for each of its branches.
970///
971/// \param F The function to query.
972bool SampleFunctionProfile::emitAnnotations(Function &F, DominatorTree *DomTree,
973 PostDominatorTree *PostDomTree,
974 LoopInfo *Loops) {
975 bool Changed = false;
976
977 // Initialize invariants used during computation and propagation.
978 HeaderLineno = getFunctionLoc(F);
979 DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
980 << ": " << HeaderLineno << "\n");
981 DT = DomTree;
982 PDT = PostDomTree;
983 LI = Loops;
984
985 // Compute basic block weights.
986 Changed |= computeBlockWeights(F);
987
988 if (Changed) {
989 // Find equivalence classes.
990 findEquivalenceClasses(F);
991
992 // Propagate weights to all edges.
993 propagateWeights(F);
994 }
995
996 return Changed;
997}
998
Diego Novilloc0dd1032013-11-26 20:37:33 +0000999char SampleProfileLoader::ID = 0;
Diego Novillo0accb3d2014-01-10 23:23:46 +00001000INITIALIZE_PASS_BEGIN(SampleProfileLoader, "sample-profile",
1001 "Sample Profile loader", false, false)
1002INITIALIZE_PASS_DEPENDENCY(DominatorTree)
1003INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
1004INITIALIZE_PASS_DEPENDENCY(LoopInfo)
1005INITIALIZE_PASS_END(SampleProfileLoader, "sample-profile",
1006 "Sample Profile loader", false, false)
Diego Novilloc0dd1032013-11-26 20:37:33 +00001007
1008bool SampleProfileLoader::doInitialization(Module &M) {
1009 Profiler.reset(new SampleModuleProfile(Filename));
1010 Profiler->loadText();
1011 return true;
1012}
1013
1014FunctionPass *llvm::createSampleProfileLoaderPass() {
1015 return new SampleProfileLoader(SampleProfileFile);
1016}
1017
1018FunctionPass *llvm::createSampleProfileLoaderPass(StringRef Name) {
1019 return new SampleProfileLoader(Name);
1020}
1021
Diego Novillo8d6568b2013-11-13 12:22:21 +00001022bool SampleProfileLoader::runOnFunction(Function &F) {
Diego Novillo0accb3d2014-01-10 23:23:46 +00001023 DominatorTree *DT = &getAnalysis<DominatorTree>();
1024 PostDominatorTree *PDT = &getAnalysis<PostDominatorTree>();
1025 LoopInfo *LI = &getAnalysis<LoopInfo>();
1026 SampleFunctionProfile &FunctionProfile = Profiler->getProfile(F);
1027 if (!FunctionProfile.empty())
1028 return FunctionProfile.emitAnnotations(F, DT, PDT, LI);
1029 return false;
Diego Novillo8d6568b2013-11-13 12:22:21 +00001030}