blob: 1ea7d2a0c4b5f295ac39560efd0844fd80d6ac27 [file] [log] [blame]
Diego Novillode1ab262014-09-09 12:40:50 +00001//===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the class that reads LLVM sample profiles. It
Diego Novillobb5605c2015-10-14 18:36:30 +000011// supports three file formats: text, binary and gcov.
Diego Novillode1ab262014-09-09 12:40:50 +000012//
Diego Novillobb5605c2015-10-14 18:36:30 +000013// The textual representation is useful for debugging and testing purposes. The
14// binary representation is more compact, resulting in smaller file sizes.
Diego Novillode1ab262014-09-09 12:40:50 +000015//
Diego Novillobb5605c2015-10-14 18:36:30 +000016// The gcov encoding is the one generated by GCC's AutoFDO profile creation
17// tool (https://github.com/google/autofdo)
Diego Novillode1ab262014-09-09 12:40:50 +000018//
Diego Novillobb5605c2015-10-14 18:36:30 +000019// All three encodings can be used interchangeably as an input sample profile.
Diego Novillode1ab262014-09-09 12:40:50 +000020//
Diego Novillode1ab262014-09-09 12:40:50 +000021//===----------------------------------------------------------------------===//
22
23#include "llvm/ProfileData/SampleProfReader.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/ErrorOr.h"
Diego Novilloc572e922014-10-30 18:00:06 +000026#include "llvm/Support/LEB128.h"
Diego Novillode1ab262014-09-09 12:40:50 +000027#include "llvm/Support/LineIterator.h"
Diego Novilloc572e922014-10-30 18:00:06 +000028#include "llvm/Support/MemoryBuffer.h"
Dehao Chen67226882015-09-30 00:42:46 +000029#include "llvm/ADT/DenseMap.h"
30#include "llvm/ADT/SmallVector.h"
Diego Novillode1ab262014-09-09 12:40:50 +000031
Diego Novilloc572e922014-10-30 18:00:06 +000032using namespace llvm::sampleprof;
Diego Novillode1ab262014-09-09 12:40:50 +000033using namespace llvm;
34
35/// \brief Print the samples collected for a function on stream \p OS.
36///
37/// \param OS Stream to emit the output to.
Diego Novilloaae1ed82015-10-08 19:40:37 +000038void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const {
Diego Novillode1ab262014-09-09 12:40:50 +000039 OS << TotalSamples << ", " << TotalHeadSamples << ", " << BodySamples.size()
40 << " sampled lines\n";
Diego Novillod5336ae2014-11-01 00:56:55 +000041 for (const auto &SI : BodySamples) {
42 LineLocation Loc = SI.first;
43 const SampleRecord &Sample = SI.second;
Diego Novilloaae1ed82015-10-08 19:40:37 +000044 OS.indent(Indent);
45 OS << "line offset: " << Loc.LineOffset
Diego Novilloc572e922014-10-30 18:00:06 +000046 << ", discriminator: " << Loc.Discriminator
47 << ", number of samples: " << Sample.getSamples();
48 if (Sample.hasCalls()) {
49 OS << ", calls:";
Diego Novillod5336ae2014-11-01 00:56:55 +000050 for (const auto &I : Sample.getCallTargets())
51 OS << " " << I.first() << ":" << I.second;
Diego Novilloc572e922014-10-30 18:00:06 +000052 }
53 OS << "\n";
54 }
Diego Novilloaae1ed82015-10-08 19:40:37 +000055 for (const auto &CS : CallsiteSamples) {
56 CallsiteLocation Loc = CS.first;
57 const FunctionSamples &CalleeSamples = CS.second;
58 OS.indent(Indent);
59 OS << "line offset: " << Loc.LineOffset
60 << ", discriminator: " << Loc.Discriminator
61 << ", inlined callee: " << Loc.CalleeName << ": ";
62 CalleeSamples.print(OS, Indent + 2);
63 }
Diego Novillode1ab262014-09-09 12:40:50 +000064}
65
Diego Novillod5336ae2014-11-01 00:56:55 +000066/// \brief Dump the function profile for \p FName.
Diego Novillode1ab262014-09-09 12:40:50 +000067///
Diego Novillode1ab262014-09-09 12:40:50 +000068/// \param FName Name of the function to print.
Diego Novillod5336ae2014-11-01 00:56:55 +000069/// \param OS Stream to emit the output to.
70void SampleProfileReader::dumpFunctionProfile(StringRef FName,
71 raw_ostream &OS) {
Diego Novilloc572e922014-10-30 18:00:06 +000072 OS << "Function: " << FName << ": ";
Diego Novillode1ab262014-09-09 12:40:50 +000073 Profiles[FName].print(OS);
74}
75
Diego Novillod5336ae2014-11-01 00:56:55 +000076/// \brief Dump all the function profiles found on stream \p OS.
77void SampleProfileReader::dump(raw_ostream &OS) {
78 for (const auto &I : Profiles)
79 dumpFunctionProfile(I.getKey(), OS);
Diego Novillode1ab262014-09-09 12:40:50 +000080}
81
Dehao Chen67226882015-09-30 00:42:46 +000082/// \brief Parse \p Input as function head.
83///
84/// Parse one line of \p Input, and update function name in \p FName,
85/// function's total sample count in \p NumSamples, function's entry
86/// count in \p NumHeadSamples.
87///
88/// \returns true if parsing is successful.
89static bool ParseHead(const StringRef &Input, StringRef &FName,
Diego Novillo38be3332015-10-15 16:36:21 +000090 uint64_t &NumSamples, uint64_t &NumHeadSamples) {
Dehao Chen67226882015-09-30 00:42:46 +000091 if (Input[0] == ' ')
92 return false;
93 size_t n2 = Input.rfind(':');
94 size_t n1 = Input.rfind(':', n2 - 1);
95 FName = Input.substr(0, n1);
96 if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples))
97 return false;
98 if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples))
99 return false;
100 return true;
101}
102
103/// \brief Parse \p Input as line sample.
104///
105/// \param Input input line.
106/// \param IsCallsite true if the line represents an inlined callsite.
107/// \param Depth the depth of the inline stack.
108/// \param NumSamples total samples of the line/inlined callsite.
109/// \param LineOffset line offset to the start of the function.
110/// \param Discriminator discriminator of the line.
111/// \param TargetCountMap map from indirect call target to count.
112///
113/// returns true if parsing is successful.
Diego Novillo38be3332015-10-15 16:36:21 +0000114static bool ParseLine(const StringRef &Input, bool &IsCallsite, uint32_t &Depth,
115 uint64_t &NumSamples, uint32_t &LineOffset,
116 uint32_t &Discriminator, StringRef &CalleeName,
117 DenseMap<StringRef, uint64_t> &TargetCountMap) {
Dehao Chen67226882015-09-30 00:42:46 +0000118 for (Depth = 0; Input[Depth] == ' '; Depth++)
119 ;
120 if (Depth == 0)
121 return false;
122
123 size_t n1 = Input.find(':');
124 StringRef Loc = Input.substr(Depth, n1 - Depth);
125 size_t n2 = Loc.find('.');
126 if (n2 == StringRef::npos) {
127 if (Loc.getAsInteger(10, LineOffset))
128 return false;
129 Discriminator = 0;
130 } else {
131 if (Loc.substr(0, n2).getAsInteger(10, LineOffset))
132 return false;
133 if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator))
134 return false;
135 }
136
137 StringRef Rest = Input.substr(n1 + 2);
138 if (Rest[0] >= '0' && Rest[0] <= '9') {
139 IsCallsite = false;
140 size_t n3 = Rest.find(' ');
141 if (n3 == StringRef::npos) {
142 if (Rest.getAsInteger(10, NumSamples))
143 return false;
144 } else {
145 if (Rest.substr(0, n3).getAsInteger(10, NumSamples))
146 return false;
147 }
148 while (n3 != StringRef::npos) {
149 n3 += Rest.substr(n3).find_first_not_of(' ');
150 Rest = Rest.substr(n3);
151 n3 = Rest.find(' ');
152 StringRef pair = Rest;
153 if (n3 != StringRef::npos) {
154 pair = Rest.substr(0, n3);
155 }
Diego Novillo38be3332015-10-15 16:36:21 +0000156 size_t n4 = pair.find(':');
157 uint64_t count;
Dehao Chen67226882015-09-30 00:42:46 +0000158 if (pair.substr(n4 + 1).getAsInteger(10, count))
159 return false;
160 TargetCountMap[pair.substr(0, n4)] = count;
161 }
162 } else {
163 IsCallsite = true;
Diego Novillo38be3332015-10-15 16:36:21 +0000164 size_t n3 = Rest.find_last_of(':');
Dehao Chen67226882015-09-30 00:42:46 +0000165 CalleeName = Rest.substr(0, n3);
166 if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
167 return false;
168 }
169 return true;
170}
171
Diego Novillode1ab262014-09-09 12:40:50 +0000172/// \brief Load samples from a text file.
173///
174/// See the documentation at the top of the file for an explanation of
175/// the expected format.
176///
177/// \returns true if the file was loaded successfully, false otherwise.
Diego Novilloc572e922014-10-30 18:00:06 +0000178std::error_code SampleProfileReaderText::read() {
179 line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
Diego Novillode1ab262014-09-09 12:40:50 +0000180
Diego Novilloaae1ed82015-10-08 19:40:37 +0000181 InlineCallStack InlineStack;
Dehao Chen67226882015-09-30 00:42:46 +0000182
183 for (; !LineIt.is_at_eof(); ++LineIt) {
184 if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
185 continue;
Diego Novillode1ab262014-09-09 12:40:50 +0000186 // Read the header of each function.
187 //
188 // Note that for function identifiers we are actually expecting
189 // mangled names, but we may not always get them. This happens when
190 // the compiler decides not to emit the function (e.g., it was inlined
191 // and removed). In this case, the binary will not have the linkage
192 // name for the function, so the profiler will emit the function's
193 // unmangled name, which may contain characters like ':' and '>' in its
194 // name (member functions, templates, etc).
195 //
196 // The only requirement we place on the identifier, then, is that it
197 // should not begin with a number.
Dehao Chen67226882015-09-30 00:42:46 +0000198 if ((*LineIt)[0] != ' ') {
Diego Novillo38be3332015-10-15 16:36:21 +0000199 uint64_t NumSamples, NumHeadSamples;
Dehao Chen67226882015-09-30 00:42:46 +0000200 StringRef FName;
201 if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
202 reportError(LineIt.line_number(),
203 "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
204 return sampleprof_error::malformed;
205 }
206 Profiles[FName] = FunctionSamples();
207 FunctionSamples &FProfile = Profiles[FName];
208 FProfile.addTotalSamples(NumSamples);
209 FProfile.addHeadSamples(NumHeadSamples);
210 InlineStack.clear();
211 InlineStack.push_back(&FProfile);
212 } else {
Diego Novillo38be3332015-10-15 16:36:21 +0000213 uint64_t NumSamples;
Dehao Chen67226882015-09-30 00:42:46 +0000214 StringRef FName;
Diego Novillo38be3332015-10-15 16:36:21 +0000215 DenseMap<StringRef, uint64_t> TargetCountMap;
Dehao Chen67226882015-09-30 00:42:46 +0000216 bool IsCallsite;
Diego Novillo38be3332015-10-15 16:36:21 +0000217 uint32_t Depth, LineOffset, Discriminator;
Dehao Chen67226882015-09-30 00:42:46 +0000218 if (!ParseLine(*LineIt, IsCallsite, Depth, NumSamples, LineOffset,
219 Discriminator, FName, TargetCountMap)) {
Diego Novillo3376a782015-09-17 00:17:24 +0000220 reportError(LineIt.line_number(),
221 "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
222 *LineIt);
Diego Novilloc572e922014-10-30 18:00:06 +0000223 return sampleprof_error::malformed;
Diego Novillode1ab262014-09-09 12:40:50 +0000224 }
Dehao Chen67226882015-09-30 00:42:46 +0000225 if (IsCallsite) {
226 while (InlineStack.size() > Depth) {
227 InlineStack.pop_back();
Diego Novilloc572e922014-10-30 18:00:06 +0000228 }
Dehao Chen67226882015-09-30 00:42:46 +0000229 FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
230 CallsiteLocation(LineOffset, Discriminator, FName));
231 FSamples.addTotalSamples(NumSamples);
232 InlineStack.push_back(&FSamples);
233 } else {
234 while (InlineStack.size() > Depth) {
235 InlineStack.pop_back();
236 }
237 FunctionSamples &FProfile = *InlineStack.back();
238 for (const auto &name_count : TargetCountMap) {
239 FProfile.addCalledTargetSamples(LineOffset, Discriminator,
240 name_count.first, name_count.second);
241 }
242 FProfile.addBodySamples(LineOffset, Discriminator, NumSamples);
Diego Novilloc572e922014-10-30 18:00:06 +0000243 }
Diego Novillode1ab262014-09-09 12:40:50 +0000244 }
245 }
246
Diego Novilloc572e922014-10-30 18:00:06 +0000247 return sampleprof_error::success;
Diego Novillode1ab262014-09-09 12:40:50 +0000248}
249
Diego Novillod5336ae2014-11-01 00:56:55 +0000250template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
Diego Novilloc572e922014-10-30 18:00:06 +0000251 unsigned NumBytesRead = 0;
252 std::error_code EC;
253 uint64_t Val = decodeULEB128(Data, &NumBytesRead);
254
255 if (Val > std::numeric_limits<T>::max())
256 EC = sampleprof_error::malformed;
257 else if (Data + NumBytesRead > End)
258 EC = sampleprof_error::truncated;
259 else
260 EC = sampleprof_error::success;
261
262 if (EC) {
Diego Novillo3376a782015-09-17 00:17:24 +0000263 reportError(0, EC.message());
Diego Novilloc572e922014-10-30 18:00:06 +0000264 return EC;
265 }
266
267 Data += NumBytesRead;
268 return static_cast<T>(Val);
269}
270
271ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
272 std::error_code EC;
273 StringRef Str(reinterpret_cast<const char *>(Data));
274 if (Data + Str.size() + 1 > End) {
275 EC = sampleprof_error::truncated;
Diego Novillo3376a782015-09-17 00:17:24 +0000276 reportError(0, EC.message());
Diego Novilloc572e922014-10-30 18:00:06 +0000277 return EC;
278 }
279
280 Data += Str.size() + 1;
281 return Str;
282}
283
Diego Novillo760c5a82015-10-13 22:48:46 +0000284ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
285 std::error_code EC;
Diego Novillo38be3332015-10-15 16:36:21 +0000286 auto Idx = readNumber<uint32_t>();
Diego Novillo760c5a82015-10-13 22:48:46 +0000287 if (std::error_code EC = Idx.getError())
288 return EC;
289 if (*Idx >= NameTable.size())
290 return sampleprof_error::truncated_name_table;
291 return NameTable[*Idx];
292}
293
Diego Novilloa7f1e8e2015-10-09 17:54:24 +0000294std::error_code
295SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
Diego Novillo38be3332015-10-15 16:36:21 +0000296 auto Val = readNumber<uint64_t>();
Diego Novilloa7f1e8e2015-10-09 17:54:24 +0000297 if (std::error_code EC = Val.getError())
298 return EC;
299 FProfile.addTotalSamples(*Val);
300
Diego Novillo38be3332015-10-15 16:36:21 +0000301 Val = readNumber<uint64_t>();
Diego Novilloa7f1e8e2015-10-09 17:54:24 +0000302 if (std::error_code EC = Val.getError())
303 return EC;
304 FProfile.addHeadSamples(*Val);
305
306 // Read the samples in the body.
Diego Novillo38be3332015-10-15 16:36:21 +0000307 auto NumRecords = readNumber<uint32_t>();
Diego Novilloa7f1e8e2015-10-09 17:54:24 +0000308 if (std::error_code EC = NumRecords.getError())
309 return EC;
310
Diego Novillo38be3332015-10-15 16:36:21 +0000311 for (uint32_t I = 0; I < *NumRecords; ++I) {
Diego Novilloa7f1e8e2015-10-09 17:54:24 +0000312 auto LineOffset = readNumber<uint64_t>();
313 if (std::error_code EC = LineOffset.getError())
314 return EC;
315
316 auto Discriminator = readNumber<uint64_t>();
317 if (std::error_code EC = Discriminator.getError())
318 return EC;
319
320 auto NumSamples = readNumber<uint64_t>();
321 if (std::error_code EC = NumSamples.getError())
322 return EC;
323
Diego Novillo38be3332015-10-15 16:36:21 +0000324 auto NumCalls = readNumber<uint32_t>();
Diego Novilloa7f1e8e2015-10-09 17:54:24 +0000325 if (std::error_code EC = NumCalls.getError())
326 return EC;
327
Diego Novillo38be3332015-10-15 16:36:21 +0000328 for (uint32_t J = 0; J < *NumCalls; ++J) {
Diego Novillo760c5a82015-10-13 22:48:46 +0000329 auto CalledFunction(readStringFromTable());
Diego Novilloa7f1e8e2015-10-09 17:54:24 +0000330 if (std::error_code EC = CalledFunction.getError())
331 return EC;
332
333 auto CalledFunctionSamples = readNumber<uint64_t>();
334 if (std::error_code EC = CalledFunctionSamples.getError())
335 return EC;
336
337 FProfile.addCalledTargetSamples(*LineOffset, *Discriminator,
338 *CalledFunction, *CalledFunctionSamples);
339 }
340
341 FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples);
342 }
343
344 // Read all the samples for inlined function calls.
Diego Novillo38be3332015-10-15 16:36:21 +0000345 auto NumCallsites = readNumber<uint32_t>();
Diego Novilloa7f1e8e2015-10-09 17:54:24 +0000346 if (std::error_code EC = NumCallsites.getError())
347 return EC;
348
Diego Novillo38be3332015-10-15 16:36:21 +0000349 for (uint32_t J = 0; J < *NumCallsites; ++J) {
Diego Novilloa7f1e8e2015-10-09 17:54:24 +0000350 auto LineOffset = readNumber<uint64_t>();
351 if (std::error_code EC = LineOffset.getError())
352 return EC;
353
354 auto Discriminator = readNumber<uint64_t>();
355 if (std::error_code EC = Discriminator.getError())
356 return EC;
357
Diego Novillo760c5a82015-10-13 22:48:46 +0000358 auto FName(readStringFromTable());
Diego Novilloa7f1e8e2015-10-09 17:54:24 +0000359 if (std::error_code EC = FName.getError())
360 return EC;
361
362 FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
363 CallsiteLocation(*LineOffset, *Discriminator, *FName));
364 if (std::error_code EC = readProfile(CalleeProfile))
365 return EC;
366 }
367
368 return sampleprof_error::success;
369}
370
Diego Novilloc572e922014-10-30 18:00:06 +0000371std::error_code SampleProfileReaderBinary::read() {
372 while (!at_eof()) {
Diego Novillo760c5a82015-10-13 22:48:46 +0000373 auto FName(readStringFromTable());
Diego Novilloc572e922014-10-30 18:00:06 +0000374 if (std::error_code EC = FName.getError())
375 return EC;
376
377 Profiles[*FName] = FunctionSamples();
378 FunctionSamples &FProfile = Profiles[*FName];
379
Diego Novilloa7f1e8e2015-10-09 17:54:24 +0000380 if (std::error_code EC = readProfile(FProfile))
Diego Novilloc572e922014-10-30 18:00:06 +0000381 return EC;
Diego Novilloc572e922014-10-30 18:00:06 +0000382 }
383
384 return sampleprof_error::success;
385}
386
387std::error_code SampleProfileReaderBinary::readHeader() {
388 Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
389 End = Data + Buffer->getBufferSize();
390
391 // Read and check the magic identifier.
392 auto Magic = readNumber<uint64_t>();
393 if (std::error_code EC = Magic.getError())
394 return EC;
395 else if (*Magic != SPMagic())
396 return sampleprof_error::bad_magic;
397
398 // Read the version number.
399 auto Version = readNumber<uint64_t>();
400 if (std::error_code EC = Version.getError())
401 return EC;
402 else if (*Version != SPVersion())
403 return sampleprof_error::unsupported_version;
404
Diego Novillo760c5a82015-10-13 22:48:46 +0000405 // Read the name table.
Diego Novillo38be3332015-10-15 16:36:21 +0000406 auto Size = readNumber<uint32_t>();
Diego Novillo760c5a82015-10-13 22:48:46 +0000407 if (std::error_code EC = Size.getError())
408 return EC;
409 NameTable.reserve(*Size);
Diego Novillo38be3332015-10-15 16:36:21 +0000410 for (uint32_t I = 0; I < *Size; ++I) {
Diego Novillo760c5a82015-10-13 22:48:46 +0000411 auto Name(readString());
412 if (std::error_code EC = Name.getError())
413 return EC;
414 NameTable.push_back(*Name);
415 }
416
Diego Novilloc572e922014-10-30 18:00:06 +0000417 return sampleprof_error::success;
418}
419
420bool SampleProfileReaderBinary::hasFormat(const MemoryBuffer &Buffer) {
421 const uint8_t *Data =
422 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
423 uint64_t Magic = decodeULEB128(Data);
424 return Magic == SPMagic();
425}
426
Diego Novillo3376a782015-09-17 00:17:24 +0000427std::error_code SampleProfileReaderGCC::skipNextWord() {
428 uint32_t dummy;
429 if (!GcovBuffer.readInt(dummy))
430 return sampleprof_error::truncated;
431 return sampleprof_error::success;
432}
433
434template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() {
435 if (sizeof(T) <= sizeof(uint32_t)) {
436 uint32_t Val;
437 if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max())
438 return static_cast<T>(Val);
439 } else if (sizeof(T) <= sizeof(uint64_t)) {
440 uint64_t Val;
441 if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max())
442 return static_cast<T>(Val);
443 }
444
445 std::error_code EC = sampleprof_error::malformed;
446 reportError(0, EC.message());
447 return EC;
448}
449
450ErrorOr<StringRef> SampleProfileReaderGCC::readString() {
451 StringRef Str;
452 if (!GcovBuffer.readString(Str))
453 return sampleprof_error::truncated;
454 return Str;
455}
456
457std::error_code SampleProfileReaderGCC::readHeader() {
458 // Read the magic identifier.
459 if (!GcovBuffer.readGCDAFormat())
460 return sampleprof_error::unrecognized_format;
461
462 // Read the version number. Note - the GCC reader does not validate this
463 // version, but the profile creator generates v704.
464 GCOV::GCOVVersion version;
465 if (!GcovBuffer.readGCOVVersion(version))
466 return sampleprof_error::unrecognized_format;
467
468 if (version != GCOV::V704)
469 return sampleprof_error::unsupported_version;
470
471 // Skip the empty integer.
472 if (std::error_code EC = skipNextWord())
473 return EC;
474
475 return sampleprof_error::success;
476}
477
478std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) {
479 uint32_t Tag;
480 if (!GcovBuffer.readInt(Tag))
481 return sampleprof_error::truncated;
482
483 if (Tag != Expected)
484 return sampleprof_error::malformed;
485
486 if (std::error_code EC = skipNextWord())
487 return EC;
488
489 return sampleprof_error::success;
490}
491
492std::error_code SampleProfileReaderGCC::readNameTable() {
493 if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames))
494 return EC;
495
496 uint32_t Size;
497 if (!GcovBuffer.readInt(Size))
498 return sampleprof_error::truncated;
499
500 for (uint32_t I = 0; I < Size; ++I) {
501 StringRef Str;
502 if (!GcovBuffer.readString(Str))
503 return sampleprof_error::truncated;
504 Names.push_back(Str);
505 }
506
507 return sampleprof_error::success;
508}
509
510std::error_code SampleProfileReaderGCC::readFunctionProfiles() {
511 if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction))
512 return EC;
513
514 uint32_t NumFunctions;
515 if (!GcovBuffer.readInt(NumFunctions))
516 return sampleprof_error::truncated;
517
Diego Novilloaae1ed82015-10-08 19:40:37 +0000518 InlineCallStack Stack;
Diego Novillo3376a782015-09-17 00:17:24 +0000519 for (uint32_t I = 0; I < NumFunctions; ++I)
Diego Novilloaae1ed82015-10-08 19:40:37 +0000520 if (std::error_code EC = readOneFunctionProfile(Stack, true, 0))
Diego Novillo3376a782015-09-17 00:17:24 +0000521 return EC;
522
523 return sampleprof_error::success;
524}
525
Diego Novilloaae1ed82015-10-08 19:40:37 +0000526std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
527 const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
Diego Novillo3376a782015-09-17 00:17:24 +0000528 uint64_t HeadCount = 0;
Diego Novilloaae1ed82015-10-08 19:40:37 +0000529 if (InlineStack.size() == 0)
Diego Novillo3376a782015-09-17 00:17:24 +0000530 if (!GcovBuffer.readInt64(HeadCount))
531 return sampleprof_error::truncated;
532
533 uint32_t NameIdx;
534 if (!GcovBuffer.readInt(NameIdx))
535 return sampleprof_error::truncated;
536
537 StringRef Name(Names[NameIdx]);
538
539 uint32_t NumPosCounts;
540 if (!GcovBuffer.readInt(NumPosCounts))
541 return sampleprof_error::truncated;
542
Diego Novilloaae1ed82015-10-08 19:40:37 +0000543 uint32_t NumCallsites;
544 if (!GcovBuffer.readInt(NumCallsites))
Diego Novillo3376a782015-09-17 00:17:24 +0000545 return sampleprof_error::truncated;
546
Diego Novilloaae1ed82015-10-08 19:40:37 +0000547 FunctionSamples *FProfile = nullptr;
548 if (InlineStack.size() == 0) {
549 // If this is a top function that we have already processed, do not
550 // update its profile again. This happens in the presence of
551 // function aliases. Since these aliases share the same function
552 // body, there will be identical replicated profiles for the
553 // original function. In this case, we simply not bother updating
554 // the profile of the original function.
555 FProfile = &Profiles[Name];
556 FProfile->addHeadSamples(HeadCount);
557 if (FProfile->getTotalSamples() > 0)
Diego Novillo3376a782015-09-17 00:17:24 +0000558 Update = false;
Diego Novilloaae1ed82015-10-08 19:40:37 +0000559 } else {
560 // Otherwise, we are reading an inlined instance. The top of the
561 // inline stack contains the profile of the caller. Insert this
562 // callee in the caller's CallsiteMap.
563 FunctionSamples *CallerProfile = InlineStack.front();
564 uint32_t LineOffset = Offset >> 16;
565 uint32_t Discriminator = Offset & 0xffff;
566 FProfile = &CallerProfile->functionSamplesAt(
567 CallsiteLocation(LineOffset, Discriminator, Name));
Diego Novillo3376a782015-09-17 00:17:24 +0000568 }
569
570 for (uint32_t I = 0; I < NumPosCounts; ++I) {
571 uint32_t Offset;
572 if (!GcovBuffer.readInt(Offset))
573 return sampleprof_error::truncated;
574
575 uint32_t NumTargets;
576 if (!GcovBuffer.readInt(NumTargets))
577 return sampleprof_error::truncated;
578
579 uint64_t Count;
580 if (!GcovBuffer.readInt64(Count))
581 return sampleprof_error::truncated;
582
Diego Novilloaae1ed82015-10-08 19:40:37 +0000583 // The line location is encoded in the offset as:
584 // high 16 bits: line offset to the start of the function.
585 // low 16 bits: discriminator.
586 uint32_t LineOffset = Offset >> 16;
587 uint32_t Discriminator = Offset & 0xffff;
Diego Novillo3376a782015-09-17 00:17:24 +0000588
Diego Novilloaae1ed82015-10-08 19:40:37 +0000589 InlineCallStack NewStack;
590 NewStack.push_back(FProfile);
591 NewStack.insert(NewStack.end(), InlineStack.begin(), InlineStack.end());
592 if (Update) {
593 // Walk up the inline stack, adding the samples on this line to
594 // the total sample count of the callers in the chain.
595 for (auto CallerProfile : NewStack)
596 CallerProfile->addTotalSamples(Count);
597
598 // Update the body samples for the current profile.
599 FProfile->addBodySamples(LineOffset, Discriminator, Count);
600 }
601
602 // Process the list of functions called at an indirect call site.
603 // These are all the targets that a function pointer (or virtual
604 // function) resolved at runtime.
Diego Novillo3376a782015-09-17 00:17:24 +0000605 for (uint32_t J = 0; J < NumTargets; J++) {
606 uint32_t HistVal;
607 if (!GcovBuffer.readInt(HistVal))
608 return sampleprof_error::truncated;
609
610 if (HistVal != HIST_TYPE_INDIR_CALL_TOPN)
611 return sampleprof_error::malformed;
612
613 uint64_t TargetIdx;
614 if (!GcovBuffer.readInt64(TargetIdx))
615 return sampleprof_error::truncated;
616 StringRef TargetName(Names[TargetIdx]);
617
618 uint64_t TargetCount;
619 if (!GcovBuffer.readInt64(TargetCount))
620 return sampleprof_error::truncated;
621
622 if (Update) {
623 FunctionSamples &TargetProfile = Profiles[TargetName];
Diego Novilloaae1ed82015-10-08 19:40:37 +0000624 TargetProfile.addCalledTargetSamples(LineOffset, Discriminator,
625 TargetName, TargetCount);
Diego Novillo3376a782015-09-17 00:17:24 +0000626 }
627 }
628 }
629
Diego Novilloaae1ed82015-10-08 19:40:37 +0000630 // Process all the inlined callers into the current function. These
631 // are all the callsites that were inlined into this function.
632 for (uint32_t I = 0; I < NumCallsites; I++) {
Diego Novillo3376a782015-09-17 00:17:24 +0000633 // The offset is encoded as:
634 // high 16 bits: line offset to the start of the function.
635 // low 16 bits: discriminator.
636 uint32_t Offset;
637 if (!GcovBuffer.readInt(Offset))
638 return sampleprof_error::truncated;
Diego Novilloaae1ed82015-10-08 19:40:37 +0000639 InlineCallStack NewStack;
640 NewStack.push_back(FProfile);
641 NewStack.insert(NewStack.end(), InlineStack.begin(), InlineStack.end());
642 if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
Diego Novillo3376a782015-09-17 00:17:24 +0000643 return EC;
644 }
645
646 return sampleprof_error::success;
647}
648
Diego Novillo3376a782015-09-17 00:17:24 +0000649/// \brief Read a GCC AutoFDO profile.
650///
651/// This format is generated by the Linux Perf conversion tool at
652/// https://github.com/google/autofdo.
653std::error_code SampleProfileReaderGCC::read() {
654 // Read the string table.
655 if (std::error_code EC = readNameTable())
656 return EC;
657
658 // Read the source profile.
659 if (std::error_code EC = readFunctionProfiles())
660 return EC;
661
Diego Novillo3376a782015-09-17 00:17:24 +0000662 return sampleprof_error::success;
663}
664
665bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
666 StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart()));
667 return Magic == "adcg*704";
668}
669
Diego Novilloc572e922014-10-30 18:00:06 +0000670/// \brief Prepare a memory buffer for the contents of \p Filename.
Diego Novillode1ab262014-09-09 12:40:50 +0000671///
Diego Novilloc572e922014-10-30 18:00:06 +0000672/// \returns an error code indicating the status of the buffer.
Diego Novillofcd55602014-11-03 00:51:45 +0000673static ErrorOr<std::unique_ptr<MemoryBuffer>>
674setupMemoryBuffer(std::string Filename) {
Diego Novilloc572e922014-10-30 18:00:06 +0000675 auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename);
676 if (std::error_code EC = BufferOrErr.getError())
677 return EC;
Diego Novillofcd55602014-11-03 00:51:45 +0000678 auto Buffer = std::move(BufferOrErr.get());
Diego Novilloc572e922014-10-30 18:00:06 +0000679
680 // Sanity check the file.
Diego Novillo38be3332015-10-15 16:36:21 +0000681 if (Buffer->getBufferSize() > std::numeric_limits<uint32_t>::max())
Diego Novilloc572e922014-10-30 18:00:06 +0000682 return sampleprof_error::too_large;
683
Diego Novillofcd55602014-11-03 00:51:45 +0000684 return std::move(Buffer);
Diego Novilloc572e922014-10-30 18:00:06 +0000685}
686
687/// \brief Create a sample profile reader based on the format of the input file.
688///
689/// \param Filename The file to open.
690///
691/// \param Reader The reader to instantiate according to \p Filename's format.
692///
693/// \param C The LLVM context to use to emit diagnostics.
694///
695/// \returns an error code indicating the status of the created reader.
Diego Novillofcd55602014-11-03 00:51:45 +0000696ErrorOr<std::unique_ptr<SampleProfileReader>>
697SampleProfileReader::create(StringRef Filename, LLVMContext &C) {
698 auto BufferOrError = setupMemoryBuffer(Filename);
699 if (std::error_code EC = BufferOrError.getError())
Diego Novilloc572e922014-10-30 18:00:06 +0000700 return EC;
701
Diego Novillofcd55602014-11-03 00:51:45 +0000702 auto Buffer = std::move(BufferOrError.get());
703 std::unique_ptr<SampleProfileReader> Reader;
Diego Novilloc572e922014-10-30 18:00:06 +0000704 if (SampleProfileReaderBinary::hasFormat(*Buffer))
705 Reader.reset(new SampleProfileReaderBinary(std::move(Buffer), C));
Diego Novillo3376a782015-09-17 00:17:24 +0000706 else if (SampleProfileReaderGCC::hasFormat(*Buffer))
707 Reader.reset(new SampleProfileReaderGCC(std::move(Buffer), C));
Diego Novilloc572e922014-10-30 18:00:06 +0000708 else
709 Reader.reset(new SampleProfileReaderText(std::move(Buffer), C));
710
Diego Novillofcd55602014-11-03 00:51:45 +0000711 if (std::error_code EC = Reader->readHeader())
712 return EC;
713
714 return std::move(Reader);
Diego Novillode1ab262014-09-09 12:40:50 +0000715}