[SampleFDO] Add a new compact binary format for sample profile.
The name table occupies a large share of a sample profile in the current binary format.
To reduce its size, this patch changes the sample writer/reader to
save/restore the MD5 hash of each name in the name table. The sample annotation
phase also uses the MD5 hash of a name to query its samples accordingly.
Experiments show that the compact binary format generally reduces the size of a
sample profile by 2/3 compared with the binary format.
Differential Revision: https://reviews.llvm.org/D47955
llvm-svn: 334447
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index e192b58..345549e 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -319,16 +319,33 @@
return Str;
}
-ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
+template <typename T>
+inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
std::error_code EC;
auto Idx = readNumber<uint32_t>();
if (std::error_code EC = Idx.getError())
return EC;
- if (*Idx >= NameTable.size())
+ if (*Idx >= Table.size())
return sampleprof_error::truncated_name_table;
+ return *Idx;
+}
+
+ErrorOr<StringRef> SampleProfileReaderRawBinary::readStringFromTable() {
+ auto Idx = readStringIndex(NameTable);
+ if (std::error_code EC = Idx.getError())
+ return EC;
+
return NameTable[*Idx];
}
+/// Read a name-table index via readStringIndex() and return the NameTable
+/// entry it selects as a StringRef. An error from readStringIndex() is
+/// returned unchanged.
+ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() {
+  auto TableIdx = readStringIndex(NameTable);
+  std::error_code Err = TableIdx.getError();
+  if (Err)
+    return Err;
+
+  return StringRef(NameTable[*TableIdx]);
+}
+
std::error_code
SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
auto NumSamples = readNumber<uint64_t>();
@@ -429,6 +446,48 @@
return sampleprof_error::success;
}
+/// Accept \p Magic only when it equals SPMagic(); otherwise report bad_magic.
+std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
+  if (Magic != SPMagic())
+    return sampleprof_error::bad_magic;
+  return sampleprof_error::success;
+}
+
+/// Accept \p Magic only when it equals SPMagic(SPF_Compact_Binary);
+/// otherwise report bad_magic.
+std::error_code
+SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) {
+  if (Magic != SPMagic(SPF_Compact_Binary))
+    return sampleprof_error::bad_magic;
+  return sampleprof_error::success;
+}
+
+/// Read the name table: a uint32_t entry count followed by that many strings
+/// obtained from readString(), appended to NameTable in stream order.
+/// The first read error encountered is returned.
+std::error_code SampleProfileReaderRawBinary::readNameTable() {
+  auto Count = readNumber<uint32_t>();
+  if (std::error_code EC = Count.getError())
+    return EC;
+
+  const uint32_t NumNames = *Count;
+  NameTable.reserve(NumNames);
+  for (uint32_t Remaining = NumNames; Remaining != 0; --Remaining) {
+    auto Entry = readString();
+    if (std::error_code EC = Entry.getError())
+      return EC;
+    NameTable.push_back(*Entry);
+  }
+
+  return sampleprof_error::success;
+}
+
+/// Read the compact name table: a uint64_t entry count followed by that many
+/// uint64_t values. Each value is stored in NameTable as its decimal string
+/// form (std::to_string). The first read error encountered is returned.
+std::error_code SampleProfileReaderCompactBinary::readNameTable() {
+  auto Size = readNumber<uint64_t>();
+  if (std::error_code EC = Size.getError())
+    return EC;
+  NameTable.reserve(*Size);
+  // The counter is as wide as the count it is compared against. A 32-bit
+  // counter would wrap before reaching a count above UINT32_MAX, so the
+  // loop condition could never become false.
+  for (uint64_t I = 0; I < *Size; ++I) {
+    auto FID = readNumber<uint64_t>();
+    if (std::error_code EC = FID.getError())
+      return EC;
+    NameTable.push_back(std::to_string(*FID));
+  }
+  return sampleprof_error::success;
+}
+
std::error_code SampleProfileReaderBinary::readHeader() {
Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
End = Data + Buffer->getBufferSize();
@@ -437,7 +496,7 @@
auto Magic = readNumber<uint64_t>();
if (std::error_code EC = Magic.getError())
return EC;
- else if (*Magic != SPMagic())
+ else if (std::error_code EC = verifySPMagic(*Magic))
return sampleprof_error::bad_magic;
// Read the version number.
@@ -450,18 +509,8 @@
if (std::error_code EC = readSummary())
return EC;
- // Read the name table.
- auto Size = readNumber<uint32_t>();
- if (std::error_code EC = Size.getError())
+ if (std::error_code EC = readNameTable())
return EC;
- NameTable.reserve(*Size);
- for (uint32_t I = 0; I < *Size; ++I) {
- auto Name(readString());
- if (std::error_code EC = Name.getError())
- return EC;
- NameTable.push_back(*Name);
- }
-
return sampleprof_error::success;
}
@@ -521,13 +570,20 @@
return sampleprof_error::success;
}
-bool SampleProfileReaderBinary::hasFormat(const MemoryBuffer &Buffer) {
+bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) {
const uint8_t *Data =
reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
uint64_t Magic = decodeULEB128(Data);
return Magic == SPMagic();
}
+/// Return true when the leading ULEB128 value in \p Buffer equals
+/// SPMagic(SPF_Compact_Binary).
+bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) {
+  const auto *Start =
+      reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
+  const uint64_t Leading = decodeULEB128(Start);
+  return SPMagic(SPF_Compact_Binary) == Leading;
+}
+
std::error_code SampleProfileReaderGCC::skipNextWord() {
uint32_t dummy;
if (!GcovBuffer.readInt(dummy))
@@ -813,8 +869,10 @@
ErrorOr<std::unique_ptr<SampleProfileReader>>
SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C) {
std::unique_ptr<SampleProfileReader> Reader;
- if (SampleProfileReaderBinary::hasFormat(*B))
- Reader.reset(new SampleProfileReaderBinary(std::move(B), C));
+ if (SampleProfileReaderRawBinary::hasFormat(*B))
+ Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C));
+ else if (SampleProfileReaderCompactBinary::hasFormat(*B))
+ Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C));
else if (SampleProfileReaderGCC::hasFormat(*B))
Reader.reset(new SampleProfileReaderGCC(std::move(B), C));
else if (SampleProfileReaderText::hasFormat(*B))
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index 45c8178..092139d 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -25,6 +25,7 @@
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MD5.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstdint>
@@ -144,13 +145,61 @@
}
}
-std::error_code SampleProfileWriterBinary::writeHeader(
- const StringMap<FunctionSamples> &ProfileMap) {
- auto &OS = *OutputStream;
+/// Insert every NameTable key into \p V, then renumber the NameTable entries
+/// 0, 1, 2, ... following the iteration order of \p V.
+void SampleProfileWriterBinary::stablizeNameTable(std::set<StringRef> &V) {
+  // Going through the ordered set makes the assigned indices deterministic.
+  for (const auto &Entry : NameTable)
+    V.insert(Entry.first);
+
+  int NextIdx = 0;
+  for (const StringRef &Name : V) {
+    NameTable[Name] = NextIdx;
+    ++NextIdx;
+  }
+}
+/// Emit the name table: the entry count as ULEB128, then every name in the
+/// order produced by stablizeNameTable(), each followed by a ULEB128 zero.
+std::error_code SampleProfileWriterRawBinary::writeNameTable() {
+  std::set<StringRef> SortedNames;
+  stablizeNameTable(SortedNames);
+
+  auto &Out = *OutputStream;
+  encodeULEB128(NameTable.size(), Out);
+  for (auto Name : SortedNames) {
+    Out << Name;
+    encodeULEB128(0, Out);
+  }
+  return sampleprof_error::success;
+}
+
+/// Emit the compact name table: the entry count as ULEB128, then
+/// MD5Hash(name) as ULEB128 for every name in the order produced by
+/// stablizeNameTable().
+std::error_code SampleProfileWriterCompactBinary::writeNameTable() {
+  std::set<StringRef> SortedNames;
+  stablizeNameTable(SortedNames);
+
+  auto &Out = *OutputStream;
+  encodeULEB128(NameTable.size(), Out);
+  for (auto Name : SortedNames)
+    encodeULEB128(MD5Hash(Name), Out);
+  return sampleprof_error::success;
+}
+
+std::error_code SampleProfileWriterRawBinary::writeMagicIdent() {
+ auto &OS = *OutputStream;
// Write file magic identifier.
encodeULEB128(SPMagic(), OS);
encodeULEB128(SPVersion(), OS);
+ return sampleprof_error::success;
+}
+
+/// Emit SPMagic(SPF_Compact_Binary) followed by SPVersion(), both encoded
+/// as ULEB128, to the output stream.
+std::error_code SampleProfileWriterCompactBinary::writeMagicIdent() {
+  auto &Stream = *OutputStream;
+  const auto Magic = SPMagic(SPF_Compact_Binary);
+  encodeULEB128(Magic, Stream);
+  encodeULEB128(SPVersion(), Stream);
+  return sampleprof_error::success;
+}
+
+std::error_code SampleProfileWriterBinary::writeHeader(
+ const StringMap<FunctionSamples> &ProfileMap) {
+ writeMagicIdent();
computeSummary(ProfileMap);
if (auto EC = writeSummary())
@@ -162,20 +211,7 @@
addNames(I.second);
}
- // Sort the names to make NameTable is deterministic.
- std::set<StringRef> V;
- for (const auto &I : NameTable)
- V.insert(I.first);
- int i = 0;
- for (const StringRef &N : V)
- NameTable[N] = i++;
-
- // Write out the name table.
- encodeULEB128(NameTable.size(), OS);
- for (auto N : V) {
- OS << N;
- encodeULEB128(0, OS);
- }
+ writeNameTable();
return sampleprof_error::success;
}
@@ -258,7 +294,7 @@
SampleProfileWriter::create(StringRef Filename, SampleProfileFormat Format) {
std::error_code EC;
std::unique_ptr<raw_ostream> OS;
- if (Format == SPF_Binary)
+ if (Format == SPF_Raw_Binary || Format == SPF_Compact_Binary)
OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::F_None));
else
OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::F_Text));
@@ -281,8 +317,10 @@
std::error_code EC;
std::unique_ptr<SampleProfileWriter> Writer;
- if (Format == SPF_Binary)
- Writer.reset(new SampleProfileWriterBinary(OS));
+ if (Format == SPF_Raw_Binary)
+ Writer.reset(new SampleProfileWriterRawBinary(OS));
+ else if (Format == SPF_Compact_Binary)
+ Writer.reset(new SampleProfileWriterCompactBinary(OS));
else if (Format == SPF_Text)
Writer.reset(new SampleProfileWriterText(OS));
else if (Format == SPF_GCC)