[SampleFDO] Add FunctionOffsetTable in compact binary format profile.

The patch saves a function offset table which maps each function name index to
the offset of its function profile from the start of the binary profile. With
the function offset table, the profile reader doesn't have to read the function
profiles that will not be used when compiling a module. For profile sizes
around 10~20M, this saves ~10% of compile time.

Differential Revision: https://reviews.llvm.org/D51863

llvm-svn: 342283
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index b0818d1..1a12441 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -67,6 +67,8 @@
       return "Unimplemented feature";
     case sampleprof_error::counter_overflow:
       return "Counter overflow";
+    case sampleprof_error::ostream_seek_unsupported:
+      return "Ostream does not support seek";
     }
     llvm_unreachable("A value of sampleprof_error has no message.");
   }
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 5503104..2b4551b 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -30,6 +30,7 @@
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MD5.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
@@ -320,6 +321,21 @@
 }
 
 template <typename T>
+ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
+  // Decode a little-endian, unaligned T at Data. Test the remaining byte count
+  // instead of forming Data + sizeof(T), which may point outside the buffer.
+  if (Data > End || static_cast<size_t>(End - Data) < sizeof(T)) {
+    std::error_code EC = sampleprof_error::truncated;
+    reportError(0, EC.message());
+    return EC;
+  }
+
+  using namespace support;
+  T Val = endian::readNext<T, little, unaligned>(Data);
+  return Val;
+}
+
+template <typename T>
 inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
   std::error_code EC;
   auto Idx = readNumber<uint32_t>();
@@ -423,29 +439,51 @@
   return sampleprof_error::success;
 }
 
+std::error_code SampleProfileReaderBinary::readFuncProfile() {
+  // A record starts with the head sample count, then the function name.
+  auto HeadSamples = readNumber<uint64_t>();
+  if (std::error_code EC = HeadSamples.getError())
+    return EC;
+  auto Name(readStringFromTable());
+  if (std::error_code EC = Name.getError())
+    return EC;
+
+  // Assign a fresh FunctionSamples, replacing any entry already present.
+  Profiles[*Name] = FunctionSamples();
+  FunctionSamples &Samples = Profiles[*Name];
+  Samples.setName(*Name);
+  Samples.addHeadSamples(*HeadSamples);
+
+  if (std::error_code EC = readProfile(Samples))
+    return EC;
+  return sampleprof_error::success;
+}
+
 std::error_code SampleProfileReaderBinary::read() {
   while (!at_eof()) {
-    auto NumHeadSamples = readNumber<uint64_t>();
-    if (std::error_code EC = NumHeadSamples.getError())
-      return EC;
-
-    auto FName(readStringFromTable());
-    if (std::error_code EC = FName.getError())
-      return EC;
-
-    Profiles[*FName] = FunctionSamples();
-    FunctionSamples &FProfile = Profiles[*FName];
-    FProfile.setName(*FName);
-
-    FProfile.addHeadSamples(*NumHeadSamples);
-
-    if (std::error_code EC = readProfile(FProfile))
+    if (std::error_code EC = readFuncProfile())
       return EC;
   }
 
   return sampleprof_error::success;
 }
 
+std::error_code SampleProfileReaderCompactBinary::read() {
+  for (auto Name : FuncsToUse) {
+    // Look the function up by the decimal text of its name's MD5 hash.
+    auto Entry = FuncOffsetTable.find(StringRef(std::to_string(MD5Hash(Name))));
+    if (Entry == FuncOffsetTable.end())
+      continue;
+    const uint8_t *Resume = Data;
+    Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
+           Entry->second;
+    if (std::error_code EC = readFuncProfile())
+      return EC;
+    Data = Resume;
+  }
+  return sampleprof_error::success;
+}
+
 std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
   if (Magic == SPMagic())
     return sampleprof_error::success;
@@ -514,6 +552,53 @@
   return sampleprof_error::success;
 }
 
+std::error_code SampleProfileReaderCompactBinary::readHeader() {
+  // Stop on a bad base header instead of parsing the offset table anyway.
+  if (std::error_code EC = SampleProfileReaderBinary::readHeader())
+    return EC;
+  return readFuncOffsetTable();
+}
+
+std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() {
+  // Load the name-index -> profile-offset table located by the header slot.
+  auto TableOffset = readUnencodedNumber<uint64_t>();
+  if (std::error_code EC = TableOffset.getError())
+    return EC;
+  // The offset is file data: refuse one that points past the buffer.
+  if (*TableOffset > Buffer->getBufferSize())
+    return sampleprof_error::truncated;
+  const uint8_t *SavedData = Data;
+  const uint8_t *TableStart =
+      reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
+      *TableOffset;
+  Data = TableStart;
+  auto Size = readNumber<uint64_t>();
+  if (std::error_code EC = Size.getError())
+    return EC;
+  FuncOffsetTable.reserve(*Size);
+  for (uint64_t I = 0; I < *Size; ++I) {
+    auto FName(readStringFromTable());
+    if (std::error_code EC = FName.getError())
+      return EC;
+    auto Offset = readNumber<uint64_t>();
+    if (std::error_code EC = Offset.getError())
+      return EC;
+    FuncOffsetTable[*FName] = *Offset;
+  }
+  // Profiles stop where the table starts; resume reading after the slot.
+  End = TableStart;
+  Data = SavedData;
+  return sampleprof_error::success;
+}
+
+void SampleProfileReaderCompactBinary::collectFuncsToUse(const Module &M) {
+  // Rebuild FuncsToUse from scratch for the functions in M.
+  FuncsToUse.clear();
+  // Keep only the part of each name that precedes its first '.'.
+  for (auto &F : M)
+    FuncsToUse.insert(F.getName().split('.').first);
+}
+
 std::error_code SampleProfileReaderBinary::readSummaryEntry(
     std::vector<ProfileSummaryEntry> &Entries) {
   auto Cutoff = readNumber<uint64_t>();
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index b4de301..b1c669e 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -22,6 +22,8 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ProfileData/ProfileCommon.h"
 #include "llvm/ProfileData/SampleProf.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/EndianStream.h"
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/LEB128.h"
@@ -64,6 +66,15 @@
   return sampleprof_error::success;
 }
 
+std::error_code SampleProfileWriterCompactBinary::write(
+    const StringMap<FunctionSamples> &ProfileMap) {
+  // Emit the ordinary profile first, then append the function offset table.
+  std::error_code Status = SampleProfileWriter::write(ProfileMap);
+  if (!Status)
+    Status = writeFuncOffsetTable();
+  return Status ? Status : std::error_code(sampleprof_error::success);
+}
+
 /// Write samples to a text file.
 ///
 /// Note: it may be tempting to implement this in terms of
@@ -168,6 +179,30 @@
   return sampleprof_error::success;
 }
 
+std::error_code SampleProfileWriterCompactBinary::writeFuncOffsetTable() {
+  auto &Out = *OutputStream;
+  // NOTE(review): unchecked cast; assumes OutputStream is a raw_fd_ostream.
+  auto &SeekableOut = static_cast<raw_fd_ostream &>(Out);
+
+  // The table begins at the current output position. Go back and store that
+  // position in the slot recorded in TableOffset, then return here.
+  uint64_t TableStart = Out.tell();
+  if (SeekableOut.seek(TableOffset) == (uint64_t)-1)
+    return sampleprof_error::ostream_seek_unsupported;
+  support::endian::Writer SlotWriter(Out, support::little);
+  SlotWriter.write(TableStart);
+  if (SeekableOut.seek(TableStart) == (uint64_t)-1)
+    return sampleprof_error::ostream_seek_unsupported;
+
+  // Entry count first, then one (name index, profile offset) pair per entry.
+  encodeULEB128(FuncOffsetTable.size(), Out);
+  for (const auto &Entry : FuncOffsetTable) {
+    writeNameIdx(Entry.first);
+    encodeULEB128(Entry.second, Out);
+  }
+  return sampleprof_error::success;
+}
+
 std::error_code SampleProfileWriterCompactBinary::writeNameTable() {
   auto &OS = *OutputStream;
   std::set<StringRef> V;
@@ -215,6 +250,19 @@
   return sampleprof_error::success;
 }
 
+std::error_code SampleProfileWriterCompactBinary::writeHeader(
+    const StringMap<FunctionSamples> &ProfileMap) {
+  if (auto EC = SampleProfileWriterBinary::writeHeader(ProfileMap))
+    return EC;
+
+  // Remember where the 8-byte slot for the function offset table position
+  // lives and emit a placeholder there; writeFuncOffsetTable() patches it.
+  TableOffset = OutputStream->tell();
+  support::endian::Writer Placeholder(*OutputStream, support::little);
+  Placeholder.write(static_cast<uint64_t>(-2));
+  return sampleprof_error::success;
+}
+
 std::error_code SampleProfileWriterBinary::writeSummary() {
   auto &OS = *OutputStream;
   encodeULEB128(Summary->getTotalCount(), OS);
@@ -283,6 +331,15 @@
   return writeBody(S);
 }
 
+std::error_code
+SampleProfileWriterCompactBinary::write(const FunctionSamples &S) {
+  // Note where this function's profile starts before emitting any of it;
+  // FuncOffsetTable keeps that position under the function's name.
+  FuncOffsetTable[S.getName()] = OutputStream->tell();
+  encodeULEB128(S.getHeadSamples(), *OutputStream);
+  return writeBody(S);
+}
+
 /// Create a sample profile file writer based on the specified format.
 ///
 /// \param Filename The file to create.