[SampleFDO] Add indexing for function profiles so they can be loaded on demand
in ExtBinary format

Currently, for Text, Binary and ExtBinary format profiles, when we compile a
module with SampleFDO we have to load all the function profiles from the
profile input, even if none of the module's functions show up in the profile.
That is a waste of compile time.

The CompactBinary format already supports loading function profiles on
demand. This patch adds the same support for the ExtBinary format. It works
whether or not the sections in the ExtBinary format profile are compressed.
An experiment shows that it reduces the time to compile a server benchmark
by 30%.

When profile remapping and on-demand loading of function profiles are both
used, extra work is needed so that the on-demand loading process takes the
name remapping into consideration. That will be addressed in a follow-up
patch.

Differential Revision: https://reviews.llvm.org/D68601

llvm-svn: 374233
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 6d00404..cf3e567 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -439,7 +439,9 @@
   return sampleprof_error::success;
 }
 
-std::error_code SampleProfileReaderBinary::readFuncProfile() {
+std::error_code
+SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
+  Data = Start;
   auto NumHeadSamples = readNumber<uint64_t>();
   if (std::error_code EC = NumHeadSamples.getError())
     return EC;
@@ -461,7 +463,7 @@
 
 std::error_code SampleProfileReaderBinary::read() {
   while (!at_eof()) {
-    if (std::error_code EC = readFuncProfile())
+    if (std::error_code EC = readFuncProfile(Data))
       return EC;
   }
 
@@ -483,13 +485,15 @@
       return EC;
     break;
   case SecLBRProfile:
-    while (Data < Start + Size) {
-      if (std::error_code EC = readFuncProfile())
-        return EC;
-    }
+    if (std::error_code EC = readFuncProfiles())
+      return EC;
     break;
   case SecProfileSymbolList:
-    if (std::error_code EC = readProfileSymbolList(Size))
+    if (std::error_code EC = readProfileSymbolList())
+      return EC;
+    break;
+  case SecFuncOffsetTable:
+    if (std::error_code EC = readFuncOffsetTable())
       return EC;
     break;
   default:
@@ -498,15 +502,65 @@
   return sampleprof_error::success;
 }
 
-std::error_code
-SampleProfileReaderExtBinary::readProfileSymbolList(uint64_t Size) {
+void SampleProfileReaderExtBinary::collectFuncsFrom(const Module &M) {
+  UseAllFuncs = false;
+  FuncsToUse.clear();
+  for (auto &F : M)
+    FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
+}
+
+std::error_code SampleProfileReaderExtBinary::readFuncOffsetTable() {
+  auto Size = readNumber<uint64_t>();
+  if (std::error_code EC = Size.getError())
+    return EC;
+
+  FuncOffsetTable.reserve(*Size);
+  for (uint32_t I = 0; I < *Size; ++I) {
+    auto FName(readStringFromTable());
+    if (std::error_code EC = FName.getError())
+      return EC;
+
+    auto Offset = readNumber<uint64_t>();
+    if (std::error_code EC = Offset.getError())
+      return EC;
+
+    FuncOffsetTable[*FName] = *Offset;
+  }
+  return sampleprof_error::success;
+}
+
+std::error_code SampleProfileReaderExtBinary::readFuncProfiles() {
+  const uint8_t *Start = Data;
+  if (UseAllFuncs) {
+    while (Data < End) {
+      if (std::error_code EC = readFuncProfile(Data))
+        return EC;
+    }
+    assert(Data == End && "More data is read than expected");
+    return sampleprof_error::success;
+  }
+
+  for (auto Name : FuncsToUse) {
+    auto iter = FuncOffsetTable.find(Name);
+    if (iter == FuncOffsetTable.end())
+      continue;
+    const uint8_t *FuncProfileAddr = Start + iter->second;
+    assert(FuncProfileAddr < End && "out of LBRProfile section");
+    if (std::error_code EC = readFuncProfile(FuncProfileAddr))
+      return EC;
+  }
+  Data = End;
+  return sampleprof_error::success;
+}
+
+std::error_code SampleProfileReaderExtBinary::readProfileSymbolList() {
   if (!ProfSymList)
     ProfSymList = std::make_unique<ProfileSymbolList>();
 
-  if (std::error_code EC = ProfSymList->read(Data, Size))
+  if (std::error_code EC = ProfSymList->read(Data, End - Data))
     return EC;
 
-  Data = Data + Size;
+  Data = End;
   return sampleprof_error::success;
 }
 
@@ -600,9 +654,9 @@
 
   for (auto Offset : OffsetsToUse) {
     const uint8_t *SavedData = Data;
-    Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
-           Offset;
-    if (std::error_code EC = readFuncProfile())
+    if (std::error_code EC = readFuncProfile(
+            reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
+            Offset))
       return EC;
     Data = SavedData;
   }
@@ -719,8 +773,16 @@
 }
 
 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
-  auto &LastEntry = SecHdrTable.back();
-  return LastEntry.Offset + LastEntry.Size;
+  // Sections in SecHdrTable are not necessarily in the same order as
+  // sections in the profile because a section like FuncOffsetTable needs
+  // to be written after section LBRProfile but needs to be read before
+  // section LBRProfile, so we cannot simply use the last entry in
+  // SecHdrTable to calculate the file size.
+  uint64_t FileSize = 0;
+  for (auto &Entry : SecHdrTable) {
+    FileSize = std::max(Entry.Offset + Entry.Size, FileSize);
+  }
+  return FileSize;
 }
 
 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) {
@@ -812,13 +874,11 @@
   return sampleprof_error::success;
 }
 
-void SampleProfileReaderCompactBinary::collectFuncsToUse(const Module &M) {
+void SampleProfileReaderCompactBinary::collectFuncsFrom(const Module &M) {
   UseAllFuncs = false;
   FuncsToUse.clear();
-  for (auto &F : M) {
-    StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
-    FuncsToUse.insert(CanonName);
-  }
+  for (auto &F : M)
+    FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
 }
 
 std::error_code SampleProfileReaderBinary::readSummaryEntry(
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index 0344636..8d09af3 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -76,7 +76,7 @@
 SecHdrTableEntry &
 SampleProfileWriterExtBinaryBase::getEntryInLayout(SecType Type) {
   auto SecIt = std::find_if(
-      SectionLayout.begin(), SectionLayout.end(),
+      SectionHdrLayout.begin(), SectionHdrLayout.end(),
       [=](const auto &Entry) -> bool { return Entry.Type == Type; });
   return *SecIt;
 }
@@ -143,6 +143,29 @@
   return sampleprof_error::success;
 }
 
+std::error_code
+SampleProfileWriterExtBinary::writeSample(const FunctionSamples &S) {
+  uint64_t Offset = OutputStream->tell();
+  StringRef Name = S.getName();
+  FuncOffsetTable[Name] = Offset - SecLBRProfileStart;
+  encodeULEB128(S.getHeadSamples(), *OutputStream);
+  return writeBody(S);
+}
+
+std::error_code SampleProfileWriterExtBinary::writeFuncOffsetTable() {
+  auto &OS = *OutputStream;
+
+  // Write out the table size.
+  encodeULEB128(FuncOffsetTable.size(), OS);
+
+  // Write out FuncOffsetTable.
+  for (auto entry : FuncOffsetTable) {
+    writeNameIdx(entry.first);
+    encodeULEB128(entry.second, OS);
+  }
+  return sampleprof_error::success;
+}
+
 std::error_code SampleProfileWriterExtBinary::writeSections(
     const StringMap<FunctionSamples> &ProfileMap) {
   uint64_t SectionStart = markSectionStart(SecProfSummary);
@@ -163,6 +186,7 @@
     return EC;
 
   SectionStart = markSectionStart(SecLBRProfile);
+  SecLBRProfileStart = OutputStream->tell();
   if (std::error_code EC = writeFuncProfiles(ProfileMap))
     return EC;
   if (std::error_code EC = addNewSection(SecLBRProfile, SectionStart))
@@ -178,6 +202,12 @@
   if (std::error_code EC = addNewSection(SecProfileSymbolList, SectionStart))
     return EC;
 
+  SectionStart = markSectionStart(SecFuncOffsetTable);
+  if (std::error_code EC = writeFuncOffsetTable())
+    return EC;
+  if (std::error_code EC = addNewSection(SecFuncOffsetTable, SectionStart))
+    return EC;
+
   return sampleprof_error::success;
 }
 
@@ -359,7 +389,7 @@
 }
 
 void SampleProfileWriterExtBinaryBase::setToCompressAllSections() {
-  for (auto &Entry : SectionLayout)
+  for (auto &Entry : SectionHdrLayout)
     addSecFlags(Entry, SecFlagCompress);
 }
 
@@ -369,7 +399,7 @@
 
 void SampleProfileWriterExtBinaryBase::addSectionFlags(SecType Type,
                                                        SecFlags Flags) {
-  for (auto &Entry : SectionLayout) {
+  for (auto &Entry : SectionHdrLayout) {
     if (Entry.Type == Type)
       addSecFlags(Entry, Flags);
   }
@@ -378,9 +408,9 @@
 void SampleProfileWriterExtBinaryBase::allocSecHdrTable() {
   support::endian::Writer Writer(*OutputStream, support::little);
 
-  Writer.write(static_cast<uint64_t>(SectionLayout.size()));
+  Writer.write(static_cast<uint64_t>(SectionHdrLayout.size()));
   SecHdrTableOffset = OutputStream->tell();
-  for (uint32_t i = 0; i < SectionLayout.size(); i++) {
+  for (uint32_t i = 0; i < SectionHdrLayout.size(); i++) {
     Writer.write(static_cast<uint64_t>(-1));
     Writer.write(static_cast<uint64_t>(-1));
     Writer.write(static_cast<uint64_t>(-1));
@@ -402,14 +432,15 @@
     IndexMap.insert({static_cast<uint32_t>(SecHdrTable[i].Type), i});
   }
 
-  // Write the sections in the order specified in SectionLayout.
-  // That is the sections order Reader will see. Note that the
-  // sections order in which Reader expects to read may be different
-  // from the order in which Writer is able to write, so we need
-  // to adjust the order in SecHdrTable to be consistent with
-  // SectionLayout when we write SecHdrTable to the memory.
-  for (uint32_t i = 0; i < SectionLayout.size(); i++) {
-    uint32_t idx = IndexMap[static_cast<uint32_t>(SectionLayout[i].Type)];
+  // Write the section header table in the order specified in
+  // SectionHdrLayout. That is the section order Reader will see.
+  // Note that the section order in which Reader expects to read
+  // may be different from the order in which Writer is able to
+  // write, so we need to adjust the order in SecHdrTable to be
+  // consistent with SectionHdrLayout when we write SecHdrTable
+  // to the memory.
+  for (uint32_t i = 0; i < SectionHdrLayout.size(); i++) {
+    uint32_t idx = IndexMap[static_cast<uint32_t>(SectionHdrLayout[i].Type)];
     Writer.write(static_cast<uint64_t>(SecHdrTable[idx].Type));
     Writer.write(static_cast<uint64_t>(SecHdrTable[idx].Flags));
     Writer.write(static_cast<uint64_t>(SecHdrTable[idx].Offset));