[PGO] Profile summary reader/writer support

With this patch, the profile summary data is written into the indexed
profile data file, so that the profile reader and the compiler's
optimizer passes can start making use of it.
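
As an illustration, a consumer of the reader could query the summary
roughly as follows. This is a minimal sketch, not part of this patch:
the getSummary() accessor on IndexedInstrProfReader is an assumed
name, while the ProfileSummary getters are the ones used by
setSummary() in this patch.

  // Sketch only; getSummary() is a hypothetical accessor exposing the
  // ProfileSummary that readSummary() populates.
  #include "llvm/ProfileData/InstrProfReader.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  void dumpSummary(std::string Path) {
    auto ReaderOrErr = IndexedInstrProfReader::create(Path);
    if (std::error_code EC = ReaderOrErr.getError())
      return; // Real code should report EC to the user.
    IndexedInstrProfReader &Reader = *ReaderOrErr.get();
    ProfileSummary &PS = Reader.getSummary(); // Assumed accessor.
    errs() << "Max function count: " << PS.getMaxFunctionCount() << "\n"
           << "Num functions: " << PS.getNumFunctions() << "\n";
  }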

Differential Revision: http://reviews.llvm.org/D16258

llvm-svn: 259626
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 0ccc162..a06a437 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -641,4 +641,21 @@
   }
 }
 
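+// Construct ProfileSummary from the profile summary data read from the
+// indexed profile file.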
+ProfileSummary::ProfileSummary(const IndexedInstrProf::Summary &S)
+    : TotalCount(S.get(IndexedInstrProf::Summary::TotalBlockCount)),
+      MaxBlockCount(S.get(IndexedInstrProf::Summary::MaxBlockCount)),
+      MaxInternalBlockCount(
+          S.get(IndexedInstrProf::Summary::MaxInternalBlockCount)),
+      MaxFunctionCount(S.get(IndexedInstrProf::Summary::MaxFunctionCount)),
+      NumBlocks(S.get(IndexedInstrProf::Summary::TotalNumBlocks)),
+      NumFunctions(S.get(IndexedInstrProf::Summary::TotalNumFunctions)) {
+  for (unsigned I = 0; I < S.NumCutoffEntries; I++) {
+    const IndexedInstrProf::Summary::Entry &Ent = S.getEntry(I);
+    DetailedSummary.emplace_back(static_cast<uint32_t>(Ent.Cutoff),
+                                 Ent.MinBlockCount, Ent.NumBlocks);
+  }
+}
+
 } // end namespace llvm
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 7e9b7ab..db6dd72 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -554,6 +554,43 @@
   return Magic == IndexedInstrProf::Magic;
 }
 
+const unsigned char *
+IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
+                                    const unsigned char *Cur) {
+  using namespace support;
+  if (Version >= IndexedInstrProf::Version4) {
+    const IndexedInstrProf::Summary *SummaryInLE =
+        reinterpret_cast<const IndexedInstrProf::Summary *>(Cur);
+    uint64_t NFields =
+        endian::byte_swap<uint64_t, little>(SummaryInLE->NumSummaryFields);
+    uint64_t NEntries =
+        endian::byte_swap<uint64_t, little>(SummaryInLE->NumCutoffEntries);
+    uint32_t SummarySize =
+        IndexedInstrProf::Summary::getSize(NFields, NEntries);
+    std::unique_ptr<IndexedInstrProf::Summary> SummaryData =
+        IndexedInstrProf::allocSummary(SummarySize);
+
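+    // The summary is a flat sequence of uint64_t fields; byte-swap each
+    // field from its little-endian on-disk order to host byte order.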
+    const uint64_t *Src = reinterpret_cast<const uint64_t *>(SummaryInLE);
+    uint64_t *Dst = reinterpret_cast<uint64_t *>(SummaryData.get());
+    for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)
+      Dst[I] = endian::byte_swap<uint64_t, little>(Src[I]);
+
+    // Initialize the ProfileSummary member using the summary data from disk.
+    this->Summary = llvm::make_unique<ProfileSummary>(*SummaryData);
+    return Cur + SummarySize;
+  } else {
+    // For older format versions, the summary is computed on the fly:
+    using namespace IndexedInstrProf;
+    std::vector<uint32_t> Cutoffs(&SummaryCutoffs[0],
+                                  &SummaryCutoffs[NumSummaryCutoffs]);
+    this->Summary = llvm::make_unique<ProfileSummary>(Cutoffs);
+    this->Summary->computeDetailedSummary();
+    return Cur;
+  }
+}
+
 std::error_code IndexedInstrProfReader::readHeader() {
   const unsigned char *Start =
       (const unsigned char *)DataBuffer->getBufferStart();
@@ -576,9 +613,9 @@
   if (FormatVersion > IndexedInstrProf::ProfVersion::CurrentVersion)
     return error(instrprof_error::unsupported_version);
 
-  // Read the maximal function count.
-  MaxFunctionCount =
-      endian::byte_swap<uint64_t, little>(Header->MaxFunctionCount);
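+  // Read the profile summary (computed on the fly for older versions).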
+  Cur = readSummary(static_cast<IndexedInstrProf::ProfVersion>(FormatVersion),
+                    Cur);
 
   // Read the hash type and start offset.
   IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>(
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 204d340..4330b06 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -84,6 +84,8 @@
   typedef uint64_t offset_type;
 
   support::endianness ValueProfDataEndianness;
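+  // Profile summary of the data being written; set by writeImpl().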
+  ProfileSummary *TheProfileSummary;
 
   InstrProfRecordWriterTrait() : ValueProfDataEndianness(support::little) {}
   static hash_value_type ComputeHash(key_type_ref K) {
@@ -122,6 +124,7 @@
     endian::Writer<little> LE(Out);
     for (const auto &ProfileData : *V) {
       const InstrProfRecord &ProfRecord = ProfileData.second;
+      TheProfileSummary->addRecord(ProfRecord);
 
       LE.write<uint64_t>(ProfileData.first); // Function hash
       LE.write<uint64_t>(ProfRecord.Counts.size());
@@ -140,7 +143,7 @@
 }
 
 InstrProfWriter::InstrProfWriter(bool Sparse)
-    : Sparse(Sparse), FunctionData(), MaxFunctionCount(0),
+    : Sparse(Sparse), FunctionData(),
       InfoObj(new InstrProfRecordWriterTrait()) {}
 
 InstrProfWriter::~InstrProfWriter() { delete InfoObj; }
@@ -179,11 +182,6 @@
 
   Dest.sortValueData();
 
-  // We keep track of the max function count as we go for simplicity.
-  // Update this statistic no matter the result of the merge.
-  if (Dest.Counts[0] > MaxFunctionCount)
-    MaxFunctionCount = Dest.Counts[0];
-
   return Result;
 }
 
@@ -199,8 +197,33 @@
   return false;
 }
 
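+// Copy the computed in-memory summary PS into the on-disk summary layout.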
+static void setSummary(IndexedInstrProf::Summary *TheSummary,
+                       ProfileSummary &PS) {
+  using namespace IndexedInstrProf;
+  std::vector<ProfileSummaryEntry> &Res = PS.getDetailedSummary();
+  TheSummary->NumSummaryFields = Summary::NumKinds;
+  TheSummary->NumCutoffEntries = Res.size();
+  TheSummary->set(Summary::MaxFunctionCount, PS.getMaxFunctionCount());
+  TheSummary->set(Summary::MaxBlockCount, PS.getMaxBlockCount());
+  TheSummary->set(Summary::MaxInternalBlockCount,
+                  PS.getMaxInternalBlockCount());
+  TheSummary->set(Summary::TotalBlockCount, PS.getTotalCount());
+  TheSummary->set(Summary::TotalNumBlocks, PS.getNumBlocks());
+  TheSummary->set(Summary::TotalNumFunctions, PS.getNumFunctions());
+  for (unsigned I = 0; I < Res.size(); I++)
+    TheSummary->setEntry(I, Res[I]);
+}
+
 void InstrProfWriter::writeImpl(ProfOStream &OS) {
   OnDiskChainedHashTableGenerator<InstrProfRecordWriterTrait> Generator;
+
+  using namespace IndexedInstrProf;
+  std::vector<uint32_t> Cutoffs(&SummaryCutoffs[0],
+                                &SummaryCutoffs[NumSummaryCutoffs]);
+  ProfileSummary PS(Cutoffs);
+  InfoObj->TheProfileSummary = &PS;
+
   // Populate the hash table generator.
   for (const auto &I : FunctionData)
     if (shouldEncodeData(I.getValue()))
@@ -209,7 +232,7 @@
   IndexedInstrProf::Header Header;
   Header.Magic = IndexedInstrProf::Magic;
   Header.Version = IndexedInstrProf::ProfVersion::CurrentVersion;
-  Header.MaxFunctionCount = MaxFunctionCount;
+  Header.Unused = 0;
   Header.HashType = static_cast<uint64_t>(IndexedInstrProf::HashType);
   Header.HashOffset = 0;
   int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t);
@@ -220,15 +243,39 @@
   for (int I = 0; I < N - 1; I++)
     OS.write(reinterpret_cast<uint64_t *>(&Header)[I]);
 
-  // Save a space to write the hash table start location.
-  uint64_t HashTableStartLoc = OS.tell();
+  // Save the location of the Header.HashOffset field in \c OS.
+  uint64_t HashTableStartFieldOffset = OS.tell();
   // Reserve the space for HashOffset field.
   OS.write(0);
+
+  // Reserve space to write profile summary data.
+  uint32_t NumEntries = Cutoffs.size();
+  uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries);
+  // Remember the summary offset.
+  uint64_t SummaryOffset = OS.tell();
+  for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)
+    OS.write(0);
+
   // Write the hash table.
   uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj);
 
+  // Allocate space for data to be serialized out.
+  std::unique_ptr<IndexedInstrProf::Summary> TheSummary =
+      IndexedInstrProf::allocSummary(SummarySize);
+  // Compute the summary and copy the data into the structure to be
+  // serialized out (to disk or buffer).
+  setSummary(TheSummary.get(), PS);
+  InfoObj->TheProfileSummary = nullptr;
+
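+  // The summary is fully known only after all records have been emitted,
+  // so patch the zero-filled slot reserved earlier with the final bytes.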
   // Now do the final patch:
-  PatchItem PatchItems[1] = {{HashTableStartLoc, &HashTableStart, 1}};
+  PatchItem PatchItems[] = {
+      // Patch the Header.HashOffset field.
+      {HashTableStartFieldOffset, &HashTableStart, 1},
+      // Patch the summary data.
+      {SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()),
+       static_cast<int>(SummarySize / sizeof(uint64_t))}};
   OS.patch(PatchItems, sizeof(PatchItems) / sizeof(*PatchItems));
 }