[SampleFDO] Add profile symbol list section to discriminate function being
cold versus function being newly added.

This is the second half of https://reviews.llvm.org/D66374.

Profile symbol list is the collection of function symbols showing up in
the binary which generates the current profile. It is used to discriminate
function being cold versus function being newly added. Profile symbol list
is only added for profile with ExtBinary format.

During profile use compilation, when profile-sample-accurate is enabled,
a function without profile will be regarded as cold only when it is
contained in that list.

Differential Revision: https://reviews.llvm.org/D66766

llvm-svn: 370563
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index ce0f537..36c39c1 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -15,8 +15,11 @@
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/Compression.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/raw_ostream.h"
 #include <string>
@@ -66,6 +69,12 @@
       return "Counter overflow";
     case sampleprof_error::ostream_seek_unsupported:
       return "Ostream does not support seek";
+    case sampleprof_error::compress_failed:
+      return "Compress failure";
+    case sampleprof_error::uncompress_failed:
+      return "Uncompress failure";
+    case sampleprof_error::zlib_unavailable:
+      return "Zlib is unavailable";
     }
     llvm_unreachable("A value of sampleprof_error has no message.");
   }
@@ -188,3 +197,75 @@
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD void FunctionSamples::dump() const { print(dbgs(), 0); }
 #endif
+
+std::error_code ProfileSymbolList::read(uint64_t CompressSize,
+                                        uint64_t UncompressSize,
+                                        const uint8_t *Data) {
+  const char *ListStart = reinterpret_cast<const char *>(Data);
+  // CompressSize being non-zero means the profile is compressed and
+  // needs to be uncompressed first.
+  if (CompressSize) {
+    if (!llvm::zlib::isAvailable())
+      return sampleprof_error::zlib_unavailable;
+
+    StringRef CompressedStrings(reinterpret_cast<const char *>(Data),
+                                CompressSize);
+    char *Buffer = Allocator.Allocate<char>(UncompressSize);
+    llvm::Error E = zlib::uncompress(CompressedStrings, Buffer, UncompressSize);
+    if (E)
+      return sampleprof_error::uncompress_failed;
+    ListStart = Buffer;
+  }
+
+  uint64_t Size = 0;
+  while (Size < UncompressSize) {
+    StringRef Str(ListStart + Size);
+    add(Str);
+    Size += Str.size() + 1;
+  }
+  return sampleprof_error::success;
+}
+
+std::error_code ProfileSymbolList::write(raw_ostream &OS) {
+  // Sort the symbols before doing compression. It will make the
+  // compression much more effective.
+  std::vector<StringRef> SortedList;
+  SortedList.insert(SortedList.begin(), Syms.begin(), Syms.end());
+  llvm::sort(SortedList);
+
+  std::string UncompressedStrings;
+  for (auto &Sym : SortedList) {
+    UncompressedStrings.append(Sym.str());
+    UncompressedStrings.append(1, '\0');
+  }
+
+  if (ToCompress) {
+    if (!llvm::zlib::isAvailable())
+      return sampleprof_error::zlib_unavailable;
+    SmallString<128> CompressedStrings;
+    llvm::Error E = zlib::compress(UncompressedStrings, CompressedStrings,
+                                   zlib::BestSizeCompression);
+    if (E)
+      return sampleprof_error::compress_failed;
+    encodeULEB128(UncompressedStrings.size(), OS);
+    encodeULEB128(CompressedStrings.size(), OS);
+    OS << CompressedStrings.str();
+  } else {
+    encodeULEB128(UncompressedStrings.size(), OS);
+    // If profile symbol list is not compressed, we will still save
+    // a compressed size value, but the value of the size is 0.
+    encodeULEB128(0, OS);
+    OS << UncompressedStrings;
+  }
+  return sampleprof_error::success;
+}
+
+void ProfileSymbolList::dump(raw_ostream &OS) const {
+  OS << "======== Dump profile symbol list ========\n";
+  std::vector<StringRef> SortedList;
+  SortedList.insert(SortedList.begin(), Syms.begin(), Syms.end());
+  llvm::sort(SortedList);
+
+  for (auto &Sym : SortedList)
+    OS << Sym << "\n";
+}