[pdb] added support for dumping globals stream

Summary: This adds support for dumping the globals stream from PDB files using llvm-pdbdump, similar to the support we have for the publics stream.

Reviewers: ruiu, zturner

Subscribers: beanz, mgorny, modocache

Differential Revision: https://reviews.llvm.org/D25801

llvm-svn: 284861
diff --git a/llvm/lib/DebugInfo/PDB/CMakeLists.txt b/llvm/lib/DebugInfo/PDB/CMakeLists.txt
index f28cbe7..04144f2 100644
--- a/llvm/lib/DebugInfo/PDB/CMakeLists.txt
+++ b/llvm/lib/DebugInfo/PDB/CMakeLists.txt
@@ -31,6 +31,8 @@
   Raw/DbiStream.cpp
   Raw/DbiStreamBuilder.cpp
   Raw/EnumTables.cpp
+  Raw/GlobalsStream.cpp
+  Raw/GSI.cpp
   Raw/Hash.cpp
   Raw/InfoStream.cpp
   Raw/InfoStreamBuilder.cpp
diff --git a/llvm/lib/DebugInfo/PDB/Raw/GSI.cpp b/llvm/lib/DebugInfo/PDB/Raw/GSI.cpp
new file mode 100644
index 0000000..6ecbb5c
--- /dev/null
+++ b/llvm/lib/DebugInfo/PDB/Raw/GSI.cpp
@@ -0,0 +1,93 @@
+//===- GSI.cpp - Common Functions for GlobalsStream and PublicsStream  ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "GSI.h"
+
+#include "llvm/DebugInfo/MSF/StreamArray.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
+#include "llvm/DebugInfo/PDB/Raw/RawError.h"
+#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
+
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace pdb {
+
+static Error checkHashHdrVersion(const GSIHashHeader *HashHdr) {
+  if (HashHdr->VerHdr != GSIHashHeader::HdrVersion)
+    return make_error<RawError>(
+        raw_error_code::feature_unsupported,
+        "Encountered unsupported globals stream version.");
+
+  return Error::success();
+}
+
+Error readGSIHashBuckets(
+    msf::FixedStreamArray<support::ulittle32_t> &HashBuckets,
+    const GSIHashHeader *HashHdr, msf::StreamReader &Reader) {
+  if (auto EC = checkHashHdrVersion(HashHdr))
+    return EC;
+
+  // Before the actual hash buckets, there is a bitmap of length determined by
+  // IPHR_HASH.
+  ArrayRef<uint8_t> Bitmap;
+  size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32);
+  uint32_t NumBitmapEntries = BitmapSizeInBits / 8;
+  if (auto EC = Reader.readBytes(Bitmap, NumBitmapEntries))
+    return joinErrors(std::move(EC),
+                      make_error<RawError>(raw_error_code::corrupt_file,
+                                           "Could not read a bitmap."));
+  uint32_t NumBuckets = 0;
+  for (uint8_t B : Bitmap)
+    NumBuckets += countPopulation(B);
+
+  // Hash buckets follow.
+  if (auto EC = Reader.readArray(HashBuckets, NumBuckets))
+    return joinErrors(std::move(EC),
+                      make_error<RawError>(raw_error_code::corrupt_file,
+                                           "Hash buckets corrupted."));
+
+  return Error::success();
+}
+
+Error readGSIHashHeader(const GSIHashHeader *&HashHdr,
+                        msf::StreamReader &Reader) {
+  if (Reader.readObject(HashHdr))
+    return make_error<RawError>(raw_error_code::corrupt_file,
+                                "Stream does not contain a GSIHashHeader.");
+
+  if (HashHdr->VerSignature != GSIHashHeader::HdrSignature)
+    return make_error<RawError>(
+        raw_error_code::feature_unsupported,
+        "GSIHashHeader signature (0xffffffff) not found.");
+
+  return Error::success();
+}
+
+Error readGSIHashRecords(msf::FixedStreamArray<PSHashRecord> &HashRecords,
+                         const GSIHashHeader *HashHdr,
+                         msf::StreamReader &Reader) {
+  if (auto EC = checkHashHdrVersion(HashHdr))
+    return EC;
+
+  // HashHdr->HrSize specifies the number of bytes of PSHashRecords we have.
+  // Verify that we can read them all.
+  if (HashHdr->HrSize % sizeof(PSHashRecord))
+    return make_error<RawError>(raw_error_code::corrupt_file,
+                                "Invalid HR array size.");
+  uint32_t NumHashRecords = HashHdr->HrSize / sizeof(PSHashRecord);
+  if (auto EC = Reader.readArray(HashRecords, NumHashRecords))
+    return joinErrors(std::move(EC),
+                      make_error<RawError>(raw_error_code::corrupt_file,
+                                           "Error reading hash records."));
+
+  return Error::success();
+}
+}
+}
diff --git a/llvm/lib/DebugInfo/PDB/Raw/GSI.h b/llvm/lib/DebugInfo/PDB/Raw/GSI.h
new file mode 100644
index 0000000..82cebd9
--- /dev/null
+++ b/llvm/lib/DebugInfo/PDB/Raw/GSI.h
@@ -0,0 +1,70 @@
+//===- GSI.h - Common Declarations for GlobalsStream and PublicsStream ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The data structures defined in this file are based on the reference
+// implementation which is available at
+// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.h
+//
+// When you are reading the reference source code, you'd find the
+// information below useful.
+//
+//  - ppdb1->m_fMinimalDbgInfo seems to be always true.
+//  - SMALLBUCKETS macro is defined.
+//
+// The reference doesn't compile, so I learned just by reading code.
+// It's not guaranteed to be correct.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_DEBUGINFO_PDB_RAW_GSI_H
+#define LLVM_LIB_DEBUGINFO_PDB_RAW_GSI_H
+
+#include "llvm/DebugInfo/MSF/StreamArray.h"
+#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
+
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+
+namespace msf {
+class StreamReader;
+}
+
+namespace pdb {
+
+/// From https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.cpp
+static const unsigned IPHR_HASH = 4096;
+
+/// Header of the hash tables found in the globals and publics sections.
+/// Based on GSIHashHeader in
+/// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.h
+struct GSIHashHeader {
+  enum : unsigned {
+    HdrSignature = ~0U,
+    HdrVersion = 0xeffe0000 + 19990810,
+  };
+  support::ulittle32_t VerSignature;
+  support::ulittle32_t VerHdr;
+  support::ulittle32_t HrSize;
+  support::ulittle32_t NumBuckets;
+};
+
+Error readGSIHashBuckets(
+    msf::FixedStreamArray<support::ulittle32_t> &HashBuckets,
+    const GSIHashHeader *HashHdr, msf::StreamReader &Reader);
+Error readGSIHashHeader(const GSIHashHeader *&HashHdr,
+                        msf::StreamReader &Reader);
+Error readGSIHashRecords(msf::FixedStreamArray<PSHashRecord> &HashRecords,
+                         const GSIHashHeader *HashHdr,
+                         msf::StreamReader &Reader);
+}
+}
+
+#endif
diff --git a/llvm/lib/DebugInfo/PDB/Raw/GlobalsStream.cpp b/llvm/lib/DebugInfo/PDB/Raw/GlobalsStream.cpp
new file mode 100644
index 0000000..c518452
--- /dev/null
+++ b/llvm/lib/DebugInfo/PDB/Raw/GlobalsStream.cpp
@@ -0,0 +1,46 @@
+//===- GlobalsStream.cpp - PDB Index of Symbols by Name ---- ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Raw/GlobalsStream.h"
+
+#include "GSI.h"
+#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Raw/RawError.h"
+#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
+
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+
+using namespace llvm;
+using namespace llvm::msf;
+using namespace llvm::pdb;
+
+GlobalsStream::GlobalsStream(std::unique_ptr<MappedBlockStream> Stream)
+    : Stream(std::move(Stream)) {}
+
+GlobalsStream::~GlobalsStream() {}
+
+Error GlobalsStream::reload() {
+  StreamReader Reader(*Stream);
+
+  const GSIHashHeader *HashHdr;
+  if (auto EC = readGSIHashHeader(HashHdr, Reader))
+    return EC;
+
+  if (auto EC = readGSIHashRecords(HashRecords, HashHdr, Reader))
+    return EC;
+
+  if (auto EC = readGSIHashBuckets(HashBuckets, HashHdr, Reader))
+    return EC;
+  NumBuckets = HashBuckets.size();
+
+  return Error::success();
+}
+
+Error GlobalsStream::commit() { return Error::success(); }
diff --git a/llvm/lib/DebugInfo/PDB/Raw/PDBFile.cpp b/llvm/lib/DebugInfo/PDB/Raw/PDBFile.cpp
index 75840b0..f7bb8ab 100644
--- a/llvm/lib/DebugInfo/PDB/Raw/PDBFile.cpp
+++ b/llvm/lib/DebugInfo/PDB/Raw/PDBFile.cpp
@@ -15,6 +15,7 @@
 #include "llvm/DebugInfo/MSF/StreamReader.h"
 #include "llvm/DebugInfo/MSF/StreamWriter.h"
 #include "llvm/DebugInfo/PDB/Raw/DbiStream.h"
+#include "llvm/DebugInfo/PDB/Raw/GlobalsStream.h"
 #include "llvm/DebugInfo/PDB/Raw/InfoStream.h"
 #include "llvm/DebugInfo/PDB/Raw/NameHashTable.h"
 #include "llvm/DebugInfo/PDB/Raw/PublicsStream.h"
@@ -217,6 +218,22 @@
   return ContainerLayout.DirectoryBlocks;
 }
 
+Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
+  if (!Globals) {
+    auto DbiS = getPDBDbiStream();
+    if (!DbiS)
+      return DbiS.takeError();
+
+    auto GlobalS = MappedBlockStream::createIndexedStream(
+        ContainerLayout, *Buffer, DbiS->getGlobalSymbolStreamIndex());
+    auto TempGlobals = llvm::make_unique<GlobalsStream>(std::move(GlobalS));
+    if (auto EC = TempGlobals->reload())
+      return std::move(EC);
+    Globals = std::move(TempGlobals);
+  }
+  return *Globals;
+}
+
 Expected<InfoStream &> PDBFile::getPDBInfoStream() {
   if (!Info) {
     auto InfoS = MappedBlockStream::createIndexedStream(ContainerLayout,
diff --git a/llvm/lib/DebugInfo/PDB/Raw/PublicsStream.cpp b/llvm/lib/DebugInfo/PDB/Raw/PublicsStream.cpp
index 5d4431d..43ac4e5 100644
--- a/llvm/lib/DebugInfo/PDB/Raw/PublicsStream.cpp
+++ b/llvm/lib/DebugInfo/PDB/Raw/PublicsStream.cpp
@@ -24,6 +24,7 @@
 
 #include "llvm/DebugInfo/PDB/Raw/PublicsStream.h"
 
+#include "GSI.h"
 #include "llvm/DebugInfo/CodeView/CodeView.h"
 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
@@ -44,8 +45,6 @@
 using namespace llvm::pdb;
 
 
-static const unsigned IPHR_HASH = 4096;
-
 // This is PSGSIHDR struct defined in
 // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.h
 struct PublicsStream::HeaderInfo {
@@ -59,18 +58,6 @@
   ulittle32_t NumSections;
 };
 
-// This is GSIHashHdr.
-struct PublicsStream::GSIHashHeader {
-  enum : unsigned {
-    HdrSignature = ~0U,
-    HdrVersion = 0xeffe0000 + 19990810,
-  };
-  ulittle32_t VerSignature;
-  ulittle32_t VerHdr;
-  ulittle32_t HrSize;
-  ulittle32_t NumBuckets;
-};
-
 PublicsStream::PublicsStream(PDBFile &File,
                              std::unique_ptr<MappedBlockStream> Stream)
     : Pdb(File), Stream(std::move(Stream)) {}
@@ -98,40 +85,15 @@
     return make_error<RawError>(raw_error_code::corrupt_file,
                                 "Publics Stream does not contain a header.");
 
-  if (Reader.readObject(HashHdr))
-    return make_error<RawError>(raw_error_code::corrupt_file,
-                                "Publics Stream does not contain a header.");
+  if (auto EC = readGSIHashHeader(HashHdr, Reader))
+    return EC;
 
-  // An array of HashRecord follows. Read them.
-  if (HashHdr->HrSize % sizeof(PSHashRecord))
-    return make_error<RawError>(raw_error_code::corrupt_file,
-                                "Invalid HR array size.");
-  uint32_t NumHashRecords = HashHdr->HrSize / sizeof(PSHashRecord);
-  if (auto EC = Reader.readArray(HashRecords, NumHashRecords))
-    return joinErrors(std::move(EC),
-                      make_error<RawError>(raw_error_code::corrupt_file,
-                                           "Could not read an HR array"));
+  if (auto EC = readGSIHashRecords(HashRecords, HashHdr, Reader))
+    return EC;
 
-  // A bitmap of a fixed length follows.
-  size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32);
-  uint32_t NumBitmapEntries = BitmapSizeInBits / 8;
-  if (auto EC = Reader.readBytes(Bitmap, NumBitmapEntries))
-    return joinErrors(std::move(EC),
-                      make_error<RawError>(raw_error_code::corrupt_file,
-                                           "Could not read a bitmap."));
-  for (uint8_t B : Bitmap)
-    NumBuckets += countPopulation(B);
-
-  // We don't yet understand the following data structures completely,
-  // but we at least know the types and sizes. Here we are trying
-  // to read the stream till end so that we at least can detect
-  // corrupted streams.
-
-  // Hash buckets follow.
-  if (auto EC = Reader.readArray(HashBuckets, NumBuckets))
-    return joinErrors(std::move(EC),
-                      make_error<RawError>(raw_error_code::corrupt_file,
-                                           "Hash buckets corrupted."));
+  if (auto EC = readGSIHashBuckets(HashBuckets, HashHdr, Reader))
+    return EC;
+  NumBuckets = HashBuckets.size();
 
   // Something called "address map" follows.
   uint32_t NumAddressMapEntries = Header->AddrMap / sizeof(uint32_t);