Resubmit "Refactor raw pdb dumper into library"

This fixes a number of endianness issues as well as an ODR
violation that hopefully causes everything to be happy.

llvm-svn: 267431
diff --git a/llvm/lib/DebugInfo/PDB/Raw/PDBFile.cpp b/llvm/lib/DebugInfo/PDB/Raw/PDBFile.cpp
new file mode 100644
index 0000000..01d7554
--- /dev/null
+++ b/llvm/lib/DebugInfo/PDB/Raw/PDBFile.cpp
@@ -0,0 +1,238 @@
+//===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+
+namespace {
+static const char Magic[] = {'M',  'i',  'c',    'r', 'o', 's',  'o',  'f',
+                             't',  ' ',  'C',    '/', 'C', '+',  '+',  ' ',
+                             'M',  'S',  'F',    ' ', '7', '.',  '0',  '0',
+                             '\r', '\n', '\x1a', 'D', 'S', '\0', '\0', '\0'};
+
+// The superblock is overlaid at the beginning of the file (offset 0).
+// It starts with a magic header and is followed by information which describes
+// the layout of the file system.
+struct SuperBlock {
+  char MagicBytes[sizeof(Magic)];
+  // The file system is split into a variable number of fixed size elements.
+  // These elements are referred to as blocks.  The size of a block may vary
+  // from system to system.
+  support::ulittle32_t BlockSize;
+  // This field's purpose is not yet known.
+  support::ulittle32_t Unknown0;
+  // This contains the number of blocks resident in the file system.  In
+  // practice, NumBlocks * BlockSize is equivalent to the size of the PDB file.
+  support::ulittle32_t NumBlocks;
+  // This contains the number of bytes which make up the directory.
+  support::ulittle32_t NumDirectoryBytes;
+  // This field's purpose is not yet known.
+  support::ulittle32_t Unknown1;
+  // This contains the block # of the block map.
+  support::ulittle32_t BlockMapAddr;
+};
+}
+
+struct llvm::PDBFileContext {
+  std::unique_ptr<MemoryBuffer> Buffer;
+  const SuperBlock *SB;
+  std::vector<uint32_t> StreamSizes;
+  DenseMap<uint32_t, std::vector<uint32_t>> StreamMap;
+};
+
+static std::error_code checkOffset(MemoryBufferRef M, uintptr_t Addr,
+                                   const uint64_t Size) {
+  if (Addr + Size < Addr || Addr + Size < Size ||
+      Addr + Size > uintptr_t(M.getBufferEnd()) ||
+      Addr < uintptr_t(M.getBufferStart())) {
+    return std::make_error_code(std::errc::bad_address);
+  }
+  return std::error_code();
+}
+
+template <typename T>
+static std::error_code checkOffset(MemoryBufferRef M, ArrayRef<T> AR) {
+  return checkOffset(M, uintptr_t(AR.data()), (uint64_t)AR.size() * sizeof(T));
+}
+
+PDBFile::PDBFile(std::unique_ptr<MemoryBuffer> MemBuffer) {
+  Context.reset(new PDBFileContext());
+  Context->Buffer = std::move(MemBuffer);
+}
+
+PDBFile::~PDBFile() {}
+
+uint32_t PDBFile::getBlockSize() const { return Context->SB->BlockSize; }
+
+uint32_t PDBFile::getUnknown0() const { return Context->SB->Unknown0; }
+
+uint32_t PDBFile::getBlockCount() const { return Context->SB->NumBlocks; }
+
+uint32_t PDBFile::getNumDirectoryBytes() const {
+  return Context->SB->NumDirectoryBytes;
+}
+
+uint32_t PDBFile::getBlockMapIndex() const { return Context->SB->BlockMapAddr; }
+
+uint32_t PDBFile::getUnknown1() const { return Context->SB->Unknown1; }
+
+uint32_t PDBFile::getNumDirectoryBlocks() const {
+  return bytesToBlocks(Context->SB->NumDirectoryBytes, Context->SB->BlockSize);
+}
+
+uint64_t PDBFile::getBlockMapOffset() const {
+  return (uint64_t)Context->SB->BlockMapAddr * Context->SB->BlockSize;
+}
+
+uint32_t PDBFile::getNumStreams() const { return Context->StreamSizes.size(); }
+
+uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const {
+  return Context->StreamSizes[StreamIndex];
+}
+
+llvm::ArrayRef<uint32_t>
+PDBFile::getStreamBlockList(uint32_t StreamIndex) const {
+  auto &Data = Context->StreamMap[StreamIndex];
+  return llvm::ArrayRef<uint32_t>(Data);
+}
+
+StringRef PDBFile::getBlockData(uint32_t BlockIndex, uint32_t NumBytes) const {
+  uint64_t StreamBlockOffset = blockToOffset(BlockIndex, getBlockSize());
+
+  return StringRef(Context->Buffer->getBufferStart() + StreamBlockOffset,
+                   NumBytes);
+}
+
+std::error_code PDBFile::parseFileHeaders() {
+  std::error_code EC;
+  MemoryBufferRef BufferRef = *Context->Buffer;
+
+  Context->SB =
+      reinterpret_cast<const SuperBlock *>(BufferRef.getBufferStart());
+  const SuperBlock *SB = Context->SB;
+  // Check the magic bytes.
+  if (memcmp(SB->MagicBytes, Magic, sizeof(Magic)) != 0)
+    return std::make_error_code(std::errc::illegal_byte_sequence);
+
+  // We don't support blocksizes which aren't a multiple of four bytes.
+  if (SB->BlockSize % sizeof(support::ulittle32_t) != 0)
+    return std::make_error_code(std::errc::not_supported);
+
+  // We don't support directories whose sizes aren't a multiple of four bytes.
+  if (SB->NumDirectoryBytes % sizeof(support::ulittle32_t) != 0)
+    return std::make_error_code(std::errc::not_supported);
+
+  // The number of blocks which comprise the directory is a simple function of
+  // the number of bytes it contains.
+  uint64_t NumDirectoryBlocks = getNumDirectoryBlocks();
+
+  // The block map, as we understand it, is a block which consists of a list of
+  // block numbers.
+  // It is unclear what would happen if the number of blocks couldn't fit on a
+  // single block.
+  if (NumDirectoryBlocks > SB->BlockSize / sizeof(support::ulittle32_t))
+    return std::make_error_code(std::errc::illegal_byte_sequence);
+
+  return std::error_code();
+}
+
+std::error_code PDBFile::parseStreamData() {
+  assert(Context && Context->SB);
+
+  bool SeenNumStreams = false;
+  uint32_t NumStreams = 0;
+  uint32_t StreamIdx = 0;
+  uint64_t DirectoryBytesRead = 0;
+
+  MemoryBufferRef M = *Context->Buffer;
+  const SuperBlock *SB = Context->SB;
+
+  auto DirectoryBlocks = getDirectoryBlockArray();
+
+  // The structure of the directory is as follows:
+  //    struct PDBDirectory {
+  //      uint32_t NumStreams;
+  //      uint32_t StreamSizes[NumStreams];
+  //      uint32_t StreamMap[NumStreams][];
+  //    };
+  //
+  //  Empty streams don't consume entries in the StreamMap.
+  for (uint32_t DirectoryBlockAddr : DirectoryBlocks) {
+    uint64_t DirectoryBlockOffset =
+        blockToOffset(DirectoryBlockAddr, SB->BlockSize);
+    auto DirectoryBlock =
+        makeArrayRef(reinterpret_cast<const support::ulittle32_t *>(
+                         M.getBufferStart() + DirectoryBlockOffset),
+                     SB->BlockSize / sizeof(support::ulittle32_t));
+    if (auto EC = checkOffset(M, DirectoryBlock))
+      return EC;
+
+    // We read data out of the directory four bytes at a time.  Depending on
+    // where we are in the directory, the contents may be: the number of streams
+    // in the directory, a stream's size, or a block in the stream map.
+    for (uint32_t Data : DirectoryBlock) {
+      // Don't read beyond the end of the directory.
+      if (DirectoryBytesRead == SB->NumDirectoryBytes)
+        break;
+
+      DirectoryBytesRead += sizeof(Data);
+
+      // This data must be the number of streams if we haven't seen it yet.
+      if (!SeenNumStreams) {
+        NumStreams = Data;
+        SeenNumStreams = true;
+        continue;
+      }
+      // This data must be a stream size if we have not seen them all yet.
+      if (Context->StreamSizes.size() < NumStreams) {
+        // It seems like some streams have their set to -1 when their contents
+        // are not present.  Treat them like empty streams for now.
+        if (Data == UINT32_MAX)
+          Context->StreamSizes.push_back(0);
+        else
+          Context->StreamSizes.push_back(Data);
+        continue;
+      }
+
+      // This data must be a stream block number if we have seen all of the
+      // stream sizes.
+      std::vector<uint32_t> *StreamBlocks = nullptr;
+      // Figure out which stream this block number belongs to.
+      while (StreamIdx < NumStreams) {
+        uint64_t NumExpectedStreamBlocks =
+            bytesToBlocks(Context->StreamSizes[StreamIdx], SB->BlockSize);
+        StreamBlocks = &Context->StreamMap[StreamIdx];
+        if (NumExpectedStreamBlocks > StreamBlocks->size())
+          break;
+        ++StreamIdx;
+      }
+      // It seems this block doesn't belong to any stream?  The stream is either
+      // corrupt or something more mysterious is going on.
+      if (StreamIdx == NumStreams)
+        return std::make_error_code(std::errc::illegal_byte_sequence);
+
+      StreamBlocks->push_back(Data);
+    }
+  }
+
+  // We should have read exactly SB->NumDirectoryBytes bytes.
+  assert(DirectoryBytesRead == SB->NumDirectoryBytes);
+  return std::error_code();
+}
+
+llvm::ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() {
+  return makeArrayRef(
+      reinterpret_cast<const support::ulittle32_t *>(
+          Context->Buffer->getBufferStart() + getBlockMapOffset()),
+      getNumDirectoryBlocks());
+}