Parse the TPI (type information) stream of PDB files.

This parses the TPI stream (stream 2) of a PDB file. The stream consists
of some header information followed by a series of CodeView type records.
There is some additional complexity here in that, alongside this stream of
CodeView records, there is a serialized hash table used to query the types
efficiently. We parse the bookkeeping information necessary to reconstruct
that hash table, but we do not actually construct it yet, as there are
still a few things that need to be understood first.
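
For reference, a minimal sketch of how a caller might exercise the new
interface (assuming an already-loaded pdb::PDBFile named File; includes
and real error handling are elided):

  TpiStream Tpi(File);
  if (std::error_code EC = Tpi.reload())
    return EC;  // header was truncated, unsupported, or malformed
  for (const auto &R : Tpi.records())
    outs() << "leaf kind: " << unsigned(R.Kind) << "\n";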

Differential Revision: http://reviews.llvm.org/D19840
Reviewed By: ruiu, rnk

llvm-svn: 268343
diff --git a/llvm/lib/DebugInfo/PDB/Raw/TpiStream.cpp b/llvm/lib/DebugInfo/PDB/Raw/TpiStream.cpp
new file mode 100644
index 0000000..7ee4c60
--- /dev/null
+++ b/llvm/lib/DebugInfo/PDB/Raw/TpiStream.cpp
@@ -0,0 +1,143 @@
+//===- TpiStream.cpp - PDB Type Info (TPI) Stream 2 Access ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Raw/TpiStream.h"
+
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
+#include "llvm/DebugInfo/PDB/Raw/StreamReader.h"
+
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+using namespace llvm::support;
+using namespace llvm::pdb;
+
+namespace {
+const uint32_t MinTypeIndex = codeview::TypeIndex::FirstNonSimpleIndex;
+
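+// Sanity-check bounds for the NumHashBuckets field of the TPI header.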
+const uint32_t MinHashBuckets = 0x1000;
+const uint32_t MaxHashBuckets = 0x40000;
+}
+
+static uint32_t HashBufferV8(uint8_t *buffer, uint32_t NumBuckets) {
+  // Not yet implemented. This is probably some variation of CRC32, but we
+  // need to be sure of the precise implementation; otherwise we won't be able
+  // to work with the persisted hash values.
+  return 0;
+}
+
+struct TpiStream::HeaderInfo {
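+  // Offset and length of a buffer stored in the TPI hash stream.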
+  struct EmbeddedBuf {
+    little32_t Off;
+    ulittle32_t Length;
+  };
+
+  ulittle32_t Version;
+  ulittle32_t HeaderSize;
+  ulittle32_t TypeIndexBegin;
+  ulittle32_t TypeIndexEnd;
+  ulittle32_t TypeRecordBytes;
+
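+  // Bookkeeping for the serialized hash table; the buffers described below
+  // are read out of the stream identified by HashStreamIndex (see reload()).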
+  ulittle16_t HashStreamIndex;
+  ulittle16_t HashAuxStreamIndex;
+  ulittle32_t HashKeySize;
+  ulittle32_t NumHashBuckets;
+
+  EmbeddedBuf HashValueBuffer;
+  EmbeddedBuf IndexOffsetBuffer;
+  EmbeddedBuf HashAdjBuffer;
+};
+
+TpiStream::TpiStream(PDBFile &File)
+    : Pdb(File), Stream(StreamTPI, File), HashFunction(nullptr) {}
+
+TpiStream::~TpiStream() {}
+
+std::error_code TpiStream::reload() {
+  StreamReader Reader(Stream);
+
+  if (Reader.bytesRemaining() < sizeof(HeaderInfo))
+    return std::make_error_code(std::errc::illegal_byte_sequence);
+
+  Header.reset(new HeaderInfo());
+  if (auto EC = Reader.readObject(Header.get()))
+    return EC;
+
+  if (Header->Version != PdbTpiV80)
+    return std::make_error_code(std::errc::not_supported);
+
+  if (Header->HeaderSize != sizeof(HeaderInfo))
+    return std::make_error_code(std::errc::illegal_byte_sequence);
+
+  if (Header->HashKeySize != sizeof(ulittle32_t))
+    return std::make_error_code(std::errc::illegal_byte_sequence);
+
+  if (Header->NumHashBuckets < MinHashBuckets ||
+      Header->NumHashBuckets > MaxHashBuckets)
+    return std::make_error_code(std::errc::illegal_byte_sequence);
+
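+  // The header reported TPI v8.0, so record the matching hash function (still
+  // a stub; see HashBufferV8 above).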
+  HashFunction = HashBufferV8;
+
+  // The actual type records come next in this stream.
+  RecordsBuffer.initialize(Reader, Header->TypeRecordBytes);
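+  // Record slots are indexed relative to the first non-simple type index.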
+  TypeRecords.resize(TypeIndexEnd() - ::MinTypeIndex);
+  StreamReader RecordsReader(RecordsBuffer);
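+  // Each record starts with a TypeRecordPrefix giving its length and leaf
+  // kind; the remaining Prefix.Len - sizeof(Prefix.Leaf) bytes are the record
+  // body.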
+  for (uint32_t I = TypeIndexBegin(); I < TypeIndexEnd(); ++I) {
+    HashedTypeRecord &Record = TypeRecords[I - ::MinTypeIndex];
+    codeview::TypeRecordPrefix Prefix;
+    if (auto EC = RecordsReader.readObject(&Prefix))
+      return EC;
+
+    Record.Kind =
+        static_cast<codeview::TypeLeafKind>(static_cast<uint16_t>(Prefix.Leaf));
+
+    // Since we read this entire buffer into a ByteStream, we are guaranteed
+    // that the entire buffer is contiguous (i.e. there is no longer a chance
+    // that it splits across a page boundary), so we can request a reference
+    // directly into the stream buffer to avoid unnecessary memory copies.
+    uint32_t RecordSize = Prefix.Len - sizeof(Prefix.Leaf);
+    if (auto EC = RecordsReader.getArrayRef(Record.Record, RecordSize))
+      return EC;
+  }
+
+  // Hash indices, hash values, etc. come from the hash stream.
+  MappedBlockStream HS(Header->HashStreamIndex, Pdb);
+  StreamReader HSR(HS);
+  HSR.setOffset(Header->HashValueBuffer.Off);
+  HashValuesBuffer.initialize(HSR, Header->HashValueBuffer.Length);
+
+  HSR.setOffset(Header->HashAdjBuffer.Off);
+  HashAdjBuffer.initialize(HSR, Header->HashAdjBuffer.Length);
+
+  HSR.setOffset(Header->IndexOffsetBuffer.Off);
+  TypeIndexOffsetBuffer.initialize(HSR, Header->IndexOffsetBuffer.Length);
+
+  return std::error_code();
+}
+
+PdbRaw_TpiVer TpiStream::getTpiVersion() const {
+  uint32_t Value = Header->Version;
+  return static_cast<PdbRaw_TpiVer>(Value);
+}
+
+uint32_t TpiStream::TypeIndexBegin() const { return Header->TypeIndexBegin; }
+
+uint32_t TpiStream::TypeIndexEnd() const { return Header->TypeIndexEnd; }
+
+uint32_t TpiStream::NumTypeRecords() const {
+  return TypeIndexEnd() - TypeIndexBegin();
+}
+
+ArrayRef<TpiStream::HashedTypeRecord> TpiStream::records() const {
+  const HashedTypeRecord *Begin =
+      &TypeRecords[TypeIndexBegin() - ::MinTypeIndex];
+  return ArrayRef<HashedTypeRecord>(Begin, NumTypeRecords());
+}