Parse PDB Name Hash Table. PDB has a lot of similar data structures. We already have code for parsing a Name Map, but PDB seems to have a different but very similar structure that is a hash table. This is the beginning of the code needed to parse the name hash table, but it is not yet complete. It parses the basic metadata of the hash table, the bucket array, and the names buffer, but doesn't use any of these fields yet, as the data structure requires a non-trivial amount of work to understand. llvm-svn: 268268

commit: 0eace0bae572f667dff0ba7d202f7e211d1b2625 [log] [tgz]
author: Zachary Turner <zturner@google.com> Mon May 02 18:09:14 2016 +0000
committer: Zachary Turner <zturner@google.com> Mon May 02 18:09:14 2016 +0000
tree: 813a7326b7348374f23ab02e1a05dcccb924742f
parent: 27233b727fb093efde9487180b61fee975442f72 [diff]
diff --git a/llvm/lib/DebugInfo/PDB/CMakeLists.txt b/llvm/lib/DebugInfo/PDB/CMakeLists.txt
index d6a3d2b..2fa74b9 100644
--- a/llvm/lib/DebugInfo/PDB/CMakeLists.txt
+++ b/llvm/lib/DebugInfo/PDB/CMakeLists.txt

@@ -33,6 +33,7 @@
   Raw/PDBFile.cpp
   Raw/DbiStream.cpp
   Raw/InfoStream.cpp
+  Raw/NameHashTable.cpp
   Raw/NameMap.cpp
   Raw/RawSession.cpp
   Raw/StreamReader.cpp)

diff --git a/llvm/lib/DebugInfo/PDB/Raw/ByteStream.cpp b/llvm/lib/DebugInfo/PDB/Raw/ByteStream.cpp
index d8b78ec..89477ea 100644
--- a/llvm/lib/DebugInfo/PDB/Raw/ByteStream.cpp
+++ b/llvm/lib/DebugInfo/PDB/Raw/ByteStream.cpp

@@ -59,3 +59,8 @@
 }
 
 uint32_t ByteStream::getLength() const { return Data.size(); }
+
+/// Exposes the bytes held in Data as character data.  The returned StringRef
+/// is built from Data's own pointer and size, so nothing is copied.
+StringRef ByteStream::str() const {
+  return StringRef(reinterpret_cast<const char *>(Data.data()), Data.size());
+}

diff --git a/llvm/lib/DebugInfo/PDB/Raw/DbiStream.cpp b/llvm/lib/DebugInfo/PDB/Raw/DbiStream.cpp
index 7762fa3..57f7644 100644
--- a/llvm/lib/DebugInfo/PDB/Raw/DbiStream.cpp
+++ b/llvm/lib/DebugInfo/PDB/Raw/DbiStream.cpp

@@ -10,6 +10,7 @@
 #include "llvm/DebugInfo/PDB/Raw/DbiStream.h"
 #include "llvm/DebugInfo/PDB/Raw/InfoStream.h"
 #include "llvm/DebugInfo/PDB/Raw/ModInfo.h"
+#include "llvm/DebugInfo/PDB/Raw/NameHashTable.h"
 #include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
 #include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
 #include "llvm/DebugInfo/PDB/Raw/StreamReader.h"
@@ -148,6 +149,9 @@
   if (Reader.bytesRemaining() > 0)
     return std::make_error_code(std::errc::illegal_byte_sequence);
 
+  StreamReader ECReader(ECSubstream);
+  ECNames.load(ECReader);
+
   return std::error_code();
 }
 

diff --git a/llvm/lib/DebugInfo/PDB/Raw/NameHashTable.cpp b/llvm/lib/DebugInfo/PDB/Raw/NameHashTable.cpp
new file mode 100644
index 0000000..3e8de73
--- /dev/null
+++ b/llvm/lib/DebugInfo/PDB/Raw/NameHashTable.cpp

@@ -0,0 +1,141 @@
+//===- NameHashTable.cpp - PDB Name Hash Table ------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Raw/NameHashTable.h"
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/PDB/Raw/ByteStream.h"
+#include "llvm/DebugInfo/PDB/Raw/StreamReader.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+using namespace llvm::support;
+using namespace llvm::pdb;
+
+// Pointer aliases for 32-bit and 16-bit unsigned integers.
+// NOTE(review): neither alias is referenced in the visible part of this file;
+// confirm whether they are still needed.
+using PUL = uint32_t *;
+using PUS = uint16_t *;
+
+/// Computes the version 1 string hash.
+///
+/// The string is consumed as little-endian 32-bit words that are XORed
+/// together; the 0-3 trailing bytes are folded in as one 16-bit word and/or
+/// one byte.  The accumulated value is then OR-ed with 0x20202020 and mixed
+/// with two shift-XOR steps.
+static inline uint32_t HashStringV1(StringRef Str) {
+  const uint32_t Length = Str.size();
+  uint32_t TailLength = Length % 4;
+
+  // XOR every complete 32-bit word of the string into the accumulator.
+  ArrayRef<ulittle32_t> Words(
+      reinterpret_cast<const ulittle32_t *>(Str.data()), Length / 4);
+  uint32_t Hash = 0;
+  for (auto Word : Words)
+    Hash ^= Word;
+
+  // At most 3 bytes are left over.  Take a 2 byte word first when possible,
+  // then whatever single byte may still remain.
+  const uint8_t *Tail = reinterpret_cast<const uint8_t *>(Words.end());
+  if (TailLength >= 2) {
+    Hash ^= *reinterpret_cast<const ulittle16_t *>(Tail);
+    Tail += 2;
+    TailLength -= 2;
+  }
+  if (TailLength == 1)
+    Hash ^= *Tail;
+
+  // Force bit 0x20 on in every byte of the accumulator before the final mix.
+  const uint32_t toLowerMask = 0x20202020;
+  Hash |= toLowerMask;
+  Hash ^= (Hash >> 11);
+  return Hash ^ (Hash >> 16);
+}
+
+/// Computes the version 2 string hash.
+///
+/// Starting from the seed 0xb170a1bf, each complete little-endian 32-bit word
+/// of the string and then each leftover byte is added to the hash and stirred
+/// with a shift-add / shift-XOR step.  The result is passed through one final
+/// multiply-and-add.
+static inline uint32_t HashStringV2(StringRef Str) {
+  uint32_t Hash = 0xb170a1bf;
+
+  // One mixing round; shared by the word loop and the trailing-byte loop.
+  auto Mix = [&Hash](uint32_t Value) {
+    Hash += Value;
+    Hash += (Hash << 10);
+    Hash ^= (Hash >> 6);
+  };
+
+  ArrayRef<char> Bytes(Str.begin(), Str.end());
+  ArrayRef<ulittle32_t> Words(
+      reinterpret_cast<const ulittle32_t *>(Bytes.data()),
+      Bytes.size() / sizeof(ulittle32_t));
+
+  for (ulittle32_t Word : Words)
+    Mix(Word);
+
+  // Whatever did not fill a whole word is hashed one unsigned byte at a time.
+  ArrayRef<char> Tail = Bytes.slice(Words.size() * sizeof(ulittle32_t));
+  for (uint8_t Byte : Tail)
+    Mix(Byte);
+
+  return Hash * 1664525L + 1013904223L;
+}
+
+/// Creates an empty table with Signature, HashVersion and NameCount all set
+/// to zero.
+NameHashTable::NameHashTable()
+    : Signature(0),
+      HashVersion(0),
+      NameCount(0) {}
+
+/// Reads the name hash table from \p Stream.
+///
+/// The expected layout is: a header (signature, hash version, size in bytes of
+/// the names buffer), the names buffer itself, a 32-bit bucket count followed
+/// by that many 32-bit bucket entries, and finally a 32-bit name count.
+///
+/// \returns a default-constructed error code on success.  Returns
+/// illegal_byte_sequence when the signature is wrong or the stream is too
+/// short for the data it announces, not_supported for a hash version other
+/// than 1 or 2, and otherwise whatever error the underlying read reported.
+std::error_code NameHashTable::load(StreamReader &Stream) {
+  struct Header {
+    support::ulittle32_t Signature;
+    support::ulittle32_t HashVersion;
+    support::ulittle32_t ByteSize;
+  };
+
+  // Every read is checked: a failed read would otherwise leave the
+  // destination uninitialized and the values below would be garbage.
+  Header H;
+  if (auto EC = Stream.readObject(&H))
+    return EC;
+  if (H.Signature != 0xEFFEEFFE)
+    return std::make_error_code(std::errc::illegal_byte_sequence);
+  if (H.HashVersion != 1 && H.HashVersion != 2)
+    return std::make_error_code(std::errc::not_supported);
+
+  Signature = H.Signature;
+  HashVersion = H.HashVersion;
+  if (auto EC = NamesBuffer.initialize(Stream, H.ByteSize))
+    return EC;
+
+  support::ulittle32_t HashCount;
+  if (auto EC = Stream.readObject(&HashCount))
+    return EC;
+
+  // The bucket count comes straight from the file.  Reject a count the stream
+  // cannot possibly satisfy before allocating storage for it.
+  if (HashCount > Stream.bytesRemaining() / sizeof(support::ulittle32_t))
+    return std::make_error_code(std::errc::illegal_byte_sequence);
+
+  std::vector<support::ulittle32_t> BucketArray(HashCount);
+  if (auto EC = Stream.readArray<support::ulittle32_t>(BucketArray))
+    return EC;
+  IDs.assign(BucketArray.begin(), BucketArray.end());
+
+  if (Stream.bytesRemaining() < sizeof(support::ulittle32_t))
+    return std::make_error_code(std::errc::illegal_byte_sequence);
+
+  return Stream.readInteger(NameCount);
+}
+
+/// Returns the string stored at byte offset \p ID in the names buffer.
+///
+/// An empty StringRef is returned when \p ID equals IDs[0] or when \p ID does
+/// not fall inside the names buffer.  Otherwise the result runs from \p ID up
+/// to, but not including, the next NUL byte, or to the end of the buffer if
+/// no NUL follows.
+StringRef NameHashTable::getStringForID(uint32_t ID) const {
+  // IDs may be empty if nothing was loaded; only consult IDs[0] if it exists.
+  if (!IDs.empty() && ID == IDs[0])
+    return StringRef();
+
+  // ID is used as an offset and may originate from file data, so make sure it
+  // is inside the buffer before touching it.
+  StringRef Names = NamesBuffer.str();
+  if (ID >= Names.size())
+    return StringRef();
+
+  // Search for the terminator within the buffer only, so a missing NUL cannot
+  // make the scan run past the end.
+  StringRef Tail = Names.substr(ID);
+  return Tail.substr(0, Tail.find('\0'));
+}
+
+/// Looks up the ID whose string equals \p Str.
+///
+/// The hash selected by HashVersion picks the bucket where the search starts;
+/// every bucket is then probed in order, wrapping around.  Returns the
+/// matching ID, or IDs[0] when no bucket matches.  Returns 0 when there are no
+/// buckets at all.
+uint32_t NameHashTable::getIDForString(StringRef Str) const {
+  size_t Count = IDs.size();
+  // With no buckets there is nothing to probe, and both the modulo below and
+  // the IDs[0] fallback would be invalid.
+  if (Count == 0)
+    return 0;
+
+  uint32_t Hash = (HashVersion == 1) ? HashStringV1(Str) : HashStringV2(Str);
+  uint32_t Start = Hash % Count;
+  for (size_t I = 0; I < Count; ++I) {
+    // The hash is just a starting point for the search, but if it
+    // doesn't work we should find the string no matter what, because
+    // we iterate the entire array.
+    uint32_t Index = (Start + I) % Count;
+
+    uint32_t ID = IDs[Index];
+    StringRef S = getStringForID(ID);
+    if (S == Str)
+      return ID;
+  }
+  // IDs[0] contains the ID of the "invalid" entry.
+  return IDs[0];
+}
+
+/// Returns a view of up to NameCount entries of IDs, starting after IDs[0].
+///
+/// NameCount is read from the file, so it is clamped to the number of entries
+/// that actually follow IDs[0]; an empty IDs yields an empty view.
+ArrayRef<uint32_t> NameHashTable::name_ids() const {
+  ArrayRef<uint32_t> All(IDs);
+  if (All.empty())
+    return All;
+
+  // Never slice past the end of the array, whatever NameCount claims.
+  size_t Available = All.size() - 1;
+  size_t Count = NameCount < Available ? NameCount : Available;
+  return All.slice(1, Count);
+}
commit	0eace0bae572f667dff0ba7d202f7e211d1b2625	[log] [tgz]
author	Zachary Turner <zturner@google.com>	Mon May 02 18:09:14 2016 +0000
committer	Zachary Turner <zturner@google.com>	Mon May 02 18:09:14 2016 +0000
tree	813a7326b7348374f23ab02e1a05dcccb924742f
parent	27233b727fb093efde9487180b61fee975442f72 [diff]