[PDB] Finish and simplify TPI hashing
Summary:
This removes the CVTypeVisitor updater and verifier classes. They were
made dead by the minimal type dumping refactoring. Replace them with a
single function that takes a type record and produces a hash. Call this
from the minimal type dumper and compare the hash.
I also noticed that the microsoft-pdb reference repository uses a basic
CRC32 for records that aren't special. We already have an implementation
of that CRC ready to use, because it's used in COFF for ICF.
I'll make LLD call this hashing utility in a follow-up change. We might
also consider using this same hash in type stream merging, so that we
don't have to hash our records twice.
Reviewers: inglorion, ruiu
Subscribers: llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D35515
llvm-svn: 308240
diff --git a/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp b/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp
index 91b8d64..77a2d57 100644
--- a/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp
@@ -11,101 +11,79 @@
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/PDB/Native/Hash.h"
-#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/Support/JamCRC.h"
using namespace llvm;
using namespace llvm::codeview;
using namespace llvm::pdb;
// Corresponds to `fUDTAnon`.
-template <typename T> static bool isAnonymous(T &Rec) {
- StringRef Name = Rec.getName();
+static bool isAnonymous(StringRef Name) {
return Name == "<unnamed-tag>" || Name == "__unnamed" ||
Name.endswith("::<unnamed-tag>") || Name.endswith("::__unnamed");
}
-// Computes a hash for a given TPI record.
-template <typename T>
-static uint32_t getTpiHash(T &Rec, ArrayRef<uint8_t> FullRecord) {
- auto Opts = static_cast<uint16_t>(Rec.getOptions());
-
- bool ForwardRef =
- Opts & static_cast<uint16_t>(ClassOptions::ForwardReference);
- bool Scoped = Opts & static_cast<uint16_t>(ClassOptions::Scoped);
- bool UniqueName = Opts & static_cast<uint16_t>(ClassOptions::HasUniqueName);
- bool IsAnon = UniqueName && isAnonymous(Rec);
+// Computes the hash for a user-defined type record. This could be a struct,
+// class, union, or enum.
+static uint32_t getHashForUdt(const TagRecord &Rec,
+ ArrayRef<uint8_t> FullRecord) {
+ ClassOptions Opts = Rec.getOptions();
+ bool ForwardRef = bool(Opts & ClassOptions::ForwardReference);
+ bool Scoped = bool(Opts & ClassOptions::Scoped);
+ bool HasUniqueName = bool(Opts & ClassOptions::HasUniqueName);
+ bool IsAnon = HasUniqueName && isAnonymous(Rec.getName());
if (!ForwardRef && !Scoped && !IsAnon)
return hashStringV1(Rec.getName());
- if (!ForwardRef && UniqueName && !IsAnon)
+ if (!ForwardRef && HasUniqueName && !IsAnon)
return hashStringV1(Rec.getUniqueName());
return hashBufferV8(FullRecord);
}
-template <typename T> static uint32_t getSourceLineHash(T &Rec) {
+template <typename T>
+static Expected<uint32_t> getHashForUdt(const CVType &Rec) {
+ T Deserialized;
+ if (auto E = TypeDeserializer::deserializeAs(const_cast<CVType &>(Rec),
+ Deserialized))
+ return std::move(E);
+ return getHashForUdt(Deserialized, Rec.data());
+}
+
+template <typename T>
+static Expected<uint32_t> getSourceLineHash(const CVType &Rec) {
+ T Deserialized;
+ if (auto E = TypeDeserializer::deserializeAs(const_cast<CVType &>(Rec),
+ Deserialized))
+ return std::move(E);
char Buf[4];
- support::endian::write32le(Buf, Rec.getUDT().getIndex());
+ support::endian::write32le(Buf, Deserialized.getUDT().getIndex());
return hashStringV1(StringRef(Buf, 4));
}
-void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR,
- UdtSourceLineRecord &Rec) {
- CVR.Hash = getSourceLineHash(Rec);
-}
+Expected<uint32_t> llvm::pdb::hashTypeRecord(const CVType &Rec) {
+ switch (Rec.kind()) {
+ case LF_CLASS:
+ case LF_STRUCTURE:
+ case LF_INTERFACE:
+ return getHashForUdt<ClassRecord>(Rec);
+ case LF_UNION:
+ return getHashForUdt<UnionRecord>(Rec);
+ case LF_ENUM:
+ return getHashForUdt<EnumRecord>(Rec);
-void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR,
- UdtModSourceLineRecord &Rec) {
- CVR.Hash = getSourceLineHash(Rec);
-}
+ case LF_UDT_SRC_LINE:
+ return getSourceLineHash<UdtSourceLineRecord>(Rec);
+ case LF_UDT_MOD_SRC_LINE:
+ return getSourceLineHash<UdtModSourceLineRecord>(Rec);
-void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR, ClassRecord &Rec) {
- CVR.Hash = getTpiHash(Rec, CVR.data());
-}
+ default:
+ break;
+ }
-void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR, EnumRecord &Rec) {
- CVR.Hash = getTpiHash(Rec, CVR.data());
-}
-
-void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR, UnionRecord &Rec) {
- CVR.Hash = getTpiHash(Rec, CVR.data());
-}
-
-Error TpiHashVerifier::visitKnownRecord(CVType &CVR, UdtSourceLineRecord &Rec) {
- return verifySourceLine(Rec.getUDT());
-}
-
-Error TpiHashVerifier::visitKnownRecord(CVType &CVR,
- UdtModSourceLineRecord &Rec) {
- return verifySourceLine(Rec.getUDT());
-}
-
-Error TpiHashVerifier::visitKnownRecord(CVType &CVR, ClassRecord &Rec) {
- if (getTpiHash(Rec, CVR.data()) % NumHashBuckets != HashValues[Index])
- return errorInvalidHash();
- return Error::success();
-}
-Error TpiHashVerifier::visitKnownRecord(CVType &CVR, EnumRecord &Rec) {
- if (getTpiHash(Rec, CVR.data()) % NumHashBuckets != HashValues[Index])
- return errorInvalidHash();
- return Error::success();
-}
-Error TpiHashVerifier::visitKnownRecord(CVType &CVR, UnionRecord &Rec) {
- if (getTpiHash(Rec, CVR.data()) % NumHashBuckets != HashValues[Index])
- return errorInvalidHash();
- return Error::success();
-}
-
-Error TpiHashVerifier::verifySourceLine(codeview::TypeIndex TI) {
- char Buf[4];
- support::endian::write32le(Buf, TI.getIndex());
- uint32_t Hash = hashStringV1(StringRef(Buf, 4));
- if (Hash % NumHashBuckets != HashValues[Index])
- return errorInvalidHash();
- return Error::success();
-}
-
-Error TpiHashVerifier::visitTypeBegin(CVType &Rec) {
- ++Index;
- RawRecord = Rec;
- return Error::success();
+ // Run CRC32 over the bytes. This corresponds to `hashBufv8`.
+ JamCRC JC(/*Init=*/0U);
+ ArrayRef<char> Bytes(reinterpret_cast<const char *>(Rec.data().data()),
+ Rec.data().size());
+ JC.update(Bytes);
+ return JC.getCRC();
}