[PDB] One more fix for hasing GSI records.
The reference implementation uses a case-insensitive string
comparison for strings of equal length. This will cause the
string "tEo" to compare less than "VUo". However we were using
a case sensitive comparison, which would generate the opposite
outcome. Switch to a case insensitive comparison. Also, when
one of the strings contains non-ascii characters, fallback to
a straight memcmp.
The only way to really test this is with a DIA test. Before this
patch, the test will fail (but succeed if link.exe is used instead
of lld-link). After the patch, it succeeds even with lld-link.
llvm-svn: 336464
diff --git a/lld/test/COFF/Inputs/globals-dia-func-collision3.obj b/lld/test/COFF/Inputs/globals-dia-func-collision3.obj
new file mode 100644
index 0000000..ce9b873
--- /dev/null
+++ b/lld/test/COFF/Inputs/globals-dia-func-collision3.obj
Binary files differ
diff --git a/lld/test/COFF/pdb-globals-dia-func-collision3.test b/lld/test/COFF/pdb-globals-dia-func-collision3.test
new file mode 100644
index 0000000..532bdd4
--- /dev/null
+++ b/lld/test/COFF/pdb-globals-dia-func-collision3.test
@@ -0,0 +1,81 @@
+REQUIRES: diasdk
+
+Input object file reconstruction:
+
+; // foo.cpp
+; void LJPwNRh() {}
+; void HGfxvKdQO() {}
+; void wuN() {}
+; void tEo() {}
+; void VUo() {}
+; void teO() {}
+; void bqSuLGQgWa() {}
+; void SyJYcL() {}
+; void OUV() {}
+; void quH() {}
+; void rbEaPKrlrRwk() {}
+; void oet() {}
+; void tuM() {}
+; void LuU() {}
+; void loxueqJLH() {}
+; void QplRJuDs() {}
+; void rWDokkLG() {}
+; void sEH() {}
+; void pui() {}
+; void xoZvxw() {}
+;
+; int main(int argc, char **argv) {
+; return 0;
+; }
+
+clang-cl /Z7 /GS- /GR- /c main.cpp /Foglobals-dia-func-collision3.obj
+
+RUN: lld-link /debug /nodefaultlib /incremental:no /entry:main /out:%t.exe %S/Inputs/globals-dia-func-collision3.obj
+RUN: llvm-pdbutil pretty -with-name=LuU -with-name=oet -with-name=OUV -with-name=pui \
+RUN: -with-name=quH -with-name=sEH -with-name=teO -with-name=tEo \
+RUN: -with-name=tuM -with-name=VUo -with-name=wuN -with-name=SyJYcL \
+RUN: -with-name=xoZvxw -with-name=LJPwNRh -with-name=QplRJuDs -with-name=rWDokkLG \
+RUN: -with-name=HGfxvKdQO -with-name=loxueqJLH -with-name=bqSuLGQgWa -with-name=rbEaPKrlrRwk \
+RUN: %t.pdb | FileCheck %s
+
+
+CHECK: [1 occurrences] - LuU
+CHECK-NEXT: func [0x000010d0+ 0 - 0x000010d1- 1 | sizeof= 1] (FPO) void __cdecl LuU()
+CHECK-NEXT: [1 occurrences] - oet
+CHECK-NEXT: func [0x000010b0+ 0 - 0x000010b1- 1 | sizeof= 1] (FPO) void __cdecl oet()
+CHECK-NEXT: [1 occurrences] - OUV
+CHECK-NEXT: func [0x00001080+ 0 - 0x00001081- 1 | sizeof= 1] (FPO) void __cdecl OUV()
+CHECK-NEXT: [1 occurrences] - pui
+CHECK-NEXT: func [0x00001120+ 0 - 0x00001121- 1 | sizeof= 1] (FPO) void __cdecl pui()
+CHECK-NEXT: [1 occurrences] - quH
+CHECK-NEXT: func [0x00001090+ 0 - 0x00001091- 1 | sizeof= 1] (FPO) void __cdecl quH()
+CHECK-NEXT: [1 occurrences] - sEH
+CHECK-NEXT: func [0x00001110+ 0 - 0x00001111- 1 | sizeof= 1] (FPO) void __cdecl sEH()
+CHECK-NEXT: [1 occurrences] - teO
+CHECK-NEXT: func [0x00001050+ 0 - 0x00001051- 1 | sizeof= 1] (FPO) void __cdecl teO()
+CHECK-NEXT: [1 occurrences] - tEo
+CHECK-NEXT: func [0x00001030+ 0 - 0x00001031- 1 | sizeof= 1] (FPO) void __cdecl tEo()
+CHECK-NEXT: [1 occurrences] - tuM
+CHECK-NEXT: func [0x000010c0+ 0 - 0x000010c1- 1 | sizeof= 1] (FPO) void __cdecl tuM()
+CHECK-NEXT: [1 occurrences] - VUo
+CHECK-NEXT: func [0x00001040+ 0 - 0x00001041- 1 | sizeof= 1] (FPO) void __cdecl VUo()
+CHECK-NEXT: [1 occurrences] - wuN
+CHECK-NEXT: func [0x00001020+ 0 - 0x00001021- 1 | sizeof= 1] (FPO) void __cdecl wuN()
+CHECK-NEXT: [1 occurrences] - SyJYcL
+CHECK-NEXT: func [0x00001070+ 0 - 0x00001071- 1 | sizeof= 1] (FPO) void __cdecl SyJYcL()
+CHECK-NEXT: [1 occurrences] - xoZvxw
+CHECK-NEXT: func [0x00001130+ 0 - 0x00001131- 1 | sizeof= 1] (FPO) void __cdecl xoZvxw()
+CHECK-NEXT: [1 occurrences] - LJPwNRh
+CHECK-NEXT: func [0x00001000+ 0 - 0x00001001- 1 | sizeof= 1] (FPO) void __cdecl LJPwNRh()
+CHECK-NEXT: [1 occurrences] - QplRJuDs
+CHECK-NEXT: func [0x000010f0+ 0 - 0x000010f1- 1 | sizeof= 1] (FPO) void __cdecl QplRJuDs()
+CHECK-NEXT: [1 occurrences] - rWDokkLG
+CHECK-NEXT: func [0x00001100+ 0 - 0x00001101- 1 | sizeof= 1] (FPO) void __cdecl rWDokkLG()
+CHECK-NEXT: [1 occurrences] - HGfxvKdQO
+CHECK-NEXT: func [0x00001010+ 0 - 0x00001011- 1 | sizeof= 1] (FPO) void __cdecl HGfxvKdQO()
+CHECK-NEXT: [1 occurrences] - loxueqJLH
+CHECK-NEXT: func [0x000010e0+ 0 - 0x000010e1- 1 | sizeof= 1] (FPO) void __cdecl loxueqJLH()
+CHECK-NEXT: [1 occurrences] - bqSuLGQgWa
+CHECK-NEXT: func [0x00001060+ 0 - 0x00001061- 1 | sizeof= 1] (FPO) void __cdecl bqSuLGQgWa()
+CHECK-NEXT: [1 occurrences] - rbEaPKrlrRwk
+CHECK-NEXT: func [0x000010a0+ 0 - 0x000010a1- 1 | sizeof= 1] (FPO) void __cdecl rbEaPKrlrRwk()
diff --git a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
index eaea24a..c8437d2 100644
--- a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
@@ -82,6 +82,26 @@
return Error::success();
}
+static bool isAsciiString(StringRef S) {
+ return llvm::all_of(S, [](char C) { return unsigned(C) < 0x80; });
+}
+
+// See `caseInsensitiveComparePchPchCchCch` in gsi.cpp
+static bool gsiRecordLess(StringRef S1, StringRef S2) {
+ size_t LS = S1.size();
+ size_t RS = S2.size();
+ // Shorter strings always compare less than longer strings.
+ if (LS != RS)
+ return LS < RS;
+
+ // If either string contains non ascii characters, memcmp them.
+ if (LLVM_UNLIKELY(!isAsciiString(S1) || !isAsciiString(S2)))
+ return memcmp(S1.data(), S2.data(), LS) < 0;
+
+ // Both strings are ascii, use memicmp.
+ return memicmp(S1.data(), S2.data(), LS) < 0;
+}
+
void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) {
std::array<std::vector<std::pair<StringRef, PSHashRecord>>, IPHR_HASH + 1>
TmpBuckets;
@@ -118,17 +138,16 @@
ulittle32_t(HashRecords.size() * SizeOfHROffsetCalc);
HashBuckets.push_back(ChainStartOff);
- // Sort each bucket by memcmp of the symbol's name.
+ // Sort each bucket by memcmp of the symbol's name. It's important that
+ // we use the same sorting algorithm as is used by the reference
+ // implementation to ensure that the search for a record within a bucket
+ // can properly early-out when it detects the record won't be found. The
+ // algorithm used here corredsponds to the function
+ // caseInsensitiveComparePchPchCchCch in the reference implementation.
std::sort(Bucket.begin(), Bucket.end(),
[](const std::pair<StringRef, PSHashRecord> &Left,
const std::pair<StringRef, PSHashRecord> &Right) {
- size_t LS = Left.first.size();
- size_t RS = Right.first.size();
- if (LS < RS)
- return true;
- if (LS > RS)
- return false;
- return Left.first < Right.first;
+ return gsiRecordLess(Left.first, Right.first);
});
for (const auto &Entry : Bucket)
diff --git a/llvm/tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp b/llvm/tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp
index 739a95e..1270223 100644
--- a/llvm/tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp
@@ -35,7 +35,7 @@
Printer.NewLine();
uint64_t Addr = Symbol.getVirtualAddress();
- Printer << "[";
+ Printer << "public [";
WithColor(Printer, PDB_ColorItem::Address).get() << format_hex(Addr, 10);
Printer << "] ";
WithColor(Printer, PDB_ColorItem::Identifier).get() << LinkageName;
diff --git a/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp b/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
index 0088d30..5b0d21f 100644
--- a/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
+++ b/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
@@ -20,10 +20,13 @@
#include "InputFile.h"
#include "LinePrinter.h"
#include "OutputStyle.h"
+#include "PrettyClassDefinitionDumper.h"
#include "PrettyCompilandDumper.h"
+#include "PrettyEnumDumper.h"
#include "PrettyExternalSymbolDumper.h"
#include "PrettyFunctionDumper.h"
#include "PrettyTypeDumper.h"
+#include "PrettyTypedefDumper.h"
#include "PrettyVariableDumper.h"
#include "YAMLOutputStyle.h"
@@ -65,7 +68,11 @@
#include "llvm/DebugInfo/PDB/PDBSymbolData.h"
#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
#include "llvm/DebugInfo/PDB/PDBSymbolFunc.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h"
#include "llvm/DebugInfo/PDB/PDBSymbolThunk.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/COM.h"
#include "llvm/Support/CommandLine.h"
@@ -161,6 +168,11 @@
cl::desc("When displaying an injected source, display the file content"),
cl::cat(OtherOptions), cl::sub(PrettySubcommand));
+cl::list<std::string> WithName(
+ "with-name",
+ cl::desc("Display any symbol or type with the specified exact name"),
+ cl::cat(TypeCategory), cl::ZeroOrMore, cl::sub(PrettySubcommand));
+
cl::opt<bool> Compilands("compilands", cl::desc("Display compilands"),
cl::cat(TypeCategory), cl::sub(PrettySubcommand));
cl::opt<bool> Symbols("module-syms",
@@ -963,6 +975,82 @@
outs() << "HasPrivateSymbols ";
Printer.Unindent();
+ if (!opts::pretty::WithName.empty()) {
+ Printer.NewLine();
+ WithColor(Printer, PDB_ColorItem::SectionHeader).get()
+ << "---SYMBOLS & TYPES BY NAME---";
+
+ for (StringRef Name : opts::pretty::WithName) {
+ auto Symbols = GlobalScope->findChildren(
+ PDB_SymType::None, Name, PDB_NameSearchFlags::NS_CaseSensitive);
+ if (!Symbols || Symbols->getChildCount() == 0) {
+ Printer.formatLine("[not found] - {0}", Name);
+ continue;
+ }
+ Printer.formatLine("[{0} occurrences] - {1}", Symbols->getChildCount(),
+ Name);
+
+ AutoIndent Indent(Printer);
+ Printer.NewLine();
+
+ while (auto Symbol = Symbols->getNext()) {
+ switch (Symbol->getSymTag()) {
+ case PDB_SymType::Typedef: {
+ TypedefDumper TD(Printer);
+ std::unique_ptr<PDBSymbolTypeTypedef> T =
+ llvm::unique_dyn_cast<PDBSymbolTypeTypedef>(std::move(Symbol));
+ TD.start(*T);
+ break;
+ }
+ case PDB_SymType::Enum: {
+ EnumDumper ED(Printer);
+ std::unique_ptr<PDBSymbolTypeEnum> E =
+ llvm::unique_dyn_cast<PDBSymbolTypeEnum>(std::move(Symbol));
+ ED.start(*E);
+ break;
+ }
+ case PDB_SymType::UDT: {
+ ClassDefinitionDumper CD(Printer);
+ std::unique_ptr<PDBSymbolTypeUDT> C =
+ llvm::unique_dyn_cast<PDBSymbolTypeUDT>(std::move(Symbol));
+ CD.start(*C);
+ break;
+ }
+ case PDB_SymType::BaseClass:
+ case PDB_SymType::Friend: {
+ TypeDumper TD(Printer);
+ Symbol->dump(TD);
+ break;
+ }
+ case PDB_SymType::Function: {
+ FunctionDumper FD(Printer);
+ std::unique_ptr<PDBSymbolFunc> F =
+ llvm::unique_dyn_cast<PDBSymbolFunc>(std::move(Symbol));
+ FD.start(*F, FunctionDumper::PointerType::None);
+ break;
+ }
+ case PDB_SymType::Data: {
+ VariableDumper VD(Printer);
+ std::unique_ptr<PDBSymbolData> D =
+ llvm::unique_dyn_cast<PDBSymbolData>(std::move(Symbol));
+ VD.start(*D);
+ break;
+ }
+ case PDB_SymType::PublicSymbol: {
+ ExternalSymbolDumper ED(Printer);
+ std::unique_ptr<PDBSymbolPublicSymbol> PS =
+ llvm::unique_dyn_cast<PDBSymbolPublicSymbol>(std::move(Symbol));
+ ED.dump(*PS);
+ break;
+ }
+ default:
+ llvm_unreachable("Unexpected symbol tag!");
+ }
+ }
+ }
+ llvm::outs().flush();
+ }
+
if (opts::pretty::Compilands) {
Printer.NewLine();
WithColor(Printer, PDB_ColorItem::SectionHeader).get()
diff --git a/llvm/tools/llvm-pdbutil/llvm-pdbutil.h b/llvm/tools/llvm-pdbutil/llvm-pdbutil.h
index 8dad5e8..7496ada 100644
--- a/llvm/tools/llvm-pdbutil/llvm-pdbutil.h
+++ b/llvm/tools/llvm-pdbutil/llvm-pdbutil.h
@@ -75,6 +75,8 @@
bool compareDataSymbols(const std::unique_ptr<llvm::pdb::PDBSymbolData> &F1,
const std::unique_ptr<llvm::pdb::PDBSymbolData> &F2);
+extern llvm::cl::list<std::string> WithName;
+
extern llvm::cl::opt<bool> Compilands;
extern llvm::cl::opt<bool> Symbols;
extern llvm::cl::opt<bool> Globals;