[llvm-pdbutil] Print detailed S_UDT stats.
This adds a new command line option, -udt-stats, which breaks
down the stats of S_UDT records. These are one of the biggest
contributors to the size of /DEBUG:FASTLINK PDBs, so they need
some additional tools to be able to analyze their usage. This
option will dig into each S_UDT record and determine what kind
of record it points to, and then break down the statistics by
the target type. The goal here is to identify how our object
files differ from MSVC object files in S_UDT records, so that
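we can output fewer of them and reach size parity.
As part of this change, the existing -mod-stats option is
renamed to -sym-stats (dumpModuleStats becomes dumpSymbolStats),
and getLeafTypeName moves from MinimalTypeDumper.cpp to
FormatUtil as formatTypeLeafKind so both dumpers can share it.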
llvm-svn: 312276
diff --git a/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp b/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
index c573f60..09424bd 100644
--- a/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
+++ b/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
@@ -59,6 +59,7 @@
#include "llvm/Support/FormatAdapters.h"
#include "llvm/Support/FormatVariadic.h"
+#include <cctype>
#include <unordered_map>
using namespace llvm;
@@ -82,8 +83,14 @@
P.NewLine();
}
- if (opts::dump::DumpModuleStats.getNumOccurrences() > 0) {
- if (auto EC = dumpModuleStats())
+ if (opts::dump::DumpSymbolStats.getNumOccurrences() > 0) {
+ if (auto EC = dumpSymbolStats())
+ return EC;
+ P.NewLine();
+ }
+
+ if (opts::dump::DumpUdtStats.getNumOccurrences() > 0) {
+ if (auto EC = dumpUdtStats())
return EC;
P.NewLine();
}
@@ -557,7 +564,7 @@
return Error::success();
}
-Error DumpOutputStyle::dumpModuleStats() {
+Error DumpOutputStyle::dumpSymbolStats() {
printHeader(P, "Module Stats");
ExitOnError Err("Unexpected error processing modules: ");
@@ -607,6 +614,154 @@
return Error::success();
}
+/// Returns true if \p S looks like a plain identifier: it is non-empty, does
+/// not start with a digit, and consists only of alphanumeric characters.
+///
+/// The <cctype> classifiers have undefined behavior when given a negative
+/// value other than EOF, so each character is converted to unsigned char
+/// before being classified. This matters for names containing bytes >= 0x80
+/// on targets where plain char is signed.
+///
+/// NOTE(review): '_' is not alphanumeric, so a scope such as "my_ns" is
+/// rejected here -- confirm that this is intended.
+static bool isValidNamespaceIdentifier(StringRef S) {
+  if (S.empty())
+    return false;
+
+  if (std::isdigit(static_cast<unsigned char>(S[0])))
+    return false;
+
+  return llvm::all_of(S, [](char C) {
+    return std::isalnum(static_cast<unsigned char>(C)) != 0;
+  });
+}
+
+namespace {
+// Pseudo kinds used as statistics keys for S_UDT records whose target type is
+// not looked up in the TPI stream, each followed by the label printed for it.
+// NOTE(review): these keys share a value space with the real leaf kinds that
+// dumpUdtStats stores from CVType::kind(); presumably no real leaf kind uses
+// 0, 1 or 2 -- confirm.
+
+// The S_UDT record's type index is the "none" type.
+constexpr uint32_t kNoneUdtKind{0};
+const StringRef NoneLabel = "<none type>";
+
+// The S_UDT record's type index is a simple (built-in) type.
+constexpr uint32_t kSimpleUdtKind{1};
+const StringRef SimpleLabel = "<simple type>";
+
+// The S_UDT record's type index lies beyond the known TPI records.
+constexpr uint32_t kUnknownUdtKind{2};
+const StringRef UnknownLabel = "<unknown type>";
+} // namespace
+
+/// Maps a UDT target kind to the text shown in the "Record Kind" column.
+///
+/// The three pseudo kinds (none / simple / unknown) map to their fixed labels;
+/// any other value is treated as a TypeLeafKind and formatted by
+/// formatTypeLeafKind.
+static StringRef getUdtStatLabel(uint32_t Kind) {
+  switch (Kind) {
+  case kNoneUdtKind:
+    return NoneLabel;
+  case kSimpleUdtKind:
+    return SimpleLabel;
+  case kUnknownUdtKind:
+    return UnknownLabel;
+  default:
+    return formatTypeLeafKind(static_cast<TypeLeafKind>(Kind));
+  }
+}
+
+/// Returns the length of the longest label among the kinds recorded in
+/// \p Stats, or 0 when nothing has been recorded. Used to size the first
+/// column of the stats table.
+static uint32_t getLongestTypeLeafName(const StatCollection &Stats) {
+  size_t Longest = 0;
+  for (const auto &Entry : Stats.Individual) {
+    const size_t Len = getUdtStatLabel(Entry.first).size();
+    if (Len > Longest)
+      Longest = Len;
+  }
+  return static_cast<uint32_t>(Longest);
+}
+
+/// Prints a breakdown of the S_UDT records reachable from the globals table.
+///
+/// Three groups of rows are written:
+///   1. one row per kind of type record that the S_UDT records point at
+///      (count of S_UDT records and summed length of the target type records),
+///   2. a "Total (S_UDT)" row with the number of S_UDT records and the summed
+///      length of the S_UDT symbol records themselves,
+///   3. one row per leading scope (the text before the first "::" in the UDT
+///      name) that passes isValidNamespaceIdentifier, again accumulating the
+///      length of the target type records.
+///
+/// If the PDB has no globals stream a message is printed and success is
+/// returned.
+Error DumpOutputStyle::dumpUdtStats() {
+  printHeader(P, "S_UDT Record Stats");
+
+  StatCollection UdtStats;
+  StatCollection UdtTargetStats;
+  if (!File.hasPDBGlobalsStream()) {
+    P.printLine("- Error: globals stream not present");
+    return Error::success();
+  }
+
+  AutoIndent Indent(P, 4);
+
+  auto &SymbolRecords = cantFail(File.getPDBSymbolStream());
+  auto &Globals = cantFail(File.getPDBGlobalsStream());
+  auto &TpiTypes = cantFail(initializeTypes(StreamTPI));
+
+  StringMap<StatCollection::Stat> NamespacedStats;
+
+  P.NewLine();
+
+  size_t LongestNamespace = 0;
+  for (uint32_t PubSymOff : Globals.getGlobalsTable()) {
+    CVSymbol Sym = SymbolRecords.readRecord(PubSymOff);
+    if (Sym.kind() != SymbolKind::S_UDT)
+      continue;
+    UdtStats.update(SymbolKind::S_UDT, Sym.length());
+
+    UDTSym UDT = cantFail(SymbolDeserializer::deserializeAs<UDTSym>(Sym));
+
+    // Classify the record by what its type index refers to. Simple indices
+    // and indices past the end of the TPI records have no type record to
+    // inspect, so they are counted under a pseudo kind with a size of 0.
+    uint32_t Kind = 0;
+    uint32_t RecordSize = 0;
+    if (UDT.Type.isSimple() ||
+        (UDT.Type.toArrayIndex() >= TpiTypes.capacity())) {
+      if (UDT.Type.isNoneType())
+        Kind = kNoneUdtKind;
+      else if (UDT.Type.isSimple())
+        Kind = kSimpleUdtKind;
+      else
+        Kind = kUnknownUdtKind;
+    } else {
+      CVType T = TpiTypes.getType(UDT.Type);
+      Kind = T.kind();
+      RecordSize = T.length();
+    }
+
+    UdtTargetStats.update(Kind, RecordSize);
+
+    // Only names with a leading "scope::" prefix contribute to the
+    // per-namespace rows.
+    size_t Pos = UDT.Name.find("::");
+    if (Pos == StringRef::npos)
+      continue;
+
+    StringRef Scope = UDT.Name.take_front(Pos);
+    if (Scope.empty() || !isValidNamespaceIdentifier(Scope))
+      continue;
+
+    LongestNamespace = std::max(LongestNamespace, Scope.size());
+    NamespacedStats[Scope].update(RecordSize);
+  }
+
+  // All widths are kept as size_t: std::max requires both arguments to have
+  // the same type, and StringRef::size() / LongestNamespace are size_t.
+  // Mixing them with uint32_t fails template argument deduction on targets
+  // where size_t is wider than 32 bits.
+  LongestNamespace += StringRef(" namespace ''").size();
+  size_t LongestTypeLeafKind = getLongestTypeLeafName(UdtTargetStats);
+  size_t FieldWidth = std::max(LongestNamespace, LongestTypeLeafKind);
+
+  // Compute the max number of digits for count and size fields, including
+  // comma separators.
+  StringRef CountHeader("Count");
+  StringRef SizeHeader("Size");
+  size_t CD = NumDigits(UdtStats.Totals.Count);
+  CD += (CD - 1) / 3;
+  CD = std::max(CD, CountHeader.size());
+
+  size_t SD = NumDigits(UdtStats.Totals.Size);
+  SD += (SD - 1) / 3;
+  SD = std::max(SD, SizeHeader.size());
+
+  // Label column + " | " + count column + two spaces + size column + 1.
+  size_t TableWidth = FieldWidth + 3 + CD + 2 + SD + 1;
+
+  P.formatLine("{0} | {1}  {2}",
+               fmt_align("Record Kind", AlignStyle::Right, FieldWidth),
+               fmt_align(CountHeader, AlignStyle::Right, CD),
+               fmt_align(SizeHeader, AlignStyle::Right, SD));
+
+  P.formatLine("{0}", fmt_repeat('-', TableWidth));
+  for (const auto &Stat : UdtTargetStats.Individual) {
+    StringRef Label = getUdtStatLabel(Stat.first);
+    P.formatLine("{0} | {1:N}  {2:N}",
+                 fmt_align(Label, AlignStyle::Right, FieldWidth),
+                 fmt_align(Stat.second.Count, AlignStyle::Right, CD),
+                 fmt_align(Stat.second.Size, AlignStyle::Right, SD));
+  }
+  P.formatLine("{0}", fmt_repeat('-', TableWidth));
+  P.formatLine("{0} | {1:N}  {2:N}",
+               fmt_align("Total (S_UDT)", AlignStyle::Right, FieldWidth),
+               fmt_align(UdtStats.Totals.Count, AlignStyle::Right, CD),
+               fmt_align(UdtStats.Totals.Size, AlignStyle::Right, SD));
+  P.formatLine("{0}", fmt_repeat('-', TableWidth));
+  for (const auto &Stat : NamespacedStats) {
+    std::string Label = formatv("namespace '{0}'", Stat.getKey());
+    P.formatLine("{0} | {1:N}  {2:N}",
+                 fmt_align(Label, AlignStyle::Right, FieldWidth),
+                 fmt_align(Stat.second.Count, AlignStyle::Right, CD),
+                 fmt_align(Stat.second.Size, AlignStyle::Right, SD));
+  }
+  return Error::success();
+}
+
static void typesetLinesAndColumns(PDBFile &File, LinePrinter &P,
uint32_t Start, const LineColumnEntry &E) {
const uint32_t kMaxCharsPerLineNumber = 4; // 4 digit line number
diff --git a/llvm/tools/llvm-pdbutil/DumpOutputStyle.h b/llvm/tools/llvm-pdbutil/DumpOutputStyle.h
index 497c51f..7dd717c 100644
--- a/llvm/tools/llvm-pdbutil/DumpOutputStyle.h
+++ b/llvm/tools/llvm-pdbutil/DumpOutputStyle.h
@@ -66,7 +66,8 @@
Error dumpFileSummary();
Error dumpStreamSummary();
- Error dumpModuleStats();
+ Error dumpSymbolStats();
+ Error dumpUdtStats();
Error dumpStringTable();
Error dumpLines();
Error dumpInlineeLines();
diff --git a/llvm/tools/llvm-pdbutil/FormatUtil.cpp b/llvm/tools/llvm-pdbutil/FormatUtil.cpp
index eca0751..f55d478 100644
--- a/llvm/tools/llvm-pdbutil/FormatUtil.cpp
+++ b/llvm/tools/llvm-pdbutil/FormatUtil.cpp
@@ -157,6 +157,18 @@
return formatUnknownEnum(K);
}
+/// Returns the enumerator name of \p K for every leaf kind listed through
+/// TYPE_RECORD in CodeViewTypes.def.
+///
+/// Callers pass kinds taken from type records (for example CVType::kind() in
+/// dumpUdtStats), so a value without a TYPE_RECORD entry can reach this
+/// function. Rather than hitting llvm_unreachable, which is undefined behavior
+/// in release builds, such values yield a fixed placeholder. The placeholder
+/// is a string literal, so the returned StringRef never dangles.
+StringRef llvm::pdb::formatTypeLeafKind(TypeLeafKind K) {
+  switch (K) {
+#define TYPE_RECORD(EnumName, value, name)                                     \
+  case EnumName:                                                               \
+    return #EnumName;
+#include "llvm/DebugInfo/CodeView/CodeViewTypes.def"
+  default:
+    return "<unknown leaf kind>";
+  }
+}
+
std::string llvm::pdb::formatSegmentOffset(uint16_t Segment, uint32_t Offset) {
return formatv("{0:4}:{1:4}", Segment, Offset);
}
diff --git a/llvm/tools/llvm-pdbutil/FormatUtil.h b/llvm/tools/llvm-pdbutil/FormatUtil.h
index 7804a1f..9a003c9 100644
--- a/llvm/tools/llvm-pdbutil/FormatUtil.h
+++ b/llvm/tools/llvm-pdbutil/FormatUtil.h
@@ -68,6 +68,7 @@
std::string formatChunkKind(codeview::DebugSubsectionKind Kind,
bool Friendly = true);
std::string formatSymbolKind(codeview::SymbolKind K);
+StringRef formatTypeLeafKind(codeview::TypeLeafKind K);
/// Returns the number of digits in the given integer.
inline int NumDigits(uint64_t N) {
diff --git a/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp b/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp
index 0079b9e..fae8992 100644
--- a/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp
@@ -26,18 +26,6 @@
using namespace llvm::codeview;
using namespace llvm::pdb;
-static StringRef getLeafTypeName(TypeLeafKind K) {
- switch (K) {
-#define TYPE_RECORD(EnumName, value, name) \
- case EnumName: \
- return #EnumName;
-#include "llvm/DebugInfo/CodeView/CodeViewTypes.def"
- default:
- llvm_unreachable("Unknown type leaf kind!");
- }
- return "";
-}
-
static std::string formatClassOptions(uint32_t IndentLevel,
ClassOptions Options) {
std::vector<std::string> Opts;
@@ -212,7 +200,7 @@
if (!Hashes) {
P.formatLine("{0} | {1} [size = {2}]",
fmt_align(Index, AlignStyle::Right, Width),
- getLeafTypeName(Record.Type), Record.length());
+ formatTypeLeafKind(Record.Type), Record.length());
} else {
std::string H;
if (Index.toArrayIndex() >= HashValues.size()) {
@@ -231,7 +219,7 @@
}
P.formatLine("{0} | {1} [size = {2}, hash = {3}]",
fmt_align(Index, AlignStyle::Right, Width),
- getLeafTypeName(Record.Type), Record.length(), H);
+ formatTypeLeafKind(Record.Type), Record.length(), H);
}
P.Indent(Width + 3);
return Error::success();
@@ -246,7 +234,7 @@
}
Error MinimalTypeDumpVisitor::visitMemberBegin(CVMemberRecord &Record) {
- P.formatLine("- {0}", getLeafTypeName(Record.Kind));
+ P.formatLine("- {0}", formatTypeLeafKind(Record.Kind));
return Error::success();
}
diff --git a/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp b/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
index 045eb71..3f77813 100644
--- a/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
+++ b/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp
@@ -422,10 +422,15 @@
"stream-blocks",
cl::desc("Add block information to the output of -streams"),
cl::cat(MsfOptions), cl::sub(DumpSubcommand));
-cl::opt<bool>
- DumpModuleStats("mod-stats",
- cl::desc("Dump a detailed size breakdown for each module"),
- cl::cat(MsfOptions), cl::sub(DumpSubcommand));
+// -sym-stats: when given on the dump subcommand, DumpOutputStyle runs
+// dumpSymbolStats().
+cl::opt<bool> DumpSymbolStats(
+ "sym-stats",
+ cl::desc("Dump a detailed breakdown of symbol usage/size for each module"),
+ cl::cat(MsfOptions), cl::sub(DumpSubcommand));
+
+// -udt-stats: when given on the dump subcommand, DumpOutputStyle runs
+// dumpUdtStats().
+cl::opt<bool> DumpUdtStats(
+ "udt-stats",
+ cl::desc("Dump a detailed breakdown of S_UDT record usage / stats"),
+ cl::cat(MsfOptions), cl::sub(DumpSubcommand));
// TYPE OPTIONS
cl::opt<bool> DumpTypes("types",
@@ -1095,27 +1100,28 @@
if (opts::DumpSubcommand) {
if (opts::dump::RawAll) {
- opts::dump::DumpLines = true;
- opts::dump::DumpInlineeLines = true;
- opts::dump::DumpXme = true;
- opts::dump::DumpXmi = true;
- opts::dump::DumpIds = true;
opts::dump::DumpGlobals = true;
+ opts::dump::DumpInlineeLines = true;
+ opts::dump::DumpIds = true;
+ opts::dump::DumpIdExtras = true;
+ opts::dump::DumpLines = true;
+ opts::dump::DumpModules = true;
+ opts::dump::DumpModuleFiles = true;
opts::dump::DumpPublics = true;
opts::dump::DumpSectionContribs = true;
+ opts::dump::DumpSectionHeaders = true;
opts::dump::DumpSectionMap = true;
opts::dump::DumpStreams = true;
opts::dump::DumpStreamBlocks = true;
opts::dump::DumpStringTable = true;
- opts::dump::DumpSectionHeaders = true;
opts::dump::DumpSummary = true;
opts::dump::DumpSymbols = true;
- opts::dump::DumpIds = true;
- opts::dump::DumpIdExtras = true;
+ opts::dump::DumpSymbolStats = true;
opts::dump::DumpTypes = true;
opts::dump::DumpTypeExtras = true;
- opts::dump::DumpModules = true;
- opts::dump::DumpModuleFiles = true;
+ opts::dump::DumpUdtStats = true;
+ opts::dump::DumpXme = true;
+ opts::dump::DumpXmi = true;
}
}
if (opts::PdbToYamlSubcommand) {
diff --git a/llvm/tools/llvm-pdbutil/llvm-pdbutil.h b/llvm/tools/llvm-pdbutil/llvm-pdbutil.h
index 901d2a8..98619da 100644
--- a/llvm/tools/llvm-pdbutil/llvm-pdbutil.h
+++ b/llvm/tools/llvm-pdbutil/llvm-pdbutil.h
@@ -127,7 +127,8 @@
extern llvm::cl::opt<bool> DumpSummary;
extern llvm::cl::opt<bool> DumpFpm;
extern llvm::cl::opt<bool> DumpStreams;
-extern llvm::cl::opt<bool> DumpModuleStats;
+extern llvm::cl::opt<bool> DumpSymbolStats;
+extern llvm::cl::opt<bool> DumpUdtStats;
extern llvm::cl::opt<bool> DumpStreamBlocks;
extern llvm::cl::opt<bool> DumpLines;