[NativePDB] Higher fidelity reconstruction of AST from Debug Info.
In order to accurately put a type into the correct location in the AST
we construct from debug info, we need to be able to determine which
DeclContext (namespace, global, nested class, etc.) it goes into.
PDB doesn't contain this mapping. It does, however, contain the reverse
mapping. That is, for a given class type T, you can determine all
classes Q1, Q2, ..., Qn that are nested inside of T. We need to know,
for a given class type Q, which type T it is nested inside of.
This patch builds this map as a pre-processing step when we first
load the PDB by scanning every type. Initial tests show that while
this can be slow in debug builds of LLDB, it is quite fast in release
builds (less than 2 seconds for a ~1GB PDB, and it only needs to happen
once).
Furthermore, having this pre-processing step in place allows us to
repurpose it for building other kinds of indexes down the line. For
the time being, this gives us very accurate reconstruction of the
DeclContext hierarchy.
Differential Revision: https://reviews.llvm.org/D54216
llvm-svn: 346429
diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.cpp
index 385892f..e1ba03f 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.cpp
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.cpp
@@ -21,6 +21,38 @@
using namespace llvm::codeview;
using namespace llvm::pdb;
+// Builds a CVTagRecord from a raw CodeView type record.
+//
+// The caller must pass a record for which IsTagRecord() is true (asserted).
+// The record is deserialized into the record class matching its kind and
+// handed to the corresponding private constructor. Deserialization errors are
+// treated as fatal via llvm::cantFail.
+CVTagRecord CVTagRecord::create(CVType type) {
+  assert(IsTagRecord(type) && "type is not a tag record!");
+  switch (type.kind()) {
+  case LF_ENUM: {
+    EnumRecord enum_record;
+    llvm::cantFail(
+        TypeDeserializer::deserializeAs<EnumRecord>(type, enum_record));
+    return CVTagRecord(std::move(enum_record));
+  }
+  case LF_UNION: {
+    UnionRecord union_record;
+    llvm::cantFail(
+        TypeDeserializer::deserializeAs<UnionRecord>(type, union_record));
+    return CVTagRecord(std::move(union_record));
+  }
+  // Classes, structs and interfaces all share the ClassRecord layout.
+  case LF_CLASS:
+  case LF_STRUCTURE:
+  case LF_INTERFACE: {
+    ClassRecord class_record;
+    llvm::cantFail(
+        TypeDeserializer::deserializeAs<ClassRecord>(type, class_record));
+    return CVTagRecord(std::move(class_record));
+  }
+  default:
+    llvm_unreachable("Unreachable!");
+  }
+}
+
+// Private constructors used by CVTagRecord::create(); each one activates the
+// union member matching its argument and sets the discriminator.
+//
+// Members are initialized in declaration order, and m_kind is declared ahead
+// of the record union. The discriminator is therefore computed from the
+// constructor argument, never from the union member, which has not been
+// constructed yet when m_kind is initialized. The initializers are listed in
+// that same order.
+CVTagRecord::CVTagRecord(ClassRecord &&c)
+    : m_kind(c.Kind == TypeRecordKind::Struct ? Struct : Class),
+      cvclass(std::move(c)) {}
+CVTagRecord::CVTagRecord(UnionRecord &&u)
+    : m_kind(Union), cvunion(std::move(u)) {}
+CVTagRecord::CVTagRecord(EnumRecord &&e) : m_kind(Enum), cvenum(std::move(e)) {}
+
PDB_SymType lldb_private::npdb::CVSymToPDBSym(SymbolKind kind) {
switch (kind) {
case S_COMPILE3:
@@ -94,6 +126,8 @@
return PDB_SymType::Enum;
case LF_PROCEDURE:
return PDB_SymType::FunctionSig;
+ case LF_BITFIELD:
+ return PDB_SymType::BuiltinType;
default:
lldbassert(false && "Invalid type record kind!");
}
@@ -306,6 +340,18 @@
}
}
+// Returns true if `cvt` is one of the record kinds that
+// CVTagRecord::create() knows how to deserialize: a class, struct, interface,
+// union or enum record. LF_INTERFACE is included so that this predicate agrees
+// with the kinds handled by create(), which asserts on it.
+bool lldb_private::npdb::IsTagRecord(llvm::codeview::CVType cvt) {
+  switch (cvt.kind()) {
+  case LF_CLASS:
+  case LF_STRUCTURE:
+  case LF_INTERFACE:
+  case LF_UNION:
+  case LF_ENUM:
+    return true;
+  default:
+    return false;
+  }
+}
+
lldb::AccessType
lldb_private::npdb::TranslateMemberAccess(MemberAccess access) {
switch (access) {
diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h
index c5b5cae..d9a0ed8 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.h
@@ -13,6 +13,7 @@
#include "lldb/lldb-enumerations.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/PDB/PDBTypes.h"
#include <tuple>
@@ -21,6 +22,48 @@
namespace lldb_private {
namespace npdb {
+// A deserialized CodeView tag record (class/struct, union or enum) held in a
+// single object, with a discriminator recording which record type is stored.
+struct CVTagRecord {
+  enum Kind { Class, Struct, Union, Enum };
+
+  // Factory: the only way to construct a CVTagRecord from outside the class.
+  static CVTagRecord create(llvm::codeview::CVType type);
+
+  // Which kind of tag this record holds.
+  Kind kind() const { return m_kind; }
+
+  // Access through the common TagRecord base, valid for every kind.
+  const llvm::codeview::TagRecord &asTag() const {
+    switch (m_kind) {
+    case Struct:
+    case Class:
+      return cvclass;
+    case Enum:
+      return cvenum;
+    default:
+      return cvunion;
+    }
+  }
+
+  // Kind-specific accessors. Each asserts that the stored record is of the
+  // requested kind.
+  const llvm::codeview::ClassRecord &asClass() const {
+    assert(m_kind == Struct || m_kind == Class);
+    return cvclass;
+  }
+
+  const llvm::codeview::EnumRecord &asEnum() const {
+    assert(m_kind == Enum);
+    return cvenum;
+  }
+
+  const llvm::codeview::UnionRecord &asUnion() const {
+    assert(m_kind == Union);
+    return cvunion;
+  }
+
+private:
+  CVTagRecord(llvm::codeview::ClassRecord &&c);
+  CVTagRecord(llvm::codeview::UnionRecord &&u);
+  CVTagRecord(llvm::codeview::EnumRecord &&e);
+
+  // Discriminator for the union below. It is declared first and is therefore
+  // initialized before the union member, so constructors must not derive it
+  // from the union's contents.
+  Kind m_kind;
+  // Exactly one member is active, as indicated by m_kind.
+  union {
+    llvm::codeview::ClassRecord cvclass;
+    llvm::codeview::EnumRecord cvenum;
+    llvm::codeview::UnionRecord cvunion;
+  };
+};
+
struct SegmentOffset {
SegmentOffset() = default;
SegmentOffset(uint16_t s, uint32_t o) : segment(s), offset(o) {}
@@ -56,6 +99,7 @@
}
bool IsForwardRefUdt(llvm::codeview::CVType cvt);
+bool IsTagRecord(llvm::codeview::CVType cvt);
lldb::AccessType TranslateMemberAccess(llvm::codeview::MemberAccess access);
llvm::codeview::TypeIndex GetFieldListIndex(llvm::codeview::CVType cvt);
diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
index 676d2eb..f086e4d 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
@@ -13,13 +13,16 @@
#include "clang/AST/CharUnits.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
+#include "clang/AST/Type.h"
#include "lldb/Core/Module.h"
#include "lldb/Core/PluginManager.h"
#include "lldb/Core/StreamBuffer.h"
+#include "lldb/Core/StreamFile.h"
#include "lldb/Symbol/ClangASTContext.h"
#include "lldb/Symbol/ClangASTImporter.h"
#include "lldb/Symbol/ClangExternalASTSourceCommon.h"
+#include "lldb/Symbol/ClangUtil.h"
#include "lldb/Symbol/CompileUnit.h"
#include "lldb/Symbol/LineTable.h"
#include "lldb/Symbol/ObjectFile.h"
@@ -43,14 +46,14 @@
#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
#include "llvm/DebugInfo/PDB/PDBTypes.h"
+#include "llvm/Demangle/MicrosoftDemangle.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "Plugins/Language/CPlusPlus/CPlusPlusNameParser.h"
-
#include "PdbSymUid.h"
#include "PdbUtil.h"
#include "UdtRecordCompleter.h"
+
+#include <cstdlib>
@@ -527,9 +530,60 @@
TypeSystem *ts = GetTypeSystemForLanguage(eLanguageTypeC_plus_plus);
m_clang = llvm::dyn_cast_or_null<ClangASTContext>(ts);
m_importer = llvm::make_unique<ClangASTImporter>();
+
+ PreprocessTpiStream();
lldbassert(m_clang);
}
+// Scans every record in the TPI stream once and fills m_parent_types, which
+// maps the type index of a nested type to the type index of the tag record
+// whose field list declares it. When the nested entry is a forward reference,
+// the full declaration (if one can be found) is mapped to the same parent.
+//
+// Errors encountered while resolving forward references or walking a field
+// list are deliberately dropped: this is a best-effort index.
+void SymbolFileNativePDB::PreprocessTpiStream() {
+  // Visitor that records `parent` as the enclosing type of every nested-type
+  // member it sees in a single field list.
+  struct ProcessTpiStream : public TypeVisitorCallbacks {
+    ProcessTpiStream(PdbIndex &index, TypeIndex parent,
+                     llvm::DenseMap<TypeIndex, TypeIndex> &parents)
+        : index(index), parents(parents), parent(parent) {}
+
+    PdbIndex &index;
+    llvm::DenseMap<TypeIndex, TypeIndex> &parents;
+    TypeIndex parent;
+
+    llvm::Error visitKnownMember(CVMemberRecord &CVR,
+                                 NestedTypeRecord &Record) override {
+      // A nested-type member can name a simple (built-in) type index, e.g.
+      // for a member typedef of a built-in type. Simple indices have no
+      // record in the TPI stream, so they must not be passed to getType(),
+      // and they can never be the index of a tag record we later look up.
+      if (Record.Type.isSimple())
+        return llvm::Error::success();
+
+      parents[Record.Type] = parent;
+      CVType child = index.tpi().getType(Record.Type);
+      if (!IsForwardRefUdt(child))
+        return llvm::Error::success();
+
+      // The member refers to a forward declaration; also map the full
+      // declaration so a lookup by either index finds the parent.
+      llvm::Expected<TypeIndex> full_decl =
+          index.tpi().findFullDeclForForwardRef(Record.Type);
+      if (!full_decl) {
+        llvm::consumeError(full_decl.takeError());
+        return llvm::Error::success();
+      }
+      parents[*full_decl] = parent;
+      return llvm::Error::success();
+    }
+  };
+
+  LazyRandomTypeCollection &types = m_index->tpi().typeCollection();
+
+  for (auto ti = types.getFirst(); ti; ti = types.getNext(*ti)) {
+    CVType type = types.getType(*ti);
+    if (!IsTagRecord(type))
+      continue;
+
+    CVTagRecord tag = CVTagRecord::create(type);
+    // We're looking for LF_NESTTYPE records in the field list, so ignore
+    // forward references (no field list), and anything without a nested class
+    // (since there won't be any LF_NESTTYPE records).
+    if (tag.asTag().isForwardRef() || !tag.asTag().containsNestedClass())
+      continue;
+
+    CVType field_list = m_index->tpi().getType(tag.asTag().FieldList);
+    ProcessTpiStream process(*m_index, *ti, m_parent_types);
+    // consumeError() accepts both success and failure values.
+    llvm::consumeError(visitMemberRecordStream(field_list.data(), process));
+  }
+}
+
uint32_t SymbolFileNativePDB::GetNumCompileUnits() {
const DbiModuleList &modules = m_index->dbi().modules();
uint32_t count = modules.getModuleCount();
@@ -730,16 +784,69 @@
ct, Type::eResolveStateFull);
}
+// Renders a Microsoft demangler AST node to text using the default output
+// flags. Returns an empty string if the output buffer cannot be allocated.
+static std::string RenderDemanglerNode(llvm::ms_demangle::Node *n) {
+  OutputStream OS;
+  // Passing a null buffer asks initializeOutputStream() to allocate one; that
+  // buffer is owned by this function and must be released below.
+  if (!initializeOutputStream(nullptr, nullptr, OS, 1024))
+    return {};
+  n->output(OS, llvm::ms_demangle::OF_Default);
+  OS << '\0';
+  // Copy the NUL-terminated text out, then free the stream's buffer so it is
+  // not leaked on every call.
+  std::string result(OS.getBuffer());
+  std::free(OS.getBuffer());
+  return result;
+}
+
+// Determines the clang DeclContext a tag type belongs in and the unqualified
+// name it should be created with.
+//
+// \param record  The tag record being materialized; its unique (mangled) name
+//                is demangled to obtain the scope components and base name.
+// \param ti      The type index of `record`, used to look up a parent type in
+//                m_parent_types.
+// \return        {context, name}. If the unique name cannot be demangled, the
+//                translation unit and the record's plain name are returned.
+std::pair<clang::DeclContext *, std::string>
+SymbolFileNativePDB::CreateDeclInfoForType(const TagRecord &record,
+                                           TypeIndex ti) {
+  clang::DeclContext *context = m_clang->GetTranslationUnitDecl();
+
+  llvm::ms_demangle::Demangler demangler;
+  StringView sv(record.UniqueName.begin(), record.UniqueName.size());
+  llvm::ms_demangle::TagTypeNode *ttn = demangler.parseTagUniqueName(sv);
+  // The unique name may be missing or may not parse as a tag name. Without a
+  // parsed name there is no scope information to work with, so fall back to
+  // the translation unit and the name stored in the record.
+  if (demangler.Error || !ttn || !ttn->QualifiedName ||
+      !ttn->QualifiedName->Components ||
+      ttn->QualifiedName->Components->Count == 0)
+    return {context, record.Name.str()};
+
+  std::string uname =
+      RenderDemanglerNode(ttn->QualifiedName->getUnqualifiedIdentifier());
+
+  // Every component except the last one is an enclosing scope.
+  llvm::ms_demangle::NodeArrayNode *name_components =
+      ttn->QualifiedName->Components;
+  llvm::ArrayRef<llvm::ms_demangle::Node *> scopes(name_components->Nodes,
+                                                   name_components->Count - 1);
+
+  // If this type doesn't have a parent type in the debug info, then the best we
+  // can do is to say that it's either a series of namespaces (if the scope is
+  // non-empty), or the translation unit (if the scope is empty).
+  auto parent_iter = m_parent_types.find(ti);
+  if (parent_iter == m_parent_types.end()) {
+    for (llvm::ms_demangle::Node *scope : scopes) {
+      std::string str = RenderDemanglerNode(scope);
+      context = m_clang->GetUniqueNamespaceDeclaration(str.c_str(), context);
+    }
+    return {context, uname};
+  }
+
+  // Otherwise, all we need to do is get the parent type of this type and
+  // recurse into our lazy type creation / AST reconstruction logic to get an
+  // LLDB TypeSP for the parent. This will cause the AST to automatically get
+  // the right DeclContext created for any parent.
+  TypeSP parent = GetOrCreateType(parent_iter->second);
+  if (!parent)
+    return {context, uname};
+  CompilerType parent_ct = parent->GetForwardCompilerType();
+  clang::QualType qt = ClangUtil::GetCanonicalQualType(parent_ct);
+  // If the parent did not resolve to a clang tag declaration, keep the type at
+  // translation-unit scope instead of dereferencing a null type or decl.
+  if (qt.isNull())
+    return {context, uname};
+  clang::TagDecl *parent_decl = qt->getAsTagDecl();
+  if (!parent_decl)
+    return {context, uname};
+  context = clang::TagDecl::castToDeclContext(parent_decl);
+  return {context, uname};
+}
+
lldb::TypeSP SymbolFileNativePDB::CreateClassStructUnion(
- PdbSymUid type_uid, llvm::StringRef name, size_t size,
+ PdbSymUid type_uid, const llvm::codeview::TagRecord &record, size_t size,
clang::TagTypeKind ttk, clang::MSInheritanceAttr::Spelling inheritance) {
- // Ignore unnamed-tag UDTs.
- name = DropNameScope(name);
- if (name.empty())
- return nullptr;
-
- clang::DeclContext *decl_context = m_clang->GetTranslationUnitDecl();
+ const PdbTypeSymId &tid = type_uid.asTypeSym();
+ TypeIndex ti(tid.index);
+ clang::DeclContext *decl_context = nullptr;
+ std::string uname;
+ std::tie(decl_context, uname) = CreateDeclInfoForType(record, ti);
lldb::AccessType access =
(ttk == clang::TTK_Class) ? lldb::eAccessPrivate : lldb::eAccessPublic;
@@ -749,8 +856,9 @@
metadata.SetIsDynamicCXXType(false);
CompilerType ct =
- m_clang->CreateRecordType(decl_context, access, name.str().c_str(), ttk,
+ m_clang->CreateRecordType(decl_context, access, uname.c_str(), ttk,
lldb::eLanguageTypeC_plus_plus, &metadata);
+
lldbassert(ct.IsValid());
clang::CXXRecordDecl *record_decl =
@@ -771,7 +879,7 @@
// FIXME: Search IPI stream for LF_UDT_MOD_SRC_LINE.
Declaration decl;
return std::make_shared<Type>(type_uid.toOpaqueId(), m_clang->GetSymbolFile(),
- ConstString(name), size, nullptr,
+ ConstString(uname), size, nullptr,
LLDB_INVALID_UID, Type::eEncodingIsUID, decl,
ct, Type::eResolveStateForward);
}
@@ -782,14 +890,13 @@
clang::MSInheritanceAttr::Spelling inheritance =
GetMSInheritance(m_index->tpi().typeCollection(), cr);
- return CreateClassStructUnion(type_uid, cr.getName(), cr.getSize(), ttk,
- inheritance);
+ return CreateClassStructUnion(type_uid, cr, cr.getSize(), ttk, inheritance);
}
lldb::TypeSP SymbolFileNativePDB::CreateTagType(PdbSymUid type_uid,
const UnionRecord &ur) {
return CreateClassStructUnion(
- type_uid, ur.getName(), ur.getSize(), clang::TTK_Union,
+ type_uid, ur, ur.getSize(), clang::TTK_Union,
clang::MSInheritanceAttr::Spelling::Keyword_single_inheritance);
}
diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h
index fd842b9..6d3e80b 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.h
@@ -162,6 +162,11 @@
void DumpClangAST(Stream &s) override;
private:
+ std::pair<clang::DeclContext *, std::string>
+ CreateDeclInfoForType(const llvm::codeview::TagRecord &record,
+ llvm::codeview::TypeIndex ti);
+
+ void PreprocessTpiStream();
size_t FindTypesByName(llvm::StringRef name, uint32_t max_matches,
TypeMap &types);
@@ -180,10 +185,9 @@
const llvm::codeview::ArrayRecord &ar);
lldb::TypeSP CreateProcedureType(PdbSymUid type_uid,
const llvm::codeview::ProcedureRecord &pr);
- lldb::TypeSP
- CreateClassStructUnion(PdbSymUid type_uid, llvm::StringRef name, size_t size,
- clang::TagTypeKind ttk,
- clang::MSInheritanceAttr::Spelling inheritance);
+ lldb::TypeSP CreateClassStructUnion(
+ PdbSymUid type_uid, const llvm::codeview::TagRecord &record, size_t size,
+ clang::TagTypeKind ttk, clang::MSInheritanceAttr::Spelling inheritance);
lldb::FunctionSP GetOrCreateFunction(PdbSymUid func_uid,
const SymbolContext &sc);
@@ -209,6 +213,8 @@
llvm::DenseMap<clang::TagDecl *, DeclStatus> m_decl_to_status;
llvm::DenseMap<lldb::user_id_t, clang::TagDecl *> m_uid_to_decl;
+ llvm::DenseMap<llvm::codeview::TypeIndex, llvm::codeview::TypeIndex>
+ m_parent_types;
llvm::DenseMap<lldb::user_id_t, lldb::VariableSP> m_global_vars;
llvm::DenseMap<lldb::user_id_t, lldb::FunctionSP> m_functions;