[DWARF v5] Rework of string offsets table reader

Reorganizes the DWARF consumer to derive the string offsets table 
contribution's format from the contribution header instead of 
(incorrectly) from the unit's format.

Reviewers: JDevliegehere, aprantl

Differential Revision: https://reviews.llvm.org/D41146
 

llvm-svn: 321295
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index a5defa9..5397994 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -88,70 +88,101 @@
   }
 }
 
-static void
-dumpDWARFv5StringOffsetsSection(raw_ostream &OS, StringRef SectionName,
-                                const DWARFObject &Obj,
-                                const DWARFSection &StringOffsetsSection,
-                                StringRef StringSection, bool LittleEndian) {
+using ContributionCollection =
+    std::vector<Optional<StrOffsetsContributionDescriptor>>;
+
+// Collect all the contributions to the string offsets table from all units,
+// sort them by their starting offsets and remove duplicates.
+static ContributionCollection
+collectContributionData(DWARFContext::cu_iterator_range CUs,
+                        DWARFContext::tu_section_iterator_range TUSs) {
+  ContributionCollection Contributions;
+  for (const auto &CU : CUs)
+    Contributions.push_back(CU->getStringOffsetsTableContribution());
+  for (const auto &TUS : TUSs)
+    for (const auto &TU : TUS)
+      Contributions.push_back(TU->getStringOffsetsTableContribution());
+
+  // Sort the contributions so that any invalid ones are placed at
+  // the start of the contributions vector. This way they are reported
+  // first.
+  std::sort(Contributions.begin(), Contributions.end(),
+            [](const Optional<StrOffsetsContributionDescriptor> &L,
+               const Optional<StrOffsetsContributionDescriptor> &R) {
+              if (L && R) return L->Base < R->Base;
+              return R.hasValue();
+            });
+
+  // Uniquify contributions, as it is possible that units (specifically
+  // type units in dwo or dwp files) share contributions. We don't want
+  // to report them more than once.
+  Contributions.erase(
+      std::unique(Contributions.begin(), Contributions.end(),
+                  [](const Optional<StrOffsetsContributionDescriptor> &L,
+                     const Optional<StrOffsetsContributionDescriptor> &R) {
+                    if (L && R)
+                      return L->Base == R->Base && L->Size == R->Size;
+                    return false;
+                  }),
+      Contributions.end());
+  return Contributions;
+}
+
+static void dumpDWARFv5StringOffsetsSection(
+    raw_ostream &OS, StringRef SectionName, const DWARFObject &Obj,
+    const DWARFSection &StringOffsetsSection, StringRef StringSection,
+    DWARFContext::cu_iterator_range CUs,
+    DWARFContext::tu_section_iterator_range TUSs, bool LittleEndian) {
+  auto Contributions = collectContributionData(CUs, TUSs);
   DWARFDataExtractor StrOffsetExt(Obj, StringOffsetsSection, LittleEndian, 0);
-  uint32_t Offset = 0;
+  DataExtractor StrData(StringSection, LittleEndian, 0);
   uint64_t SectionSize = StringOffsetsSection.Data.size();
-
-  while (Offset < SectionSize) {
-    unsigned Version = 0;
-    DwarfFormat Format = DWARF32;
-    unsigned EntrySize = 4;
-    // Perform validation and extract the segment size from the header.
-    if (!StrOffsetExt.isValidOffsetForDataOfSize(Offset, 4)) {
-      OS << "error: invalid contribution to string offsets table in section ."
-         << SectionName << ".\n";
-      return;
-    }
-    uint32_t ContributionStart = Offset;
-    uint64_t ContributionSize = StrOffsetExt.getU32(&Offset);
-    // A contribution size of 0xffffffff indicates DWARF64, with the actual size
-    // in the following 8 bytes. Otherwise, the DWARF standard mandates that
-    // the contribution size must be at most 0xfffffff0.
-    if (ContributionSize == 0xffffffff) {
-      if (!StrOffsetExt.isValidOffsetForDataOfSize(Offset, 8)) {
-        OS << "error: invalid contribution to string offsets table in section ."
-           << SectionName << ".\n";
-        return;
-      }
-      Format = DWARF64;
-      EntrySize = 8;
-      ContributionSize = StrOffsetExt.getU64(&Offset);
-    } else if (ContributionSize > 0xfffffff0) {
+  uint32_t Offset = 0;
+  for (auto &Contribution : Contributions) {
+    // Report an ill-formed contribution.
+    if (!Contribution) {
       OS << "error: invalid contribution to string offsets table in section ."
          << SectionName << ".\n";
       return;
     }
 
-    // We must ensure that we don't read a partial record at the end, so we
-    // validate for a multiple of EntrySize. Also, we're expecting a version
-    // number and padding, which adds an additional 4 bytes.
-    uint64_t ValidationSize =
-        4 + ((ContributionSize + EntrySize - 1) & (-(uint64_t)EntrySize));
-    if (!StrOffsetExt.isValidOffsetForDataOfSize(Offset, ValidationSize)) {
-      OS << "error: contribution to string offsets table in section ."
-         << SectionName << " has invalid length.\n";
+    dwarf::DwarfFormat Format = Contribution->getFormat();
+    uint16_t Version = Contribution->getVersion();
+    uint64_t ContributionHeader = Contribution->Base;
+    // In DWARF v5 there is a contribution header that immediately precedes
+    // the string offsets base (the location we have previously retrieved from
+    // the CU DIE's DW_AT_str_offsets attribute). The header is located either
+    // 8 or 16 bytes before the base, depending on the contribution's format.
+    if (Version >= 5)
+      ContributionHeader -= Format == DWARF32 ? 8 : 16;
+
+    // Detect overlapping contributions.
+    if (Offset > ContributionHeader) {
+      OS << "error: overlapping contributions to string offsets table in "
+            "section ."
+         << SectionName << ".\n";
       return;
     }
-
-    Version = StrOffsetExt.getU16(&Offset);
-    Offset += 2;
-    OS << format("0x%8.8x: ", ContributionStart);
-    OS << "Contribution size = " << ContributionSize
+    // Report a gap in the table.
+    if (Offset < ContributionHeader) {
+      OS << format("0x%8.8x: Gap, length = ", Offset);
+      OS << (ContributionHeader - Offset) << "\n";
+    }
+    OS << format("0x%8.8x: ", ContributionHeader);
+    OS << "Contribution size = " << Contribution->Size
+       << ", Format = " << (Format == DWARF32 ? "DWARF32" : "DWARF64")
        << ", Version = " << Version << "\n";
 
-    uint32_t ContributionBase = Offset;
-    DataExtractor StrData(StringSection, LittleEndian, 0);
-    while (Offset - ContributionBase < ContributionSize) {
+    Offset = Contribution->Base;
+    unsigned EntrySize = Contribution->getDwarfOffsetByteSize();
+    while (Offset - Contribution->Base < Contribution->Size) {
       OS << format("0x%8.8x: ", Offset);
-      // FIXME: We can only extract strings in DWARF32 format at the moment.
+      // FIXME: We can only extract strings if the offset fits in 32 bits.
       uint64_t StringOffset =
           StrOffsetExt.getRelocatedValue(EntrySize, &Offset);
-      if (Format == DWARF32) {
+      // Extract the string if we can and display it. Otherwise just report
+      // the offset.
+      if (StringOffset <= std::numeric_limits<uint32_t>::max()) {
         uint32_t StringOffset32 = (uint32_t)StringOffset;
         OS << format("%8.8x ", StringOffset32);
         const char *S = StrData.getCStr(&StringOffset32);
@@ -162,6 +193,11 @@
       OS << "\n";
     }
   }
+  // Report a gap at the end of the table.
+  if (Offset < SectionSize) {
+    OS << format("0x%8.8x: Gap, length = ", Offset);
+    OS << (SectionSize - Offset) << "\n";
+  }
 }
 
 // Dump a DWARF string offsets section. This may be a DWARF v5 formatted
@@ -170,17 +206,18 @@
 // a header containing size and version number. Alternatively, it may be a
 // monolithic series of string offsets, as generated by the pre-DWARF v5
 // implementation of split DWARF.
-static void dumpStringOffsetsSection(raw_ostream &OS, StringRef SectionName,
-                                     const DWARFObject &Obj,
-                                     const DWARFSection &StringOffsetsSection,
-                                     StringRef StringSection, bool LittleEndian,
-                                     unsigned MaxVersion) {
+static void dumpStringOffsetsSection(
+    raw_ostream &OS, StringRef SectionName, const DWARFObject &Obj,
+    const DWARFSection &StringOffsetsSection, StringRef StringSection,
+    DWARFContext::cu_iterator_range CUs,
+    DWARFContext::tu_section_iterator_range TUSs, bool LittleEndian,
+    unsigned MaxVersion) {
   // If we have at least one (compile or type) unit with DWARF v5 or greater,
   // we assume that the section is formatted like a DWARF v5 string offsets
   // section.
   if (MaxVersion >= 5)
     dumpDWARFv5StringOffsetsSection(OS, SectionName, Obj, StringOffsetsSection,
-                                    StringSection, LittleEndian);
+                                    StringSection, CUs, TUSs, LittleEndian);
   else {
     DataExtractor strOffsetExt(StringOffsetsSection.Data, LittleEndian, 0);
     uint32_t offset = 0;
@@ -468,12 +505,14 @@
                  DObj->getStringOffsetSection().Data))
     dumpStringOffsetsSection(
         OS, "debug_str_offsets", *DObj, DObj->getStringOffsetSection(),
-        DObj->getStringSection(), isLittleEndian(), getMaxVersion());
+        DObj->getStringSection(), compile_units(), type_unit_sections(),
+        isLittleEndian(), getMaxVersion());
   if (shouldDump(ExplicitDWO, ".debug_str_offsets.dwo", DIDT_ID_DebugStrOffsets,
                  DObj->getStringOffsetDWOSection().Data))
     dumpStringOffsetsSection(
         OS, "debug_str_offsets.dwo", *DObj, DObj->getStringOffsetDWOSection(),
-        DObj->getStringDWOSection(), isLittleEndian(), getMaxVersion());
+        DObj->getStringDWOSection(), dwo_compile_units(),
+        dwo_type_unit_sections(), isLittleEndian(), getMaxVersion());
 
   if (shouldDump(Explicit, ".gnu_index", DIDT_ID_GdbIndex,
                  DObj->getGdbIndexSection())) {