[DWARF] Adding support for the DWARF v5 string offsets table (consumer/reader part only).

Reviewers: dblaikie, aprantl

Differential Revision: https://reviews.llvm.org/D32779

llvm-svn: 304759
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index 1be156d..a781aa1 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -84,6 +84,123 @@
   Accel.dump(OS);
 }
 
+// Dump a section laid out as a DWARF v5 string offsets table: a sequence of
+// contributions, each made of a size/version header and its string offsets.
+static void
+dumpDWARFv5StringOffsetsSection(raw_ostream &OS, StringRef SectionName,
+                                const DWARFSection &StringOffsetsSection,
+                                StringRef StringSection, bool LittleEndian) {
+  DataExtractor StrOffsetExt(StringOffsetsSection.Data, LittleEndian, 0);
+  uint32_t Offset = 0;
+  uint64_t SectionSize = StringOffsetsSection.Data.size();
+
+  while (Offset < SectionSize) {
+    DwarfFormat Format = DWARF32;
+    unsigned EntrySize = 4;
+    // Perform validation and extract the segment size from the header.
+    if (!StrOffsetExt.isValidOffsetForDataOfSize(Offset, 4)) {
+      OS << "error: invalid contribution to string offsets table in section ."
+         << SectionName << ".\n";
+      return;
+    }
+    uint32_t ContributionStart = Offset;
+    uint64_t ContributionSize = StrOffsetExt.getU32(&Offset);
+    // A contribution size of 0xffffffff indicates DWARF64, with the actual size
+    // in the following 8 bytes. Otherwise, the DWARF standard reserves the
+    // values 0xfffffff0 and above, so the size must be less than 0xfffffff0.
+    if (ContributionSize == 0xffffffff) {
+      if (!StrOffsetExt.isValidOffsetForDataOfSize(Offset, 8)) {
+        OS << "error: invalid contribution to string offsets table in section ."
+           << SectionName << ".\n";
+        return;
+      }
+      Format = DWARF64;
+      EntrySize = 8;
+      ContributionSize = StrOffsetExt.getU64(&Offset);
+    } else if (ContributionSize >= 0xfffffff0) {
+      OS << "error: invalid contribution to string offsets table in section ."
+         << SectionName << ".\n";
+      return;
+    }
+
+    // Version and padding take 4 more bytes. The entries must fit in the rest,
+    // rounded down to a multiple of EntrySize so no partial record is read;
+    // unlike rounding the size up, this cannot wrap for a huge DWARF64 size.
+    uint64_t Remaining = SectionSize - Offset;
+    if (Remaining < 4 ||
+        ContributionSize > ((Remaining - 4) & -(uint64_t)EntrySize)) {
+      OS << "error: contribution to string offsets table in section ."
+         << SectionName << " has invalid length.\n";
+      return;
+    }
+
+    unsigned Version = StrOffsetExt.getU16(&Offset);
+    Offset += 2; // Skip the 2 bytes that follow the version number.
+    OS << format("0x%8.8x: ", ContributionStart) << "Contribution size = "
+       << ContributionSize << ", Version = " << Version << "\n";
+
+    uint32_t ContributionBase = Offset;
+    DataExtractor StrData(StringSection, LittleEndian, 0);
+    while (Offset - ContributionBase < ContributionSize) {
+      OS << format("0x%8.8x: ", Offset);
+      // FIXME: We can only extract strings in DWARF32 format at the moment.
+      uint64_t StringOffset = getRelocatedValue(
+          StrOffsetExt, EntrySize, &Offset, &StringOffsetsSection.Relocs);
+      if (Format == DWARF32) {
+        uint32_t StringOffset32 = (uint32_t)StringOffset;
+        OS << format("%8.8x ", StringOffset32);
+        const char *S = StrData.getCStr(&StringOffset32);
+        if (S)
+          OS << format("\"%s\"", S);
+      } else
+        OS << format("%16.16llx ", (unsigned long long)StringOffset);
+      OS << "\n";
+    }
+  }
+}
+
+// Dump the contents of a DWARF string offsets section under the heading
+// ".<SectionName> contents:". Two layouts are handled: the DWARF v5 one,
+// where every compile or type unit contributes a run of string offsets
+// preceded by a header carrying size and version number, and the monolithic
+// series of string offsets generated by the pre-DWARF v5 implementation of
+// split DWARF. Nothing is printed for an empty section.
+static void dumpStringOffsetsSection(raw_ostream &OS, StringRef SectionName,
+                                     const DWARFSection &StringOffsetsSection,
+                                     StringRef StringSection, bool LittleEndian,
+                                     unsigned MaxVersion) {
+  if (StringOffsetsSection.Data.empty())
+    return;
+  OS << "\n." << SectionName << " contents:\n";
+  // Once at least one (compile or type) unit has DWARF v5 or greater, the
+  // section is assumed to be formatted like a DWARF v5 string offsets section.
+  if (MaxVersion >= 5) {
+    dumpDWARFv5StringOffsetsSection(OS, SectionName, StringOffsetsSection,
+                                    StringSection, LittleEndian);
+    return;
+  }
+
+  const uint64_t EntrySize = sizeof(uint32_t);
+  uint64_t DumpSize = StringOffsetsSection.Data.size();
+  // Report a size that is not a multiple of the entry size, then leave the
+  // trailing partial entry out of the dump.
+  if (DumpSize % EntrySize != 0) {
+    OS << "error: size of ." << SectionName << " is not a multiple of "
+       << sizeof(uint32_t) << ".\n";
+    DumpSize -= DumpSize % EntrySize;
+  }
+  DataExtractor OffsetsData(StringOffsetsSection.Data, LittleEndian, 0);
+  DataExtractor StrData(StringSection, LittleEndian, 0);
+  for (uint32_t Cursor = 0; Cursor < DumpSize;) {
+    OS << format("0x%8.8x: ", Cursor);
+    uint32_t StringOffset = OffsetsData.getU32(&Cursor);
+    OS << format("%8.8x  ", StringOffset);
+    if (const char *S = StrData.getCStr(&StringOffset))
+      OS << format("\"%s\"", S);
+    OS << "\n";
+  }
+}
+
 void DWARFContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts){
 
   DIDumpType DumpType = DumpOpts.DumpType;
@@ -258,17 +375,15 @@
                        true /* GnuStyle */)
         .dump("debug_gnu_pubtypes", OS);
 
-  if ((DumpType == DIDT_All || DumpType == DIDT_StrOffsetsDwo) &&
-      !getStringOffsetDWOSection().empty()) {
-    OS << "\n.debug_str_offsets.dwo contents:\n";
-    DataExtractor strOffsetExt(getStringOffsetDWOSection(), isLittleEndian(),
-                               0);
-    offset = 0;
-    uint64_t size = getStringOffsetDWOSection().size();
-    while (offset < size) {
-      OS << format("0x%8.8x: ", offset);
-      OS << format("%8.8x\n", strOffsetExt.getU32(&offset));
-    }
+  if (DumpType == DIDT_All || DumpType == DIDT_StrOffsets)
+    dumpStringOffsetsSection(OS, "debug_str_offsets", getStringOffsetSection(),
+                             getStringSection(), isLittleEndian(),
+                             getMaxVersion());
+
+  if (DumpType == DIDT_All || DumpType == DIDT_StrOffsetsDwo) {
+    dumpStringOffsetsSection(OS, "debug_str_offsets.dwo",
+                             getStringOffsetDWOSection(), getStringDWOSection(),
+                             isLittleEndian(), getMaxVersion());
   }
 
   if ((DumpType == DIDT_All || DumpType == DIDT_GdbIndex) &&
@@ -1109,6 +1224,10 @@
       TypesDWOSections[Section].Data = data;
     }
 
+    // Map platform specific debug section names to DWARF standard section
+    // names.
+    name = Obj.mapDebugSectionName(name);
+
     if (RelocatedSection == Obj.section_end())
       continue;
 
@@ -1141,6 +1260,7 @@
             .Case("debug_loc", &LocSection.Relocs)
             .Case("debug_info.dwo", &InfoDWOSection.Relocs)
             .Case("debug_line", &LineSection.Relocs)
+            .Case("debug_str_offsets", &StringOffsetSection.Relocs)
             .Case("debug_ranges", &RangeSection.Relocs)
             .Case("debug_addr", &AddrSection.Relocs)
             .Case("apple_names", &AppleNamesSection.Relocs)
@@ -1211,6 +1331,7 @@
       .Case("debug_frame", &DebugFrameSection)
       .Case("eh_frame", &EHFrameSection)
       .Case("debug_str", &StringSection)
+      .Case("debug_str_offsets", &StringOffsetSection.Data)
       .Case("debug_ranges", &RangeSection.Data)
       .Case("debug_macinfo", &MacinfoSection)
       .Case("debug_pubnames", &PubNamesSection)
@@ -1222,7 +1343,7 @@
       .Case("debug_loc.dwo", &LocDWOSection.Data)
       .Case("debug_line.dwo", &LineDWOSection.Data)
       .Case("debug_str.dwo", &StringDWOSection)
-      .Case("debug_str_offsets.dwo", &StringOffsetDWOSection)
+      .Case("debug_str_offsets.dwo", &StringOffsetDWOSection.Data)
       .Case("debug_addr", &AddrSection.Data)
       .Case("apple_names", &AppleNamesSection.Data)
       .Case("apple_types", &AppleTypesSection.Data)
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index 0963d7b..8d23a9a 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -301,6 +301,7 @@
     return (FC == FC_Address);
   case DW_FORM_GNU_str_index:
   case DW_FORM_GNU_strp_alt:
+  case DW_FORM_strx:
     return (FC == FC_String);
   case DW_FORM_implicit_const:
     return (FC == FC_Constant);
@@ -415,6 +416,7 @@
       break;
     case DW_FORM_GNU_addr_index:
     case DW_FORM_GNU_str_index:
+    case DW_FORM_strx:
       Value.uval = Data.getULEB128(OffsetPtr);
       break;
     default:
@@ -542,6 +544,7 @@
     OS << format(" .debug_str[0x%8.8x] = ", (uint32_t)UValue);
     dumpString(OS);
     break;
+  case DW_FORM_strx:
   case DW_FORM_GNU_str_index:
     OS << format(" indexed (%8.8x) string = ", (uint32_t)UValue);
     dumpString(OS);
@@ -620,10 +623,11 @@
   if (Form == DW_FORM_GNU_strp_alt || U == nullptr)
     return None;
   uint32_t Offset = Value.uval;
-  if (Form == DW_FORM_GNU_str_index) {
-    uint32_t StrOffset;
+  if (Form == DW_FORM_GNU_str_index || Form == DW_FORM_strx) {
+    uint64_t StrOffset;
     if (!U->getStringOffsetSectionItem(Offset, StrOffset))
       return None;
+    StrOffset += U->getStringOffsetSectionRelocation(Offset);
     Offset = StrOffset;
   }
   if (const char *Str = U->getStringExtractor().getCStr(&Offset)) {
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index c5add6a..eb138e2 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -33,8 +33,9 @@
 
 void DWARFUnitSectionBase::parse(DWARFContext &C, const DWARFSection &Section) {
   parseImpl(C, Section, C.getDebugAbbrev(), &C.getRangeSection(),
-            C.getStringSection(), StringRef(), &C.getAddrSection(),
-            C.getLineSection().Data, C.isLittleEndian(), false);
+            C.getStringSection(), C.getStringOffsetSection(),
+            &C.getAddrSection(), C.getLineSection().Data, C.isLittleEndian(),
+            false);
 }
 
 void DWARFUnitSectionBase::parseDWO(DWARFContext &C,
@@ -48,19 +49,14 @@
 
 DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section,
                      const DWARFDebugAbbrev *DA, const DWARFSection *RS,
-                     StringRef SS, StringRef SOS, const DWARFSection *AOS,
-                     StringRef LS, bool LE, bool IsDWO,
+                     StringRef SS, const DWARFSection &SOS,
+                     const DWARFSection *AOS, StringRef LS, bool LE, bool IsDWO,
                      const DWARFUnitSectionBase &UnitSection,
                      const DWARFUnitIndex::Entry *IndexEntry)
     : Context(DC), InfoSection(Section), Abbrev(DA), RangeSection(RS),
-      LineSection(LS), StringSection(SS), StringOffsetSection([&]() {
-        if (IndexEntry)
-          if (const auto *C = IndexEntry->getOffset(DW_SECT_STR_OFFSETS))
-            return SOS.slice(C->Offset, C->Offset + C->Length);
-        return SOS;
-      }()),
-      AddrOffsetSection(AOS), isLittleEndian(LE), isDWO(IsDWO),
-      UnitSection(UnitSection), IndexEntry(IndexEntry) {
+      LineSection(LS), StringSection(SS), StringOffsetSection(SOS),
+      StringOffsetSectionBase(0), AddrOffsetSection(AOS), isLittleEndian(LE),
+      isDWO(IsDWO), UnitSection(UnitSection), IndexEntry(IndexEntry) {
   clear();
 }
 
@@ -77,17 +73,25 @@
 }
 
 bool DWARFUnit::getStringOffsetSectionItem(uint32_t Index,
-                                                  uint32_t &Result) const {
-  // FIXME: string offset section entries are 8-byte for DWARF64.
-  const uint32_t ItemSize = 4;
-  uint32_t Offset = Index * ItemSize;
-  if (StringOffsetSection.size() < Offset + ItemSize)
+                                           uint64_t &Result) const {
+  unsigned ItemSize = getFormat() == DWARF64 ? 8 : 4; // Entry size in bytes.
+  uint32_t Offset = StringOffsetSectionBase + Index * ItemSize; // Might wrap.
+  if (StringOffsetSection.Data.size() < Offset + ItemSize) // Past the end.
     return false;
-  DataExtractor DA(StringOffsetSection, isLittleEndian, 0);
-  Result = DA.getU32(&Offset);
+  DataExtractor DA(StringOffsetSection.Data, isLittleEndian, 0);
+  Result = ItemSize == 4 ? DA.getU32(&Offset) : DA.getU64(&Offset);
   return true;
 }
 
+uint64_t DWARFUnit::getStringOffsetSectionRelocation(uint32_t Index) const {
+  // The relocation map is searched by the byte offset of entry number Index.
+  const unsigned EntrySize = (getFormat() == DWARF64) ? 8 : 4;
+  const uint64_t EntryOffset = StringOffsetSectionBase + Index * EntrySize;
+  const auto &Relocs = getStringOffsetsRelocMap();
+  const auto Found = Relocs.find(EntryOffset);
+  return Found == Relocs.end() ? 0 : Found->second.Value;
+}
+
 bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) {
   Length = debug_info.getU32(offset_ptr);
   Version = debug_info.getU16(offset_ptr);
@@ -119,6 +123,9 @@
   if (!LengthOK || !VersionOK || !AddrSizeOK)
     return false;
 
+  // Keep track of the highest DWARF version we encounter across all units.
+  Context.setMaxVersionIfGreater(Version);
+
   Abbrevs = Abbrev->getAbbreviationDeclarationSet(AbbrOffset);
   return Abbrevs != nullptr;
 }
@@ -242,6 +249,17 @@
       setBaseAddress(*BaseAddr);
     AddrOffsetSectionBase = toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base), 0);
     RangeSectionBase = toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0);
+
+    // In general, we derive the offset of the unit's contribution to the
+    // debug_str_offsets{.dwo} section from the unit DIE's
+    // DW_AT_str_offsets_base attribute. In dwp files we add to it the offset
+    // we get from the index table.
+    StringOffsetSectionBase =
+        toSectionOffset(UnitDie.find(DW_AT_str_offsets_base), 0);
+    if (IndexEntry)
+      if (const auto *C = IndexEntry->getOffset(DW_SECT_STR_OFFSETS))
+        StringOffsetSectionBase += C->Offset;
+
     // Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for
     // skeleton CU DIE, so that DWARF users not aware of it are not broken.
   }