[DWARF] Generate DWARF v5 string offsets tables along with strx* index forms.

Summary: This is the producer side for DWARF v5 string offsets tables. The reader/consumer
side was committed with r321295. All compile and type units in a module share a 
contribution to the string offsets table. Indirect strings use the strx{1,2,3,4} index forms.

Reviewers: dblaikie, aprantl, JDevliegehere

Differential Revision: https://reviews.llvm.org/D42021

llvm-svn: 323546
diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index b3148db..0832544 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -388,6 +388,7 @@
   case dwarf::DW_FORM_data2:
   case dwarf::DW_FORM_strx2:
   case dwarf::DW_FORM_addrx2:
+  case dwarf::DW_FORM_strx3:
   case dwarf::DW_FORM_strp:
   case dwarf::DW_FORM_ref4:
   case dwarf::DW_FORM_data4:
@@ -410,6 +411,7 @@
   case dwarf::DW_FORM_GNU_str_index:
   case dwarf::DW_FORM_GNU_addr_index:
   case dwarf::DW_FORM_ref_udata:
+  case dwarf::DW_FORM_strx:
   case dwarf::DW_FORM_udata:
     Asm->EmitULEB128(Integer);
     return;
@@ -438,6 +440,8 @@
   case dwarf::DW_FORM_strx2:
   case dwarf::DW_FORM_addrx2:
     return sizeof(int16_t);
+  case dwarf::DW_FORM_strx3:
+    return 3;
   case dwarf::DW_FORM_ref4:
   case dwarf::DW_FORM_data4:
   case dwarf::DW_FORM_ref_sup4:
@@ -469,6 +473,7 @@
   case dwarf::DW_FORM_GNU_str_index:
   case dwarf::DW_FORM_GNU_addr_index:
   case dwarf::DW_FORM_ref_udata:
+  case dwarf::DW_FORM_strx:
   case dwarf::DW_FORM_udata:
     return getULEB128Size(Integer);
   case dwarf::DW_FORM_sdata:
@@ -564,44 +569,46 @@
 /// EmitValue - Emit string value.
 ///
 void DIEString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
-  assert(
-      (Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_GNU_str_index) &&
-      "Expected valid string form");
-
   // Index of string in symbol table.
-  if (Form == dwarf::DW_FORM_GNU_str_index) {
+  switch (Form) {
+  case dwarf::DW_FORM_GNU_str_index:
+  case dwarf::DW_FORM_strx:
+  case dwarf::DW_FORM_strx1:
+  case dwarf::DW_FORM_strx2:
+  case dwarf::DW_FORM_strx3:
+  case dwarf::DW_FORM_strx4:
     DIEInteger(S.getIndex()).EmitValue(AP, Form);
     return;
-  }
-
-  // Relocatable symbol.
-  assert(Form == dwarf::DW_FORM_strp);
-  if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) {
-    DIELabel(S.getSymbol()).EmitValue(AP, Form);
+  case dwarf::DW_FORM_strp:
+    if (AP->MAI->doesDwarfUseRelocationsAcrossSections())
+      DIELabel(S.getSymbol()).EmitValue(AP, Form);
+    else
+      DIEInteger(S.getOffset()).EmitValue(AP, Form);
     return;
+  default:
+    llvm_unreachable("Expected valid string form");
   }
-
-  // Offset into symbol table.
-  DIEInteger(S.getOffset()).EmitValue(AP, Form);
 }
 
 /// SizeOf - Determine size of delta value in bytes.
 ///
 unsigned DIEString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
-  assert(
-      (Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_GNU_str_index) &&
-      "Expected valid string form");
-
   // Index of string in symbol table.
-  if (Form == dwarf::DW_FORM_GNU_str_index)
+  switch (Form) {
+  case dwarf::DW_FORM_GNU_str_index:
+  case dwarf::DW_FORM_strx:
+  case dwarf::DW_FORM_strx1:
+  case dwarf::DW_FORM_strx2:
+  case dwarf::DW_FORM_strx3:
+  case dwarf::DW_FORM_strx4:
     return DIEInteger(S.getIndex()).SizeOf(AP, Form);
-
-  // Relocatable symbol.
-  if (AP->MAI->doesDwarfUseRelocationsAcrossSections())
-    return DIELabel(S.getSymbol()).SizeOf(AP, Form);
-
-  // Offset into symbol table.
-  return DIEInteger(S.getOffset()).SizeOf(AP, Form);
+  case dwarf::DW_FORM_strp:
+    if (AP->MAI->doesDwarfUseRelocationsAcrossSections())
+      return DIELabel(S.getSymbol()).SizeOf(AP, Form);
+    return DIEInteger(S.getOffset()).SizeOf(AP, Form);
+  default:
+    llvm_unreachable("Expected valid string form");
+  }
 }
 
 LLVM_DUMP_METHOD
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 1e098cc..a417bc7 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -323,6 +323,12 @@
   // GDB does not fully support the DWARF 4 representation for bitfields.
   UseDWARF2Bitfields = (DwarfVersion < 4) || tuneForGDB();
 
+  // The DWARF v5 string offsets table has - possibly shared - contributions
+  // from each compile and type unit each preceded by a header. The string
+  // offsets table used by the pre-DWARF v5 split-DWARF implementation uses
+  // a monolithic string offsets table without any header.
+  UseSegmentedStringOffsetsTable = DwarfVersion >= 5;
+
   Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion);
 }
 
@@ -488,6 +494,10 @@
                 DIUnit->getSourceLanguage());
   NewCU.addString(Die, dwarf::DW_AT_name, FN);
 
+  // Add DW_str_offsets_base to the unit DIE, except for split units.
+  if (useSegmentedStringOffsetsTable() && !useSplitDwarf())
+    NewCU.addStringOffsetsStart();
+
   if (!useSplitDwarf()) {
     NewCU.initStmtList();
 
@@ -592,6 +602,13 @@
       GVMap[GVE->getVariable()].push_back({&Global, GVE->getExpression()});
   }
 
+  // Create the symbol that designates the start of the unit's contribution
+  // to the string offsets table. In a split DWARF scenario, only the skeleton
+  // unit has the DW_AT_str_offsets_base attribute (and hence needs the symbol).
+  if (useSegmentedStringOffsetsTable())
+    (useSplitDwarf() ? SkeletonHolder : InfoHolder)
+        .setStringOffsetsStartSym(Asm->createTempSymbol("str_offsets_base"));
+
   for (DICompileUnit *CUNode : M->debug_compile_units()) {
     // FIXME: Move local imported entities into a list attached to the
     // subprogram, then this search won't be needed and a
@@ -1401,6 +1418,12 @@
   Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection());
 }
 
+void DwarfDebug::emitStringOffsetsTableHeader() {
+  DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
+  Holder.emitStringOffsetsTableHeader(
+      Asm->getObjFileLowering().getDwarfStrOffSection());
+}
+
 void DwarfDebug::emitAccel(DwarfAccelTable &Accel, MCSection *Section,
                            StringRef TableName) {
   Accel.FinalizeTable(Asm, TableName);
@@ -1575,8 +1598,14 @@
 
 /// Emit null-terminated strings into a debug str section.
 void DwarfDebug::emitDebugStr() {
+  MCSection *StringOffsetsSection = nullptr;
+  if (useSegmentedStringOffsetsTable()) {
+    emitStringOffsetsTableHeader();
+    StringOffsetsSection = Asm->getObjFileLowering().getDwarfStrOffSection();
+  }
   DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
-  Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection());
+  Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection(),
+                     StringOffsetsSection, /* UseRelativeOffsets = */ true);
 }
 
 void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
@@ -2026,6 +2055,9 @@
 
   NewCU.initStmtList();
 
+  if (useSegmentedStringOffsetsTable())
+    NewCU.addStringOffsetsStart();
+
   initSkeletonUnit(CU, NewCU.getUnitDie(), std::move(OwnedUnit));
 
   return NewCU;
@@ -2053,14 +2085,22 @@
   SplitTypeUnitFileTable.Emit(*Asm->OutStreamer, MCDwarfLineTableParams());
 }
 
+void DwarfDebug::emitStringOffsetsTableHeaderDWO() {
+  assert(useSplitDwarf() && "No split dwarf?");
+  InfoHolder.emitStringOffsetsTableHeader(
+      Asm->getObjFileLowering().getDwarfStrOffDWOSection());
+}
+
 // Emit the .debug_str.dwo section for separated dwarf. This contains the
 // string section and is identical in format to traditional .debug_str
 // sections.
 void DwarfDebug::emitDebugStrDWO() {
+  if (useSegmentedStringOffsetsTable())
+    emitStringOffsetsTableHeaderDWO();
   assert(useSplitDwarf() && "No split dwarf?");
   MCSection *OffSec = Asm->getObjFileLowering().getDwarfStrOffDWOSection();
   InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(),
-                         OffSec);
+                         OffSec, /* UseRelativeOffsets = */ false);
 }
 
 MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
@@ -2120,6 +2160,11 @@
     NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesSection(Signature));
   }
 
+  // Add DW_AT_str_offsets_base to the type unit DIE, but not for split type
+  // units.
+  if (useSegmentedStringOffsetsTable() && !useSplitDwarf())
+    NewTU.addStringOffsetsStart();
+
   NewTU.setType(NewTU.createTypeDIE(CTy));
 
   if (TopLevelType) {
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 2ae0b41..b4a96fa 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -261,6 +261,12 @@
   bool HasAppleExtensionAttributes;
   bool HasSplitDwarf;
 
+  /// Whether to generate the DWARF v5 string offsets table.
+  /// It consists of a series of contributions, each preceded by a header.
+  /// The pre-DWARF v5 string offsets table for split dwarf is, in contrast,
+  /// a monolithic sequence of string offsets.
+  bool UseSegmentedStringOffsetsTable;
+
   /// Separated Dwarf Variables
   /// In general these will all be for bits that are left in the
   /// original object file, rather than things that are meant
@@ -324,6 +330,9 @@
   /// Emit the abbreviation section.
   void emitAbbreviations();
 
+  /// Emit the string offsets table header.
+  void emitStringOffsetsTableHeader();
+
   /// Emit a specified accelerator table.
   void emitAccel(DwarfAccelTable &Accel, MCSection *Section,
                  StringRef TableName);
@@ -388,6 +397,9 @@
   /// Emit the debug line dwo section.
   void emitDebugLineDWO();
 
+  /// Emit the dwo stringoffsets table header.
+  void emitStringOffsetsTableHeaderDWO();
+
   /// Emit the debug str dwo section.
   void emitDebugStrDWO();
 
@@ -492,6 +504,16 @@
   /// split dwarf proposal support.
   bool useSplitDwarf() const { return HasSplitDwarf; }
 
+  /// Returns whether to generate a string offsets table with (possibly shared)
+  /// contributions from each CU and type unit. This implies the use of
+  /// DW_FORM_strx* indirect references with DWARF v5 and beyond. Note that
+  /// DW_FORM_GNU_str_index is also an indirect reference, but it is used with
+  /// a pre-DWARF v5 implementation of split DWARF sections, which uses a
+  /// monolithic string offsets table.
+  bool useSegmentedStringOffsetsTable() const {
+    return UseSegmentedStringOffsetsTable;
+  }
+
   bool shareAcrossDWOCUs() const;
 
   /// Returns the Dwarf Version.
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index 3c04c96..87f3916 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -28,6 +28,26 @@
   CUs.push_back(std::move(U));
 }
 
+void DwarfFile::emitStringOffsetsTableHeader(MCSection *Section) {
+  if (StrPool.empty())
+    return;
+  Asm->OutStreamer->SwitchSection(Section);
+  unsigned EntrySize = 4;
+  // FIXME: DWARF64
+  // We are emitting the header for a contribution to the string offsets
+  // table. The header consists of an entry with the contribution's
+  // size (not including the size of the header), the DWARF version and
+  // 2 bytes of padding.
+  Asm->EmitInt32(StrPool.size() * EntrySize);
+  Asm->EmitInt16(Asm->getDwarfVersion());
+  Asm->EmitInt16(0);
+  // Define the symbol that marks the start of the contribution. It is
+  // referenced by most unit headers via DW_AT_str_offsets_base.
+  // Split units do not use the attribute.
+  if (StringOffsetsStartSym)
+    Asm->OutStreamer->EmitLabel(StringOffsetsStartSym);
+}
+
 // Emit the various dwarf units to the unit section USection with
 // the abbreviations going into ASection.
 void DwarfFile::emitUnits(bool UseOffsets) {
@@ -77,8 +97,9 @@
 void DwarfFile::emitAbbrevs(MCSection *Section) { Abbrevs.Emit(Asm, Section); }
 
 // Emit strings into a string section.
-void DwarfFile::emitStrings(MCSection *StrSection, MCSection *OffsetSection) {
-  StrPool.emit(*Asm, StrSection, OffsetSection);
+void DwarfFile::emitStrings(MCSection *StrSection, MCSection *OffsetSection,
+                            bool UseRelativeOffsets) {
+  StrPool.emit(*Asm, StrSection, OffsetSection, UseRelativeOffsets);
 }
 
 bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
index 167ca13..5de6af5 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -43,6 +43,10 @@
 
   DwarfStringPool StrPool;
 
+  /// DWARF v5: The symbol that designates the start of the contribution to
+  /// the string offsets table. The contribution is shared by all units.
+  MCSymbol *StringOffsetsStartSym = nullptr;
+
   // Collection of dbg variables of a scope.
   DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8>> ScopeVariables;
 
@@ -75,6 +79,9 @@
   /// \brief Add a unit to the list of CUs.
   void addUnit(std::unique_ptr<DwarfCompileUnit> U);
 
+  /// Emit the string table offsets header.
+  void emitStringOffsetsTableHeader(MCSection *Section);
+
   /// \brief Emit all of the units to the section listed with the given
   /// abbreviation section.
   void emitUnits(bool UseOffsets);
@@ -85,12 +92,20 @@
   /// \brief Emit a set of abbreviations to the specific section.
   void emitAbbrevs(MCSection *);
 
-  /// \brief Emit all of the strings to the section given.
-  void emitStrings(MCSection *StrSection, MCSection *OffsetSection = nullptr);
+  /// Emit all of the strings to the section given. If OffsetSection is
+  /// non-null, emit a table of string offsets to it. If UseRelativeOffsets
+  /// is false, emit absolute offsets to the strings. Otherwise, emit
+  /// relocatable references to the strings if they are supported by the target.
+  void emitStrings(MCSection *StrSection, MCSection *OffsetSection = nullptr,
+                   bool UseRelativeOffsets = false);
 
   /// \brief Returns the string pool.
   DwarfStringPool &getStringPool() { return StrPool; }
 
+  MCSymbol *getStringOffsetsStartSym() const { return StringOffsetsStartSym; }
+
+  void setStringOffsetsStartSym(MCSymbol *Sym) { StringOffsetsStartSym = Sym; }
+
   /// \returns false if the variable was merged with a previous one.
   bool addScopeVariable(LexicalScope *LS, DbgVariable *Var);
 
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
index aa5f01e..f296467 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -40,7 +40,7 @@
 }
 
 void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection,
-                           MCSection *OffsetSection) {
+                           MCSection *OffsetSection, bool UseRelativeOffsets) {
   if (Pool.empty())
     return;
 
@@ -74,6 +74,9 @@
     Asm.OutStreamer->SwitchSection(OffsetSection);
     unsigned size = 4; // FIXME: DWARF64 is 8.
     for (const auto &Entry : Entries)
-      Asm.OutStreamer->EmitIntValue(Entry->getValue().Offset, size);
+      if (UseRelativeOffsets)
+        Asm.emitDwarfStringOffset(Entry->getValue());
+      else
+        Asm.OutStreamer->EmitIntValue(Entry->getValue().Offset, size);
   }
 }
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
index 1cac3b7..069c124 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h
@@ -37,10 +37,13 @@
   DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix);
 
   void emit(AsmPrinter &Asm, MCSection *StrSection,
-            MCSection *OffsetSection = nullptr);
+            MCSection *OffsetSection = nullptr,
+            bool UseRelativeOffsets = false);
 
   bool empty() const { return Pool.empty(); }
 
+  unsigned size() const { return Pool.size(); }
+
   /// Get a reference to an entry in the string pool.
   EntryRef getEntry(AsmPrinter &Asm, StringRef Str);
 };
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 71f452b..889b123 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -241,9 +241,22 @@
 
 void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,
                           StringRef String) {
-  Die.addValue(DIEValueAllocator, Attribute,
-               isDwoUnit() ? dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp,
-               DIEString(DU->getStringPool().getEntry(*Asm, String)));
+  auto StringPoolEntry = DU->getStringPool().getEntry(*Asm, String);
+  dwarf::Form IxForm =
+      isDwoUnit() ? dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp;
+  // For DWARF v5 and beyond, use the smallest strx? form possible.
+  if (useSegmentedStringOffsetsTable()) {
+    IxForm = dwarf::DW_FORM_strx1;
+    unsigned Index = StringPoolEntry.getIndex();
+    if (Index > 0xffffff)
+      IxForm = dwarf::DW_FORM_strx4;
+    else if (Index > 0xffff)
+      IxForm = dwarf::DW_FORM_strx3;
+    else if (Index > 0xff)
+      IxForm = dwarf::DW_FORM_strx2;
+  }
+  Die.addValue(DIEValueAllocator, Attribute, IxForm,
+               DIEString(StringPoolEntry));
 }
 
 DIEValueList::value_iterator DwarfUnit::addLabel(DIEValueList &Die,
@@ -1660,3 +1673,10 @@
     return nullptr;
   return getSection()->getBeginSymbol();
 }
+
+void DwarfUnit::addStringOffsetsStart() {
+  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+  addSectionLabel(getUnitDie(), dwarf::DW_AT_str_offsets_base,
+                  DU->getStringOffsetsStartSym(),
+                  TLOF.getDwarfStrOffSection()->getBeginSymbol());
+}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 6546a0c..505edd1 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -273,6 +273,10 @@
   /// call insertDIE if MD is not null.
   DIE &createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N = nullptr);
 
+  bool useSegmentedStringOffsetsTable() const {
+    return DD->useSegmentedStringOffsetsTable();
+  }
+
   /// Compute the size of a header for this unit, not including the initial
   /// length field.
   virtual unsigned getHeaderSize() const {
@@ -286,6 +290,9 @@
   /// Emit the header for this unit, not including the initial length field.
   virtual void emitHeader(bool UseOffsets) = 0;
 
+  /// Add the DW_AT_str_offsets_base attribute to the unit DIE.
+  void addStringOffsetsStart();
+
   virtual DwarfCompileUnit &getCU() = 0;
 
   void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy);