Improve DWARF parsing speed by improving DWARFAbbreviationDeclaration

This patch improves DWARF parsing speed by having each DWARFAbbreviationDeclaration instance know whether it has a fixed byte size. If an abbreviation has a fixed byte size that can be calculated given a DWARFUnit, then parsing a DIE becomes two steps: parse the ULEB128 abbrev code, and then add the constant size to the offset.

This patch also adds a fixed byte size to each DWARFAbbreviationDeclaration::AttributeSpec so that attributes can quickly skip their values if needed, without having to look up the fixed form size.

Notable improvements:

- DWARFAbbreviationDeclaration::findAttributeIndex() now returns an Optional<uint32_t> instead of a uint32_t and we no longer have to look for the magic -1U return value
- Optional<uint32_t> DWARFAbbreviationDeclaration::findAttributeIndex(dwarf::Attribute attr) const;
- DWARFAbbreviationDeclaration now has a getAttributeValue() function that extracts an attribute value given a DIE offset that takes advantage of the DWARFAbbreviationDeclaration::AttributeSpec::ByteSize
- bool DWARFAbbreviationDeclaration::getAttributeValue(const uint32_t DIEOffset, const dwarf::Attribute Attr, const DWARFUnit &U, DWARFFormValue &FormValue) const;
- A DWARFAbbreviationDeclaration instance can return a fixed byte size for itself so DWARF parsing is faster:
- Optional<size_t> DWARFAbbreviationDeclaration::getFixedAttributesByteSize(const DWARFUnit &U) const;
- Any functions that used to take a "const DWARFUnit *U" that would crash if U was NULL now take a "const DWARFUnit &U" and are only called with a valid DWARFUnit

Differential Revision: https://reviews.llvm.org/D26567

llvm-svn: 286924
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
index 637d12d..638830e 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
@@ -8,6 +8,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
+#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
@@ -17,8 +19,10 @@
 void DWARFAbbreviationDeclaration::clear() {
   Code = 0;
   Tag = DW_TAG_null;
+  CodeByteSize = 0;
   HasChildren = false;
   AttributeSpecs.clear();
+  FixedAttributeSize.reset();
 }
 
 DWARFAbbreviationDeclaration::DWARFAbbreviationDeclaration() {
@@ -29,10 +33,12 @@
 DWARFAbbreviationDeclaration::extract(DataExtractor Data, 
                                       uint32_t* OffsetPtr) {
   clear();
+  const uint32_t Offset = *OffsetPtr;
   Code = Data.getULEB128(OffsetPtr);
   if (Code == 0) {
     return false;
   }
+  CodeByteSize = *OffsetPtr - Offset;
   Tag = static_cast<llvm::dwarf::Tag>(Data.getULEB128(OffsetPtr));
   if (Tag == DW_TAG_null) {
     clear();
@@ -40,12 +46,52 @@
   }
   uint8_t ChildrenByte = Data.getU8(OffsetPtr);
   HasChildren = (ChildrenByte == DW_CHILDREN_yes);
+  // Assign a value to our optional FixedAttributeSize member variable. If
+  // this member variable still has a value after the while loop below, then
+  // all attribute data in this abbreviation declaration has a fixed byte size.
+  FixedAttributeSize = FixedSizeInfo();
 
+  // Read all of the abbreviation attributes and forms.
   while (true) {
     auto A = static_cast<Attribute>(Data.getULEB128(OffsetPtr));
     auto F = static_cast<Form>(Data.getULEB128(OffsetPtr));
     if (A && F) {
-        AttributeSpecs.push_back(AttributeSpec(A, F));
+      auto FixedFormByteSize = DWARFFormValue::getFixedByteSize(F);
+      AttributeSpecs.push_back(AttributeSpec(A, F, FixedFormByteSize));
+      // If this abbreviation still has a fixed byte size, then update the
+      // FixedAttributeSize as needed.
+      if (FixedAttributeSize) {
+        if (FixedFormByteSize)
+          FixedAttributeSize->NumBytes += *FixedFormByteSize;
+        else {
+          switch (F) {
+          case DW_FORM_addr:
+            ++FixedAttributeSize->NumAddrs;
+            break;
+
+          case DW_FORM_ref_addr:
+            ++FixedAttributeSize->NumRefAddrs;
+            break;
+
+          case DW_FORM_strp:
+          case DW_FORM_GNU_ref_alt:
+          case DW_FORM_GNU_strp_alt:
+          case DW_FORM_line_strp:
+          case DW_FORM_sec_offset:
+          case DW_FORM_strp_sup:
+          case DW_FORM_ref_sup:
+            ++FixedAttributeSize->NumDwarfOffsets;
+            break;
+
+          default:
+            // Indicate we no longer have a fixed byte size for this
+            // abbreviation by clearing the FixedAttributeSize optional value
+            // so it doesn't have a value.
+            FixedAttributeSize.reset();
+            break;
+          }
+        }
+      }
     } else if (A == 0 && F == 0) {
       // We successfully reached the end of this abbreviation declaration
       // since both attribute and form are zero.
@@ -88,11 +134,64 @@
   OS << '\n';
 }
 
-uint32_t
-DWARFAbbreviationDeclaration::findAttributeIndex(dwarf::Attribute attr) const {
+Optional<uint32_t>
+DWARFAbbreviationDeclaration::findAttributeIndex(dwarf::Attribute Attr) const {
   for (uint32_t i = 0, e = AttributeSpecs.size(); i != e; ++i) {
-    if (AttributeSpecs[i].Attr == attr)
+    if (AttributeSpecs[i].Attr == Attr)
       return i;
   }
-  return -1U;
+  return None;
+}
+
+bool DWARFAbbreviationDeclaration::getAttributeValue(
+    const uint32_t DIEOffset, const dwarf::Attribute Attr, const DWARFUnit &U,
+    DWARFFormValue &FormValue) const {
+  Optional<uint32_t> MatchAttrIndex = findAttributeIndex(Attr);
+  if (!MatchAttrIndex)
+    return false;
+
+  auto DebugInfoData = U.getDebugInfoExtractor();
+
+  // Add the byte size of the ULEB128 for the abbrev Code so we can start
+  // skipping the attribute data.
+  uint32_t Offset = DIEOffset + CodeByteSize;
+  uint32_t AttrIndex = 0;
+  for (const auto &Spec : AttributeSpecs) {
+    if (*MatchAttrIndex == AttrIndex) {
+      // We have arrived at the attribute to extract, extract it from Offset.
+      FormValue.setForm(Spec.Form);
+      return FormValue.extractValue(DebugInfoData, &Offset, &U);
+    }
+    // March Offset along until we get to the attribute we want.
+    if (Optional<uint8_t> FixedSize = Spec.getByteSize(U))
+      Offset += *FixedSize;
+    else
+      DWARFFormValue::skipValue(Spec.Form, DebugInfoData, &Offset, &U);
+    ++AttrIndex;
+  }
+  return false;
+}
+
+size_t DWARFAbbreviationDeclaration::FixedSizeInfo::getByteSize(
+    const DWARFUnit &U) const {
+  size_t ByteSize = NumBytes;
+  if (NumAddrs)
+    ByteSize += NumAddrs * U.getAddressByteSize();
+  if (NumRefAddrs)
+    ByteSize += NumRefAddrs * U.getRefAddrByteSize();
+  if (NumDwarfOffsets)
+    ByteSize += NumDwarfOffsets * U.getDwarfOffsetByteSize();
+  return ByteSize;
+}
+
+Optional<uint8_t> DWARFAbbreviationDeclaration::AttributeSpec::getByteSize(
+    const DWARFUnit &U) const {
+  return ByteSize ? ByteSize : DWARFFormValue::getFixedByteSize(Form, &U);
+}
+
+Optional<size_t> DWARFAbbreviationDeclaration::getFixedAttributesByteSize(
+    const DWARFUnit &U) const {
+  if (FixedAttributeSize)
+    return FixedAttributeSize->getByteSize(U);
+  return None;
 }
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index 7890bf0..d11c5ae 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -30,7 +30,7 @@
                                                const DWARFUnit *Unit,
                                                uint32_t *Offset) {
   Unit = Unit->getUnitSection().getUnitForOffset(*Offset);
-  return (Unit && DIE.extractFast(Unit, Offset)) ? Unit : nullptr;
+  return (Unit && DIE.extractFast(*Unit, Offset)) ? Unit : nullptr;
 }
 
 void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS, DWARFUnit *u,
@@ -183,11 +183,17 @@
   OS << ")\n";
 }
 
-bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFUnit *U,
+bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFUnit &U,
                                              uint32_t *OffsetPtr) {
+  DataExtractor DebugInfoData = U.getDebugInfoExtractor();
+  const uint32_t UEndOffset = U.getNextUnitOffset();
+  return extractFast(U, OffsetPtr, DebugInfoData, UEndOffset);
+}
+bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFUnit &U,
+                                             uint32_t *OffsetPtr,
+                                             const DataExtractor &DebugInfoData,
+                                             uint32_t UEndOffset) {
   Offset = *OffsetPtr;
-  DataExtractor DebugInfoData = U->getDebugInfoExtractor();
-  uint32_t UEndOffset = U->getNextUnitOffset();
   if (Offset >= UEndOffset || !DebugInfoData.isValidOffset(Offset))
     return false;
   uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr);
@@ -196,21 +202,29 @@
     AbbrevDecl = nullptr;
     return true;
   }
-  AbbrevDecl = U->getAbbreviations()->getAbbreviationDeclaration(AbbrCode);
+  AbbrevDecl = U.getAbbreviations()->getAbbreviationDeclaration(AbbrCode);
   if (nullptr == AbbrevDecl) {
     // Restore the original offset.
     *OffsetPtr = Offset;
     return false;
   }
+  // See if all attributes in this DIE have fixed byte sizes. If so, we can
+  // just add this size to the offset to skip to the next DIE.
+  if (Optional<size_t> FixedSize = AbbrevDecl->getFixedAttributesByteSize(U)) {
+    *OffsetPtr += *FixedSize;
+    return true;
+  }
 
   // Skip all data in the .debug_info for the attributes
   for (const auto &AttrSpec : AbbrevDecl->attributes()) {
-    auto Form = AttrSpec.Form;
-
-    if (Optional<uint8_t> FixedSize = DWARFFormValue::getFixedByteSize(Form, U))
+    // Check if this attribute has a fixed byte size.
+    if (Optional<uint8_t> FixedSize = AttrSpec.getByteSize(U)) {
+      // Attribute byte size is fixed, just add the size to the offset.
       *OffsetPtr += *FixedSize;
-    else if (!DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr, U)) {
-      // Restore the original offset.
+    } else if (!DWARFFormValue::skipValue(AttrSpec.Form, DebugInfoData,
+                                          OffsetPtr, &U)) {
+      // We failed to skip this attribute's value, restore the original offset
+      // and return the failure status.
       *OffsetPtr = Offset;
       return false;
     }
@@ -230,27 +244,9 @@
 
 bool DWARFDebugInfoEntryMinimal::getAttributeValue(const DWARFUnit *U, 
     dwarf::Attribute Attr, DWARFFormValue &FormValue) const {
-  if (!AbbrevDecl)
+  if (!AbbrevDecl || !U)
     return false;
-
-  uint32_t AttrIdx = AbbrevDecl->findAttributeIndex(Attr);
-  if (AttrIdx == -1U)
-    return false;
-
-  DataExtractor DebugInfoData = U->getDebugInfoExtractor();
-  uint32_t DebugInfoOffset = getOffset();
-
-  // Skip the abbreviation code so we are at the data for the attributes
-  DebugInfoData.getULEB128(&DebugInfoOffset);
-
-  // Skip preceding attribute values.
-  for (uint32_t i = 0; i < AttrIdx; ++i) {
-    DWARFFormValue::skipValue(AbbrevDecl->getFormByIndex(i),
-                              DebugInfoData, &DebugInfoOffset, U);
-  }
-
-  FormValue = DWARFFormValue(AbbrevDecl->getFormByIndex(AttrIdx));
-  return FormValue.extractValue(DebugInfoData, &DebugInfoOffset, U);
+  return AbbrevDecl->getAttributeValue(Offset, Attr, *U, FormValue);
 }
 
 const char *DWARFDebugInfoEntryMinimal::getAttributeValueAsString(
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index 7fb5998..85ef3e4 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -196,10 +196,11 @@
   uint32_t DIEOffset = Offset + getHeaderSize();
   uint32_t NextCUOffset = getNextUnitOffset();
   DWARFDebugInfoEntryMinimal DIE;
+  DataExtractor DebugInfoData = getDebugInfoExtractor();
   uint32_t Depth = 0;
   bool IsCUDie = true;
 
-  while (DIEOffset < NextCUOffset && DIE.extractFast(this, &DIEOffset)) {
+  while (DIE.extractFast(*this, &DIEOffset, DebugInfoData, NextCUOffset)) {
     if (IsCUDie) {
       if (AppendCUDie)
         Dies.push_back(DIE);