Sketch out a DWARF parser.

This introduces a new library to LLVM: libDebugInfo. It will provide debug information
parsing to LLVM. Much of the design and some of the code is taken from the LLDB project.

This commit also adds an llvm-dwarfdump tool that can dump the abbreviations and
DIEs from an object file. It makes it easy to write tests for DWARF input and output.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@139627 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/DebugInfo/DWARFCompileUnit.cpp b/lib/DebugInfo/DWARFCompileUnit.cpp
new file mode 100644
index 0000000..389de9d
--- /dev/null
+++ b/lib/DebugInfo/DWARFCompileUnit.cpp
@@ -0,0 +1,203 @@
+//===-- DWARFCompileUnit.cpp ----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFCompileUnit.h"
+#include "DWARFContext.h"
+#include "DWARFFormValue.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace dwarf;
+
+DataExtractor DWARFCompileUnit::getDebugInfoExtractor() const {
+  return DataExtractor(Context.getInfoSection(), Context.isLittleEndian(),
+                       getAddressByteSize()); // info section + our addr size
+}
+
+// Parse a unit header from debug_info at *offset_ptr and look up its
+// abbreviation set through the context. Returns true on success; when a check
+// fails, *offset_ptr is put back to where parsing started.
+bool DWARFCompileUnit::extract(DataExtractor debug_info, uint32_t *offset_ptr) {
+  clear();
+  Offset = *offset_ptr;
+  if (!debug_info.isValidOffset(*offset_ptr))
+    return false;
+
+  const DWARFDebugAbbrev *abbrevTable = Context.getDebugAbbrev();
+  Length = debug_info.getU32(offset_ptr);
+  Version = debug_info.getU16(offset_ptr);
+  uint64_t abbrevStart = debug_info.getU32(offset_ptr);
+  AddrSize = debug_info.getU8(offset_ptr);
+
+  // Validate the fields that were just read.
+  bool sizeFits = debug_info.isValidOffset(getNextCompileUnitOffset() - 1);
+  bool versionKnown = DWARFContext::isSupportedVersion(Version);
+  bool abbrevInRange = abbrevStart < Context.getAbbrevSection().size();
+  bool addrSizeKnown = AddrSize == 4 || AddrSize == 8;
+  if (!(sizeFits && versionKnown && addrSizeKnown && abbrevInRange) ||
+      abbrevTable == NULL) {
+    *offset_ptr = Offset; // undo the reads above
+    return false;
+  }
+
+  Abbrevs = abbrevTable->getAbbreviationDeclarationSet(abbrevStart);
+  return true;
+}
+
+// Parse the unit header at offset against a caller-supplied abbreviation set.
+// Returns the offset after the fields read, or 0 if any check fails.
+uint32_t
+DWARFCompileUnit::extract(uint32_t offset, DataExtractor debug_info_data,
+                          const DWARFAbbreviationDeclarationSet *abbrevs) {
+  clear();
+  Offset = offset;
+  if (!debug_info_data.isValidOffset(offset))
+    return 0;
+
+  Length = debug_info_data.getU32(&offset);
+  Version = debug_info_data.getU16(&offset);
+  const uint32_t abbrOffset = debug_info_data.getU32(&offset);
+  Abbrevs = abbrevs;
+  AddrSize = debug_info_data.getU8(&offset);
+
+  // A null set can never match the header, and must not be dereferenced.
+  const bool abbrevsOK = abbrevs && abbrOffset == abbrevs->getOffset();
+  const bool versionOK = DWARFContext::isSupportedVersion(Version);
+  const bool addrSizeOK = AddrSize == 4 || AddrSize == 8;
+  return (versionOK && addrSizeOK && abbrevsOK &&
+          debug_info_data.isValidOffset(offset)) ? offset : 0;
+}
+
+void DWARFCompileUnit::clear() {
+  // Forget all parsed DIEs, then zero the header fields and the base address.
+  DieArray.clear();
+  Offset = Length = 0;
+  Version = 0;
+  AddrSize = 0;
+  BaseAddr = 0;
+  Abbrevs = 0; // used as a pointer elsewhere, so this nulls it
+}
+
+// Print the unit header and then each entry of DieArray to OS. Abbrevs is null
+// after clear(), so the header line prints 0 rather than dereferencing it.
+void DWARFCompileUnit::dump(raw_ostream &OS) {
+  OS << format("0x%08x", Offset) << ": Compile Unit:"
+     << " length = " << format("0x%08x", Length)
+     << " version = " << format("0x%04x", Version) << " abbr_offset = "
+     << format("0x%04x", Abbrevs ? Abbrevs->getOffset() : 0)
+     << " addr_size = " << format("0x%02x", AddrSize) << " (next CU at "
+     << format("0x%08x", getNextCompileUnitOffset()) << ")\n";
+  extractDIEsIfNeeded(false);
+  for (unsigned i = 0, e = DieArray.size(); i != e; ++i)
+    DieArray[i].dump(OS, this, 10);
+}
+
+// Link the flat DieArray into a tree by assigning parent and sibling pointers
+// between neighbouring entries.
+void DWARFCompileUnit::setDIERelations() {
+  if (DieArray.empty())
+    return;
+
+  DWARFDebugInfoEntryMinimal *const first = &DieArray.front();
+  DWARFDebugInfoEntryMinimal *const last = &DieArray.back();
+
+  // Stop one short of the end so that die + 1 is always a valid entry.
+  for (DWARFDebugInfoEntryMinimal *die = first; die != last; ++die) {
+    DWARFDebugInfoEntryMinimal *const following = die + 1;
+    const DWARFAbbreviationDeclaration *abbrev =
+      die->getAbbreviationDeclarationPtr();
+
+    if (!abbrev) {
+      // A NULL DIE ends a sibling chain: the entry after it becomes the
+      // sibling of this DIE's parent, if it has one.
+      if (DWARFDebugInfoEntryMinimal *parent = die->getParent())
+        parent->setSibling(following);
+      continue;
+    }
+
+    // A normal DIE either parents the next entry or has it as its sibling.
+    if (abbrev->hasChildren())
+      following->setParent(die);
+    else
+      die->setSibling(following);
+  }
+
+  // The loop never visited the final entry; parent it to the first DIE.
+  if (first != last)
+    last->setParent(first);
+}
+
+// Parse this unit's DIEs into DieArray unless that was already done. With
+// cu_die_only set, only the first DIE is parsed and 1 is returned. Returns 0
+// if nothing needed parsing, otherwise DieArray's size after parsing.
+size_t DWARFCompileUnit::extractDIEsIfNeeded(bool cu_die_only) {
+  const size_t initial_die_array_size = DieArray.size();
+  if ((cu_die_only && initial_die_array_size > 0) ||
+      initial_die_array_size > 1)
+    return 0; // Already parsed
+
+  // Set the offset to that of the first DIE and calculate the start of the
+  // next compilation unit header.
+  uint32_t offset = getFirstDIEOffset();
+  uint32_t next_cu_offset = getNextCompileUnitOffset();
+
+  DWARFDebugInfoEntryMinimal die;
+  // Nesting level of the DIE being parsed; the first DIE is at depth 0.
+  uint32_t depth = 0;
+  const uint8_t *fixed_form_sizes =
+    DWARFFormValue::getFixedFormSizesForAddressSize(getAddressByteSize());
+
+  while (offset < next_cu_offset &&
+         die.extractFast(this, fixed_form_sizes, &offset)) {
+    if (depth == 0) {
+      // Prefer DW_AT_low_pc; fall back to DW_AT_entry_pc when -1U comes back.
+      uint64_t base_addr =
+        die.getAttributeValueAsUnsigned(this, DW_AT_low_pc, -1U);
+      if (base_addr == -1U)
+        base_addr = die.getAttributeValueAsUnsigned(this, DW_AT_entry_pc, 0);
+      setBaseAddress(base_addr);
+    }
+
+    if (cu_die_only) {
+      addDIE(die);
+      return 1;
+    }
+    // Skip the first DIE when an earlier call already stored it.
+    if (depth != 0 || initial_die_array_size != 1)
+      addDIE(die);
+
+    const DWARFAbbreviationDeclaration *abbrDecl =
+      die.getAbbreviationDeclarationPtr();
+    if (abbrDecl) {
+      // Normal DIE
+      if (abbrDecl->hasChildren())
+        ++depth;
+      else if (depth == 0)
+        break;  // Childless first DIE: there is no tree left to parse.
+    } else {
+      // NULL DIE.
+      if (depth > 0)
+        --depth;
+      if (depth == 0)
+        break;  // We are done with this compile unit!
+    }
+  }
+
+  // Give a little bit of info if we encounter corrupt DWARF (our offset
+  // should always terminate at or before the start of the next compilation
+  // unit header).
+  if (offset > next_cu_offset)
+    errs() << "warning: DWARF compile unit extends beyond its bounds cu "
+           << format("0x%8.8x", getOffset()) << " at "
+           << format("0x%8.8x", offset) << "\n";
+
+  setDIERelations();
+  return DieArray.size();
+}