[Dsymutil][Debuginfo][NFC] Reland: Refactor dsymutil to separate DWARF optimizing part. #2.

Summary:
This patch relands D71271. The problem with D71271 is that it introduced a cyclic
dependency: CodeGen->AsmPrinter->DebugInfoDWARF->CodeGen. To avoid that cycle, this
patch puts the implementation of DWARFOptimizer into a separate library: lib/DWARFLinker.

Thus the only difference between this patch and D71271 is that DWARFOptimizer is renamed
to DWARFLinker and its files are put into lib/DWARFLinker.

Reviewers: JDevlieghere, friss, dblaikie, aprantl

Reviewed By: JDevlieghere

Subscribers: thegameg, merge_guards_bot, probinson, mgorny, hiraditya, llvm-commits

Tags: #llvm, #debug-info

Differential Revision: https://reviews.llvm.org/D71839
diff --git a/llvm/lib/CMakeLists.txt b/llvm/lib/CMakeLists.txt
index f7e08a6..8f8d417 100644
--- a/llvm/lib/CMakeLists.txt
+++ b/llvm/lib/CMakeLists.txt
@@ -8,6 +8,7 @@
 add_subdirectory(BinaryFormat)
 add_subdirectory(Bitcode)
 add_subdirectory(Bitstream)
+add_subdirectory(DWARFLinker)
 add_subdirectory(Frontend)
 add_subdirectory(Transforms)
 add_subdirectory(Linker)
diff --git a/llvm/lib/CodeGen/NonRelocatableStringpool.cpp b/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
index d28399f..9ed3471 100644
--- a/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
+++ b/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/NonRelocatableStringpool.cpp - A simple stringpool  --===//
+//===-- NonRelocatableStringpool.cpp --------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/DWARFLinker/CMakeLists.txt b/llvm/lib/DWARFLinker/CMakeLists.txt
new file mode 100644
index 0000000..09610f0
--- /dev/null
+++ b/llvm/lib/DWARFLinker/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_llvm_component_library(LLVMDWARFLinker
+  DWARFLinkerCompileUnit.cpp
+  DWARFLinkerDeclContext.cpp
+  DWARFLinker.cpp
+
+  )
diff --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/DWARFLinker.cpp
new file mode 100644
index 0000000..e26148a
--- /dev/null
+++ b/llvm/lib/DWARFLinker/DWARFLinker.cpp
@@ -0,0 +1,15 @@
+//=== DWARFLinker.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DWARFLinker/DWARFLinker.h"
+
+namespace llvm {
+
+AddressesMap::~AddressesMap() = default;
+
+} // namespace llvm
diff --git a/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp b/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp
new file mode 100644
index 0000000..e4de016
--- /dev/null
+++ b/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp
@@ -0,0 +1,144 @@
+//===- DWARFLinkerCompileUnit.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h"
+#include "llvm/DWARFLinker/DWARFLinkerDeclContext.h"
+
+namespace llvm {
+
+/// Tell whether the DIE at \p Idx or one of its ancestors is a subprogram.
+static bool inFunctionScope(CompileUnit &U, unsigned Idx) {
+  // Climb the parent chain; index 0 ends the walk and is never inspected.
+  for (; Idx != 0; Idx = U.getInfo(Idx).ParentIdx) {
+    if (U.getOrigUnit().getDIEAtIndex(Idx).getTag() == dwarf::DW_TAG_subprogram)
+      return true;
+  }
+  return false;
+}
+
+uint16_t CompileUnit::getLanguage() {
+  // 0 means "not cached yet"; units without DW_AT_language query every time.
+  if (!Language)
+    Language = dwarf::toUnsigned(
+        getOrigUnit().getUnitDIE().find(dwarf::DW_AT_language), 0);
+  return Language;
+}
+
+void CompileUnit::markEverythingAsKept() {
+  setHasInterestingContent();
+
+  unsigned DieIdx = 0;
+  for (auto &Entry : Info) {
+    // Keep every DIE that wasn't explicitly marked for pruning.
+    Entry.Keep = !Entry.Prune;
+    const DWARFDie Die = OrigUnit.getDIEAtIndex(DieIdx++);
+
+    // Try to guess which DIEs must go to the accelerator tables. We do that
+    // just for variables, because functions will be handled depending on
+    // whether they carry a DW_AT_low_pc attribute or not.
+    const auto Tag = Die.getTag();
+    if (Tag != dwarf::DW_TAG_variable && Tag != dwarf::DW_TAG_constant)
+      continue;
+
+    if (Optional<DWARFFormValue> Location = Die.find(dwarf::DW_AT_location)) {
+      // A block led by DW_OP_addr and larger than an address sets InDebugMap.
+      if (auto Block = Location->getAsBlock()) {
+        if (Block->size() > OrigUnit.getAddressByteSize() &&
+            (*Block)[0] == dwarf::DW_OP_addr)
+          Entry.InDebugMap = true;
+      }
+      continue;
+    }
+    // No location: a constant value outside of any function also qualifies.
+    if (Die.find(dwarf::DW_AT_const_value) &&
+        !inFunctionScope(*this, Entry.ParentIdx))
+      Entry.InDebugMap = true;
+  }
+}
+
+uint64_t CompileUnit::computeNextUnitOffset() {
+  // NOTE(review): presumably the 32-bit DWARF v2-v4 unit header -- confirm.
+  const uint64_t HeaderSize = 11;
+  NextUnitOffset = StartOffset;
+  if (NewUnit)
+    NextUnitOffset += HeaderSize + NewUnit->getUnitDie().getSize();
+  return NextUnitOffset;
+}
+
+/// Record a forward cross-CU reference from this unit to \p Die, which lives
+/// in \p RefUnit; fixupForwardReferences() later patches \p Attr from it.
+void CompileUnit::noteForwardReference(DIE *Die, const CompileUnit *RefUnit,
+                                       DeclContext *Ctxt, PatchLocation Attr) {
+  ForwardDIEReferences.emplace_back(Die, RefUnit, Ctxt, Attr);
+}
+
+void CompileUnit::fixupForwardReferences() {
+  for (const auto &Ref : ForwardDIEReferences) {
+    // Each entry is (referenced DIE, its unit, its decl context, attribute).
+    DIE *RefDie = std::get<0>(Ref);
+    const CompileUnit *RefUnit = std::get<1>(Ref);
+    DeclContext *Ctxt = std::get<2>(Ref);
+    PatchLocation Attr = std::get<3>(Ref);
+    // Prefer the context's canonical DIE offset when it has a non-zero one.
+    const bool HasCanonical = Ctxt && Ctxt->getCanonicalDIEOffset();
+    Attr.set(HasCanonical ? Ctxt->getCanonicalDIEOffset()
+                          : RefDie->getOffset() + RefUnit->getStartOffset());
+  }
+}
+
+void CompileUnit::addLabelLowPc(uint64_t LabelLowPc, int64_t PcOffset) {
+  Labels.insert({LabelLowPc, PcOffset}); // Pair of (label low PC, offset).
+}
+
+void CompileUnit::addFunctionRange(uint64_t FuncLowPc, uint64_t FuncHighPc,
+                                   int64_t PcOffset) {
+  // The interval map expects half-open intervals, so empty ranges must not
+  // be inserted. Skipping them is safe because they are empty anyway.
+  const bool IsEmptyRange = FuncLowPc == FuncHighPc;
+  if (!IsEmptyRange)
+    Ranges.insert(FuncLowPc, FuncHighPc, PcOffset);
+  LowPc = std::min(LowPc, FuncLowPc + PcOffset);
+  HighPc = std::max(HighPc, FuncHighPc + PcOffset);
+}
+
+void CompileUnit::noteRangeAttribute(const DIE &Die, PatchLocation Attr) {
+  if (Die.getTag() == dwarf::DW_TAG_compile_unit)
+    UnitRangeAttribute = Attr; // The unit DIE's attribute is kept on its own.
+  else
+    RangeAttributes.push_back(Attr);
+}
+
+void CompileUnit::noteLocationAttribute(PatchLocation Attr, int64_t PcOffset) {
+  LocationAttributes.emplace_back(Attr, PcOffset); // Stored with its offset.
+}
+
+void CompileUnit::addNamespaceAccelerator(const DIE *Die,
+                                          DwarfStringPoolEntryRef Name) {
+  Namespaces.emplace_back(Name, Die); // Note: entry order is (Name, Die).
+}
+
+/// Append an ObjC accelerator entry built from (Name, Die, SkipPubSection).
+void CompileUnit::addObjCAccelerator(
+    const DIE *Die, DwarfStringPoolEntryRef Name, bool SkipPubSection) {
+  ObjC.emplace_back(Name, Die, SkipPubSection);
+}
+
+/// Append a Pubnames entry built from (Name, Die, SkipPubSection).
+void CompileUnit::addNameAccelerator(
+    const DIE *Die, DwarfStringPoolEntryRef Name, bool SkipPubSection) {
+  Pubnames.emplace_back(Name, Die, SkipPubSection);
+}
+
+/// Append a Pubtypes entry; the hash is stored before the ObjC flag.
+void CompileUnit::addTypeAccelerator(
+    const DIE *Die, DwarfStringPoolEntryRef Name, bool ObjcClassImplementation,
+    uint32_t QualifiedNameHash) {
+  Pubtypes.emplace_back(Name, Die, QualifiedNameHash, ObjcClassImplementation);
+}
+
+} // namespace llvm
diff --git a/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp b/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp
new file mode 100644
index 0000000..077fd44
--- /dev/null
+++ b/llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp
@@ -0,0 +1,209 @@
+//===- DWARFLinkerDeclContext.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DWARFLinker/DWARFLinkerDeclContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFDie.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
+
+namespace llvm {
+
+/// Set the last DIE/CU a context was seen in and possibly invalidate the
+/// context if it is ambiguous.
+///
+/// In the current implementation, we don't handle overloaded functions well,
+/// because the argument types are not taken into account when computing the
+/// DeclContext tree.
+///
+/// Some of this is mitigated by using mangled names that do contain the
+/// argument types, but sometimes (e.g. with function templates) we don't have
+/// that. In that case, just do not unique anything that refers to the contexts
+/// we are not able to distinguish.
+///
+/// If a context that is not a namespace appears twice in the same CU, we know
+/// it is ambiguous. Make it invalid.
+bool DeclContext::setLastSeenDIE(CompileUnit &U, const DWARFDie &Die) {
+  if (LastSeenCompileUnitID == U.getUniqueID()) {
+    DWARFUnit &OrigUnit = U.getOrigUnit();
+    uint32_t FirstIdx = OrigUnit.getDIEIndex(LastSeenDIE);
+    U.getInfo(FirstIdx).Ctxt = nullptr;
+    return false;
+  }
+
+  LastSeenCompileUnitID = U.getUniqueID();
+  LastSeenDIE = Die;
+  return true;
+}
+
+PointerIntPair<DeclContext *, 1> DeclContextTree::getChildDeclContext(
+    DeclContext &Context, const DWARFDie &DIE, CompileUnit &U,
+    UniquingStringPool &StringPool, bool InClangModule) {
+  unsigned Tag = DIE.getTag();
+
+  // FIXME: dsymutil-classic compat: We should bail out here if we
+  // have a specification or an abstract_origin. We will get the
+  // parent context wrong here.
+
+  switch (Tag) {
+  default:
+    // By default stop gathering child contexts.
+    return PointerIntPair<DeclContext *, 1>(nullptr);
+  case dwarf::DW_TAG_module:
+    break;
+  case dwarf::DW_TAG_compile_unit:
+    return PointerIntPair<DeclContext *, 1>(&Context);
+  case dwarf::DW_TAG_subprogram:
+    // Do not unique anything inside CU local functions.
+    if ((Context.getTag() == dwarf::DW_TAG_namespace ||
+         Context.getTag() == dwarf::DW_TAG_compile_unit) &&
+        !dwarf::toUnsigned(DIE.find(dwarf::DW_AT_external), 0))
+      return PointerIntPair<DeclContext *, 1>(nullptr);
+    LLVM_FALLTHROUGH;
+  case dwarf::DW_TAG_member:
+  case dwarf::DW_TAG_namespace:
+  case dwarf::DW_TAG_structure_type:
+  case dwarf::DW_TAG_class_type:
+  case dwarf::DW_TAG_union_type:
+  case dwarf::DW_TAG_enumeration_type:
+  case dwarf::DW_TAG_typedef:
+    // Artificial things might be ambiguous, because they might be created on
+    // demand. For example implicitly defined constructors are ambiguous
+    // because of the way we identify contexts, and they won't be generated
+    // every time everywhere.
+    if (dwarf::toUnsigned(DIE.find(dwarf::DW_AT_artificial), 0))
+      return PointerIntPair<DeclContext *, 1>(nullptr);
+    break;
+  }
+
+  const char *Name = DIE.getName(DINameKind::LinkageName);
+  const char *ShortName = DIE.getName(DINameKind::ShortName);
+  StringRef NameRef;
+  StringRef ShortNameRef;
+  StringRef FileRef;
+
+  if (Name)
+    NameRef = StringPool.internString(Name);
+  else if (Tag == dwarf::DW_TAG_namespace)
+    // FIXME: For dsymutil-classic compatibility. I think uniquing within
+    // anonymous namespaces is wrong. There is no ODR guarantee there.
+    NameRef = StringPool.internString("(anonymous namespace)");
+
+  if (ShortName && ShortName != Name)
+    ShortNameRef = StringPool.internString(ShortName);
+  else
+    ShortNameRef = NameRef;
+
+  if (Tag != dwarf::DW_TAG_class_type && Tag != dwarf::DW_TAG_structure_type &&
+      Tag != dwarf::DW_TAG_union_type &&
+      Tag != dwarf::DW_TAG_enumeration_type && NameRef.empty())
+    return PointerIntPair<DeclContext *, 1>(nullptr);
+
+  unsigned Line = 0;
+  unsigned ByteSize = std::numeric_limits<uint32_t>::max();
+
+  if (!InClangModule) {
+    // Gather some discriminating data about the DeclContext we will be
+    // creating: File, line number and byte size. This shouldn't be necessary,
+    // because the ODR is just about names, but given that we do some
+    // approximations with overloaded functions and anonymous namespaces, use
+    // these additional data points to make the process safer.
+    //
+    // This is disabled for clang modules, because forward declarations of
+    // module-defined types do not have a file and line.
+    ByteSize = dwarf::toUnsigned(DIE.find(dwarf::DW_AT_byte_size),
+                                 std::numeric_limits<uint64_t>::max());
+    if (Tag != dwarf::DW_TAG_namespace || !Name) {
+      if (unsigned FileNum =
+              dwarf::toUnsigned(DIE.find(dwarf::DW_AT_decl_file), 0)) {
+        if (const auto *LT = U.getOrigUnit().getContext().getLineTableForUnit(
+                &U.getOrigUnit())) {
+          // FIXME: dsymutil-classic compatibility. I'd rather not
+          // unique anything in anonymous namespaces, but if we do, then
+          // verify that the file and line correspond.
+          if (!Name && Tag == dwarf::DW_TAG_namespace)
+            FileNum = 1;
+
+          if (LT->hasFileAtIndex(FileNum)) {
+            Line = dwarf::toUnsigned(DIE.find(dwarf::DW_AT_decl_line), 0);
+            // Cache the resolved paths based on the index in the line table,
+            // because calling realpath is expensive.
+            StringRef ResolvedPath = U.getResolvedPath(FileNum);
+            if (!ResolvedPath.empty()) {
+              FileRef = ResolvedPath;
+            } else {
+              std::string File;
+              bool FoundFileName = LT->getFileNameByIndex(
+                  FileNum, U.getOrigUnit().getCompilationDir(),
+                  DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
+                  File);
+              (void)FoundFileName;
+              assert(FoundFileName && "Must get file name from line table");
+              // Second level of caching, this time based on the file's parent
+              // path.
+              FileRef = PathResolver.resolve(File, StringPool);
+              U.setResolvedPath(FileNum, FileRef);
+            }
+          }
+        }
+      }
+    }
+  }
+
+  if (!Line && NameRef.empty())
+    return PointerIntPair<DeclContext *, 1>(nullptr);
+
+  // We hash NameRef, which is the mangled name, in order to get most
+  // overloaded functions to resolve correctly.
+  //
+  // Strictly speaking, hashing the Tag is only necessary for a
+  // DW_TAG_module, to prevent uniquing of a module and a namespace
+  // with the same name.
+  //
+  // FIXME: dsymutil-classic won't unique the same type presented
+  // once as a struct and once as a class. Using the Tag in the fully
+  // qualified name hash to get the same effect.
+  unsigned Hash = hash_combine(Context.getQualifiedNameHash(), Tag, NameRef);
+
+  // FIXME: dsymutil-classic compatibility: when we don't have a name,
+  // use the filename.
+  if (Tag == dwarf::DW_TAG_namespace && NameRef == "(anonymous namespace)")
+    Hash = hash_combine(Hash, FileRef);
+
+  // Now look if this context already exists.
+  DeclContext Key(Hash, Line, ByteSize, Tag, NameRef, FileRef, Context);
+  auto ContextIter = Contexts.find(&Key);
+
+  if (ContextIter == Contexts.end()) {
+    // The context wasn't found.
+    bool Inserted;
+    DeclContext *NewContext =
+        new (Allocator) DeclContext(Hash, Line, ByteSize, Tag, NameRef, FileRef,
+                                    Context, DIE, U.getUniqueID());
+    std::tie(ContextIter, Inserted) = Contexts.insert(NewContext);
+    assert(Inserted && "Failed to insert DeclContext");
+    (void)Inserted;
+  } else if (Tag != dwarf::DW_TAG_namespace &&
+             !(*ContextIter)->setLastSeenDIE(U, DIE)) {
+    // The context was found, but it is ambiguous with another context
+    // in the same CU. Mark it invalid.
+    return PointerIntPair<DeclContext *, 1>(*ContextIter, /* Invalid= */ 1);
+  }
+
+  assert(ContextIter != Contexts.end());
+  // FIXME: dsymutil-classic compatibility. Union types aren't
+  // uniqued, but their children might be.
+  if ((Tag == dwarf::DW_TAG_subprogram &&
+       Context.getTag() != dwarf::DW_TAG_structure_type &&
+       Context.getTag() != dwarf::DW_TAG_class_type) ||
+      (Tag == dwarf::DW_TAG_union_type))
+    return PointerIntPair<DeclContext *, 1>(*ContextIter, /* Invalid= */ 1);
+
+  return PointerIntPair<DeclContext *, 1>(*ContextIter);
+}
+
+} // namespace llvm
diff --git a/llvm/lib/DWARFLinker/LLVMBuild.txt b/llvm/lib/DWARFLinker/LLVMBuild.txt
new file mode 100644
index 0000000..b32ea3d
--- /dev/null
+++ b/llvm/lib/DWARFLinker/LLVMBuild.txt
@@ -0,0 +1,21 @@
+;===- ./lib/DWARFLinker/LLVMBuild.txt ---------------*- Conf -*--===;
+;
+; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+; See https://llvm.org/LICENSE.txt for license information.
+; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = DWARFLinker
+parent = Libraries
+required_libraries = DebugInfoDWARF AsmPrinter CodeGen MC Object Support
diff --git a/llvm/lib/LLVMBuild.txt b/llvm/lib/LLVMBuild.txt
index 3f5383d..1ae5979 100644
--- a/llvm/lib/LLVMBuild.txt
+++ b/llvm/lib/LLVMBuild.txt
@@ -23,6 +23,7 @@
  CodeGen
  DebugInfo
  Demangle
+ DWARFLinker
  ExecutionEngine
  Frontend
  FuzzMutate