[llvm-symbolizer] Support debug file lookup using build ID

Build ID is a protocol for looking up debug files that's already
supported by various tools including debuggers. For example, when
locating debug files, gdb checks the following paths:

- /usr/lib/debug/.build-id/ab/cdef1234.debug
- /usr/bin/ls.debug
- /usr/bin/.debug/ls.debug
- /usr/lib/debug/usr/bin/ls.debug

llvm-symbolizer currently consults all of these except for the build ID
based one. This patch implements support for build ID lookup. The
set of debug directories to search is specified by the new option
--debug-file-directory, whose name matches the debug-file-directory
variable used by gdb for the same purpose.

Differential Revision: https://reviews.llvm.org/D70759
diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
index 11599fc..8bfa543 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
@@ -16,6 +16,7 @@
 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Support/Error.h"
 #include <algorithm>
 #include <cstdint>
@@ -44,6 +45,7 @@
     std::vector<std::string> DsymHints;
     std::string FallbackDebugPath;
     std::string DWPName;
+    std::vector<std::string> DebugFileDirectory;
   };
 
   LLVMSymbolizer() = default;
@@ -98,6 +100,9 @@
   ObjectFile *lookUpDebuglinkObject(const std::string &Path,
                                     const ObjectFile *Obj,
                                     const std::string &ArchName);
+  ObjectFile *lookUpBuildIDObject(const std::string &Path,
+                                  const ELFObjectFileBase *Obj,
+                                  const std::string &ArchName);
 
   /// Returns pair of pointers to object and debug object.
   Expected<ObjectPair> getOrCreateObjectPair(const std::string &Path,
diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index be79d9e6..cb076ae 100644
--- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -284,6 +284,85 @@
   return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size());
 }
 
+/// Returns the descriptor of the first NT_GNU_BUILD_ID note named
+/// ELF_NOTE_GNU found in a PT_NOTE segment of \p Obj. Returns None if \p Obj
+/// is null, its program headers cannot be read, or no such note is found.
+template <typename ELFT>
+Optional<ArrayRef<uint8_t>> getBuildID(const ELFFile<ELFT> *Obj) {
+  if (!Obj)
+    return {};
+  auto PhdrsOrErr = Obj->program_headers();
+  if (!PhdrsOrErr) {
+    consumeError(PhdrsOrErr.takeError());
+    return {};
+  }
+  for (const auto &P : *PhdrsOrErr) {
+    if (P.p_type != ELF::PT_NOTE)
+      continue;
+    Error Err = Error::success();
+    for (const auto &N : Obj->notes(P, Err))
+      if (N.getType() == ELF::NT_GNU_BUILD_ID && N.getName() == ELF::ELF_NOTE_GNU)
+        return N.getDesc();
+    // notes() reports problems through Err. Build ID lookup is best effort,
+    // so drop any such error rather than destroying it unhandled.
+    consumeError(std::move(Err));
+  }
+  return {};
+}
+
+/// Returns the GNU build ID of \p Obj by dispatching to the getBuildID
+/// overload for its concrete ELF class and endianness. \p Obj must be one of
+/// the four ELFObjectFile instantiations handled below.
+Optional<ArrayRef<uint8_t>> getBuildID(const ELFObjectFileBase *Obj) {
+  if (auto *ELF32LEObj = dyn_cast<ELFObjectFile<ELF32LE>>(Obj))
+    return getBuildID(ELF32LEObj->getELFFile());
+  if (auto *ELF32BEObj = dyn_cast<ELFObjectFile<ELF32BE>>(Obj))
+    return getBuildID(ELF32BEObj->getELFFile());
+  if (auto *ELF64LEObj = dyn_cast<ELFObjectFile<ELF64LE>>(Obj))
+    return getBuildID(ELF64LEObj->getELFFile());
+  if (auto *ELF64BEObj = dyn_cast<ELFObjectFile<ELF64BE>>(Obj))
+    return getBuildID(ELF64BEObj->getELFFile());
+  llvm_unreachable("unsupported file format");
+}
+
+/// Searches for `<dir>/.build-id/<xx>/<rest>.debug`, where `<xx>` is the first
+/// byte of \p BuildID in lowercase hex and `<rest>` the remaining bytes, in
+/// each entry of \p DebugFileDirectory, or in the platform default debug
+/// directory when that list is empty. On the first path that exists, stores
+/// it in \p Result and returns true; otherwise returns false and leaves
+/// \p Result untouched.
+bool findDebugBinary(const std::vector<std::string> &DebugFileDirectory,
+                     const ArrayRef<uint8_t> BuildID,
+                     std::string &Result) {
+  // Records the candidate under Directory in Result if it exists on disk.
+  auto tryDirectory = [&](StringRef Directory) {
+    SmallString<128> Candidate{Directory};
+    sys::path::append(Candidate, ".build-id",
+                      llvm::toHex(BuildID[0], /*LowerCase=*/true),
+                      llvm::toHex(BuildID.slice(1), /*LowerCase=*/true));
+    Candidate += ".debug";
+    if (!llvm::sys::fs::exists(Candidate))
+      return false;
+    Result = Candidate.str();
+    return true;
+  };
+
+  if (DebugFileDirectory.empty()) {
+#if defined(__NetBSD__)
+    // Try /usr/libdata/debug/.build-id/../...
+    return tryDirectory("/usr/libdata/debug");
+#else
+    // Try /usr/lib/debug/.build-id/../...
+    return tryDirectory("/usr/lib/debug");
+#endif
+  }
+  // Try <debug-file-directory>/.build-id/../...
+  for (const auto &Directory : DebugFileDirectory)
+    if (tryDirectory(Directory))
+      return true;
+  return false;
+}
+
 } // end anonymous namespace
 
 ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath,
@@ -335,6 +408,25 @@
   return DbgObjOrErr.get();
 }
 
+/// Looks for a separate debug file for \p Obj through its GNU build ID, using
+/// findDebugBinary with Opts.DebugFileDirectory. Returns the debug object, or
+/// nullptr if the build ID is missing/too short or no file could be loaded.
+ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path,
+                                                const ELFObjectFileBase *Obj,
+                                                const std::string &ArchName) {
+  Optional<ArrayRef<uint8_t>> ID = getBuildID(Obj);
+  if (!ID || ID->size() < 2)
+    return nullptr;
+  std::string DebugPath;
+  if (!findDebugBinary(Opts.DebugFileDirectory, *ID, DebugPath))
+    return nullptr;
+  auto DebugObjOrErr = getOrCreateObject(DebugPath, ArchName);
+  if (DebugObjOrErr)
+    return DebugObjOrErr.get();
+  consumeError(DebugObjOrErr.takeError());
+  return nullptr;
+}
+
 Expected<LLVMSymbolizer::ObjectPair>
 LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
                                       const std::string &ArchName) {
@@ -355,6 +447,8 @@
 
   if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj))
     DbgObj = lookUpDsymFile(Path, MachObj, ArchName);
+  else if (auto ELFObj = dyn_cast<const ELFObjectFileBase>(Obj))
+    DbgObj = lookUpBuildIDObject(Path, ELFObj, ArchName);
   if (!DbgObj)
     DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName);
   if (!DbgObj)
diff --git a/llvm/test/DebugInfo/Inputs/.build-id/ab/b50d82b6bdc861.debug b/llvm/test/DebugInfo/Inputs/.build-id/ab/b50d82b6bdc861.debug
new file mode 100755
index 0000000..5eafa35
--- /dev/null
+++ b/llvm/test/DebugInfo/Inputs/.build-id/ab/b50d82b6bdc861.debug
Binary files differ
diff --git a/llvm/test/DebugInfo/symbolize-build-id.test b/llvm/test/DebugInfo/symbolize-build-id.test
new file mode 100644
index 0000000..40221ae
--- /dev/null
+++ b/llvm/test/DebugInfo/symbolize-build-id.test
@@ -0,0 +1,28 @@
+# RUN: yaml2obj %s -o %t
+
+# RUN: llvm-symbolizer --debug-file-directory=/non-existent --obj=%t 0x20112f | FileCheck --check-prefix=UNKNOWN %s
+
+# UNKNOWN:      ??
+# UNKNOWN-NEXT: ??:0:0
+
+# RUN: llvm-symbolizer --debug-file-directory=%p/Inputs --obj=%t 0x20112f | FileCheck --check-prefix=FOUND %s
+
+# FOUND:      main
+# FOUND-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:16
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+Sections:
+  - Name:    .note.gnu.build-id
+    Type:    SHT_NOTE
+    Flags:   [ SHF_ALLOC ]
+    Content: 040000000800000003000000474e5500abb50d82b6bdc861
+ProgramHeaders:
+  - Type: PT_NOTE
+    Flags: [ PF_R ]
+    Sections:
+      - Section: .note.gnu.build-id
diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 54ce87d..c9bc030 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -150,6 +150,12 @@
     ClFallbackDebugPath("fallback-debug-path", cl::init(""),
                         cl::desc("Fallback path for debug binaries."));
 
+// Directories searched for `.build-id/xx/yyyy.debug` files; may be repeated.
+// Named after gdb's debug-file-directory setting.
+static cl::list<std::string> ClDebugFileDirectory(
+    "debug-file-directory", cl::ZeroOrMore, cl::value_desc("dir"),
+    cl::desc("Path to directory where to look for debug files."));
+
 static cl::opt<DIPrinter::OutputStyle>
     ClOutputStyle("output-style", cl::init(DIPrinter::OutputStyle::LLVM),
                   cl::desc("Specify print style"),
@@ -299,6 +305,7 @@
   Opts.DefaultArch = ClDefaultArch;
   Opts.FallbackDebugPath = ClFallbackDebugPath;
   Opts.DWPName = ClDwpName;
+  Opts.DebugFileDirectory = ClDebugFileDirectory;
 
   for (const auto &hint : ClDsymHint) {
     if (sys::path::extension(hint) == ".dSYM") {