[llvm-symbolizer] Introduce the -dsym-hint option.
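If llvm-symbolizer fails to find a matching .dSYM bundle at the default
location, it tries each .dSYM path passed via -dsym-hint, in the order given,
and uses the first bundle whose UUID matches the binary. The option may be
repeated; hints that lack the '.dSYM' extension are ignored with a warning.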
llvm-svn: 220004
diff --git a/llvm/tools/llvm-symbolizer/LLVMSymbolize.cpp b/llvm/tools/llvm-symbolizer/LLVMSymbolize.cpp
index f1dccfd..31bbedf 100644
--- a/llvm/tools/llvm-symbolizer/LLVMSymbolize.cpp
+++ b/llvm/tools/llvm-symbolizer/LLVMSymbolize.cpp
@@ -207,14 +207,21 @@
 
 void LLVMSymbolizer::flush() {
   DeleteContainerSeconds(Modules);
-  BinaryForPath.clear();
+  ObjectPairForPathArch.clear();
   ObjectFileForArch.clear();
 }
 
-static std::string getDarwinDWARFResourceForPath(const std::string &Path) {
-  StringRef Basename = sys::path::filename(Path);
-  const std::string &DSymDirectory = Path + ".dSYM";
-  SmallString<16> ResourceName = StringRef(DSymDirectory);
+// For Path="/path/to/foo" and Basename="foo" assume that debug info is in
+// /path/to/foo.dSYM/Contents/Resources/DWARF/foo.
+// For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in
+// /path/to/bar.dSYM/Contents/Resources/DWARF/foo.
+static
+std::string getDarwinDWARFResourceForPath(
+    const std::string &Path, const std::string &Basename) {
+  SmallString<16> ResourceName = StringRef(Path);
+  if (sys::path::extension(Path) != ".dSYM") {
+    ResourceName += ".dSYM";
+  }
   sys::path::append(ResourceName, "Contents", "Resources", "DWARF");
   sys::path::append(ResourceName, Basename);
   return ResourceName.str();
@@ -265,9 +272,8 @@
   return false;
 }
 
-static bool getGNUDebuglinkContents(const Binary *Bin, std::string &DebugName,
+static bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName,
                                     uint32_t &CRCHash) {
-  const ObjectFile *Obj = dyn_cast<ObjectFile>(Bin);
   if (!Obj)
     return false;
   for (const SectionRef &Section : Obj->sections()) {
@@ -294,57 +300,91 @@
   return false;
 }
 
-LLVMSymbolizer::BinaryPair
-LLVMSymbolizer::getOrCreateBinary(const std::string &Path) {
-  const auto &I = BinaryForPath.find(Path);
-  if (I != BinaryForPath.end())
-    return I->second;
-  Binary *Bin = nullptr;
-  Binary *DbgBin = nullptr;
-  ErrorOr<OwningBinary<Binary>> BinaryOrErr = createBinary(Path);
-  if (!error(BinaryOrErr.getError())) {
-    OwningBinary<Binary> &ParsedBinary = BinaryOrErr.get();
-    // Check if it's a universal binary.
-    Bin = ParsedBinary.getBinary().get();
-    addOwningBinary(std::move(ParsedBinary));
-    if (Bin->isMachO() || Bin->isMachOUniversalBinary()) {
-      // On Darwin we may find DWARF in separate object file in
-      // resource directory.
-      const std::string &ResourcePath =
-          getDarwinDWARFResourceForPath(Path);
-      BinaryOrErr = createBinary(ResourcePath);
-      std::error_code EC = BinaryOrErr.getError();
-      if (EC != errc::no_such_file_or_directory && !error(EC)) {
-        OwningBinary<Binary> B = std::move(BinaryOrErr.get());
-        DbgBin = B.getBinary().get();
+static
+bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj,
+                             const MachOObjectFile *Obj) {
+  ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid();
+  ArrayRef<uint8_t> bin_uuid = Obj->getUuid();
+  if (dbg_uuid.empty() || bin_uuid.empty())
+    return false;
+  return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size());
+}
+
+ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath,
+    const MachOObjectFile *MachExeObj, const std::string &ArchName) {
+  // On Darwin we may find DWARF in separate object file in
+  // resource directory.
+  std::vector<std::string> DsymPaths;
+  StringRef Filename = sys::path::filename(ExePath);
+  DsymPaths.push_back(getDarwinDWARFResourceForPath(ExePath, Filename));
+  for (const auto &Path : Opts.DsymHints) {
+    DsymPaths.push_back(getDarwinDWARFResourceForPath(Path, Filename));
+  }
+  for (const auto &path : DsymPaths) {
+    ErrorOr<OwningBinary<Binary>> BinaryOrErr = createBinary(path);
+    std::error_code EC = BinaryOrErr.getError();
+    if (EC != errc::no_such_file_or_directory && !error(EC)) {
+      OwningBinary<Binary> B = std::move(BinaryOrErr.get());
+      ObjectFile *DbgObj =
+          getObjectFileFromBinary(B.getBinary().get(), ArchName);
+      const MachOObjectFile *MachDbgObj =
+          dyn_cast<const MachOObjectFile>(DbgObj);
+      if (!MachDbgObj) continue;
+      if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj)) {
         addOwningBinary(std::move(B));
+        return DbgObj; 
       }
     }
+  }
+  return nullptr;
+}
+
+LLVMSymbolizer::ObjectPair
+LLVMSymbolizer::getOrCreateObjects(const std::string &Path,
+                                   const std::string &ArchName) {
+  const auto &I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName));
+  if (I != ObjectPairForPathArch.end())
+    return I->second;
+  ObjectFile *Obj = nullptr;
+  ObjectFile *DbgObj = nullptr;
+  ErrorOr<OwningBinary<Binary>> BinaryOrErr = createBinary(Path);
+  if (!error(BinaryOrErr.getError())) {
+    OwningBinary<Binary> &B = BinaryOrErr.get();
+    Obj = getObjectFileFromBinary(B.getBinary().get(), ArchName);
+    if (!Obj) {
+      ObjectPair Res = std::make_pair(nullptr, nullptr);
+      ObjectPairForPathArch[std::make_pair(Path, ArchName)] = Res;
+      return Res;
+    }
+    addOwningBinary(std::move(B));
+    if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj))
+      DbgObj = lookUpDsymFile(Path, MachObj, ArchName);
     // Try to locate the debug binary using .gnu_debuglink section.
-    if (!DbgBin) {
+    if (!DbgObj) {
       std::string DebuglinkName;
       uint32_t CRCHash;
       std::string DebugBinaryPath;
-      if (getGNUDebuglinkContents(Bin, DebuglinkName, CRCHash) &&
+      if (getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash) &&
           findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath)) {
         BinaryOrErr = createBinary(DebugBinaryPath);
         if (!error(BinaryOrErr.getError())) {
           OwningBinary<Binary> B = std::move(BinaryOrErr.get());
-          DbgBin = B.getBinary().get();
+          DbgObj = getObjectFileFromBinary(B.getBinary().get(), ArchName);
           addOwningBinary(std::move(B));
         }
       }
     }
   }
-  if (!DbgBin)
-    DbgBin = Bin;
-  BinaryPair Res = std::make_pair(Bin, DbgBin);
-  BinaryForPath[Path] = Res;
+  if (!DbgObj)
+    DbgObj = Obj;
+  ObjectPair Res = std::make_pair(Obj, DbgObj);
+  ObjectPairForPathArch[std::make_pair(Path, ArchName)] = Res;
   return Res;
 }
 
 ObjectFile *
-LLVMSymbolizer::getObjectFileFromBinary(Binary *Bin, const std::string &ArchName) {
+LLVMSymbolizer::getObjectFileFromBinary(Binary *Bin,
+                                        const std::string &ArchName) {
   if (!Bin)
     return nullptr;
   ObjectFile *Res = nullptr;
@@ -382,18 +422,16 @@
       ArchName = ArchStr;
     }
   }
-  BinaryPair Binaries = getOrCreateBinary(BinaryName);
-  ObjectFile *Obj = getObjectFileFromBinary(Binaries.first, ArchName);
-  ObjectFile *DbgObj = getObjectFileFromBinary(Binaries.second, ArchName);
+  ObjectPair Objects = getOrCreateObjects(BinaryName, ArchName);
 
-  if (!Obj) {
+  if (!Objects.first) {
     // Failed to find valid object file.
     Modules.insert(make_pair(ModuleName, (ModuleInfo *)nullptr));
     return nullptr;
   }
-  DIContext *Context = DIContext::getDWARFContext(*DbgObj);
+  DIContext *Context = DIContext::getDWARFContext(*Objects.second);
   assert(Context);
-  ModuleInfo *Info = new ModuleInfo(Obj, Context);
+  ModuleInfo *Info = new ModuleInfo(Objects.first, Context);
   Modules.insert(make_pair(ModuleName, Info));
   return Info;
 }
diff --git a/llvm/tools/llvm-symbolizer/LLVMSymbolize.h b/llvm/tools/llvm-symbolizer/LLVMSymbolize.h
index 17b78dc..52f1fc9 100644
--- a/llvm/tools/llvm-symbolizer/LLVMSymbolize.h
+++ b/llvm/tools/llvm-symbolizer/LLVMSymbolize.h
@@ -39,13 +39,14 @@
     bool PrintInlining : 1;
     bool Demangle : 1;
     std::string DefaultArch;
+    std::vector<std::string> DsymHints;
     Options(bool UseSymbolTable = true,
             FunctionNameKind PrintFunctions = FunctionNameKind::LinkageName,
             bool PrintInlining = true, bool Demangle = true,
             std::string DefaultArch = "")
-        : UseSymbolTable(UseSymbolTable), PrintFunctions(PrintFunctions),
-          PrintInlining(PrintInlining), Demangle(Demangle),
-          DefaultArch(DefaultArch) {}
+        : UseSymbolTable(UseSymbolTable),
+          PrintFunctions(PrintFunctions), PrintInlining(PrintInlining),
+          Demangle(Demangle), DefaultArch(DefaultArch) {}
   };
 
   LLVMSymbolizer(const Options &Opts = Options()) : Opts(Opts) {}
@@ -62,11 +63,15 @@
   void flush();
   static std::string DemangleName(const std::string &Name);
 private:
-  typedef std::pair<Binary*, Binary*> BinaryPair;
+  typedef std::pair<ObjectFile*, ObjectFile*> ObjectPair;
 
   ModuleInfo *getOrCreateModuleInfo(const std::string &ModuleName);
-  /// \brief Returns pair of pointers to binary and debug binary.
-  BinaryPair getOrCreateBinary(const std::string &Path);
+  ObjectFile *lookUpDsymFile(const std::string &Path, const MachOObjectFile *ExeObj,
+                             const std::string &ArchName);
+
+  /// \brief Returns pair of pointers to object and debug object.
+  ObjectPair getOrCreateObjects(const std::string &Path,
+                                const std::string &ArchName);
   /// \brief Returns a parsed object file for a given architecture in a
   /// universal binary (or the binary itself if it is an object file).
   ObjectFile *getObjectFileFromBinary(Binary *Bin, const std::string &ArchName);
@@ -83,9 +88,10 @@
 
   // Owns module info objects.
   std::map<std::string, ModuleInfo *> Modules;
-  std::map<std::string, BinaryPair> BinaryForPath;
   std::map<std::pair<MachOUniversalBinary *, std::string>, ObjectFile *>
       ObjectFileForArch;
+  std::map<std::pair<std::string, std::string>, ObjectPair>
+      ObjectPairForPathArch;
 
   Options Opts;
   static const char kBadString[];
diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 29db172..d554022 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Signals.h"
@@ -61,6 +62,11 @@
              cl::desc("Path to object file to be symbolized (if not provided, "
                       "object file should be specified for each input line)"));
 
+static cl::list<std::string>
+ClDsymHint("dsym-hint", cl::ZeroOrMore,
+           cl::desc("Path to .dSYM bundles to search for debug info for the "
+                    "object files"));
+
 static bool parseCommand(bool &IsData, std::string &ModuleName,
                          uint64_t &ModuleOffset) {
   const char *kDataCmd = "DATA ";
@@ -119,6 +125,14 @@
   cl::ParseCommandLineOptions(argc, argv, "llvm-symbolizer\n");
   LLVMSymbolizer::Options Opts(ClUseSymbolTable, ClPrintFunctions,
                                ClPrintInlining, ClDemangle, ClDefaultArch);
+  for (const auto &hint : ClDsymHint) {
+    if (sys::path::extension(hint) == ".dSYM") {
+      Opts.DsymHints.push_back(hint);
+    } else {
+      errs() << "Warning: invalid dSYM hint: \"" << hint <<
+                "\" (must have the '.dSYM' extension).\n";
+    }
+  }
   LLVMSymbolizer Symbolizer(Opts);
 
   bool IsData = false;