llvm-symbolizer: teach it about PowerPC64 ELF function descriptors

Summary:
Teach llvm-symbolizer about PowerPC64 ELF function descriptors. Symbols in the .opd section point to function descriptors, the first word of which is a pointer to the real function. For the purposes of symbolizing we pretend that the symbol points directly to the function.

This is enough to get decent function names in stack traces for unoptimized binaries, which fixes the sanitizer print-stack-trace test on PowerPC64 Linux.

Reviewers: kcc, willschm, samsonov

Reviewed By: samsonov

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D6110

llvm-svn: 221514
diff --git a/llvm/tools/llvm-symbolizer/LLVMSymbolize.cpp b/llvm/tools/llvm-symbolizer/LLVMSymbolize.cpp
index 760d83b..36061d7 100644
--- a/llvm/tools/llvm-symbolizer/LLVMSymbolize.cpp
+++ b/llvm/tools/llvm-symbolizer/LLVMSymbolize.cpp
@@ -45,8 +45,26 @@
 
 ModuleInfo::ModuleInfo(ObjectFile *Obj, DIContext *DICtx)
     : Module(Obj), DebugInfoContext(DICtx) {
+  std::unique_ptr<DataExtractor> OpdExtractor;
+  uint64_t OpdAddress = 0;
+  // Find the .opd (function descriptor) section if any, for big-endian
+  // PowerPC64 ELF.
+  if (Module->getArch() == Triple::ppc64) {
+    for (section_iterator Section : Module->sections()) {
+      StringRef Name;
+      if (!error(Section->getName(Name)) && Name == ".opd") {
+        StringRef Data;
+        if (!error(Section->getContents(Data))) {
+          OpdExtractor.reset(new DataExtractor(Data, Module->isLittleEndian(),
+                                               Module->getBytesInAddress()));
+          OpdAddress = Section->getAddress();
+        }
+        break;
+      }
+    }
+  }
   for (const SymbolRef &Symbol : Module->symbols()) {
-    addSymbol(Symbol);
+    addSymbol(Symbol, OpdExtractor.get(), OpdAddress);
   }
   bool NoSymbolTable = (Module->symbol_begin() == Module->symbol_end());
   if (NoSymbolTable && Module->isELF()) {
@@ -54,12 +72,13 @@
     std::pair<symbol_iterator, symbol_iterator> IDyn =
         getELFDynamicSymbolIterators(Module);
     for (symbol_iterator si = IDyn.first, se = IDyn.second; si != se; ++si) {
-      addSymbol(*si);
+      addSymbol(*si, OpdExtractor.get(), OpdAddress);
     }
   }
 }
 
-void ModuleInfo::addSymbol(const SymbolRef &Symbol) {
+void ModuleInfo::addSymbol(const SymbolRef &Symbol, DataExtractor *OpdExtractor,
+                           uint64_t OpdAddress) {
   SymbolRef::Type SymbolType;
   if (error(Symbol.getType(SymbolType)))
     return;
@@ -69,6 +88,18 @@
   if (error(Symbol.getAddress(SymbolAddress)) ||
       SymbolAddress == UnknownAddressOrSize)
     return;
+  if (OpdExtractor) {
+    // For big-endian PowerPC64 ELF, symbols in the .opd section refer to
+    // function descriptors. The first word of the descriptor is a pointer to
+    // the function's code.
+    // For the purposes of symbolization, pretend the symbol's address is that
+    // of the function's code, not the descriptor.
+    uint64_t OpdOffset = SymbolAddress - OpdAddress;
+    uint32_t OpdOffset32 = OpdOffset;
+    if (OpdOffset == OpdOffset32 && 
+        OpdExtractor->isValidOffsetForAddress(OpdOffset32))
+      SymbolAddress = OpdExtractor->getAddress(&OpdOffset32);
+  }
   uint64_t SymbolSize;
   // Getting symbol size is linear for Mach-O files, so assume that symbol
   // occupies the memory range up to the following symbol.