blob: fc529630e979b9e7931dbf6c43482fff2ef8182c [file] [log] [blame]
//===- SymbolizableObjectFile.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Implementation of SymbolizableObjectFile class.
//
//===----------------------------------------------------------------------===//
12
13#include "SymbolizableObjectFile.h"
Eugene Zelenko44d95122017-02-09 01:09:54 +000014#include "llvm/ADT/STLExtras.h"
15#include "llvm/ADT/StringRef.h"
16#include "llvm/ADT/Triple.h"
Zachary Turner264b5d92017-06-07 03:48:56 +000017#include "llvm/BinaryFormat/COFF.h"
Reid Klecknerc038e2d2015-11-13 17:00:36 +000018#include "llvm/DebugInfo/DWARF/DWARFContext.h"
Eugene Zelenko44d95122017-02-09 01:09:54 +000019#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
20#include "llvm/Object/COFF.h"
21#include "llvm/Object/ObjectFile.h"
22#include "llvm/Object/SymbolSize.h"
Eugene Zelenko44d95122017-02-09 01:09:54 +000023#include "llvm/Support/Casting.h"
24#include "llvm/Support/DataExtractor.h"
25#include "llvm/Support/Error.h"
26#include <algorithm>
27#include <cstdint>
28#include <memory>
29#include <string>
30#include <system_error>
31#include <utility>
32#include <vector>
Alexey Samsonov8df3a072015-10-29 22:21:37 +000033
Eugene Zelenko44d95122017-02-09 01:09:54 +000034using namespace llvm;
Alexey Samsonov8df3a072015-10-29 22:21:37 +000035using namespace object;
Eugene Zelenko44d95122017-02-09 01:09:54 +000036using namespace symbolize;
Alexey Samsonov8df3a072015-10-29 22:21:37 +000037
38static DILineInfoSpecifier
39getDILineInfoSpecifier(FunctionNameKind FNKind) {
40 return DILineInfoSpecifier(
41 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, FNKind);
42}
43
44ErrorOr<std::unique_ptr<SymbolizableObjectFile>>
45SymbolizableObjectFile::create(object::ObjectFile *Obj,
46 std::unique_ptr<DIContext> DICtx) {
47 std::unique_ptr<SymbolizableObjectFile> res(
48 new SymbolizableObjectFile(Obj, std::move(DICtx)));
49 std::unique_ptr<DataExtractor> OpdExtractor;
50 uint64_t OpdAddress = 0;
51 // Find the .opd (function descriptor) section if any, for big-endian
52 // PowerPC64 ELF.
53 if (Obj->getArch() == Triple::ppc64) {
54 for (section_iterator Section : Obj->sections()) {
55 StringRef Name;
Alexey Samsonov8df3a072015-10-29 22:21:37 +000056 if (auto EC = Section->getName(Name))
57 return EC;
58 if (Name == ".opd") {
Fangrui Songa076ec52019-05-16 11:33:48 +000059 Expected<StringRef> E = Section->getContents();
60 if (!E)
61 return errorToErrorCode(E.takeError());
62 OpdExtractor.reset(new DataExtractor(*E, Obj->isLittleEndian(),
Alexey Samsonov8df3a072015-10-29 22:21:37 +000063 Obj->getBytesInAddress()));
64 OpdAddress = Section->getAddress();
65 break;
66 }
67 }
68 }
69 std::vector<std::pair<SymbolRef, uint64_t>> Symbols =
70 computeSymbolSizes(*Obj);
71 for (auto &P : Symbols)
72 res->addSymbol(P.first, P.second, OpdExtractor.get(), OpdAddress);
73
74 // If this is a COFF object and we didn't find any symbols, try the export
75 // table.
76 if (Symbols.empty()) {
77 if (auto *CoffObj = dyn_cast<COFFObjectFile>(Obj))
78 if (auto EC = res->addCoffExportSymbols(CoffObj))
79 return EC;
80 }
Fangrui Songafb54fd2019-04-05 12:52:04 +000081
82 std::vector<std::pair<SymbolDesc, StringRef>> &Fs = res->Functions,
83 &Os = res->Objects;
Fangrui Songcb300f12019-04-06 02:18:56 +000084 auto Uniquify = [](std::vector<std::pair<SymbolDesc, StringRef>> &S) {
85 // Sort by (Addr,Size,Name). If several SymbolDescs share the same Addr,
86 // pick the one with the largest Size. This helps us avoid symbols with no
87 // size information (Size=0).
88 llvm::sort(S);
89 auto I = S.begin(), E = S.end(), J = S.begin();
90 while (I != E) {
91 auto OI = I;
92 while (++I != E && OI->first.Addr == I->first.Addr) {
93 }
94 *J++ = I[-1];
95 }
96 S.erase(J, S.end());
97 };
98 Uniquify(Fs);
99 Uniquify(Os);
Fangrui Songafb54fd2019-04-05 12:52:04 +0000100
Alexey Samsonov8df3a072015-10-29 22:21:37 +0000101 return std::move(res);
102}
103
104SymbolizableObjectFile::SymbolizableObjectFile(ObjectFile *Obj,
105 std::unique_ptr<DIContext> DICtx)
106 : Module(Obj), DebugInfoContext(std::move(DICtx)) {}
107
108namespace {
Eugene Zelenko44d95122017-02-09 01:09:54 +0000109
Alexey Samsonov8df3a072015-10-29 22:21:37 +0000110struct OffsetNamePair {
111 uint32_t Offset;
112 StringRef Name;
Eugene Zelenko44d95122017-02-09 01:09:54 +0000113
Alexey Samsonov8df3a072015-10-29 22:21:37 +0000114 bool operator<(const OffsetNamePair &R) const {
115 return Offset < R.Offset;
116 }
117};
Eugene Zelenko44d95122017-02-09 01:09:54 +0000118
119} // end anonymous namespace
Alexey Samsonov8df3a072015-10-29 22:21:37 +0000120
121std::error_code SymbolizableObjectFile::addCoffExportSymbols(
122 const COFFObjectFile *CoffObj) {
123 // Get all export names and offsets.
124 std::vector<OffsetNamePair> ExportSyms;
125 for (const ExportDirectoryEntryRef &Ref : CoffObj->export_directories()) {
126 StringRef Name;
127 uint32_t Offset;
128 if (auto EC = Ref.getSymbolName(Name))
129 return EC;
130 if (auto EC = Ref.getExportRVA(Offset))
131 return EC;
132 ExportSyms.push_back(OffsetNamePair{Offset, Name});
133 }
134 if (ExportSyms.empty())
135 return std::error_code();
136
137 // Sort by ascending offset.
138 array_pod_sort(ExportSyms.begin(), ExportSyms.end());
139
140 // Approximate the symbol sizes by assuming they run to the next symbol.
141 // FIXME: This assumes all exports are functions.
142 uint64_t ImageBase = CoffObj->getImageBase();
143 for (auto I = ExportSyms.begin(), E = ExportSyms.end(); I != E; ++I) {
144 OffsetNamePair &Export = *I;
145 // FIXME: The last export has a one byte size now.
146 uint32_t NextOffset = I != E ? I->Offset : Export.Offset + 1;
147 uint64_t SymbolStart = ImageBase + Export.Offset;
148 uint64_t SymbolSize = NextOffset - Export.Offset;
149 SymbolDesc SD = {SymbolStart, SymbolSize};
Fangrui Songafb54fd2019-04-05 12:52:04 +0000150 Functions.emplace_back(SD, Export.Name);
Alexey Samsonov8df3a072015-10-29 22:21:37 +0000151 }
152 return std::error_code();
153}
154
155std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
156 uint64_t SymbolSize,
157 DataExtractor *OpdExtractor,
158 uint64_t OpdAddress) {
Matt Davis123be5d2019-02-14 23:50:35 +0000159 // Avoid adding symbols from an unknown/undefined section.
160 const ObjectFile *Obj = Symbol.getObject();
161 Expected<section_iterator> Sec = Symbol.getSection();
162 if (!Sec || (Obj && Obj->section_end() == *Sec))
163 return std::error_code();
Kevin Enderby7bd8d992016-05-02 20:28:12 +0000164 Expected<SymbolRef::Type> SymbolTypeOrErr = Symbol.getType();
165 if (!SymbolTypeOrErr)
166 return errorToErrorCode(SymbolTypeOrErr.takeError());
Kevin Enderby5afbc1c2016-03-23 20:27:00 +0000167 SymbolRef::Type SymbolType = *SymbolTypeOrErr;
Alexey Samsonov8df3a072015-10-29 22:21:37 +0000168 if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data)
169 return std::error_code();
Kevin Enderby931cb652016-06-24 18:24:42 +0000170 Expected<uint64_t> SymbolAddressOrErr = Symbol.getAddress();
171 if (!SymbolAddressOrErr)
172 return errorToErrorCode(SymbolAddressOrErr.takeError());
Alexey Samsonov8df3a072015-10-29 22:21:37 +0000173 uint64_t SymbolAddress = *SymbolAddressOrErr;
174 if (OpdExtractor) {
175 // For big-endian PowerPC64 ELF, symbols in the .opd section refer to
176 // function descriptors. The first word of the descriptor is a pointer to
177 // the function's code.
178 // For the purposes of symbolization, pretend the symbol's address is that
179 // of the function's code, not the descriptor.
180 uint64_t OpdOffset = SymbolAddress - OpdAddress;
181 uint32_t OpdOffset32 = OpdOffset;
Fangrui Songf78650a2018-07-30 19:41:25 +0000182 if (OpdOffset == OpdOffset32 &&
Alexey Samsonov8df3a072015-10-29 22:21:37 +0000183 OpdExtractor->isValidOffsetForAddress(OpdOffset32))
184 SymbolAddress = OpdExtractor->getAddress(&OpdOffset32);
185 }
Kevin Enderby81e8b7d2016-04-20 21:24:34 +0000186 Expected<StringRef> SymbolNameOrErr = Symbol.getName();
187 if (!SymbolNameOrErr)
188 return errorToErrorCode(SymbolNameOrErr.takeError());
Alexey Samsonov8df3a072015-10-29 22:21:37 +0000189 StringRef SymbolName = *SymbolNameOrErr;
190 // Mach-O symbol table names have leading underscore, skip it.
Eugene Zelenko44d95122017-02-09 01:09:54 +0000191 if (Module->isMachO() && !SymbolName.empty() && SymbolName[0] == '_')
Alexey Samsonov8df3a072015-10-29 22:21:37 +0000192 SymbolName = SymbolName.drop_front();
193 // FIXME: If a function has alias, there are two entries in symbol table
194 // with same address size. Make sure we choose the correct one.
195 auto &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects;
196 SymbolDesc SD = { SymbolAddress, SymbolSize };
Fangrui Songafb54fd2019-04-05 12:52:04 +0000197 M.emplace_back(SD, SymbolName);
Alexey Samsonov8df3a072015-10-29 22:21:37 +0000198 return std::error_code();
199}
200
201// Return true if this is a 32-bit x86 PE COFF module.
202bool SymbolizableObjectFile::isWin32Module() const {
203 auto *CoffObject = dyn_cast<COFFObjectFile>(Module);
204 return CoffObject && CoffObject->getMachine() == COFF::IMAGE_FILE_MACHINE_I386;
205}
206
207uint64_t SymbolizableObjectFile::getModulePreferredBase() const {
208 if (auto *CoffObject = dyn_cast<COFFObjectFile>(Module))
209 return CoffObject->getImageBase();
210 return 0;
211}
212
213bool SymbolizableObjectFile::getNameFromSymbolTable(SymbolRef::Type Type,
214 uint64_t Address,
215 std::string &Name,
216 uint64_t &Addr,
217 uint64_t &Size) const {
Fangrui Songafb54fd2019-04-05 12:52:04 +0000218 const auto &Symbols = Type == SymbolRef::ST_Function ? Functions : Objects;
219 std::pair<SymbolDesc, StringRef> SD{{Address, UINT64_C(-1)}, StringRef()};
Fangrui Songafb54fd2019-04-05 12:52:04 +0000220 auto SymbolIterator = llvm::upper_bound(Symbols, SD);
221 if (SymbolIterator == Symbols.begin())
Alexey Samsonov8df3a072015-10-29 22:21:37 +0000222 return false;
223 --SymbolIterator;
224 if (SymbolIterator->first.Size != 0 &&
225 SymbolIterator->first.Addr + SymbolIterator->first.Size <= Address)
226 return false;
227 Name = SymbolIterator->second.str();
228 Addr = SymbolIterator->first.Addr;
229 Size = SymbolIterator->first.Size;
230 return true;
231}
232
Reid Klecknerc038e2d2015-11-13 17:00:36 +0000233bool SymbolizableObjectFile::shouldOverrideWithSymbolTable(
234 FunctionNameKind FNKind, bool UseSymbolTable) const {
235 // When DWARF is used with -gline-tables-only / -gmlt, the symbol table gives
236 // better answers for linkage names than the DIContext. Otherwise, we are
237 // probably using PEs and PDBs, and we shouldn't do the override. PE files
238 // generally only contain the names of exported symbols.
239 return FNKind == FunctionNameKind::LinkageName && UseSymbolTable &&
240 isa<DWARFContext>(DebugInfoContext.get());
241}
242
Alexey Lapshin77fc1f62019-02-27 13:17:36 +0000243DILineInfo
244SymbolizableObjectFile::symbolizeCode(object::SectionedAddress ModuleOffset,
245 FunctionNameKind FNKind,
246 bool UseSymbolTable) const {
Alexey Samsonov8df3a072015-10-29 22:21:37 +0000247 DILineInfo LineInfo;
Alexey Lapshinb2c4b8b2019-03-23 08:08:40 +0000248
249 if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection)
250 ModuleOffset.SectionIndex =
251 getModuleSectionIndexForAddress(ModuleOffset.Address);
252
Alexey Samsonov8df3a072015-10-29 22:21:37 +0000253 if (DebugInfoContext) {
254 LineInfo = DebugInfoContext->getLineInfoForAddress(
255 ModuleOffset, getDILineInfoSpecifier(FNKind));
256 }
257 // Override function name from symbol table if necessary.
Reid Klecknerc038e2d2015-11-13 17:00:36 +0000258 if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) {
Alexey Samsonov8df3a072015-10-29 22:21:37 +0000259 std::string FunctionName;
260 uint64_t Start, Size;
Alexey Lapshin77fc1f62019-02-27 13:17:36 +0000261 if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address,
Alexey Samsonov8df3a072015-10-29 22:21:37 +0000262 FunctionName, Start, Size)) {
263 LineInfo.FunctionName = FunctionName;
264 }
265 }
266 return LineInfo;
267}
268
269DIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode(
Alexey Lapshin77fc1f62019-02-27 13:17:36 +0000270 object::SectionedAddress ModuleOffset, FunctionNameKind FNKind,
271 bool UseSymbolTable) const {
Alexey Samsonov8df3a072015-10-29 22:21:37 +0000272 DIInliningInfo InlinedContext;
273
Alexey Lapshinb2c4b8b2019-03-23 08:08:40 +0000274 if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection)
275 ModuleOffset.SectionIndex =
276 getModuleSectionIndexForAddress(ModuleOffset.Address);
277
Alexey Samsonov8df3a072015-10-29 22:21:37 +0000278 if (DebugInfoContext)
279 InlinedContext = DebugInfoContext->getInliningInfoForAddress(
280 ModuleOffset, getDILineInfoSpecifier(FNKind));
281 // Make sure there is at least one frame in context.
282 if (InlinedContext.getNumberOfFrames() == 0)
283 InlinedContext.addFrame(DILineInfo());
284
Alexey Samsonov8df3a072015-10-29 22:21:37 +0000285 // Override the function name in lower frame with name from symbol table.
Reid Klecknerc038e2d2015-11-13 17:00:36 +0000286 if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) {
Alexey Samsonove46bd742015-10-30 00:02:55 +0000287 std::string FunctionName;
288 uint64_t Start, Size;
Alexey Lapshin77fc1f62019-02-27 13:17:36 +0000289 if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address,
Alexey Samsonove46bd742015-10-30 00:02:55 +0000290 FunctionName, Start, Size)) {
291 InlinedContext.getMutableFrame(InlinedContext.getNumberOfFrames() - 1)
292 ->FunctionName = FunctionName;
Alexey Samsonov8df3a072015-10-29 22:21:37 +0000293 }
Alexey Samsonov8df3a072015-10-29 22:21:37 +0000294 }
Alexey Samsonove46bd742015-10-30 00:02:55 +0000295
296 return InlinedContext;
Alexey Samsonov8df3a072015-10-29 22:21:37 +0000297}
298
Alexey Lapshin77fc1f62019-02-27 13:17:36 +0000299DIGlobal SymbolizableObjectFile::symbolizeData(
300 object::SectionedAddress ModuleOffset) const {
Alexey Samsonov76f7ecb2015-10-29 23:49:19 +0000301 DIGlobal Res;
Alexey Lapshin77fc1f62019-02-27 13:17:36 +0000302 getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset.Address, Res.Name,
303 Res.Start, Res.Size);
Alexey Samsonov76f7ecb2015-10-29 23:49:19 +0000304 return Res;
Alexey Samsonov8df3a072015-10-29 22:21:37 +0000305}
Alexey Lapshinb2c4b8b2019-03-23 08:08:40 +0000306
307/// Search for the first occurence of specified Address in ObjectFile.
308uint64_t SymbolizableObjectFile::getModuleSectionIndexForAddress(
309 uint64_t Address) const {
310
311 for (SectionRef Sec : Module->sections()) {
312 if (!Sec.isText() || Sec.isVirtual())
313 continue;
314
315 if (Address >= Sec.getAddress() &&
Fangrui Songdd0e8332019-04-20 13:00:09 +0000316 Address < Sec.getAddress() + Sec.getSize())
Alexey Lapshinb2c4b8b2019-03-23 08:08:40 +0000317 return Sec.getIndex();
Alexey Lapshinb2c4b8b2019-03-23 08:08:40 +0000318 }
319
320 return object::SectionedAddress::UndefSection;
321}