Greg Clayton | 19602b7 | 2020-02-11 16:05:59 -0800 | [diff] [blame] | 1 | //===- DwarfTransformer.cpp -----------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include <thread> |
| 10 | #include <unordered_set> |
| 11 | |
| 12 | #include "llvm/DebugInfo/DIContext.h" |
| 13 | #include "llvm/DebugInfo/DWARF/DWARFContext.h" |
| 14 | #include "llvm/Support/Error.h" |
| 15 | #include "llvm/Support/ThreadPool.h" |
| 16 | #include "llvm/Support/raw_ostream.h" |
| 17 | |
| 18 | #include "llvm/DebugInfo/GSYM/DwarfTransformer.h" |
| 19 | #include "llvm/DebugInfo/GSYM/FunctionInfo.h" |
| 20 | #include "llvm/DebugInfo/GSYM/GsymCreator.h" |
| 21 | #include "llvm/DebugInfo/GSYM/GsymReader.h" |
| 22 | #include "llvm/DebugInfo/GSYM/InlineInfo.h" |
| 23 | |
| 24 | using namespace llvm; |
| 25 | using namespace gsym; |
| 26 | |
| 27 | struct llvm::gsym::CUInfo { |
| 28 | const DWARFDebugLine::LineTable *LineTable; |
| 29 | const char *CompDir; |
| 30 | std::vector<uint32_t> FileCache; |
| 31 | uint64_t Language = 0; |
| 32 | uint8_t AddrSize = 0; |
| 33 | |
| 34 | CUInfo(DWARFContext &DICtx, DWARFCompileUnit *CU) { |
| 35 | LineTable = DICtx.getLineTableForUnit(CU); |
| 36 | CompDir = CU->getCompilationDir(); |
| 37 | FileCache.clear(); |
| 38 | if (LineTable) |
| 39 | FileCache.assign(LineTable->Prologue.FileNames.size() + 1, UINT32_MAX); |
| 40 | DWARFDie Die = CU->getUnitDIE(); |
| 41 | Language = dwarf::toUnsigned(Die.find(dwarf::DW_AT_language), 0); |
| 42 | AddrSize = CU->getAddressByteSize(); |
| 43 | } |
| 44 | |
| 45 | /// Return true if Addr is the highest address for a given compile unit. The |
| 46 | /// highest address is encoded as -1, of all ones in the address. These high |
| 47 | /// addresses are used by some linkers to indicate that a function has been |
| 48 | /// dead stripped or didn't end up in the linked executable. |
| 49 | bool isHighestAddress(uint64_t Addr) const { |
| 50 | if (AddrSize == 4) |
| 51 | return Addr == UINT32_MAX; |
| 52 | else if (AddrSize == 8) |
| 53 | return Addr == UINT64_MAX; |
| 54 | return false; |
| 55 | } |
| 56 | |
| 57 | /// Convert a DWARF compile unit file index into a GSYM global file index. |
| 58 | /// |
| 59 | /// Each compile unit in DWARF has its own file table in the line table |
| 60 | /// prologue. GSYM has a single large file table that applies to all files |
| 61 | /// from all of the info in a GSYM file. This function converts between the |
| 62 | /// two and caches and DWARF CU file index that has already been converted so |
| 63 | /// the first client that asks for a compile unit file index will end up |
| 64 | /// doing the conversion, and subsequent clients will get the cached GSYM |
| 65 | /// index. |
| 66 | uint32_t DWARFToGSYMFileIndex(GsymCreator &Gsym, uint32_t DwarfFileIdx) { |
| 67 | if (!LineTable) |
| 68 | return 0; |
| 69 | assert(DwarfFileIdx < FileCache.size()); |
| 70 | uint32_t &GsymFileIdx = FileCache[DwarfFileIdx]; |
| 71 | if (GsymFileIdx != UINT32_MAX) |
| 72 | return GsymFileIdx; |
| 73 | std::string File; |
| 74 | if (LineTable->getFileNameByIndex( |
| 75 | DwarfFileIdx, CompDir, |
| 76 | DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) |
| 77 | GsymFileIdx = Gsym.insertFile(File); |
| 78 | else |
| 79 | GsymFileIdx = 0; |
| 80 | return GsymFileIdx; |
| 81 | } |
| 82 | }; |
| 83 | |
| 84 | |
| 85 | static DWARFDie GetParentDeclContextDIE(DWARFDie &Die) { |
| 86 | if (DWARFDie SpecDie = |
| 87 | Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_specification)) { |
| 88 | if (DWARFDie SpecParent = GetParentDeclContextDIE(SpecDie)) |
| 89 | return SpecParent; |
| 90 | } |
| 91 | if (DWARFDie AbstDie = |
| 92 | Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) { |
| 93 | if (DWARFDie AbstParent = GetParentDeclContextDIE(AbstDie)) |
| 94 | return AbstParent; |
| 95 | } |
| 96 | |
| 97 | // We never want to follow parent for inlined subroutine - that would |
| 98 | // give us information about where the function is inlined, not what |
| 99 | // function is inlined |
| 100 | if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine) |
| 101 | return DWARFDie(); |
| 102 | |
| 103 | DWARFDie ParentDie = Die.getParent(); |
| 104 | if (!ParentDie) |
| 105 | return DWARFDie(); |
| 106 | |
| 107 | switch (ParentDie.getTag()) { |
| 108 | case dwarf::DW_TAG_namespace: |
| 109 | case dwarf::DW_TAG_structure_type: |
| 110 | case dwarf::DW_TAG_union_type: |
| 111 | case dwarf::DW_TAG_class_type: |
| 112 | case dwarf::DW_TAG_subprogram: |
| 113 | return ParentDie; // Found parent decl context DIE |
| 114 | case dwarf::DW_TAG_lexical_block: |
| 115 | return GetParentDeclContextDIE(ParentDie); |
| 116 | default: |
| 117 | break; |
| 118 | } |
| 119 | |
| 120 | return DWARFDie(); |
| 121 | } |
| 122 | |
| 123 | /// Get the GsymCreator string table offset for the qualified name for the |
| 124 | /// DIE passed in. This function will avoid making copies of any strings in |
| 125 | /// the GsymCreator when possible. We don't need to copy a string when the |
| 126 | /// string comes from our .debug_str section or is an inlined string in the |
| 127 | /// .debug_info. If we create a qualified name string in this function by |
| 128 | /// combining multiple strings in the DWARF string table or info, we will make |
| 129 | /// a copy of the string when we add it to the string table. |
| 130 | static Optional<uint32_t> getQualifiedNameIndex(DWARFDie &Die, |
| 131 | uint64_t Language, |
| 132 | GsymCreator &Gsym) { |
| 133 | // If the dwarf has mangled name, use mangled name |
| 134 | if (auto LinkageName = |
| 135 | dwarf::toString(Die.findRecursively({dwarf::DW_AT_MIPS_linkage_name, |
| 136 | dwarf::DW_AT_linkage_name}), |
| 137 | nullptr)) |
| 138 | return Gsym.insertString(LinkageName, /* Copy */ false); |
| 139 | |
| 140 | StringRef ShortName(Die.getName(DINameKind::ShortName)); |
| 141 | if (ShortName.empty()) |
| 142 | return llvm::None; |
| 143 | |
| 144 | // For C++ and ObjC, prepend names of all parent declaration contexts |
| 145 | if (!(Language == dwarf::DW_LANG_C_plus_plus || |
| 146 | Language == dwarf::DW_LANG_C_plus_plus_03 || |
| 147 | Language == dwarf::DW_LANG_C_plus_plus_11 || |
| 148 | Language == dwarf::DW_LANG_C_plus_plus_14 || |
| 149 | Language == dwarf::DW_LANG_ObjC_plus_plus || |
| 150 | // This should not be needed for C, but we see C++ code marked as C |
| 151 | // in some binaries. This should hurt, so let's do it for C as well |
| 152 | Language == dwarf::DW_LANG_C)) |
| 153 | return Gsym.insertString(ShortName, /* Copy */ false); |
| 154 | |
| 155 | // Some GCC optimizations create functions with names ending with .isra.<num> |
| 156 | // or .part.<num> and those names are just DW_AT_name, not DW_AT_linkage_name |
| 157 | // If it looks like it could be the case, don't add any prefix |
| 158 | if (ShortName.startswith("_Z") && |
| 159 | (ShortName.contains(".isra.") || ShortName.contains(".part."))) |
| 160 | return Gsym.insertString(ShortName, /* Copy */ false); |
| 161 | |
| 162 | DWARFDie ParentDeclCtxDie = GetParentDeclContextDIE(Die); |
| 163 | if (ParentDeclCtxDie) { |
| 164 | std::string Name = ShortName.str(); |
| 165 | while (ParentDeclCtxDie) { |
| 166 | StringRef ParentName(ParentDeclCtxDie.getName(DINameKind::ShortName)); |
| 167 | if (!ParentName.empty()) { |
| 168 | // "lambda" names are wrapped in < >. Replace with { } |
| 169 | // to be consistent with demangled names and not to confuse with |
| 170 | // templates |
| 171 | if (ParentName.front() == '<' && ParentName.back() == '>') |
| 172 | Name = "{" + ParentName.substr(1, ParentName.size() - 2).str() + "}" + |
| 173 | "::" + Name; |
| 174 | else |
| 175 | Name = ParentName.str() + "::" + Name; |
| 176 | } |
| 177 | ParentDeclCtxDie = GetParentDeclContextDIE(ParentDeclCtxDie); |
| 178 | } |
| 179 | // Copy the name since we created a new name in a std::string. |
| 180 | return Gsym.insertString(Name, /* Copy */ true); |
| 181 | } |
| 182 | // Don't copy the name since it exists in the DWARF object file. |
| 183 | return Gsym.insertString(ShortName, /* Copy */ false); |
| 184 | } |
| 185 | |
| 186 | static bool hasInlineInfo(DWARFDie Die, uint32_t Depth) { |
| 187 | bool CheckChildren = true; |
| 188 | switch (Die.getTag()) { |
| 189 | case dwarf::DW_TAG_subprogram: |
| 190 | // Don't look into functions within functions. |
| 191 | CheckChildren = Depth == 0; |
| 192 | break; |
| 193 | case dwarf::DW_TAG_inlined_subroutine: |
| 194 | return true; |
| 195 | default: |
| 196 | break; |
| 197 | } |
| 198 | if (!CheckChildren) |
| 199 | return false; |
| 200 | for (DWARFDie ChildDie : Die.children()) { |
| 201 | if (hasInlineInfo(ChildDie, Depth + 1)) |
| 202 | return true; |
| 203 | } |
| 204 | return false; |
| 205 | } |
| 206 | |
| 207 | static void parseInlineInfo(GsymCreator &Gsym, CUInfo &CUI, DWARFDie Die, |
| 208 | uint32_t Depth, FunctionInfo &FI, |
| 209 | InlineInfo &parent) { |
| 210 | if (!hasInlineInfo(Die, Depth)) |
| 211 | return; |
| 212 | |
| 213 | dwarf::Tag Tag = Die.getTag(); |
| 214 | if (Tag == dwarf::DW_TAG_inlined_subroutine) { |
| 215 | // create new InlineInfo and append to parent.children |
| 216 | InlineInfo II; |
| 217 | DWARFAddressRange FuncRange = |
| 218 | DWARFAddressRange(FI.startAddress(), FI.endAddress()); |
| 219 | Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges(); |
| 220 | if (RangesOrError) { |
| 221 | for (const DWARFAddressRange &Range : RangesOrError.get()) { |
| 222 | // Check that the inlined function is within the range of the function |
| 223 | // info, it might not be in case of split functions |
| 224 | if (FuncRange.LowPC <= Range.LowPC && Range.HighPC <= FuncRange.HighPC) |
| 225 | II.Ranges.insert(AddressRange(Range.LowPC, Range.HighPC)); |
| 226 | } |
| 227 | } |
| 228 | if (II.Ranges.empty()) |
| 229 | return; |
| 230 | |
| 231 | if (auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym)) |
| 232 | II.Name = *NameIndex; |
| 233 | II.CallFile = CUI.DWARFToGSYMFileIndex( |
| 234 | Gsym, dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_file), 0)); |
| 235 | II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0); |
| 236 | // parse all children and append to parent |
| 237 | for (DWARFDie ChildDie : Die.children()) |
| 238 | parseInlineInfo(Gsym, CUI, ChildDie, Depth + 1, FI, II); |
| 239 | parent.Children.emplace_back(std::move(II)); |
| 240 | return; |
| 241 | } |
| 242 | if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) { |
| 243 | // skip this Die and just recurse down |
| 244 | for (DWARFDie ChildDie : Die.children()) |
| 245 | parseInlineInfo(Gsym, CUI, ChildDie, Depth + 1, FI, parent); |
| 246 | } |
| 247 | } |
| 248 | |
| 249 | static void convertFunctionLineTable(raw_ostream &Log, CUInfo &CUI, |
| 250 | DWARFDie Die, GsymCreator &Gsym, |
| 251 | FunctionInfo &FI) { |
| 252 | std::vector<uint32_t> RowVector; |
| 253 | const uint64_t StartAddress = FI.startAddress(); |
| 254 | const uint64_t EndAddress = FI.endAddress(); |
| 255 | const uint64_t RangeSize = EndAddress - StartAddress; |
| 256 | const object::SectionedAddress SecAddress{ |
| 257 | StartAddress, object::SectionedAddress::UndefSection}; |
| 258 | |
| 259 | |
| 260 | if (!CUI.LineTable->lookupAddressRange(SecAddress, RangeSize, RowVector)) { |
| 261 | // If we have a DW_TAG_subprogram but no line entries, fall back to using |
| 262 | // the DW_AT_decl_file an d DW_AT_decl_line if we have both attributes. |
| 263 | if (auto FileIdx = |
| 264 | dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_file}))) { |
| 265 | if (auto Line = |
| 266 | dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) { |
| 267 | LineEntry LE(StartAddress, CUI.DWARFToGSYMFileIndex(Gsym, *FileIdx), |
| 268 | *Line); |
| 269 | FI.OptLineTable = LineTable(); |
| 270 | FI.OptLineTable->push(LE); |
| 271 | // LE.Addr = EndAddress; |
| 272 | // FI.OptLineTable->push(LE); |
| 273 | } |
| 274 | } |
| 275 | return; |
| 276 | } |
| 277 | |
| 278 | FI.OptLineTable = LineTable(); |
| 279 | DWARFDebugLine::Row PrevRow; |
| 280 | for (uint32_t RowIndex : RowVector) { |
| 281 | // Take file number and line/column from the row. |
| 282 | const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex]; |
| 283 | const uint32_t FileIdx = CUI.DWARFToGSYMFileIndex(Gsym, Row.File); |
| 284 | uint64_t RowAddress = Row.Address.Address; |
| 285 | // Watch out for a RowAddress that is in the middle of a line table entry |
| 286 | // in the DWARF. If we pass an address in between two line table entries |
| 287 | // we will get a RowIndex for the previous valid line table row which won't |
| 288 | // be contained in our function. This is usually a bug in the DWARF due to |
| 289 | // linker problems or LTO or other DWARF re-linking so it is worth emitting |
| 290 | // an error, but not worth stopping the creation of the GSYM. |
| 291 | if (!FI.Range.contains(RowAddress)) { |
| 292 | if (RowAddress < FI.Range.Start) { |
| 293 | Log << "error: DIE has a start address whose LowPC is between the " |
| 294 | "line table Row[" << RowIndex << "] with address " |
| 295 | << HEX64(RowAddress) << " and the next one.\n"; |
| 296 | Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); |
| 297 | RowAddress = FI.Range.Start; |
| 298 | } else { |
| 299 | continue; |
| 300 | } |
| 301 | } |
| 302 | |
| 303 | LineEntry LE(RowAddress, FileIdx, Row.Line); |
| 304 | if (RowIndex != RowVector[0] && Row.Address < PrevRow.Address) { |
| 305 | // We have seen full duplicate line tables for functions in some |
| 306 | // DWARF files. Watch for those here by checking the the last |
| 307 | // row was the function's end address (HighPC) and that the |
| 308 | // current line table entry's address is the same as the first |
| 309 | // line entry we already have in our "function_info.Lines". If |
| 310 | // so break out after printing a warning. |
| 311 | auto FirstLE = FI.OptLineTable->first(); |
| 312 | if (FirstLE && *FirstLE == LE) { |
| 313 | Log << "warning: duplicate line table detected for DIE:\n"; |
| 314 | Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); |
| 315 | } else { |
| 316 | // Print out (ignore if os == nulls as this is expensive) |
| 317 | Log << "error: line table has addresses that do not " |
| 318 | << "monotonically increase:\n"; |
| 319 | for (uint32_t RowIndex2 : RowVector) { |
| 320 | CUI.LineTable->Rows[RowIndex2].dump(Log); |
| 321 | } |
| 322 | Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); |
| 323 | } |
| 324 | break; |
| 325 | } |
| 326 | |
| 327 | // Skip multiple line entries for the same file and line. |
| 328 | auto LastLE = FI.OptLineTable->last(); |
| 329 | if (LastLE && LastLE->File == FileIdx && LastLE->Line == Row.Line) |
| 330 | continue; |
| 331 | // Only push a row if it isn't an end sequence. End sequence markers are |
| 332 | // included for the last address in a function or the last contiguous |
| 333 | // address in a sequence. |
| 334 | if (Row.EndSequence) { |
| 335 | // End sequence means that the next line entry could have a lower address |
| 336 | // that the previous entries. So we clear the previous row so we don't |
| 337 | // trigger the line table error about address that do not monotonically |
| 338 | // increase. |
| 339 | PrevRow = DWARFDebugLine::Row(); |
| 340 | } else { |
| 341 | FI.OptLineTable->push(LE); |
| 342 | PrevRow = Row; |
| 343 | } |
| 344 | } |
| 345 | // If not line table rows were added, clear the line table so we don't encode |
| 346 | // on in the GSYM file. |
| 347 | if (FI.OptLineTable->empty()) |
| 348 | FI.OptLineTable = llvm::None; |
| 349 | } |
| 350 | |
| 351 | void DwarfTransformer::handleDie(raw_ostream &OS, CUInfo &CUI, DWARFDie Die) { |
| 352 | switch (Die.getTag()) { |
| 353 | case dwarf::DW_TAG_subprogram: { |
| 354 | Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges(); |
| 355 | if (!RangesOrError) { |
| 356 | consumeError(RangesOrError.takeError()); |
| 357 | break; |
| 358 | } |
| 359 | const DWARFAddressRangesVector &Ranges = RangesOrError.get(); |
| 360 | if (Ranges.empty()) |
| 361 | break; |
| 362 | auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym); |
| 363 | if (!NameIndex) { |
| 364 | OS << "error: function at " << HEX64(Die.getOffset()) |
| 365 | << " has no name\n "; |
| 366 | Die.dump(OS, 0, DIDumpOptions::getForSingleDIE()); |
| 367 | break; |
| 368 | } |
| 369 | |
| 370 | // Create a function_info for each range |
| 371 | for (const DWARFAddressRange &Range : Ranges) { |
| 372 | // The low PC must be less than the high PC. Many linkers don't remove |
| 373 | // DWARF for functions that don't get linked into the final executable. |
| 374 | // If both the high and low pc have relocations, linkers will often set |
| 375 | // the address values for both to the same value to indicate the function |
| 376 | // has been remove. Other linkers have been known to set the one or both |
| 377 | // PC values to a UINT32_MAX for 4 byte addresses and UINT64_MAX for 8 |
| 378 | // byte addresses to indicate the function isn't valid. The check below |
| 379 | // tries to watch for these cases and abort if it runs into them. |
| 380 | if (Range.LowPC >= Range.HighPC || CUI.isHighestAddress(Range.LowPC)) |
| 381 | break; |
| 382 | |
| 383 | // Many linkers can't remove DWARF and might set the LowPC to zero. Since |
| 384 | // high PC can be an offset from the low PC in more recent DWARF versions |
| 385 | // we need to watch for a zero'ed low pc which we do using |
| 386 | // ValidTextRanges below. |
Greg Clayton | 5e13e0c | 2020-02-15 16:46:50 -0800 | [diff] [blame^] | 387 | if (!Gsym.IsValidTextAddress(Range.LowPC)) { |
Greg Clayton | 19602b7 | 2020-02-11 16:05:59 -0800 | [diff] [blame] | 388 | // We expect zero and -1 to be invalid addresses in DWARF depending |
| 389 | // on the linker of the DWARF. This indicates a function was stripped |
| 390 | // and the debug info wasn't able to be stripped from the DWARF. If |
| 391 | // the LowPC isn't zero or -1, then we should emit an error. |
| 392 | if (Range.LowPC != 0) { |
| 393 | // Unexpected invalid address, emit an error |
| 394 | Log << "warning: DIE has an address range whose start address is " |
Greg Clayton | 5e13e0c | 2020-02-15 16:46:50 -0800 | [diff] [blame^] | 395 | "not in any executable sections (" << |
| 396 | *Gsym.GetValidTextRanges() << ") and will not be processed:\n"; |
Greg Clayton | 19602b7 | 2020-02-11 16:05:59 -0800 | [diff] [blame] | 397 | Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); |
| 398 | } |
| 399 | break; |
| 400 | } |
| 401 | |
| 402 | FunctionInfo FI; |
| 403 | FI.setStartAddress(Range.LowPC); |
| 404 | FI.setEndAddress(Range.HighPC); |
| 405 | FI.Name = *NameIndex; |
| 406 | if (CUI.LineTable) { |
| 407 | convertFunctionLineTable(OS, CUI, Die, Gsym, FI); |
| 408 | } |
| 409 | if (hasInlineInfo(Die, 0)) { |
| 410 | FI.Inline = InlineInfo(); |
| 411 | FI.Inline->Name = *NameIndex; |
| 412 | FI.Inline->Ranges.insert(FI.Range); |
| 413 | parseInlineInfo(Gsym, CUI, Die, 0, FI, *FI.Inline); |
| 414 | } |
| 415 | Gsym.addFunctionInfo(std::move(FI)); |
| 416 | } |
| 417 | } break; |
| 418 | default: |
| 419 | break; |
| 420 | } |
| 421 | for (DWARFDie ChildDie : Die.children()) |
| 422 | handleDie(OS, CUI, ChildDie); |
| 423 | } |
| 424 | |
| 425 | Error DwarfTransformer::convert(uint32_t NumThreads) { |
| 426 | size_t NumBefore = Gsym.getNumFunctionInfos(); |
| 427 | if (NumThreads == 1) { |
| 428 | // Parse all DWARF data from this thread, use the same string/file table |
| 429 | // for everything |
| 430 | for (const auto &CU : DICtx.compile_units()) { |
| 431 | DWARFDie Die = CU->getUnitDIE(false); |
| 432 | CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get())); |
| 433 | handleDie(Log, CUI, Die); |
| 434 | } |
| 435 | } else { |
| 436 | // LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up |
| 437 | // front before we start accessing any DIEs since there might be |
| 438 | // cross compile unit references in the DWARF. If we don't do this we can |
| 439 | // end up crashing. |
| 440 | |
| 441 | // We need to call getAbbreviations sequentially first so that getUnitDIE() |
| 442 | // only works with its local data. |
| 443 | for (const auto &CU : DICtx.compile_units()) |
| 444 | CU->getAbbreviations(); |
| 445 | |
| 446 | // Now parse all DIEs in case we have cross compile unit references in a |
| 447 | // thread pool. |
Alexandre Ganea | 8404aeb | 2020-02-13 22:49:57 -0500 | [diff] [blame] | 448 | ThreadPool pool(hardware_concurrency(NumThreads)); |
Greg Clayton | 19602b7 | 2020-02-11 16:05:59 -0800 | [diff] [blame] | 449 | for (const auto &CU : DICtx.compile_units()) |
| 450 | pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); }); |
| 451 | pool.wait(); |
| 452 | |
| 453 | // Now convert all DWARF to GSYM in a thread pool. |
| 454 | std::mutex LogMutex; |
| 455 | for (const auto &CU : DICtx.compile_units()) { |
| 456 | DWARFDie Die = CU->getUnitDIE(false /*CUDieOnly*/); |
| 457 | if (Die) { |
| 458 | CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get())); |
| 459 | pool.async([this, CUI, &LogMutex, Die]() mutable { |
| 460 | std::string ThreadLogStorage; |
| 461 | raw_string_ostream ThreadOS(ThreadLogStorage); |
| 462 | handleDie(ThreadOS, CUI, Die); |
| 463 | ThreadOS.flush(); |
| 464 | if (!ThreadLogStorage.empty()) { |
| 465 | // Print ThreadLogStorage lines into an actual stream under a lock |
| 466 | std::lock_guard<std::mutex> guard(LogMutex); |
| 467 | Log << ThreadLogStorage; |
| 468 | } |
| 469 | }); |
| 470 | } |
| 471 | } |
| 472 | pool.wait(); |
| 473 | } |
| 474 | size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore; |
| 475 | Log << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n"; |
| 476 | return Error::success(); |
| 477 | } |
| 478 | |
| 479 | llvm::Error DwarfTransformer::verify(StringRef GsymPath) { |
| 480 | Log << "Verifying GSYM file \"" << GsymPath << "\":\n"; |
| 481 | |
| 482 | auto Gsym = GsymReader::openFile(GsymPath); |
| 483 | if (!Gsym) |
| 484 | return Gsym.takeError(); |
| 485 | |
| 486 | auto NumAddrs = Gsym->getNumAddresses(); |
| 487 | DILineInfoSpecifier DLIS( |
| 488 | DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, |
| 489 | DILineInfoSpecifier::FunctionNameKind::LinkageName); |
| 490 | std::string gsymFilename; |
| 491 | for (uint32_t I = 0; I < NumAddrs; ++I) { |
| 492 | auto FuncAddr = Gsym->getAddress(I); |
| 493 | if (!FuncAddr) |
| 494 | return createStringError(std::errc::invalid_argument, |
| 495 | "failed to extract address[%i]", I); |
| 496 | |
| 497 | auto FI = Gsym->getFunctionInfo(*FuncAddr); |
| 498 | if (!FI) |
| 499 | return createStringError(std::errc::invalid_argument, |
| 500 | "failed to extract function info for address 0x%" |
| 501 | PRIu64, *FuncAddr); |
| 502 | |
| 503 | for (auto Addr = *FuncAddr; Addr < *FuncAddr + FI->size(); ++Addr) { |
| 504 | const object::SectionedAddress SectAddr{ |
| 505 | Addr, object::SectionedAddress::UndefSection}; |
| 506 | auto LR = Gsym->lookup(Addr); |
| 507 | if (!LR) |
| 508 | return LR.takeError(); |
| 509 | |
| 510 | auto DwarfInlineInfos = |
| 511 | DICtx.getInliningInfoForAddress(SectAddr, DLIS); |
| 512 | uint32_t NumDwarfInlineInfos = DwarfInlineInfos.getNumberOfFrames(); |
| 513 | if (NumDwarfInlineInfos == 0) { |
| 514 | DwarfInlineInfos.addFrame( |
| 515 | DICtx.getLineInfoForAddress(SectAddr, DLIS)); |
| 516 | } |
| 517 | |
| 518 | // Check for 1 entry that has no file and line info |
| 519 | if (NumDwarfInlineInfos == 1 && |
| 520 | DwarfInlineInfos.getFrame(0).FileName == "<invalid>") { |
| 521 | DwarfInlineInfos = DIInliningInfo(); |
| 522 | NumDwarfInlineInfos = 0; |
| 523 | } |
| 524 | if (NumDwarfInlineInfos > 0 && |
| 525 | NumDwarfInlineInfos != LR->Locations.size()) { |
| 526 | Log << "error: address " << HEX64(Addr) << " has " |
| 527 | << NumDwarfInlineInfos << " DWARF inline frames and GSYM has " |
| 528 | << LR->Locations.size() << "\n"; |
| 529 | Log << " " << NumDwarfInlineInfos << " DWARF frames:\n"; |
| 530 | for (size_t Idx = 0; Idx < NumDwarfInlineInfos; ++Idx) { |
| 531 | const auto dii = DwarfInlineInfos.getFrame(Idx); |
| 532 | Log << " [" << Idx << "]: " << dii.FunctionName << " @ " |
| 533 | << dii.FileName << ':' << dii.Line << '\n'; |
| 534 | } |
| 535 | Log << " " << LR->Locations.size() << " GSYM frames:\n"; |
| 536 | for (size_t Idx = 0, count = LR->Locations.size(); |
| 537 | Idx < count; ++Idx) { |
| 538 | const auto &gii = LR->Locations[Idx]; |
| 539 | Log << " [" << Idx << "]: " << gii.Name << " @ " << gii.Dir |
| 540 | << '/' << gii.Base << ':' << gii.Line << '\n'; |
| 541 | } |
| 542 | DwarfInlineInfos = DICtx.getInliningInfoForAddress(SectAddr, DLIS); |
| 543 | Gsym->dump(Log, *FI); |
| 544 | continue; |
| 545 | } |
| 546 | |
| 547 | for (size_t Idx = 0, count = LR->Locations.size(); Idx < count; |
| 548 | ++Idx) { |
| 549 | const auto &gii = LR->Locations[Idx]; |
| 550 | if (Idx < NumDwarfInlineInfos) { |
| 551 | const auto dii = DwarfInlineInfos.getFrame(Idx); |
| 552 | gsymFilename = LR->getSourceFile(Idx); |
| 553 | // Verify function name |
| 554 | if (dii.FunctionName.find(gii.Name.str()) != 0) |
| 555 | Log << "error: address " << HEX64(Addr) << " DWARF function \"" |
| 556 | << dii.FunctionName.c_str() |
| 557 | << "\" doesn't match GSYM function \"" << gii.Name << "\"\n"; |
| 558 | // Verify source file path |
| 559 | if (dii.FileName != gsymFilename) |
| 560 | Log << "error: address " << HEX64(Addr) << " DWARF path \"" |
| 561 | << dii.FileName.c_str() << "\" doesn't match GSYM path \"" |
| 562 | << gsymFilename.c_str() << "\"\n"; |
| 563 | // Verify source file line |
| 564 | if (dii.Line != gii.Line) |
| 565 | Log << "error: address " << HEX64(Addr) << " DWARF line " |
| 566 | << dii.Line << " != GSYM line " << gii.Line << "\n"; |
| 567 | } |
| 568 | } |
| 569 | } |
| 570 | } |
| 571 | return Error::success(); |
| 572 | } |