Zachary Turner | abb17cc | 2017-09-01 20:06:56 +0000 | [diff] [blame] | 1 | //===- InputFile.cpp ------------------------------------------ *- C++ --*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | #include "InputFile.h" |
| 11 | |
| 12 | #include "FormatUtil.h" |
| 13 | #include "LinePrinter.h" |
| 14 | |
| 15 | #include "llvm/BinaryFormat/Magic.h" |
| 16 | #include "llvm/DebugInfo/CodeView/CodeView.h" |
| 17 | #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" |
| 18 | #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h" |
| 19 | #include "llvm/DebugInfo/PDB/Native/DbiStream.h" |
| 20 | #include "llvm/DebugInfo/PDB/Native/NativeSession.h" |
| 21 | #include "llvm/DebugInfo/PDB/Native/PDBFile.h" |
| 22 | #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" |
| 23 | #include "llvm/DebugInfo/PDB/Native/RawError.h" |
| 24 | #include "llvm/DebugInfo/PDB/Native/TpiStream.h" |
| 25 | #include "llvm/DebugInfo/PDB/PDB.h" |
| 26 | #include "llvm/Object/COFF.h" |
| 27 | #include "llvm/Support/FileSystem.h" |
| 28 | #include "llvm/Support/FormatVariadic.h" |
| 29 | |
| 30 | using namespace llvm; |
| 31 | using namespace llvm::codeview; |
| 32 | using namespace llvm::object; |
| 33 | using namespace llvm::pdb; |
| 34 | |
| 35 | InputFile::InputFile() {} |
| 36 | InputFile::~InputFile() {} |
| 37 | |
| 38 | static Expected<ModuleDebugStreamRef> |
| 39 | getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index) { |
| 40 | ExitOnError Err("Unexpected error: "); |
| 41 | |
| 42 | auto &Dbi = Err(File.getPDBDbiStream()); |
| 43 | const auto &Modules = Dbi.modules(); |
| 44 | auto Modi = Modules.getModuleDescriptor(Index); |
| 45 | |
| 46 | ModuleName = Modi.getModuleName(); |
| 47 | |
| 48 | uint16_t ModiStream = Modi.getModuleStreamIndex(); |
| 49 | if (ModiStream == kInvalidStreamIndex) |
| 50 | return make_error<RawError>(raw_error_code::no_stream, |
| 51 | "Module stream not present"); |
| 52 | |
| 53 | auto ModStreamData = File.createIndexedStream(ModiStream); |
| 54 | |
| 55 | ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData)); |
| 56 | if (auto EC = ModS.reload()) |
| 57 | return make_error<RawError>(raw_error_code::corrupt_file, |
| 58 | "Invalid module stream"); |
| 59 | |
| 60 | return std::move(ModS); |
| 61 | } |
| 62 | |
| 63 | static inline bool isCodeViewDebugSubsection(object::SectionRef Section, |
| 64 | StringRef Name, |
| 65 | BinaryStreamReader &Reader) { |
| 66 | StringRef SectionName, Contents; |
Zachary Turner | e31b9dc | 2017-09-02 00:09:43 +0000 | [diff] [blame] | 67 | if (Section.getName(SectionName)) |
Zachary Turner | abb17cc | 2017-09-01 20:06:56 +0000 | [diff] [blame] | 68 | return false; |
| 69 | |
| 70 | if (SectionName != Name) |
| 71 | return false; |
| 72 | |
Zachary Turner | e31b9dc | 2017-09-02 00:09:43 +0000 | [diff] [blame] | 73 | if (Section.getContents(Contents)) |
Zachary Turner | abb17cc | 2017-09-01 20:06:56 +0000 | [diff] [blame] | 74 | return false; |
| 75 | |
| 76 | Reader = BinaryStreamReader(Contents, support::little); |
| 77 | uint32_t Magic; |
| 78 | if (Reader.bytesRemaining() < sizeof(uint32_t)) |
| 79 | return false; |
| 80 | cantFail(Reader.readInteger(Magic)); |
| 81 | if (Magic != COFF::DEBUG_SECTION_MAGIC) |
| 82 | return false; |
| 83 | return true; |
| 84 | } |
| 85 | |
| 86 | static inline bool isDebugSSection(object::SectionRef Section, |
| 87 | DebugSubsectionArray &Subsections) { |
| 88 | BinaryStreamReader Reader; |
| 89 | if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader)) |
| 90 | return false; |
| 91 | |
| 92 | cantFail(Reader.readArray(Subsections, Reader.bytesRemaining())); |
| 93 | return true; |
| 94 | } |
| 95 | |
| 96 | static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) { |
| 97 | BinaryStreamReader Reader; |
| 98 | if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader)) |
| 99 | return false; |
| 100 | cantFail(Reader.readArray(Types, Reader.bytesRemaining())); |
| 101 | return true; |
| 102 | } |
| 103 | |
| 104 | static std::string formatChecksumKind(FileChecksumKind Kind) { |
| 105 | switch (Kind) { |
| 106 | RETURN_CASE(FileChecksumKind, None, "None"); |
| 107 | RETURN_CASE(FileChecksumKind, MD5, "MD5"); |
| 108 | RETURN_CASE(FileChecksumKind, SHA1, "SHA-1"); |
| 109 | RETURN_CASE(FileChecksumKind, SHA256, "SHA-256"); |
| 110 | } |
| 111 | return formatUnknownEnum(Kind); |
| 112 | } |
| 113 | |
| 114 | static const DebugStringTableSubsectionRef &extractStringTable(PDBFile &File) { |
| 115 | return cantFail(File.getStringTable()).getStringTable(); |
| 116 | } |
| 117 | |
| 118 | template <typename... Args> |
| 119 | static void formatInternal(LinePrinter &Printer, bool Append, Args &&... args) { |
| 120 | if (Append) |
| 121 | Printer.format(std::forward<Args>(args)...); |
| 122 | else |
| 123 | Printer.formatLine(std::forward<Args>(args)...); |
| 124 | } |
| 125 | |
| 126 | SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) { |
| 127 | if (!File) |
| 128 | return; |
| 129 | |
| 130 | if (File->isPdb()) |
| 131 | initializeForPdb(GroupIndex); |
| 132 | else { |
| 133 | Name = ".debug$S"; |
| 134 | uint32_t I = 0; |
| 135 | for (const auto &S : File->obj().sections()) { |
| 136 | DebugSubsectionArray SS; |
| 137 | if (!isDebugSSection(S, SS)) |
| 138 | continue; |
| 139 | |
| 140 | if (!SC.hasChecksums() || !SC.hasStrings()) |
| 141 | SC.initialize(SS); |
| 142 | |
| 143 | if (I == GroupIndex) |
| 144 | Subsections = SS; |
| 145 | |
| 146 | if (SC.hasChecksums() && SC.hasStrings()) |
| 147 | break; |
| 148 | } |
| 149 | rebuildChecksumMap(); |
| 150 | } |
| 151 | } |
| 152 | |
| 153 | StringRef SymbolGroup::name() const { return Name; } |
| 154 | |
| 155 | void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) { |
| 156 | Subsections = SS; |
| 157 | } |
| 158 | |
| 159 | void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); } |
| 160 | |
| 161 | void SymbolGroup::initializeForPdb(uint32_t Modi) { |
| 162 | assert(File && File->isPdb()); |
| 163 | |
| 164 | // PDB always uses the same string table, but each module has its own |
| 165 | // checksums. So we only set the strings if they're not already set. |
| 166 | if (!SC.hasStrings()) |
| 167 | SC.setStrings(extractStringTable(File->pdb())); |
| 168 | |
| 169 | SC.resetChecksums(); |
| 170 | auto MDS = getModuleDebugStream(File->pdb(), Name, Modi); |
| 171 | if (!MDS) { |
| 172 | consumeError(MDS.takeError()); |
| 173 | return; |
| 174 | } |
| 175 | |
| 176 | DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS)); |
| 177 | Subsections = DebugStream->getSubsectionsArray(); |
| 178 | SC.initialize(Subsections); |
| 179 | rebuildChecksumMap(); |
| 180 | } |
| 181 | |
| 182 | void SymbolGroup::rebuildChecksumMap() { |
| 183 | if (!SC.hasChecksums()) |
| 184 | return; |
| 185 | |
| 186 | for (const auto &Entry : SC.checksums()) { |
| 187 | auto S = SC.strings().getString(Entry.FileNameOffset); |
| 188 | if (!S) |
| 189 | continue; |
| 190 | ChecksumsByFile[*S] = Entry; |
| 191 | } |
| 192 | } |
| 193 | |
| 194 | const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const { |
| 195 | assert(File && File->isPdb() && DebugStream); |
| 196 | return *DebugStream; |
| 197 | } |
| 198 | |
| 199 | Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const { |
| 200 | return SC.strings().getString(Offset); |
| 201 | } |
| 202 | |
| 203 | void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File, |
| 204 | bool Append) const { |
| 205 | auto FC = ChecksumsByFile.find(File); |
| 206 | if (FC == ChecksumsByFile.end()) { |
| 207 | formatInternal(Printer, Append, "- (no checksum) {0}", File); |
| 208 | return; |
| 209 | } |
| 210 | |
| 211 | formatInternal(Printer, Append, "- ({0}: {1}) {2}", |
| 212 | formatChecksumKind(FC->getValue().Kind), |
| 213 | toHex(FC->getValue().Checksum), File); |
| 214 | } |
| 215 | |
| 216 | void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer, |
| 217 | uint32_t Offset, |
| 218 | bool Append) const { |
| 219 | if (!SC.hasChecksums()) { |
| 220 | formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); |
| 221 | return; |
| 222 | } |
| 223 | |
| 224 | auto Iter = SC.checksums().getArray().at(Offset); |
| 225 | if (Iter == SC.checksums().getArray().end()) { |
| 226 | formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); |
| 227 | return; |
| 228 | } |
| 229 | |
| 230 | uint32_t FO = Iter->FileNameOffset; |
| 231 | auto ExpectedFile = getNameFromStringTable(FO); |
| 232 | if (!ExpectedFile) { |
| 233 | formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); |
| 234 | consumeError(ExpectedFile.takeError()); |
| 235 | return; |
| 236 | } |
| 237 | if (Iter->Kind == FileChecksumKind::None) { |
| 238 | formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile); |
| 239 | } else { |
| 240 | formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile, |
| 241 | formatChecksumKind(Iter->Kind), toHex(Iter->Checksum)); |
| 242 | } |
| 243 | } |
| 244 | |
| 245 | Expected<InputFile> InputFile::open(StringRef Path) { |
| 246 | InputFile IF; |
| 247 | if (!llvm::sys::fs::exists(Path)) |
| 248 | return make_error<StringError>(formatv("File {0} not found", Path), |
| 249 | inconvertibleErrorCode()); |
| 250 | |
| 251 | file_magic Magic; |
| 252 | if (auto EC = identify_magic(Path, Magic)) |
| 253 | return make_error<StringError>( |
| 254 | formatv("Unable to identify file type for file {0}", Path), EC); |
| 255 | |
| 256 | if (Magic == file_magic::coff_object) { |
| 257 | Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path); |
| 258 | if (!BinaryOrErr) |
| 259 | return BinaryOrErr.takeError(); |
| 260 | |
| 261 | IF.CoffObject = std::move(*BinaryOrErr); |
| 262 | IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary()); |
| 263 | return std::move(IF); |
| 264 | } |
| 265 | |
| 266 | if (Magic == file_magic::unknown) { |
| 267 | std::unique_ptr<IPDBSession> Session; |
| 268 | if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session)) |
| 269 | return std::move(Err); |
| 270 | |
| 271 | IF.PdbSession.reset(static_cast<NativeSession *>(Session.release())); |
| 272 | IF.PdbOrObj = &IF.PdbSession->getPDBFile(); |
| 273 | |
| 274 | return std::move(IF); |
| 275 | } |
| 276 | |
| 277 | return make_error<StringError>( |
| 278 | formatv("File {0} is not a supported file type", Path), |
| 279 | inconvertibleErrorCode()); |
| 280 | } |
| 281 | |
| 282 | PDBFile &InputFile::pdb() { |
| 283 | assert(isPdb()); |
| 284 | return *PdbOrObj.get<PDBFile *>(); |
| 285 | } |
| 286 | |
| 287 | const PDBFile &InputFile::pdb() const { |
| 288 | assert(isPdb()); |
| 289 | return *PdbOrObj.get<PDBFile *>(); |
| 290 | } |
| 291 | |
| 292 | object::COFFObjectFile &InputFile::obj() { |
| 293 | assert(isObj()); |
| 294 | return *PdbOrObj.get<object::COFFObjectFile *>(); |
| 295 | } |
| 296 | |
| 297 | const object::COFFObjectFile &InputFile::obj() const { |
| 298 | assert(isObj()); |
| 299 | return *PdbOrObj.get<object::COFFObjectFile *>(); |
| 300 | } |
| 301 | |
| 302 | bool InputFile::hasTypes() const { |
| 303 | if (isPdb()) |
| 304 | return pdb().hasPDBTpiStream(); |
| 305 | |
| 306 | for (const auto &Section : obj().sections()) { |
| 307 | CVTypeArray Types; |
| 308 | if (isDebugTSection(Section, Types)) |
| 309 | return true; |
| 310 | } |
| 311 | return false; |
| 312 | } |
| 313 | |
| 314 | bool InputFile::hasIds() const { |
| 315 | if (isObj()) |
| 316 | return false; |
| 317 | return pdb().hasPDBIpiStream(); |
| 318 | } |
| 319 | |
| 320 | bool InputFile::isPdb() const { return PdbOrObj.is<PDBFile *>(); } |
| 321 | |
| 322 | bool InputFile::isObj() const { |
| 323 | return PdbOrObj.is<object::COFFObjectFile *>(); |
| 324 | } |
| 325 | |
| 326 | codeview::LazyRandomTypeCollection & |
| 327 | InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) { |
| 328 | if (Types && Kind == kTypes) |
| 329 | return *Types; |
| 330 | if (Ids && Kind == kIds) |
| 331 | return *Ids; |
| 332 | |
| 333 | if (Kind == kIds) { |
| 334 | assert(isPdb() && pdb().hasPDBIpiStream()); |
| 335 | } |
| 336 | |
| 337 | // If the collection was already initialized, we should have just returned it |
| 338 | // in step 1. |
| 339 | if (isPdb()) { |
| 340 | TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types; |
| 341 | auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream() |
| 342 | : pdb().getPDBTpiStream()); |
| 343 | |
| 344 | auto &Array = Stream.typeArray(); |
| 345 | uint32_t Count = Stream.getNumTypeRecords(); |
| 346 | auto Offsets = Stream.getTypeIndexOffsets(); |
| 347 | Collection = |
| 348 | llvm::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets); |
| 349 | return *Collection; |
| 350 | } |
| 351 | |
| 352 | assert(isObj()); |
| 353 | assert(Kind == kTypes); |
| 354 | assert(!Types); |
| 355 | |
| 356 | for (const auto &Section : obj().sections()) { |
| 357 | CVTypeArray Records; |
| 358 | if (!isDebugTSection(Section, Records)) |
| 359 | continue; |
| 360 | |
| 361 | Types = llvm::make_unique<LazyRandomTypeCollection>(Records, 100); |
| 362 | return *Types; |
| 363 | } |
| 364 | |
| 365 | Types = llvm::make_unique<LazyRandomTypeCollection>(100); |
| 366 | return *Types; |
| 367 | } |
| 368 | |
| 369 | codeview::LazyRandomTypeCollection &InputFile::types() { |
| 370 | return getOrCreateTypeCollection(kTypes); |
| 371 | } |
| 372 | |
| 373 | codeview::LazyRandomTypeCollection &InputFile::ids() { |
| 374 | // Object files have only one type stream that contains both types and ids. |
| 375 | // Similarly, some PDBs don't contain an IPI stream, and for those both types |
| 376 | // and IDs are in the same stream. |
| 377 | if (isObj() || !pdb().hasPDBIpiStream()) |
| 378 | return types(); |
| 379 | |
| 380 | return getOrCreateTypeCollection(kIds); |
| 381 | } |
| 382 | |
| 383 | iterator_range<SymbolGroupIterator> InputFile::symbol_groups() { |
| 384 | return make_range<SymbolGroupIterator>(symbol_groups_begin(), |
| 385 | symbol_groups_end()); |
| 386 | } |
| 387 | |
| 388 | SymbolGroupIterator InputFile::symbol_groups_begin() { |
| 389 | return SymbolGroupIterator(*this); |
| 390 | } |
| 391 | |
| 392 | SymbolGroupIterator InputFile::symbol_groups_end() { |
| 393 | return SymbolGroupIterator(); |
| 394 | } |
| 395 | |
| 396 | SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {} |
| 397 | |
| 398 | SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) { |
| 399 | if (File.isObj()) { |
| 400 | SectionIter = File.obj().section_begin(); |
| 401 | scanToNextDebugS(); |
| 402 | } |
| 403 | } |
| 404 | |
| 405 | bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const { |
| 406 | bool E = isEnd(); |
| 407 | bool RE = R.isEnd(); |
| 408 | if (E || RE) |
| 409 | return E == RE; |
| 410 | |
| 411 | if (Value.File != R.Value.File) |
| 412 | return false; |
| 413 | return Index == R.Index; |
| 414 | } |
| 415 | |
| 416 | const SymbolGroup &SymbolGroupIterator::operator*() const { |
| 417 | assert(!isEnd()); |
| 418 | return Value; |
| 419 | } |
| 420 | SymbolGroup &SymbolGroupIterator::operator*() { |
| 421 | assert(!isEnd()); |
| 422 | return Value; |
| 423 | } |
| 424 | |
| 425 | SymbolGroupIterator &SymbolGroupIterator::operator++() { |
| 426 | assert(Value.File && !isEnd()); |
| 427 | ++Index; |
| 428 | if (isEnd()) |
| 429 | return *this; |
| 430 | |
| 431 | if (Value.File->isPdb()) { |
| 432 | Value.updatePdbModi(Index); |
| 433 | return *this; |
| 434 | } |
| 435 | |
| 436 | scanToNextDebugS(); |
| 437 | return *this; |
| 438 | } |
| 439 | |
| 440 | void SymbolGroupIterator::scanToNextDebugS() { |
| 441 | assert(SectionIter.hasValue()); |
| 442 | auto End = Value.File->obj().section_end(); |
| 443 | auto &Iter = *SectionIter; |
| 444 | assert(!isEnd()); |
| 445 | |
| 446 | while (++Iter != End) { |
| 447 | DebugSubsectionArray SS; |
| 448 | SectionRef SR = *Iter; |
| 449 | if (!isDebugSSection(SR, SS)) |
| 450 | continue; |
| 451 | |
| 452 | Value.updateDebugS(SS); |
| 453 | return; |
| 454 | } |
| 455 | } |
| 456 | |
| 457 | bool SymbolGroupIterator::isEnd() const { |
| 458 | if (!Value.File) |
| 459 | return true; |
| 460 | if (Value.File->isPdb()) { |
| 461 | auto &Dbi = cantFail(Value.File->pdb().getPDBDbiStream()); |
| 462 | uint32_t Count = Dbi.modules().getModuleCount(); |
| 463 | assert(Index <= Count); |
| 464 | return Index == Count; |
| 465 | } |
| 466 | |
| 467 | assert(SectionIter.hasValue()); |
| 468 | return *SectionIter == Value.File->obj().section_end(); |
| 469 | } |