Zachary Turner | 53a65ba | 2016-04-26 18:42:34 +0000 | [diff] [blame] | 1 | //===- PDBDbiStream.cpp - PDB Dbi Stream (Stream 3) Access ----------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | #include "llvm/DebugInfo/PDB/Raw/PDBDbiStream.h" |
Zachary Turner | 1822af54 | 2016-04-27 23:41:42 +0000 | [diff] [blame] | 11 | #include "llvm/DebugInfo/PDB/Raw/ModInfo.h" |
Zachary Turner | 53a65ba | 2016-04-26 18:42:34 +0000 | [diff] [blame] | 12 | #include "llvm/DebugInfo/PDB/Raw/PDBFile.h" |
| 13 | #include "llvm/DebugInfo/PDB/Raw/PDBInfoStream.h" |
| 14 | #include "llvm/DebugInfo/PDB/Raw/PDBRawConstants.h" |
| 15 | |
| 16 | using namespace llvm; |
| 17 | using namespace llvm::support; |
| 18 | |
namespace {
// Several DBI header values are packed into bitfields. Reading them through a
// real C++ bitfield struct would not be portable across compilers or across
// big / little endian hosts, so the fields are extracted by hand with the
// masks and shifts below. The struct layouts are kept purely as reference.

// struct DbiFlags {
//   uint16_t IncrementalLinking : 1; // True if linked incrementally
//   uint16_t IsStripped : 1;         // True if private symbols were stripped.
//   uint16_t HasCTypes : 1;          // True if linked with /debug:ctypes.
//   uint16_t Reserved : 13;
// };
const uint16_t FlagIncrementalMask = 1 << 0;
const uint16_t FlagStrippedMask = 1 << 1;
const uint16_t FlagHasCTypesMask = 1 << 2;

// struct DbiBuildNo {
//   uint16_t MinorVersion : 8;
//   uint16_t MajorVersion : 7;
//   uint16_t NewVersionFormat : 1;
// };
// Each mask is the field's width (8 bits / 7 bits) moved to the field's
// position within the 16-bit build number.
const uint16_t BuildMinorShift = 0;
const uint16_t BuildMinorMask = 0xFF << BuildMinorShift;

const uint16_t BuildMajorShift = 8;
const uint16_t BuildMajorMask = 0x7F << BuildMajorShift;
} // end anonymous namespace
| 46 | |
| 47 | struct PDBDbiStream::HeaderInfo { |
Zachary Turner | ff788aa | 2016-04-26 19:24:10 +0000 | [diff] [blame] | 48 | little32_t VersionSignature; |
Zachary Turner | 53a65ba | 2016-04-26 18:42:34 +0000 | [diff] [blame] | 49 | ulittle32_t VersionHeader; |
| 50 | ulittle32_t Age; // Should match PDBInfoStream. |
Zachary Turner | 84c3a8b | 2016-04-28 20:05:18 +0000 | [diff] [blame^] | 51 | ulittle16_t GSSyms; // Number of global symbols |
| 52 | ulittle16_t BuildNumber; // See DbiBuildNo structure. |
| 53 | ulittle16_t PSSyms; // Number of public symbols |
Zachary Turner | 53a65ba | 2016-04-26 18:42:34 +0000 | [diff] [blame] | 54 | ulittle16_t PdbDllVersion; // version of mspdbNNN.dll |
| 55 | ulittle16_t SymRecords; // Number of symbols |
| 56 | ulittle16_t PdbDllRbld; // rbld number of mspdbNNN.dll |
| 57 | little32_t ModiSubstreamSize; // Size of module info stream |
| 58 | little32_t SecContrSubstreamSize; // Size of sec. contribution stream |
Zachary Turner | 84c3a8b | 2016-04-28 20:05:18 +0000 | [diff] [blame^] | 59 | little32_t SectionMapSize; // Size of sec. map substream |
| 60 | little32_t FileInfoSize; // Size of file info substream |
Zachary Turner | 53a65ba | 2016-04-26 18:42:34 +0000 | [diff] [blame] | 61 | little32_t TypeServerSize; // Size of type server map |
| 62 | ulittle32_t MFCTypeServerIndex; // Index of MFC Type Server |
| 63 | little32_t OptionalDbgHdrSize; // Size of DbgHeader info |
| 64 | little32_t ECSubstreamSize; // Size of EC stream (what is EC?) |
| 65 | ulittle16_t Flags; // See DbiFlags enum. |
| 66 | ulittle16_t MachineType; // See PDB_MachineType enum. |
| 67 | |
| 68 | ulittle32_t Reserved; // Pad to 64 bytes |
| 69 | }; |
| 70 | |
| 71 | PDBDbiStream::PDBDbiStream(PDBFile &File) : Pdb(File), Stream(3, File) { |
| 72 | static_assert(sizeof(HeaderInfo) == 64, "Invalid HeaderInfo size!"); |
| 73 | } |
| 74 | |
| 75 | PDBDbiStream::~PDBDbiStream() {} |
| 76 | |
| 77 | std::error_code PDBDbiStream::reload() { |
| 78 | Stream.setOffset(0); |
| 79 | Header.reset(new HeaderInfo()); |
| 80 | |
| 81 | if (Stream.getLength() < sizeof(HeaderInfo)) |
| 82 | return std::make_error_code(std::errc::illegal_byte_sequence); |
| 83 | Stream.readObject(Header.get()); |
| 84 | |
| 85 | if (Header->VersionSignature != -1) |
| 86 | return std::make_error_code(std::errc::illegal_byte_sequence); |
| 87 | |
Zachary Turner | 1822af54 | 2016-04-27 23:41:42 +0000 | [diff] [blame] | 88 | // Require at least version 7, which should be present in all PDBs |
| 89 | // produced in the last decade and allows us to avoid having to |
| 90 | // special case all kinds of complicated arcane formats. |
| 91 | if (Header->VersionHeader < PdbDbiV70) |
Zachary Turner | 53a65ba | 2016-04-26 18:42:34 +0000 | [diff] [blame] | 92 | return std::make_error_code(std::errc::not_supported); |
| 93 | |
| 94 | if (Header->Age != Pdb.getPDBInfoStream().getAge()) |
| 95 | return std::make_error_code(std::errc::illegal_byte_sequence); |
| 96 | |
| 97 | if (Stream.getLength() != |
| 98 | sizeof(HeaderInfo) + Header->ModiSubstreamSize + |
| 99 | Header->SecContrSubstreamSize + Header->SectionMapSize + |
| 100 | Header->FileInfoSize + Header->TypeServerSize + |
| 101 | Header->OptionalDbgHdrSize + Header->ECSubstreamSize) |
| 102 | return std::make_error_code(std::errc::illegal_byte_sequence); |
| 103 | |
Zachary Turner | 84c3a8b | 2016-04-28 20:05:18 +0000 | [diff] [blame^] | 104 | // Only certain substreams are guaranteed to be aligned. Validate |
| 105 | // them here. |
Zachary Turner | 1822af54 | 2016-04-27 23:41:42 +0000 | [diff] [blame] | 106 | if (Header->ModiSubstreamSize % sizeof(uint32_t) != 0) |
| 107 | return std::make_error_code(std::errc::illegal_byte_sequence); |
Zachary Turner | 84c3a8b | 2016-04-28 20:05:18 +0000 | [diff] [blame^] | 108 | if (Header->SecContrSubstreamSize % sizeof(uint32_t) != 0) |
| 109 | return std::make_error_code(std::errc::illegal_byte_sequence); |
| 110 | if (Header->SectionMapSize % sizeof(uint32_t) != 0) |
| 111 | return std::make_error_code(std::errc::illegal_byte_sequence); |
| 112 | if (Header->FileInfoSize % sizeof(uint32_t) != 0) |
| 113 | return std::make_error_code(std::errc::illegal_byte_sequence); |
| 114 | if (Header->TypeServerSize % sizeof(uint32_t) != 0) |
| 115 | return std::make_error_code(std::errc::illegal_byte_sequence); |
Zachary Turner | 1822af54 | 2016-04-27 23:41:42 +0000 | [diff] [blame] | 116 | |
Zachary Turner | 84c3a8b | 2016-04-28 20:05:18 +0000 | [diff] [blame^] | 117 | std::error_code EC; |
| 118 | if (EC = readSubstream(ModInfoSubstream, Header->ModiSubstreamSize)) |
| 119 | return EC; |
| 120 | |
| 121 | // Since each ModInfo in the stream is a variable length, we have to iterate |
| 122 | // them to know how many there actually are. |
| 123 | auto Range = llvm::make_range(ModInfoIterator(&ModInfoSubstream.front()), |
| 124 | ModInfoIterator(&ModInfoSubstream.back() + 1)); |
| 125 | for (auto Info : Range) |
| 126 | ModuleInfos.push_back(ModuleInfoEx(Info)); |
| 127 | |
| 128 | if (EC = readSubstream(SecContrSubstream, Header->SecContrSubstreamSize)) |
| 129 | return EC; |
| 130 | if (EC = readSubstream(SecMapSubstream, Header->SectionMapSize)) |
| 131 | return EC; |
| 132 | if (EC = readSubstream(FileInfoSubstream, Header->FileInfoSize)) |
| 133 | return EC; |
| 134 | if (EC = readSubstream(TypeServerMapSubstream, Header->TypeServerSize)) |
| 135 | return EC; |
| 136 | if (EC = readSubstream(ECSubstream, Header->ECSubstreamSize)) |
| 137 | return EC; |
| 138 | |
| 139 | if (EC = initializeFileInfo()) |
Zachary Turner | 1822af54 | 2016-04-27 23:41:42 +0000 | [diff] [blame] | 140 | return EC; |
| 141 | |
Zachary Turner | 53a65ba | 2016-04-26 18:42:34 +0000 | [diff] [blame] | 142 | return std::error_code(); |
| 143 | } |
| 144 | |
| 145 | PdbRaw_DbiVer PDBDbiStream::getDbiVersion() const { |
| 146 | uint32_t Value = Header->VersionHeader; |
| 147 | return static_cast<PdbRaw_DbiVer>(Value); |
| 148 | } |
| 149 | |
| 150 | uint32_t PDBDbiStream::getAge() const { return Header->Age; } |
| 151 | |
| 152 | bool PDBDbiStream::isIncrementallyLinked() const { |
| 153 | return (Header->Flags & FlagIncrementalMask) != 0; |
| 154 | } |
| 155 | |
| 156 | bool PDBDbiStream::hasCTypes() const { |
| 157 | return (Header->Flags & FlagHasCTypesMask) != 0; |
| 158 | } |
| 159 | |
| 160 | bool PDBDbiStream::isStripped() const { |
| 161 | return (Header->Flags & FlagStrippedMask) != 0; |
| 162 | } |
| 163 | |
| 164 | uint16_t PDBDbiStream::getBuildMajorVersion() const { |
| 165 | return (Header->BuildNumber & BuildMajorMask) >> BuildMajorShift; |
| 166 | } |
| 167 | |
| 168 | uint16_t PDBDbiStream::getBuildMinorVersion() const { |
| 169 | return (Header->BuildNumber & BuildMinorMask) >> BuildMinorShift; |
| 170 | } |
| 171 | |
| 172 | uint32_t PDBDbiStream::getPdbDllVersion() const { |
| 173 | return Header->PdbDllVersion; |
| 174 | } |
| 175 | |
| 176 | uint32_t PDBDbiStream::getNumberOfSymbols() const { return Header->SymRecords; } |
| 177 | |
| 178 | PDB_Machine PDBDbiStream::getMachineType() const { |
| 179 | uint16_t Machine = Header->MachineType; |
| 180 | return static_cast<PDB_Machine>(Machine); |
| 181 | } |
Zachary Turner | 1822af54 | 2016-04-27 23:41:42 +0000 | [diff] [blame] | 182 | |
Zachary Turner | 84c3a8b | 2016-04-28 20:05:18 +0000 | [diff] [blame^] | 183 | ArrayRef<ModuleInfoEx> PDBDbiStream::modules() const { return ModuleInfos; } |
| 184 | |
| 185 | std::error_code PDBDbiStream::readSubstream(std::vector<uint8_t> &Bytes, uint32_t Size) { |
| 186 | Bytes.clear(); |
| 187 | if (Size == 0) |
| 188 | return std::error_code(); |
| 189 | |
| 190 | Bytes.resize(Size); |
| 191 | return Stream.readBytes(&Bytes[0], Size); |
| 192 | } |
| 193 | |
| 194 | std::error_code PDBDbiStream::initializeFileInfo() { |
| 195 | struct FileInfoSubstreamHeader { |
| 196 | ulittle16_t NumModules; // Total # of modules, should match number of |
| 197 | // records in the ModuleInfo substream. |
| 198 | ulittle16_t NumSourceFiles; // Total # of source files. This value is not |
| 199 | // accurate because PDB actually supports more |
| 200 | // than 64k source files, so we ignore it and |
| 201 | // compute the value from other stream fields. |
| 202 | }; |
| 203 | |
| 204 | // The layout of the FileInfoSubstream is like this: |
| 205 | // struct { |
| 206 | // ulittle16_t NumModules; |
| 207 | // ulittle16_t NumSourceFiles; |
| 208 | // ulittle16_t ModIndices[NumModules]; |
| 209 | // ulittle16_t ModFileCounts[NumModules]; |
| 210 | // ulittle32_t FileNameOffsets[NumSourceFiles]; |
| 211 | // char Names[][NumSourceFiles]; |
| 212 | // }; |
| 213 | // with the caveat that `NumSourceFiles` cannot be trusted, so |
| 214 | // it is computed by summing `ModFileCounts`. |
| 215 | // |
| 216 | const uint8_t *Buf = &FileInfoSubstream[0]; |
| 217 | auto FI = reinterpret_cast<const FileInfoSubstreamHeader *>(Buf); |
| 218 | Buf += sizeof(FileInfoSubstreamHeader); |
| 219 | // The number of modules in the stream should be the same as reported by |
| 220 | // the FileInfoSubstreamHeader. |
| 221 | if (FI->NumModules != ModuleInfos.size()) |
| 222 | return std::make_error_code(std::errc::illegal_byte_sequence); |
| 223 | |
| 224 | // First is an array of `NumModules` module indices. This is not used for the |
| 225 | // same reason that `NumSourceFiles` is not used. It's an array of uint16's, |
| 226 | // but it's possible there are more than 64k source files, which would imply |
| 227 | // more than 64k modules (e.g. object files) as well. So we ignore this |
| 228 | // field. |
| 229 | llvm::ArrayRef<ulittle16_t> ModIndexArray( |
| 230 | reinterpret_cast<const ulittle16_t *>(Buf), ModuleInfos.size()); |
| 231 | |
| 232 | llvm::ArrayRef<ulittle16_t> ModFileCountArray(ModIndexArray.end(), |
| 233 | ModuleInfos.size()); |
| 234 | |
| 235 | // Compute the real number of source files. |
| 236 | uint32_t NumSourceFiles = 0; |
| 237 | for (auto Count : ModFileCountArray) |
| 238 | NumSourceFiles += Count; |
| 239 | |
| 240 | // This is the array that in the reference implementation corresponds to |
| 241 | // `ModInfo::FileLayout::FileNameOffs`, which is commented there as being a |
| 242 | // pointer. Due to the mentioned problems of pointers causing difficulty |
| 243 | // when reading from the file on 64-bit systems, we continue to ignore that |
| 244 | // field in `ModInfo`, and instead build a vector of StringRefs and stores |
| 245 | // them in `ModuleInfoEx`. The value written to and read from the file is |
| 246 | // not used anyway, it is only there as a way to store the offsets for the |
| 247 | // purposes of later accessing the names at runtime. |
| 248 | llvm::ArrayRef<little32_t> FileNameOffsets( |
| 249 | reinterpret_cast<const little32_t *>(ModFileCountArray.end()), |
| 250 | NumSourceFiles); |
| 251 | |
| 252 | const char *Names = reinterpret_cast<const char *>(FileNameOffsets.end()); |
| 253 | |
| 254 | // We go through each ModuleInfo, determine the number N of source files for |
| 255 | // that module, and then get the next N offsets from the Offsets array, using |
| 256 | // them to get the corresponding N names from the Names buffer and associating |
| 257 | // each one with the corresponding module. |
| 258 | uint32_t NextFileIndex = 0; |
| 259 | for (size_t I = 0; I < ModuleInfos.size(); ++I) { |
| 260 | uint32_t NumFiles = ModFileCountArray[I]; |
| 261 | ModuleInfos[I].SourceFiles.resize(NumFiles); |
| 262 | for (size_t J = 0; J < NumFiles; ++J, ++NextFileIndex) { |
| 263 | uint32_t FileIndex = FileNameOffsets[NextFileIndex]; |
| 264 | ModuleInfos[I].SourceFiles[J] = StringRef(Names + FileIndex); |
| 265 | } |
| 266 | } |
| 267 | |
| 268 | return std::error_code(); |
Zachary Turner | 1822af54 | 2016-04-27 23:41:42 +0000 | [diff] [blame] | 269 | } |