blob: 8ed5b8b44c592c1b055bbefe5ffc4bf8edca227d [file] [log] [blame]
Zachary Turner946204c2017-08-09 04:23:25 +00001//===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation -----------*- C++ -*-===//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Zachary Turner946204c2017-08-09 04:23:25 +00006//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
10
Zachary Turner7c6b19f2018-12-04 21:48:46 +000011#include "llvm/ADT/DenseSet.h"
Zachary Turneree9906d2017-08-11 19:00:03 +000012#include "llvm/DebugInfo/CodeView/RecordName.h"
Zachary Turner946204c2017-08-09 04:23:25 +000013#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
14#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
15#include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
16#include "llvm/DebugInfo/MSF/MSFBuilder.h"
17#include "llvm/DebugInfo/MSF/MSFCommon.h"
18#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
19#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
20#include "llvm/DebugInfo/PDB/Native/Hash.h"
21#include "llvm/Support/BinaryItemStream.h"
22#include "llvm/Support/BinaryStreamWriter.h"
Zachary Turner7c6b19f2018-12-04 21:48:46 +000023#include "llvm/Support/xxhash.h"
Zachary Turner946204c2017-08-09 04:23:25 +000024#include <algorithm>
25#include <vector>
26
27using namespace llvm;
28using namespace llvm::msf;
29using namespace llvm::pdb;
30using namespace llvm::codeview;
31
Zachary Turner946204c2017-08-09 04:23:25 +000032struct llvm::pdb::GSIHashStreamBuilder {
Amy Huang99708172019-06-11 18:02:39 +000033 struct SymbolDenseMapInfo {
Zachary Turner7c6b19f2018-12-04 21:48:46 +000034 static inline CVSymbol getEmptyKey() {
35 static CVSymbol Empty;
36 return Empty;
37 }
38 static inline CVSymbol getTombstoneKey() {
Reid Klecknere10d0042019-04-04 00:28:48 +000039 static CVSymbol Tombstone(
40 DenseMapInfo<ArrayRef<uint8_t>>::getTombstoneKey());
Zachary Turner7c6b19f2018-12-04 21:48:46 +000041 return Tombstone;
42 }
43 static unsigned getHashValue(const CVSymbol &Val) {
44 return xxHash64(Val.RecordData);
45 }
46 static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS) {
47 return LHS.RecordData == RHS.RecordData;
48 }
49 };
50
Zachary Turner946204c2017-08-09 04:23:25 +000051 std::vector<CVSymbol> Records;
52 uint32_t StreamIndex;
Amy Huang99708172019-06-11 18:02:39 +000053 llvm::DenseSet<CVSymbol, SymbolDenseMapInfo> SymbolHashes;
Zachary Turner946204c2017-08-09 04:23:25 +000054 std::vector<PSHashRecord> HashRecords;
55 std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap;
56 std::vector<support::ulittle32_t> HashBuckets;
57
58 uint32_t calculateSerializedLength() const;
59 uint32_t calculateRecordByteSize() const;
60 Error commit(BinaryStreamWriter &Writer);
61 void finalizeBuckets(uint32_t RecordZeroOffset);
Zachary Turneree9906d2017-08-11 19:00:03 +000062
63 template <typename T> void addSymbol(const T &Symbol, MSFBuilder &Msf) {
64 T Copy(Symbol);
Zachary Turner7c6b19f2018-12-04 21:48:46 +000065 addSymbol(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(),
66 CodeViewContainer::Pdb));
Zachary Turneree9906d2017-08-11 19:00:03 +000067 }
Zachary Turner7c6b19f2018-12-04 21:48:46 +000068 void addSymbol(const CVSymbol &Symbol) {
Amy Huang99708172019-06-11 18:02:39 +000069 if (Symbol.kind() == S_UDT || Symbol.kind() == S_CONSTANT) {
70 auto Iter = SymbolHashes.insert(Symbol);
Zachary Turner7c6b19f2018-12-04 21:48:46 +000071 if (!Iter.second)
72 return;
73 }
74
75 Records.push_back(Symbol);
76 }
Zachary Turner946204c2017-08-09 04:23:25 +000077};
78
79uint32_t GSIHashStreamBuilder::calculateSerializedLength() const {
80 uint32_t Size = sizeof(GSIHashHeader);
81 Size += HashRecords.size() * sizeof(PSHashRecord);
82 Size += HashBitmap.size() * sizeof(uint32_t);
83 Size += HashBuckets.size() * sizeof(uint32_t);
84 return Size;
85}
86
87uint32_t GSIHashStreamBuilder::calculateRecordByteSize() const {
88 uint32_t Size = 0;
89 for (const auto &Sym : Records)
90 Size += Sym.length();
91 return Size;
92}
93
94Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) {
95 GSIHashHeader Header;
96 Header.VerSignature = GSIHashHeader::HdrSignature;
97 Header.VerHdr = GSIHashHeader::HdrVersion;
98 Header.HrSize = HashRecords.size() * sizeof(PSHashRecord);
99 Header.NumBuckets = HashBitmap.size() * 4 + HashBuckets.size() * 4;
100
101 if (auto EC = Writer.writeObject(Header))
102 return EC;
103
104 if (auto EC = Writer.writeArray(makeArrayRef(HashRecords)))
105 return EC;
106 if (auto EC = Writer.writeArray(makeArrayRef(HashBitmap)))
107 return EC;
108 if (auto EC = Writer.writeArray(makeArrayRef(HashBuckets)))
109 return EC;
110 return Error::success();
111}
112
Zachary Turner648bebd2018-07-06 21:01:42 +0000113static bool isAsciiString(StringRef S) {
114 return llvm::all_of(S, [](char C) { return unsigned(C) < 0x80; });
115}
116
117// See `caseInsensitiveComparePchPchCchCch` in gsi.cpp
118static bool gsiRecordLess(StringRef S1, StringRef S2) {
119 size_t LS = S1.size();
120 size_t RS = S2.size();
121 // Shorter strings always compare less than longer strings.
122 if (LS != RS)
123 return LS < RS;
124
125 // If either string contains non ascii characters, memcmp them.
126 if (LLVM_UNLIKELY(!isAsciiString(S1) || !isAsciiString(S2)))
127 return memcmp(S1.data(), S2.data(), LS) < 0;
128
Benjamin Kramer9fc944a2018-07-06 21:56:57 +0000129 // Both strings are ascii, perform a case-insenstive comparison.
130 return S1.compare_lower(S2.data()) < 0;
Zachary Turner648bebd2018-07-06 21:01:42 +0000131}
132
Zachary Turner946204c2017-08-09 04:23:25 +0000133void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) {
Zachary Turner1f200ad2018-07-06 02:33:58 +0000134 std::array<std::vector<std::pair<StringRef, PSHashRecord>>, IPHR_HASH + 1>
135 TmpBuckets;
Zachary Turner946204c2017-08-09 04:23:25 +0000136 uint32_t SymOffset = RecordZeroOffset;
137 for (const CVSymbol &Sym : Records) {
138 PSHashRecord HR;
139 // Add one when writing symbol offsets to disk. See GSI1::fixSymRecs.
140 HR.Off = SymOffset + 1;
141 HR.CRef = 1; // Always use a refcount of 1.
142
143 // Hash the name to figure out which bucket this goes into.
144 StringRef Name = getSymbolName(Sym);
145 size_t BucketIdx = hashStringV1(Name) % IPHR_HASH;
Zachary Turner1f200ad2018-07-06 02:33:58 +0000146 TmpBuckets[BucketIdx].push_back(std::make_pair(Name, HR));
Zachary Turner946204c2017-08-09 04:23:25 +0000147 SymOffset += Sym.length();
148 }
149
150 // Compute the three tables: the hash records in bucket and chain order, the
151 // bucket presence bitmap, and the bucket chain start offsets.
152 HashRecords.reserve(Records.size());
153 for (ulittle32_t &Word : HashBitmap)
154 Word = 0;
155 for (size_t BucketIdx = 0; BucketIdx < IPHR_HASH + 1; ++BucketIdx) {
156 auto &Bucket = TmpBuckets[BucketIdx];
157 if (Bucket.empty())
158 continue;
159 HashBitmap[BucketIdx / 32] |= 1U << (BucketIdx % 32);
160
161 // Calculate what the offset of the first hash record in the chain would
162 // be if it were inflated to contain 32-bit pointers. On a 32-bit system,
163 // each record would be 12 bytes. See HROffsetCalc in gsi.h.
164 const int SizeOfHROffsetCalc = 12;
165 ulittle32_t ChainStartOff =
166 ulittle32_t(HashRecords.size() * SizeOfHROffsetCalc);
167 HashBuckets.push_back(ChainStartOff);
Zachary Turner1f200ad2018-07-06 02:33:58 +0000168
Zachary Turner648bebd2018-07-06 21:01:42 +0000169 // Sort each bucket by memcmp of the symbol's name. It's important that
170 // we use the same sorting algorithm as is used by the reference
171 // implementation to ensure that the search for a record within a bucket
172 // can properly early-out when it detects the record won't be found. The
173 // algorithm used here corredsponds to the function
174 // caseInsensitiveComparePchPchCchCch in the reference implementation.
Fangrui Song0cac7262018-09-27 02:13:45 +0000175 llvm::sort(Bucket, [](const std::pair<StringRef, PSHashRecord> &Left,
176 const std::pair<StringRef, PSHashRecord> &Right) {
177 return gsiRecordLess(Left.first, Right.first);
178 });
Zachary Turner1f200ad2018-07-06 02:33:58 +0000179
180 for (const auto &Entry : Bucket)
181 HashRecords.push_back(Entry.second);
Zachary Turner946204c2017-08-09 04:23:25 +0000182 }
183}
184
185GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf)
186 : Msf(Msf), PSH(llvm::make_unique<GSIHashStreamBuilder>()),
187 GSH(llvm::make_unique<GSIHashStreamBuilder>()) {}
188
189GSIStreamBuilder::~GSIStreamBuilder() {}
190
191uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const {
192 uint32_t Size = 0;
193 Size += sizeof(PublicsStreamHeader);
194 Size += PSH->calculateSerializedLength();
195 Size += PSH->Records.size() * sizeof(uint32_t); // AddrMap
196 // FIXME: Add thunk map and section offsets for incremental linking.
197
198 return Size;
199}
200
201uint32_t GSIStreamBuilder::calculateGlobalsHashStreamSize() const {
202 return GSH->calculateSerializedLength();
203}
204
205Error GSIStreamBuilder::finalizeMsfLayout() {
206 // First we write public symbol records, then we write global symbol records.
207 uint32_t PSHZero = 0;
208 uint32_t GSHZero = PSH->calculateRecordByteSize();
209
210 PSH->finalizeBuckets(PSHZero);
211 GSH->finalizeBuckets(GSHZero);
212
Zachary Turnera6fb5362018-03-23 18:43:39 +0000213 Expected<uint32_t> Idx = Msf.addStream(calculateGlobalsHashStreamSize());
Zachary Turner946204c2017-08-09 04:23:25 +0000214 if (!Idx)
215 return Idx.takeError();
216 GSH->StreamIndex = *Idx;
Zachary Turnera6fb5362018-03-23 18:43:39 +0000217 Idx = Msf.addStream(calculatePublicsHashStreamSize());
218 if (!Idx)
219 return Idx.takeError();
220 PSH->StreamIndex = *Idx;
Zachary Turner946204c2017-08-09 04:23:25 +0000221
222 uint32_t RecordBytes =
223 GSH->calculateRecordByteSize() + PSH->calculateRecordByteSize();
224
225 Idx = Msf.addStream(RecordBytes);
226 if (!Idx)
227 return Idx.takeError();
228 RecordStreamIdx = *Idx;
229 return Error::success();
230}
231
Zachary Turnerd76dc2d2017-08-21 20:08:40 +0000232static bool comparePubSymByAddrAndName(
233 const std::pair<const CVSymbol *, const PublicSym32 *> &LS,
234 const std::pair<const CVSymbol *, const PublicSym32 *> &RS) {
235 if (LS.second->Segment != RS.second->Segment)
236 return LS.second->Segment < RS.second->Segment;
237 if (LS.second->Offset != RS.second->Offset)
238 return LS.second->Offset < RS.second->Offset;
Zachary Turner946204c2017-08-09 04:23:25 +0000239
Zachary Turnerd76dc2d2017-08-21 20:08:40 +0000240 return LS.second->Name < RS.second->Name;
Zachary Turner946204c2017-08-09 04:23:25 +0000241}
242
243/// Compute the address map. The address map is an array of symbol offsets
244/// sorted so that it can be binary searched by address.
245static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Records) {
246 // Make a vector of pointers to the symbols so we can sort it by address.
247 // Also gather the symbol offsets while we're at it.
Zachary Turnerd76dc2d2017-08-21 20:08:40 +0000248
249 std::vector<PublicSym32> DeserializedPublics;
250 std::vector<std::pair<const CVSymbol *, const PublicSym32 *>> PublicsByAddr;
Zachary Turner946204c2017-08-09 04:23:25 +0000251 std::vector<uint32_t> SymOffsets;
Zachary Turnerd76dc2d2017-08-21 20:08:40 +0000252 DeserializedPublics.reserve(Records.size());
Zachary Turner946204c2017-08-09 04:23:25 +0000253 PublicsByAddr.reserve(Records.size());
Zachary Turnerd76dc2d2017-08-21 20:08:40 +0000254 SymOffsets.reserve(Records.size());
255
Zachary Turner946204c2017-08-09 04:23:25 +0000256 uint32_t SymOffset = 0;
257 for (const CVSymbol &Sym : Records) {
Zachary Turnerd76dc2d2017-08-21 20:08:40 +0000258 assert(Sym.kind() == SymbolKind::S_PUB32);
259 DeserializedPublics.push_back(
260 cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym)));
261 PublicsByAddr.emplace_back(&Sym, &DeserializedPublics.back());
Zachary Turner946204c2017-08-09 04:23:25 +0000262 SymOffsets.push_back(SymOffset);
263 SymOffset += Sym.length();
264 }
Fangrui Songefd94c52019-04-23 14:51:27 +0000265 llvm::stable_sort(PublicsByAddr, comparePubSymByAddrAndName);
Zachary Turner946204c2017-08-09 04:23:25 +0000266
267 // Fill in the symbol offsets in the appropriate order.
268 std::vector<ulittle32_t> AddrMap;
269 AddrMap.reserve(Records.size());
Zachary Turnerd76dc2d2017-08-21 20:08:40 +0000270 for (auto &Sym : PublicsByAddr) {
271 ptrdiff_t Idx = std::distance(Records.data(), Sym.first);
Zachary Turner946204c2017-08-09 04:23:25 +0000272 assert(Idx >= 0 && size_t(Idx) < Records.size());
273 AddrMap.push_back(ulittle32_t(SymOffsets[Idx]));
274 }
275 return AddrMap;
276}
277
278uint32_t GSIStreamBuilder::getPublicsStreamIndex() const {
279 return PSH->StreamIndex;
280}
281
282uint32_t GSIStreamBuilder::getGlobalsStreamIndex() const {
283 return GSH->StreamIndex;
284}
285
286void GSIStreamBuilder::addPublicSymbol(const PublicSym32 &Pub) {
Zachary Turneree9906d2017-08-11 19:00:03 +0000287 PSH->addSymbol(Pub, Msf);
288}
289
290void GSIStreamBuilder::addGlobalSymbol(const ProcRefSym &Sym) {
291 GSH->addSymbol(Sym, Msf);
292}
293
294void GSIStreamBuilder::addGlobalSymbol(const DataSym &Sym) {
295 GSH->addSymbol(Sym, Msf);
296}
297
298void GSIStreamBuilder::addGlobalSymbol(const ConstantSym &Sym) {
299 GSH->addSymbol(Sym, Msf);
300}
301
Zachary Turneree9906d2017-08-11 19:00:03 +0000302void GSIStreamBuilder::addGlobalSymbol(const codeview::CVSymbol &Sym) {
303 GSH->addSymbol(Sym);
Zachary Turner946204c2017-08-09 04:23:25 +0000304}
305
306static Error writeRecords(BinaryStreamWriter &Writer,
307 ArrayRef<CVSymbol> Records) {
308 BinaryItemStream<CVSymbol> ItemStream(support::endianness::little);
309 ItemStream.setItems(Records);
310 BinaryStreamRef RecordsRef(ItemStream);
311 return Writer.writeStreamRef(RecordsRef);
312}
313
314Error GSIStreamBuilder::commitSymbolRecordStream(
315 WritableBinaryStreamRef Stream) {
316 BinaryStreamWriter Writer(Stream);
317
318 // Write public symbol records first, followed by global symbol records. This
319 // must match the order that we assume in finalizeMsfLayout when computing
320 // PSHZero and GSHZero.
321 if (auto EC = writeRecords(Writer, PSH->Records))
322 return EC;
323 if (auto EC = writeRecords(Writer, GSH->Records))
324 return EC;
325
326 return Error::success();
327}
328
329Error GSIStreamBuilder::commitPublicsHashStream(
330 WritableBinaryStreamRef Stream) {
Zachary Turner946204c2017-08-09 04:23:25 +0000331 BinaryStreamWriter Writer(Stream);
Zachary Turner5448dab2017-08-09 04:23:59 +0000332 PublicsStreamHeader Header;
Zachary Turner946204c2017-08-09 04:23:25 +0000333
334 // FIXME: Fill these in. They are for incremental linking.
Nico Webere2745b52018-09-11 14:11:52 +0000335 Header.SymHash = PSH->calculateSerializedLength();
336 Header.AddrMap = PSH->Records.size() * 4;
Zachary Turner946204c2017-08-09 04:23:25 +0000337 Header.NumThunks = 0;
338 Header.SizeOfThunk = 0;
339 Header.ISectThunkTable = 0;
Nico Webere2745b52018-09-11 14:11:52 +0000340 memset(Header.Padding, 0, sizeof(Header.Padding));
Zachary Turner946204c2017-08-09 04:23:25 +0000341 Header.OffThunkTable = 0;
342 Header.NumSections = 0;
Zachary Turner946204c2017-08-09 04:23:25 +0000343 if (auto EC = Writer.writeObject(Header))
344 return EC;
345
Zachary Turner5448dab2017-08-09 04:23:59 +0000346 if (auto EC = PSH->commit(Writer))
347 return EC;
Zachary Turner946204c2017-08-09 04:23:25 +0000348
349 std::vector<ulittle32_t> AddrMap = computeAddrMap(PSH->Records);
350 if (auto EC = Writer.writeArray(makeArrayRef(AddrMap)))
351 return EC;
352
353 return Error::success();
354}
355
356Error GSIStreamBuilder::commitGlobalsHashStream(
357 WritableBinaryStreamRef Stream) {
358 BinaryStreamWriter Writer(Stream);
359 return GSH->commit(Writer);
360}
361
362Error GSIStreamBuilder::commit(const msf::MSFLayout &Layout,
363 WritableBinaryStreamRef Buffer) {
364 auto GS = WritableMappedBlockStream::createIndexedStream(
365 Layout, Buffer, getGlobalsStreamIndex(), Msf.getAllocator());
366 auto PS = WritableMappedBlockStream::createIndexedStream(
367 Layout, Buffer, getPublicsStreamIndex(), Msf.getAllocator());
368 auto PRS = WritableMappedBlockStream::createIndexedStream(
369 Layout, Buffer, getRecordStreamIdx(), Msf.getAllocator());
370
371 if (auto EC = commitSymbolRecordStream(*PRS))
372 return EC;
373 if (auto EC = commitGlobalsHashStream(*GS))
374 return EC;
375 if (auto EC = commitPublicsHashStream(*PS))
376 return EC;
377 return Error::success();
378}