Zachary Turner | f04d6e8 | 2017-01-20 22:41:15 +0000 | [diff] [blame] | 1 | //===- StringTableBuilder.cpp - PDB String Table ----------------*- C++ -*-===// |
Rui Ueyama | dcd3293 | 2017-01-15 00:36:02 +0000 | [diff] [blame] | 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
Adrian McCarthy | 6b6b8c4 | 2017-01-25 22:38:55 +0000 | [diff] [blame] | 10 | #include "llvm/DebugInfo/PDB/Native/StringTableBuilder.h" |
Rui Ueyama | dcd3293 | 2017-01-15 00:36:02 +0000 | [diff] [blame] | 11 | #include "llvm/ADT/ArrayRef.h" |
| 12 | #include "llvm/DebugInfo/MSF/StreamWriter.h" |
Adrian McCarthy | 6b6b8c4 | 2017-01-25 22:38:55 +0000 | [diff] [blame] | 13 | #include "llvm/DebugInfo/PDB/Native/Hash.h" |
| 14 | #include "llvm/DebugInfo/PDB/Native/RawTypes.h" |
Rui Ueyama | dcd3293 | 2017-01-15 00:36:02 +0000 | [diff] [blame] | 15 | #include "llvm/Support/Endian.h" |
| 16 | |
| 17 | using namespace llvm; |
| 18 | using namespace llvm::support; |
| 19 | using namespace llvm::support::endian; |
| 20 | using namespace llvm::pdb; |
| 21 | |
Zachary Turner | f04d6e8 | 2017-01-20 22:41:15 +0000 | [diff] [blame] | 22 | uint32_t StringTableBuilder::insert(StringRef S) { |
Rui Ueyama | dcd3293 | 2017-01-15 00:36:02 +0000 | [diff] [blame] | 23 | auto P = Strings.insert({S, StringSize}); |
| 24 | |
| 25 | // If a given string didn't exist in the string table, we want to increment |
| 26 | // the string table size. |
| 27 | if (P.second) |
| 28 | StringSize += S.size() + 1; // +1 for '\0' |
| 29 | return P.first->second; |
| 30 | } |
| 31 | |
| 32 | static uint32_t computeBucketCount(uint32_t NumStrings) { |
| 33 | // The /names stream is basically an on-disk open-addressing hash table. |
| 34 | // Hash collisions are resolved by linear probing. We cannot make |
| 35 | // utilization 100% because it will make the linear probing extremely |
| 36 | // slow. But lower utilization wastes disk space. As a reasonable |
| 37 | // load factor, we choose 80%. We need +1 because slot 0 is reserved. |
| 38 | return (NumStrings + 1) * 1.25; |
| 39 | } |
| 40 | |
Zachary Turner | 760ad4d | 2017-01-20 22:42:09 +0000 | [diff] [blame] | 41 | uint32_t StringTableBuilder::finalize() { |
Rui Ueyama | dcd3293 | 2017-01-15 00:36:02 +0000 | [diff] [blame] | 42 | uint32_t Size = 0; |
Zachary Turner | f04d6e8 | 2017-01-20 22:41:15 +0000 | [diff] [blame] | 43 | Size += sizeof(StringTableHeader); |
Rui Ueyama | dcd3293 | 2017-01-15 00:36:02 +0000 | [diff] [blame] | 44 | Size += StringSize; |
Zachary Turner | 760ad4d | 2017-01-20 22:42:09 +0000 | [diff] [blame] | 45 | Size += sizeof(uint32_t); // Hash table begins with 4-byte size field. |
Rui Ueyama | dcd3293 | 2017-01-15 00:36:02 +0000 | [diff] [blame] | 46 | |
| 47 | uint32_t BucketCount = computeBucketCount(Strings.size()); |
Zachary Turner | 760ad4d | 2017-01-20 22:42:09 +0000 | [diff] [blame] | 48 | Size += BucketCount * sizeof(uint32_t); |
Rui Ueyama | dcd3293 | 2017-01-15 00:36:02 +0000 | [diff] [blame] | 49 | |
Zachary Turner | 760ad4d | 2017-01-20 22:42:09 +0000 | [diff] [blame] | 50 | Size += |
| 51 | sizeof(uint32_t); // The /names stream ends with the number of strings. |
Rui Ueyama | dcd3293 | 2017-01-15 00:36:02 +0000 | [diff] [blame] | 52 | return Size; |
| 53 | } |
| 54 | |
Zachary Turner | f04d6e8 | 2017-01-20 22:41:15 +0000 | [diff] [blame] | 55 | Error StringTableBuilder::commit(msf::StreamWriter &Writer) const { |
Rui Ueyama | dcd3293 | 2017-01-15 00:36:02 +0000 | [diff] [blame] | 56 | // Write a header |
Zachary Turner | f04d6e8 | 2017-01-20 22:41:15 +0000 | [diff] [blame] | 57 | StringTableHeader H; |
| 58 | H.Signature = StringTableSignature; |
Rui Ueyama | dcd3293 | 2017-01-15 00:36:02 +0000 | [diff] [blame] | 59 | H.HashVersion = 1; |
| 60 | H.ByteSize = StringSize; |
| 61 | if (auto EC = Writer.writeObject(H)) |
| 62 | return EC; |
| 63 | |
| 64 | // Write a string table. |
| 65 | uint32_t StringStart = Writer.getOffset(); |
| 66 | for (auto Pair : Strings) { |
| 67 | StringRef S = Pair.first; |
| 68 | uint32_t Offset = Pair.second; |
| 69 | Writer.setOffset(StringStart + Offset); |
| 70 | if (auto EC = Writer.writeZeroString(S)) |
| 71 | return EC; |
| 72 | } |
| 73 | Writer.setOffset(StringStart + StringSize); |
| 74 | |
| 75 | // Write a hash table. |
| 76 | uint32_t BucketCount = computeBucketCount(Strings.size()); |
Zachary Turner | 181fe17 | 2017-02-18 01:35:33 +0000 | [diff] [blame] | 77 | if (auto EC = Writer.writeInteger(BucketCount, llvm::support::little)) |
Rui Ueyama | dcd3293 | 2017-01-15 00:36:02 +0000 | [diff] [blame] | 78 | return EC; |
| 79 | std::vector<ulittle32_t> Buckets(BucketCount); |
| 80 | |
| 81 | for (auto Pair : Strings) { |
| 82 | StringRef S = Pair.first; |
| 83 | uint32_t Offset = Pair.second; |
| 84 | uint32_t Hash = hashStringV1(S); |
| 85 | |
| 86 | for (uint32_t I = 0; I != BucketCount; ++I) { |
| 87 | uint32_t Slot = (Hash + I) % BucketCount; |
| 88 | if (Slot == 0) |
| 89 | continue; // Skip reserved slot |
| 90 | if (Buckets[Slot] != 0) |
| 91 | continue; |
| 92 | Buckets[Slot] = Offset; |
| 93 | break; |
| 94 | } |
| 95 | } |
| 96 | |
| 97 | if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets))) |
| 98 | return EC; |
Zachary Turner | 181fe17 | 2017-02-18 01:35:33 +0000 | [diff] [blame] | 99 | if (auto EC = Writer.writeInteger(static_cast<uint32_t>(Strings.size()), |
| 100 | llvm::support::little)) |
Rui Ueyama | dcd3293 | 2017-01-15 00:36:02 +0000 | [diff] [blame] | 101 | return EC; |
| 102 | return Error::success(); |
| 103 | } |