blob: 4ad8521731f55a0d10a3f45df0ddf5e76fe73244 [file] [log] [blame]
Eugene Zelenko4b3289e2017-11-02 21:45:30 +00001//===- MultiOnDiskHashTable.h - Merged set of hash tables -------*- C++ -*-===//
Richard Smithd88a7f12015-09-01 20:35:42 +00002//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file provides a hash table data structure suitable for incremental and
11// distributed storage across a set of files.
12//
13// Multiple hash tables from different files are implicitly merged to improve
14// performance, and on reload the merged table will override those from other
15// files.
16//
17//===----------------------------------------------------------------------===//
Eugene Zelenko4b3289e2017-11-02 21:45:30 +000018
Richard Smithd88a7f12015-09-01 20:35:42 +000019#ifndef LLVM_CLANG_LIB_SERIALIZATION_MULTIONDISKHASHTABLE_H
20#define LLVM_CLANG_LIB_SERIALIZATION_MULTIONDISKHASHTABLE_H
21
Mehdi Amini9670f842016-07-18 19:02:11 +000022#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/DenseSet.h"
Richard Smithd88a7f12015-09-01 20:35:42 +000024#include "llvm/ADT/PointerUnion.h"
Mehdi Amini9670f842016-07-18 19:02:11 +000025#include "llvm/ADT/STLExtras.h"
Eugene Zelenko4b3289e2017-11-02 21:45:30 +000026#include "llvm/ADT/SmallVector.h"
Mehdi Amini9670f842016-07-18 19:02:11 +000027#include "llvm/ADT/TinyPtrVector.h"
Eugene Zelenko4b3289e2017-11-02 21:45:30 +000028#include "llvm/ADT/iterator_range.h"
29#include "llvm/Support/Endian.h"
Richard Smithd88a7f12015-09-01 20:35:42 +000030#include "llvm/Support/EndianStream.h"
31#include "llvm/Support/OnDiskHashTable.h"
Eugene Zelenko4b3289e2017-11-02 21:45:30 +000032#include "llvm/Support/raw_ostream.h"
33#include <algorithm>
34#include <cstdint>
35#include <vector>
Richard Smithd88a7f12015-09-01 20:35:42 +000036
37namespace clang {
38namespace serialization {
39
Adrian Prantl9fc8faf2018-05-09 01:00:01 +000040/// A collection of on-disk hash tables, merged when relevant for performance.
Richard Smithd88a7f12015-09-01 20:35:42 +000041template<typename Info> class MultiOnDiskHashTable {
42public:
43 /// A handle to a file, used when overriding tables.
Eugene Zelenko4b3289e2017-11-02 21:45:30 +000044 using file_type = typename Info::file_type;
Richard Smithd88a7f12015-09-01 20:35:42 +000045
Eugene Zelenko4b3289e2017-11-02 21:45:30 +000046 /// A pointer to an on-disk representation of the hash table.
47 using storage_type = const unsigned char *;
48
49 using external_key_type = typename Info::external_key_type;
50 using internal_key_type = typename Info::internal_key_type;
51 using data_type = typename Info::data_type;
52 using data_type_builder = typename Info::data_type_builder;
53 using hash_value_type = unsigned;
Richard Smithd88a7f12015-09-01 20:35:42 +000054
55private:
Eugene Zelenko4b3289e2017-11-02 21:45:30 +000056 /// The generator is permitted to read our merged table.
57 template<typename ReaderInfo, typename WriterInfo>
58 friend class MultiOnDiskHashTableGenerator;
59
Adrian Prantl9fc8faf2018-05-09 01:00:01 +000060 /// A hash table stored on disk.
Richard Smithd88a7f12015-09-01 20:35:42 +000061 struct OnDiskTable {
Eugene Zelenko4b3289e2017-11-02 21:45:30 +000062 using HashTable = llvm::OnDiskIterableChainedHashTable<Info>;
Richard Smithd88a7f12015-09-01 20:35:42 +000063
64 file_type File;
65 HashTable Table;
66
67 OnDiskTable(file_type File, unsigned NumBuckets, unsigned NumEntries,
68 storage_type Buckets, storage_type Payload, storage_type Base,
69 const Info &InfoObj)
70 : File(File),
71 Table(NumBuckets, NumEntries, Buckets, Payload, Base, InfoObj) {}
72 };
73
74 struct MergedTable {
75 std::vector<file_type> Files;
76 llvm::DenseMap<internal_key_type, data_type> Data;
77 };
78
Eugene Zelenko4b3289e2017-11-02 21:45:30 +000079 using Table = llvm::PointerUnion<OnDiskTable *, MergedTable *>;
80 using TableVector = llvm::TinyPtrVector<void *>;
Richard Smithd88a7f12015-09-01 20:35:42 +000081
Adrian Prantl9fc8faf2018-05-09 01:00:01 +000082 /// The current set of on-disk and merged tables.
Richard Smithd88a7f12015-09-01 20:35:42 +000083 /// We manually store the opaque value of the Table because TinyPtrVector
84 /// can't cope with holding a PointerUnion directly.
85 /// There can be at most one MergedTable in this vector, and if present,
86 /// it is the first table.
87 TableVector Tables;
88
Adrian Prantl9fc8faf2018-05-09 01:00:01 +000089 /// Files corresponding to overridden tables that we've not yet
Richard Smithd88a7f12015-09-01 20:35:42 +000090 /// discarded.
91 llvm::TinyPtrVector<file_type> PendingOverrides;
92
93 struct AsOnDiskTable {
Eugene Zelenko4b3289e2017-11-02 21:45:30 +000094 using result_type = OnDiskTable *;
95
Richard Smithd88a7f12015-09-01 20:35:42 +000096 result_type operator()(void *P) const {
97 return Table::getFromOpaqueValue(P).template get<OnDiskTable *>();
98 }
99 };
Eugene Zelenko4b3289e2017-11-02 21:45:30 +0000100
101 using table_iterator =
102 llvm::mapped_iterator<TableVector::iterator, AsOnDiskTable>;
103 using table_range = llvm::iterator_range<table_iterator>;
Richard Smithd88a7f12015-09-01 20:35:42 +0000104
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000105 /// The current set of on-disk tables.
Richard Smithd88a7f12015-09-01 20:35:42 +0000106 table_range tables() {
107 auto Begin = Tables.begin(), End = Tables.end();
108 if (getMergedTable())
109 ++Begin;
110 return llvm::make_range(llvm::map_iterator(Begin, AsOnDiskTable()),
111 llvm::map_iterator(End, AsOnDiskTable()));
112 }
113
114 MergedTable *getMergedTable() const {
115 // If we already have a merged table, it's the first one.
116 return Tables.empty() ? nullptr : Table::getFromOpaqueValue(*Tables.begin())
117 .template dyn_cast<MergedTable*>();
118 }
119
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000120 /// Delete all our current on-disk tables.
Richard Smithd88a7f12015-09-01 20:35:42 +0000121 void clear() {
122 for (auto *T : tables())
123 delete T;
124 if (auto *M = getMergedTable())
125 delete M;
126 Tables.clear();
127 }
128
129 void removeOverriddenTables() {
130 llvm::DenseSet<file_type> Files;
131 Files.insert(PendingOverrides.begin(), PendingOverrides.end());
132 // Explicitly capture Files to work around an MSVC 2015 rejects-valid bug.
133 auto ShouldRemove = [&Files](void *T) -> bool {
134 auto *ODT = Table::getFromOpaqueValue(T).template get<OnDiskTable *>();
135 bool Remove = Files.count(ODT->File);
136 if (Remove)
137 delete ODT;
138 return Remove;
139 };
140 Tables.erase(std::remove_if(tables().begin().getCurrent(), Tables.end(),
141 ShouldRemove),
142 Tables.end());
143 PendingOverrides.clear();
144 }
145
146 void condense() {
147 MergedTable *Merged = getMergedTable();
148 if (!Merged)
149 Merged = new MergedTable;
150
151 // Read in all the tables and merge them together.
152 // FIXME: Be smarter about which tables we merge.
153 for (auto *ODT : tables()) {
154 auto &HT = ODT->Table;
155 Info &InfoObj = HT.getInfoObj();
156
157 for (auto I = HT.data_begin(), E = HT.data_end(); I != E; ++I) {
158 auto *LocalPtr = I.getItem();
159
160 // FIXME: Don't rely on the OnDiskHashTable format here.
161 auto L = InfoObj.ReadKeyDataLength(LocalPtr);
162 const internal_key_type &Key = InfoObj.ReadKey(LocalPtr, L.first);
163 data_type_builder ValueBuilder(Merged->Data[Key]);
164 InfoObj.ReadDataInto(Key, LocalPtr + L.first, L.second,
165 ValueBuilder);
166 }
167
168 Merged->Files.push_back(ODT->File);
169 delete ODT;
170 }
171
172 Tables.clear();
173 Tables.push_back(Table(Merged).getOpaqueValue());
174 }
175
Richard Smithd88a7f12015-09-01 20:35:42 +0000176public:
Eugene Zelenko4b3289e2017-11-02 21:45:30 +0000177 MultiOnDiskHashTable() = default;
178
Richard Smithd88a7f12015-09-01 20:35:42 +0000179 MultiOnDiskHashTable(MultiOnDiskHashTable &&O)
180 : Tables(std::move(O.Tables)),
181 PendingOverrides(std::move(O.PendingOverrides)) {
182 O.Tables.clear();
183 }
Eugene Zelenko4b3289e2017-11-02 21:45:30 +0000184
Richard Smithd88a7f12015-09-01 20:35:42 +0000185 MultiOnDiskHashTable &operator=(MultiOnDiskHashTable &&O) {
186 if (&O == this)
187 return *this;
188 clear();
189 Tables = std::move(O.Tables);
190 O.Tables.clear();
191 PendingOverrides = std::move(O.PendingOverrides);
192 return *this;
193 }
Eugene Zelenko4b3289e2017-11-02 21:45:30 +0000194
Richard Smithd88a7f12015-09-01 20:35:42 +0000195 ~MultiOnDiskHashTable() { clear(); }
196
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000197 /// Add the table \p Data loaded from file \p File.
Richard Smithd88a7f12015-09-01 20:35:42 +0000198 void add(file_type File, storage_type Data, Info InfoObj = Info()) {
199 using namespace llvm::support;
Eugene Zelenko4b3289e2017-11-02 21:45:30 +0000200
Richard Smithd88a7f12015-09-01 20:35:42 +0000201 storage_type Ptr = Data;
202
203 uint32_t BucketOffset = endian::readNext<uint32_t, little, unaligned>(Ptr);
204
205 // Read the list of overridden files.
206 uint32_t NumFiles = endian::readNext<uint32_t, little, unaligned>(Ptr);
207 // FIXME: Add a reserve() to TinyPtrVector so that we don't need to make
208 // an additional copy.
209 llvm::SmallVector<file_type, 16> OverriddenFiles;
210 OverriddenFiles.reserve(NumFiles);
211 for (/**/; NumFiles != 0; --NumFiles)
212 OverriddenFiles.push_back(InfoObj.ReadFileRef(Ptr));
213 PendingOverrides.insert(PendingOverrides.end(), OverriddenFiles.begin(),
214 OverriddenFiles.end());
215
216 // Read the OnDiskChainedHashTable header.
217 storage_type Buckets = Data + BucketOffset;
218 auto NumBucketsAndEntries =
219 OnDiskTable::HashTable::readNumBucketsAndEntries(Buckets);
220
221 // Register the table.
222 Table NewTable = new OnDiskTable(File, NumBucketsAndEntries.first,
223 NumBucketsAndEntries.second,
224 Buckets, Ptr, Data, std::move(InfoObj));
225 Tables.push_back(NewTable.getOpaqueValue());
226 }
227
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000228 /// Find and read the lookup results for \p EKey.
Richard Smithd88a7f12015-09-01 20:35:42 +0000229 data_type find(const external_key_type &EKey) {
230 data_type Result;
231
232 if (!PendingOverrides.empty())
233 removeOverriddenTables();
234
Aaron Ballman7f0c25b2015-09-02 12:50:12 +0000235 if (Tables.size() > static_cast<unsigned>(Info::MaxTables))
Richard Smithd88a7f12015-09-01 20:35:42 +0000236 condense();
237
238 internal_key_type Key = Info::GetInternalKey(EKey);
239 auto KeyHash = Info::ComputeHash(Key);
240
241 if (MergedTable *M = getMergedTable()) {
242 auto It = M->Data.find(Key);
243 if (It != M->Data.end())
244 Result = It->second;
245 }
246
247 data_type_builder ResultBuilder(Result);
248
249 for (auto *ODT : tables()) {
250 auto &HT = ODT->Table;
251 auto It = HT.find_hashed(Key, KeyHash);
252 if (It != HT.end())
253 HT.getInfoObj().ReadDataInto(Key, It.getDataPtr(), It.getDataLen(),
254 ResultBuilder);
255 }
256
257 return Result;
258 }
259
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000260 /// Read all the lookup results into a single value. This only makes
Richard Smithd88a7f12015-09-01 20:35:42 +0000261 /// sense if merging values across keys is meaningful.
262 data_type findAll() {
263 data_type Result;
264 data_type_builder ResultBuilder(Result);
265
266 if (!PendingOverrides.empty())
267 removeOverriddenTables();
268
269 if (MergedTable *M = getMergedTable()) {
270 for (auto &KV : M->Data)
271 Info::MergeDataInto(KV.second, ResultBuilder);
272 }
273
274 for (auto *ODT : tables()) {
275 auto &HT = ODT->Table;
276 Info &InfoObj = HT.getInfoObj();
277 for (auto I = HT.data_begin(), E = HT.data_end(); I != E; ++I) {
278 auto *LocalPtr = I.getItem();
279
280 // FIXME: Don't rely on the OnDiskHashTable format here.
281 auto L = InfoObj.ReadKeyDataLength(LocalPtr);
282 const internal_key_type &Key = InfoObj.ReadKey(LocalPtr, L.first);
283 InfoObj.ReadDataInto(Key, LocalPtr + L.first, L.second, ResultBuilder);
284 }
285 }
286
287 return Result;
288 }
289};
290
Adrian Prantl9fc8faf2018-05-09 01:00:01 +0000291/// Writer for the on-disk hash table.
Richard Smithd88a7f12015-09-01 20:35:42 +0000292template<typename ReaderInfo, typename WriterInfo>
293class MultiOnDiskHashTableGenerator {
Eugene Zelenko4b3289e2017-11-02 21:45:30 +0000294 using BaseTable = MultiOnDiskHashTable<ReaderInfo>;
295 using Generator = llvm::OnDiskChainedHashTableGenerator<WriterInfo>;
Richard Smithd88a7f12015-09-01 20:35:42 +0000296
297 Generator Gen;
298
299public:
300 MultiOnDiskHashTableGenerator() : Gen() {}
301
302 void insert(typename WriterInfo::key_type_ref Key,
303 typename WriterInfo::data_type_ref Data, WriterInfo &Info) {
304 Gen.insert(Key, Data, Info);
305 }
306
307 void emit(llvm::SmallVectorImpl<char> &Out, WriterInfo &Info,
308 const BaseTable *Base) {
309 using namespace llvm::support;
Eugene Zelenko4b3289e2017-11-02 21:45:30 +0000310
Richard Smithd88a7f12015-09-01 20:35:42 +0000311 llvm::raw_svector_ostream OutStream(Out);
312
313 // Write our header information.
314 {
315 endian::Writer<little> Writer(OutStream);
316
317 // Reserve four bytes for the bucket offset.
318 Writer.write<uint32_t>(0);
319
320 if (auto *Merged = Base ? Base->getMergedTable() : nullptr) {
321 // Write list of overridden files.
322 Writer.write<uint32_t>(Merged->Files.size());
323 for (const auto &F : Merged->Files)
324 Info.EmitFileRef(OutStream, F);
325
326 // Add all merged entries from Base to the generator.
327 for (auto &KV : Merged->Data) {
328 if (!Gen.contains(KV.first, Info))
329 Gen.insert(KV.first, Info.ImportData(KV.second), Info);
330 }
331 } else {
332 Writer.write<uint32_t>(0);
333 }
334 }
335
336 // Write the table itself.
337 uint32_t BucketOffset = Gen.Emit(OutStream, Info);
338
339 // Replace the first four bytes with the bucket offset.
340 endian::write32le(Out.data(), BucketOffset);
341 }
342};
343
Eugene Zelenko4b3289e2017-11-02 21:45:30 +0000344} // namespace serialization
345} // namespace clang
Richard Smithd88a7f12015-09-01 20:35:42 +0000346
Eugene Zelenko4b3289e2017-11-02 21:45:30 +0000347#endif // LLVM_CLANG_LIB_SERIALIZATION_MULTIONDISKHASHTABLE_H