blob: 3f3c8f24942692b394e02193b5beff60e75b52ea [file] [log] [blame]
Martin Stjernholm4fb51112021-04-30 11:53:52 +01001/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_LIBARTBASE_BASE_HASH_SET_H_
18#define ART_LIBARTBASE_BASE_HASH_SET_H_
19
20#include <stdint.h>
21
22#include <functional>
23#include <iterator>
24#include <memory>
25#include <string>
26#include <type_traits>
27#include <utility>
28
29#include <android-base/logging.h>
30
31#include "base/data_hash.h"
32#include "bit_utils.h"
33#include "macros.h"
34
35namespace art {
36
37template <class Elem, class HashSetType>
38class HashSetIterator {
39 public:
40 using iterator_category = std::forward_iterator_tag;
41 using value_type = Elem;
42 using difference_type = std::ptrdiff_t;
43 using pointer = Elem*;
44 using reference = Elem&;
45
46 HashSetIterator(const HashSetIterator&) = default;
Martin Stjernholm413b2b52021-11-15 13:56:19 +000047 HashSetIterator(HashSetIterator&&) noexcept = default;
Martin Stjernholm4fb51112021-04-30 11:53:52 +010048 HashSetIterator(HashSetType* hash_set, size_t index) : index_(index), hash_set_(hash_set) {}
49
50 // Conversion from iterator to const_iterator.
51 template <class OtherElem,
52 class OtherHashSetType,
satayev499be972022-05-13 15:05:39 +000053 typename = std::enable_if_t<
54 std::is_same_v<Elem, const OtherElem> &&
55 std::is_same_v<HashSetType, const OtherHashSetType>>>
Martin Stjernholm4fb51112021-04-30 11:53:52 +010056 HashSetIterator(const HashSetIterator<OtherElem, OtherHashSetType>& other)
57 : index_(other.index_), hash_set_(other.hash_set_) {}
58
59 HashSetIterator& operator=(const HashSetIterator&) = default;
Martin Stjernholm413b2b52021-11-15 13:56:19 +000060 HashSetIterator& operator=(HashSetIterator&&) noexcept = default;
Martin Stjernholm4fb51112021-04-30 11:53:52 +010061
62 bool operator==(const HashSetIterator& other) const {
63 return hash_set_ == other.hash_set_ && this->index_ == other.index_;
64 }
65
66 bool operator!=(const HashSetIterator& other) const {
67 return !(*this == other);
68 }
69
70 HashSetIterator operator++() { // Value after modification.
71 this->index_ = hash_set_->NextNonEmptySlot(index_);
72 return *this;
73 }
74
75 HashSetIterator operator++(int) {
76 HashSetIterator temp = *this;
77 ++*this;
78 return temp;
79 }
80
81 Elem& operator*() const {
82 DCHECK(!hash_set_->IsFreeSlot(this->index_));
83 return hash_set_->ElementForIndex(this->index_);
84 }
85
86 Elem* operator->() const {
87 return &**this;
88 }
89
90 private:
91 size_t index_;
92 HashSetType* hash_set_;
93
94 template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
95 friend bool operator==(const HashSetIterator<Elem1, HashSetType1>& lhs,
96 const HashSetIterator<Elem2, HashSetType2>& rhs);
97 template <class T, class EmptyFn, class HashFn, class Pred, class Alloc> friend class HashSet;
98 template <class OtherElem, class OtherHashSetType> friend class HashSetIterator;
99};
100
101template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
102bool operator==(const HashSetIterator<Elem1, HashSetType1>& lhs,
103 const HashSetIterator<Elem2, HashSetType2>& rhs) {
104 static_assert(
satayev499be972022-05-13 15:05:39 +0000105 std::is_convertible_v<HashSetIterator<Elem1, HashSetType1>,
106 HashSetIterator<Elem2, HashSetType2>> ||
107 std::is_convertible_v<HashSetIterator<Elem2, HashSetType2>,
108 HashSetIterator<Elem1, HashSetType1>>, "Bad iterator types.");
Martin Stjernholm4fb51112021-04-30 11:53:52 +0100109 DCHECK_EQ(lhs.hash_set_, rhs.hash_set_);
110 return lhs.index_ == rhs.index_;
111}
112
113template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
114bool operator!=(const HashSetIterator<Elem1, HashSetType1>& lhs,
115 const HashSetIterator<Elem2, HashSetType2>& rhs) {
116 return !(lhs == rhs);
117}
118
119// Returns true if an item is empty.
120template <class T>
121class DefaultEmptyFn {
122 public:
123 void MakeEmpty(T& item) const {
124 item = T();
125 }
126 bool IsEmpty(const T& item) const {
127 return item == T();
128 }
129};
130
131template <class T>
132class DefaultEmptyFn<T*> {
133 public:
134 void MakeEmpty(T*& item) const {
135 item = nullptr;
136 }
137 bool IsEmpty(T* const& item) const {
138 return item == nullptr;
139 }
140};
141
Fairphone ODM25c12f52023-12-15 17:24:06 +0800142template <>
143class DefaultEmptyFn<std::string> {
144 public:
145 void MakeEmpty(std::string& item) const {
146 item = std::string();
147 }
148 bool IsEmpty(const std::string& item) const {
149 return item.empty();
150 }
151};
152
Martin Stjernholm4fb51112021-04-30 11:53:52 +0100153template <class T>
satayev499be972022-05-13 15:05:39 +0000154using DefaultHashFn = std::conditional_t<std::is_same_v<T, std::string>, DataHash, std::hash<T>>;
Martin Stjernholm4fb51112021-04-30 11:53:52 +0100155
156struct DefaultStringEquals {
157 // Allow comparison with anything that can be compared to std::string,
158 // for example std::string_view.
159 template <typename T>
160 bool operator()(const std::string& lhs, const T& rhs) const {
161 return lhs == rhs;
162 }
163};
164
165template <class T>
satayev499be972022-05-13 15:05:39 +0000166using DefaultPred =
167 std::conditional_t<std::is_same_v<T, std::string>, DefaultStringEquals, std::equal_to<T>>;
Martin Stjernholm4fb51112021-04-30 11:53:52 +0100168
169// Low memory version of a hash set, uses less memory than std::unordered_multiset since elements
170// aren't boxed. Uses linear probing to resolve collisions.
171// EmptyFn needs to implement two functions MakeEmpty(T& item) and IsEmpty(const T& item).
172// TODO: We could get rid of this requirement by using a bitmap, though maybe this would be slower
173// and more complicated.
174template <class T,
175 class EmptyFn = DefaultEmptyFn<T>,
176 class HashFn = DefaultHashFn<T>,
177 class Pred = DefaultPred<T>,
178 class Alloc = std::allocator<T>>
179class HashSet {
180 public:
181 using value_type = T;
182 using allocator_type = Alloc;
183 using reference = T&;
184 using const_reference = const T&;
185 using pointer = T*;
186 using const_pointer = const T*;
187 using iterator = HashSetIterator<T, HashSet>;
188 using const_iterator = HashSetIterator<const T, const HashSet>;
189 using size_type = size_t;
190 using difference_type = ptrdiff_t;
191
192 static constexpr double kDefaultMinLoadFactor = 0.4;
193 static constexpr double kDefaultMaxLoadFactor = 0.7;
194 static constexpr size_t kMinBuckets = 1000;
195
196 // If we don't own the data, this will create a new array which owns the data.
197 void clear() {
198 DeallocateStorage();
199 num_elements_ = 0;
200 elements_until_expand_ = 0;
201 }
202
203 HashSet() : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor) {}
204 explicit HashSet(const allocator_type& alloc) noexcept
205 : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, alloc) {}
206
207 HashSet(double min_load_factor, double max_load_factor) noexcept
208 : HashSet(min_load_factor, max_load_factor, allocator_type()) {}
209 HashSet(double min_load_factor, double max_load_factor, const allocator_type& alloc) noexcept
210 : HashSet(min_load_factor, max_load_factor, HashFn(), Pred(), alloc) {}
211
212 HashSet(const HashFn& hashfn,
213 const Pred& pred) noexcept
214 : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, hashfn, pred) {}
215 HashSet(const HashFn& hashfn,
216 const Pred& pred,
217 const allocator_type& alloc) noexcept
218 : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, hashfn, pred, alloc) {}
219
220 HashSet(double min_load_factor,
221 double max_load_factor,
222 const HashFn& hashfn,
223 const Pred& pred) noexcept
224 : HashSet(min_load_factor, max_load_factor, hashfn, pred, allocator_type()) {}
225 HashSet(double min_load_factor,
226 double max_load_factor,
227 const HashFn& hashfn,
228 const Pred& pred,
229 const allocator_type& alloc) noexcept
230 : allocfn_(alloc),
231 hashfn_(hashfn),
232 emptyfn_(),
233 pred_(pred),
234 num_elements_(0u),
235 num_buckets_(0u),
236 elements_until_expand_(0u),
237 owns_data_(false),
238 data_(nullptr),
239 min_load_factor_(min_load_factor),
240 max_load_factor_(max_load_factor) {
241 DCHECK_GT(min_load_factor, 0.0);
242 DCHECK_LT(max_load_factor, 1.0);
243 }
244
Martin Stjernholm413b2b52021-11-15 13:56:19 +0000245 HashSet(const HashSet& other)
Martin Stjernholm4fb51112021-04-30 11:53:52 +0100246 : allocfn_(other.allocfn_),
247 hashfn_(other.hashfn_),
248 emptyfn_(other.emptyfn_),
249 pred_(other.pred_),
250 num_elements_(other.num_elements_),
251 num_buckets_(0),
252 elements_until_expand_(other.elements_until_expand_),
253 owns_data_(false),
254 data_(nullptr),
255 min_load_factor_(other.min_load_factor_),
256 max_load_factor_(other.max_load_factor_) {
257 AllocateStorage(other.NumBuckets());
258 for (size_t i = 0; i < num_buckets_; ++i) {
259 ElementForIndex(i) = other.data_[i];
260 }
261 }
262
263 // noexcept required so that the move constructor is used instead of copy constructor.
264 // b/27860101
265 HashSet(HashSet&& other) noexcept
266 : allocfn_(std::move(other.allocfn_)),
267 hashfn_(std::move(other.hashfn_)),
268 emptyfn_(std::move(other.emptyfn_)),
269 pred_(std::move(other.pred_)),
270 num_elements_(other.num_elements_),
271 num_buckets_(other.num_buckets_),
272 elements_until_expand_(other.elements_until_expand_),
273 owns_data_(other.owns_data_),
274 data_(other.data_),
275 min_load_factor_(other.min_load_factor_),
276 max_load_factor_(other.max_load_factor_) {
277 other.num_elements_ = 0u;
278 other.num_buckets_ = 0u;
279 other.elements_until_expand_ = 0u;
280 other.owns_data_ = false;
281 other.data_ = nullptr;
282 }
283
284 // Construct with pre-existing buffer, usually stack-allocated,
285 // to avoid malloc/free overhead for small HashSet<>s.
286 HashSet(value_type* buffer, size_t buffer_size)
287 : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, buffer, buffer_size) {}
288 HashSet(value_type* buffer, size_t buffer_size, const allocator_type& alloc)
289 : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, buffer, buffer_size, alloc) {}
290 HashSet(double min_load_factor, double max_load_factor, value_type* buffer, size_t buffer_size)
291 : HashSet(min_load_factor, max_load_factor, buffer, buffer_size, allocator_type()) {}
292 HashSet(double min_load_factor,
293 double max_load_factor,
294 value_type* buffer,
295 size_t buffer_size,
296 const allocator_type& alloc)
Martin Stjernholmffc90b02022-01-14 23:52:55 +0000297 : HashSet(min_load_factor, max_load_factor, HashFn(), Pred(), buffer, buffer_size, alloc) {}
298 HashSet(double min_load_factor,
299 double max_load_factor,
300 const HashFn& hashfn,
301 const Pred& pred,
302 value_type* buffer,
303 size_t buffer_size,
304 const allocator_type& alloc)
Martin Stjernholm4fb51112021-04-30 11:53:52 +0100305 : allocfn_(alloc),
Martin Stjernholmffc90b02022-01-14 23:52:55 +0000306 hashfn_(hashfn),
307 pred_(pred),
Martin Stjernholm4fb51112021-04-30 11:53:52 +0100308 num_elements_(0u),
309 num_buckets_(buffer_size),
310 elements_until_expand_(buffer_size * max_load_factor),
311 owns_data_(false),
312 data_(buffer),
313 min_load_factor_(min_load_factor),
314 max_load_factor_(max_load_factor) {
315 DCHECK_GT(min_load_factor, 0.0);
316 DCHECK_LT(max_load_factor, 1.0);
317 for (size_t i = 0; i != buffer_size; ++i) {
318 emptyfn_.MakeEmpty(buffer[i]);
319 }
320 }
321
322 // Construct from existing data.
323 // Read from a block of memory, if make_copy_of_data is false, then data_ points to within the
324 // passed in ptr_.
325 HashSet(const uint8_t* ptr, bool make_copy_of_data, size_t* read_count) noexcept {
326 uint64_t temp;
327 size_t offset = 0;
328 offset = ReadFromBytes(ptr, offset, &temp);
329 num_elements_ = static_cast<uint64_t>(temp);
330 offset = ReadFromBytes(ptr, offset, &temp);
331 num_buckets_ = static_cast<uint64_t>(temp);
332 CHECK_LE(num_elements_, num_buckets_);
333 offset = ReadFromBytes(ptr, offset, &temp);
334 elements_until_expand_ = static_cast<uint64_t>(temp);
335 offset = ReadFromBytes(ptr, offset, &min_load_factor_);
336 offset = ReadFromBytes(ptr, offset, &max_load_factor_);
337 if (!make_copy_of_data) {
338 owns_data_ = false;
339 data_ = const_cast<T*>(reinterpret_cast<const T*>(ptr + offset));
340 offset += sizeof(*data_) * num_buckets_;
341 } else {
342 AllocateStorage(num_buckets_);
343 // Write elements, not that this may not be safe for cross compilation if the elements are
344 // pointer sized.
345 for (size_t i = 0; i < num_buckets_; ++i) {
346 offset = ReadFromBytes(ptr, offset, &data_[i]);
347 }
348 }
349 // Caller responsible for aligning.
350 *read_count = offset;
351 }
352
353 // Returns how large the table is after being written. If target is null, then no writing happens
354 // but the size is still returned. Target must be 8 byte aligned.
355 size_t WriteToMemory(uint8_t* ptr) const {
356 size_t offset = 0;
357 offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_elements_));
358 offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_buckets_));
359 offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(elements_until_expand_));
360 offset = WriteToBytes(ptr, offset, min_load_factor_);
361 offset = WriteToBytes(ptr, offset, max_load_factor_);
362 // Write elements, not that this may not be safe for cross compilation if the elements are
363 // pointer sized.
364 for (size_t i = 0; i < num_buckets_; ++i) {
365 offset = WriteToBytes(ptr, offset, data_[i]);
366 }
367 // Caller responsible for aligning.
368 return offset;
369 }
370
371 ~HashSet() {
372 DeallocateStorage();
373 }
374
375 HashSet& operator=(HashSet&& other) noexcept {
376 HashSet(std::move(other)).swap(*this); // NOLINT [runtime/explicit] [5]
377 return *this;
378 }
379
Martin Stjernholm413b2b52021-11-15 13:56:19 +0000380 HashSet& operator=(const HashSet& other) {
Martin Stjernholm4fb51112021-04-30 11:53:52 +0100381 HashSet(other).swap(*this); // NOLINT(runtime/explicit) - a case of lint gone mad.
382 return *this;
383 }
384
385 // Lower case for c++11 for each.
386 iterator begin() {
387 iterator ret(this, 0);
388 if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
389 ++ret; // Skip all the empty slots.
390 }
391 return ret;
392 }
393
394 // Lower case for c++11 for each. const version.
395 const_iterator begin() const {
396 const_iterator ret(this, 0);
397 if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
398 ++ret; // Skip all the empty slots.
399 }
400 return ret;
401 }
402
403 // Lower case for c++11 for each.
404 iterator end() {
405 return iterator(this, NumBuckets());
406 }
407
408 // Lower case for c++11 for each. const version.
409 const_iterator end() const {
410 return const_iterator(this, NumBuckets());
411 }
412
413 size_t size() const {
414 return num_elements_;
415 }
416
417 bool empty() const {
418 return size() == 0;
419 }
420
421 // Erase algorithm:
422 // Make an empty slot where the iterator is pointing.
423 // Scan forwards until we hit another empty slot.
424 // If an element in between doesn't rehash to the range from the current empty slot to the
425 // iterator. It must be before the empty slot, in that case we can move it to the empty slot
426 // and set the empty slot to be the location we just moved from.
427 // Relies on maintaining the invariant that there's no empty slots from the 'ideal' index of an
428 // element to its actual location/index.
429 // Note that since erase shuffles back elements, it may result in the same element being visited
430 // twice during HashSet iteration. This happens when an element already visited during iteration
431 // gets shuffled to the end of the bucket array.
432 iterator erase(iterator it) {
433 // empty_index is the index that will become empty.
434 size_t empty_index = it.index_;
435 DCHECK(!IsFreeSlot(empty_index));
436 size_t next_index = empty_index;
437 bool filled = false; // True if we filled the empty index.
438 while (true) {
439 next_index = NextIndex(next_index);
440 T& next_element = ElementForIndex(next_index);
441 // If the next element is empty, we are done. Make sure to clear the current empty index.
442 if (emptyfn_.IsEmpty(next_element)) {
443 emptyfn_.MakeEmpty(ElementForIndex(empty_index));
444 break;
445 }
446 // Otherwise try to see if the next element can fill the current empty index.
447 const size_t next_hash = hashfn_(next_element);
448 // Calculate the ideal index, if it is within empty_index + 1 to next_index then there is
449 // nothing we can do.
450 size_t next_ideal_index = IndexForHash(next_hash);
451 // Loop around if needed for our check.
452 size_t unwrapped_next_index = next_index;
453 if (unwrapped_next_index < empty_index) {
454 unwrapped_next_index += NumBuckets();
455 }
456 // Loop around if needed for our check.
457 size_t unwrapped_next_ideal_index = next_ideal_index;
458 if (unwrapped_next_ideal_index < empty_index) {
459 unwrapped_next_ideal_index += NumBuckets();
460 }
461 if (unwrapped_next_ideal_index <= empty_index ||
462 unwrapped_next_ideal_index > unwrapped_next_index) {
463 // If the target index isn't within our current range it must have been probed from before
464 // the empty index.
465 ElementForIndex(empty_index) = std::move(next_element);
466 filled = true; // TODO: Optimize
467 empty_index = next_index;
468 }
469 }
470 --num_elements_;
471 // If we didn't fill the slot then we need go to the next non free slot.
472 if (!filled) {
473 ++it;
474 }
475 return it;
476 }
477
478 // Find an element, returns end() if not found.
479 // Allows custom key (K) types, example of when this is useful:
480 // Set of Class* indexed by name, want to find a class with a name but can't allocate
481 // a temporary Class object in the heap for performance solution.
482 template <typename K>
483 iterator find(const K& key) {
484 return FindWithHash(key, hashfn_(key));
485 }
486
487 template <typename K>
488 const_iterator find(const K& key) const {
489 return FindWithHash(key, hashfn_(key));
490 }
491
492 template <typename K>
493 iterator FindWithHash(const K& key, size_t hash) {
494 return iterator(this, FindIndex(key, hash));
495 }
496
497 template <typename K>
498 const_iterator FindWithHash(const K& key, size_t hash) const {
499 return const_iterator(this, FindIndex(key, hash));
500 }
501
502 // Insert an element with hint.
503 // Note: The hint is not very useful for a HashSet<> unless there are many hash conflicts
504 // and in that case the use of HashSet<> itself should be reconsidered.
505 std::pair<iterator, bool> insert(const_iterator hint ATTRIBUTE_UNUSED, const T& element) {
506 return insert(element);
507 }
508 std::pair<iterator, bool> insert(const_iterator hint ATTRIBUTE_UNUSED, T&& element) {
509 return insert(std::move(element));
510 }
511
512 // Insert an element.
513 std::pair<iterator, bool> insert(const T& element) {
514 return InsertWithHash(element, hashfn_(element));
515 }
516 std::pair<iterator, bool> insert(T&& element) {
517 return InsertWithHash(std::move(element), hashfn_(element));
518 }
519
satayev499be972022-05-13 15:05:39 +0000520 template <typename U, typename = std::enable_if_t<std::is_convertible_v<U, T>>>
Martin Stjernholm4fb51112021-04-30 11:53:52 +0100521 std::pair<iterator, bool> InsertWithHash(U&& element, size_t hash) {
522 DCHECK_EQ(hash, hashfn_(element));
523 if (num_elements_ >= elements_until_expand_) {
524 Expand();
525 DCHECK_LT(num_elements_, elements_until_expand_);
526 }
527 bool find_failed = false;
satayev499be972022-05-13 15:05:39 +0000528 auto find_fail_fn = [&](size_t index) ALWAYS_INLINE {
Martin Stjernholm4fb51112021-04-30 11:53:52 +0100529 find_failed = true;
530 return index;
531 };
532 size_t index = FindIndexImpl(element, hash, find_fail_fn);
533 if (find_failed) {
534 data_[index] = std::forward<U>(element);
535 ++num_elements_;
536 }
537 return std::make_pair(iterator(this, index), find_failed);
538 }
539
Martin Stjernholmffc90b02022-01-14 23:52:55 +0000540 // Insert an element known not to be in the `HashSet<>`.
541 void Put(const T& element) {
542 return PutWithHash(element, hashfn_(element));
543 }
544 void Put(T&& element) {
545 return PutWithHash(std::move(element), hashfn_(element));
546 }
547
satayev499be972022-05-13 15:05:39 +0000548 template <typename U, typename = std::enable_if_t<std::is_convertible_v<U, T>>>
Martin Stjernholmffc90b02022-01-14 23:52:55 +0000549 void PutWithHash(U&& element, size_t hash) {
550 DCHECK_EQ(hash, hashfn_(element));
551 if (num_elements_ >= elements_until_expand_) {
552 Expand();
553 DCHECK_LT(num_elements_, elements_until_expand_);
554 }
satayev499be972022-05-13 15:05:39 +0000555 auto find_fail_fn = [](size_t index) ALWAYS_INLINE { return index; };
Martin Stjernholmffc90b02022-01-14 23:52:55 +0000556 size_t index = FindIndexImpl</*kCanFind=*/ false>(element, hash, find_fail_fn);
557 data_[index] = std::forward<U>(element);
558 ++num_elements_;
559 }
560
Martin Stjernholm4fb51112021-04-30 11:53:52 +0100561 void swap(HashSet& other) {
562 // Use argument-dependent lookup with fall-back to std::swap() for function objects.
563 using std::swap;
564 swap(allocfn_, other.allocfn_);
565 swap(hashfn_, other.hashfn_);
566 swap(emptyfn_, other.emptyfn_);
567 swap(pred_, other.pred_);
568 std::swap(data_, other.data_);
569 std::swap(num_buckets_, other.num_buckets_);
570 std::swap(num_elements_, other.num_elements_);
571 std::swap(elements_until_expand_, other.elements_until_expand_);
572 std::swap(min_load_factor_, other.min_load_factor_);
573 std::swap(max_load_factor_, other.max_load_factor_);
574 std::swap(owns_data_, other.owns_data_);
575 }
576
577 allocator_type get_allocator() const {
578 return allocfn_;
579 }
580
581 void ShrinkToMaximumLoad() {
582 Resize(size() / max_load_factor_);
583 }
584
585 // Reserve enough room to insert until Size() == num_elements without requiring to grow the hash
586 // set. No-op if the hash set is already large enough to do this.
587 void reserve(size_t num_elements) {
588 size_t num_buckets = num_elements / max_load_factor_;
589 // Deal with rounding errors. Add one for rounding.
590 while (static_cast<size_t>(num_buckets * max_load_factor_) <= num_elements + 1u) {
591 ++num_buckets;
592 }
593 if (num_buckets > NumBuckets()) {
594 Resize(num_buckets);
595 }
596 }
597
598 // To distance that inserted elements were probed. Used for measuring how good hash functions
599 // are.
600 size_t TotalProbeDistance() const {
601 size_t total = 0;
602 for (size_t i = 0; i < NumBuckets(); ++i) {
603 const T& element = ElementForIndex(i);
604 if (!emptyfn_.IsEmpty(element)) {
605 size_t ideal_location = IndexForHash(hashfn_(element));
606 if (ideal_location > i) {
607 total += i + NumBuckets() - ideal_location;
608 } else {
609 total += i - ideal_location;
610 }
611 }
612 }
613 return total;
614 }
615
616 // Calculate the current load factor and return it.
617 double CalculateLoadFactor() const {
618 return static_cast<double>(size()) / static_cast<double>(NumBuckets());
619 }
620
621 // Make sure that everything reinserts in the right spot. Returns the number of errors.
622 size_t Verify() NO_THREAD_SAFETY_ANALYSIS {
623 size_t errors = 0;
624 for (size_t i = 0; i < num_buckets_; ++i) {
625 T& element = data_[i];
626 if (!emptyfn_.IsEmpty(element)) {
627 T temp;
628 emptyfn_.MakeEmpty(temp);
629 std::swap(temp, element);
630 size_t first_slot = FirstAvailableSlot(IndexForHash(hashfn_(temp)));
631 if (i != first_slot) {
632 LOG(ERROR) << "Element " << i << " should be in slot " << first_slot;
633 ++errors;
634 }
635 std::swap(temp, element);
636 }
637 }
638 return errors;
639 }
640
641 double GetMinLoadFactor() const {
642 return min_load_factor_;
643 }
644
645 double GetMaxLoadFactor() const {
646 return max_load_factor_;
647 }
648
649 // Change the load factor of the hash set. If the current load factor is greater than the max
650 // specified, then we resize the hash table storage.
651 void SetLoadFactor(double min_load_factor, double max_load_factor) {
652 DCHECK_LT(min_load_factor, max_load_factor);
653 DCHECK_GT(min_load_factor, 0.0);
654 DCHECK_LT(max_load_factor, 1.0);
655 min_load_factor_ = min_load_factor;
656 max_load_factor_ = max_load_factor;
657 elements_until_expand_ = NumBuckets() * max_load_factor_;
658 // If the current load factor isn't in the range, then resize to the mean of the minimum and
659 // maximum load factor.
660 const double load_factor = CalculateLoadFactor();
661 if (load_factor > max_load_factor_) {
662 Resize(size() / ((min_load_factor_ + max_load_factor_) * 0.5));
663 }
664 }
665
666 // The hash set expands when Size() reaches ElementsUntilExpand().
667 size_t ElementsUntilExpand() const {
668 return elements_until_expand_;
669 }
670
671 size_t NumBuckets() const {
672 return num_buckets_;
673 }
674
675 private:
676 T& ElementForIndex(size_t index) {
677 DCHECK_LT(index, NumBuckets());
678 DCHECK(data_ != nullptr);
679 return data_[index];
680 }
681
682 const T& ElementForIndex(size_t index) const {
683 DCHECK_LT(index, NumBuckets());
684 DCHECK(data_ != nullptr);
685 return data_[index];
686 }
687
688 size_t IndexForHash(size_t hash) const {
689 // Protect against undefined behavior (division by zero).
690 if (UNLIKELY(num_buckets_ == 0)) {
691 return 0;
692 }
693 return hash % num_buckets_;
694 }
695
696 size_t NextIndex(size_t index) const {
697 if (UNLIKELY(++index >= num_buckets_)) {
698 DCHECK_EQ(index, NumBuckets());
699 return 0;
700 }
701 return index;
702 }
703
704 // Find the hash table slot for an element, or return NumBuckets() if not found.
705 // This value for not found is important so that iterator(this, FindIndex(...)) == end().
706 template <typename K>
satayev499be972022-05-13 15:05:39 +0000707 ALWAYS_INLINE
Martin Stjernholm4fb51112021-04-30 11:53:52 +0100708 size_t FindIndex(const K& element, size_t hash) const {
709 // Guard against failing to get an element for a non-existing index.
710 if (UNLIKELY(NumBuckets() == 0)) {
711 return 0;
712 }
satayev499be972022-05-13 15:05:39 +0000713 auto fail_fn = [&](size_t index ATTRIBUTE_UNUSED) ALWAYS_INLINE { return NumBuckets(); };
Martin Stjernholm4fb51112021-04-30 11:53:52 +0100714 return FindIndexImpl(element, hash, fail_fn);
715 }
716
717 // Find the hash table slot for an element, or return an empty slot index if not found.
Martin Stjernholmffc90b02022-01-14 23:52:55 +0000718 template <bool kCanFind = true, typename K, typename FailFn>
satayev499be972022-05-13 15:05:39 +0000719 ALWAYS_INLINE
Martin Stjernholm4fb51112021-04-30 11:53:52 +0100720 size_t FindIndexImpl(const K& element, size_t hash, FailFn fail_fn) const {
721 DCHECK_NE(NumBuckets(), 0u);
722 DCHECK_EQ(hashfn_(element), hash);
723 size_t index = IndexForHash(hash);
724 while (true) {
725 const T& slot = ElementForIndex(index);
726 if (emptyfn_.IsEmpty(slot)) {
727 return fail_fn(index);
728 }
Martin Stjernholmffc90b02022-01-14 23:52:55 +0000729 if (!kCanFind) {
730 DCHECK(!pred_(slot, element));
731 } else if (pred_(slot, element)) {
Martin Stjernholm4fb51112021-04-30 11:53:52 +0100732 return index;
733 }
734 index = NextIndex(index);
735 }
736 }
737
738 bool IsFreeSlot(size_t index) const {
739 return emptyfn_.IsEmpty(ElementForIndex(index));
740 }
741
742 // Allocate a number of buckets.
743 void AllocateStorage(size_t num_buckets) {
744 num_buckets_ = num_buckets;
745 data_ = allocfn_.allocate(num_buckets_);
746 owns_data_ = true;
747 for (size_t i = 0; i < num_buckets_; ++i) {
748 allocfn_.construct(allocfn_.address(data_[i]));
749 emptyfn_.MakeEmpty(data_[i]);
750 }
751 }
752
753 void DeallocateStorage() {
754 if (owns_data_) {
755 for (size_t i = 0; i < NumBuckets(); ++i) {
756 allocfn_.destroy(allocfn_.address(data_[i]));
757 }
758 if (data_ != nullptr) {
759 allocfn_.deallocate(data_, NumBuckets());
760 }
761 owns_data_ = false;
762 }
763 data_ = nullptr;
764 num_buckets_ = 0;
765 }
766
767 // Expand the set based on the load factors.
768 void Expand() {
769 size_t min_index = static_cast<size_t>(size() / min_load_factor_);
770 // Resize based on the minimum load factor.
771 Resize(min_index);
772 }
773
774 // Expand / shrink the table to the new specified size.
775 void Resize(size_t new_size) {
776 if (new_size < kMinBuckets) {
777 new_size = kMinBuckets;
778 }
779 DCHECK_GE(new_size, size());
780 T* const old_data = data_;
781 size_t old_num_buckets = num_buckets_;
782 // Reinsert all of the old elements.
783 const bool owned_data = owns_data_;
784 AllocateStorage(new_size);
785 for (size_t i = 0; i < old_num_buckets; ++i) {
786 T& element = old_data[i];
787 if (!emptyfn_.IsEmpty(element)) {
788 data_[FirstAvailableSlot(IndexForHash(hashfn_(element)))] = std::move(element);
789 }
790 if (owned_data) {
791 allocfn_.destroy(allocfn_.address(element));
792 }
793 }
794 if (owned_data) {
795 allocfn_.deallocate(old_data, old_num_buckets);
796 }
797
798 // When we hit elements_until_expand_, we are at the max load factor and must expand again.
799 elements_until_expand_ = NumBuckets() * max_load_factor_;
800 }
801
802 ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const {
803 DCHECK_LT(index, NumBuckets()); // Don't try to get a slot out of range.
804 size_t non_empty_count = 0;
805 while (!emptyfn_.IsEmpty(data_[index])) {
806 index = NextIndex(index);
807 non_empty_count++;
808 DCHECK_LE(non_empty_count, NumBuckets()); // Don't loop forever.
809 }
810 return index;
811 }
812
813 size_t NextNonEmptySlot(size_t index) const {
814 const size_t num_buckets = NumBuckets();
815 DCHECK_LT(index, num_buckets);
816 do {
817 ++index;
818 } while (index < num_buckets && IsFreeSlot(index));
819 return index;
820 }
821
822 // Return new offset.
823 template <typename Elem>
824 static size_t WriteToBytes(uint8_t* ptr, size_t offset, Elem n) {
825 DCHECK_ALIGNED(ptr + offset, sizeof(n));
826 if (ptr != nullptr) {
827 *reinterpret_cast<Elem*>(ptr + offset) = n;
828 }
829 return offset + sizeof(n);
830 }
831
832 template <typename Elem>
833 static size_t ReadFromBytes(const uint8_t* ptr, size_t offset, Elem* out) {
834 DCHECK(ptr != nullptr);
835 DCHECK_ALIGNED(ptr + offset, sizeof(*out));
836 *out = *reinterpret_cast<const Elem*>(ptr + offset);
837 return offset + sizeof(*out);
838 }
839
840 Alloc allocfn_; // Allocator function.
841 HashFn hashfn_; // Hashing function.
842 EmptyFn emptyfn_; // IsEmpty/SetEmpty function.
843 Pred pred_; // Equals function.
844 size_t num_elements_; // Number of inserted elements.
845 size_t num_buckets_; // Number of hash table buckets.
846 size_t elements_until_expand_; // Maximum number of elements until we expand the table.
847 bool owns_data_; // If we own data_ and are responsible for freeing it.
848 T* data_; // Backing storage.
849 double min_load_factor_;
850 double max_load_factor_;
851
852 template <class Elem, class HashSetType>
853 friend class HashSetIterator;
854
855 ART_FRIEND_TEST(InternTableTest, CrossHash);
856 ART_FRIEND_TEST(HashSetTest, Preallocated);
857};
858
859template <class T, class EmptyFn, class HashFn, class Pred, class Alloc>
860void swap(HashSet<T, EmptyFn, HashFn, Pred, Alloc>& lhs,
861 HashSet<T, EmptyFn, HashFn, Pred, Alloc>& rhs) {
862 lhs.swap(rhs);
863}
864
865} // namespace art
866
867#endif // ART_LIBARTBASE_BASE_HASH_SET_H_