/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef ART_LIBARTBASE_BASE_HASH_SET_H_
#define ART_LIBARTBASE_BASE_HASH_SET_H_

#include <stdint.h>

#include <functional>
#include <iterator>
#include <memory>
#include <string>
#include <type_traits>
#include <utility>

#include <android-base/logging.h>

#include "base/data_hash.h"
#include "bit_utils.h"
#include "macros.h"

namespace art {

// Forward iterator over the occupied slots of a HashSet.
// `Elem` is the (possibly const) element type and `HashSetType` the
// (possibly const) HashSet<> it walks; the const_iterator instantiation
// uses the const forms of both.
// NOTE(review): HashSet::erase() shuffles elements backwards (see its
// comment), so an element may be visited twice while iterating and erasing.
template <class Elem, class HashSetType>
class HashSetIterator {
 public:
  using iterator_category = std::forward_iterator_tag;
  using value_type = Elem;
  using difference_type = std::ptrdiff_t;
  using pointer = Elem*;
  using reference = Elem&;

  HashSetIterator(const HashSetIterator&) = default;
  HashSetIterator(HashSetIterator&&) noexcept = default;
  // Construct an iterator referring to bucket `index` of `hash_set`.
  HashSetIterator(HashSetType* hash_set, size_t index) : index_(index), hash_set_(hash_set) {}

  // Conversion from iterator to const_iterator.
  template <class OtherElem,
            class OtherHashSetType,
            typename = std::enable_if_t<
                std::is_same_v<Elem, const OtherElem> &&
                std::is_same_v<HashSetType, const OtherHashSetType>>>
  HashSetIterator(const HashSetIterator<OtherElem, OtherHashSetType>& other)
      : index_(other.index_), hash_set_(other.hash_set_) {}

  HashSetIterator& operator=(const HashSetIterator&) = default;
  HashSetIterator& operator=(HashSetIterator&&) noexcept = default;

  bool operator==(const HashSetIterator& other) const {
    return hash_set_ == other.hash_set_ && this->index_ == other.index_;
  }

  bool operator!=(const HashSetIterator& other) const {
    return !(*this == other);
  }

  HashSetIterator operator++() {  // Value after modification.
    this->index_ = hash_set_->NextNonEmptySlot(index_);
    return *this;
  }

  HashSetIterator operator++(int) {
    HashSetIterator temp = *this;
    ++*this;
    return temp;
  }

  Elem& operator*() const {
    DCHECK(!hash_set_->IsFreeSlot(this->index_));
    return hash_set_->ElementForIndex(this->index_);
  }

  Elem* operator->() const {
    return &**this;
  }

 private:
  size_t index_;            // Bucket index; equals NumBuckets() for end().
  HashSetType* hash_set_;   // The container being iterated; not owned.

  template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
  friend bool operator==(const HashSetIterator<Elem1, HashSetType1>& lhs,
                         const HashSetIterator<Elem2, HashSetType2>& rhs);
  template <class T, class EmptyFn, class HashFn, class Pred, class Alloc> friend class HashSet;
  template <class OtherElem, class OtherHashSetType> friend class HashSetIterator;
};
100
101template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
102bool operator==(const HashSetIterator<Elem1, HashSetType1>& lhs,
103 const HashSetIterator<Elem2, HashSetType2>& rhs) {
104 static_assert(
android-t13d2c5b22022-10-12 13:43:18 +0800105 std::is_convertible_v<HashSetIterator<Elem1, HashSetType1>,
106 HashSetIterator<Elem2, HashSetType2>> ||
107 std::is_convertible_v<HashSetIterator<Elem2, HashSetType2>,
108 HashSetIterator<Elem1, HashSetType1>>, "Bad iterator types.");
Martin Stjernholm4fb51112021-04-30 11:53:52 +0100109 DCHECK_EQ(lhs.hash_set_, rhs.hash_set_);
110 return lhs.index_ == rhs.index_;
111}
112
// Heterogeneous inequality, defined in terms of the heterogeneous operator==
// above (and therefore subject to its same-container requirement).
template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
bool operator!=(const HashSetIterator<Elem1, HashSetType1>& lhs,
                const HashSetIterator<Elem2, HashSetType2>& rhs) {
  return !(lhs == rhs);
}
118
// Default empty-slot policy: a slot is "empty" when its element compares equal
// to a value-initialized T. Requires T to be equality-comparable and for the
// value-initialized state to never be stored as a real element.
template <class T>
class DefaultEmptyFn {
 public:
  void MakeEmpty(T& item) const { item = T(); }
  bool IsEmpty(const T& item) const { return item == T(); }
};

// Specialization for pointer elements: nullptr marks an empty slot, so a
// HashSet of pointers may not contain nullptr as a real element.
template <class T>
class DefaultEmptyFn<T*> {
 public:
  void MakeEmpty(T*& item) const { item = nullptr; }
  bool IsEmpty(T* const& item) const { return item == nullptr; }
};
141
// Default hash function selection: std::string is hashed with art's DataHash
// (from base/data_hash.h); every other type falls back to std::hash<T>.
template <class T>
using DefaultHashFn = std::conditional_t<std::is_same_v<T, std::string>, DataHash, std::hash<T>>;
Martin Stjernholm4fb51112021-04-30 11:53:52 +0100144
// Equality predicate used for std::string keys. The right-hand side is a
// template parameter so that anything comparable against std::string — for
// example std::string_view or a C string literal — can be used for lookup
// without first materializing a temporary std::string.
struct DefaultStringEquals {
  template <typename RhsType>
  bool operator()(const std::string& lhs, const RhsType& rhs) const {
    return lhs == rhs;
  }
};
153
// Default equality predicate: std::string keys get DefaultStringEquals (which
// allows heterogeneous lookup); every other type uses std::equal_to<T>.
template <class T>
using DefaultPred =
    std::conditional_t<std::is_same_v<T, std::string>, DefaultStringEquals, std::equal_to<T>>;
Martin Stjernholm4fb51112021-04-30 11:53:52 +0100157
// Low memory version of a hash set, uses less memory than std::unordered_multiset since elements
// aren't boxed. Uses linear probing to resolve collisions.
// EmptyFn needs to implement two functions MakeEmpty(T& item) and IsEmpty(const T& item).
// TODO: We could get rid of this requirement by using a bitmap, though maybe this would be slower
// and more complicated.
template <class T,
          class EmptyFn = DefaultEmptyFn<T>,
          class HashFn = DefaultHashFn<T>,
          class Pred = DefaultPred<T>,
          class Alloc = std::allocator<T>>
class HashSet {
 public:
  using value_type = T;
  using allocator_type = Alloc;
  using reference = T&;
  using const_reference = const T&;
  using pointer = T*;
  using const_pointer = const T*;
  using iterator = HashSetIterator<T, HashSet>;
  using const_iterator = HashSetIterator<const T, const HashSet>;
  using size_type = size_t;
  using difference_type = ptrdiff_t;

  // Load-factor bounds used when the caller does not supply explicit ones.
  static constexpr double kDefaultMinLoadFactor = 0.4;
  static constexpr double kDefaultMaxLoadFactor = 0.7;
  // Lower bound on the bucket count chosen by Resize().
  static constexpr size_t kMinBuckets = 1000;

  // Removes all elements and releases the storage (if owned), leaving the set
  // with zero buckets. The next insertion triggers Expand(), which allocates
  // fresh storage owned by this set.
  void clear() {
    DeallocateStorage();
    num_elements_ = 0;
    elements_until_expand_ = 0;
  }

  HashSet() : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor) {}
  explicit HashSet(const allocator_type& alloc) noexcept
      : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, alloc) {}

  HashSet(double min_load_factor, double max_load_factor) noexcept
      : HashSet(min_load_factor, max_load_factor, allocator_type()) {}
  HashSet(double min_load_factor, double max_load_factor, const allocator_type& alloc) noexcept
      : HashSet(min_load_factor, max_load_factor, HashFn(), Pred(), alloc) {}

  HashSet(const HashFn& hashfn,
          const Pred& pred) noexcept
      : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, hashfn, pred) {}
  HashSet(const HashFn& hashfn,
          const Pred& pred,
          const allocator_type& alloc) noexcept
      : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, hashfn, pred, alloc) {}

  HashSet(double min_load_factor,
          double max_load_factor,
          const HashFn& hashfn,
          const Pred& pred) noexcept
      : HashSet(min_load_factor, max_load_factor, hashfn, pred, allocator_type()) {}
  // Principal constructor: all other non-buffer constructors delegate here.
  // Starts with no storage (num_buckets_ == 0); the first insertion allocates.
  HashSet(double min_load_factor,
          double max_load_factor,
          const HashFn& hashfn,
          const Pred& pred,
          const allocator_type& alloc) noexcept
      : allocfn_(alloc),
        hashfn_(hashfn),
        emptyfn_(),
        pred_(pred),
        num_elements_(0u),
        num_buckets_(0u),
        elements_until_expand_(0u),
        owns_data_(false),
        data_(nullptr),
        min_load_factor_(min_load_factor),
        max_load_factor_(max_load_factor) {
    DCHECK_GT(min_load_factor, 0.0);
    DCHECK_LT(max_load_factor, 1.0);
  }

  // Copy constructor: allocates fresh storage and copies the other set's
  // buckets slot-by-slot (preserving probe positions, so no rehashing).
  HashSet(const HashSet& other)
      : allocfn_(other.allocfn_),
        hashfn_(other.hashfn_),
        emptyfn_(other.emptyfn_),
        pred_(other.pred_),
        num_elements_(other.num_elements_),
        num_buckets_(0),
        elements_until_expand_(other.elements_until_expand_),
        owns_data_(false),
        data_(nullptr),
        min_load_factor_(other.min_load_factor_),
        max_load_factor_(other.max_load_factor_) {
    AllocateStorage(other.NumBuckets());
    for (size_t i = 0; i < num_buckets_; ++i) {
      ElementForIndex(i) = other.data_[i];
    }
  }

  // noexcept required so that the move constructor is used instead of copy constructor.
  // b/27860101
  HashSet(HashSet&& other) noexcept
      : allocfn_(std::move(other.allocfn_)),
        hashfn_(std::move(other.hashfn_)),
        emptyfn_(std::move(other.emptyfn_)),
        pred_(std::move(other.pred_)),
        num_elements_(other.num_elements_),
        num_buckets_(other.num_buckets_),
        elements_until_expand_(other.elements_until_expand_),
        owns_data_(other.owns_data_),
        data_(other.data_),
        min_load_factor_(other.min_load_factor_),
        max_load_factor_(other.max_load_factor_) {
    // Leave `other` in a valid empty state.
    other.num_elements_ = 0u;
    other.num_buckets_ = 0u;
    other.elements_until_expand_ = 0u;
    other.owns_data_ = false;
    other.data_ = nullptr;
  }

  // Construct with pre-existing buffer, usually stack-allocated,
  // to avoid malloc/free overhead for small HashSet<>s.
  HashSet(value_type* buffer, size_t buffer_size)
      : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, buffer, buffer_size) {}
  HashSet(value_type* buffer, size_t buffer_size, const allocator_type& alloc)
      : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, buffer, buffer_size, alloc) {}
  HashSet(double min_load_factor, double max_load_factor, value_type* buffer, size_t buffer_size)
      : HashSet(min_load_factor, max_load_factor, buffer, buffer_size, allocator_type()) {}
  HashSet(double min_load_factor,
          double max_load_factor,
          value_type* buffer,
          size_t buffer_size,
          const allocator_type& alloc)
      : HashSet(min_load_factor, max_load_factor, HashFn(), Pred(), buffer, buffer_size, alloc) {}
  HashSet(double min_load_factor,
          double max_load_factor,
          const HashFn& hashfn,
          const Pred& pred,
          value_type* buffer,
          size_t buffer_size,
          const allocator_type& alloc)
      : allocfn_(alloc),
        hashfn_(hashfn),
        pred_(pred),
        num_elements_(0u),
        num_buckets_(buffer_size),
        elements_until_expand_(buffer_size * max_load_factor),
        owns_data_(false),  // Borrowed buffer; DeallocateStorage() must not free it.
        data_(buffer),
        min_load_factor_(min_load_factor),
        max_load_factor_(max_load_factor) {
    DCHECK_GT(min_load_factor, 0.0);
    DCHECK_LT(max_load_factor, 1.0);
    for (size_t i = 0; i != buffer_size; ++i) {
      emptyfn_.MakeEmpty(buffer[i]);
    }
  }

  // Construct from existing data.
  // Read from a block of memory, if make_copy_of_data is false, then data_ points to within the
  // passed in ptr_.
  HashSet(const uint8_t* ptr, bool make_copy_of_data, size_t* read_count) noexcept {
    uint64_t temp;
    size_t offset = 0;
    offset = ReadFromBytes(ptr, offset, &temp);
    num_elements_ = static_cast<uint64_t>(temp);
    offset = ReadFromBytes(ptr, offset, &temp);
    num_buckets_ = static_cast<uint64_t>(temp);
    CHECK_LE(num_elements_, num_buckets_);
    offset = ReadFromBytes(ptr, offset, &temp);
    elements_until_expand_ = static_cast<uint64_t>(temp);
    offset = ReadFromBytes(ptr, offset, &min_load_factor_);
    offset = ReadFromBytes(ptr, offset, &max_load_factor_);
    if (!make_copy_of_data) {
      owns_data_ = false;
      data_ = const_cast<T*>(reinterpret_cast<const T*>(ptr + offset));
      offset += sizeof(*data_) * num_buckets_;
    } else {
      AllocateStorage(num_buckets_);
      // Write elements, note that this may not be safe for cross compilation if the elements are
      // pointer sized.
      for (size_t i = 0; i < num_buckets_; ++i) {
        offset = ReadFromBytes(ptr, offset, &data_[i]);
      }
    }
    // Caller responsible for aligning.
    *read_count = offset;
  }

  // Returns how large the table is after being written. If target is null, then no writing happens
  // but the size is still returned. Target must be 8 byte aligned.
  size_t WriteToMemory(uint8_t* ptr) const {
    size_t offset = 0;
    offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_elements_));
    offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_buckets_));
    offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(elements_until_expand_));
    offset = WriteToBytes(ptr, offset, min_load_factor_);
    offset = WriteToBytes(ptr, offset, max_load_factor_);
    // Write elements, note that this may not be safe for cross compilation if the elements are
    // pointer sized.
    for (size_t i = 0; i < num_buckets_; ++i) {
      offset = WriteToBytes(ptr, offset, data_[i]);
    }
    // Caller responsible for aligning.
    return offset;
  }

  ~HashSet() {
    DeallocateStorage();
  }

  HashSet& operator=(HashSet&& other) noexcept {
    HashSet(std::move(other)).swap(*this);  // NOLINT [runtime/explicit] [5]
    return *this;
  }

  HashSet& operator=(const HashSet& other) {
    HashSet(other).swap(*this);  // NOLINT(runtime/explicit) - a case of lint gone mad.
    return *this;
  }

  // Lower case for c++11 for each.
  iterator begin() {
    iterator ret(this, 0);
    if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
      ++ret;  // Skip all the empty slots.
    }
    return ret;
  }

  // Lower case for c++11 for each. const version.
  const_iterator begin() const {
    const_iterator ret(this, 0);
    if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
      ++ret;  // Skip all the empty slots.
    }
    return ret;
  }

  // Lower case for c++11 for each.
  iterator end() {
    return iterator(this, NumBuckets());
  }

  // Lower case for c++11 for each. const version.
  const_iterator end() const {
    return const_iterator(this, NumBuckets());
  }

  size_t size() const {
    return num_elements_;
  }

  bool empty() const {
    return size() == 0;
  }

  // Erase algorithm:
  // Make an empty slot where the iterator is pointing.
  // Scan forwards until we hit another empty slot.
  // If an element in between doesn't rehash to the range from the current empty slot to the
  // iterator. It must be before the empty slot, in that case we can move it to the empty slot
  // and set the empty slot to be the location we just moved from.
  // Relies on maintaining the invariant that there's no empty slots from the 'ideal' index of an
  // element to its actual location/index.
  // Note that since erase shuffles back elements, it may result in the same element being visited
  // twice during HashSet iteration. This happens when an element already visited during iteration
  // gets shuffled to the end of the bucket array.
  iterator erase(iterator it) {
    // empty_index is the index that will become empty.
    size_t empty_index = it.index_;
    DCHECK(!IsFreeSlot(empty_index));
    size_t next_index = empty_index;
    bool filled = false;  // True if we filled the empty index.
    while (true) {
      next_index = NextIndex(next_index);
      T& next_element = ElementForIndex(next_index);
      // If the next element is empty, we are done. Make sure to clear the current empty index.
      if (emptyfn_.IsEmpty(next_element)) {
        emptyfn_.MakeEmpty(ElementForIndex(empty_index));
        break;
      }
      // Otherwise try to see if the next element can fill the current empty index.
      const size_t next_hash = hashfn_(next_element);
      // Calculate the ideal index, if it is within empty_index + 1 to next_index then there is
      // nothing we can do.
      size_t next_ideal_index = IndexForHash(next_hash);
      // Loop around if needed for our check.
      size_t unwrapped_next_index = next_index;
      if (unwrapped_next_index < empty_index) {
        unwrapped_next_index += NumBuckets();
      }
      // Loop around if needed for our check.
      size_t unwrapped_next_ideal_index = next_ideal_index;
      if (unwrapped_next_ideal_index < empty_index) {
        unwrapped_next_ideal_index += NumBuckets();
      }
      if (unwrapped_next_ideal_index <= empty_index ||
          unwrapped_next_ideal_index > unwrapped_next_index) {
        // If the target index isn't within our current range it must have been probed from before
        // the empty index.
        ElementForIndex(empty_index) = std::move(next_element);
        filled = true;  // TODO: Optimize
        empty_index = next_index;
      }
    }
    --num_elements_;
    // If we didn't fill the slot then we need go to the next non free slot.
    if (!filled) {
      ++it;
    }
    return it;
  }

  // Find an element, returns end() if not found.
  // Allows custom key (K) types, example of when this is useful:
  // Set of Class* indexed by name, want to find a class with a name but can't allocate
  // a temporary Class object in the heap for performance solution.
  template <typename K>
  iterator find(const K& key) {
    return FindWithHash(key, hashfn_(key));
  }

  template <typename K>
  const_iterator find(const K& key) const {
    return FindWithHash(key, hashfn_(key));
  }

  // Find with a caller-precomputed hash; hash must equal hashfn_(key).
  template <typename K>
  iterator FindWithHash(const K& key, size_t hash) {
    return iterator(this, FindIndex(key, hash));
  }

  template <typename K>
  const_iterator FindWithHash(const K& key, size_t hash) const {
    return const_iterator(this, FindIndex(key, hash));
  }

  // Insert an element with hint.
  // Note: The hint is not very useful for a HashSet<> unless there are many hash conflicts
  // and in that case the use of HashSet<> itself should be reconsidered.
  std::pair<iterator, bool> insert(const_iterator hint ATTRIBUTE_UNUSED, const T& element) {
    return insert(element);
  }
  std::pair<iterator, bool> insert(const_iterator hint ATTRIBUTE_UNUSED, T&& element) {
    return insert(std::move(element));
  }

  // Insert an element.
  std::pair<iterator, bool> insert(const T& element) {
    return InsertWithHash(element, hashfn_(element));
  }
  std::pair<iterator, bool> insert(T&& element) {
    return InsertWithHash(std::move(element), hashfn_(element));
  }

  // Insert with a caller-precomputed hash. Returns {iterator, true} if the
  // element was inserted, {iterator to the existing equal element, false} if not.
  template <typename U, typename = std::enable_if_t<std::is_convertible_v<U, T>>>
  std::pair<iterator, bool> InsertWithHash(U&& element, size_t hash) {
    DCHECK_EQ(hash, hashfn_(element));
    if (num_elements_ >= elements_until_expand_) {
      Expand();
      DCHECK_LT(num_elements_, elements_until_expand_);
    }
    bool find_failed = false;
    auto find_fail_fn = [&](size_t index) ALWAYS_INLINE {
      find_failed = true;
      return index;
    };
    size_t index = FindIndexImpl(element, hash, find_fail_fn);
    if (find_failed) {
      data_[index] = std::forward<U>(element);
      ++num_elements_;
    }
    return std::make_pair(iterator(this, index), find_failed);
  }

  // Insert an element known not to be in the `HashSet<>`.
  void Put(const T& element) {
    return PutWithHash(element, hashfn_(element));
  }
  void Put(T&& element) {
    return PutWithHash(std::move(element), hashfn_(element));
  }

  // Put with a caller-precomputed hash. Skips the equality check against
  // existing elements (debug builds still DCHECK the element is absent).
  template <typename U, typename = std::enable_if_t<std::is_convertible_v<U, T>>>
  void PutWithHash(U&& element, size_t hash) {
    DCHECK_EQ(hash, hashfn_(element));
    if (num_elements_ >= elements_until_expand_) {
      Expand();
      DCHECK_LT(num_elements_, elements_until_expand_);
    }
    auto find_fail_fn = [](size_t index) ALWAYS_INLINE { return index; };
    size_t index = FindIndexImpl</*kCanFind=*/ false>(element, hash, find_fail_fn);
    data_[index] = std::forward<U>(element);
    ++num_elements_;
  }

  void swap(HashSet& other) {
    // Use argument-dependent lookup with fall-back to std::swap() for function objects.
    using std::swap;
    swap(allocfn_, other.allocfn_);
    swap(hashfn_, other.hashfn_);
    swap(emptyfn_, other.emptyfn_);
    swap(pred_, other.pred_);
    std::swap(data_, other.data_);
    std::swap(num_buckets_, other.num_buckets_);
    std::swap(num_elements_, other.num_elements_);
    std::swap(elements_until_expand_, other.elements_until_expand_);
    std::swap(min_load_factor_, other.min_load_factor_);
    std::swap(max_load_factor_, other.max_load_factor_);
    std::swap(owns_data_, other.owns_data_);
  }

  allocator_type get_allocator() const {
    return allocfn_;
  }

  // Shrink the bucket array so the load factor equals max_load_factor_.
  void ShrinkToMaximumLoad() {
    Resize(size() / max_load_factor_);
  }

  // Reserve enough room to insert until Size() == num_elements without requiring to grow the hash
  // set. No-op if the hash set is already large enough to do this.
  void reserve(size_t num_elements) {
    size_t num_buckets = num_elements / max_load_factor_;
    // Deal with rounding errors. Add one for rounding.
    while (static_cast<size_t>(num_buckets * max_load_factor_) <= num_elements + 1u) {
      ++num_buckets;
    }
    if (num_buckets > NumBuckets()) {
      Resize(num_buckets);
    }
  }

  // Total distance that inserted elements were probed. Used for measuring how good hash functions
  // are.
  size_t TotalProbeDistance() const {
    size_t total = 0;
    for (size_t i = 0; i < NumBuckets(); ++i) {
      const T& element = ElementForIndex(i);
      if (!emptyfn_.IsEmpty(element)) {
        size_t ideal_location = IndexForHash(hashfn_(element));
        if (ideal_location > i) {
          // The probe wrapped around the end of the bucket array.
          total += i + NumBuckets() - ideal_location;
        } else {
          total += i - ideal_location;
        }
      }
    }
    return total;
  }

  // Calculate the current load factor and return it.
  double CalculateLoadFactor() const {
    return static_cast<double>(size()) / static_cast<double>(NumBuckets());
  }

  // Make sure that everything reinserts in the right spot. Returns the number of errors.
  size_t Verify() NO_THREAD_SAFETY_ANALYSIS {
    size_t errors = 0;
    for (size_t i = 0; i < num_buckets_; ++i) {
      T& element = data_[i];
      if (!emptyfn_.IsEmpty(element)) {
        // Temporarily empty the slot so FirstAvailableSlot() can find where the
        // element would land if it were reinserted right now.
        T temp;
        emptyfn_.MakeEmpty(temp);
        std::swap(temp, element);
        size_t first_slot = FirstAvailableSlot(IndexForHash(hashfn_(temp)));
        if (i != first_slot) {
          LOG(ERROR) << "Element " << i << " should be in slot " << first_slot;
          ++errors;
        }
        std::swap(temp, element);
      }
    }
    return errors;
  }

  double GetMinLoadFactor() const {
    return min_load_factor_;
  }

  double GetMaxLoadFactor() const {
    return max_load_factor_;
  }

  // Change the load factor of the hash set. If the current load factor is greater than the max
  // specified, then we resize the hash table storage.
  void SetLoadFactor(double min_load_factor, double max_load_factor) {
    DCHECK_LT(min_load_factor, max_load_factor);
    DCHECK_GT(min_load_factor, 0.0);
    DCHECK_LT(max_load_factor, 1.0);
    min_load_factor_ = min_load_factor;
    max_load_factor_ = max_load_factor;
    elements_until_expand_ = NumBuckets() * max_load_factor_;
    // If the current load factor isn't in the range, then resize to the mean of the minimum and
    // maximum load factor.
    const double load_factor = CalculateLoadFactor();
    if (load_factor > max_load_factor_) {
      Resize(size() / ((min_load_factor_ + max_load_factor_) * 0.5));
    }
  }

  // The hash set expands when Size() reaches ElementsUntilExpand().
  size_t ElementsUntilExpand() const {
    return elements_until_expand_;
  }

  size_t NumBuckets() const {
    return num_buckets_;
  }

 private:
  T& ElementForIndex(size_t index) {
    DCHECK_LT(index, NumBuckets());
    DCHECK(data_ != nullptr);
    return data_[index];
  }

  const T& ElementForIndex(size_t index) const {
    DCHECK_LT(index, NumBuckets());
    DCHECK(data_ != nullptr);
    return data_[index];
  }

  size_t IndexForHash(size_t hash) const {
    // Protect against undefined behavior (division by zero).
    if (UNLIKELY(num_buckets_ == 0)) {
      return 0;
    }
    return hash % num_buckets_;
  }

  // Next bucket index with wrap-around.
  size_t NextIndex(size_t index) const {
    if (UNLIKELY(++index >= num_buckets_)) {
      DCHECK_EQ(index, NumBuckets());
      return 0;
    }
    return index;
  }

  // Find the hash table slot for an element, or return NumBuckets() if not found.
  // This value for not found is important so that iterator(this, FindIndex(...)) == end().
  template <typename K>
  ALWAYS_INLINE
  size_t FindIndex(const K& element, size_t hash) const {
    // Guard against failing to get an element for a non-existing index.
    if (UNLIKELY(NumBuckets() == 0)) {
      return 0;
    }
    auto fail_fn = [&](size_t index ATTRIBUTE_UNUSED) ALWAYS_INLINE { return NumBuckets(); };
    return FindIndexImpl(element, hash, fail_fn);
  }

  // Find the hash table slot for an element, or return an empty slot index if not found.
  // `fail_fn` maps the empty slot index to the caller's "not found" result.
  // With kCanFind == false the caller guarantees the element is absent, so the
  // equality check is skipped (DCHECKed in debug builds).
  template <bool kCanFind = true, typename K, typename FailFn>
  ALWAYS_INLINE
  size_t FindIndexImpl(const K& element, size_t hash, FailFn fail_fn) const {
    DCHECK_NE(NumBuckets(), 0u);
    DCHECK_EQ(hashfn_(element), hash);
    size_t index = IndexForHash(hash);
    while (true) {
      const T& slot = ElementForIndex(index);
      if (emptyfn_.IsEmpty(slot)) {
        return fail_fn(index);
      }
      if (!kCanFind) {
        DCHECK(!pred_(slot, element));
      } else if (pred_(slot, element)) {
        return index;
      }
      index = NextIndex(index);
    }
  }

  bool IsFreeSlot(size_t index) const {
    return emptyfn_.IsEmpty(ElementForIndex(index));
  }

  // Allocate a number of buckets.
  void AllocateStorage(size_t num_buckets) {
    num_buckets_ = num_buckets;
    data_ = allocfn_.allocate(num_buckets_);
    owns_data_ = true;
    for (size_t i = 0; i < num_buckets_; ++i) {
      allocfn_.construct(allocfn_.address(data_[i]));
      emptyfn_.MakeEmpty(data_[i]);
    }
  }

  void DeallocateStorage() {
    if (owns_data_) {
      for (size_t i = 0; i < NumBuckets(); ++i) {
        allocfn_.destroy(allocfn_.address(data_[i]));
      }
      if (data_ != nullptr) {
        allocfn_.deallocate(data_, NumBuckets());
      }
      owns_data_ = false;
    }
    data_ = nullptr;
    num_buckets_ = 0;
  }

  // Expand the set based on the load factors.
  void Expand() {
    size_t min_index = static_cast<size_t>(size() / min_load_factor_);
    // Resize based on the minimum load factor.
    Resize(min_index);
  }

  // Expand / shrink the table to the new specified size.
  void Resize(size_t new_size) {
    if (new_size < kMinBuckets) {
      new_size = kMinBuckets;
    }
    DCHECK_GE(new_size, size());
    T* const old_data = data_;
    size_t old_num_buckets = num_buckets_;
    // Reinsert all of the old elements.
    const bool owned_data = owns_data_;
    AllocateStorage(new_size);
    for (size_t i = 0; i < old_num_buckets; ++i) {
      T& element = old_data[i];
      if (!emptyfn_.IsEmpty(element)) {
        data_[FirstAvailableSlot(IndexForHash(hashfn_(element)))] = std::move(element);
      }
      if (owned_data) {
        allocfn_.destroy(allocfn_.address(element));
      }
    }
    if (owned_data) {
      allocfn_.deallocate(old_data, old_num_buckets);
    }

    // When we hit elements_until_expand_, we are at the max load factor and must expand again.
    elements_until_expand_ = NumBuckets() * max_load_factor_;
  }

  // Linear-probe forward (with wrap-around) to the first empty slot.
  ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const {
    DCHECK_LT(index, NumBuckets());  // Don't try to get a slot out of range.
    size_t non_empty_count = 0;
    while (!emptyfn_.IsEmpty(data_[index])) {
      index = NextIndex(index);
      non_empty_count++;
      DCHECK_LE(non_empty_count, NumBuckets());  // Don't loop forever.
    }
    return index;
  }

  // Next occupied slot at or after index + 1, or NumBuckets() (== end()) if none.
  size_t NextNonEmptySlot(size_t index) const {
    const size_t num_buckets = NumBuckets();
    DCHECK_LT(index, num_buckets);
    do {
      ++index;
    } while (index < num_buckets && IsFreeSlot(index));
    return index;
  }

  // Return new offset.
  template <typename Elem>
  static size_t WriteToBytes(uint8_t* ptr, size_t offset, Elem n) {
    DCHECK_ALIGNED(ptr + offset, sizeof(n));
    if (ptr != nullptr) {
      *reinterpret_cast<Elem*>(ptr + offset) = n;
    }
    return offset + sizeof(n);
  }

  template <typename Elem>
  static size_t ReadFromBytes(const uint8_t* ptr, size_t offset, Elem* out) {
    DCHECK(ptr != nullptr);
    DCHECK_ALIGNED(ptr + offset, sizeof(*out));
    *out = *reinterpret_cast<const Elem*>(ptr + offset);
    return offset + sizeof(*out);
  }

  Alloc allocfn_;  // Allocator function.
  HashFn hashfn_;  // Hashing function.
  EmptyFn emptyfn_;  // IsEmpty/SetEmpty function.
  Pred pred_;  // Equals function.
  size_t num_elements_;  // Number of inserted elements.
  size_t num_buckets_;  // Number of hash table buckets.
  size_t elements_until_expand_;  // Maximum number of elements until we expand the table.
  bool owns_data_;  // If we own data_ and are responsible for freeing it.
  T* data_;  // Backing storage.
  double min_load_factor_;
  double max_load_factor_;

  template <class Elem, class HashSetType>
  friend class HashSetIterator;

  ART_FRIEND_TEST(InternTableTest, CrossHash);
  ART_FRIEND_TEST(HashSetTest, Preallocated);
};
847
// Free-function swap found by ADL (e.g. via `using std::swap; swap(a, b);`);
// delegates to HashSet::swap(), which exchanges internal pointers and state
// without copying elements.
template <class T, class EmptyFn, class HashFn, class Pred, class Alloc>
void swap(HashSet<T, EmptyFn, HashFn, Pred, Alloc>& lhs,
          HashSet<T, EmptyFn, HashFn, Pred, Alloc>& rhs) {
  lhs.swap(rhs);
}
853
}  // namespace art

#endif  // ART_LIBARTBASE_BASE_HASH_SET_H_