/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_RUNTIME_BASE_HASH_SET_H_
#define ART_RUNTIME_BASE_HASH_SET_H_

#include <stdint.h>

#include <functional>
#include <iterator>
#include <memory>
#include <utility>

#include "bit_utils.h"
#include "logging.h"

namespace art {

// Default functor for making an item empty and testing whether an item is empty.
template <class T>
class DefaultEmptyFn {
 public:
  void MakeEmpty(T& item) const {
    item = T();
  }
  bool IsEmpty(const T& item) const {
    return item == T();
  }
};

template <class T>
class DefaultEmptyFn<T*> {
 public:
  void MakeEmpty(T*& item) const {
    item = nullptr;
  }
  bool IsEmpty(T* const& item) const {
    return item == nullptr;
  }
};

// Low-memory version of a hash set; uses less memory than std::unordered_set because elements
// are not boxed. Uses linear probing to resolve collisions.
// EmptyFn needs to implement two functions: MakeEmpty(T& item) and IsEmpty(const T& item).
// TODO: We could get rid of this requirement by using a bitmap, though maybe this would be slower
// and more complicated.
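//
// Illustrative usage sketch. StringEmptyFn below is a hypothetical functor (it is not defined in
// this file) that reserves the empty string as the "empty" marker; any real instantiation needs
// an EmptyFn that can spare one value of T in this way:
//
//   struct StringEmptyFn {
//     void MakeEmpty(std::string& item) const { item.clear(); }
//     bool IsEmpty(const std::string& item) const { return item.empty(); }
//   };
//
//   HashSet<std::string, StringEmptyFn> strings;
//   strings.Insert("foo");
//   auto it = strings.Find("foo");
//   if (it != strings.end()) {
//     strings.Erase(it);
//   }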
template <class T, class EmptyFn = DefaultEmptyFn<T>, class HashFn = std::hash<T>,
    class Pred = std::equal_to<T>, class Alloc = std::allocator<T>>
class HashSet {
  template <class Elem, class HashSetType>
  class BaseIterator : std::iterator<std::forward_iterator_tag, Elem> {
   public:
    BaseIterator(const BaseIterator&) = default;
    BaseIterator(BaseIterator&&) = default;
    BaseIterator(HashSetType* hash_set, size_t index) : index_(index), hash_set_(hash_set) {
    }
    BaseIterator& operator=(const BaseIterator&) = default;
    BaseIterator& operator=(BaseIterator&&) = default;

    bool operator==(const BaseIterator& other) const {
      return hash_set_ == other.hash_set_ && this->index_ == other.index_;
    }

    bool operator!=(const BaseIterator& other) const {
      return !(*this == other);
    }

    BaseIterator operator++() {  // Value after modification.
      this->index_ = this->NextNonEmptySlot(this->index_, hash_set_);
      return *this;
    }

    BaseIterator operator++(int) {
      BaseIterator temp = *this;
      this->index_ = this->NextNonEmptySlot(this->index_, hash_set_);
      return temp;
    }

    Elem& operator*() const {
      DCHECK(!hash_set_->IsFreeSlot(this->index_));
      return hash_set_->ElementForIndex(this->index_);
    }

    Elem* operator->() const {
      return &**this;
    }

    // TODO: Operator -- --(int) (and use std::bidirectional_iterator_tag)

   private:
    size_t index_;
    HashSetType* hash_set_;

    size_t NextNonEmptySlot(size_t index, const HashSet* hash_set) const {
      const size_t num_buckets = hash_set->NumBuckets();
      DCHECK_LT(index, num_buckets);
      do {
        ++index;
      } while (index < num_buckets && hash_set->IsFreeSlot(index));
      return index;
    }

    friend class HashSet;
  };

 public:
  using value_type = T;
  using allocator_type = Alloc;
  using reference = T&;
  using const_reference = const T&;
  using pointer = T*;
  using const_pointer = const T*;
  using iterator = BaseIterator<T, HashSet>;
  using const_iterator = BaseIterator<const T, const HashSet>;
  using size_type = size_t;
  using difference_type = ptrdiff_t;

  static constexpr double kDefaultMinLoadFactor = 0.4;
  static constexpr double kDefaultMaxLoadFactor = 0.7;
  static constexpr size_t kMinBuckets = 1000;

  // If we don't own the data, this drops the reference to it; a later allocation will create a
  // new array which owns the data.
  void Clear() {
    DeallocateStorage();
    num_elements_ = 0;
    elements_until_expand_ = 0;
  }

  HashSet() : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor) {}

  HashSet(double min_load_factor, double max_load_factor) noexcept
      : num_elements_(0u),
        num_buckets_(0u),
        elements_until_expand_(0u),
        owns_data_(false),
        data_(nullptr),
        min_load_factor_(min_load_factor),
        max_load_factor_(max_load_factor) {
    DCHECK_GT(min_load_factor, 0.0);
    DCHECK_LT(max_load_factor, 1.0);
  }

  explicit HashSet(const allocator_type& alloc) noexcept
      : allocfn_(alloc),
        hashfn_(),
        emptyfn_(),
        pred_(),
        num_elements_(0u),
        num_buckets_(0u),
        elements_until_expand_(0u),
        owns_data_(false),
        data_(nullptr),
        min_load_factor_(kDefaultMinLoadFactor),
        max_load_factor_(kDefaultMaxLoadFactor) {
  }

  HashSet(const HashSet& other) noexcept
      : allocfn_(other.allocfn_),
        hashfn_(other.hashfn_),
        emptyfn_(other.emptyfn_),
        pred_(other.pred_),
        num_elements_(other.num_elements_),
        num_buckets_(0),
        elements_until_expand_(other.elements_until_expand_),
        owns_data_(false),
        data_(nullptr),
        min_load_factor_(other.min_load_factor_),
        max_load_factor_(other.max_load_factor_) {
    AllocateStorage(other.NumBuckets());
    for (size_t i = 0; i < num_buckets_; ++i) {
      ElementForIndex(i) = other.data_[i];
    }
  }

  // noexcept is required so that the move constructor is used instead of the copy constructor.
  // b/27860101
  HashSet(HashSet&& other) noexcept
      : allocfn_(std::move(other.allocfn_)),
        hashfn_(std::move(other.hashfn_)),
        emptyfn_(std::move(other.emptyfn_)),
        pred_(std::move(other.pred_)),
        num_elements_(other.num_elements_),
        num_buckets_(other.num_buckets_),
        elements_until_expand_(other.elements_until_expand_),
        owns_data_(other.owns_data_),
        data_(other.data_),
        min_load_factor_(other.min_load_factor_),
        max_load_factor_(other.max_load_factor_) {
    other.num_elements_ = 0u;
    other.num_buckets_ = 0u;
    other.elements_until_expand_ = 0u;
    other.owns_data_ = false;
    other.data_ = nullptr;
  }

  // Construct from existing data.
  // Read from a block of memory; if make_copy_of_data is false, then data_ points to within the
  // passed in ptr.
  HashSet(const uint8_t* ptr, bool make_copy_of_data, size_t* read_count) noexcept {
    uint64_t temp;
    size_t offset = 0;
    offset = ReadFromBytes(ptr, offset, &temp);
    num_elements_ = static_cast<uint64_t>(temp);
    offset = ReadFromBytes(ptr, offset, &temp);
    num_buckets_ = static_cast<uint64_t>(temp);
    CHECK_LE(num_elements_, num_buckets_);
    offset = ReadFromBytes(ptr, offset, &temp);
    elements_until_expand_ = static_cast<uint64_t>(temp);
    offset = ReadFromBytes(ptr, offset, &min_load_factor_);
    offset = ReadFromBytes(ptr, offset, &max_load_factor_);
    if (!make_copy_of_data) {
      owns_data_ = false;
      data_ = const_cast<T*>(reinterpret_cast<const T*>(ptr + offset));
      offset += sizeof(*data_) * num_buckets_;
    } else {
      AllocateStorage(num_buckets_);
      // Read the elements; note that this may not be safe for cross compilation if the elements
      // are pointer sized.
      for (size_t i = 0; i < num_buckets_; ++i) {
        offset = ReadFromBytes(ptr, offset, &data_[i]);
      }
    }
    // Caller responsible for aligning.
    *read_count = offset;
  }

  // Returns how large the table is after being written. If target is null, then no writing happens
  // but the size is still returned. Target must be 8 byte aligned.
  size_t WriteToMemory(uint8_t* ptr) const {
    size_t offset = 0;
    offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_elements_));
    offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_buckets_));
    offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(elements_until_expand_));
    offset = WriteToBytes(ptr, offset, min_load_factor_);
    offset = WriteToBytes(ptr, offset, max_load_factor_);
    // Write the elements; note that this may not be safe for cross compilation if the elements
    // are pointer sized.
    for (size_t i = 0; i < num_buckets_; ++i) {
      offset = WriteToBytes(ptr, offset, data_[i]);
    }
    // Caller responsible for aligning.
    return offset;
  }

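  // Illustrative (de)serialization round trip. The buffer below is hypothetical; the caller is
  // responsible for providing 8-byte aligned storage and for ensuring that T is safe to copy
  // bytewise:
  //
  //   const size_t num_bytes = set.WriteToMemory(nullptr);  // Query the size only.
  //   std::vector<uint8_t> buffer(num_bytes);
  //   set.WriteToMemory(buffer.data());
  //   size_t read_count = 0u;
  //   HashSet<T, EmptyFn> copy(buffer.data(), /*make_copy_of_data=*/ true, &read_count);
  //   DCHECK_EQ(read_count, num_bytes);
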
  ~HashSet() {
    DeallocateStorage();
  }

  HashSet& operator=(HashSet&& other) noexcept {
    HashSet(std::move(other)).swap(*this);
    return *this;
  }

  HashSet& operator=(const HashSet& other) noexcept {
    HashSet(other).swap(*this);  // NOLINT(runtime/explicit) - a case of lint gone mad.
    return *this;
  }

  // Lower case for c++11 for each.
  iterator begin() {
    iterator ret(this, 0);
    if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
      ++ret;  // Skip all the empty slots.
    }
    return ret;
  }

  // Lower case for c++11 for each. const version.
  const_iterator begin() const {
    const_iterator ret(this, 0);
    if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
      ++ret;  // Skip all the empty slots.
    }
    return ret;
  }

  // Lower case for c++11 for each.
  iterator end() {
    return iterator(this, NumBuckets());
  }

  // Lower case for c++11 for each. const version.
  const_iterator end() const {
    return const_iterator(this, NumBuckets());
  }

  bool Empty() const {
    return Size() == 0;
  }

  // Return true if the hash set has ownership of the underlying data.
  bool OwnsData() const {
    return owns_data_;
  }

  // Erase algorithm:
  // Make an empty slot where the iterator is pointing.
  // Scan forwards until we hit another empty slot.
  // If an element in between doesn't rehash to the range from the current empty slot to the
  // iterator, it must have been probed from before the empty slot; in that case we can move it
  // to the empty slot and set the empty slot to be the location we just moved from.
  // Relies on maintaining the invariant that there are no empty slots from the 'ideal' index of
  // an element to its actual location/index.
  iterator Erase(iterator it) {
    // empty_index is the index that will become empty.
    size_t empty_index = it.index_;
    DCHECK(!IsFreeSlot(empty_index));
    size_t next_index = empty_index;
    bool filled = false;  // True if we filled the empty index.
    while (true) {
      next_index = NextIndex(next_index);
      T& next_element = ElementForIndex(next_index);
      // If the next element is empty, we are done. Make sure to clear the current empty index.
      if (emptyfn_.IsEmpty(next_element)) {
        emptyfn_.MakeEmpty(ElementForIndex(empty_index));
        break;
      }
      // Otherwise try to see if the next element can fill the current empty index.
      const size_t next_hash = hashfn_(next_element);
      // Calculate the ideal index; if it is within empty_index + 1 to next_index then there is
      // nothing we can do.
      size_t next_ideal_index = IndexForHash(next_hash);
      // Loop around if needed for our check.
      size_t unwrapped_next_index = next_index;
      if (unwrapped_next_index < empty_index) {
        unwrapped_next_index += NumBuckets();
      }
      // Loop around if needed for our check.
      size_t unwrapped_next_ideal_index = next_ideal_index;
      if (unwrapped_next_ideal_index < empty_index) {
        unwrapped_next_ideal_index += NumBuckets();
      }
      if (unwrapped_next_ideal_index <= empty_index ||
          unwrapped_next_ideal_index > unwrapped_next_index) {
        // If the target index isn't within our current range it must have been probed from
        // before the empty index.
        ElementForIndex(empty_index) = std::move(next_element);
        filled = true;  // TODO: Optimize
        empty_index = next_index;
      }
    }
    --num_elements_;
    // If we didn't fill the slot then we need to go to the next non-free slot.
    if (!filled) {
      ++it;
    }
    return it;
  }

  // Find an element, returns end() if not found.
  // Allows custom key (K) types; an example of when this is useful: a set of Class* keyed by
  // name, where we want to look up a class by name without allocating a dummy object in the
  // heap just for the lookup.
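  //
  // Illustrative sketch of such a heterogeneous lookup. Item, ItemEmptyFn, NameHashFn and
  // NamePred are hypothetical helpers, not defined in this file:
  //
  //   struct Item { std::string name; int value; };
  //   struct NameHashFn {
  //     size_t operator()(const Item& i) const { return std::hash<std::string>()(i.name); }
  //     size_t operator()(const char* n) const { return std::hash<std::string>()(n); }
  //   };
  //   struct NamePred {
  //     bool operator()(const Item& a, const Item& b) const { return a.name == b.name; }
  //     bool operator()(const Item& a, const char* n) const { return a.name == n; }
  //   };
  //   HashSet<Item, ItemEmptyFn, NameHashFn, NamePred> items;
  //   auto it = items.Find("foo");  // No Item is constructed for the lookup.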
  template <typename K>
  iterator Find(const K& key) {
    return FindWithHash(key, hashfn_(key));
  }

  template <typename K>
  const_iterator Find(const K& key) const {
    return FindWithHash(key, hashfn_(key));
  }

  template <typename K>
  iterator FindWithHash(const K& key, size_t hash) {
    return iterator(this, FindIndex(key, hash));
  }

  template <typename K>
  const_iterator FindWithHash(const K& key, size_t hash) const {
    return const_iterator(this, FindIndex(key, hash));
  }

  // Insert an element, allows duplicates.
  void Insert(const T& element) {
    InsertWithHash(element, hashfn_(element));
  }

  void InsertWithHash(const T& element, size_t hash) {
    DCHECK_EQ(hash, hashfn_(element));
    if (num_elements_ >= elements_until_expand_) {
      Expand();
      DCHECK_LT(num_elements_, elements_until_expand_);
    }
    const size_t index = FirstAvailableSlot(IndexForHash(hash));
    data_[index] = element;
    ++num_elements_;
  }

  size_t Size() const {
    return num_elements_;
  }

  void swap(HashSet& other) {
    // Use argument-dependent lookup with fall-back to std::swap() for function objects.
    using std::swap;
    swap(allocfn_, other.allocfn_);
    swap(hashfn_, other.hashfn_);
    swap(emptyfn_, other.emptyfn_);
    swap(pred_, other.pred_);
    std::swap(data_, other.data_);
    std::swap(num_buckets_, other.num_buckets_);
    std::swap(num_elements_, other.num_elements_);
    std::swap(elements_until_expand_, other.elements_until_expand_);
    std::swap(min_load_factor_, other.min_load_factor_);
    std::swap(max_load_factor_, other.max_load_factor_);
    std::swap(owns_data_, other.owns_data_);
  }

  allocator_type get_allocator() const {
    return allocfn_;
  }

  void ShrinkToMaximumLoad() {
    Resize(Size() / max_load_factor_);
  }

  // Reserve enough room to insert until Size() == num_elements without needing to grow the hash
  // set. No-op if the hash set is already large enough to do this.
  void Reserve(size_t num_elements) {
    size_t num_buckets = num_elements / max_load_factor_;
    // Deal with rounding errors. Add one for rounding.
    while (static_cast<size_t>(num_buckets * max_load_factor_) <= num_elements + 1u) {
      ++num_buckets;
    }
    if (num_buckets > NumBuckets()) {
      Resize(num_buckets);
    }
  }

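  // Illustrative sketch: reserving up front avoids repeated rehashing during a batch of inserts
  // ('inputs' and StringEmptyFn are hypothetical, as sketched in the class comment above):
  //
  //   HashSet<std::string, StringEmptyFn> strings;
  //   strings.Reserve(inputs.size());
  //   for (const std::string& s : inputs) {
  //     strings.Insert(s);
  //   }
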
  // Total distance that inserted elements were probed. Used for measuring how good hash functions
  // are.
  size_t TotalProbeDistance() const {
    size_t total = 0;
    for (size_t i = 0; i < NumBuckets(); ++i) {
      const T& element = ElementForIndex(i);
      if (!emptyfn_.IsEmpty(element)) {
        size_t ideal_location = IndexForHash(hashfn_(element));
        if (ideal_location > i) {
          total += i + NumBuckets() - ideal_location;
        } else {
          total += i - ideal_location;
        }
      }
    }
    return total;
  }

  // Calculate the current load factor and return it.
  double CalculateLoadFactor() const {
    return static_cast<double>(Size()) / static_cast<double>(NumBuckets());
  }

  // Make sure that everything reinserts in the right spot. Returns the number of errors.
  size_t Verify() NO_THREAD_SAFETY_ANALYSIS {
    size_t errors = 0;
    for (size_t i = 0; i < num_buckets_; ++i) {
      T& element = data_[i];
      if (!emptyfn_.IsEmpty(element)) {
        T temp;
        emptyfn_.MakeEmpty(temp);
        std::swap(temp, element);
        size_t first_slot = FirstAvailableSlot(IndexForHash(hashfn_(temp)));
        if (i != first_slot) {
          LOG(ERROR) << "Element " << i << " should be in slot " << first_slot;
          ++errors;
        }
        std::swap(temp, element);
      }
    }
    return errors;
  }

  double GetMinLoadFactor() const {
    return min_load_factor_;
  }

  double GetMaxLoadFactor() const {
    return max_load_factor_;
  }

  // Change the load factor of the hash set. If the current load factor is greater than the max
  // specified, then we resize the hash table storage.
  void SetLoadFactor(double min_load_factor, double max_load_factor) {
    DCHECK_LT(min_load_factor, max_load_factor);
    DCHECK_GT(min_load_factor, 0.0);
    DCHECK_LT(max_load_factor, 1.0);
    min_load_factor_ = min_load_factor;
    max_load_factor_ = max_load_factor;
    elements_until_expand_ = NumBuckets() * max_load_factor_;
    // If the current load factor isn't in the range, then resize to the mean of the minimum and
    // maximum load factor.
    const double load_factor = CalculateLoadFactor();
    if (load_factor > max_load_factor_) {
      Resize(Size() / ((min_load_factor_ + max_load_factor_) * 0.5));
    }
  }

  // The hash set expands when Size() reaches ElementsUntilExpand().
  size_t ElementsUntilExpand() const {
    return elements_until_expand_;
  }
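
  // Worked example with the default load factors and kMinBuckets: after the first allocation
  // there are 1000 buckets, so ElementsUntilExpand() is 1000 * 0.7 = 700. Once Size() has
  // reached 700, the next Insert() first calls Expand(), which resizes to
  // Size() / min_load_factor_ = 700 / 0.4 = 1750 buckets.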

  size_t NumBuckets() const {
    return num_buckets_;
  }

 private:
  T& ElementForIndex(size_t index) {
    DCHECK_LT(index, NumBuckets());
    DCHECK(data_ != nullptr);
    return data_[index];
  }

  const T& ElementForIndex(size_t index) const {
    DCHECK_LT(index, NumBuckets());
    DCHECK(data_ != nullptr);
    return data_[index];
  }

  size_t IndexForHash(size_t hash) const {
    // Protect against undefined behavior (division by zero).
    if (UNLIKELY(num_buckets_ == 0)) {
      return 0;
    }
    return hash % num_buckets_;
  }

  size_t NextIndex(size_t index) const {
    if (UNLIKELY(++index >= num_buckets_)) {
      DCHECK_EQ(index, NumBuckets());
      return 0;
    }
    return index;
  }

  // Find the hash table slot for an element, or return NumBuckets() if not found.
  // This value for not found is important so that iterator(this, FindIndex(...)) == end().
  template <typename K>
  size_t FindIndex(const K& element, size_t hash) const {
    // Guard against failing to get an element for a non-existing index.
    if (UNLIKELY(NumBuckets() == 0)) {
      return 0;
    }
    DCHECK_EQ(hashfn_(element), hash);
    size_t index = IndexForHash(hash);
    while (true) {
      const T& slot = ElementForIndex(index);
      if (emptyfn_.IsEmpty(slot)) {
        return NumBuckets();
      }
      if (pred_(slot, element)) {
        return index;
      }
      index = NextIndex(index);
    }
  }

  bool IsFreeSlot(size_t index) const {
    return emptyfn_.IsEmpty(ElementForIndex(index));
  }

  // Allocate a number of buckets.
  void AllocateStorage(size_t num_buckets) {
    num_buckets_ = num_buckets;
    data_ = allocfn_.allocate(num_buckets_);
    owns_data_ = true;
    for (size_t i = 0; i < num_buckets_; ++i) {
      allocfn_.construct(allocfn_.address(data_[i]));
      emptyfn_.MakeEmpty(data_[i]);
    }
  }

  void DeallocateStorage() {
    if (owns_data_) {
      for (size_t i = 0; i < NumBuckets(); ++i) {
        allocfn_.destroy(allocfn_.address(data_[i]));
      }
      if (data_ != nullptr) {
        allocfn_.deallocate(data_, NumBuckets());
      }
      owns_data_ = false;
    }
    data_ = nullptr;
    num_buckets_ = 0;
  }

  // Expand the set based on the load factors.
  void Expand() {
    size_t min_index = static_cast<size_t>(Size() / min_load_factor_);
    // Resize based on the minimum load factor.
    Resize(min_index);
  }

  // Expand / shrink the table to the new specified size.
  void Resize(size_t new_size) {
    if (new_size < kMinBuckets) {
      new_size = kMinBuckets;
    }
    DCHECK_GE(new_size, Size());
    T* const old_data = data_;
    size_t old_num_buckets = num_buckets_;
    // Reinsert all of the old elements.
    const bool owned_data = owns_data_;
    AllocateStorage(new_size);
    for (size_t i = 0; i < old_num_buckets; ++i) {
      T& element = old_data[i];
      if (!emptyfn_.IsEmpty(element)) {
        data_[FirstAvailableSlot(IndexForHash(hashfn_(element)))] = std::move(element);
      }
      if (owned_data) {
        allocfn_.destroy(allocfn_.address(element));
      }
    }
    if (owned_data) {
      allocfn_.deallocate(old_data, old_num_buckets);
    }

    // When we hit elements_until_expand_, we are at the max load factor and must expand again.
    elements_until_expand_ = NumBuckets() * max_load_factor_;
  }

  ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const {
    DCHECK_LT(index, NumBuckets());  // Don't try to get a slot out of range.
    size_t non_empty_count = 0;
    while (!emptyfn_.IsEmpty(data_[index])) {
      index = NextIndex(index);
      non_empty_count++;
      DCHECK_LE(non_empty_count, NumBuckets());  // Don't loop forever.
    }
    return index;
  }

  // Return new offset.
  template <typename Elem>
  static size_t WriteToBytes(uint8_t* ptr, size_t offset, Elem n) {
    DCHECK_ALIGNED(ptr + offset, sizeof(n));
    if (ptr != nullptr) {
      *reinterpret_cast<Elem*>(ptr + offset) = n;
    }
    return offset + sizeof(n);
  }

  template <typename Elem>
  static size_t ReadFromBytes(const uint8_t* ptr, size_t offset, Elem* out) {
    DCHECK(ptr != nullptr);
    DCHECK_ALIGNED(ptr + offset, sizeof(*out));
    *out = *reinterpret_cast<const Elem*>(ptr + offset);
    return offset + sizeof(*out);
  }

  Alloc allocfn_;  // Allocator function.
  HashFn hashfn_;  // Hashing function.
  EmptyFn emptyfn_;  // IsEmpty/MakeEmpty function.
  Pred pred_;  // Equals function.
  size_t num_elements_;  // Number of inserted elements.
  size_t num_buckets_;  // Number of hash table buckets.
  size_t elements_until_expand_;  // Maximum number of elements until we expand the table.
  bool owns_data_;  // If we own data_ and are responsible for freeing it.
  T* data_;  // Backing storage.
  double min_load_factor_;
  double max_load_factor_;

  ART_FRIEND_TEST(InternTableTest, CrossHash);
};

template <class T, class EmptyFn, class HashFn, class Pred, class Alloc>
void swap(HashSet<T, EmptyFn, HashFn, Pred, Alloc>& lhs,
          HashSet<T, EmptyFn, HashFn, Pred, Alloc>& rhs) {
  lhs.swap(rhs);
}

}  // namespace art

#endif  // ART_RUNTIME_BASE_HASH_SET_H_