blob: 95baa822b19670c80033e65869df835401d0c231 [file] [log] [blame]
Mathieu Chartierc2e20622014-11-03 11:41:47 -08001/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_RUNTIME_BASE_HASH_SET_H_
18#define ART_RUNTIME_BASE_HASH_SET_H_
19
20#include <functional>
Vladimir Marko1f497642015-10-05 20:34:42 +010021#include <iterator>
Mathieu Chartierc2e20622014-11-03 11:41:47 -080022#include <memory>
23#include <stdint.h>
24#include <utility>
25
Mathieu Chartierd39645e2015-06-09 17:50:29 -070026#include "bit_utils.h"
Mathieu Chartierc2e20622014-11-03 11:41:47 -080027#include "logging.h"
28
29namespace art {
30
31// Returns true if an item is empty.
32template <class T>
33class DefaultEmptyFn {
34 public:
35 void MakeEmpty(T& item) const {
36 item = T();
37 }
38 bool IsEmpty(const T& item) const {
39 return item == T();
40 }
41};
42
43template <class T>
44class DefaultEmptyFn<T*> {
45 public:
46 void MakeEmpty(T*& item) const {
47 item = nullptr;
48 }
Vladimir Marko1f497642015-10-05 20:34:42 +010049 bool IsEmpty(T* const& item) const {
Mathieu Chartierc2e20622014-11-03 11:41:47 -080050 return item == nullptr;
51 }
52};
53
54// Low memory version of a hash set, uses less memory than std::unordered_set since elements aren't
Mathieu Chartier47f867a2015-03-18 10:39:00 -070055// boxed. Uses linear probing to resolve collisions.
56// EmptyFn needs to implement two functions MakeEmpty(T& item) and IsEmpty(const T& item).
57// TODO: We could get rid of this requirement by using a bitmap, though maybe this would be slower
58// and more complicated.
Mathieu Chartierc2e20622014-11-03 11:41:47 -080059template <class T, class EmptyFn = DefaultEmptyFn<T>, class HashFn = std::hash<T>,
60 class Pred = std::equal_to<T>, class Alloc = std::allocator<T>>
61class HashSet {
Mathieu Chartier47f867a2015-03-18 10:39:00 -070062 template <class Elem, class HashSetType>
Vladimir Marko1f497642015-10-05 20:34:42 +010063 class BaseIterator : std::iterator<std::forward_iterator_tag, Elem> {
Mathieu Chartierc2e20622014-11-03 11:41:47 -080064 public:
Mathieu Chartier47f867a2015-03-18 10:39:00 -070065 BaseIterator(const BaseIterator&) = default;
66 BaseIterator(BaseIterator&&) = default;
67 BaseIterator(HashSetType* hash_set, size_t index) : index_(index), hash_set_(hash_set) {
Mathieu Chartierc2e20622014-11-03 11:41:47 -080068 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -070069 BaseIterator& operator=(const BaseIterator&) = default;
70 BaseIterator& operator=(BaseIterator&&) = default;
71
72 bool operator==(const BaseIterator& other) const {
73 return hash_set_ == other.hash_set_ && this->index_ == other.index_;
Mathieu Chartierc2e20622014-11-03 11:41:47 -080074 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -070075
76 bool operator!=(const BaseIterator& other) const {
Mathieu Chartierc2e20622014-11-03 11:41:47 -080077 return !(*this == other);
78 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -070079
80 BaseIterator operator++() { // Value after modification.
81 this->index_ = this->NextNonEmptySlot(this->index_, hash_set_);
Mathieu Chartierc2e20622014-11-03 11:41:47 -080082 return *this;
83 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -070084
85 BaseIterator operator++(int) {
Vladimir Marko1f497642015-10-05 20:34:42 +010086 BaseIterator temp = *this;
Mathieu Chartier47f867a2015-03-18 10:39:00 -070087 this->index_ = this->NextNonEmptySlot(this->index_, hash_set_);
Mathieu Chartierc2e20622014-11-03 11:41:47 -080088 return temp;
89 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -070090
91 Elem& operator*() const {
92 DCHECK(!hash_set_->IsFreeSlot(this->index_));
93 return hash_set_->ElementForIndex(this->index_);
Mathieu Chartierc2e20622014-11-03 11:41:47 -080094 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -070095
96 Elem* operator->() const {
Mathieu Chartierc2e20622014-11-03 11:41:47 -080097 return &**this;
98 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -070099
Vladimir Marko1f497642015-10-05 20:34:42 +0100100 // TODO: Operator -- --(int) (and use std::bidirectional_iterator_tag)
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800101
102 private:
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800103 size_t index_;
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700104 HashSetType* hash_set_;
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800105
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700106 size_t NextNonEmptySlot(size_t index, const HashSet* hash_set) const {
107 const size_t num_buckets = hash_set->NumBuckets();
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800108 DCHECK_LT(index, num_buckets);
109 do {
110 ++index;
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700111 } while (index < num_buckets && hash_set->IsFreeSlot(index));
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800112 return index;
113 }
114
115 friend class HashSet;
116 };
117
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700118 public:
Vladimir Marko1f497642015-10-05 20:34:42 +0100119 using value_type = T;
120 using allocator_type = Alloc;
121 using reference = T&;
122 using const_reference = const T&;
123 using pointer = T*;
124 using const_pointer = const T*;
125 using iterator = BaseIterator<T, HashSet>;
126 using const_iterator = BaseIterator<const T, const HashSet>;
127 using size_type = size_t;
128 using difference_type = ptrdiff_t;
129
Mathieu Chartier32cc9ee2015-10-15 09:19:15 -0700130 static constexpr double kDefaultMinLoadFactor = 0.4;
131 static constexpr double kDefaultMaxLoadFactor = 0.7;
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700132 static constexpr size_t kMinBuckets = 1000;
133
Mathieu Chartierd39645e2015-06-09 17:50:29 -0700134 // If we don't own the data, this will create a new array which owns the data.
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800135 void Clear() {
136 DeallocateStorage();
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800137 num_elements_ = 0;
138 elements_until_expand_ = 0;
139 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700140
Mathieu Chartier32cc9ee2015-10-15 09:19:15 -0700141 HashSet() : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor) {}
142
143 HashSet(double min_load_factor, double max_load_factor)
Vladimir Marko1f497642015-10-05 20:34:42 +0100144 : num_elements_(0u),
145 num_buckets_(0u),
146 elements_until_expand_(0u),
147 owns_data_(false),
148 data_(nullptr),
Mathieu Chartier32cc9ee2015-10-15 09:19:15 -0700149 min_load_factor_(min_load_factor),
150 max_load_factor_(max_load_factor) {
151 DCHECK_GT(min_load_factor, 0.0);
152 DCHECK_LT(max_load_factor, 1.0);
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800153 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700154
Vladimir Marko1f497642015-10-05 20:34:42 +0100155 explicit HashSet(const allocator_type& alloc)
156 : allocfn_(alloc),
157 hashfn_(),
158 emptyfn_(),
159 pred_(),
160 num_elements_(0u),
161 num_buckets_(0u),
162 elements_until_expand_(0u),
163 owns_data_(false),
164 data_(nullptr),
165 min_load_factor_(kDefaultMinLoadFactor),
166 max_load_factor_(kDefaultMaxLoadFactor) {
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800167 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700168
Vladimir Marko1f497642015-10-05 20:34:42 +0100169 HashSet(const HashSet& other)
170 : allocfn_(other.allocfn_),
171 hashfn_(other.hashfn_),
172 emptyfn_(other.emptyfn_),
173 pred_(other.pred_),
174 num_elements_(other.num_elements_),
175 num_buckets_(0),
176 elements_until_expand_(other.elements_until_expand_),
177 owns_data_(false),
178 data_(nullptr),
179 min_load_factor_(other.min_load_factor_),
180 max_load_factor_(other.max_load_factor_) {
181 AllocateStorage(other.NumBuckets());
182 for (size_t i = 0; i < num_buckets_; ++i) {
183 ElementForIndex(i) = other.data_[i];
184 }
185 }
186
187 HashSet(HashSet&& other)
188 : allocfn_(std::move(other.allocfn_)),
189 hashfn_(std::move(other.hashfn_)),
190 emptyfn_(std::move(other.emptyfn_)),
191 pred_(std::move(other.pred_)),
192 num_elements_(other.num_elements_),
193 num_buckets_(other.num_buckets_),
194 elements_until_expand_(other.elements_until_expand_),
195 owns_data_(other.owns_data_),
196 data_(other.data_),
197 min_load_factor_(other.min_load_factor_),
198 max_load_factor_(other.max_load_factor_) {
199 other.num_elements_ = 0u;
200 other.num_buckets_ = 0u;
201 other.elements_until_expand_ = 0u;
202 other.owns_data_ = false;
203 other.data_ = nullptr;
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800204 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700205
Mathieu Chartierd39645e2015-06-09 17:50:29 -0700206 // Construct from existing data.
207 // Read from a block of memory, if make_copy_of_data is false, then data_ points to within the
208 // passed in ptr_.
209 HashSet(const uint8_t* ptr, bool make_copy_of_data, size_t* read_count) {
210 uint64_t temp;
211 size_t offset = 0;
212 offset = ReadFromBytes(ptr, offset, &temp);
213 num_elements_ = static_cast<uint64_t>(temp);
214 offset = ReadFromBytes(ptr, offset, &temp);
215 num_buckets_ = static_cast<uint64_t>(temp);
216 CHECK_LE(num_elements_, num_buckets_);
217 offset = ReadFromBytes(ptr, offset, &temp);
218 elements_until_expand_ = static_cast<uint64_t>(temp);
219 offset = ReadFromBytes(ptr, offset, &min_load_factor_);
220 offset = ReadFromBytes(ptr, offset, &max_load_factor_);
221 if (!make_copy_of_data) {
222 owns_data_ = false;
223 data_ = const_cast<T*>(reinterpret_cast<const T*>(ptr + offset));
224 offset += sizeof(*data_) * num_buckets_;
225 } else {
226 AllocateStorage(num_buckets_);
227 // Write elements, not that this may not be safe for cross compilation if the elements are
228 // pointer sized.
229 for (size_t i = 0; i < num_buckets_; ++i) {
230 offset = ReadFromBytes(ptr, offset, &data_[i]);
231 }
232 }
233 // Caller responsible for aligning.
234 *read_count = offset;
235 }
236
237 // Returns how large the table is after being written. If target is null, then no writing happens
238 // but the size is still returned. Target must be 8 byte aligned.
239 size_t WriteToMemory(uint8_t* ptr) {
240 size_t offset = 0;
241 offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_elements_));
242 offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_buckets_));
243 offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(elements_until_expand_));
244 offset = WriteToBytes(ptr, offset, min_load_factor_);
245 offset = WriteToBytes(ptr, offset, max_load_factor_);
246 // Write elements, not that this may not be safe for cross compilation if the elements are
247 // pointer sized.
248 for (size_t i = 0; i < num_buckets_; ++i) {
249 offset = WriteToBytes(ptr, offset, data_[i]);
250 }
251 // Caller responsible for aligning.
252 return offset;
253 }
254
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800255 ~HashSet() {
256 DeallocateStorage();
257 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700258
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800259 HashSet& operator=(HashSet&& other) {
Vladimir Marko1f497642015-10-05 20:34:42 +0100260 HashSet(std::move(other)).swap(*this);
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800261 return *this;
262 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700263
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800264 HashSet& operator=(const HashSet& other) {
Vladimir Marko1f497642015-10-05 20:34:42 +0100265 HashSet(other).swap(*this); // NOLINT(runtime/explicit) - a case of lint gone mad.
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800266 return *this;
267 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700268
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800269 // Lower case for c++11 for each.
Vladimir Marko1f497642015-10-05 20:34:42 +0100270 iterator begin() {
271 iterator ret(this, 0);
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700272 if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800273 ++ret; // Skip all the empty slots.
274 }
275 return ret;
276 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700277
Igor Murashkine2facc52015-07-10 13:49:08 -0700278 // Lower case for c++11 for each. const version.
Vladimir Marko1f497642015-10-05 20:34:42 +0100279 const_iterator begin() const {
280 const_iterator ret(this, 0);
Igor Murashkine2facc52015-07-10 13:49:08 -0700281 if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
282 ++ret; // Skip all the empty slots.
283 }
284 return ret;
285 }
286
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800287 // Lower case for c++11 for each.
Vladimir Marko1f497642015-10-05 20:34:42 +0100288 iterator end() {
289 return iterator(this, NumBuckets());
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800290 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700291
Igor Murashkine2facc52015-07-10 13:49:08 -0700292 // Lower case for c++11 for each. const version.
Vladimir Marko1f497642015-10-05 20:34:42 +0100293 const_iterator end() const {
294 return const_iterator(this, NumBuckets());
Igor Murashkine2facc52015-07-10 13:49:08 -0700295 }
296
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800297 bool Empty() {
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700298 return Size() == 0;
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800299 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700300
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800301 // Erase algorithm:
302 // Make an empty slot where the iterator is pointing.
Igor Murashkine2facc52015-07-10 13:49:08 -0700303 // Scan forwards until we hit another empty slot.
304 // If an element in between doesn't rehash to the range from the current empty slot to the
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800305 // iterator. It must be before the empty slot, in that case we can move it to the empty slot
306 // and set the empty slot to be the location we just moved from.
307 // Relies on maintaining the invariant that there's no empty slots from the 'ideal' index of an
308 // element to its actual location/index.
Vladimir Marko1f497642015-10-05 20:34:42 +0100309 iterator Erase(iterator it) {
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800310 // empty_index is the index that will become empty.
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700311 size_t empty_index = it.index_;
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800312 DCHECK(!IsFreeSlot(empty_index));
313 size_t next_index = empty_index;
314 bool filled = false; // True if we filled the empty index.
315 while (true) {
316 next_index = NextIndex(next_index);
317 T& next_element = ElementForIndex(next_index);
318 // If the next element is empty, we are done. Make sure to clear the current empty index.
319 if (emptyfn_.IsEmpty(next_element)) {
320 emptyfn_.MakeEmpty(ElementForIndex(empty_index));
321 break;
322 }
323 // Otherwise try to see if the next element can fill the current empty index.
324 const size_t next_hash = hashfn_(next_element);
325 // Calculate the ideal index, if it is within empty_index + 1 to next_index then there is
326 // nothing we can do.
327 size_t next_ideal_index = IndexForHash(next_hash);
328 // Loop around if needed for our check.
329 size_t unwrapped_next_index = next_index;
330 if (unwrapped_next_index < empty_index) {
331 unwrapped_next_index += NumBuckets();
332 }
333 // Loop around if needed for our check.
334 size_t unwrapped_next_ideal_index = next_ideal_index;
335 if (unwrapped_next_ideal_index < empty_index) {
336 unwrapped_next_ideal_index += NumBuckets();
337 }
338 if (unwrapped_next_ideal_index <= empty_index ||
339 unwrapped_next_ideal_index > unwrapped_next_index) {
340 // If the target index isn't within our current range it must have been probed from before
341 // the empty index.
342 ElementForIndex(empty_index) = std::move(next_element);
343 filled = true; // TODO: Optimize
344 empty_index = next_index;
345 }
346 }
347 --num_elements_;
348 // If we didn't fill the slot then we need go to the next non free slot.
349 if (!filled) {
350 ++it;
351 }
352 return it;
353 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700354
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800355 // Find an element, returns end() if not found.
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700356 // Allows custom key (K) types, example of when this is useful:
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800357 // Set of Class* sorted by name, want to find a class with a name but can't allocate a dummy
358 // object in the heap for performance solution.
359 template <typename K>
Vladimir Marko1f497642015-10-05 20:34:42 +0100360 iterator Find(const K& key) {
Igor Murashkine2facc52015-07-10 13:49:08 -0700361 return FindWithHash(key, hashfn_(key));
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800362 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700363
364 template <typename K>
Vladimir Marko1f497642015-10-05 20:34:42 +0100365 const_iterator Find(const K& key) const {
Igor Murashkine2facc52015-07-10 13:49:08 -0700366 return FindWithHash(key, hashfn_(key));
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700367 }
368
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800369 template <typename K>
Vladimir Marko1f497642015-10-05 20:34:42 +0100370 iterator FindWithHash(const K& key, size_t hash) {
371 return iterator(this, FindIndex(key, hash));
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800372 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700373
374 template <typename K>
Vladimir Marko1f497642015-10-05 20:34:42 +0100375 const_iterator FindWithHash(const K& key, size_t hash) const {
376 return const_iterator(this, FindIndex(key, hash));
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700377 }
378
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800379 // Insert an element, allows duplicates.
380 void Insert(const T& element) {
381 InsertWithHash(element, hashfn_(element));
382 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700383
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800384 void InsertWithHash(const T& element, size_t hash) {
385 DCHECK_EQ(hash, hashfn_(element));
386 if (num_elements_ >= elements_until_expand_) {
387 Expand();
388 DCHECK_LT(num_elements_, elements_until_expand_);
389 }
390 const size_t index = FirstAvailableSlot(IndexForHash(hash));
391 data_[index] = element;
392 ++num_elements_;
393 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700394
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800395 size_t Size() const {
396 return num_elements_;
397 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700398
Vladimir Marko1f497642015-10-05 20:34:42 +0100399 void swap(HashSet& other) {
400 // Use argument-dependent lookup with fall-back to std::swap() for function objects.
401 using std::swap;
402 swap(allocfn_, other.allocfn_);
403 swap(hashfn_, other.hashfn_);
404 swap(emptyfn_, other.emptyfn_);
405 swap(pred_, other.pred_);
406 std::swap(data_, other.data_);
407 std::swap(num_buckets_, other.num_buckets_);
408 std::swap(num_elements_, other.num_elements_);
409 std::swap(elements_until_expand_, other.elements_until_expand_);
410 std::swap(min_load_factor_, other.min_load_factor_);
411 std::swap(max_load_factor_, other.max_load_factor_);
412 std::swap(owns_data_, other.owns_data_);
413 }
414
415 allocator_type get_allocator() const {
416 return allocfn_;
417 }
418
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800419 void ShrinkToMaximumLoad() {
420 Resize(Size() / max_load_factor_);
421 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700422
Mathieu Chartierc482d382015-10-26 11:20:18 -0700423 // Reserve enough room to insert until Size() == num_elements without requiring to grow the hash
424 // set. No-op if the hash set is already large enough to do this.
425 void Reserve(size_t num_elements) {
426 size_t num_buckets = num_elements / max_load_factor_;
427 // Deal with rounding errors. Add one for rounding.
428 while (static_cast<size_t>(num_buckets * max_load_factor_) <= num_elements + 1u) {
429 ++num_buckets;
430 }
431 if (num_buckets > NumBuckets()) {
432 Resize(num_buckets);
433 }
434 }
435
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800436 // To distance that inserted elements were probed. Used for measuring how good hash functions
437 // are.
438 size_t TotalProbeDistance() const {
439 size_t total = 0;
440 for (size_t i = 0; i < NumBuckets(); ++i) {
441 const T& element = ElementForIndex(i);
442 if (!emptyfn_.IsEmpty(element)) {
443 size_t ideal_location = IndexForHash(hashfn_(element));
444 if (ideal_location > i) {
445 total += i + NumBuckets() - ideal_location;
446 } else {
447 total += i - ideal_location;
448 }
449 }
450 }
451 return total;
452 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700453
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800454 // Calculate the current load factor and return it.
455 double CalculateLoadFactor() const {
456 return static_cast<double>(Size()) / static_cast<double>(NumBuckets());
457 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700458
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800459 // Make sure that everything reinserts in the right spot. Returns the number of errors.
460 size_t Verify() {
461 size_t errors = 0;
462 for (size_t i = 0; i < num_buckets_; ++i) {
463 T& element = data_[i];
464 if (!emptyfn_.IsEmpty(element)) {
465 T temp;
466 emptyfn_.MakeEmpty(temp);
467 std::swap(temp, element);
468 size_t first_slot = FirstAvailableSlot(IndexForHash(hashfn_(temp)));
469 if (i != first_slot) {
470 LOG(ERROR) << "Element " << i << " should be in slot " << first_slot;
471 ++errors;
472 }
473 std::swap(temp, element);
474 }
475 }
476 return errors;
477 }
478
Mathieu Chartier32cc9ee2015-10-15 09:19:15 -0700479 double GetMinLoadFactor() const {
480 return min_load_factor_;
481 }
482
483 double GetMaxLoadFactor() const {
484 return max_load_factor_;
485 }
486
487 // Change the load factor of the hash set. If the current load factor is greater than the max
488 // specified, then we resize the hash table storage.
489 void SetLoadFactor(double min_load_factor, double max_load_factor) {
490 DCHECK_LT(min_load_factor, max_load_factor);
491 DCHECK_GT(min_load_factor, 0.0);
492 DCHECK_LT(max_load_factor, 1.0);
493 min_load_factor_ = min_load_factor;
494 max_load_factor_ = max_load_factor;
495 elements_until_expand_ = NumBuckets() * max_load_factor_;
496 // If the current load factor isn't in the range, then resize to the mean of the minimum and
497 // maximum load factor.
498 const double load_factor = CalculateLoadFactor();
499 if (load_factor > max_load_factor_) {
500 Resize(Size() / ((min_load_factor_ + max_load_factor_) * 0.5));
501 }
502 }
503
Mathieu Chartierc482d382015-10-26 11:20:18 -0700504 // The hash set expands when Size() reaches ElementsUntilExpand().
505 size_t ElementsUntilExpand() const {
506 return elements_until_expand_;
507 }
508
509 size_t NumBuckets() const {
510 return num_buckets_;
511 }
512
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800513 private:
514 T& ElementForIndex(size_t index) {
515 DCHECK_LT(index, NumBuckets());
516 DCHECK(data_ != nullptr);
517 return data_[index];
518 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700519
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800520 const T& ElementForIndex(size_t index) const {
521 DCHECK_LT(index, NumBuckets());
522 DCHECK(data_ != nullptr);
523 return data_[index];
524 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700525
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800526 size_t IndexForHash(size_t hash) const {
Igor Murashkine2facc52015-07-10 13:49:08 -0700527 // Protect against undefined behavior (division by zero).
528 if (UNLIKELY(num_buckets_ == 0)) {
529 return 0;
530 }
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800531 return hash % num_buckets_;
532 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700533
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800534 size_t NextIndex(size_t index) const {
535 if (UNLIKELY(++index >= num_buckets_)) {
536 DCHECK_EQ(index, NumBuckets());
537 return 0;
538 }
539 return index;
540 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700541
542 // Find the hash table slot for an element, or return NumBuckets() if not found.
Vladimir Marko1f497642015-10-05 20:34:42 +0100543 // This value for not found is important so that iterator(this, FindIndex(...)) == end().
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700544 template <typename K>
545 size_t FindIndex(const K& element, size_t hash) const {
Igor Murashkine2facc52015-07-10 13:49:08 -0700546 // Guard against failing to get an element for a non-existing index.
547 if (UNLIKELY(NumBuckets() == 0)) {
548 return 0;
549 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700550 DCHECK_EQ(hashfn_(element), hash);
551 size_t index = IndexForHash(hash);
552 while (true) {
553 const T& slot = ElementForIndex(index);
554 if (emptyfn_.IsEmpty(slot)) {
555 return NumBuckets();
556 }
557 if (pred_(slot, element)) {
558 return index;
559 }
560 index = NextIndex(index);
561 }
562 }
563
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800564 bool IsFreeSlot(size_t index) const {
565 return emptyfn_.IsEmpty(ElementForIndex(index));
566 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700567
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800568 // Allocate a number of buckets.
569 void AllocateStorage(size_t num_buckets) {
570 num_buckets_ = num_buckets;
571 data_ = allocfn_.allocate(num_buckets_);
Mathieu Chartierd39645e2015-06-09 17:50:29 -0700572 owns_data_ = true;
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800573 for (size_t i = 0; i < num_buckets_; ++i) {
574 allocfn_.construct(allocfn_.address(data_[i]));
575 emptyfn_.MakeEmpty(data_[i]);
576 }
577 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700578
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800579 void DeallocateStorage() {
Mathieu Chartier88b6b052015-07-22 19:39:56 -0700580 if (owns_data_) {
581 for (size_t i = 0; i < NumBuckets(); ++i) {
582 allocfn_.destroy(allocfn_.address(data_[i]));
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800583 }
Mathieu Chartier88b6b052015-07-22 19:39:56 -0700584 if (data_ != nullptr) {
585 allocfn_.deallocate(data_, NumBuckets());
586 }
587 owns_data_ = false;
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800588 }
Mathieu Chartier88b6b052015-07-22 19:39:56 -0700589 data_ = nullptr;
590 num_buckets_ = 0;
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800591 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700592
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800593 // Expand the set based on the load factors.
594 void Expand() {
595 size_t min_index = static_cast<size_t>(Size() / min_load_factor_);
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800596 // Resize based on the minimum load factor.
597 Resize(min_index);
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800598 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700599
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800600 // Expand / shrink the table to the new specified size.
601 void Resize(size_t new_size) {
Mathieu Chartier88b6b052015-07-22 19:39:56 -0700602 if (new_size < kMinBuckets) {
603 new_size = kMinBuckets;
604 }
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800605 DCHECK_GE(new_size, Size());
Mathieu Chartierd39645e2015-06-09 17:50:29 -0700606 T* const old_data = data_;
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800607 size_t old_num_buckets = num_buckets_;
608 // Reinsert all of the old elements.
Mathieu Chartierd39645e2015-06-09 17:50:29 -0700609 const bool owned_data = owns_data_;
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800610 AllocateStorage(new_size);
611 for (size_t i = 0; i < old_num_buckets; ++i) {
612 T& element = old_data[i];
613 if (!emptyfn_.IsEmpty(element)) {
614 data_[FirstAvailableSlot(IndexForHash(hashfn_(element)))] = std::move(element);
615 }
Mathieu Chartierd39645e2015-06-09 17:50:29 -0700616 if (owned_data) {
617 allocfn_.destroy(allocfn_.address(element));
618 }
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800619 }
Mathieu Chartierd39645e2015-06-09 17:50:29 -0700620 if (owned_data) {
621 allocfn_.deallocate(old_data, old_num_buckets);
622 }
Igor Murashkin3552d962015-06-22 15:57:38 -0700623
624 // When we hit elements_until_expand_, we are at the max load factor and must expand again.
625 elements_until_expand_ = NumBuckets() * max_load_factor_;
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800626 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700627
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800628 ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const {
Igor Murashkin3552d962015-06-22 15:57:38 -0700629 DCHECK_LT(index, NumBuckets()); // Don't try to get a slot out of range.
630 size_t non_empty_count = 0;
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800631 while (!emptyfn_.IsEmpty(data_[index])) {
632 index = NextIndex(index);
Igor Murashkin3552d962015-06-22 15:57:38 -0700633 non_empty_count++;
634 DCHECK_LE(non_empty_count, NumBuckets()); // Don't loop forever.
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800635 }
636 return index;
637 }
638
Mathieu Chartierd39645e2015-06-09 17:50:29 -0700639 // Return new offset.
640 template <typename Elem>
641 static size_t WriteToBytes(uint8_t* ptr, size_t offset, Elem n) {
642 DCHECK_ALIGNED(ptr + offset, sizeof(n));
643 if (ptr != nullptr) {
644 *reinterpret_cast<Elem*>(ptr + offset) = n;
645 }
646 return offset + sizeof(n);
647 }
648
649 template <typename Elem>
650 static size_t ReadFromBytes(const uint8_t* ptr, size_t offset, Elem* out) {
651 DCHECK(ptr != nullptr);
652 DCHECK_ALIGNED(ptr + offset, sizeof(*out));
653 *out = *reinterpret_cast<const Elem*>(ptr + offset);
654 return offset + sizeof(*out);
655 }
656
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800657 Alloc allocfn_; // Allocator function.
658 HashFn hashfn_; // Hashing function.
659 EmptyFn emptyfn_; // IsEmpty/SetEmpty function.
660 Pred pred_; // Equals function.
661 size_t num_elements_; // Number of inserted elements.
662 size_t num_buckets_; // Number of hash table buckets.
Igor Murashkin3552d962015-06-22 15:57:38 -0700663 size_t elements_until_expand_; // Maximum number of elements until we expand the table.
Mathieu Chartierd39645e2015-06-09 17:50:29 -0700664 bool owns_data_; // If we own data_ and are responsible for freeing it.
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800665 T* data_; // Backing storage.
666 double min_load_factor_;
667 double max_load_factor_;
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800668};
669
Vladimir Marko1f497642015-10-05 20:34:42 +0100670template <class T, class EmptyFn, class HashFn, class Pred, class Alloc>
671void swap(HashSet<T, EmptyFn, HashFn, Pred, Alloc>& lhs,
672 HashSet<T, EmptyFn, HashFn, Pred, Alloc>& rhs) {
673 lhs.swap(rhs);
674}
675
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800676} // namespace art
677
678#endif // ART_RUNTIME_BASE_HASH_SET_H_