blob: ab63dddaff24cf866dab720f26714a89e512f492 [file] [log] [blame]
Mathieu Chartierc2e20622014-11-03 11:41:47 -08001/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_RUNTIME_BASE_HASH_SET_H_
18#define ART_RUNTIME_BASE_HASH_SET_H_
19
20#include <functional>
21#include <memory>
22#include <stdint.h>
23#include <utility>
24
25#include "logging.h"
26
27namespace art {
28
29// Returns true if an item is empty.
30template <class T>
31class DefaultEmptyFn {
32 public:
33 void MakeEmpty(T& item) const {
34 item = T();
35 }
36 bool IsEmpty(const T& item) const {
37 return item == T();
38 }
39};
40
41template <class T>
42class DefaultEmptyFn<T*> {
43 public:
44 void MakeEmpty(T*& item) const {
45 item = nullptr;
46 }
47 bool IsEmpty(const T*& item) const {
48 return item == nullptr;
49 }
50};
51
52// Low memory version of a hash set, uses less memory than std::unordered_set since elements aren't
Mathieu Chartier47f867a2015-03-18 10:39:00 -070053// boxed. Uses linear probing to resolve collisions.
54// EmptyFn needs to implement two functions MakeEmpty(T& item) and IsEmpty(const T& item).
55// TODO: We could get rid of this requirement by using a bitmap, though maybe this would be slower
56// and more complicated.
Mathieu Chartierc2e20622014-11-03 11:41:47 -080057template <class T, class EmptyFn = DefaultEmptyFn<T>, class HashFn = std::hash<T>,
58 class Pred = std::equal_to<T>, class Alloc = std::allocator<T>>
59class HashSet {
Mathieu Chartier47f867a2015-03-18 10:39:00 -070060 template <class Elem, class HashSetType>
61 class BaseIterator {
Mathieu Chartierc2e20622014-11-03 11:41:47 -080062 public:
Mathieu Chartier47f867a2015-03-18 10:39:00 -070063 BaseIterator(const BaseIterator&) = default;
64 BaseIterator(BaseIterator&&) = default;
65 BaseIterator(HashSetType* hash_set, size_t index) : index_(index), hash_set_(hash_set) {
Mathieu Chartierc2e20622014-11-03 11:41:47 -080066 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -070067 BaseIterator& operator=(const BaseIterator&) = default;
68 BaseIterator& operator=(BaseIterator&&) = default;
69
70 bool operator==(const BaseIterator& other) const {
71 return hash_set_ == other.hash_set_ && this->index_ == other.index_;
Mathieu Chartierc2e20622014-11-03 11:41:47 -080072 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -070073
74 bool operator!=(const BaseIterator& other) const {
Mathieu Chartierc2e20622014-11-03 11:41:47 -080075 return !(*this == other);
76 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -070077
78 BaseIterator operator++() { // Value after modification.
79 this->index_ = this->NextNonEmptySlot(this->index_, hash_set_);
Mathieu Chartierc2e20622014-11-03 11:41:47 -080080 return *this;
81 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -070082
83 BaseIterator operator++(int) {
Mathieu Chartierc2e20622014-11-03 11:41:47 -080084 Iterator temp = *this;
Mathieu Chartier47f867a2015-03-18 10:39:00 -070085 this->index_ = this->NextNonEmptySlot(this->index_, hash_set_);
Mathieu Chartierc2e20622014-11-03 11:41:47 -080086 return temp;
87 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -070088
89 Elem& operator*() const {
90 DCHECK(!hash_set_->IsFreeSlot(this->index_));
91 return hash_set_->ElementForIndex(this->index_);
Mathieu Chartierc2e20622014-11-03 11:41:47 -080092 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -070093
94 Elem* operator->() const {
Mathieu Chartierc2e20622014-11-03 11:41:47 -080095 return &**this;
96 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -070097
Mathieu Chartierc2e20622014-11-03 11:41:47 -080098 // TODO: Operator -- --(int)
99
100 private:
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800101 size_t index_;
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700102 HashSetType* hash_set_;
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800103
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700104 size_t NextNonEmptySlot(size_t index, const HashSet* hash_set) const {
105 const size_t num_buckets = hash_set->NumBuckets();
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800106 DCHECK_LT(index, num_buckets);
107 do {
108 ++index;
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700109 } while (index < num_buckets && hash_set->IsFreeSlot(index));
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800110 return index;
111 }
112
113 friend class HashSet;
114 };
115
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700116 public:
117 static constexpr double kDefaultMinLoadFactor = 0.5;
118 static constexpr double kDefaultMaxLoadFactor = 0.9;
119 static constexpr size_t kMinBuckets = 1000;
120
121 typedef BaseIterator<T, HashSet> Iterator;
122 typedef BaseIterator<const T, const HashSet> ConstIterator;
123
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800124 void Clear() {
125 DeallocateStorage();
126 AllocateStorage(1);
127 num_elements_ = 0;
128 elements_until_expand_ = 0;
129 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700130
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800131 HashSet() : num_elements_(0), num_buckets_(0), data_(nullptr),
132 min_load_factor_(kDefaultMinLoadFactor), max_load_factor_(kDefaultMaxLoadFactor) {
133 Clear();
134 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700135
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800136 HashSet(const HashSet& other) : num_elements_(0), num_buckets_(0), data_(nullptr) {
137 *this = other;
138 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700139
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800140 HashSet(HashSet&& other) : num_elements_(0), num_buckets_(0), data_(nullptr) {
141 *this = std::move(other);
142 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700143
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800144 ~HashSet() {
145 DeallocateStorage();
146 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700147
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800148 HashSet& operator=(HashSet&& other) {
149 std::swap(data_, other.data_);
150 std::swap(num_buckets_, other.num_buckets_);
151 std::swap(num_elements_, other.num_elements_);
152 std::swap(elements_until_expand_, other.elements_until_expand_);
153 std::swap(min_load_factor_, other.min_load_factor_);
154 std::swap(max_load_factor_, other.max_load_factor_);
155 return *this;
156 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700157
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800158 HashSet& operator=(const HashSet& other) {
159 DeallocateStorage();
160 AllocateStorage(other.NumBuckets());
161 for (size_t i = 0; i < num_buckets_; ++i) {
162 ElementForIndex(i) = other.data_[i];
163 }
164 num_elements_ = other.num_elements_;
165 elements_until_expand_ = other.elements_until_expand_;
166 min_load_factor_ = other.min_load_factor_;
167 max_load_factor_ = other.max_load_factor_;
168 return *this;
169 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700170
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800171 // Lower case for c++11 for each.
172 Iterator begin() {
173 Iterator ret(this, 0);
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700174 if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800175 ++ret; // Skip all the empty slots.
176 }
177 return ret;
178 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700179
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800180 // Lower case for c++11 for each.
181 Iterator end() {
182 return Iterator(this, NumBuckets());
183 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700184
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800185 bool Empty() {
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700186 return Size() == 0;
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800187 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700188
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800189 // Erase algorithm:
190 // Make an empty slot where the iterator is pointing.
191 // Scan fowards until we hit another empty slot.
192 // If an element inbetween doesn't rehash to the range from the current empty slot to the
193 // iterator. It must be before the empty slot, in that case we can move it to the empty slot
194 // and set the empty slot to be the location we just moved from.
195 // Relies on maintaining the invariant that there's no empty slots from the 'ideal' index of an
196 // element to its actual location/index.
197 Iterator Erase(Iterator it) {
198 // empty_index is the index that will become empty.
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700199 size_t empty_index = it.index_;
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800200 DCHECK(!IsFreeSlot(empty_index));
201 size_t next_index = empty_index;
202 bool filled = false; // True if we filled the empty index.
203 while (true) {
204 next_index = NextIndex(next_index);
205 T& next_element = ElementForIndex(next_index);
206 // If the next element is empty, we are done. Make sure to clear the current empty index.
207 if (emptyfn_.IsEmpty(next_element)) {
208 emptyfn_.MakeEmpty(ElementForIndex(empty_index));
209 break;
210 }
211 // Otherwise try to see if the next element can fill the current empty index.
212 const size_t next_hash = hashfn_(next_element);
213 // Calculate the ideal index, if it is within empty_index + 1 to next_index then there is
214 // nothing we can do.
215 size_t next_ideal_index = IndexForHash(next_hash);
216 // Loop around if needed for our check.
217 size_t unwrapped_next_index = next_index;
218 if (unwrapped_next_index < empty_index) {
219 unwrapped_next_index += NumBuckets();
220 }
221 // Loop around if needed for our check.
222 size_t unwrapped_next_ideal_index = next_ideal_index;
223 if (unwrapped_next_ideal_index < empty_index) {
224 unwrapped_next_ideal_index += NumBuckets();
225 }
226 if (unwrapped_next_ideal_index <= empty_index ||
227 unwrapped_next_ideal_index > unwrapped_next_index) {
228 // If the target index isn't within our current range it must have been probed from before
229 // the empty index.
230 ElementForIndex(empty_index) = std::move(next_element);
231 filled = true; // TODO: Optimize
232 empty_index = next_index;
233 }
234 }
235 --num_elements_;
236 // If we didn't fill the slot then we need go to the next non free slot.
237 if (!filled) {
238 ++it;
239 }
240 return it;
241 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700242
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800243 // Find an element, returns end() if not found.
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700244 // Allows custom key (K) types, example of when this is useful:
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800245 // Set of Class* sorted by name, want to find a class with a name but can't allocate a dummy
246 // object in the heap for performance solution.
247 template <typename K>
248 Iterator Find(const K& element) {
249 return FindWithHash(element, hashfn_(element));
250 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700251
252 template <typename K>
253 ConstIterator Find(const K& element) const {
254 return FindWithHash(element, hashfn_(element));
255 }
256
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800257 template <typename K>
258 Iterator FindWithHash(const K& element, size_t hash) {
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700259 return Iterator(this, FindIndex(element, hash));
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800260 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700261
262 template <typename K>
263 ConstIterator FindWithHash(const K& element, size_t hash) const {
264 return ConstIterator(this, FindIndex(element, hash));
265 }
266
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800267 // Insert an element, allows duplicates.
268 void Insert(const T& element) {
269 InsertWithHash(element, hashfn_(element));
270 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700271
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800272 void InsertWithHash(const T& element, size_t hash) {
273 DCHECK_EQ(hash, hashfn_(element));
274 if (num_elements_ >= elements_until_expand_) {
275 Expand();
276 DCHECK_LT(num_elements_, elements_until_expand_);
277 }
278 const size_t index = FirstAvailableSlot(IndexForHash(hash));
279 data_[index] = element;
280 ++num_elements_;
281 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700282
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800283 size_t Size() const {
284 return num_elements_;
285 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700286
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800287 void ShrinkToMaximumLoad() {
288 Resize(Size() / max_load_factor_);
289 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700290
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800291 // To distance that inserted elements were probed. Used for measuring how good hash functions
292 // are.
293 size_t TotalProbeDistance() const {
294 size_t total = 0;
295 for (size_t i = 0; i < NumBuckets(); ++i) {
296 const T& element = ElementForIndex(i);
297 if (!emptyfn_.IsEmpty(element)) {
298 size_t ideal_location = IndexForHash(hashfn_(element));
299 if (ideal_location > i) {
300 total += i + NumBuckets() - ideal_location;
301 } else {
302 total += i - ideal_location;
303 }
304 }
305 }
306 return total;
307 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700308
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800309 // Calculate the current load factor and return it.
310 double CalculateLoadFactor() const {
311 return static_cast<double>(Size()) / static_cast<double>(NumBuckets());
312 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700313
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800314 // Make sure that everything reinserts in the right spot. Returns the number of errors.
315 size_t Verify() {
316 size_t errors = 0;
317 for (size_t i = 0; i < num_buckets_; ++i) {
318 T& element = data_[i];
319 if (!emptyfn_.IsEmpty(element)) {
320 T temp;
321 emptyfn_.MakeEmpty(temp);
322 std::swap(temp, element);
323 size_t first_slot = FirstAvailableSlot(IndexForHash(hashfn_(temp)));
324 if (i != first_slot) {
325 LOG(ERROR) << "Element " << i << " should be in slot " << first_slot;
326 ++errors;
327 }
328 std::swap(temp, element);
329 }
330 }
331 return errors;
332 }
333
334 private:
335 T& ElementForIndex(size_t index) {
336 DCHECK_LT(index, NumBuckets());
337 DCHECK(data_ != nullptr);
338 return data_[index];
339 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700340
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800341 const T& ElementForIndex(size_t index) const {
342 DCHECK_LT(index, NumBuckets());
343 DCHECK(data_ != nullptr);
344 return data_[index];
345 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700346
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800347 size_t IndexForHash(size_t hash) const {
348 return hash % num_buckets_;
349 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700350
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800351 size_t NextIndex(size_t index) const {
352 if (UNLIKELY(++index >= num_buckets_)) {
353 DCHECK_EQ(index, NumBuckets());
354 return 0;
355 }
356 return index;
357 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700358
359 // Find the hash table slot for an element, or return NumBuckets() if not found.
360 // This value for not found is important so that Iterator(this, FindIndex(...)) == end().
361 template <typename K>
362 size_t FindIndex(const K& element, size_t hash) const {
363 DCHECK_EQ(hashfn_(element), hash);
364 size_t index = IndexForHash(hash);
365 while (true) {
366 const T& slot = ElementForIndex(index);
367 if (emptyfn_.IsEmpty(slot)) {
368 return NumBuckets();
369 }
370 if (pred_(slot, element)) {
371 return index;
372 }
373 index = NextIndex(index);
374 }
375 }
376
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800377 bool IsFreeSlot(size_t index) const {
378 return emptyfn_.IsEmpty(ElementForIndex(index));
379 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700380
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800381 size_t NumBuckets() const {
382 return num_buckets_;
383 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700384
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800385 // Allocate a number of buckets.
386 void AllocateStorage(size_t num_buckets) {
387 num_buckets_ = num_buckets;
388 data_ = allocfn_.allocate(num_buckets_);
389 for (size_t i = 0; i < num_buckets_; ++i) {
390 allocfn_.construct(allocfn_.address(data_[i]));
391 emptyfn_.MakeEmpty(data_[i]);
392 }
393 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700394
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800395 void DeallocateStorage() {
396 if (num_buckets_ != 0) {
397 for (size_t i = 0; i < NumBuckets(); ++i) {
398 allocfn_.destroy(allocfn_.address(data_[i]));
399 }
400 allocfn_.deallocate(data_, NumBuckets());
401 data_ = nullptr;
402 num_buckets_ = 0;
403 }
404 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700405
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800406 // Expand the set based on the load factors.
407 void Expand() {
408 size_t min_index = static_cast<size_t>(Size() / min_load_factor_);
409 if (min_index < kMinBuckets) {
410 min_index = kMinBuckets;
411 }
412 // Resize based on the minimum load factor.
413 Resize(min_index);
414 // When we hit elements_until_expand_, we are at the max load factor and must expand again.
415 elements_until_expand_ = NumBuckets() * max_load_factor_;
416 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700417
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800418 // Expand / shrink the table to the new specified size.
419 void Resize(size_t new_size) {
420 DCHECK_GE(new_size, Size());
421 T* old_data = data_;
422 size_t old_num_buckets = num_buckets_;
423 // Reinsert all of the old elements.
424 AllocateStorage(new_size);
425 for (size_t i = 0; i < old_num_buckets; ++i) {
426 T& element = old_data[i];
427 if (!emptyfn_.IsEmpty(element)) {
428 data_[FirstAvailableSlot(IndexForHash(hashfn_(element)))] = std::move(element);
429 }
430 allocfn_.destroy(allocfn_.address(element));
431 }
432 allocfn_.deallocate(old_data, old_num_buckets);
433 }
Mathieu Chartier47f867a2015-03-18 10:39:00 -0700434
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800435 ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const {
436 while (!emptyfn_.IsEmpty(data_[index])) {
437 index = NextIndex(index);
438 }
439 return index;
440 }
441
442 Alloc allocfn_; // Allocator function.
443 HashFn hashfn_; // Hashing function.
444 EmptyFn emptyfn_; // IsEmpty/SetEmpty function.
445 Pred pred_; // Equals function.
446 size_t num_elements_; // Number of inserted elements.
447 size_t num_buckets_; // Number of hash table buckets.
448 size_t elements_until_expand_; // Maxmimum number of elements until we expand the table.
449 T* data_; // Backing storage.
450 double min_load_factor_;
451 double max_load_factor_;
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800452};
453
454} // namespace art
455
456#endif // ART_RUNTIME_BASE_HASH_SET_H_