Martin Stjernholm | 4fb5111 | 2021-04-30 11:53:52 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2014 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #ifndef ART_LIBARTBASE_BASE_HASH_SET_H_ |
| 18 | #define ART_LIBARTBASE_BASE_HASH_SET_H_ |
| 19 | |
| 20 | #include <stdint.h> |
| 21 | |
| 22 | #include <functional> |
| 23 | #include <iterator> |
| 24 | #include <memory> |
| 25 | #include <string> |
| 26 | #include <type_traits> |
| 27 | #include <utility> |
| 28 | |
| 29 | #include <android-base/logging.h> |
| 30 | |
| 31 | #include "base/data_hash.h" |
| 32 | #include "bit_utils.h" |
| 33 | #include "macros.h" |
| 34 | |
| 35 | namespace art { |
| 36 | |
// Forward iterator over the occupied slots of a HashSet. `HashSetType` is
// either `HashSet<...>` (mutable iterator) or `const HashSet<...>`
// (const_iterator); `Elem` is correspondingly `T` or `const T`.
template <class Elem, class HashSetType>
class HashSetIterator {
 public:
  using iterator_category = std::forward_iterator_tag;
  using value_type = Elem;
  using difference_type = std::ptrdiff_t;
  using pointer = Elem*;
  using reference = Elem&;

  HashSetIterator(const HashSetIterator&) = default;
  HashSetIterator(HashSetIterator&&) noexcept = default;
  // Construct an iterator referring to slot `index` of `hash_set`.
  HashSetIterator(HashSetType* hash_set, size_t index) : index_(index), hash_set_(hash_set) {}

  // Conversion from iterator to const_iterator.
  template <class OtherElem,
            class OtherHashSetType,
            typename = std::enable_if_t<
                std::is_same_v<Elem, const OtherElem> &&
                std::is_same_v<HashSetType, const OtherHashSetType>>>
  HashSetIterator(const HashSetIterator<OtherElem, OtherHashSetType>& other)
      : index_(other.index_), hash_set_(other.hash_set_) {}

  HashSetIterator& operator=(const HashSetIterator&) = default;
  HashSetIterator& operator=(HashSetIterator&&) noexcept = default;

  bool operator==(const HashSetIterator& other) const {
    return hash_set_ == other.hash_set_ && this->index_ == other.index_;
  }

  bool operator!=(const HashSetIterator& other) const {
    return !(*this == other);
  }

  // Pre-increment. Returns a reference per the forward-iterator convention
  // (previously returned a copy of the iterator, which callers never need).
  HashSetIterator& operator++() {  // Value after modification.
    this->index_ = hash_set_->NextNonEmptySlot(index_);
    return *this;
  }

  // Post-increment: advances, returns the pre-increment value.
  HashSetIterator operator++(int) {
    HashSetIterator temp = *this;
    ++*this;
    return temp;
  }

  Elem& operator*() const {
    DCHECK(!hash_set_->IsFreeSlot(this->index_));
    return hash_set_->ElementForIndex(this->index_);
  }

  Elem* operator->() const {
    return &**this;
  }

 private:
  size_t index_;           // Slot index; NumBuckets() means end().
  HashSetType* hash_set_;  // Not owned.

  template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
  friend bool operator==(const HashSetIterator<Elem1, HashSetType1>& lhs,
                         const HashSetIterator<Elem2, HashSetType2>& rhs);
  template <class T, class EmptyFn, class HashFn, class Pred, class Alloc> friend class HashSet;
  template <class OtherElem, class OtherHashSetType> friend class HashSetIterator;
};
| 100 | |
| 101 | template <class Elem1, class HashSetType1, class Elem2, class HashSetType2> |
| 102 | bool operator==(const HashSetIterator<Elem1, HashSetType1>& lhs, |
| 103 | const HashSetIterator<Elem2, HashSetType2>& rhs) { |
| 104 | static_assert( |
satayev | 499be97 | 2022-05-13 15:05:39 +0000 | [diff] [blame] | 105 | std::is_convertible_v<HashSetIterator<Elem1, HashSetType1>, |
| 106 | HashSetIterator<Elem2, HashSetType2>> || |
| 107 | std::is_convertible_v<HashSetIterator<Elem2, HashSetType2>, |
| 108 | HashSetIterator<Elem1, HashSetType1>>, "Bad iterator types."); |
Martin Stjernholm | 4fb5111 | 2021-04-30 11:53:52 +0100 | [diff] [blame] | 109 | DCHECK_EQ(lhs.hash_set_, rhs.hash_set_); |
| 110 | return lhs.index_ == rhs.index_; |
| 111 | } |
| 112 | |
| 113 | template <class Elem1, class HashSetType1, class Elem2, class HashSetType2> |
| 114 | bool operator!=(const HashSetIterator<Elem1, HashSetType1>& lhs, |
| 115 | const HashSetIterator<Elem2, HashSetType2>& rhs) { |
| 116 | return !(lhs == rhs); |
| 117 | } |
| 118 | |
// Default empty-slot policy: a value-initialized T marks a free slot.
// A HashSet EmptyFn must provide MakeEmpty(T&) and IsEmpty(const T&).
template <class T>
class DefaultEmptyFn {
 public:
  // Overwrite `slot` with the empty sentinel (value-initialized T).
  void MakeEmpty(T& slot) const {
    slot = T();
  }
  // A slot is free iff it compares equal to a value-initialized T.
  bool IsEmpty(const T& slot) const {
    return slot == T();
  }
};
| 130 | |
| 131 | template <class T> |
| 132 | class DefaultEmptyFn<T*> { |
| 133 | public: |
| 134 | void MakeEmpty(T*& item) const { |
| 135 | item = nullptr; |
| 136 | } |
| 137 | bool IsEmpty(T* const& item) const { |
| 138 | return item == nullptr; |
| 139 | } |
| 140 | }; |
| 141 | |
Fairphone ODM | 25c12f5 | 2023-12-15 17:24:06 +0800 | [diff] [blame] | 142 | template <> |
| 143 | class DefaultEmptyFn<std::string> { |
| 144 | public: |
| 145 | void MakeEmpty(std::string& item) const { |
| 146 | item = std::string(); |
| 147 | } |
| 148 | bool IsEmpty(const std::string& item) const { |
| 149 | return item.empty(); |
| 150 | } |
| 151 | }; |
| 152 | |
// Default hash functor: DataHash for std::string keys, std::hash otherwise.
template <class T>
using DefaultHashFn = std::conditional_t<std::is_same_v<T, std::string>, DataHash, std::hash<T>>;
Martin Stjernholm | 4fb5111 | 2021-04-30 11:53:52 +0100 | [diff] [blame] | 155 | |
// Equality predicate for std::string keys. The right-hand side is templated
// so heterogeneous lookups work without materializing a std::string.
struct DefaultStringEquals {
  // Allow comparison with anything that can be compared to std::string,
  // for example std::string_view.
  template <typename T>
  bool operator()(const std::string& stored, const T& key) const {
    return stored == key;
  }
};
| 164 | |
// Default equality predicate: DefaultStringEquals for std::string (enables
// heterogeneous lookup, e.g. by std::string_view), std::equal_to otherwise.
template <class T>
using DefaultPred =
    std::conditional_t<std::is_same_v<T, std::string>, DefaultStringEquals, std::equal_to<T>>;
Martin Stjernholm | 4fb5111 | 2021-04-30 11:53:52 +0100 | [diff] [blame] | 168 | |
| 169 | // Low memory version of a hash set, uses less memory than std::unordered_multiset since elements |
| 170 | // aren't boxed. Uses linear probing to resolve collisions. |
| 171 | // EmptyFn needs to implement two functions MakeEmpty(T& item) and IsEmpty(const T& item). |
| 172 | // TODO: We could get rid of this requirement by using a bitmap, though maybe this would be slower |
| 173 | // and more complicated. |
| 174 | template <class T, |
| 175 | class EmptyFn = DefaultEmptyFn<T>, |
| 176 | class HashFn = DefaultHashFn<T>, |
| 177 | class Pred = DefaultPred<T>, |
| 178 | class Alloc = std::allocator<T>> |
| 179 | class HashSet { |
| 180 | public: |
| 181 | using value_type = T; |
| 182 | using allocator_type = Alloc; |
| 183 | using reference = T&; |
| 184 | using const_reference = const T&; |
| 185 | using pointer = T*; |
| 186 | using const_pointer = const T*; |
| 187 | using iterator = HashSetIterator<T, HashSet>; |
| 188 | using const_iterator = HashSetIterator<const T, const HashSet>; |
| 189 | using size_type = size_t; |
| 190 | using difference_type = ptrdiff_t; |
| 191 | |
| 192 | static constexpr double kDefaultMinLoadFactor = 0.4; |
| 193 | static constexpr double kDefaultMaxLoadFactor = 0.7; |
| 194 | static constexpr size_t kMinBuckets = 1000; |
| 195 | |
  // Empty the set. Owned storage is freed; borrowed (caller-provided) storage
  // is simply dropped. Either way the set ends up with zero buckets and
  // elements_until_expand_ == 0, so the next insertion calls Expand() and
  // allocates a fresh, owned array.
  void clear() {
    DeallocateStorage();
    num_elements_ = 0;
    elements_until_expand_ = 0;
  }

  // Delegating constructors; everything funnels into the fully-specified
  // constructor at the bottom. No buckets are allocated up front.
  HashSet() : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor) {}
  explicit HashSet(const allocator_type& alloc) noexcept
      : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, alloc) {}

  HashSet(double min_load_factor, double max_load_factor) noexcept
      : HashSet(min_load_factor, max_load_factor, allocator_type()) {}
  HashSet(double min_load_factor, double max_load_factor, const allocator_type& alloc) noexcept
      : HashSet(min_load_factor, max_load_factor, HashFn(), Pred(), alloc) {}

  HashSet(const HashFn& hashfn,
          const Pred& pred) noexcept
      : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, hashfn, pred) {}
  HashSet(const HashFn& hashfn,
          const Pred& pred,
          const allocator_type& alloc) noexcept
      : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, hashfn, pred, alloc) {}

  HashSet(double min_load_factor,
          double max_load_factor,
          const HashFn& hashfn,
          const Pred& pred) noexcept
      : HashSet(min_load_factor, max_load_factor, hashfn, pred, allocator_type()) {}
  // Fully-specified constructor: starts with no storage (data_ == nullptr,
  // owns_data_ == false); buckets appear on first insertion or reserve().
  HashSet(double min_load_factor,
          double max_load_factor,
          const HashFn& hashfn,
          const Pred& pred,
          const allocator_type& alloc) noexcept
      : allocfn_(alloc),
        hashfn_(hashfn),
        emptyfn_(),
        pred_(pred),
        num_elements_(0u),
        num_buckets_(0u),
        elements_until_expand_(0u),
        owns_data_(false),
        data_(nullptr),
        min_load_factor_(min_load_factor),
        max_load_factor_(max_load_factor) {
    // Load factors must be a sane open interval; note min < max is only
    // checked later in SetLoadFactor(), not here.
    DCHECK_GT(min_load_factor, 0.0);
    DCHECK_LT(max_load_factor, 1.0);
  }
| 244 | |
  // Copy constructor: allocates an owned bucket array of the same size and
  // copies every slot (occupied and empty alike), preserving slot positions.
  HashSet(const HashSet& other)
      : allocfn_(other.allocfn_),
        hashfn_(other.hashfn_),
        emptyfn_(other.emptyfn_),
        pred_(other.pred_),
        num_elements_(other.num_elements_),
        num_buckets_(0),  // Set for real by AllocateStorage() below.
        elements_until_expand_(other.elements_until_expand_),
        owns_data_(false),
        data_(nullptr),
        min_load_factor_(other.min_load_factor_),
        max_load_factor_(other.max_load_factor_) {
    AllocateStorage(other.NumBuckets());
    for (size_t i = 0; i < num_buckets_; ++i) {
      ElementForIndex(i) = other.data_[i];
    }
  }

  // noexcept required so that the move constructor is used instead of copy constructor.
  // b/27860101
  // Move constructor: steals the storage (owned or borrowed) and leaves
  // `other` in a valid, empty, bucket-less state.
  HashSet(HashSet&& other) noexcept
      : allocfn_(std::move(other.allocfn_)),
        hashfn_(std::move(other.hashfn_)),
        emptyfn_(std::move(other.emptyfn_)),
        pred_(std::move(other.pred_)),
        num_elements_(other.num_elements_),
        num_buckets_(other.num_buckets_),
        elements_until_expand_(other.elements_until_expand_),
        owns_data_(other.owns_data_),
        data_(other.data_),
        min_load_factor_(other.min_load_factor_),
        max_load_factor_(other.max_load_factor_) {
    other.num_elements_ = 0u;
    other.num_buckets_ = 0u;
    other.elements_until_expand_ = 0u;
    other.owns_data_ = false;
    other.data_ = nullptr;
  }

  // Construct with pre-existing buffer, usually stack-allocated,
  // to avoid malloc/free overhead for small HashSet<>s.
  // The buffer is never owned by the set (owns_data_ stays false).
  HashSet(value_type* buffer, size_t buffer_size)
      : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, buffer, buffer_size) {}
  HashSet(value_type* buffer, size_t buffer_size, const allocator_type& alloc)
      : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor, buffer, buffer_size, alloc) {}
  HashSet(double min_load_factor, double max_load_factor, value_type* buffer, size_t buffer_size)
      : HashSet(min_load_factor, max_load_factor, buffer, buffer_size, allocator_type()) {}
  HashSet(double min_load_factor,
          double max_load_factor,
          value_type* buffer,
          size_t buffer_size,
          const allocator_type& alloc)
      : HashSet(min_load_factor, max_load_factor, HashFn(), Pred(), buffer, buffer_size, alloc) {}
  // Fully-specified buffer constructor. emptyfn_ is deliberately left to its
  // default construction (it is not in the init list) and is then used to
  // mark every buffer slot as empty.
  HashSet(double min_load_factor,
          double max_load_factor,
          const HashFn& hashfn,
          const Pred& pred,
          value_type* buffer,
          size_t buffer_size,
          const allocator_type& alloc)
      : allocfn_(alloc),
        hashfn_(hashfn),
        pred_(pred),
        num_elements_(0u),
        num_buckets_(buffer_size),
        elements_until_expand_(buffer_size * max_load_factor),
        owns_data_(false),
        data_(buffer),
        min_load_factor_(min_load_factor),
        max_load_factor_(max_load_factor) {
    DCHECK_GT(min_load_factor, 0.0);
    DCHECK_LT(max_load_factor, 1.0);
    for (size_t i = 0; i != buffer_size; ++i) {
      emptyfn_.MakeEmpty(buffer[i]);
    }
  }
| 321 | |
  // Construct from existing data.
  // Read from a block of memory, if make_copy_of_data is false, then data_ points to within the
  // passed in ptr_.
  // Serialized layout (mirrors WriteToMemory): num_elements, num_buckets and
  // elements_until_expand as uint64_t, then min/max load factors as double,
  // then the raw bucket array. Function objects (hashfn_ etc.) are
  // default-constructed, not deserialized.
  HashSet(const uint8_t* ptr, bool make_copy_of_data, size_t* read_count) noexcept {
    uint64_t temp;
    size_t offset = 0;
    offset = ReadFromBytes(ptr, offset, &temp);
    num_elements_ = static_cast<uint64_t>(temp);
    offset = ReadFromBytes(ptr, offset, &temp);
    num_buckets_ = static_cast<uint64_t>(temp);
    CHECK_LE(num_elements_, num_buckets_);
    offset = ReadFromBytes(ptr, offset, &temp);
    elements_until_expand_ = static_cast<uint64_t>(temp);
    offset = ReadFromBytes(ptr, offset, &min_load_factor_);
    offset = ReadFromBytes(ptr, offset, &max_load_factor_);
    if (!make_copy_of_data) {
      // Borrow the bucket array in place; caller must keep `ptr` alive.
      owns_data_ = false;
      data_ = const_cast<T*>(reinterpret_cast<const T*>(ptr + offset));
      offset += sizeof(*data_) * num_buckets_;
    } else {
      AllocateStorage(num_buckets_);
      // Read elements, note that this may not be safe for cross compilation if the elements are
      // pointer sized.
      for (size_t i = 0; i < num_buckets_; ++i) {
        offset = ReadFromBytes(ptr, offset, &data_[i]);
      }
    }
    // Caller responsible for aligning.
    *read_count = offset;
  }

  // Returns how large the table is after being written. If target is null, then no writing happens
  // but the size is still returned. Target must be 8 byte aligned.
  size_t WriteToMemory(uint8_t* ptr) const {
    size_t offset = 0;
    offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_elements_));
    offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_buckets_));
    offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(elements_until_expand_));
    offset = WriteToBytes(ptr, offset, min_load_factor_);
    offset = WriteToBytes(ptr, offset, max_load_factor_);
    // Write elements, note that this may not be safe for cross compilation if the elements are
    // pointer sized.
    for (size_t i = 0; i < num_buckets_; ++i) {
      offset = WriteToBytes(ptr, offset, data_[i]);
    }
    // Caller responsible for aligning.
    return offset;
  }
| 370 | |
  // Frees owned storage; a borrowed buffer is simply dropped.
  ~HashSet() {
    DeallocateStorage();
  }

  // Move assignment via move-construct + swap; `other` is left empty
  // (see the move constructor).
  HashSet& operator=(HashSet&& other) noexcept {
    HashSet(std::move(other)).swap(*this);  // NOLINT [runtime/explicit] [5]
    return *this;
  }

  // Copy assignment via copy-construct + swap; *this is untouched if the
  // copy construction fails.
  HashSet& operator=(const HashSet& other) {
    HashSet(other).swap(*this);  // NOLINT(runtime/explicit) - a case of lint gone mad.
    return *this;
  }
| 384 | |
| 385 | // Lower case for c++11 for each. |
| 386 | iterator begin() { |
| 387 | iterator ret(this, 0); |
| 388 | if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) { |
| 389 | ++ret; // Skip all the empty slots. |
| 390 | } |
| 391 | return ret; |
| 392 | } |
| 393 | |
| 394 | // Lower case for c++11 for each. const version. |
| 395 | const_iterator begin() const { |
| 396 | const_iterator ret(this, 0); |
| 397 | if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) { |
| 398 | ++ret; // Skip all the empty slots. |
| 399 | } |
| 400 | return ret; |
| 401 | } |
| 402 | |
  // Past-the-end iterator: index == NumBuckets(), matching the not-found
  // value returned by FindIndex(). Lower case for C++11 range-for.
  iterator end() {
    return iterator(this, NumBuckets());
  }

  // Const overload of end().
  const_iterator end() const {
    return const_iterator(this, NumBuckets());
  }

  // Number of stored elements (not the bucket count).
  size_t size() const {
    return num_elements_;
  }

  bool empty() const {
    return size() == 0;
  }
| 420 | |
  // Erase algorithm:
  // Make an empty slot where the iterator is pointing.
  // Scan forwards until we hit another empty slot.
  // If an element in between doesn't rehash to the range from the current empty slot to the
  // iterator. It must be before the empty slot, in that case we can move it to the empty slot
  // and set the empty slot to be the location we just moved from.
  // Relies on maintaining the invariant that there's no empty slots from the 'ideal' index of an
  // element to its actual location/index.
  // Note that since erase shuffles back elements, it may result in the same element being visited
  // twice during HashSet iteration. This happens when an element already visited during iteration
  // gets shuffled to the end of the bucket array.
  // (This is the classic backward-shift deletion for linear probing.)
  iterator erase(iterator it) {
    // empty_index is the index that will become empty.
    size_t empty_index = it.index_;
    DCHECK(!IsFreeSlot(empty_index));
    size_t next_index = empty_index;
    bool filled = false;  // True if we filled the empty index.
    while (true) {
      next_index = NextIndex(next_index);  // Wraps around the table.
      T& next_element = ElementForIndex(next_index);
      // If the next element is empty, we are done. Make sure to clear the current empty index.
      if (emptyfn_.IsEmpty(next_element)) {
        emptyfn_.MakeEmpty(ElementForIndex(empty_index));
        break;
      }
      // Otherwise try to see if the next element can fill the current empty index.
      const size_t next_hash = hashfn_(next_element);
      // Calculate the ideal index, if it is within empty_index + 1 to next_index then there is
      // nothing we can do.
      size_t next_ideal_index = IndexForHash(next_hash);
      // Loop around if needed for our check.
      // Unwrap both indices relative to empty_index so the interval check
      // below works even when the probe sequence wrapped past the end.
      size_t unwrapped_next_index = next_index;
      if (unwrapped_next_index < empty_index) {
        unwrapped_next_index += NumBuckets();
      }
      // Loop around if needed for our check.
      size_t unwrapped_next_ideal_index = next_ideal_index;
      if (unwrapped_next_ideal_index < empty_index) {
        unwrapped_next_ideal_index += NumBuckets();
      }
      if (unwrapped_next_ideal_index <= empty_index ||
          unwrapped_next_ideal_index > unwrapped_next_index) {
        // If the target index isn't within our current range it must have been probed from before
        // the empty index.
        ElementForIndex(empty_index) = std::move(next_element);
        filled = true;  // TODO: Optimize
        empty_index = next_index;  // The moved-from slot becomes the hole.
      }
    }
    --num_elements_;
    // If we didn't fill the slot then we need go to the next non free slot.
    if (!filled) {
      ++it;
    }
    return it;
  }
| 477 | |
  // Find an element, returns end() if not found.
  // Allows custom key (K) types, example of when this is useful:
  // Set of Class* indexed by name, want to find a class with a name but can't allocate
  // a temporary Class object in the heap for performance solution.
  template <typename K>
  iterator find(const K& key) {
    return FindWithHash(key, hashfn_(key));
  }

  template <typename K>
  const_iterator find(const K& key) const {
    return FindWithHash(key, hashfn_(key));
  }

  // find() variant for callers that already computed the hash.
  template <typename K>
  iterator FindWithHash(const K& key, size_t hash) {
    return iterator(this, FindIndex(key, hash));
  }

  template <typename K>
  const_iterator FindWithHash(const K& key, size_t hash) const {
    return const_iterator(this, FindIndex(key, hash));
  }

  // Insert an element with hint.
  // Note: The hint is not very useful for a HashSet<> unless there are many hash conflicts
  // and in that case the use of HashSet<> itself should be reconsidered.
  std::pair<iterator, bool> insert(const_iterator hint ATTRIBUTE_UNUSED, const T& element) {
    return insert(element);
  }
  std::pair<iterator, bool> insert(const_iterator hint ATTRIBUTE_UNUSED, T&& element) {
    return insert(std::move(element));
  }

  // Insert an element. Returns {iterator, true} on insertion, or
  // {iterator to the existing equal element, false} if already present.
  std::pair<iterator, bool> insert(const T& element) {
    return InsertWithHash(element, hashfn_(element));
  }
  std::pair<iterator, bool> insert(T&& element) {
    return InsertWithHash(std::move(element), hashfn_(element));
  }

  // insert() variant for callers that already computed the hash.
  template <typename U, typename = std::enable_if_t<std::is_convertible_v<U, T>>>
  std::pair<iterator, bool> InsertWithHash(U&& element, size_t hash) {
    DCHECK_EQ(hash, hashfn_(element));
    // Grow first so the probe below runs on the post-expansion table.
    if (num_elements_ >= elements_until_expand_) {
      Expand();
      DCHECK_LT(num_elements_, elements_until_expand_);
    }
    bool find_failed = false;
    auto find_fail_fn = [&](size_t index) ALWAYS_INLINE {
      find_failed = true;
      return index;  // The empty slot where the element belongs.
    };
    size_t index = FindIndexImpl(element, hash, find_fail_fn);
    if (find_failed) {
      data_[index] = std::forward<U>(element);
      ++num_elements_;
    }
    return std::make_pair(iterator(this, index), find_failed);
  }

  // Insert an element known not to be in the `HashSet<>`.
  void Put(const T& element) {
    return PutWithHash(element, hashfn_(element));
  }
  void Put(T&& element) {
    return PutWithHash(std::move(element), hashfn_(element));
  }

  // Put() variant for callers that already computed the hash. Skips the
  // equality comparisons of insert(); presence is only DCHECKed
  // (via kCanFind == false in FindIndexImpl).
  template <typename U, typename = std::enable_if_t<std::is_convertible_v<U, T>>>
  void PutWithHash(U&& element, size_t hash) {
    DCHECK_EQ(hash, hashfn_(element));
    if (num_elements_ >= elements_until_expand_) {
      Expand();
      DCHECK_LT(num_elements_, elements_until_expand_);
    }
    auto find_fail_fn = [](size_t index) ALWAYS_INLINE { return index; };
    size_t index = FindIndexImpl</*kCanFind=*/ false>(element, hash, find_fail_fn);
    data_[index] = std::forward<U>(element);
    ++num_elements_;
  }
| 560 | |
  // Exchange the entire contents (including storage ownership) with `other`.
  void swap(HashSet& other) {
    // Use argument-dependent lookup with fall-back to std::swap() for function objects.
    using std::swap;
    swap(allocfn_, other.allocfn_);
    swap(hashfn_, other.hashfn_);
    swap(emptyfn_, other.emptyfn_);
    swap(pred_, other.pred_);
    std::swap(data_, other.data_);
    std::swap(num_buckets_, other.num_buckets_);
    std::swap(num_elements_, other.num_elements_);
    std::swap(elements_until_expand_, other.elements_until_expand_);
    std::swap(min_load_factor_, other.min_load_factor_);
    std::swap(max_load_factor_, other.max_load_factor_);
    std::swap(owns_data_, other.owns_data_);
  }

  allocator_type get_allocator() const {
    return allocfn_;
  }

  // Resize the bucket array so the load factor becomes the configured
  // maximum (i.e. release slack after bulk removals).
  void ShrinkToMaximumLoad() {
    Resize(size() / max_load_factor_);
  }
| 584 | |
  // Reserve enough room to insert until Size() == num_elements without requiring to grow the hash
  // set. No-op if the hash set is already large enough to do this.
  // Only ever grows the table; never shrinks.
  void reserve(size_t num_elements) {
    size_t num_buckets = num_elements / max_load_factor_;
    // Deal with rounding errors. Add one for rounding.
    // Bump the bucket count until the expansion threshold safely exceeds the
    // requested element count (guards against float truncation above).
    while (static_cast<size_t>(num_buckets * max_load_factor_) <= num_elements + 1u) {
      ++num_buckets;
    }
    if (num_buckets > NumBuckets()) {
      Resize(num_buckets);
    }
  }
| 597 | |
| 598 | // To distance that inserted elements were probed. Used for measuring how good hash functions |
| 599 | // are. |
| 600 | size_t TotalProbeDistance() const { |
| 601 | size_t total = 0; |
| 602 | for (size_t i = 0; i < NumBuckets(); ++i) { |
| 603 | const T& element = ElementForIndex(i); |
| 604 | if (!emptyfn_.IsEmpty(element)) { |
| 605 | size_t ideal_location = IndexForHash(hashfn_(element)); |
| 606 | if (ideal_location > i) { |
| 607 | total += i + NumBuckets() - ideal_location; |
| 608 | } else { |
| 609 | total += i - ideal_location; |
| 610 | } |
| 611 | } |
| 612 | } |
| 613 | return total; |
| 614 | } |
| 615 | |
  // Calculate the current load factor and return it.
  double CalculateLoadFactor() const {
    return static_cast<double>(size()) / static_cast<double>(NumBuckets());
  }

  // Make sure that everything reinserts in the right spot. Returns the number of errors.
  // For each element: temporarily empty its slot, re-run the probe for its
  // hash, and check the probe lands back on the same slot.
  size_t Verify() NO_THREAD_SAFETY_ANALYSIS {
    size_t errors = 0;
    for (size_t i = 0; i < num_buckets_; ++i) {
      T& element = data_[i];
      if (!emptyfn_.IsEmpty(element)) {
        T temp;
        emptyfn_.MakeEmpty(temp);
        std::swap(temp, element);  // Vacate the slot so the probe can see it.
        size_t first_slot = FirstAvailableSlot(IndexForHash(hashfn_(temp)));
        if (i != first_slot) {
          LOG(ERROR) << "Element " << i << " should be in slot " << first_slot;
          ++errors;
        }
        std::swap(temp, element);  // Restore the element.
      }
    }
    return errors;
  }

  double GetMinLoadFactor() const {
    return min_load_factor_;
  }

  double GetMaxLoadFactor() const {
    return max_load_factor_;
  }

  // Change the load factor of the hash set. If the current load factor is greater than the max
  // specified, then we resize the hash table storage.
  void SetLoadFactor(double min_load_factor, double max_load_factor) {
    DCHECK_LT(min_load_factor, max_load_factor);
    DCHECK_GT(min_load_factor, 0.0);
    DCHECK_LT(max_load_factor, 1.0);
    min_load_factor_ = min_load_factor;
    max_load_factor_ = max_load_factor;
    elements_until_expand_ = NumBuckets() * max_load_factor_;
    // If the current load factor isn't in the range, then resize to the mean of the minimum and
    // maximum load factor.
    const double load_factor = CalculateLoadFactor();
    if (load_factor > max_load_factor_) {
      Resize(size() / ((min_load_factor_ + max_load_factor_) * 0.5));
    }
  }

  // The hash set expands when Size() reaches ElementsUntilExpand().
  size_t ElementsUntilExpand() const {
    return elements_until_expand_;
  }

  size_t NumBuckets() const {
    return num_buckets_;
  }
| 674 | |
 private:
  // Slot accessor; DCHECKs that storage exists and the index is in range.
  T& ElementForIndex(size_t index) {
    DCHECK_LT(index, NumBuckets());
    DCHECK(data_ != nullptr);
    return data_[index];
  }

  const T& ElementForIndex(size_t index) const {
    DCHECK_LT(index, NumBuckets());
    DCHECK(data_ != nullptr);
    return data_[index];
  }

  // Map a hash value to its ideal bucket.
  size_t IndexForHash(size_t hash) const {
    // Protect against undefined behavior (division by zero).
    if (UNLIKELY(num_buckets_ == 0)) {
      return 0;
    }
    return hash % num_buckets_;
  }

  // Next slot in the probe sequence, wrapping from the last bucket to 0.
  size_t NextIndex(size_t index) const {
    if (UNLIKELY(++index >= num_buckets_)) {
      DCHECK_EQ(index, NumBuckets());
      return 0;
    }
    return index;
  }
| 703 | |
  // Find the hash table slot for an element, or return NumBuckets() if not found.
  // This value for not found is important so that iterator(this, FindIndex(...)) == end().
  template <typename K>
  ALWAYS_INLINE
  size_t FindIndex(const K& element, size_t hash) const {
    // Guard against failing to get an element for a non-existing index.
    // (With zero buckets, 0 == NumBuckets() == end(), so this is "not found".)
    if (UNLIKELY(NumBuckets() == 0)) {
      return 0;
    }
    auto fail_fn = [&](size_t index ATTRIBUTE_UNUSED) ALWAYS_INLINE { return NumBuckets(); };
    return FindIndexImpl(element, hash, fail_fn);
  }

  // Find the hash table slot for an element, or return an empty slot index if not found.
  // `fail_fn` maps the first empty slot hit to the caller's not-found value.
  // With kCanFind == false the caller guarantees the element is absent, so
  // equality against occupied slots is only DCHECKed, never acted on.
  // Note: relies on an empty slot existing (max load factor < 1.0) or the
  // probe loop would not terminate.
  template <bool kCanFind = true, typename K, typename FailFn>
  ALWAYS_INLINE
  size_t FindIndexImpl(const K& element, size_t hash, FailFn fail_fn) const {
    DCHECK_NE(NumBuckets(), 0u);
    DCHECK_EQ(hashfn_(element), hash);
    size_t index = IndexForHash(hash);
    while (true) {
      const T& slot = ElementForIndex(index);
      if (emptyfn_.IsEmpty(slot)) {
        return fail_fn(index);
      }
      if (!kCanFind) {
        DCHECK(!pred_(slot, element));
      } else if (pred_(slot, element)) {
        return index;
      }
      index = NextIndex(index);
    }
  }

  // True if the slot at `index` holds the empty sentinel.
  bool IsFreeSlot(size_t index) const {
    return emptyfn_.IsEmpty(ElementForIndex(index));
  }
| 741 | |
| 742 | // Allocate a number of buckets. |
| 743 | void AllocateStorage(size_t num_buckets) { |
| 744 | num_buckets_ = num_buckets; |
| 745 | data_ = allocfn_.allocate(num_buckets_); |
| 746 | owns_data_ = true; |
| 747 | for (size_t i = 0; i < num_buckets_; ++i) { |
| 748 | allocfn_.construct(allocfn_.address(data_[i])); |
| 749 | emptyfn_.MakeEmpty(data_[i]); |
| 750 | } |
| 751 | } |
| 752 | |
| 753 | void DeallocateStorage() { |
| 754 | if (owns_data_) { |
| 755 | for (size_t i = 0; i < NumBuckets(); ++i) { |
| 756 | allocfn_.destroy(allocfn_.address(data_[i])); |
| 757 | } |
| 758 | if (data_ != nullptr) { |
| 759 | allocfn_.deallocate(data_, NumBuckets()); |
| 760 | } |
| 761 | owns_data_ = false; |
| 762 | } |
| 763 | data_ = nullptr; |
| 764 | num_buckets_ = 0; |
| 765 | } |
| 766 | |
| 767 | // Expand the set based on the load factors. |
| 768 | void Expand() { |
| 769 | size_t min_index = static_cast<size_t>(size() / min_load_factor_); |
| 770 | // Resize based on the minimum load factor. |
| 771 | Resize(min_index); |
| 772 | } |
| 773 | |
  // Expand / shrink the table to the new specified size.
  // Allocates fresh storage, rehashes every element into it by move, then
  // destroys the old elements and (if owned) frees the old storage.
  void Resize(size_t new_size) {
    if (new_size < kMinBuckets) {
      new_size = kMinBuckets;
    }
    // All current elements must fit in the new table.
    DCHECK_GE(new_size, size());
    T* const old_data = data_;
    size_t old_num_buckets = num_buckets_;
    // Reinsert all of the old elements.
    const bool owned_data = owns_data_;
    AllocateStorage(new_size);
    for (size_t i = 0; i < old_num_buckets; ++i) {
      T& element = old_data[i];
      if (!emptyfn_.IsEmpty(element)) {
        // Rehash into the new storage, linear-probing from the home bucket.
        data_[FirstAvailableSlot(IndexForHash(hashfn_(element)))] = std::move(element);
      }
      if (owned_data) {
        // The moved-from (or empty) slot still needs its destructor run.
        allocfn_.destroy(allocfn_.address(element));
      }
    }
    if (owned_data) {
      allocfn_.deallocate(old_data, old_num_buckets);
    }

    // When we hit elements_until_expand_, we are at the max load factor and must expand again.
    elements_until_expand_ = NumBuckets() * max_load_factor_;
  }
| 801 | |
| 802 | ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const { |
| 803 | DCHECK_LT(index, NumBuckets()); // Don't try to get a slot out of range. |
| 804 | size_t non_empty_count = 0; |
| 805 | while (!emptyfn_.IsEmpty(data_[index])) { |
| 806 | index = NextIndex(index); |
| 807 | non_empty_count++; |
| 808 | DCHECK_LE(non_empty_count, NumBuckets()); // Don't loop forever. |
| 809 | } |
| 810 | return index; |
| 811 | } |
| 812 | |
| 813 | size_t NextNonEmptySlot(size_t index) const { |
| 814 | const size_t num_buckets = NumBuckets(); |
| 815 | DCHECK_LT(index, num_buckets); |
| 816 | do { |
| 817 | ++index; |
| 818 | } while (index < num_buckets && IsFreeSlot(index)); |
| 819 | return index; |
| 820 | } |
| 821 | |
| 822 | // Return new offset. |
| 823 | template <typename Elem> |
| 824 | static size_t WriteToBytes(uint8_t* ptr, size_t offset, Elem n) { |
| 825 | DCHECK_ALIGNED(ptr + offset, sizeof(n)); |
| 826 | if (ptr != nullptr) { |
| 827 | *reinterpret_cast<Elem*>(ptr + offset) = n; |
| 828 | } |
| 829 | return offset + sizeof(n); |
| 830 | } |
| 831 | |
| 832 | template <typename Elem> |
| 833 | static size_t ReadFromBytes(const uint8_t* ptr, size_t offset, Elem* out) { |
| 834 | DCHECK(ptr != nullptr); |
| 835 | DCHECK_ALIGNED(ptr + offset, sizeof(*out)); |
| 836 | *out = *reinterpret_cast<const Elem*>(ptr + offset); |
| 837 | return offset + sizeof(*out); |
| 838 | } |
| 839 | |
  Alloc allocfn_;  // Allocator function.
  HashFn hashfn_;  // Hashing function.
  EmptyFn emptyfn_;  // IsEmpty/SetEmpty function.
  Pred pred_;  // Equals function.
  size_t num_elements_;  // Number of inserted elements.
  size_t num_buckets_;  // Number of hash table buckets.
  size_t elements_until_expand_;  // Maximum number of elements until we expand the table.
  bool owns_data_;  // If we own data_ and are responsible for freeing it.
  T* data_;  // Backing storage.
  double min_load_factor_;  // Load factor the table is resized to when expanding (see Expand()).
  double max_load_factor_;  // Load factor that triggers expansion (sets elements_until_expand_).

  template <class Elem, class HashSetType>
  friend class HashSetIterator;

  ART_FRIEND_TEST(InternTableTest, CrossHash);
  ART_FRIEND_TEST(HashSetTest, Preallocated);
| 857 | }; |
| 858 | |
// Free-function swap delegating to HashSet::swap, so ADL/std::swap-style
// swapping works for hash sets.
template <class T, class EmptyFn, class HashFn, class Pred, class Alloc>
void swap(HashSet<T, EmptyFn, HashFn, Pred, Alloc>& lhs,
          HashSet<T, EmptyFn, HashFn, Pred, Alloc>& rhs) {
  lhs.swap(rhs);
}
| 864 | |
| 865 | } // namespace art |
| 866 | |
| 867 | #endif // ART_LIBARTBASE_BASE_HASH_SET_H_ |