blob: 9e6e6d2083c54c3fad832736cfd172936a6cca82 [file] [log] [blame]
Mathieu Chartierc2e20622014-11-03 11:41:47 -08001/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include "hash_set.h"

#include <ctime>
#include <forward_list>
#include <map>
#include <sstream>
#include <string>
#include <string_view>
#include <unordered_set>
#include <utility>
#include <vector>

#include <gtest/gtest.h>

#include "hash_map.h"
Mathieu Chartierc2e20622014-11-03 11:41:47 -080030
31namespace art {
32
// Empty-element functor used as the second template argument of
// HashSet<std::string, ...> in the tests below. The empty string is the
// "empty" value: MakeEmpty() clears the string, IsEmpty() reports whether it
// is empty.
struct IsEmptyFnString {
  // Turns `item` into the empty value by clearing it.
  void MakeEmpty(std::string& item) const {
    item.clear();
  }
  // Returns true if `item` is the empty value (a zero-length string).
  bool IsEmpty(const std::string& item) const {
    return item.empty();
  }
};
41
Mathieu Chartier47f867a2015-03-18 10:39:00 -070042class HashSetTest : public testing::Test {
Mathieu Chartierc2e20622014-11-03 11:41:47 -080043 public:
44 HashSetTest() : seed_(97421), unique_number_(0) {
45 }
46 std::string RandomString(size_t len) {
47 std::ostringstream oss;
48 for (size_t i = 0; i < len; ++i) {
49 oss << static_cast<char>('A' + PRand() % 64);
50 }
51 static_assert(' ' < 'A', "space must be less than a");
52 oss << " " << unique_number_++; // Relies on ' ' < 'A'
53 return oss.str();
54 }
55 void SetSeed(size_t seed) {
56 seed_ = seed;
57 }
58 size_t PRand() { // Pseudo random.
59 seed_ = seed_ * 1103515245 + 12345;
60 return seed_;
61 }
62
63 private:
64 size_t seed_;
65 size_t unique_number_;
66};
67
68TEST_F(HashSetTest, TestSmoke) {
69 HashSet<std::string, IsEmptyFnString> hash_set;
70 const std::string test_string = "hello world 1234";
Vladimir Marko54159c62018-06-20 14:30:08 +010071 ASSERT_TRUE(hash_set.empty());
72 ASSERT_EQ(hash_set.size(), 0U);
73 hash_set.insert(test_string);
74 auto it = hash_set.find(test_string);
Mathieu Chartierc2e20622014-11-03 11:41:47 -080075 ASSERT_EQ(*it, test_string);
Vladimir Marko54159c62018-06-20 14:30:08 +010076 auto after_it = hash_set.erase(it);
Mathieu Chartierc2e20622014-11-03 11:41:47 -080077 ASSERT_TRUE(after_it == hash_set.end());
Vladimir Marko54159c62018-06-20 14:30:08 +010078 ASSERT_TRUE(hash_set.empty());
79 ASSERT_EQ(hash_set.size(), 0U);
80 it = hash_set.find(test_string);
Mathieu Chartierc2e20622014-11-03 11:41:47 -080081 ASSERT_TRUE(it == hash_set.end());
82}
83
84TEST_F(HashSetTest, TestInsertAndErase) {
85 HashSet<std::string, IsEmptyFnString> hash_set;
86 static constexpr size_t count = 1000;
87 std::vector<std::string> strings;
88 for (size_t i = 0; i < count; ++i) {
89 // Insert a bunch of elements and make sure we can find them.
90 strings.push_back(RandomString(10));
Vladimir Marko54159c62018-06-20 14:30:08 +010091 hash_set.insert(strings[i]);
92 auto it = hash_set.find(strings[i]);
Mathieu Chartierc2e20622014-11-03 11:41:47 -080093 ASSERT_TRUE(it != hash_set.end());
94 ASSERT_EQ(*it, strings[i]);
95 }
Vladimir Marko54159c62018-06-20 14:30:08 +010096 ASSERT_EQ(strings.size(), hash_set.size());
Mathieu Chartierc2e20622014-11-03 11:41:47 -080097 // Try to erase the odd strings.
98 for (size_t i = 1; i < count; i += 2) {
Vladimir Marko54159c62018-06-20 14:30:08 +010099 auto it = hash_set.find(strings[i]);
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800100 ASSERT_TRUE(it != hash_set.end());
101 ASSERT_EQ(*it, strings[i]);
Vladimir Marko54159c62018-06-20 14:30:08 +0100102 hash_set.erase(it);
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800103 }
104 // Test removed.
105 for (size_t i = 1; i < count; i += 2) {
Vladimir Marko54159c62018-06-20 14:30:08 +0100106 auto it = hash_set.find(strings[i]);
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800107 ASSERT_TRUE(it == hash_set.end());
108 }
109 for (size_t i = 0; i < count; i += 2) {
Vladimir Marko54159c62018-06-20 14:30:08 +0100110 auto it = hash_set.find(strings[i]);
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800111 ASSERT_TRUE(it != hash_set.end());
112 ASSERT_EQ(*it, strings[i]);
113 }
114}
115
116TEST_F(HashSetTest, TestIterator) {
117 HashSet<std::string, IsEmptyFnString> hash_set;
118 ASSERT_TRUE(hash_set.begin() == hash_set.end());
119 static constexpr size_t count = 1000;
120 std::vector<std::string> strings;
121 for (size_t i = 0; i < count; ++i) {
122 // Insert a bunch of elements and make sure we can find them.
123 strings.push_back(RandomString(10));
Vladimir Marko54159c62018-06-20 14:30:08 +0100124 hash_set.insert(strings[i]);
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800125 }
126 // Make sure we visit each string exactly once.
127 std::map<std::string, size_t> found_count;
128 for (const std::string& s : hash_set) {
129 ++found_count[s];
130 }
131 for (size_t i = 0; i < count; ++i) {
132 ASSERT_EQ(found_count[strings[i]], 1U);
133 }
134 found_count.clear();
135 // Remove all the elements with iterator erase.
136 for (auto it = hash_set.begin(); it != hash_set.end();) {
137 ++found_count[*it];
Vladimir Marko54159c62018-06-20 14:30:08 +0100138 it = hash_set.erase(it);
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800139 ASSERT_EQ(hash_set.Verify(), 0U);
140 }
141 for (size_t i = 0; i < count; ++i) {
142 ASSERT_EQ(found_count[strings[i]], 1U);
143 }
144}
145
146TEST_F(HashSetTest, TestSwap) {
147 HashSet<std::string, IsEmptyFnString> hash_seta, hash_setb;
148 std::vector<std::string> strings;
149 static constexpr size_t count = 1000;
150 for (size_t i = 0; i < count; ++i) {
151 strings.push_back(RandomString(10));
Vladimir Marko54159c62018-06-20 14:30:08 +0100152 hash_seta.insert(strings[i]);
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800153 }
154 std::swap(hash_seta, hash_setb);
Vladimir Marko54159c62018-06-20 14:30:08 +0100155 hash_seta.insert("TEST");
156 hash_setb.insert("TEST2");
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800157 for (size_t i = 0; i < count; ++i) {
158 strings.push_back(RandomString(10));
Vladimir Marko54159c62018-06-20 14:30:08 +0100159 hash_seta.insert(strings[i]);
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800160 }
161}
162
Igor Murashkin3552d962015-06-22 15:57:38 -0700163TEST_F(HashSetTest, TestShrink) {
164 HashSet<std::string, IsEmptyFnString> hash_set;
165 std::vector<std::string> strings = {"a", "b", "c", "d", "e", "f", "g"};
166 for (size_t i = 0; i < strings.size(); ++i) {
167 // Insert some strings into the beginning of our hash set to establish an initial size
Vladimir Marko54159c62018-06-20 14:30:08 +0100168 hash_set.insert(strings[i]);
Igor Murashkin3552d962015-06-22 15:57:38 -0700169 }
170
171 hash_set.ShrinkToMaximumLoad();
172 const double initial_load = hash_set.CalculateLoadFactor();
173
174 // Insert a bunch of random strings to guarantee that we grow the capacity.
175 std::vector<std::string> random_strings;
176 static constexpr size_t count = 1000;
177 for (size_t i = 0; i < count; ++i) {
178 random_strings.push_back(RandomString(10));
Vladimir Marko54159c62018-06-20 14:30:08 +0100179 hash_set.insert(random_strings[i]);
Igor Murashkin3552d962015-06-22 15:57:38 -0700180 }
181
182 // Erase all the extra strings which guarantees that our load factor will be really bad.
183 for (size_t i = 0; i < count; ++i) {
Vladimir Marko54159c62018-06-20 14:30:08 +0100184 hash_set.erase(hash_set.find(random_strings[i]));
Igor Murashkin3552d962015-06-22 15:57:38 -0700185 }
186
187 const double bad_load = hash_set.CalculateLoadFactor();
188 EXPECT_GT(initial_load, bad_load);
189
190 // Shrink again, the load factor should be good again.
191 hash_set.ShrinkToMaximumLoad();
192 EXPECT_DOUBLE_EQ(initial_load, hash_set.CalculateLoadFactor());
Igor Murashkine2facc52015-07-10 13:49:08 -0700193
194 // Make sure all the initial elements we had are still there
195 for (const std::string& initial_string : strings) {
Vladimir Marko54159c62018-06-20 14:30:08 +0100196 EXPECT_NE(hash_set.end(), hash_set.find(initial_string))
Igor Murashkine2facc52015-07-10 13:49:08 -0700197 << "expected to find " << initial_string;
198 }
Igor Murashkin3552d962015-06-22 15:57:38 -0700199}
200
Mathieu Chartier32cc9ee2015-10-15 09:19:15 -0700201TEST_F(HashSetTest, TestLoadFactor) {
202 HashSet<std::string, IsEmptyFnString> hash_set;
203 static constexpr size_t kStringCount = 1000;
204 static constexpr double kEpsilon = 0.01;
205 for (size_t i = 0; i < kStringCount; ++i) {
Vladimir Marko54159c62018-06-20 14:30:08 +0100206 hash_set.insert(RandomString(i % 10 + 1));
Mathieu Chartier32cc9ee2015-10-15 09:19:15 -0700207 }
208 // Check that changing the load factor resizes the table to be within the target range.
209 EXPECT_GE(hash_set.CalculateLoadFactor() + kEpsilon, hash_set.GetMinLoadFactor());
210 EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());
211 hash_set.SetLoadFactor(0.1, 0.3);
212 EXPECT_DOUBLE_EQ(0.1, hash_set.GetMinLoadFactor());
213 EXPECT_DOUBLE_EQ(0.3, hash_set.GetMaxLoadFactor());
214 EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());
215 hash_set.SetLoadFactor(0.6, 0.8);
216 EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());
217}
218
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800219TEST_F(HashSetTest, TestStress) {
220 HashSet<std::string, IsEmptyFnString> hash_set;
Vladimir Markofeba2642019-11-19 13:22:18 +0000221 std::unordered_set<std::string> std_set;
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800222 std::vector<std::string> strings;
223 static constexpr size_t string_count = 2000;
224 static constexpr size_t operations = 100000;
225 static constexpr size_t target_size = 5000;
226 for (size_t i = 0; i < string_count; ++i) {
227 strings.push_back(RandomString(i % 10 + 1));
228 }
229 const size_t seed = time(nullptr);
230 SetSeed(seed);
231 LOG(INFO) << "Starting stress test with seed " << seed;
232 for (size_t i = 0; i < operations; ++i) {
Vladimir Marko54159c62018-06-20 14:30:08 +0100233 ASSERT_EQ(hash_set.size(), std_set.size());
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800234 size_t delta = std::abs(static_cast<ssize_t>(target_size) -
Vladimir Marko54159c62018-06-20 14:30:08 +0100235 static_cast<ssize_t>(hash_set.size()));
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800236 size_t n = PRand();
237 if (n % target_size == 0) {
Vladimir Marko54159c62018-06-20 14:30:08 +0100238 hash_set.clear();
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800239 std_set.clear();
Vladimir Marko54159c62018-06-20 14:30:08 +0100240 ASSERT_TRUE(hash_set.empty());
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800241 ASSERT_TRUE(std_set.empty());
242 } else if (n % target_size < delta) {
243 // Skew towards adding elements until we are at the desired size.
244 const std::string& s = strings[PRand() % string_count];
Vladimir Marko54159c62018-06-20 14:30:08 +0100245 hash_set.insert(s);
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800246 std_set.insert(s);
Vladimir Marko54159c62018-06-20 14:30:08 +0100247 ASSERT_EQ(*hash_set.find(s), *std_set.find(s));
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800248 } else {
249 const std::string& s = strings[PRand() % string_count];
Vladimir Marko54159c62018-06-20 14:30:08 +0100250 auto it1 = hash_set.find(s);
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800251 auto it2 = std_set.find(s);
252 ASSERT_EQ(it1 == hash_set.end(), it2 == std_set.end());
253 if (it1 != hash_set.end()) {
254 ASSERT_EQ(*it1, *it2);
Vladimir Marko54159c62018-06-20 14:30:08 +0100255 hash_set.erase(it1);
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800256 std_set.erase(it2);
257 }
258 }
259 }
260}
261
// Empty-element functor used as the third template argument of
// HashMap<std::string, int, ...> in TestHashMap. A pair is the "empty" value
// when its key string is empty; the int value is ignored.
struct IsEmptyStringPair {
  // Turns `pair` into the empty value by clearing its key; the mapped value
  // is left untouched.
  void MakeEmpty(std::pair<std::string, int>& pair) const {
    pair.first.clear();
  }
  // Returns true if the key of `pair` is the empty string.
  bool IsEmpty(const std::pair<std::string, int>& pair) const {
    return pair.first.empty();
  }
};
270
271TEST_F(HashSetTest, TestHashMap) {
272 HashMap<std::string, int, IsEmptyStringPair> hash_map;
Vladimir Marko54159c62018-06-20 14:30:08 +0100273 hash_map.insert(std::make_pair(std::string("abcd"), 123));
274 hash_map.insert(std::make_pair(std::string("abcd"), 124));
275 hash_map.insert(std::make_pair(std::string("bags"), 444));
276 auto it = hash_map.find(std::string("abcd"));
Mathieu Chartiere7c9a8c2014-11-06 16:35:45 -0800277 ASSERT_EQ(it->second, 123);
Vladimir Marko54159c62018-06-20 14:30:08 +0100278 hash_map.erase(it);
279 it = hash_map.find(std::string("abcd"));
Vladimir Markofeba2642019-11-19 13:22:18 +0000280 ASSERT_EQ(it, hash_map.end());
Mathieu Chartiere7c9a8c2014-11-06 16:35:45 -0800281}
282
// Empty-element functor used as the second template argument of
// HashSet<std::vector<int>, ...> in TestLookupByAlternateKeyType. The empty
// vector is the "empty" value.
struct IsEmptyFnVectorInt {
  // Turns `item` into the empty value by clearing it.
  void MakeEmpty(std::vector<int>& item) const {
    item.clear();
  }
  // Returns true if `item` has no elements.
  bool IsEmpty(const std::vector<int>& item) const {
    return item.empty();
  }
};
291
// Hashes the integer sequence [begin, end) by folding each element into the
// running value with `hash = hash * 2 + element`. The result depends only on
// the element values and their order, not on the container type, so equal
// sequences stored in different containers hash identically. An empty range
// hashes to 0.
template <typename T>
size_t HashIntSequence(T begin, T end) {
  size_t hash = 0;
  for (auto iter = begin; iter != end; ++iter) {
    hash = hash * 2 + *iter;
  }
  return hash;
}
Richard Uhlercf7792d2015-08-27 09:04:18 -0700300
301struct VectorIntHashEquals {
302 std::size_t operator()(const std::vector<int>& item) const {
303 return HashIntSequence(item.begin(), item.end());
304 }
305
306 std::size_t operator()(const std::forward_list<int>& item) const {
307 return HashIntSequence(item.begin(), item.end());
308 }
309
310 bool operator()(const std::vector<int>& a, const std::vector<int>& b) const {
311 return a == b;
312 }
313
314 bool operator()(const std::vector<int>& a, const std::forward_list<int>& b) const {
315 auto aiter = a.begin();
316 auto biter = b.begin();
317 while (aiter != a.end() && biter != b.end()) {
318 if (*aiter != *biter) {
319 return false;
320 }
321 aiter++;
322 biter++;
323 }
324 return (aiter == a.end() && biter == b.end());
325 }
326};
327
328TEST_F(HashSetTest, TestLookupByAlternateKeyType) {
329 HashSet<std::vector<int>, IsEmptyFnVectorInt, VectorIntHashEquals, VectorIntHashEquals> hash_set;
Vladimir Marko54159c62018-06-20 14:30:08 +0100330 hash_set.insert(std::vector<int>({1, 2, 3, 4}));
331 hash_set.insert(std::vector<int>({4, 2}));
332 ASSERT_EQ(hash_set.end(), hash_set.find(std::vector<int>({1, 1, 1, 1})));
333 ASSERT_NE(hash_set.end(), hash_set.find(std::vector<int>({1, 2, 3, 4})));
334 ASSERT_EQ(hash_set.end(), hash_set.find(std::forward_list<int>({1, 1, 1, 1})));
335 ASSERT_NE(hash_set.end(), hash_set.find(std::forward_list<int>({1, 2, 3, 4})));
Richard Uhlercf7792d2015-08-27 09:04:18 -0700336}
337
Mathieu Chartierc482d382015-10-26 11:20:18 -0700338TEST_F(HashSetTest, TestReserve) {
339 HashSet<std::string, IsEmptyFnString> hash_set;
340 std::vector<size_t> sizes = {1, 10, 25, 55, 128, 1024, 4096};
341 for (size_t size : sizes) {
Vladimir Marko54159c62018-06-20 14:30:08 +0100342 hash_set.reserve(size);
Mathieu Chartierc482d382015-10-26 11:20:18 -0700343 const size_t buckets_before = hash_set.NumBuckets();
344 // Check that we expanded enough.
345 CHECK_GE(hash_set.ElementsUntilExpand(), size);
346 // Try inserting elements until we are at our reserve size and ensure the hash set did not
347 // expand.
Vladimir Marko54159c62018-06-20 14:30:08 +0100348 while (hash_set.size() < size) {
349 hash_set.insert(std::to_string(hash_set.size()));
Mathieu Chartierc482d382015-10-26 11:20:18 -0700350 }
351 CHECK_EQ(hash_set.NumBuckets(), buckets_before);
352 }
353 // Check the behaviour for shrinking, it does not necessarily resize down.
354 constexpr size_t size = 100;
Vladimir Marko54159c62018-06-20 14:30:08 +0100355 hash_set.reserve(size);
Mathieu Chartierc482d382015-10-26 11:20:18 -0700356 CHECK_GE(hash_set.ElementsUntilExpand(), size);
357}
358
Vladimir Marko54159c62018-06-20 14:30:08 +0100359TEST_F(HashSetTest, IteratorConversion) {
360 const char* test_string = "dummy";
361 HashSet<std::string> hash_set;
Vladimir Markofeba2642019-11-19 13:22:18 +0000362 HashSet<std::string>::iterator it = hash_set.insert(test_string).first;
Vladimir Marko54159c62018-06-20 14:30:08 +0100363 HashSet<std::string>::const_iterator cit = it;
364 ASSERT_TRUE(it == cit);
365 ASSERT_EQ(*it, *cit);
366}
367
Vladimir Markofeba2642019-11-19 13:22:18 +0000368TEST_F(HashSetTest, StringSearchStringView) {
Vladimir Marko54159c62018-06-20 14:30:08 +0100369 const char* test_string = "dummy";
370 HashSet<std::string> hash_set;
Vladimir Markofeba2642019-11-19 13:22:18 +0000371 HashSet<std::string>::iterator insert_pos = hash_set.insert(test_string).first;
Vladimir Marko2ef01102019-02-05 15:05:10 +0000372 HashSet<std::string>::iterator it = hash_set.find(std::string_view(test_string));
Vladimir Marko54159c62018-06-20 14:30:08 +0100373 ASSERT_TRUE(it == insert_pos);
374}
375
Vladimir Markofeba2642019-11-19 13:22:18 +0000376TEST_F(HashSetTest, DoubleInsert) {
377 const char* test_string = "dummy";
378 HashSet<std::string> hash_set;
379 hash_set.insert(test_string);
380 hash_set.insert(test_string);
381 ASSERT_EQ(1u, hash_set.size());
382}
383
Mathieu Chartierc2e20622014-11-03 11:41:47 -0800384} // namespace art