Vladimir Marko | 35831e8 | 2015-09-11 11:59:18 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2015 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #ifndef ART_COMPILER_UTILS_DEDUPE_SET_INL_H_ |
| 18 | #define ART_COMPILER_UTILS_DEDUPE_SET_INL_H_ |
| 19 | |
| 20 | #include "dedupe_set.h" |
| 21 | |
| 22 | #include <algorithm> |
| 23 | #include <inttypes.h> |
| 24 | #include <unordered_map> |
| 25 | |
Andreas Gampe | 46ee31b | 2016-12-14 10:11:49 -0800 | [diff] [blame] | 26 | #include "android-base/stringprintf.h" |
| 27 | |
Vladimir Marko | 35831e8 | 2015-09-11 11:59:18 +0100 | [diff] [blame] | 28 | #include "base/mutex.h" |
| 29 | #include "base/hash_set.h" |
| 30 | #include "base/stl_util.h" |
Vladimir Marko | 35831e8 | 2015-09-11 11:59:18 +0100 | [diff] [blame] | 31 | #include "base/time_utils.h" |
| 32 | |
| 33 | namespace art { |
| 34 | |
| 35 | template <typename InKey, |
| 36 | typename StoreKey, |
| 37 | typename Alloc, |
| 38 | typename HashType, |
| 39 | typename HashFunc, |
| 40 | HashType kShard> |
| 41 | struct DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Stats { |
| 42 | size_t collision_sum = 0u; |
| 43 | size_t collision_max = 0u; |
| 44 | size_t total_probe_distance = 0u; |
| 45 | size_t total_size = 0u; |
| 46 | }; |
| 47 | |
| 48 | template <typename InKey, |
| 49 | typename StoreKey, |
| 50 | typename Alloc, |
| 51 | typename HashType, |
| 52 | typename HashFunc, |
| 53 | HashType kShard> |
| 54 | class DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Shard { |
| 55 | public: |
| 56 | Shard(const Alloc& alloc, const std::string& lock_name) |
| 57 | : alloc_(alloc), |
| 58 | lock_name_(lock_name), |
| 59 | lock_(lock_name_.c_str()), |
| 60 | keys_() { |
| 61 | } |
| 62 | |
| 63 | ~Shard() { |
| 64 | for (const HashedKey<StoreKey>& key : keys_) { |
| 65 | DCHECK(key.Key() != nullptr); |
| 66 | alloc_.Destroy(key.Key()); |
| 67 | } |
| 68 | } |
| 69 | |
| 70 | const StoreKey* Add(Thread* self, size_t hash, const InKey& in_key) REQUIRES(!lock_) { |
| 71 | MutexLock lock(self, lock_); |
| 72 | HashedKey<InKey> hashed_in_key(hash, &in_key); |
| 73 | auto it = keys_.Find(hashed_in_key); |
| 74 | if (it != keys_.end()) { |
| 75 | DCHECK(it->Key() != nullptr); |
| 76 | return it->Key(); |
| 77 | } |
| 78 | const StoreKey* store_key = alloc_.Copy(in_key); |
| 79 | keys_.Insert(HashedKey<StoreKey> { hash, store_key }); |
| 80 | return store_key; |
| 81 | } |
| 82 | |
| 83 | void UpdateStats(Thread* self, Stats* global_stats) REQUIRES(!lock_) { |
| 84 | // HashSet<> doesn't keep entries ordered by hash, so we actually allocate memory |
| 85 | // for bookkeeping while collecting the stats. |
| 86 | std::unordered_map<HashType, size_t> stats; |
| 87 | { |
| 88 | MutexLock lock(self, lock_); |
| 89 | // Note: The total_probe_distance will be updated with the current state. |
| 90 | // It may have been higher before a re-hash. |
| 91 | global_stats->total_probe_distance += keys_.TotalProbeDistance(); |
| 92 | global_stats->total_size += keys_.Size(); |
| 93 | for (const HashedKey<StoreKey>& key : keys_) { |
| 94 | auto it = stats.find(key.Hash()); |
| 95 | if (it == stats.end()) { |
| 96 | stats.insert({key.Hash(), 1u}); |
| 97 | } else { |
| 98 | ++it->second; |
| 99 | } |
| 100 | } |
| 101 | } |
| 102 | for (const auto& entry : stats) { |
| 103 | size_t number_of_entries = entry.second; |
| 104 | if (number_of_entries > 1u) { |
| 105 | global_stats->collision_sum += number_of_entries - 1u; |
| 106 | global_stats->collision_max = std::max(global_stats->collision_max, number_of_entries); |
| 107 | } |
| 108 | } |
| 109 | } |
| 110 | |
| 111 | private: |
| 112 | template <typename T> |
| 113 | class HashedKey { |
| 114 | public: |
| 115 | HashedKey() : hash_(0u), key_(nullptr) { } |
| 116 | HashedKey(size_t hash, const T* key) : hash_(hash), key_(key) { } |
| 117 | |
| 118 | size_t Hash() const { |
| 119 | return hash_; |
| 120 | } |
| 121 | |
| 122 | const T* Key() const { |
| 123 | return key_; |
| 124 | } |
| 125 | |
| 126 | bool IsEmpty() const { |
| 127 | return Key() == nullptr; |
| 128 | } |
| 129 | |
| 130 | void MakeEmpty() { |
| 131 | key_ = nullptr; |
| 132 | } |
| 133 | |
| 134 | private: |
| 135 | size_t hash_; |
| 136 | const T* key_; |
| 137 | }; |
| 138 | |
| 139 | class ShardEmptyFn { |
| 140 | public: |
| 141 | bool IsEmpty(const HashedKey<StoreKey>& key) const { |
| 142 | return key.IsEmpty(); |
| 143 | } |
| 144 | |
| 145 | void MakeEmpty(HashedKey<StoreKey>& key) { |
| 146 | key.MakeEmpty(); |
| 147 | } |
| 148 | }; |
| 149 | |
| 150 | struct ShardHashFn { |
| 151 | template <typename T> |
| 152 | size_t operator()(const HashedKey<T>& key) const { |
| 153 | return key.Hash(); |
| 154 | } |
| 155 | }; |
| 156 | |
| 157 | struct ShardPred { |
| 158 | typename std::enable_if<!std::is_same<StoreKey, InKey>::value, bool>::type |
| 159 | operator()(const HashedKey<StoreKey>& lhs, const HashedKey<StoreKey>& rhs) const { |
| 160 | DCHECK(lhs.Key() != nullptr); |
| 161 | DCHECK(rhs.Key() != nullptr); |
| 162 | // Rehashing: stored keys are already deduplicated, so we can simply compare key pointers. |
| 163 | return lhs.Key() == rhs.Key(); |
| 164 | } |
| 165 | |
| 166 | template <typename LeftT, typename RightT> |
| 167 | bool operator()(const HashedKey<LeftT>& lhs, const HashedKey<RightT>& rhs) const { |
| 168 | DCHECK(lhs.Key() != nullptr); |
| 169 | DCHECK(rhs.Key() != nullptr); |
| 170 | return lhs.Hash() == rhs.Hash() && |
| 171 | lhs.Key()->size() == rhs.Key()->size() && |
| 172 | std::equal(lhs.Key()->begin(), lhs.Key()->end(), rhs.Key()->begin()); |
| 173 | } |
| 174 | }; |
| 175 | |
| 176 | Alloc alloc_; |
| 177 | const std::string lock_name_; |
| 178 | Mutex lock_; |
| 179 | HashSet<HashedKey<StoreKey>, ShardEmptyFn, ShardHashFn, ShardPred> keys_ GUARDED_BY(lock_); |
| 180 | }; |
| 181 | |
| 182 | template <typename InKey, |
| 183 | typename StoreKey, |
| 184 | typename Alloc, |
| 185 | typename HashType, |
| 186 | typename HashFunc, |
| 187 | HashType kShard> |
| 188 | const StoreKey* DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Add( |
| 189 | Thread* self, const InKey& key) { |
| 190 | uint64_t hash_start; |
| 191 | if (kIsDebugBuild) { |
| 192 | hash_start = NanoTime(); |
| 193 | } |
| 194 | HashType raw_hash = HashFunc()(key); |
| 195 | if (kIsDebugBuild) { |
| 196 | uint64_t hash_end = NanoTime(); |
| 197 | hash_time_ += hash_end - hash_start; |
| 198 | } |
| 199 | HashType shard_hash = raw_hash / kShard; |
| 200 | HashType shard_bin = raw_hash % kShard; |
| 201 | return shards_[shard_bin]->Add(self, shard_hash, key); |
| 202 | } |
| 203 | |
| 204 | template <typename InKey, |
| 205 | typename StoreKey, |
| 206 | typename Alloc, |
| 207 | typename HashType, |
| 208 | typename HashFunc, |
| 209 | HashType kShard> |
| 210 | DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DedupeSet(const char* set_name, |
| 211 | const Alloc& alloc) |
| 212 | : hash_time_(0) { |
| 213 | for (HashType i = 0; i < kShard; ++i) { |
| 214 | std::ostringstream oss; |
| 215 | oss << set_name << " lock " << i; |
| 216 | shards_[i].reset(new Shard(alloc, oss.str())); |
| 217 | } |
| 218 | } |
| 219 | |
| 220 | template <typename InKey, |
| 221 | typename StoreKey, |
| 222 | typename Alloc, |
| 223 | typename HashType, |
| 224 | typename HashFunc, |
| 225 | HashType kShard> |
| 226 | DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::~DedupeSet() { |
| 227 | // Everything done by member destructors. |
| 228 | } |
| 229 | |
| 230 | template <typename InKey, |
| 231 | typename StoreKey, |
| 232 | typename Alloc, |
| 233 | typename HashType, |
| 234 | typename HashFunc, |
| 235 | HashType kShard> |
| 236 | std::string DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DumpStats( |
| 237 | Thread* self) const { |
| 238 | Stats stats; |
| 239 | for (HashType shard = 0; shard < kShard; ++shard) { |
| 240 | shards_[shard]->UpdateStats(self, &stats); |
| 241 | } |
Andreas Gampe | 46ee31b | 2016-12-14 10:11:49 -0800 | [diff] [blame] | 242 | return android::base::StringPrintf("%zu collisions, %zu max hash collisions, " |
| 243 | "%zu/%zu probe distance, %" PRIu64 " ns hash time", |
| 244 | stats.collision_sum, |
| 245 | stats.collision_max, |
| 246 | stats.total_probe_distance, |
| 247 | stats.total_size, |
| 248 | hash_time_); |
Vladimir Marko | 35831e8 | 2015-09-11 11:59:18 +0100 | [diff] [blame] | 249 | } |
| 250 | |
| 251 | |
| 252 | } // namespace art |
| 253 | |
| 254 | #endif // ART_COMPILER_UTILS_DEDUPE_SET_INL_H_ |