| Vladimir Marko | 35831e8 | 2015-09-11 11:59:18 +0100 | [diff] [blame] | 1 | /* | 
 | 2 |  * Copyright (C) 2015 The Android Open Source Project | 
 | 3 |  * | 
 | 4 |  * Licensed under the Apache License, Version 2.0 (the "License"); | 
 | 5 |  * you may not use this file except in compliance with the License. | 
 | 6 |  * You may obtain a copy of the License at | 
 | 7 |  * | 
 | 8 |  *      http://www.apache.org/licenses/LICENSE-2.0 | 
 | 9 |  * | 
 | 10 |  * Unless required by applicable law or agreed to in writing, software | 
 | 11 |  * distributed under the License is distributed on an "AS IS" BASIS, | 
 | 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
 | 13 |  * See the License for the specific language governing permissions and | 
 | 14 |  * limitations under the License. | 
 | 15 |  */ | 
 | 16 |  | 
 | 17 | #ifndef ART_COMPILER_UTILS_DEDUPE_SET_INL_H_ | 
 | 18 | #define ART_COMPILER_UTILS_DEDUPE_SET_INL_H_ | 
 | 19 |  | 
 | 20 | #include "dedupe_set.h" | 
 | 21 |  | 
 | 22 | #include <algorithm> | 
 | 23 | #include <inttypes.h> | 
 | 24 | #include <unordered_map> | 
 | 25 |  | 
| Andreas Gampe | 46ee31b | 2016-12-14 10:11:49 -0800 | [diff] [blame] | 26 | #include "android-base/stringprintf.h" | 
 | 27 |  | 
| Vladimir Marko | 35831e8 | 2015-09-11 11:59:18 +0100 | [diff] [blame] | 28 | #include "base/mutex.h" | 
 | 29 | #include "base/hash_set.h" | 
 | 30 | #include "base/stl_util.h" | 
| Vladimir Marko | 35831e8 | 2015-09-11 11:59:18 +0100 | [diff] [blame] | 31 | #include "base/time_utils.h" | 
 | 32 |  | 
 | 33 | namespace art { | 
 | 34 |  | 
 | 35 | template <typename InKey, | 
 | 36 |           typename StoreKey, | 
 | 37 |           typename Alloc, | 
 | 38 |           typename HashType, | 
 | 39 |           typename HashFunc, | 
 | 40 |           HashType kShard> | 
 | 41 | struct DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Stats { | 
 | 42 |   size_t collision_sum = 0u; | 
 | 43 |   size_t collision_max = 0u; | 
 | 44 |   size_t total_probe_distance = 0u; | 
 | 45 |   size_t total_size = 0u; | 
 | 46 | }; | 
 | 47 |  | 
 | 48 | template <typename InKey, | 
 | 49 |           typename StoreKey, | 
 | 50 |           typename Alloc, | 
 | 51 |           typename HashType, | 
 | 52 |           typename HashFunc, | 
 | 53 |           HashType kShard> | 
 | 54 | class DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Shard { | 
 | 55 |  public: | 
 | 56 |   Shard(const Alloc& alloc, const std::string& lock_name) | 
 | 57 |       : alloc_(alloc), | 
 | 58 |         lock_name_(lock_name), | 
 | 59 |         lock_(lock_name_.c_str()), | 
 | 60 |         keys_() { | 
 | 61 |   } | 
 | 62 |  | 
 | 63 |   ~Shard() { | 
 | 64 |     for (const HashedKey<StoreKey>& key : keys_) { | 
 | 65 |       DCHECK(key.Key() != nullptr); | 
 | 66 |       alloc_.Destroy(key.Key()); | 
 | 67 |     } | 
 | 68 |   } | 
 | 69 |  | 
 | 70 |   const StoreKey* Add(Thread* self, size_t hash, const InKey& in_key) REQUIRES(!lock_) { | 
 | 71 |     MutexLock lock(self, lock_); | 
 | 72 |     HashedKey<InKey> hashed_in_key(hash, &in_key); | 
 | 73 |     auto it = keys_.Find(hashed_in_key); | 
 | 74 |     if (it != keys_.end()) { | 
 | 75 |       DCHECK(it->Key() != nullptr); | 
 | 76 |       return it->Key(); | 
 | 77 |     } | 
 | 78 |     const StoreKey* store_key = alloc_.Copy(in_key); | 
 | 79 |     keys_.Insert(HashedKey<StoreKey> { hash, store_key }); | 
 | 80 |     return store_key; | 
 | 81 |   } | 
 | 82 |  | 
 | 83 |   void UpdateStats(Thread* self, Stats* global_stats) REQUIRES(!lock_) { | 
 | 84 |     // HashSet<> doesn't keep entries ordered by hash, so we actually allocate memory | 
 | 85 |     // for bookkeeping while collecting the stats. | 
 | 86 |     std::unordered_map<HashType, size_t> stats; | 
 | 87 |     { | 
 | 88 |       MutexLock lock(self, lock_); | 
 | 89 |       // Note: The total_probe_distance will be updated with the current state. | 
 | 90 |       // It may have been higher before a re-hash. | 
 | 91 |       global_stats->total_probe_distance += keys_.TotalProbeDistance(); | 
 | 92 |       global_stats->total_size += keys_.Size(); | 
 | 93 |       for (const HashedKey<StoreKey>& key : keys_) { | 
 | 94 |         auto it = stats.find(key.Hash()); | 
 | 95 |         if (it == stats.end()) { | 
 | 96 |           stats.insert({key.Hash(), 1u}); | 
 | 97 |         } else { | 
 | 98 |           ++it->second; | 
 | 99 |         } | 
 | 100 |       } | 
 | 101 |     } | 
 | 102 |     for (const auto& entry : stats) { | 
 | 103 |       size_t number_of_entries = entry.second; | 
 | 104 |       if (number_of_entries > 1u) { | 
 | 105 |         global_stats->collision_sum += number_of_entries - 1u; | 
 | 106 |         global_stats->collision_max = std::max(global_stats->collision_max, number_of_entries); | 
 | 107 |       } | 
 | 108 |     } | 
 | 109 |   } | 
 | 110 |  | 
 | 111 |  private: | 
 | 112 |   template <typename T> | 
 | 113 |   class HashedKey { | 
 | 114 |    public: | 
 | 115 |     HashedKey() : hash_(0u), key_(nullptr) { } | 
 | 116 |     HashedKey(size_t hash, const T* key) : hash_(hash), key_(key) { } | 
 | 117 |  | 
 | 118 |     size_t Hash() const { | 
 | 119 |       return hash_; | 
 | 120 |     } | 
 | 121 |  | 
 | 122 |     const T* Key() const { | 
 | 123 |       return key_; | 
 | 124 |     } | 
 | 125 |  | 
 | 126 |     bool IsEmpty() const { | 
 | 127 |       return Key() == nullptr; | 
 | 128 |     } | 
 | 129 |  | 
 | 130 |     void MakeEmpty() { | 
 | 131 |       key_ = nullptr; | 
 | 132 |     } | 
 | 133 |  | 
 | 134 |    private: | 
 | 135 |     size_t hash_; | 
 | 136 |     const T* key_; | 
 | 137 |   }; | 
 | 138 |  | 
 | 139 |   class ShardEmptyFn { | 
 | 140 |    public: | 
 | 141 |     bool IsEmpty(const HashedKey<StoreKey>& key) const { | 
 | 142 |       return key.IsEmpty(); | 
 | 143 |     } | 
 | 144 |  | 
 | 145 |     void MakeEmpty(HashedKey<StoreKey>& key) { | 
 | 146 |       key.MakeEmpty(); | 
 | 147 |     } | 
 | 148 |   }; | 
 | 149 |  | 
 | 150 |   struct ShardHashFn { | 
 | 151 |     template <typename T> | 
 | 152 |     size_t operator()(const HashedKey<T>& key) const { | 
 | 153 |       return key.Hash(); | 
 | 154 |     } | 
 | 155 |   }; | 
 | 156 |  | 
 | 157 |   struct ShardPred { | 
 | 158 |     typename std::enable_if<!std::is_same<StoreKey, InKey>::value, bool>::type | 
 | 159 |     operator()(const HashedKey<StoreKey>& lhs, const HashedKey<StoreKey>& rhs) const { | 
 | 160 |       DCHECK(lhs.Key() != nullptr); | 
 | 161 |       DCHECK(rhs.Key() != nullptr); | 
 | 162 |       // Rehashing: stored keys are already deduplicated, so we can simply compare key pointers. | 
 | 163 |       return lhs.Key() == rhs.Key(); | 
 | 164 |     } | 
 | 165 |  | 
 | 166 |     template <typename LeftT, typename RightT> | 
 | 167 |     bool operator()(const HashedKey<LeftT>& lhs, const HashedKey<RightT>& rhs) const { | 
 | 168 |       DCHECK(lhs.Key() != nullptr); | 
 | 169 |       DCHECK(rhs.Key() != nullptr); | 
 | 170 |       return lhs.Hash() == rhs.Hash() && | 
 | 171 |           lhs.Key()->size() == rhs.Key()->size() && | 
 | 172 |           std::equal(lhs.Key()->begin(), lhs.Key()->end(), rhs.Key()->begin()); | 
 | 173 |     } | 
 | 174 |   }; | 
 | 175 |  | 
 | 176 |   Alloc alloc_; | 
 | 177 |   const std::string lock_name_; | 
 | 178 |   Mutex lock_; | 
 | 179 |   HashSet<HashedKey<StoreKey>, ShardEmptyFn, ShardHashFn, ShardPred> keys_ GUARDED_BY(lock_); | 
 | 180 | }; | 
 | 181 |  | 
 | 182 | template <typename InKey, | 
 | 183 |           typename StoreKey, | 
 | 184 |           typename Alloc, | 
 | 185 |           typename HashType, | 
 | 186 |           typename HashFunc, | 
 | 187 |           HashType kShard> | 
 | 188 | const StoreKey* DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Add( | 
 | 189 |     Thread* self, const InKey& key) { | 
 | 190 |   uint64_t hash_start; | 
 | 191 |   if (kIsDebugBuild) { | 
 | 192 |     hash_start = NanoTime(); | 
 | 193 |   } | 
 | 194 |   HashType raw_hash = HashFunc()(key); | 
 | 195 |   if (kIsDebugBuild) { | 
 | 196 |     uint64_t hash_end = NanoTime(); | 
 | 197 |     hash_time_ += hash_end - hash_start; | 
 | 198 |   } | 
 | 199 |   HashType shard_hash = raw_hash / kShard; | 
 | 200 |   HashType shard_bin = raw_hash % kShard; | 
 | 201 |   return shards_[shard_bin]->Add(self, shard_hash, key); | 
 | 202 | } | 
 | 203 |  | 
 | 204 | template <typename InKey, | 
 | 205 |           typename StoreKey, | 
 | 206 |           typename Alloc, | 
 | 207 |           typename HashType, | 
 | 208 |           typename HashFunc, | 
 | 209 |           HashType kShard> | 
 | 210 | DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DedupeSet(const char* set_name, | 
 | 211 |                                                                          const Alloc& alloc) | 
 | 212 |     : hash_time_(0) { | 
 | 213 |   for (HashType i = 0; i < kShard; ++i) { | 
 | 214 |     std::ostringstream oss; | 
 | 215 |     oss << set_name << " lock " << i; | 
 | 216 |     shards_[i].reset(new Shard(alloc, oss.str())); | 
 | 217 |   } | 
 | 218 | } | 
 | 219 |  | 
 | 220 | template <typename InKey, | 
 | 221 |           typename StoreKey, | 
 | 222 |           typename Alloc, | 
 | 223 |           typename HashType, | 
 | 224 |           typename HashFunc, | 
 | 225 |           HashType kShard> | 
 | 226 | DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::~DedupeSet() { | 
 | 227 |   // Everything done by member destructors. | 
 | 228 | } | 
 | 229 |  | 
 | 230 | template <typename InKey, | 
 | 231 |           typename StoreKey, | 
 | 232 |           typename Alloc, | 
 | 233 |           typename HashType, | 
 | 234 |           typename HashFunc, | 
 | 235 |           HashType kShard> | 
 | 236 | std::string DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DumpStats( | 
 | 237 |     Thread* self) const { | 
 | 238 |   Stats stats; | 
 | 239 |   for (HashType shard = 0; shard < kShard; ++shard) { | 
 | 240 |     shards_[shard]->UpdateStats(self, &stats); | 
 | 241 |   } | 
| Andreas Gampe | 46ee31b | 2016-12-14 10:11:49 -0800 | [diff] [blame] | 242 |   return android::base::StringPrintf("%zu collisions, %zu max hash collisions, " | 
 | 243 |                                      "%zu/%zu probe distance, %" PRIu64 " ns hash time", | 
 | 244 |                                      stats.collision_sum, | 
 | 245 |                                      stats.collision_max, | 
 | 246 |                                      stats.total_probe_distance, | 
 | 247 |                                      stats.total_size, | 
 | 248 |                                      hash_time_); | 
| Vladimir Marko | 35831e8 | 2015-09-11 11:59:18 +0100 | [diff] [blame] | 249 | } | 
 | 250 |  | 
 | 251 |  | 
 | 252 | }  // namespace art | 
 | 253 |  | 
 | 254 | #endif  // ART_COMPILER_UTILS_DEDUPE_SET_INL_H_ |