// Copyright (C) 2019 The Android Open Source Project
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <numeric>
#include <random>
#include <unordered_map>

#include <benchmark/benchmark.h>

#include "perfetto/base/logging.h"
#include "perfetto/ext/base/file_utils.h"
#include "perfetto/ext/base/flat_hash_map.h"
#include "perfetto/ext/base/hash.h"
#include "perfetto/ext/base/scoped_file.h"
#include "perfetto/ext/base/string_view.h"
30
// This benchmark allows comparing our FlatHashMap implementation against
// reference implementations from Absl (Google), Folly F14 (FB), and Tessil's
// reference RobinHood hashmap.
// Those libraries are not checked into the repo. If you want to reproduce
// the benchmark you need to:
// - Manually install the three libraries following the instructions in
//   their readmes (they all use cmake).
// - When running cmake, remember to pass
//   -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS='-DNDEBUG -O3 -msse4.2 -mavx'.
//   That sets cflags for a fairer comparison.
// - Set is_debug=false in the GN args.
// - Set the GN var perfetto_benchmark_3p_libs_prefix="/usr/local" (or whatever
//   other directory you set as DESTDIR when running make install).
// The presence of the perfetto_benchmark_3p_libs_prefix GN variable will
// automatically define PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS.
46
47#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
48
49// Last tested: https://github.com/abseil/abseil-cpp @ f2dbd918d.
50#include <absl/container/flat_hash_map.h>
51
52// Last tested: https://github.com/facebook/folly @ 028a9abae3.
53#include <folly/container/F14Map.h>
54
55// Last tested: https://github.com/Tessil/robin-map @ a603419b9.
56#include <tsl/robin_map.h>
57#endif
58
59namespace {
60
61using namespace perfetto;
62using benchmark::Counter;
63using perfetto::base::AlreadyHashed;
64using perfetto::base::LinearProbe;
65using perfetto::base::QuadraticHalfProbe;
66using perfetto::base::QuadraticProbe;
67
68// Our FlatHashMap doesn't have a STL-like interface, mainly because we use
69// columnar-oriented storage, not array-of-tuples, so we can't easily map into
70// that interface. This wrapper makes our FlatHashMap compatible with STL (just
71// for what it takes to build this translation unit), at the cost of some small
72// performance penalty (around 1-2%).
73template <typename Key, typename Value, typename Hasher, typename Probe>
74class Ours : public base::FlatHashMap<Key, Value, Hasher, Probe> {
75 public:
76 struct Iterator {
77 using value_type = std::pair<const Key&, Value&>;
78 Iterator(const Key& k, Value& v) : pair_{k, v} {}
79 value_type* operator->() { return &pair_; }
80 value_type& operator*() { return pair_; }
81 bool operator==(const Iterator& other) const {
82 return &pair_.first == &other.pair_.first;
83 }
84 bool operator!=(const Iterator& other) const { return !operator==(other); }
85 value_type pair_;
86 };
87
88 void insert(std::pair<Key, Value>&& pair) {
89 this->Insert(std::move(pair.first), std::move(pair.second));
90 }
91
92 Iterator find(const Key& key) {
93 const size_t idx = this->FindInternal(key);
94 return Iterator(this->keys_[idx], this->values_[idx]);
95 }
96
97 Iterator end() {
98 return Iterator(this->keys_[this->kNotFound],
99 this->values_[this->kNotFound]);
100 }
101};
102
103std::vector<uint64_t> LoadTraceStrings(benchmark::State& state) {
104 std::vector<uint64_t> str_hashes;
105 // This requires that the user has downloaded the file
106 // go/perfetto-benchmark-trace-strings into /tmp/trace_strings. The file is
107 // too big (2.3 GB after uncompression) and it's not worth adding it to the
108 // //test/data. Also it contains data from a team member's phone and cannot
109 // be public.
110 base::ScopedFstream f(fopen("/tmp/trace_strings", "re"));
111 if (!f) {
112 state.SkipWithError(
113 "Test strings missing. Googlers: download "
114 "go/perfetto-benchmark-trace-strings and save into /tmp/trace_strings");
115 return str_hashes;
116 }
117 char line[4096];
118 while (fgets(line, sizeof(line), *f)) {
119 base::Hash hasher;
120 hasher.Update(line, strlen(line));
121 str_hashes.emplace_back(hasher.digest());
122 }
123 return str_hashes;
124}
125
126bool IsBenchmarkFunctionalOnly() {
127 return getenv("BENCHMARK_FUNCTIONAL_TEST_ONLY") != nullptr;
128}
129
130size_t num_samples() {
131 return IsBenchmarkFunctionalOnly() ? size_t(100) : size_t(10 * 1000 * 1000);
132}
133
134// Uses directly the base::FlatHashMap with no STL wrapper. Configures the map
135// in append-only mode.
136void BM_HashMap_InsertTraceStrings_AppendOnly(benchmark::State& state) {
137 std::vector<uint64_t> hashes = LoadTraceStrings(state);
138 for (auto _ : state) {
139 base::FlatHashMap<uint64_t, uint64_t, AlreadyHashed<uint64_t>, LinearProbe,
140 /*AppendOnly=*/true>
141 mapz;
142 for (uint64_t hash : hashes)
143 mapz.Insert(hash, 42);
144
145 benchmark::ClobberMemory();
146 state.counters["uniq"] = Counter(static_cast<double>(mapz.size()));
147 }
148 state.counters["tot"] = Counter(static_cast<double>(hashes.size()),
149 Counter::kIsIterationInvariant);
150 state.counters["rate"] = Counter(static_cast<double>(hashes.size()),
151 Counter::kIsIterationInvariantRate);
152}
153
154template <typename MapType>
155void BM_HashMap_InsertTraceStrings(benchmark::State& state) {
156 std::vector<uint64_t> hashes = LoadTraceStrings(state);
157 for (auto _ : state) {
158 MapType mapz;
159 for (uint64_t hash : hashes)
160 mapz.insert({hash, 42});
161
162 benchmark::ClobberMemory();
163 state.counters["uniq"] = Counter(static_cast<double>(mapz.size()));
164 }
165 state.counters["tot"] = Counter(static_cast<double>(hashes.size()),
166 Counter::kIsIterationInvariant);
167 state.counters["rate"] = Counter(static_cast<double>(hashes.size()),
168 Counter::kIsIterationInvariantRate);
169}
170
171template <typename MapType>
172void BM_HashMap_TraceTids(benchmark::State& state) {
173 std::vector<std::pair<char, int>> ops_and_tids;
174 {
175 base::ScopedFstream f(fopen("/tmp/tids", "re"));
176 if (!f) {
177 // This test requires a large (800MB) test file. It's not checked into the
178 // repository's //test/data because it would slow down all developers for
179 // a marginal benefit.
180 state.SkipWithError(
181 "Please run `curl -Lo /tmp/tids "
182 "https://storage.googleapis.com/perfetto/test_datalong_trace_tids.txt"
183 "` and try again.");
184 return;
185 }
186 char op;
187 int tid;
188 while (fscanf(*f, "%c %d\n", &op, &tid) == 2)
189 ops_and_tids.emplace_back(op, tid);
190 }
191
192 for (auto _ : state) {
193 MapType mapz;
194 for (const auto& ops_and_tid : ops_and_tids) {
195 if (ops_and_tid.first == '[') {
196 mapz[ops_and_tid.second]++;
197 } else {
198 mapz.insert({ops_and_tid.second, 0});
199 }
200 }
201
202 benchmark::ClobberMemory();
203 state.counters["uniq"] = Counter(static_cast<double>(mapz.size()));
204 }
205 state.counters["rate"] = Counter(static_cast<double>(ops_and_tids.size()),
206 Counter::kIsIterationInvariantRate);
207}
208
209template <typename MapType>
210void BM_HashMap_InsertRandInts(benchmark::State& state) {
211 std::minstd_rand0 rng(0);
212 std::vector<size_t> keys(static_cast<size_t>(num_samples()));
213 std::shuffle(keys.begin(), keys.end(), rng);
214 for (auto _ : state) {
215 MapType mapz;
216 for (const auto key : keys)
217 mapz.insert({key, key});
218 benchmark::DoNotOptimize(mapz);
219 benchmark::ClobberMemory();
220 }
221 state.counters["insertions"] = Counter(static_cast<double>(keys.size()),
222 Counter::kIsIterationInvariantRate);
223}
224
// This test performs insertions on integers that are designed to create a
// lot of clustering on the same small set of buckets.
// This covers the unlucky case of using a map with a poor hashing function.
228template <typename MapType>
229void BM_HashMap_InsertCollidingInts(benchmark::State& state) {
230 std::vector<size_t> keys;
231 const size_t kNumSamples = num_samples();
232
233 // Generates numbers that are all distinct from each other, but that are
234 // designed to collide on the same buckets.
235 constexpr size_t kShift = 8; // Collide on the same 2^8 = 256 buckets.
236 for (size_t i = 0; i < kNumSamples; i++) {
237 size_t bucket = i & ((1 << kShift) - 1); // [0, 256].
238 size_t multiplier = i >> kShift; // 0,0,0... 1,1,1..., 2,2,2...
239 size_t key = 8192 * multiplier + bucket;
240 keys.push_back(key);
241 }
242 for (auto _ : state) {
243 MapType mapz;
244 for (const size_t key : keys)
245 mapz.insert({key, key});
246 benchmark::DoNotOptimize(mapz);
247 benchmark::ClobberMemory();
248 }
249 state.counters["insertions"] = Counter(static_cast<double>(keys.size()),
250 Counter::kIsIterationInvariantRate);
251}
252
// Unlike the previous benchmark, here integers don't just collide on the same
// buckets, they have a large number of duplicates with the same values.
// Most of those insertions are no-ops. This tests the ability of the hashmap
// to deal with cases where the hash function is good but the insertions
// contain a lot of dupes (e.g. dealing with pids).
258template <typename MapType>
259void BM_HashMap_InsertDupeInts(benchmark::State& state) {
260 std::vector<size_t> keys;
261 const size_t kNumSamples = num_samples();
262
263 for (size_t i = 0; i < kNumSamples; i++)
264 keys.push_back(i % 16384);
265
266 for (auto _ : state) {
267 MapType mapz;
268 for (const size_t key : keys)
269 mapz.insert({key, key});
270 benchmark::DoNotOptimize(mapz);
271 benchmark::ClobberMemory();
272 }
273 state.counters["insertions"] = Counter(static_cast<double>(keys.size()),
274 Counter::kIsIterationInvariantRate);
275}
276
277template <typename MapType>
278void BM_HashMap_LookupRandInts(benchmark::State& state) {
279 std::minstd_rand0 rng(0);
280 std::vector<size_t> keys(static_cast<size_t>(num_samples()));
281 std::shuffle(keys.begin(), keys.end(), rng);
282
283 MapType mapz;
284 for (const size_t key : keys)
285 mapz.insert({key, key});
286
287 for (auto _ : state) {
288 int64_t total = 0;
289 for (const size_t key : keys) {
290 auto it = mapz.find(static_cast<uint64_t>(key));
291 PERFETTO_CHECK(it != mapz.end());
292 total += it->second;
293 }
294 benchmark::DoNotOptimize(total);
295 benchmark::ClobberMemory();
296 state.counters["sum"] = Counter(static_cast<double>(total));
297 }
298 state.counters["lookups"] = Counter(static_cast<double>(keys.size()),
299 Counter::kIsIterationInvariantRate);
300}
301
302} // namespace
303
304using Ours_LinearProbing =
305 Ours<uint64_t, uint64_t, AlreadyHashed<uint64_t>, LinearProbe>;
306using Ours_QuadProbing =
307 Ours<uint64_t, uint64_t, AlreadyHashed<uint64_t>, QuadraticProbe>;
308using Ours_QuadCompProbing =
309 Ours<uint64_t, uint64_t, AlreadyHashed<uint64_t>, QuadraticHalfProbe>;
310using StdUnorderedMap =
311 std::unordered_map<uint64_t, uint64_t, AlreadyHashed<uint64_t>>;
312
313#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
314using RobinMap = tsl::robin_map<uint64_t, uint64_t, AlreadyHashed<uint64_t>>;
315using AbslFlatHashMap =
316 absl::flat_hash_map<uint64_t, uint64_t, AlreadyHashed<uint64_t>>;
317using FollyF14FastMap =
318 folly::F14FastMap<uint64_t, uint64_t, AlreadyHashed<uint64_t>>;
319#endif
320
321BENCHMARK(BM_HashMap_InsertTraceStrings_AppendOnly);
322BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, Ours_LinearProbing);
323BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, Ours_QuadProbing);
324BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, StdUnorderedMap);
325#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
326BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, RobinMap);
327BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, AbslFlatHashMap);
328BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, FollyF14FastMap);
329#endif
330
331#define TID_ARGS int, uint64_t, std::hash<int>
332BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, Ours<TID_ARGS, LinearProbe>);
333BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, Ours<TID_ARGS, QuadraticProbe>);
334BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, Ours<TID_ARGS, QuadraticHalfProbe>);
335BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, std::unordered_map<TID_ARGS>);
336#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
337BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, tsl::robin_map<TID_ARGS>);
338BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, absl::flat_hash_map<TID_ARGS>);
339BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, folly::F14FastMap<TID_ARGS>);
340#endif
341
342BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, Ours_LinearProbing);
343BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, Ours_QuadProbing);
344BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, StdUnorderedMap);
345#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
346BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, RobinMap);
347BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, AbslFlatHashMap);
348BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, FollyF14FastMap);
349#endif
350
351BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, Ours_LinearProbing);
352BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, Ours_QuadProbing);
353BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, Ours_QuadCompProbing);
354BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, StdUnorderedMap);
355#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
356BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, RobinMap);
357BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, AbslFlatHashMap);
358BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, FollyF14FastMap);
359#endif
360
361BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, Ours_LinearProbing);
362BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, Ours_QuadProbing);
363BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, Ours_QuadCompProbing);
364BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, StdUnorderedMap);
365#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
366BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, RobinMap);
367BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, AbslFlatHashMap);
368BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, FollyF14FastMap);
369#endif
370
371BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, Ours_LinearProbing);
372BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, Ours_QuadProbing);
373BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, StdUnorderedMap);
374#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
375BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, RobinMap);
376BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, AbslFlatHashMap);
377BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, FollyF14FastMap);
378#endif