Merge "Add the gpu_mem ftrace events in the memory muxer category"
diff --git a/Android.bp b/Android.bp
index 732b11a..bc02fbf 100644
--- a/Android.bp
+++ b/Android.bp
@@ -6905,6 +6905,7 @@
     srcs: [
         "src/base/base64_unittest.cc",
         "src/base/circular_queue_unittest.cc",
+        "src/base/flat_hash_map_unittest.cc",
         "src/base/flat_set_unittest.cc",
         "src/base/getopt_compat_unittest.cc",
         "src/base/logging_unittest.cc",
diff --git a/BUILD b/BUILD
index f4e0753..37e3fc6 100644
--- a/BUILD
+++ b/BUILD
@@ -343,6 +343,7 @@
         "include/perfetto/ext/base/endian.h",
         "include/perfetto/ext/base/event_fd.h",
         "include/perfetto/ext/base/file_utils.h",
+        "include/perfetto/ext/base/flat_hash_map.h",
         "include/perfetto/ext/base/getopt.h",
         "include/perfetto/ext/base/getopt_compat.h",
         "include/perfetto/ext/base/hash.h",
diff --git a/include/perfetto/ext/base/BUILD.gn b/include/perfetto/ext/base/BUILD.gn
index de97cea..91dfa72 100644
--- a/include/perfetto/ext/base/BUILD.gn
+++ b/include/perfetto/ext/base/BUILD.gn
@@ -25,6 +25,7 @@
     "endian.h",
     "event_fd.h",
     "file_utils.h",
+    "flat_hash_map.h",
     "getopt.h",
     "getopt_compat.h",
     "hash.h",
diff --git a/include/perfetto/ext/base/flat_hash_map.h b/include/perfetto/ext/base/flat_hash_map.h
new file mode 100644
index 0000000..f8c640f
--- /dev/null
+++ b/include/perfetto/ext/base/flat_hash_map.h
@@ -0,0 +1,379 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_
+#define INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_
+
+#include "perfetto/base/compiler.h"
+#include "perfetto/base/logging.h"
+#include "perfetto/ext/base/utils.h"
+
+#include <algorithm>
+#include <functional>
+#include <limits>
+#include <memory>
+#include <utility>
+
+namespace perfetto {
+namespace base {
+
+// An open-addressing hashmap implementation.
+// Pointers are not stable, neither for keys nor values.
+// Has performance similar to a RobinHood hash (without the complications)
+// and is roughly 2x faster than std::unordered_map.
+//
+// When used to implement a string pool in TraceProcessor, the performance
+// characteristics obtained by replaying the set of strings seen in a 4GB trace
+// (226M strings, 1M unique) are the following (see flat_hash_map_benchmark.cc):
+// This(Linear+AppendOnly)    879,383,676 ns    258.013M insertions/s
+// This(LinearProbe):         909,206,047 ns    249.546M insertions/s
+// This(QuadraticProbe):    1,083,844,388 ns    209.363M insertions/s
+// std::unordered_map:      6,203,351,870 ns    36.5811M insertions/s
+// tsl::robin_map:            931,403,397 ns    243.622M insertions/s
+// absl::flat_hash_map:       998,013,459 ns    227.379M insertions/s
+// FollyF14FastMap:         1,181,480,602 ns    192.074M insertions/s
+
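+// Usage sketch (illustrative only; |DoSomething| is a placeholder for caller
+// code, and the defaults shown below are std::hash + QuadraticProbe):
+//
+//   base::FlatHashMap<uint64_t, int> map;
+//   auto it_and_inserted = map.Insert(42u, 1);  // -> {Value*, inserted?}
+//   if (int* value = map.Find(42u))
+//     (*value)++;
+//   for (auto it = map.GetIterator(); it; ++it)
+//     DoSomething(it.key(), it.value());
+//   map.Erase(42u);  // Not allowed when AppendOnly=true.
+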
+// The structs below define the probing algorithm used to probe slots upon a
+// collision. They are guaranteed to visit all slots as our table size is always
+// a power of two (see https://en.wikipedia.org/wiki/Quadratic_probing).
+
+// Linear probing can be faster if the hashing is well distributed and the load
+// is not high. For TraceProcessor's StringPool this is the fastest. It can
+// degenerate badly if the hashing doesn't spread (e.g., if pids are used
+// directly as keys with a no-op hashing function).
+struct LinearProbe {
+  static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
+    return (key_hash + step) & (capacity - 1);  // Linear probe
+  }
+};
+
+// Generates the sequence: 0, 3, 10, 21, 36, 55, ...
+// Can be a bit (~5%) slower than LinearProbe because it's less cache hot, but
+// avoids degenerating badly if the hash function is bad and causes clusters.
+// A good default choice unless benchmarks prove otherwise.
+struct QuadraticProbe {
+  static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
+    return (key_hash + 2 * step * step + step) & (capacity - 1);
+  }
+};
+
+// Tends to perform in the middle between linear and quadratic.
+// It's a bit more cache-effective than the QuadraticProbe but can create more
+// clustering if the hash function doesn't spread well.
+// Generates the sequence: 0, 1, 3, 6, 10, 15, 21, ...
+struct QuadraticHalfProbe {
+  static inline size_t Calc(size_t key_hash, size_t step, size_t capacity) {
+    return (key_hash + (step * step + step) / 2) & (capacity - 1);
+  }
+};
+
+template <typename Key,
+          typename Value,
+          typename Hasher = std::hash<Key>,
+          typename Probe = QuadraticProbe,
+          bool AppendOnly = false>
+class FlatHashMap {
+ public:
+  class Iterator {
+   public:
+    explicit Iterator(const FlatHashMap* map) : map_(map) { FindNextNonFree(); }
+    ~Iterator() = default;
+    Iterator(const Iterator&) = default;
+    Iterator& operator=(const Iterator&) = default;
+    Iterator(Iterator&&) noexcept = default;
+    Iterator& operator=(Iterator&&) noexcept = default;
+
+    Key& key() { return map_->keys_[idx_]; }
+    Value& value() { return map_->values_[idx_]; }
+    const Key& key() const { return map_->keys_[idx_]; }
+    const Value& value() const { return map_->values_[idx_]; }
+
+    explicit operator bool() const { return idx_ != kEnd; }
+    Iterator& operator++() {
+      PERFETTO_DCHECK(idx_ < map_->capacity_);
+      ++idx_;
+      FindNextNonFree();
+      return *this;
+    }
+
+   private:
+    static constexpr size_t kEnd = std::numeric_limits<size_t>::max();
+
+    void FindNextNonFree() {
+      const auto& tags = map_->tags_;
+      for (; idx_ < map_->capacity_; idx_++) {
+        if (tags[idx_] != kFreeSlot && (AppendOnly || tags[idx_] != kTombstone))
+          return;
+      }
+      idx_ = kEnd;
+    }
+
+    const FlatHashMap* map_ = nullptr;
+    size_t idx_ = 0;
+  };  // Iterator
+
+  static constexpr int kDefaultLoadLimitPct = 75;
+  explicit FlatHashMap(size_t initial_capacity = 0,
+                       int load_limit_pct = kDefaultLoadLimitPct)
+      : load_limit_percent_(load_limit_pct) {
+    if (initial_capacity > 0)
+      Reset(initial_capacity);
+  }
+
+  // We are calling Clear() so that the destructors for the inserted entries are
+  // called (unless they are trivial, in which case it will be a no-op).
+  ~FlatHashMap() { Clear(); }
+
+  FlatHashMap(FlatHashMap&& other) noexcept {
+    tags_ = std::move(other.tags_);
+    keys_ = std::move(other.keys_);
+    values_ = std::move(other.values_);
+    capacity_ = other.capacity_;
+    size_ = other.size_;
+    size_plus_tombstones_ = other.size_plus_tombstones_;
+    max_probe_length_ = other.max_probe_length_;
+    load_limit_ = other.load_limit_;
+    load_limit_percent_ = other.load_limit_percent_;
+
+    new (&other) FlatHashMap();
+  }
+
+  FlatHashMap& operator=(FlatHashMap&& other) noexcept {
+    this->~FlatHashMap();
+    new (this) FlatHashMap(std::move(other));
+    return *this;
+  }
+
+  FlatHashMap(const FlatHashMap&) = delete;
+  FlatHashMap& operator=(const FlatHashMap&) = delete;
+
+  std::pair<Value*, bool> Insert(Key key, Value value) {
+    if (PERFETTO_UNLIKELY(size_plus_tombstones_ >= load_limit_))
+      GrowAndRehash();
+    PERFETTO_DCHECK((capacity_ & (capacity_ - 1)) == 0);  // Must be a pow2.
+    const size_t key_hash = Hasher{}(key);
+    const uint8_t tag = HashToTag(key_hash);
+    const size_t kSlotNotFound = std::numeric_limits<size_t>::max();
+    size_t insertion_slot = kSlotNotFound;
+    size_t probe_len = 0;
+
+    // Start the iteration at the desired slot (key_hash % capacity_) searching
+    // either for a free slot or a tombstone. In the worst case we might end up
+    // scanning the whole array of slots. The Probe functions are guaranteed to
+    // visit all the slots within |capacity_| steps.
+    // If we find a free slot, we can stop the search immediately (a free slot
+    // acts as an "end of chain" marker for entries having the same hash).
+    // If we find a tombstone (a deleted slot) we remember its position, but
+    // have to keep searching until we hit a free slot, to make sure we don't
+    // insert a duplicate key.
+    for (; probe_len < capacity_; ++probe_len) {
+      const size_t idx = Probe::Calc(key_hash, probe_len, capacity_);
+      PERFETTO_DCHECK(idx < capacity_);
+      const uint8_t tag_idx = tags_[idx];
+      if (tag_idx == kFreeSlot) {
+        // Rationale for "insertion_slot == kSlotNotFound": if we encountered
+        // a tombstone while iterating, we would much rather reuse that slot
+        // than extend the probing length by taking a new one.
+        if (AppendOnly || insertion_slot == kSlotNotFound)
+          insertion_slot = idx;
+        break;
+      }
+      // We should never encounter tombstones in AppendOnly mode.
+      PERFETTO_DCHECK(!(tag_idx == kTombstone && AppendOnly));
+      if (!AppendOnly && tag_idx == kTombstone) {
+        insertion_slot = idx;
+        continue;
+      }
+      if (tag_idx == tag && keys_[idx] == key) {
+        // The key is already in the map.
+        return std::make_pair(&values_[idx], false);
+      }
+    }  // for (idx)
+
+    // We should never run out of slots.
+    PERFETTO_CHECK(insertion_slot < capacity_);
+
+    // We found a free slot (or a tombstone). Proceed with the insertion.
+    Value* value_idx = &values_[insertion_slot];
+    new (&keys_[insertion_slot]) Key(std::move(key));
+    new (value_idx) Value(std::move(value));
+    tags_[insertion_slot] = tag;
+    max_probe_length_ = std::max(max_probe_length_, probe_len + 1);
+    size_++;
+    size_plus_tombstones_++;
+    return std::make_pair(value_idx, true);
+  }
+
+  Value* Find(const Key& key) const {
+    const size_t idx = FindInternal(key);
+    if (idx == kNotFound)
+      return nullptr;
+    return &values_[idx];
+  }
+
+  bool Erase(const Key& key) {
+    if (AppendOnly)
+      PERFETTO_FATAL("Erase() not supported because AppendOnly=true");
+    size_t idx = FindInternal(key);
+    if (idx == kNotFound)
+      return false;
+    EraseInternal(idx);
+    return true;
+  }
+
+  void Clear() {
+    for (size_t i = 0; i < capacity_; ++i) {
+      const uint8_t tag = tags_[i];
+      if (tag != kFreeSlot && tag != kTombstone)
+        EraseInternal(i);
+    }
+    // Clear all tombstones. We really need to do this for AppendOnly.
+    GrowAndRehash();
+  }
+
+  Value& operator[](Key key) {
+    auto it_and_inserted = Insert(std::move(key), Value{});
+    return *it_and_inserted.first;
+  }
+
+  Iterator GetIterator() { return Iterator(this); }
+  const Iterator GetIterator() const { return Iterator(this); }
+
+  size_t size() const { return size_; }
+  size_t capacity() const { return capacity_; }
+  void set_load_limit_pct(int percent) {
+    PERFETTO_CHECK(percent > 0 && percent <= 100);
+    load_limit_percent_ = percent;
+  }
+
+  // "protected" here is only for the flat_hash_map_benchmark.cc. Everything
+  // below is by all means private.
+ protected:
+  enum ReservedTags : uint8_t { kFreeSlot = 0, kTombstone = 1 };
+  static constexpr size_t kNotFound = std::numeric_limits<size_t>::max();
+
+  size_t FindInternal(const Key& key) const {
+    const size_t key_hash = Hasher{}(key);
+    const uint8_t tag = HashToTag(key_hash);
+    PERFETTO_DCHECK((capacity_ & (capacity_ - 1)) == 0);  // Must be a pow2.
+    PERFETTO_DCHECK(max_probe_length_ <= capacity_);
+    for (size_t i = 0; i < max_probe_length_; ++i) {
+      const size_t idx = Probe::Calc(key_hash, i, capacity_);
+      const uint8_t tag_idx = tags_[idx];
+
+      if (tag_idx == kFreeSlot)
+        return kNotFound;
+      // HashToTag() never returns kTombstone, so if the slot holds a tombstone
+      // the tag comparison below cannot match and we simply skip over it,
+      // which is exactly what we want.
+      if (tag_idx == tag && keys_[idx] == key) {
+        PERFETTO_DCHECK(tag_idx > kTombstone);
+        return idx;
+      }
+    }  // for (idx)
+    return kNotFound;
+  }
+
+  void EraseInternal(size_t idx) {
+    PERFETTO_DCHECK(tags_[idx] > kTombstone);
+    PERFETTO_DCHECK(size_ > 0 && size_plus_tombstones_ > 0);
+    PERFETTO_DCHECK(size_plus_tombstones_ >= size_);
+    tags_[idx] = kTombstone;
+    keys_[idx].~Key();
+    values_[idx].~Value();
+    size_--;
+  }
+
+  PERFETTO_NO_INLINE void GrowAndRehash() {
+    PERFETTO_DCHECK(size_ <= size_plus_tombstones_);
+    PERFETTO_DCHECK(size_plus_tombstones_ <= capacity_);
+    const size_t old_capacity = capacity_;
+    size_t new_capacity = std::max(old_capacity, size_t(1024));
+    size_t old_size_bytes = old_capacity * (sizeof(Key) + sizeof(Value));
+
+    // Grow quickly up to 1MB, then chill.
+    if (size_ < load_limit_) {
+      // If we have a lot of tombstones, don't grow at all, just rehash. The
+      // re-hashing loop will remove the tombstones and repack the slots.
+    } else if (old_size_bytes < 1 * 1024 * 1024) {
+      new_capacity *= 8;
+    } else {
+      new_capacity *= 2;
+    }
+    auto old_tags(std::move(tags_));
+    auto old_keys(std::move(keys_));
+    auto old_values(std::move(values_));
+    size_t old_size = size_;
+
+    // This must be a CHECK (i.e. not just a DCHECK) to prevent UAF attacks on
+    // 32-bit archs, where repeatedly doubling the table size could wrap around.
+    PERFETTO_CHECK(new_capacity >= old_capacity);
+    Reset(new_capacity);
+
+    size_t new_size = 0;  // Recompute the size.
+    for (size_t i = 0; i < old_capacity; ++i) {
+      const uint8_t old_tag = old_tags[i];
+      if (old_tag != kFreeSlot && old_tag != kTombstone) {
+        Insert(std::move(old_keys[i]), std::move(old_values[i]));
+        old_keys[i].~Key();  // Destroy the old objects.
+        old_values[i].~Value();
+        new_size++;
+      }
+    }
+    PERFETTO_DCHECK(new_size == old_size);
+    size_ = size_plus_tombstones_ = new_size;
+  }
+
+  // Doesn't call destructors. Use Clear() for that.
+  PERFETTO_NO_INLINE void Reset(size_t n) {
+    PERFETTO_DCHECK((n & (n - 1)) == 0);  // Must be a pow2.
+
+    capacity_ = n;
+    max_probe_length_ = 0;
+    size_ = 0;
+    size_plus_tombstones_ = 0;
+    load_limit_ = n * static_cast<size_t>(load_limit_percent_) / 100;
+    load_limit_ = std::min(load_limit_, n);
+
+    tags_.reset(new uint8_t[n]);
+    memset(&tags_[0], 0, n);                  // Clear all tags.
+    keys_ = AlignedAllocTyped<Key[]>(n);      // Deliberately not 0-initialized.
+    values_ = AlignedAllocTyped<Value[]>(n);  // Deliberately not 0-initialized.
+  }
+
+  static inline uint8_t HashToTag(size_t full_hash) {
+    uint8_t tag = full_hash >> (sizeof(full_hash) * 8 - 8);
+    // Ensure the tag is always >= 2. We use 0 and 1 for kFreeSlot and kTombstone.
+    tag += (tag <= kTombstone) << 1;
+    PERFETTO_DCHECK(tag > kTombstone);
+    return tag;
+  }
+
+  size_t capacity_ = 0;
+  size_t size_ = 0;
+  size_t size_plus_tombstones_ = 0;
+  size_t max_probe_length_ = 0;
+  size_t load_limit_ = 0;  // Updated every time |capacity_| changes.
+  int load_limit_percent_ =
+      kDefaultLoadLimitPct;  // Load factor limit in % of |capacity_|.
+
+  // These arrays always have |capacity_| elements.
+  // Note: AlignedUniquePtr just allocates memory, doesn't invoke any ctor/dtor.
+  std::unique_ptr<uint8_t[]> tags_;
+  AlignedUniquePtr<Key[]> keys_;
+  AlignedUniquePtr<Value[]> values_;
+};
+
+}  // namespace base
+}  // namespace perfetto
+
+#endif  // INCLUDE_PERFETTO_EXT_BASE_FLAT_HASH_MAP_H_
diff --git a/src/base/BUILD.gn b/src/base/BUILD.gn
index 098103c..df75d32 100644
--- a/src/base/BUILD.gn
+++ b/src/base/BUILD.gn
@@ -162,6 +162,7 @@
   sources = [
     "base64_unittest.cc",
     "circular_queue_unittest.cc",
+    "flat_hash_map_unittest.cc",
     "flat_set_unittest.cc",
     "getopt_compat_unittest.cc",
     "logging_unittest.cc",
@@ -211,13 +212,33 @@
 }
 
 if (enable_perfetto_benchmarks) {
+  declare_args() {
+    perfetto_benchmark_3p_libs_prefix = ""
+  }
   source_set("benchmarks") {
+    # If you intend to reproduce the comparison with {Absl, Folly, Tessil}
+    # you need to manually install those libraries and then set the GN arg
+    # perfetto_benchmark_3p_libs_prefix = "/usr/local"
     testonly = true
     deps = [
       ":base",
       "../../gn:benchmark",
       "../../gn:default_deps",
     ]
-    sources = [ "flat_set_benchmark.cc" ]
+    if (perfetto_benchmark_3p_libs_prefix != "") {
+      configs -= [ "//gn/standalone:c++11" ]
+      configs += [ "//gn/standalone:c++17" ]
+      defines = [ "PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS" ]
+      cflags = [ "-isystem${perfetto_benchmark_3p_libs_prefix}/include" ]
+      libs = [
+        "${perfetto_benchmark_3p_libs_prefix}/lib/libfolly.a",
+        "${perfetto_benchmark_3p_libs_prefix}/lib/libabsl_raw_hash_set.a",
+        "${perfetto_benchmark_3p_libs_prefix}/lib/libabsl_hash.a",
+      ]
+    }
+    sources = [
+      "flat_hash_map_benchmark.cc",
+      "flat_set_benchmark.cc",
+    ]
   }
 }
diff --git a/src/base/flat_hash_map_benchmark.cc b/src/base/flat_hash_map_benchmark.cc
new file mode 100644
index 0000000..33da669
--- /dev/null
+++ b/src/base/flat_hash_map_benchmark.cc
@@ -0,0 +1,378 @@
+// Copyright (C) 2019 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <functional>
+#include <numeric>
+#include <random>
+#include <unordered_map>
+
+#include <benchmark/benchmark.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "perfetto/base/logging.h"
+#include "perfetto/ext/base/file_utils.h"
+#include "perfetto/ext/base/flat_hash_map.h"
+#include "perfetto/ext/base/hash.h"
+#include "perfetto/ext/base/scoped_file.h"
+#include "perfetto/ext/base/string_view.h"
+
+// This benchmark compares our FlatHashMap implementation against reference
+// implementations from Absl (Google), Folly F14 (FB), and Tessil's
+// reference RobinHood hashmap.
+// Those libraries are not checked into the repo. If you want to reproduce
+// the benchmark you need to:
+// - Manually install the three libraries following the instructions in
+//   their readme (they all use cmake).
+// - When running cmake, remember to pass
+//   -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS='-DNDEBUG -O3 -msse4.2 -mavx'.
+//   That sets cflags for a more fair comparison.
+// - Set is_debug=false in the GN args.
+// - Set the GN var perfetto_benchmark_3p_libs_prefix="/usr/local" (or whatever
+//   other directory you set as DESTDIR when running make install).
+// The presence of the perfetto_benchmark_3p_libs_prefix GN variable will
+// automatically define PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS.
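+//
+// Example invocation (a sketch; the gn/ninja wrapper paths and the benchmark
+// target name are assumptions about a typical checkout, adjust as needed):
+//   tools/gn gen out/rel --args='is_debug=false
+//       perfetto_benchmark_3p_libs_prefix="/usr/local"'
+//   tools/ninja -C out/rel perfetto_benchmarks
+//   out/rel/perfetto_benchmarks --benchmark_filter='BM_HashMap.*'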
+
+#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
+
+// Last tested: https://github.com/abseil/abseil-cpp @ f2dbd918d.
+#include <absl/container/flat_hash_map.h>
+
+// Last tested: https://github.com/facebook/folly @ 028a9abae3.
+#include <folly/container/F14Map.h>
+
+// Last tested: https://github.com/Tessil/robin-map @ a603419b9.
+#include <tsl/robin_map.h>
+#endif
+
+namespace {
+
+using namespace perfetto;
+using benchmark::Counter;
+using perfetto::base::AlreadyHashed;
+using perfetto::base::LinearProbe;
+using perfetto::base::QuadraticHalfProbe;
+using perfetto::base::QuadraticProbe;
+
+// Our FlatHashMap doesn't have an STL-like interface, mainly because we use
+// column-oriented storage (separate key and value arrays) rather than an
+// array of tuples, so we can't easily map onto that interface. This wrapper
+// makes our FlatHashMap compatible with STL (just for what it takes to build
+// this translation unit), at the cost of a small performance penalty (~1-2%).
+template <typename Key, typename Value, typename Hasher, typename Probe>
+class Ours : public base::FlatHashMap<Key, Value, Hasher, Probe> {
+ public:
+  struct Iterator {
+    using value_type = std::pair<const Key&, Value&>;
+    Iterator(const Key& k, Value& v) : pair_{k, v} {}
+    value_type* operator->() { return &pair_; }
+    value_type& operator*() { return pair_; }
+    bool operator==(const Iterator& other) const {
+      return &pair_.first == &other.pair_.first;
+    }
+    bool operator!=(const Iterator& other) const { return !operator==(other); }
+    value_type pair_;
+  };
+
+  void insert(std::pair<Key, Value>&& pair) {
+    this->Insert(std::move(pair.first), std::move(pair.second));
+  }
+
+  Iterator find(const Key& key) {
+    const size_t idx = this->FindInternal(key);
+    return Iterator(this->keys_[idx], this->values_[idx]);
+  }
+
+  Iterator end() {
+    return Iterator(this->keys_[this->kNotFound],
+                    this->values_[this->kNotFound]);
+  }
+};
+
+std::vector<uint64_t> LoadTraceStrings(benchmark::State& state) {
+  std::vector<uint64_t> str_hashes;
+  // This requires that the user has downloaded the file
+  // go/perfetto-benchmark-trace-strings into /tmp/trace_strings. The file is
+  // too big (2.3 GB after decompression) and it's not worth adding it to
+  // //test/data. Also it contains data from a team member's phone and cannot
+  // be public.
+  base::ScopedFstream f(fopen("/tmp/trace_strings", "re"));
+  if (!f) {
+    state.SkipWithError(
+        "Test strings missing. Googlers: download "
+        "go/perfetto-benchmark-trace-strings and save into /tmp/trace_strings");
+    return str_hashes;
+  }
+  char line[4096];
+  while (fgets(line, sizeof(line), *f)) {
+    base::Hash hasher;
+    hasher.Update(line, strlen(line));
+    str_hashes.emplace_back(hasher.digest());
+  }
+  return str_hashes;
+}
+
+bool IsBenchmarkFunctionalOnly() {
+  return getenv("BENCHMARK_FUNCTIONAL_TEST_ONLY") != nullptr;
+}
+
+size_t num_samples() {
+  return IsBenchmarkFunctionalOnly() ? size_t(100) : size_t(10 * 1000 * 1000);
+}
+
+// Uses base::FlatHashMap directly, with no STL wrapper. Configures the map
+// in append-only mode.
+void BM_HashMap_InsertTraceStrings_AppendOnly(benchmark::State& state) {
+  std::vector<uint64_t> hashes = LoadTraceStrings(state);
+  for (auto _ : state) {
+    base::FlatHashMap<uint64_t, uint64_t, AlreadyHashed<uint64_t>, LinearProbe,
+                      /*AppendOnly=*/true>
+        mapz;
+    for (uint64_t hash : hashes)
+      mapz.Insert(hash, 42);
+
+    benchmark::ClobberMemory();
+    state.counters["uniq"] = Counter(static_cast<double>(mapz.size()));
+  }
+  state.counters["tot"] = Counter(static_cast<double>(hashes.size()),
+                                  Counter::kIsIterationInvariant);
+  state.counters["rate"] = Counter(static_cast<double>(hashes.size()),
+                                   Counter::kIsIterationInvariantRate);
+}
+
+template <typename MapType>
+void BM_HashMap_InsertTraceStrings(benchmark::State& state) {
+  std::vector<uint64_t> hashes = LoadTraceStrings(state);
+  for (auto _ : state) {
+    MapType mapz;
+    for (uint64_t hash : hashes)
+      mapz.insert({hash, 42});
+
+    benchmark::ClobberMemory();
+    state.counters["uniq"] = Counter(static_cast<double>(mapz.size()));
+  }
+  state.counters["tot"] = Counter(static_cast<double>(hashes.size()),
+                                  Counter::kIsIterationInvariant);
+  state.counters["rate"] = Counter(static_cast<double>(hashes.size()),
+                                   Counter::kIsIterationInvariantRate);
+}
+
+template <typename MapType>
+void BM_HashMap_TraceTids(benchmark::State& state) {
+  std::vector<std::pair<char, int>> ops_and_tids;
+  {
+    base::ScopedFstream f(fopen("/tmp/tids", "re"));
+    if (!f) {
+      // This test requires a large (800MB) test file. It's not checked into the
+      // repository's //test/data because it would slow down all developers for
+      // a marginal benefit.
+      state.SkipWithError(
+          "Please run `curl -Lo /tmp/tids "
+          "https://storage.googleapis.com/perfetto/test_datalong_trace_tids.txt"
+          "` and try again.");
+      return;
+    }
+    char op;
+    int tid;
+    while (fscanf(*f, "%c %d\n", &op, &tid) == 2)
+      ops_and_tids.emplace_back(op, tid);
+  }
+
+  for (auto _ : state) {
+    MapType mapz;
+    for (const auto& ops_and_tid : ops_and_tids) {
+      if (ops_and_tid.first == '[') {
+        mapz[ops_and_tid.second]++;
+      } else {
+        mapz.insert({ops_and_tid.second, 0});
+      }
+    }
+
+    benchmark::ClobberMemory();
+    state.counters["uniq"] = Counter(static_cast<double>(mapz.size()));
+  }
+  state.counters["rate"] = Counter(static_cast<double>(ops_and_tids.size()),
+                                   Counter::kIsIterationInvariantRate);
+}
+
+template <typename MapType>
+void BM_HashMap_InsertRandInts(benchmark::State& state) {
+  std::minstd_rand0 rng(0);
+  std::vector<size_t> keys(static_cast<size_t>(num_samples()));
+  std::iota(keys.begin(), keys.end(), size_t(0));  // Distinct keys, 0..N-1.
+  std::shuffle(keys.begin(), keys.end(), rng);
+  for (auto _ : state) {
+    MapType mapz;
+    for (const auto key : keys)
+      mapz.insert({key, key});
+    benchmark::DoNotOptimize(mapz);
+    benchmark::ClobberMemory();
+  }
+  state.counters["insertions"] = Counter(static_cast<double>(keys.size()),
+                                         Counter::kIsIterationInvariantRate);
+}
+
+// This benchmark performs insertions on integers that are designed to create
+// a lot of clustering on the same small set of buckets.
+// This covers the unlucky case of using a map with a poor hashing function.
+template <typename MapType>
+void BM_HashMap_InsertCollidingInts(benchmark::State& state) {
+  std::vector<size_t> keys;
+  const size_t kNumSamples = num_samples();
+
+  // Generates numbers that are all distinct from each other, but that are
+  // designed to collide on the same buckets.
+  constexpr size_t kShift = 8;  // Collide on the same 2^8 = 256 buckets.
+  for (size_t i = 0; i < kNumSamples; i++) {
+    size_t bucket = i & ((1 << kShift) - 1);  // [0, 255].
+    size_t multiplier = i >> kShift;          // 0,0,0... 1,1,1..., 2,2,2...
+    size_t key = 8192 * multiplier + bucket;
+    keys.push_back(key);
+  }
+  for (auto _ : state) {
+    MapType mapz;
+    for (const size_t key : keys)
+      mapz.insert({key, key});
+    benchmark::DoNotOptimize(mapz);
+    benchmark::ClobberMemory();
+  }
+  state.counters["insertions"] = Counter(static_cast<double>(keys.size()),
+                                         Counter::kIsIterationInvariantRate);
+}
+
+// Unlike the previous benchmark, here integers don't just collide on the same
+// buckets, they have a large number of duplicates with the same values.
+// Most of those insertions are no-ops. This tests the ability of the hashmap
+// to deal with cases where the hash function is good but the insertions
+// contain a lot of dupes (e.g. when dealing with pids).
+template <typename MapType>
+void BM_HashMap_InsertDupeInts(benchmark::State& state) {
+  std::vector<size_t> keys;
+  const size_t kNumSamples = num_samples();
+
+  for (size_t i = 0; i < kNumSamples; i++)
+    keys.push_back(i % 16384);
+
+  for (auto _ : state) {
+    MapType mapz;
+    for (const size_t key : keys)
+      mapz.insert({key, key});
+    benchmark::DoNotOptimize(mapz);
+    benchmark::ClobberMemory();
+  }
+  state.counters["insertions"] = Counter(static_cast<double>(keys.size()),
+                                         Counter::kIsIterationInvariantRate);
+}
+
+template <typename MapType>
+void BM_HashMap_LookupRandInts(benchmark::State& state) {
+  std::minstd_rand0 rng(0);
+  std::vector<size_t> keys(static_cast<size_t>(num_samples()));
+  std::iota(keys.begin(), keys.end(), size_t(0));  // Distinct keys, 0..N-1.
+  std::shuffle(keys.begin(), keys.end(), rng);
+
+  MapType mapz;
+  for (const size_t key : keys)
+    mapz.insert({key, key});
+
+  for (auto _ : state) {
+    int64_t total = 0;
+    for (const size_t key : keys) {
+      auto it = mapz.find(static_cast<uint64_t>(key));
+      PERFETTO_CHECK(it != mapz.end());
+      total += it->second;
+    }
+    benchmark::DoNotOptimize(total);
+    benchmark::ClobberMemory();
+    state.counters["sum"] = Counter(static_cast<double>(total));
+  }
+  state.counters["lookups"] = Counter(static_cast<double>(keys.size()),
+                                      Counter::kIsIterationInvariantRate);
+}
+
+}  // namespace
+
+using Ours_LinearProbing =
+    Ours<uint64_t, uint64_t, AlreadyHashed<uint64_t>, LinearProbe>;
+using Ours_QuadProbing =
+    Ours<uint64_t, uint64_t, AlreadyHashed<uint64_t>, QuadraticProbe>;
+using Ours_QuadCompProbing =
+    Ours<uint64_t, uint64_t, AlreadyHashed<uint64_t>, QuadraticHalfProbe>;
+using StdUnorderedMap =
+    std::unordered_map<uint64_t, uint64_t, AlreadyHashed<uint64_t>>;
+
+#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
+using RobinMap = tsl::robin_map<uint64_t, uint64_t, AlreadyHashed<uint64_t>>;
+using AbslFlatHashMap =
+    absl::flat_hash_map<uint64_t, uint64_t, AlreadyHashed<uint64_t>>;
+using FollyF14FastMap =
+    folly::F14FastMap<uint64_t, uint64_t, AlreadyHashed<uint64_t>>;
+#endif
+
+BENCHMARK(BM_HashMap_InsertTraceStrings_AppendOnly);
+BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, Ours_LinearProbing);
+BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, Ours_QuadProbing);
+BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, StdUnorderedMap);
+#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
+BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, RobinMap);
+BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, AbslFlatHashMap);
+BENCHMARK_TEMPLATE(BM_HashMap_InsertTraceStrings, FollyF14FastMap);
+#endif
+
+#define TID_ARGS int, uint64_t, std::hash<int>
+BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, Ours<TID_ARGS, LinearProbe>);
+BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, Ours<TID_ARGS, QuadraticProbe>);
+BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, Ours<TID_ARGS, QuadraticHalfProbe>);
+BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, std::unordered_map<TID_ARGS>);
+#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
+BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, tsl::robin_map<TID_ARGS>);
+BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, absl::flat_hash_map<TID_ARGS>);
+BENCHMARK_TEMPLATE(BM_HashMap_TraceTids, folly::F14FastMap<TID_ARGS>);
+#endif
+
+BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, Ours_LinearProbing);
+BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, Ours_QuadProbing);
+BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, StdUnorderedMap);
+#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
+BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, RobinMap);
+BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, AbslFlatHashMap);
+BENCHMARK_TEMPLATE(BM_HashMap_InsertRandInts, FollyF14FastMap);
+#endif
+
+BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, Ours_LinearProbing);
+BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, Ours_QuadProbing);
+BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, Ours_QuadCompProbing);
+BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, StdUnorderedMap);
+#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
+BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, RobinMap);
+BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, AbslFlatHashMap);
+BENCHMARK_TEMPLATE(BM_HashMap_InsertCollidingInts, FollyF14FastMap);
+#endif
+
+BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, Ours_LinearProbing);
+BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, Ours_QuadProbing);
+BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, Ours_QuadCompProbing);
+BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, StdUnorderedMap);
+#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
+BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, RobinMap);
+BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, AbslFlatHashMap);
+BENCHMARK_TEMPLATE(BM_HashMap_InsertDupeInts, FollyF14FastMap);
+#endif
+
+BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, Ours_LinearProbing);
+BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, Ours_QuadProbing);
+BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, StdUnorderedMap);
+#if defined(PERFETTO_HASH_MAP_COMPARE_THIRD_PARTY_LIBS)
+BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, RobinMap);
+BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, AbslFlatHashMap);
+BENCHMARK_TEMPLATE(BM_HashMap_LookupRandInts, FollyF14FastMap);
+#endif
diff --git a/src/base/flat_hash_map_unittest.cc b/src/base/flat_hash_map_unittest.cc
new file mode 100644
index 0000000..d128764
--- /dev/null
+++ b/src/base/flat_hash_map_unittest.cc
@@ -0,0 +1,353 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "perfetto/ext/base/flat_hash_map.h"
+
+#include <functional>
+#include <random>
+#include <set>
+#include <unordered_map>
+
+#include "perfetto/ext/base/hash.h"
+#include "test/gtest_and_gmock.h"
+
+namespace perfetto {
+namespace base {
+namespace {
+
+using ::testing::Types;
+
+struct CollidingHasher {
+  size_t operator()(int n) const { return static_cast<size_t>(n % 1000); }
+};
+
+template <typename T>
+class FlatHashMapTest : public testing::Test {
+ public:
+  using Probe = T;
+};
+
+using ProbeTypes = Types<LinearProbe, QuadraticHalfProbe, QuadraticProbe>;
+TYPED_TEST_SUITE(FlatHashMapTest, ProbeTypes, /* trailing ',' for GCC*/);
+
+struct Key {
+  static int instances;
+
+  explicit Key(int v) : val(v) {}
+  ~Key() { instances--; }
+  Key(Key&& other) noexcept {
+    val = other.val;
+    other.val = -1;
+  }
+  bool operator==(const Key& other) { return val == other.val; }
+  int val = 0;
+  int id = instances++;
+};
+
+struct Value {
+  static int instances;
+
+  explicit Value(int v = 0) : val(v) {}
+  ~Value() { instances--; }
+  Value(Value&& other) noexcept {
+    val = other.val;
+    other.val = -1;
+  }
+  Value(const Value&) = delete;
+  int val = 0;
+  int id = instances++;
+};
+
+struct Hasher {
+  size_t operator()(const Key& k) const { return static_cast<size_t>(k.val); }
+};
+
+int Key::instances = 0;
+int Value::instances = 0;
+
+TYPED_TEST(FlatHashMapTest, NonTrivialKeyValues) {
+  FlatHashMap<Key, Value, Hasher, typename TestFixture::Probe> fmap;
+
+  for (int iteration = 0; iteration < 3; iteration++) {
+    const int kNum = 10;
+    for (int i = 0; i < kNum; i++) {
+      ASSERT_TRUE(fmap.Insert(Key(i), Value(i * 2)).second);
+      Value* value = fmap.Find(Key(i));
+      ASSERT_NE(value, nullptr);
+      ASSERT_EQ(value->val, i * 2);
+      ASSERT_EQ(Key::instances, i + 1);
+      ASSERT_EQ(Value::instances, i + 1);
+    }
+
+    ASSERT_TRUE(fmap.Erase(Key(1)));
+    ASSERT_TRUE(fmap.Erase(Key(5)));
+    ASSERT_TRUE(fmap.Erase(Key(9)));
+
+    ASSERT_EQ(Key::instances, kNum - 3);
+    ASSERT_EQ(Value::instances, kNum - 3);
+
+    FlatHashMap<Key, Value, Hasher, typename TestFixture::Probe> fmap2(
+        std::move(fmap));
+    ASSERT_EQ(fmap.size(), 0u);
+    ASSERT_EQ(fmap2.size(), static_cast<size_t>(kNum - 3));
+
+    ASSERT_EQ(Key::instances, kNum - 3);
+    ASSERT_EQ(Value::instances, kNum - 3);
+
+    // Ensure the moved-from map is usable.
+    fmap.Insert(Key(1), Value(-1));
+    fmap.Insert(Key(5), Value(-5));
+    fmap.Insert(Key(9), Value(-9));
+    ASSERT_EQ(Key::instances, (kNum - 3) + 3);
+    ASSERT_EQ(Value::instances, (kNum - 3) + 3);
+
+    fmap2.Clear();
+    ASSERT_EQ(fmap2.size(), 0u);
+    ASSERT_EQ(fmap.size(), 3u);
+    ASSERT_EQ(Key::instances, 3);
+    ASSERT_EQ(Value::instances, 3);
+    ASSERT_EQ(fmap.Find(Key(1))->val, -1);
+    ASSERT_EQ(fmap.Find(Key(5))->val, -5);
+    ASSERT_EQ(fmap.Find(Key(9))->val, -9);
+
+    fmap = std::move(fmap2);
+    ASSERT_EQ(Key::instances, 0);
+    ASSERT_EQ(Value::instances, 0);
+    ASSERT_EQ(fmap.size(), 0u);
+  }
+
+  // Test that operator[] behaves rationally.
+  fmap = decltype(fmap)();  // Re-assign from a default-constructed instance.
+  fmap[Key{2}].val = 102;
+  fmap[Key{1}].val = 101;
+  ASSERT_EQ(fmap.Find(Key{2})->val, 102);
+  ASSERT_EQ(fmap.Find(Key{1})->val, 101);
+  fmap[Key{2}].val = 122;
+  ASSERT_EQ(fmap.Find(Key{2})->val, 122);
+  ASSERT_EQ(fmap[Key{1}].val, 101);
+  auto fmap2(std::move(fmap));
+  ASSERT_EQ(fmap[Key{1}].val, 0);
+  ASSERT_EQ(fmap.size(), 1u);
+}
+
+TYPED_TEST(FlatHashMapTest, AllTagsAreValid) {
+  FlatHashMap<size_t, size_t, base::AlreadyHashed<size_t>,
+              typename TestFixture::Probe>
+      fmap;
+  auto make_key = [](size_t tag) {
+    return tag << ((sizeof(size_t) - 1) * size_t(8));
+  };
+  for (size_t i = 0; i < 256; i++) {
+    size_t key = make_key(i);
+    fmap.Insert(key, i);
+    ASSERT_EQ(fmap.size(), i + 1);
+  }
+  for (size_t i = 0; i < 256; i++) {
+    size_t key = make_key(i);
+    ASSERT_NE(fmap.Find(key), nullptr);
+    ASSERT_EQ(*fmap.Find(key), i);
+  }
+  for (size_t i = 0; i < 256; i++) {
+    size_t key = make_key(i);
+    fmap.Erase(key);
+    ASSERT_EQ(fmap.size(), 255 - i);
+    ASSERT_EQ(fmap.Find(key), nullptr);
+  }
+}
+
+TYPED_TEST(FlatHashMapTest, FillWithTombstones) {
+  FlatHashMap<Key, Value, Hasher, typename TestFixture::Probe> fmap;
+  fmap.set_load_limit_pct(100);
+
+  for (int rep = 0; rep < 3; rep++) {
+    for (int i = 0; i < 1024; i++)
+      ASSERT_TRUE(fmap.Insert(Key(i), Value(i)).second);
+
+    ASSERT_EQ(fmap.size(), 1024u);
+    ASSERT_EQ(Key::instances, 1024);
+    ASSERT_EQ(Value::instances, 1024);
+
+    // Erase all entries.
+    for (int i = 0; i < 1024; i++)
+      ASSERT_TRUE(fmap.Erase(Key(i)));
+
+    ASSERT_EQ(fmap.size(), 0u);
+    ASSERT_EQ(Key::instances, 0);
+    ASSERT_EQ(Value::instances, 0);
+  }
+}
+
+TYPED_TEST(FlatHashMapTest, Collisions) {
+  FlatHashMap<int, int, CollidingHasher, typename TestFixture::Probe> fmap;
+  fmap.set_load_limit_pct(100);
+
+  for (int rep = 0; rep < 3; rep++) {
+    // Insert four values which collide on the same bucket.
+    ASSERT_TRUE(fmap.Insert(1001, 1001).second);
+    ASSERT_TRUE(fmap.Insert(2001, 2001).second);
+    ASSERT_TRUE(fmap.Insert(3001, 3001).second);
+    ASSERT_TRUE(fmap.Insert(4001, 4001).second);
+
+    // Erase the 2nd one, it will create a tombstone.
+    ASSERT_TRUE(fmap.Erase(2001));
+    ASSERT_EQ(fmap.size(), 3u);
+
+    // Insert an entry that exists already, but happens to be located after the
+    // tombstone. Should still fail.
+    ASSERT_FALSE(fmap.Insert(3001, 3001).second);
+    ASSERT_EQ(fmap.size(), 3u);
+
+    ASSERT_TRUE(fmap.Erase(3001));
+    ASSERT_FALSE(fmap.Erase(2001));
+    ASSERT_TRUE(fmap.Erase(4001));
+
+    // The only element left is 1001.
+    ASSERT_EQ(fmap.size(), 1u);
+
+    ASSERT_TRUE(fmap.Erase(1001));
+    ASSERT_EQ(fmap.size(), 0u);
+  }
+}
+
+TYPED_TEST(FlatHashMapTest, ProbeVisitsAllSlots) {
+  const int kIterations = 1024;
+  FlatHashMap<int, int, CollidingHasher, typename TestFixture::Probe> fmap(
+      /*initial_capacity=*/kIterations, /*load_limit_pct=*/100);
+  for (int i = 0; i < kIterations; i++) {
+    ASSERT_TRUE(fmap.Insert(i, i).second);
+  }
+  // If the hashmap hits an expansion this test doesn't make sense: it is
+  // meaningful only if we actually saturate all the buckets.
+  EXPECT_EQ(fmap.capacity(), static_cast<size_t>(kIterations));
+}
+
+TYPED_TEST(FlatHashMapTest, Iterator) {
+  FlatHashMap<int, int, base::AlreadyHashed<int>, typename TestFixture::Probe>
+      fmap;
+
+  auto it = fmap.GetIterator();
+  ASSERT_FALSE(it);
+
+  // Insert 3 values and iterate.
+  ASSERT_TRUE(fmap.Insert(1, 1001).second);
+  ASSERT_TRUE(fmap.Insert(2, 2001).second);
+  ASSERT_TRUE(fmap.Insert(3, 3001).second);
+  it = fmap.GetIterator();
+  for (int i = 1; i <= 3; i++) {
+    ASSERT_TRUE(it);
+    ASSERT_EQ(it.key(), i);
+    ASSERT_EQ(it.value(), i * 1000 + 1);
+    ++it;
+  }
+  ASSERT_FALSE(it);
+
+  // Erase the middle one and iterate.
+  fmap.Erase(2);
+  it = fmap.GetIterator();
+  ASSERT_TRUE(it);
+  ASSERT_EQ(it.key(), 1);
+  ++it;
+  ASSERT_TRUE(it);
+  ASSERT_EQ(it.key(), 3);
+  ++it;
+  ASSERT_FALSE(it);
+
+  // Erase everything and iterate.
+  fmap.Clear();
+  it = fmap.GetIterator();
+  ASSERT_FALSE(it);
+}
+
+TYPED_TEST(FlatHashMapTest, VsUnorderedMap) {
+  std::unordered_map<int, int, CollidingHasher> umap;
+  FlatHashMap<int, int, CollidingHasher, typename TestFixture::Probe> fmap;
+  std::minstd_rand0 rng(0);
+
+  for (int rep = 0; rep < 2; rep++) {
+    std::set<int> keys_copy;
+    const int kRange = 1024;
+
+    // Insert some random elements.
+    for (int i = 0; i < kRange; i++) {
+      int key = static_cast<int>(rng());
+      keys_copy.insert(key);
+      int value = key * 2;
+      auto it_and_inserted_u = umap.insert({key, value});
+      auto it_and_inserted_f = fmap.Insert(key, value);
+      ASSERT_EQ(it_and_inserted_u.second, it_and_inserted_f.second);
+      ASSERT_EQ(*it_and_inserted_f.first, value);
+      ASSERT_EQ(umap.size(), fmap.size());
+      int* res = fmap.Find(key);
+      ASSERT_NE(res, nullptr);
+      ASSERT_EQ(*res, value);
+      ASSERT_EQ(fmap[key], value);  // Test that operator[] behaves like Find().
+    }
+    // Look them up.
+    for (int key : keys_copy) {
+      int* res = fmap.Find(key);
+      ASSERT_NE(res, nullptr);
+      ASSERT_EQ(*res, key * 2);
+      ASSERT_EQ(umap.size(), fmap.size());
+    }
+
+    // Some further deletions / insertions / reinsertions.
+    for (int key : keys_copy) {
+      auto op = rng() % 4;
+
+      if (op < 2) {
+        // With a 50% chance, erase the key.
+        bool erased_u = umap.erase(key) > 0;
+        bool erased_f = fmap.Erase(key);
+        ASSERT_EQ(erased_u, erased_f);
+      } else if (op == 3) {
+        // With a 25% chance, re-insert the same key (should fail).
+        umap.insert({key, 0});
+        ASSERT_FALSE(fmap.Insert(key, 0).second);
+      } else {
+        // With a 25% chance, insert a new key.
+        umap.insert({key + kRange, (key + kRange) * 2});
+        ASSERT_TRUE(fmap.Insert(key + kRange, (key + kRange) * 2).second);
+      }
+
+      ASSERT_EQ(umap.size(), fmap.size());
+    }
+
+    // Re-look up keys. Note some of them might be deleted by the loop above.
+    for (int k : keys_copy) {
+      for (int i = 0; i < 2; i++) {
+        const int key = k + kRange * i;
+        int* res = fmap.Find(key);
+        if (umap.count(key)) {
+          ASSERT_NE(res, nullptr);
+          ASSERT_EQ(*res, key * 2);
+        } else {
+          ASSERT_EQ(res, nullptr);
+        }
+      }
+    }
+
+    fmap.Clear();
+    umap.clear();
+    ASSERT_EQ(fmap.size(), 0u);
+
+    for (int key : keys_copy)
+      ASSERT_EQ(fmap.Find(key), nullptr);
+  }
+}
+
+}  // namespace
+}  // namespace base
+}  // namespace perfetto
diff --git a/src/trace_processor/containers/string_pool.cc b/src/trace_processor/containers/string_pool.cc
index fd65195..0189054 100644
--- a/src/trace_processor/containers/string_pool.cc
+++ b/src/trace_processor/containers/string_pool.cc
@@ -53,8 +53,8 @@
 
 StringPool::~StringPool() = default;
 
-StringPool::StringPool(StringPool&&) = default;
-StringPool& StringPool::operator=(StringPool&&) = default;
+StringPool::StringPool(StringPool&&) noexcept = default;
+StringPool& StringPool::operator=(StringPool&&) noexcept = default;
 
 StringPool::Id StringPool::InsertString(base::StringView str, uint64_t hash) {
   // Try and find enough space in the current block for the string and the
@@ -70,9 +70,8 @@
     // new block to store the string.
     if (str.size() + kMaxMetadataSize >= kMinLargeStringSizeBytes) {
       return InsertLargeString(str, hash);
-    } else {
-      blocks_.emplace_back(kBlockSizeBytes);
     }
+    blocks_.emplace_back(kBlockSizeBytes);
 
     // Try and reserve space again - this time we should definitely succeed.
     std::tie(success, offset) = blocks_.back().TryInsert(str);
@@ -82,7 +81,11 @@
   // Compute the id from the block index and offset and add a mapping from the
   // hash to the id.
   Id string_id = Id::BlockString(blocks_.size() - 1, offset);
-  string_index_.emplace(hash, string_id);
+
+  // Deliberately not adding |string_id| to |string_index_|. The caller
+  // (InternString()) must take care of this.
+  PERFETTO_DCHECK(string_index_.Find(hash));
+
   return string_id;
 }
 
@@ -91,7 +94,11 @@
   large_strings_.emplace_back(new std::string(str.begin(), str.size()));
   // Compute id from the index and add a mapping from the hash to the id.
   Id string_id = Id::LargeString(large_strings_.size() - 1);
-  string_index_.emplace(hash, string_id);
+
+  // Deliberately not adding |string_id| to |string_index_|. The caller
+  // (InternString()) must take care of this.
+  PERFETTO_DCHECK(string_index_.Find(hash));
+
   return string_id;
 }
 
diff --git a/src/trace_processor/containers/string_pool.h b/src/trace_processor/containers/string_pool.h
index 480c085..8d5e22f 100644
--- a/src/trace_processor/containers/string_pool.h
+++ b/src/trace_processor/containers/string_pool.h
@@ -21,9 +21,9 @@
 #include <stdint.h>
 
 #include <limits>
-#include <unordered_map>
 #include <vector>
 
+#include "perfetto/ext/base/flat_hash_map.h"
 #include "perfetto/ext/base/hash.h"
 #include "perfetto/ext/base/optional.h"
 #include "perfetto/ext/base/paged_memory.h"
@@ -107,8 +107,8 @@
   ~StringPool();
 
   // Allow std::move().
-  StringPool(StringPool&&);
-  StringPool& operator=(StringPool&&);
+  StringPool(StringPool&&) noexcept;
+  StringPool& operator=(StringPool&&) noexcept;
 
   // Disable implicit copy.
   StringPool(const StringPool&) = delete;
@@ -119,12 +119,17 @@
       return Id::Null();
 
     auto hash = str.Hash();
-    auto id_it = string_index_.find(hash);
-    if (id_it != string_index_.end()) {
-      PERFETTO_DCHECK(Get(id_it->second) == str);
-      return id_it->second;
+
+    // Perform a hashtable insertion with a null ID just to check if the string
+    // is already interned. If it's not, overwrite the null ID with the real one.
+    auto it_and_inserted = string_index_.Insert(hash, Id());
+    Id* id = it_and_inserted.first;
+    if (!it_and_inserted.second) {
+      PERFETTO_DCHECK(Get(*id) == str);
+      return *id;
     }
-    return InsertString(str, hash);
+    *id = InsertString(str, hash);
+    return *id;
   }
 
   base::Optional<Id> GetId(base::StringView str) const {
@@ -132,10 +137,10 @@
       return Id::Null();
 
     auto hash = str.Hash();
-    auto id_it = string_index_.find(hash);
-    if (id_it != string_index_.end()) {
-      PERFETTO_DCHECK(Get(id_it->second) == str);
-      return id_it->second;
+    Id* id = string_index_.Find(hash);
+    if (id) {
+      PERFETTO_DCHECK(Get(*id) == str);
+      return *id;
     }
     return base::nullopt;
   }
@@ -287,10 +292,12 @@
   std::vector<std::unique_ptr<std::string>> large_strings_;
 
   // Maps hashes of strings to the Id in the string pool.
-  // TODO(lalitm): At some point we should benchmark just using a static
-  // hashtable of 1M elements, we can afford paying a fixed 8MB here
-  std::unordered_map<StringHash, Id, base::AlreadyHashed<StringHash>>
-      string_index_;
+  base::FlatHashMap<StringHash,
+                    Id,
+                    base::AlreadyHashed<StringHash>,
+                    base::LinearProbe,
+                    /*AppendOnly=*/true>
+      string_index_{/*initial_capacity=*/1024u * 1024u};
 };
 
 }  // namespace trace_processor
diff --git a/src/trace_processor/dynamic/thread_state_generator.cc b/src/trace_processor/dynamic/thread_state_generator.cc
index 4d33870..74115cd 100644
--- a/src/trace_processor/dynamic/thread_state_generator.cc
+++ b/src/trace_processor/dynamic/thread_state_generator.cc
@@ -91,7 +91,7 @@
   uint32_t sched_idx = 0;
   uint32_t waking_idx = 0;
   uint32_t blocked_idx = 0;
-  std::unordered_map<UniqueTid, ThreadSchedInfo> state_map;
+  TidInfoMap state_map(/*initial_capacity=*/1024);
   while (sched_idx < sched.row_count() || waking_idx < waking.row_count() ||
          blocked_idx < sched_blocked_reason.row_count()) {
     int64_t sched_ts = sched_idx < sched.row_count()
@@ -117,21 +117,21 @@
   }
 
   // At the end, go through and flush any remaining pending events.
-  for (const auto& utid_to_pending_info : state_map) {
-    UniqueTid utid = utid_to_pending_info.first;
-    const ThreadSchedInfo& pending_info = utid_to_pending_info.second;
+  for (auto it = state_map.GetIterator(); it; ++it) {
+    UniqueTid utid = it.key();
+    const ThreadSchedInfo& pending_info = it.value();
     FlushPendingEventsForThread(utid, pending_info, table.get(), base::nullopt);
   }
 
   return table;
 }
 
-void ThreadStateGenerator::AddSchedEvent(
-    const Table& sched,
-    uint32_t sched_idx,
-    std::unordered_map<UniqueTid, ThreadSchedInfo>& state_map,
-    int64_t trace_end_ts,
-    tables::ThreadStateTable* table) {
+void ThreadStateGenerator::AddSchedEvent(const Table& sched,
+                                         uint32_t sched_idx,
+                                         TidInfoMap& state_map,
+                                         int64_t trace_end_ts,
+                                         tables::ThreadStateTable* table) {
   int64_t ts = sched.GetTypedColumnByName<int64_t>("ts")[sched_idx];
   UniqueTid utid = sched.GetTypedColumnByName<uint32_t>("utid")[sched_idx];
   ThreadSchedInfo* info = &state_map[utid];
@@ -201,10 +201,9 @@
   info->scheduled_row = id_and_row.row;
 }
 
-void ThreadStateGenerator::AddWakingEvent(
-    const Table& waking,
-    uint32_t waking_idx,
-    std::unordered_map<UniqueTid, ThreadSchedInfo>& state_map) {
+void ThreadStateGenerator::AddWakingEvent(const Table& waking,
+                                          uint32_t waking_idx,
+                                          TidInfoMap& state_map) {
   int64_t ts = waking.GetTypedColumnByName<int64_t>("ts")[waking_idx];
   UniqueTid utid = static_cast<UniqueTid>(
       waking.GetTypedColumnByName<int64_t>("ref")[waking_idx]);
@@ -299,10 +298,9 @@
   }
 }
 
-void ThreadStateGenerator::AddBlockedReasonEvent(
-    const Table& blocked_reason,
-    uint32_t blocked_idx,
-    std::unordered_map<UniqueTid, ThreadSchedInfo>& state_map) {
+void ThreadStateGenerator::AddBlockedReasonEvent(const Table& blocked_reason,
+                                                 uint32_t blocked_idx,
+                                                 TidInfoMap& state_map) {
   const auto& utid_col = blocked_reason.GetTypedColumnByName<int64_t>("ref");
   const auto& arg_set_id_col =
       blocked_reason.GetTypedColumnByName<uint32_t>("arg_set_id");
diff --git a/src/trace_processor/dynamic/thread_state_generator.h b/src/trace_processor/dynamic/thread_state_generator.h
index 75895ba..439d4bb 100644
--- a/src/trace_processor/dynamic/thread_state_generator.h
+++ b/src/trace_processor/dynamic/thread_state_generator.h
@@ -19,6 +19,7 @@
 
 #include "src/trace_processor/sqlite/db_sqlite_table.h"
 
+#include "perfetto/ext/base/flat_hash_map.h"
 #include "src/trace_processor/storage/trace_storage.h"
 
 namespace perfetto {
@@ -54,22 +55,25 @@
     base::Optional<int64_t> runnable_ts;
     base::Optional<StringId> blocked_function;
   };
+  using TidInfoMap = base::FlatHashMap<UniqueTid,
+                                       ThreadSchedInfo,
+                                       base::AlreadyHashed<UniqueTid>,
+                                       base::QuadraticProbe,
+                                       /*AppendOnly=*/true>;
 
   void AddSchedEvent(const Table& sched,
                      uint32_t sched_idx,
-                     std::unordered_map<UniqueTid, ThreadSchedInfo>& state_map,
+                     TidInfoMap& state_map,
                      int64_t trace_end_ts,
                      tables::ThreadStateTable* table);
 
-  void AddWakingEvent(
-      const Table& wakeup,
-      uint32_t wakeup_idx,
-      std::unordered_map<UniqueTid, ThreadSchedInfo>& state_map);
+  void AddWakingEvent(const Table& wakeup,
+                      uint32_t wakeup_idx,
+                      TidInfoMap& state_map);
 
-  void AddBlockedReasonEvent(
-      const Table& blocked_reason,
-      uint32_t blocked_idx,
-      std::unordered_map<UniqueTid, ThreadSchedInfo>& state_map);
+  void AddBlockedReasonEvent(const Table& blocked_reason,
+                             uint32_t blocked_idx,
+                             TidInfoMap& state_map);
 
   void FlushPendingEventsForThread(UniqueTid utid,
                                    const ThreadSchedInfo&,
diff --git a/src/trace_processor/importers/common/flow_tracker.cc b/src/trace_processor/importers/common/flow_tracker.cc
index 9e6e2d8..6e7dd2f 100644
--- a/src/trace_processor/importers/common/flow_tracker.cc
+++ b/src/trace_processor/importers/common/flow_tracker.cc
@@ -46,11 +46,11 @@
     context_->storage->IncrementStats(stats::flow_no_enclosing_slice);
     return;
   }
-  if (flow_to_slice_map_.count(flow_id) != 0) {
+  auto it_and_ins = flow_to_slice_map_.Insert(flow_id, open_slice_id.value());
+  if (!it_and_ins.second) {
     context_->storage->IncrementStats(stats::flow_duplicate_id);
     return;
   }
-  flow_to_slice_map_[flow_id] = open_slice_id.value();
 }
 
 void FlowTracker::Step(TrackId track_id, FlowId flow_id) {
@@ -60,13 +60,14 @@
     context_->storage->IncrementStats(stats::flow_no_enclosing_slice);
     return;
   }
-  if (flow_to_slice_map_.count(flow_id) == 0) {
+  auto* it = flow_to_slice_map_.Find(flow_id);
+  if (!it) {
     context_->storage->IncrementStats(stats::flow_step_without_start);
     return;
   }
-  SliceId slice_out_id = flow_to_slice_map_[flow_id];
+  SliceId slice_out_id = *it;
   InsertFlow(flow_id, slice_out_id, open_slice_id.value());
-  flow_to_slice_map_[flow_id] = open_slice_id.value();
+  *it = open_slice_id.value();
 }
 
 void FlowTracker::End(TrackId track_id,
@@ -83,29 +84,28 @@
     context_->storage->IncrementStats(stats::flow_no_enclosing_slice);
     return;
   }
-  if (flow_to_slice_map_.count(flow_id) == 0) {
+  auto* it = flow_to_slice_map_.Find(flow_id);
+  if (!it) {
     context_->storage->IncrementStats(stats::flow_end_without_start);
     return;
   }
-  SliceId slice_out_id = flow_to_slice_map_[flow_id];
-  if (close_flow) {
-    flow_to_slice_map_.erase(flow_to_slice_map_.find(flow_id));
-  }
+  SliceId slice_out_id = *it;
+  if (close_flow)
+    flow_to_slice_map_.Erase(flow_id);
   InsertFlow(flow_id, slice_out_id, open_slice_id.value());
 }
 
 bool FlowTracker::IsActive(FlowId flow_id) const {
-  return flow_to_slice_map_.find(flow_id) != flow_to_slice_map_.end();
+  return flow_to_slice_map_.Find(flow_id) != nullptr;
 }
 
 FlowId FlowTracker::GetFlowIdForV1Event(uint64_t source_id,
                                         StringId cat,
                                         StringId name) {
   V1FlowId v1_flow_id = {source_id, cat, name};
-  auto iter = v1_flow_id_to_flow_id_map_.find(v1_flow_id);
-  if (iter != v1_flow_id_to_flow_id_map_.end()) {
-    return iter->second;
-  }
+  auto* iter = v1_flow_id_to_flow_id_map_.Find(v1_flow_id);
+  if (iter)
+    return *iter;
   FlowId new_id = v1_id_counter_++;
   flow_id_to_v1_flow_id_map_[new_id] = v1_flow_id;
   v1_flow_id_to_flow_id_map_[v1_flow_id] = new_id;
@@ -114,16 +114,16 @@
 
 void FlowTracker::ClosePendingEventsOnTrack(TrackId track_id,
                                             SliceId slice_id) {
-  auto iter = pending_flow_ids_map_.find(track_id);
-  if (iter == pending_flow_ids_map_.end())
+  auto* iter = pending_flow_ids_map_.Find(track_id);
+  if (!iter)
     return;
 
-  for (FlowId flow_id : iter->second) {
+  for (FlowId flow_id : *iter) {
     SliceId slice_out_id = flow_to_slice_map_[flow_id];
     InsertFlow(flow_id, slice_out_id, slice_id);
   }
 
-  pending_flow_ids_map_.erase(iter);
+  pending_flow_ids_map_.Erase(track_id);
 }
 
 void FlowTracker::InsertFlow(FlowId flow_id,
@@ -132,13 +132,13 @@
   tables::FlowTable::Row row(slice_out_id, slice_in_id, kInvalidArgSetId);
   auto id = context_->storage->mutable_flow_table()->Insert(row).id;
 
-  auto it = flow_id_to_v1_flow_id_map_.find(flow_id);
-  if (it != flow_id_to_v1_flow_id_map_.end()) {
+  auto* it = flow_id_to_v1_flow_id_map_.Find(flow_id);
+  if (it) {
     // TODO(b/168007725): Add any args from v1 flow events and also export them.
     auto args_tracker = ArgsTracker(context_);
     auto inserter = context_->args_tracker->AddArgsTo(id);
-    inserter.AddArg(name_key_id_, Variadic::String(it->second.name));
-    inserter.AddArg(cat_key_id_, Variadic::String(it->second.cat));
+    inserter.AddArg(name_key_id_, Variadic::String(it->name));
+    inserter.AddArg(cat_key_id_, Variadic::String(it->cat));
     context_->args_tracker->Flush();
   }
 }
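
The FlowTracker hunks above collapse the old count()/find()/operator[] double lookups into a single FlatHashMap::Find() or Insert() call. A minimal sketch of the idiom, using only the signatures visible in this patch (the uint64_t/uint32_t key and value types are placeholders for FlowId/SliceId):

#include "perfetto/ext/base/flat_hash_map.h"

void FindInsertEraseSketch() {
  perfetto::base::FlatHashMap<uint64_t, uint32_t> map;

  // Insert() returns a {value pointer, inserted} pair, so one probe covers
  // both the duplicate check and the insertion.
  auto it_and_inserted = map.Insert(/*key=*/1, /*value=*/100);
  if (!it_and_inserted.second) {
    // Key already present: the previously stored value is left untouched.
  }

  // Find() returns a pointer to the value, or nullptr if the key is absent,
  // so the old `it != map.end()` checks become plain null checks.
  uint32_t* value = map.Find(1);
  if (value)
    *value = 200;  // Update in place through the returned pointer.

  map.Erase(1);  // Drops the entry; pointers to its value become invalid.
}
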
diff --git a/src/trace_processor/importers/common/flow_tracker.h b/src/trace_processor/importers/common/flow_tracker.h
index f05d854..5def9b6 100644
--- a/src/trace_processor/importers/common/flow_tracker.h
+++ b/src/trace_processor/importers/common/flow_tracker.h
@@ -19,6 +19,7 @@
 
 #include <stdint.h>
 
+#include "perfetto/ext/base/flat_hash_map.h"
 #include "src/trace_processor/importers/common/args_tracker.h"
 #include "src/trace_processor/storage/trace_storage.h"
 #include "src/trace_processor/types/trace_processor_context.h"
@@ -78,11 +79,11 @@
     }
   };
 
-  using FlowToSourceSliceMap = std::unordered_map<FlowId, SliceId>;
-  using PendingFlowsMap = std::unordered_map<TrackId, std::vector<FlowId>>;
+  using FlowToSourceSliceMap = base::FlatHashMap<FlowId, SliceId>;
+  using PendingFlowsMap = base::FlatHashMap<TrackId, std::vector<FlowId>>;
   using V1FlowIdToFlowIdMap =
-      std::unordered_map<V1FlowId, FlowId, V1FlowIdHasher>;
-  using FlowIdToV1FlowId = std::unordered_map<FlowId, V1FlowId>;
+      base::FlatHashMap<V1FlowId, FlowId, V1FlowIdHasher>;
+  using FlowIdToV1FlowId = base::FlatHashMap<FlowId, V1FlowId>;
 
   void InsertFlow(FlowId flow_id,
                   SliceId outgoing_slice_id,
diff --git a/src/trace_processor/importers/common/global_args_tracker.h b/src/trace_processor/importers/common/global_args_tracker.h
index 96c2622..0d1c365 100644
--- a/src/trace_processor/importers/common/global_args_tracker.h
+++ b/src/trace_processor/importers/common/global_args_tracker.h
@@ -17,6 +17,7 @@
 #ifndef SRC_TRACE_PROCESSOR_IMPORTERS_COMMON_GLOBAL_ARGS_TRACKER_H_
 #define SRC_TRACE_PROCESSOR_IMPORTERS_COMMON_GLOBAL_ARGS_TRACKER_H_
 
+#include "perfetto/ext/base/flat_hash_map.h"
 #include "perfetto/ext/base/hash.h"
 #include "perfetto/ext/base/small_vector.h"
 #include "src/trace_processor/storage/trace_storage.h"
@@ -116,13 +117,16 @@
     auto* arg_table = context_->storage->mutable_arg_table();
 
     ArgSetHash digest = hash.digest();
-    auto it = arg_row_for_hash_.find(digest);
-    if (it != arg_row_for_hash_.end())
-      return arg_table->arg_set_id()[it->second];
+    auto it_and_inserted =
+        arg_row_for_hash_.Insert(digest, arg_table->row_count());
+    if (!it_and_inserted.second) {
+      // An arg set with this digest already exists; return its id.
+      return arg_table->arg_set_id()[*it_and_inserted.first];
+    }
 
-    // The +1 ensures that nothing has an id == kInvalidArgSetId == 0.
-    ArgSetId id = static_cast<uint32_t>(arg_row_for_hash_.size()) + 1;
-    arg_row_for_hash_.emplace(digest, arg_table->row_count());
+    // Taking size() after the Insert() ensures that nothing has an id == 0
+    // (0 == kInvalidArgSetId).
+    ArgSetId id = static_cast<uint32_t>(arg_row_for_hash_.size());
     for (uint32_t i : valid_indexes) {
       const auto& arg = args[i];
 
@@ -171,7 +175,7 @@
  private:
   using ArgSetHash = uint64_t;
 
-  std::unordered_map<ArgSetHash, uint32_t, base::AlreadyHashed<ArgSetHash>>
+  base::FlatHashMap<ArgSetHash, uint32_t, base::AlreadyHashed<ArgSetHash>>
       arg_row_for_hash_;
 
   TraceProcessorContext* context_;
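
GlobalArgsTracker keys this map by a precomputed 64-bit digest, so it passes base::AlreadyHashed<> as the third template argument to avoid hashing the key a second time. A sketch of that pattern, assuming AlreadyHashed<> lives in the hash.h header this file already includes:

#include <cstdint>

#include "perfetto/ext/base/flat_hash_map.h"
#include "perfetto/ext/base/hash.h"

void AlreadyHashedSketch() {
  using Digest = uint64_t;
  // The key is already a hash, so the map can use its bits directly.
  perfetto::base::FlatHashMap<Digest, uint32_t,
                              perfetto::base::AlreadyHashed<Digest>>
      row_for_digest;

  Digest digest = 0x1234abcdu;  // Hypothetical digest of one arg set.
  auto it_and_inserted = row_for_digest.Insert(digest, /*row=*/42);
  if (!it_and_inserted.second) {
    // A row for this digest was interned earlier; reuse *it_and_inserted.first.
  }
}
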
diff --git a/src/trace_processor/importers/common/process_tracker.cc b/src/trace_processor/importers/common/process_tracker.cc
index 0981063..47b1d7a 100644
--- a/src/trace_processor/importers/common/process_tracker.cc
+++ b/src/trace_processor/importers/common/process_tracker.cc
@@ -89,7 +89,7 @@
   // of the process, we should also finish the process itself.
   PERFETTO_DCHECK(thread_table->is_main_thread()[utid].value());
   process_table->mutable_end_ts()->Set(*opt_upid, timestamp);
-  pids_.erase(tid);
+  pids_.Erase(tid);
 }
 
 base::Optional<UniqueTid> ProcessTracker::GetThreadOrNull(uint32_t tid) {
@@ -156,8 +156,8 @@
 
   // If the process has been replaced in |pids_|, this thread is dead.
   uint32_t current_pid = processes->pid()[current_upid];
-  auto pid_it = pids_.find(current_pid);
-  if (pid_it != pids_.end() && pid_it->second != current_upid)
+  auto pid_it = pids_.Find(current_pid);
+  if (pid_it && *pid_it != current_upid)
     return false;
 
   return true;
@@ -169,13 +169,13 @@
   auto* threads = context_->storage->mutable_thread_table();
   auto* processes = context_->storage->mutable_process_table();
 
-  auto vector_it = tids_.find(tid);
-  if (vector_it == tids_.end())
+  auto vector_it = tids_.Find(tid);
+  if (!vector_it)
     return base::nullopt;
 
   // Iterate backwards through the threads so ones later in the trace are more
   // likely to be picked.
-  const auto& vector = vector_it->second;
+  const auto& vector = *vector_it;
   for (auto it = vector.rbegin(); it != vector.rend(); it++) {
     UniqueTid current_utid = *it;
 
@@ -225,7 +225,7 @@
                                           uint32_t pid,
                                           StringId main_thread_name,
                                           ThreadNamePriority priority) {
-  pids_.erase(pid);
+  pids_.Erase(pid);
   // TODO(eseckler): Consider erasing all old entries in |tids_| that match the
   // |pid| (those would be for an older process with the same pid). Right now,
   // we keep them in |tids_| (if they weren't erased by EndThread()), but ignore
@@ -325,18 +325,20 @@
 
 UniquePid ProcessTracker::GetOrCreateProcess(uint32_t pid) {
   auto* process_table = context_->storage->mutable_process_table();
-  auto it = pids_.find(pid);
-  if (it != pids_.end()) {
+
+  // If the insertion succeeds, we'll fill the upid below.
+  auto it_and_ins = pids_.Insert(pid, UniquePid{0});
+  if (!it_and_ins.second) {
     // Ensure that the process has not ended.
-    PERFETTO_DCHECK(!process_table->end_ts()[it->second].has_value());
-    return it->second;
+    PERFETTO_DCHECK(!process_table->end_ts()[*it_and_ins.first].has_value());
+    return *it_and_ins.first;
   }
 
   tables::ProcessTable::Row row;
   row.pid = pid;
 
   UniquePid upid = process_table->Insert(row).row;
-  pids_.emplace(pid, upid);
+  *it_and_ins.first = upid;  // Update the newly inserted hashmap entry.
 
   // Create an entry for the main thread.
   // We cannot call StartNewThread() here, because threads for this process
@@ -459,8 +461,8 @@
 
 void ProcessTracker::SetPidZeroIgnoredForIdleProcess() {
   // Create a mapping from (t|p)id 0 -> u(t|p)id 0 for the idle process.
-  tids_.emplace(0, std::vector<UniqueTid>{0});
-  pids_.emplace(0, 0);
+  tids_.Insert(0, std::vector<UniqueTid>{0});
+  pids_.Insert(0, 0);
 
   auto swapper_id = context_->storage->InternString("swapper");
   UpdateThreadName(0, swapper_id, ThreadNamePriority::kTraceProcessorConstant);
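
GetOrCreateProcess() above uses an insert-then-patch idiom: Insert() reserves the slot with a placeholder value, and the returned pointer is overwritten once the real id is known, so the hot path costs a single lookup. A self-contained sketch of the same shape (the static counter is a stand-in for the process table insert):

#include <cstdint>

#include "perfetto/ext/base/flat_hash_map.h"

uint32_t GetOrCreateSketch(perfetto::base::FlatHashMap<uint32_t, uint32_t>& map,
                           uint32_t key) {
  // Reserve the slot with a dummy value. If the key already exists, the dummy
  // is discarded and the stored value is returned unchanged.
  auto it_and_inserted = map.Insert(key, 0u);
  if (!it_and_inserted.second)
    return *it_and_inserted.first;

  // Newly inserted: compute the real value and write it through the returned
  // pointer, avoiding a second hash lookup.
  static uint32_t next_id = 1;  // Stand-in for process_table->Insert(row).row.
  uint32_t id = next_id++;
  *it_and_inserted.first = id;
  return id;
}
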
diff --git a/src/trace_processor/importers/common/process_tracker.h b/src/trace_processor/importers/common/process_tracker.h
index 86c4339..7cfe89d 100644
--- a/src/trace_processor/importers/common/process_tracker.h
+++ b/src/trace_processor/importers/common/process_tracker.h
@@ -19,6 +19,7 @@
 
 #include <tuple>
 
+#include "perfetto/ext/base/flat_hash_map.h"
 #include "perfetto/ext/base/string_view.h"
 #include "src/trace_processor/importers/common/args_tracker.h"
 #include "src/trace_processor/storage/trace_storage.h"
@@ -132,8 +133,8 @@
 
   // Returns the upid for a given pid.
   base::Optional<UniquePid> UpidForPidForTesting(uint32_t pid) {
-    auto it = pids_.find(pid);
-    return it == pids_.end() ? base::nullopt : base::make_optional(it->second);
+    auto it = pids_.Find(pid);
+    return it ? base::make_optional(*it) : base::nullopt;
   }
 
   // Returns the bounds of a range that includes all UniqueTids that have the
@@ -188,10 +189,10 @@
   // simultaneously. This is no longer the case so this should be removed
   // (though it seems like there are subtle things which break in Chrome if this
   // changes).
-  std::unordered_map<uint32_t /* tid */, std::vector<UniqueTid>> tids_;
+  base::FlatHashMap<uint32_t /* tid */, std::vector<UniqueTid>> tids_;
 
   // Mapping of the most recently seen pid to the associated upid.
-  std::unordered_map<uint32_t /* pid (aka tgid) */, UniquePid> pids_;
+  base::FlatHashMap<uint32_t /* pid (aka tgid) */, UniquePid> pids_;
 
   // Pending thread associations. The meaning of a pair<ThreadA, ThreadB> in
   // this vector is: we know that A and B belong to the same process, but we
diff --git a/src/trace_processor/importers/common/slice_tracker.cc b/src/trace_processor/importers/common/slice_tracker.cc
index dbb76a2..b160ec5 100644
--- a/src/trace_processor/importers/common/slice_tracker.cc
+++ b/src/trace_processor/importers/common/slice_tracker.cc
@@ -57,8 +57,8 @@
   // Double check that if we've seen this track in the past, it was also
   // marked as unnestable then.
 #if PERFETTO_DCHECK_IS_ON()
-  auto it = stacks_.find(row.track_id);
-  PERFETTO_DCHECK(it == stacks_.end() || it->second.is_legacy_unnestable);
+  auto* it = stacks_.Find(row.track_id);
+  PERFETTO_DCHECK(!it || it->is_legacy_unnestable);
 #endif
 
   // Ensure that StartSlice knows that this track is unnestable.
@@ -98,11 +98,11 @@
                                                StringId category,
                                                StringId name,
                                                SetArgsCallback args_callback) {
-  auto it = stacks_.find(track_id);
-  if (it == stacks_.end())
+  auto* it = stacks_.Find(track_id);
+  if (!it)
     return base::nullopt;
 
-  auto& stack = it->second.slice_stack;
+  auto& stack = it->slice_stack;
   if (stack.empty())
     return base::nullopt;
 
@@ -194,11 +194,11 @@
   }
   prev_timestamp_ = timestamp;
 
-  auto it = stacks_.find(track_id);
-  if (it == stacks_.end())
+  auto it = stacks_.Find(track_id);
+  if (!it)
     return base::nullopt;
 
-  TrackInfo& track_info = it->second;
+  TrackInfo& track_info = *it;
   SlicesStack& stack = track_info.slice_stack;
   MaybeCloseStack(timestamp, &stack, track_id);
   if (stack.empty())
@@ -275,7 +275,7 @@
   // TODO(eseckler): Reconsider whether we want to close pending slices by
   // setting their duration to |trace_end - event_start|. Might still want some
   // additional way of flagging these events as "incomplete" to the UI.
-  stacks_.clear();
+  stacks_.Clear();
 }
 
 void SliceTracker::SetOnSliceBeginCallback(OnSliceBeginCallback callback) {
@@ -284,10 +284,10 @@
 
 base::Optional<SliceId> SliceTracker::GetTopmostSliceOnTrack(
     TrackId track_id) const {
-  const auto iter = stacks_.find(track_id);
-  if (iter == stacks_.end())
+  const auto* iter = stacks_.Find(track_id);
+  if (!iter)
     return base::nullopt;
-  const auto& stack = iter->second.slice_stack;
+  const auto& stack = iter->slice_stack;
   if (stack.empty())
     return base::nullopt;
   uint32_t slice_idx = stack.back().row;
diff --git a/src/trace_processor/importers/common/slice_tracker.h b/src/trace_processor/importers/common/slice_tracker.h
index f574164..847bda6 100644
--- a/src/trace_processor/importers/common/slice_tracker.h
+++ b/src/trace_processor/importers/common/slice_tracker.h
@@ -19,6 +19,7 @@
 
 #include <stdint.h>
 
+#include "perfetto/ext/base/flat_hash_map.h"
 #include "src/trace_processor/importers/common/args_tracker.h"
 #include "src/trace_processor/storage/trace_storage.h"
 
@@ -124,7 +125,7 @@
     uint32_t legacy_unnestable_begin_count = 0;
     int64_t legacy_unnestable_last_begin_ts = 0;
   };
-  using StackMap = std::unordered_map<TrackId, TrackInfo>;
+  using StackMap = base::FlatHashMap<TrackId, TrackInfo>;
 
   // virtual for testing.
   virtual base::Optional<SliceId> StartSlice(int64_t timestamp,
diff --git a/src/trace_processor/importers/ftrace/binder_tracker.cc b/src/trace_processor/importers/ftrace/binder_tracker.cc
index d866c0a..64eee90 100644
--- a/src/trace_processor/importers/ftrace/binder_tracker.cc
+++ b/src/trace_processor/importers/ftrace/binder_tracker.cc
@@ -15,14 +15,13 @@
  */
 
 #include "src/trace_processor/importers/ftrace/binder_tracker.h"
+#include "perfetto/base/compiler.h"
+#include "perfetto/ext/base/string_utils.h"
 #include "src/trace_processor/importers/common/process_tracker.h"
 #include "src/trace_processor/importers/common/slice_tracker.h"
 #include "src/trace_processor/importers/common/track_tracker.h"
 #include "src/trace_processor/types/trace_processor_context.h"
 
-#include "perfetto/base/compiler.h"
-#include "perfetto/ext/base/string_utils.h"
-
 namespace perfetto {
 namespace trace_processor {
 
@@ -157,7 +156,8 @@
     return;
   }
 
-  if (transaction_await_rcv.count(transaction_id) > 0) {
+  TrackId* rcv_track_id = transaction_await_rcv.Find(transaction_id);
+  if (rcv_track_id) {
     // First begin the reply slice to get its slice id.
     auto reply_slice_id = context_->slice_tracker->Begin(
         ts, track_id, binder_category_id_, reply_id_);
@@ -171,9 +171,9 @@
                          Variadic::UnsignedInteger(reply_slice_id->value));
     };
     // Add the dest args to the current transaction slice and get the slice id.
-    auto transaction_slice_id = context_->slice_tracker->AddArgs(
-        transaction_await_rcv[transaction_id], binder_category_id_,
-        transaction_slice_id_, args_inserter);
+    auto transaction_slice_id =
+        context_->slice_tracker->AddArgs(*rcv_track_id, binder_category_id_,
+                                         transaction_slice_id_, args_inserter);
 
     // Add the dest slice id to the reply slice that has just begun.
     auto reply_dest_inserter =
@@ -184,15 +184,15 @@
         };
     context_->slice_tracker->AddArgs(track_id, binder_category_id_, reply_id_,
                                      reply_dest_inserter);
-    transaction_await_rcv.erase(transaction_id);
+    transaction_await_rcv.Erase(transaction_id);
     return;
   }
 
-  if (awaiting_async_rcv_.count(transaction_id) > 0) {
-    auto args = awaiting_async_rcv_[transaction_id];
+  SetArgsCallback* args = awaiting_async_rcv_.Find(transaction_id);
+  if (args) {
     context_->slice_tracker->Scoped(ts, track_id, binder_category_id_,
-                                    async_rcv_id_, 0, args);
-    awaiting_async_rcv_.erase(transaction_id);
+                                    async_rcv_id_, 0, *args);
+    awaiting_async_rcv_.Erase(transaction_id);
     return;
   }
 }
@@ -209,7 +209,7 @@
 void BinderTracker::Locked(int64_t ts, uint32_t pid) {
   UniqueTid utid = context_->process_tracker->GetOrCreateThread(pid);
 
-  if (attempt_lock_.count(pid) == 0)
+  if (!attempt_lock_.Find(pid))
     return;
 
   TrackId track_id = context_->track_tracker->InternThreadTrack(utid);
@@ -218,19 +218,19 @@
                                  lock_held_id_);
 
   lock_acquired_[pid] = ts;
-  attempt_lock_.erase(pid);
+  attempt_lock_.Erase(pid);
 }
 
 void BinderTracker::Unlock(int64_t ts, uint32_t pid) {
   UniqueTid utid = context_->process_tracker->GetOrCreateThread(pid);
 
-  if (lock_acquired_.count(pid) == 0)
+  if (!lock_acquired_.Find(pid))
     return;
 
   TrackId track_id = context_->track_tracker->InternThreadTrack(utid);
   context_->slice_tracker->End(ts, track_id, binder_category_id_,
                                lock_held_id_);
-  lock_acquired_.erase(pid);
+  lock_acquired_.Erase(pid);
 }
 
 void BinderTracker::TransactionAllocBuf(int64_t ts,
diff --git a/src/trace_processor/importers/ftrace/binder_tracker.h b/src/trace_processor/importers/ftrace/binder_tracker.h
index a96816d..a9482de 100644
--- a/src/trace_processor/importers/ftrace/binder_tracker.h
+++ b/src/trace_processor/importers/ftrace/binder_tracker.h
@@ -18,9 +18,9 @@
 #define SRC_TRACE_PROCESSOR_IMPORTERS_FTRACE_BINDER_TRACKER_H_
 
 #include <stdint.h>
-#include <unordered_map>
-#include <unordered_set>
 
+#include "perfetto/base/flat_set.h"
+#include "perfetto/ext/base/flat_hash_map.h"
 #include "src/trace_processor/importers/common/args_tracker.h"
 #include "src/trace_processor/storage/trace_storage.h"
 #include "src/trace_processor/types/destructible.h"
@@ -68,14 +68,14 @@
 
  private:
   TraceProcessorContext* const context_;
-  std::unordered_set<int32_t> awaiting_rcv_for_reply_;
+  base::FlatSet<int32_t> awaiting_rcv_for_reply_;
 
-  std::unordered_map<int32_t, TrackId> transaction_await_rcv;
-  std::unordered_map<int32_t, SetArgsCallback> awaiting_async_rcv_;
+  base::FlatHashMap<int32_t, TrackId> transaction_await_rcv;
+  base::FlatHashMap<int32_t, SetArgsCallback> awaiting_async_rcv_;
 
-  std::unordered_map<uint32_t, int64_t> attempt_lock_;
+  base::FlatHashMap<uint32_t, int64_t> attempt_lock_;
 
-  std::unordered_map<uint32_t, int64_t> lock_acquired_;
+  base::FlatHashMap<uint32_t, int64_t> lock_acquired_;
 
   const StringId binder_category_id_;
   const StringId lock_waiting_id_;
diff --git a/src/trace_processor/importers/ftrace/rss_stat_tracker.cc b/src/trace_processor/importers/ftrace/rss_stat_tracker.cc
index a134fb6..80ca10f 100644
--- a/src/trace_processor/importers/ftrace/rss_stat_tracker.cc
+++ b/src/trace_processor/importers/ftrace/rss_stat_tracker.cc
@@ -123,32 +123,33 @@
 
   // If curr is false, try to look up the utid we previously saw for this
   // mm id.
-  auto it = mm_id_to_utid_.find(mm_id);
-  if (it == mm_id_to_utid_.end())
+  auto* it = mm_id_to_utid_.Find(mm_id);
+  if (!it)
     return base::nullopt;
 
   // If the utid in the map is the same as our current utid but curr is false,
   // that means we are in the middle of a process changing mm structs (i.e. in
   // the middle of a vfork + exec). Therefore, we should discard the association
   // of this vm struct with this thread.
-  UniqueTid utid = context_->process_tracker->GetOrCreateThread(pid);
-  if (it->second == utid) {
-    mm_id_to_utid_.erase(it);
+  const UniqueTid mm_utid = *it;
+  const UniqueTid utid = context_->process_tracker->GetOrCreateThread(pid);
+  if (mm_utid == utid) {
+    mm_id_to_utid_.Erase(mm_id);
     return base::nullopt;
   }
 
   // Verify that the utid in the map is still alive. It may not be if an mm
   // struct we saw in the past is about to be reused after the thread died but
   // we don't yet know the new process that struct will be associated with.
-  if (!context_->process_tracker->IsThreadAlive(it->second)) {
-    mm_id_to_utid_.erase(it);
+  if (!context_->process_tracker->IsThreadAlive(mm_utid)) {
+    mm_id_to_utid_.Erase(mm_id);
     return base::nullopt;
   }
 
   // This case happens when a process is changing the VM of another process and
   // we know the utid corresponding to the target process. Just return that
   // utid.
-  return it->second;
+  return mm_utid;
 }
 
 }  // namespace trace_processor
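
Note how the rss_stat hunk copies the utid out of the map before calling Erase(): FlatHashMap hands out raw pointers into its storage, and erasing (or growing) the table invalidates them. A minimal sketch of the safe ordering, with placeholder key/value types:

#include <cstdint>

#include "perfetto/ext/base/flat_hash_map.h"

void CopyBeforeEraseSketch(perfetto::base::FlatHashMap<int64_t, uint32_t>& map,
                           int64_t key) {
  uint32_t* it = map.Find(key);
  if (!it)
    return;

  // Copy the value first: Erase() releases the slot, so reading through |it|
  // afterwards would be a use-after-invalidation.
  const uint32_t value = *it;
  map.Erase(key);

  // Only the copy may be used from here on, never |it|.
  (void)value;
}
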
diff --git a/src/trace_processor/importers/ftrace/rss_stat_tracker.h b/src/trace_processor/importers/ftrace/rss_stat_tracker.h
index 986fa33..b3ba29a 100644
--- a/src/trace_processor/importers/ftrace/rss_stat_tracker.h
+++ b/src/trace_processor/importers/ftrace/rss_stat_tracker.h
@@ -17,8 +17,7 @@
 #ifndef SRC_TRACE_PROCESSOR_IMPORTERS_FTRACE_RSS_STAT_TRACKER_H_
 #define SRC_TRACE_PROCESSOR_IMPORTERS_FTRACE_RSS_STAT_TRACKER_H_
 
-#include <unordered_map>
-
+#include "perfetto/ext/base/flat_hash_map.h"
 #include "perfetto/protozero/field.h"
 #include "src/trace_processor/storage/trace_storage.h"
 
@@ -49,7 +48,7 @@
                                             bool is_curr,
                                             uint32_t pid);
 
-  std::unordered_map<int64_t, UniqueTid> mm_id_to_utid_;
+  base::FlatHashMap<int64_t, UniqueTid> mm_id_to_utid_;
   std::vector<StringId> rss_members_;
   TraceProcessorContext* const context_;
 };
diff --git a/src/trace_processor/importers/fuchsia/fuchsia_trace_tokenizer.cc b/src/trace_processor/importers/fuchsia/fuchsia_trace_tokenizer.cc
index d215b19..6628c41 100644
--- a/src/trace_processor/importers/fuchsia/fuchsia_trace_tokenizer.cc
+++ b/src/trace_processor/importers/fuchsia/fuchsia_trace_tokenizer.cc
@@ -17,7 +17,6 @@
 #include "src/trace_processor/importers/fuchsia/fuchsia_trace_tokenizer.h"
 
 #include <cinttypes>
-#include <unordered_map>
 
 #include "perfetto/base/logging.h"
 #include "perfetto/ext/base/string_view.h"
diff --git a/src/trace_processor/importers/json/json_trace_parser.h b/src/trace_processor/importers/json/json_trace_parser.h
index 2084c4b..18f47f4 100644
--- a/src/trace_processor/importers/json/json_trace_parser.h
+++ b/src/trace_processor/importers/json/json_trace_parser.h
@@ -21,7 +21,6 @@
 
 #include <memory>
 #include <tuple>
-#include <unordered_map>
 
 #include "src/trace_processor/importers/common/trace_parser.h"
 #include "src/trace_processor/importers/json/json_tracker.h"
diff --git a/src/trace_processor/importers/memory_tracker/graph_processor_unittest.cc b/src/trace_processor/importers/memory_tracker/graph_processor_unittest.cc
index 8d1cc31..4275b36 100644
--- a/src/trace_processor/importers/memory_tracker/graph_processor_unittest.cc
+++ b/src/trace_processor/importers/memory_tracker/graph_processor_unittest.cc
@@ -18,8 +18,6 @@
 
 #include <stddef.h>
 
-#include <unordered_map>
-
 #include "perfetto/base/build_config.h"
 #include "test/gtest_and_gmock.h"
 
diff --git a/src/trace_processor/importers/memory_tracker/raw_process_memory_node_unittest.cc b/src/trace_processor/importers/memory_tracker/raw_process_memory_node_unittest.cc
index 91a27ec..653dfe0 100644
--- a/src/trace_processor/importers/memory_tracker/raw_process_memory_node_unittest.cc
+++ b/src/trace_processor/importers/memory_tracker/raw_process_memory_node_unittest.cc
@@ -18,8 +18,6 @@
 
 #include <stddef.h>
 
-#include <unordered_map>
-
 #include "perfetto/base/build_config.h"
 #include "test/gtest_and_gmock.h"
 
diff --git a/src/trace_processor/importers/proto/async_track_set_tracker.h b/src/trace_processor/importers/proto/async_track_set_tracker.h
index b3dd836..6e7fa81 100644
--- a/src/trace_processor/importers/proto/async_track_set_tracker.h
+++ b/src/trace_processor/importers/proto/async_track_set_tracker.h
@@ -17,8 +17,6 @@
 #ifndef SRC_TRACE_PROCESSOR_IMPORTERS_PROTO_ASYNC_TRACK_SET_TRACKER_H_
 #define SRC_TRACE_PROCESSOR_IMPORTERS_PROTO_ASYNC_TRACK_SET_TRACKER_H_
 
-#include <unordered_map>
-
 #include "src/trace_processor/storage/trace_storage.h"
 
 namespace perfetto {
diff --git a/src/trace_processor/importers/systrace/systrace_line_parser.cc b/src/trace_processor/importers/systrace/systrace_line_parser.cc
index c78d975..9120e65 100644
--- a/src/trace_processor/importers/systrace/systrace_line_parser.cc
+++ b/src/trace_processor/importers/systrace/systrace_line_parser.cc
@@ -16,6 +16,7 @@
 
 #include "src/trace_processor/importers/systrace/systrace_line_parser.h"
 
+#include "perfetto/ext/base/flat_hash_map.h"
 #include "perfetto/ext/base/string_splitter.h"
 #include "perfetto/ext/base/string_utils.h"
 #include "src/trace_processor/importers/common/args_tracker.h"
@@ -31,7 +32,6 @@
 #include <cctype>
 #include <cinttypes>
 #include <string>
-#include <unordered_map>
 
 namespace perfetto {
 namespace trace_processor {
@@ -60,14 +60,14 @@
     }
   }
 
-  std::unordered_map<std::string, std::string> args;
+  base::FlatHashMap<std::string, std::string> args;
   for (base::StringSplitter ss(line.args_str, ' '); ss.Next();) {
     std::string key;
     std::string value;
     if (!base::Contains(ss.cur_token(), "=")) {
       key = "name";
       value = ss.cur_token();
-      args.emplace(std::move(key), std::move(value));
+      args.Insert(std::move(key), std::move(value));
       continue;
     }
     for (base::StringSplitter inner(ss.cur_token(), '='); inner.Next();) {
@@ -77,7 +77,7 @@
         value = inner.cur_token();
       }
     }
-    args.emplace(std::move(key), std::move(value));
+    args.Insert(std::move(key), std::move(value));
   }
   if (line.event_name == "sched_switch") {
     auto prev_state_str = args["prev_state"];
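
Right after this hunk the parser keeps reading args["prev_state"] via operator[]. On FlatHashMap the subscript presumably mirrors the old unordered_map behaviour of default-constructing a value for a missing key; a small sketch under that assumption:

#include <string>

#include "perfetto/ext/base/flat_hash_map.h"

void SubscriptSketch() {
  perfetto::base::FlatHashMap<std::string, std::string> args;
  args.Insert("prev_state", "S");

  // operator[] yields a reference to the stored value, inserting a
  // default-constructed (empty) string when the key was absent.
  std::string present = args["prev_state"];  // "S"
  std::string missing = args["next_comm"];   // "" (the key is now in the map)
  (void)present;
  (void)missing;
}
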
diff --git a/src/trace_processor/sqlite/span_join_operator_table.h b/src/trace_processor/sqlite/span_join_operator_table.h
index 6fba96d..a6256f8 100644
--- a/src/trace_processor/sqlite/span_join_operator_table.h
+++ b/src/trace_processor/sqlite/span_join_operator_table.h
@@ -25,9 +25,9 @@
 #include <map>
 #include <memory>
 #include <string>
-#include <unordered_map>
 #include <vector>
 
+#include "perfetto/ext/base/flat_hash_map.h"
 #include "perfetto/trace_processor/basic_types.h"
 #include "perfetto/trace_processor/status.h"
 #include "src/trace_processor/sqlite/scoped_db.h"
@@ -428,7 +428,7 @@
   TableDefinition t1_defn_;
   TableDefinition t2_defn_;
   PartitioningType partitioning_;
-  std::unordered_map<size_t, ColumnLocator> global_index_to_column_locator_;
+  base::FlatHashMap<size_t, ColumnLocator> global_index_to_column_locator_;
 
   sqlite3* const db_;
 };
diff --git a/src/trace_processor/sqlite/sqlite_raw_table.h b/src/trace_processor/sqlite/sqlite_raw_table.h
index ce1b9ac..a3ec5c2 100644
--- a/src/trace_processor/sqlite/sqlite_raw_table.h
+++ b/src/trace_processor/sqlite/sqlite_raw_table.h
@@ -18,6 +18,7 @@
 #define SRC_TRACE_PROCESSOR_SQLITE_SQLITE_RAW_TABLE_H_
 
 #include "perfetto/base/logging.h"
+#include "perfetto/ext/base/flat_hash_map.h"
 #include "perfetto/ext/base/string_writer.h"
 #include "src/trace_processor/sqlite/db_sqlite_table.h"
 #include "src/trace_processor/storage/trace_storage.h"
@@ -31,13 +32,13 @@
  public:
   using ScopedCString = std::unique_ptr<char, void (*)(void*)>;
 
-  SystraceSerializer(TraceProcessorContext* context);
+  explicit SystraceSerializer(TraceProcessorContext* context);
 
   ScopedCString SerializeToString(uint32_t raw_row);
 
  private:
   using StringIdMap =
-      std::unordered_map<StringId, std::vector<base::Optional<uint32_t>>>;
+      base::FlatHashMap<StringId, std::vector<base::Optional<uint32_t>>>;
 
   void SerializePrefix(uint32_t raw_row, base::StringWriter* writer);
 
diff --git a/src/trace_processor/util/interned_message_view.h b/src/trace_processor/util/interned_message_view.h
index 4f5b993..3422b8b 100644
--- a/src/trace_processor/util/interned_message_view.h
+++ b/src/trace_processor/util/interned_message_view.h
@@ -17,10 +17,9 @@
 #ifndef SRC_TRACE_PROCESSOR_UTIL_INTERNED_MESSAGE_VIEW_H_
 #define SRC_TRACE_PROCESSOR_UTIL_INTERNED_MESSAGE_VIEW_H_
 
+#include "perfetto/ext/base/flat_hash_map.h"
 #include "perfetto/trace_processor/trace_blob_view.h"
 
-#include <unordered_map>
-
 namespace perfetto {
 namespace trace_processor {
 
@@ -49,7 +48,7 @@
     this->message_ = view.message_.copy();
     this->decoder_ = nullptr;
     this->decoder_type_ = nullptr;
-    this->submessages_.clear();
+    this->submessages_.Clear();
     return *this;
   }
 
@@ -87,9 +86,9 @@
   // TODO(eseckler): Support repeated fields.
   template <typename MessageType, uint32_t FieldId>
   InternedMessageView* GetOrCreateSubmessageView() {
-    auto it = submessages_.find(FieldId);
-    if (it != submessages_.end())
-      return it->second.get();
+    auto it_and_ins = submessages_.Insert(FieldId, nullptr);
+    if (!it_and_ins.second)
+      return it_and_ins.first->get();
     auto* decoder = GetOrCreateDecoder<MessageType>();
     // Calls the at() template method on the decoder.
     auto field = decoder->template at<FieldId>().as_bytes();
@@ -98,8 +97,7 @@
     TraceBlobView submessage = message_.slice(field.data, field.size);
     InternedMessageView* submessage_view =
         new InternedMessageView(std::move(submessage));
-    submessages_.emplace_hint(
-        it, FieldId, std::unique_ptr<InternedMessageView>(submessage_view));
+    it_and_ins.first->reset(submessage_view);
     return submessage_view;
   }
 
@@ -107,8 +105,8 @@
 
  private:
   using SubMessageViewMap =
-      std::unordered_map<uint32_t /*field_id*/,
-                         std::unique_ptr<InternedMessageView>>;
+      base::FlatHashMap<uint32_t /*field_id*/,
+                        std::unique_ptr<InternedMessageView>>;
 
   TraceBlobView message_;
 
diff --git a/ui/release/channels.json b/ui/release/channels.json
index 33c7f2f..359228c 100644
--- a/ui/release/channels.json
+++ b/ui/release/channels.json
@@ -2,7 +2,7 @@
   "channels": [
     {
       "name": "stable",
-      "rev": "6f7273f220d1812e315be627605be2561c617ed7"
+      "rev": "b4bd17f12d544c7b09cc93c6e7ed22e80ed73e48"
     },
     {
       "name": "canary",
diff --git a/ui/src/frontend/record_page.ts b/ui/src/frontend/record_page.ts
index a1e3fd7..e71909f 100644
--- a/ui/src/frontend/record_page.ts
+++ b/ui/src/frontend/record_page.ts
@@ -754,7 +754,7 @@
         m(Toggle, {
           title: 'Resolve kernel symbols',
           cssClass: '.thin',
-          descr: `Enables lookup via /proc/kallsyms for workqueue, 
+          descr: `Enables lookup via /proc/kallsyms for workqueue,
               sched_blocked_reason and other events (userdebug/eng builds only).`,
           setEnabled: (cfg, val) => cfg.symbolizeKsyms = val,
           isEnabled: (cfg) => cfg.symbolizeKsyms
@@ -1434,7 +1434,7 @@
                 }
               },
               m(`li${routePage === 'config' ? '.active' : ''}`,
-                m('i.material-icons', 'tune'),
+                m('i.material-icons', 'save'),
                 m('.title', 'Saved configs'),
                 m('.sub', 'Manage local configs'))) :
             null),