Merge "processor: Handle chrome traces with clock_id but no ClockSnapshot"
diff --git a/gn/perfetto_benchmarks.gni b/gn/perfetto_benchmarks.gni
index d5668d6..f711222 100644
--- a/gn/perfetto_benchmarks.gni
+++ b/gn/perfetto_benchmarks.gni
@@ -18,6 +18,7 @@
   "gn:default_deps",
   "src/traced/probes/ftrace:benchmarks",
   "src/trace_processor/db:benchmarks",
+  "src/trace_processor/tables:benchmarks",
   "src/tracing:benchmarks",
   "test:benchmark_main",
   "test:end_to_end_benchmarks",
diff --git a/protos/perfetto/trace/gpu/gpu_render_stage_event.proto b/protos/perfetto/trace/gpu/gpu_render_stage_event.proto
index a2a4cfd..e719b2b 100644
--- a/protos/perfetto/trace/gpu/gpu_render_stage_event.proto
+++ b/protos/perfetto/trace/gpu/gpu_render_stage_event.proto
@@ -19,7 +19,7 @@
 
 package perfetto.protos;
 
-// next id: 12
+// next id: 13
 message GpuRenderStageEvent {
   // required. Unique ID for the event.
   optional uint64 event_id = 1;
@@ -43,6 +43,9 @@
   // optional. The Vulkan render pass handle.
   optional uint64 render_pass_handle = 9;
 
+  // optional. The Vulkan command buffer handle.
+  optional uint64 command_buffer_handle = 12;
+
   // optional. Submission ID generated by the UMD.
   optional uint32 submission_id = 10;
 
diff --git a/protos/perfetto/trace/perfetto_trace.proto b/protos/perfetto/trace/perfetto_trace.proto
index 084679f..0ae22f5 100644
--- a/protos/perfetto/trace/perfetto_trace.proto
+++ b/protos/perfetto/trace/perfetto_trace.proto
@@ -4267,7 +4267,7 @@
 
 // Begin of protos/perfetto/trace/gpu/gpu_render_stage_event.proto
 
-// next id: 12
+// next id: 13
 message GpuRenderStageEvent {
   // required. Unique ID for the event.
   optional uint64 event_id = 1;
@@ -4291,6 +4291,9 @@
   // optional. The Vulkan render pass handle.
   optional uint64 render_pass_handle = 9;
 
+  // optional. The Vulkan command buffer handle.
+  optional uint64 command_buffer_handle = 12;
+
   // optional. Submission ID generated by the UMD.
   optional uint32 submission_id = 10;
 
diff --git a/src/trace_processor/db/column.cc b/src/trace_processor/db/column.cc
index 62da919..61a4a7c 100644
--- a/src/trace_processor/db/column.cc
+++ b/src/trace_processor/db/column.cc
@@ -54,8 +54,7 @@
                 col_idx, row_map_idx, nullptr);
 }
 
-void Column::FilterInto(FilterOp op, SqlValue value, RowMap* iv) const {
-  // TODO(lalitm): add special logic here to deal with kId and kSorted flags.
+void Column::FilterIntoSlow(FilterOp op, SqlValue value, RowMap* iv) const {
   switch (op) {
     case FilterOp::kLt:
       iv->RemoveIf([this, value](uint32_t row) { return Get(row) >= value; });
diff --git a/src/trace_processor/db/column.h b/src/trace_processor/db/column.h
index e01a0da..6e9c9c4 100644
--- a/src/trace_processor/db/column.h
+++ b/src/trace_processor/db/column.h
@@ -164,7 +164,19 @@
 
   // Updates the given RowMap by only keeping rows where this column meets the
   // given filter constraint.
-  void FilterInto(FilterOp, SqlValue value, RowMap*) const;
+  void FilterInto(FilterOp op, SqlValue value, RowMap* rm) const {
+    // Fast path for id-column equality. TODO(lalitm): handle kSorted too.
+    if (type_ == ColumnType::kId && op == FilterOp::kEq) {
+      auto opt_idx = IndexOf(value);
+      if (opt_idx) {
+        rm->Intersect(RowMap::SingleRow(*opt_idx));
+      } else {
+        rm->Intersect(RowMap());
+      }
+      return;
+    }
+    FilterIntoSlow(op, value, rm);
+  }
 
   // Returns true if this column is considered an id column.
   bool IsId() const { return (flags_ & Flag::kId) != 0; }
@@ -275,6 +287,8 @@
   Column(const Column&) = delete;
   Column& operator=(const Column&) = delete;
 
+  void FilterIntoSlow(FilterOp, SqlValue value, RowMap*) const;
+
   template <typename T>
   static ColumnType ToColumnType() {
     if (std::is_same<T, uint32_t>::value) {
diff --git a/src/trace_processor/db/row_map.cc b/src/trace_processor/db/row_map.cc
index 915a2e7..b66945e 100644
--- a/src/trace_processor/db/row_map.cc
+++ b/src/trace_processor/db/row_map.cc
@@ -120,6 +120,8 @@
 
 }  // namespace
 
+RowMap::RowMap() : RowMap(0, 0) {}
+
 RowMap::RowMap(uint32_t start, uint32_t end)
     : mode_(Mode::kRange), start_idx_(start), end_idx_(end) {}
 
diff --git a/src/trace_processor/db/row_map.h b/src/trace_processor/db/row_map.h
index 831495c..65192e7 100644
--- a/src/trace_processor/db/row_map.h
+++ b/src/trace_processor/db/row_map.h
@@ -59,6 +59,10 @@
 // switch to it but the cases where this happens is not precisely defined.
 class RowMap {
  public:
+  // Creates an empty RowMap.
+  // By default this will be implemented using a range.
+  RowMap();
+
   // Creates a RowMap containing the range of rows between |start| and |end|
   // i.e. all rows between |start| (inclusive) and |end| (exclusive).
   explicit RowMap(uint32_t start, uint32_t end);
@@ -69,6 +73,10 @@
   // Creates a RowMap backed by an std::vector<uint32_t>.
   explicit RowMap(std::vector<uint32_t> vec);
 
+  // Creates a RowMap containing just |row|.
+  // By default this will be implemented using a range.
+  static RowMap SingleRow(uint32_t row) { return RowMap(row, row + 1); }
+
   // Creates a copy of the RowMap.
   // We have an explicit copy function because RowMap can hold onto large chunks
   // of memory and we want to be very explicit when making a copy to avoid
@@ -102,6 +110,23 @@
     PERFETTO_FATAL("For GCC");
   }
 
+  // Reports whether |row| is one of the rows held by this RowMap.
+  bool Contains(uint32_t row) const {
+    switch (mode_) {
+      case Mode::kRange:
+        // The range is half-open: |end_idx_| itself is excluded.
+        return start_idx_ <= row && row < end_idx_;
+      case Mode::kBitVector:
+        // Bounds-check first, then query the bit for |row|.
+        return row < bit_vector_.size() && bit_vector_.IsSet(row);
+      case Mode::kIndexVector: {
+        const auto last = index_vector_.end();
+        return std::find(index_vector_.begin(), last, row) != last;
+      }
+    }
+    PERFETTO_FATAL("For GCC");
+  }
+
   // Returns the first index of the given |row| in the RowMap.
   base::Optional<uint32_t> IndexOf(uint32_t row) const {
     switch (mode_) {
@@ -182,10 +207,7 @@
           else
             bit_vector_.AppendTrue();
         }
-
-        start_idx_ = 0;
-        end_idx_ = 0;
-        mode_ = Mode::kBitVector;
+        *this = RowMap(std::move(bit_vector_));
         break;
       }
       case Mode::kBitVector: {
@@ -203,6 +225,35 @@
     }
   }
 
+  // Intersects |other| with |this| writing the result into |this|.
+  // By "intersect", we mean to keep only the rows present in both RowMaps. The
+  // order of the preserved rows will be the same as |this|.
+  //
+  // Conceptually, we are performing the following algorithm:
+  // for (idx : this)
+  //   if (!other.Contains(idx))
+  //     Remove(idx)
+  void Intersect(const RowMap& other) {
+    uint32_t size = other.size();
+
+    if (size == 0u) {
+      // If other is empty, then we will also end up being empty.
+      *this = RowMap();
+      return;
+    }
+
+    if (size == 1u && mode_ != Mode::kIndexVector) {
+      // Single-row fast path. Skipped for index vectors, which may hold the
+      // row several times and must keep every copy (see algorithm above).
+      uint32_t row = other.Get(0);
+      *this = Contains(row) ? RowMap::SingleRow(row) : RowMap();
+      return;
+    }
+
+    // TODO(lalitm): improve efficiency of this if we end up needing it.
+    RemoveIf([&other](uint32_t row) { return !other.Contains(row); });
+  }
+
  private:
   enum class Mode {
     kRange,
diff --git a/src/trace_processor/db/row_map_benchmark.cc b/src/trace_processor/db/row_map_benchmark.cc
index 733d94a..274aefe 100644
--- a/src/trace_processor/db/row_map_benchmark.cc
+++ b/src/trace_processor/db/row_map_benchmark.cc
@@ -26,7 +26,16 @@
 static constexpr uint32_t kPoolSize = 100000;
 static constexpr uint32_t kSize = 123456;
 
-std::vector<uint32_t> CreateRandomIndexVector(uint32_t size, uint32_t mod) {
+RowMap CreateRange(uint32_t end) {
+  static constexpr uint32_t kRandomSeed = 32;
+  std::minstd_rand0 rnd_engine(kRandomSeed);
+
+  uint32_t start = rnd_engine() % end;
+  uint32_t size = rnd_engine() % (end - start);
+  return RowMap(start, start + size);
+}
+
+std::vector<uint32_t> CreateIndexVector(uint32_t size, uint32_t mod) {
   static constexpr uint32_t kRandomSeed = 476;
   std::minstd_rand0 rnd_engine(kRandomSeed);
   std::vector<uint32_t> rows(size);
@@ -36,7 +45,7 @@
   return rows;
 }
 
-BitVector CreateRandomBitVector(uint32_t size) {
+BitVector CreateBitVector(uint32_t size) {
   static constexpr uint32_t kRandomSeed = 42;
   std::minstd_rand0 rnd_engine(kRandomSeed);
   BitVector bv;
@@ -50,131 +59,164 @@
   return bv;
 }
 
+void BenchRowMapGet(benchmark::State& state, RowMap rm) {
+  auto pool_vec = CreateIndexVector(kPoolSize, rm.size());
+
+  uint32_t pool_idx = 0;
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(rm.Get(pool_vec[pool_idx]));
+    pool_idx = (pool_idx + 1) % kPoolSize;
+  }
+}
+
+template <typename Factory>
+void BenchRowMapAddToEmpty(benchmark::State& state, Factory factory) {
+  auto pool_vec = CreateIndexVector(kPoolSize, kSize);
+
+  uint32_t pool_idx = 0;
+  for (auto _ : state) {
+    RowMap rm = factory();
+
+    rm.Add(pool_vec[pool_idx]);
+    pool_idx = (pool_idx + 1) % kPoolSize;
+
+    benchmark::ClobberMemory();
+  }
+}
+
+void BenchRowMapSelect(benchmark::State& state,
+                       RowMap rm,
+                       const RowMap& selector) {
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(rm.SelectRows(selector));
+  }
+}
+
+// Times RemoveIf on a RowMap that |factory| rebuilds, untimed, per iteration.
+template <typename Factory>
+void BenchRowMapRemoveIf(benchmark::State& state, Factory factory) {
+  auto pool_vec = CreateIndexVector(kPoolSize, kSize);
+
+  uint32_t pool_idx = 0;
+  for (auto _ : state) {
+    state.PauseTiming();
+    RowMap rm = factory();
+    state.ResumeTiming();
+
+    // Guard against a zero pool entry so |row % mod| is always defined.
+    const uint32_t mod = pool_vec[pool_idx] == 0 ? 1 : pool_vec[pool_idx];
+    rm.RemoveIf([mod](uint32_t row) { return (row % mod) != 0; });
+    pool_idx = (pool_idx + 1) % kPoolSize;
+
+    benchmark::ClobberMemory();
+  }
+}
+
 }  // namespace
 
-static void BM_RowMapBitVectorGet(benchmark::State& state) {
-  RowMap rm(CreateRandomBitVector(kSize));
-  auto pool_vec = CreateRandomIndexVector(kPoolSize, rm.size());
-
-  uint32_t pool_idx = 0;
-  for (auto _ : state) {
-    benchmark::DoNotOptimize(rm.Get(pool_vec[pool_idx]));
-    pool_idx = (pool_idx + 1) % kPoolSize;
-  }
+static void BM_RowMapRangeGet(benchmark::State& state) {
+  BenchRowMapGet(state, RowMap(CreateRange(kSize)));
 }
-BENCHMARK(BM_RowMapBitVectorGet);
+BENCHMARK(BM_RowMapRangeGet);
 
-static void BM_RowMapIndexVectorGet(benchmark::State& state) {
-  RowMap rm(CreateRandomIndexVector(kSize, kSize));
-  auto pool_vec = CreateRandomIndexVector(kPoolSize, kSize);
-
-  uint32_t pool_idx = 0;
-  for (auto _ : state) {
-    benchmark::DoNotOptimize(rm.Get(pool_vec[pool_idx]));
-    pool_idx = (pool_idx + 1) % kPoolSize;
-  }
+static void BM_RowMapBvGet(benchmark::State& state) {
+  BenchRowMapGet(state, RowMap(CreateBitVector(kSize)));
 }
-BENCHMARK(BM_RowMapIndexVectorGet);
+BENCHMARK(BM_RowMapBvGet);
+
+static void BM_RowMapIvGet(benchmark::State& state) {
+  BenchRowMapGet(state, RowMap(CreateIndexVector(kSize, kSize)));
+}
+BENCHMARK(BM_RowMapIvGet);
 
 // TODO(lalitm): add benchmarks for IndexOf after BitVector is made faster.
 // We can't add them right now because they are just too slow to run.
 
-static void BM_RowMapBitVectorAdd(benchmark::State& state) {
-  auto pool_vec = CreateRandomIndexVector(kPoolSize, kSize);
-
-  uint32_t pool_idx = 0;
-  for (auto _ : state) {
-    state.PauseTiming();
-    RowMap rm(BitVector{});
-    state.ResumeTiming();
-
-    rm.Add(pool_vec[pool_idx]);
-    pool_idx = (pool_idx + 1) % kPoolSize;
-
-    benchmark::ClobberMemory();
-  }
+static void BM_RowMapRangeAddToEmpty(benchmark::State& state) {
+  BenchRowMapAddToEmpty(state, []() { return RowMap(0, 0); });
 }
-BENCHMARK(BM_RowMapBitVectorAdd);
+BENCHMARK(BM_RowMapRangeAddToEmpty);
 
-static void BM_RowMapIndexVectorAdd(benchmark::State& state) {
-  auto pool_vec = CreateRandomIndexVector(kPoolSize, kSize);
-
-  RowMap rm(std::vector<uint32_t>{});
-  uint32_t pool_idx = 0;
-  for (auto _ : state) {
-    rm.Add(pool_vec[pool_idx]);
-    pool_idx = (pool_idx + 1) % kPoolSize;
-    benchmark::ClobberMemory();
-  }
+static void BM_RowMapBvAddToEmpty(benchmark::State& state) {
+  BenchRowMapAddToEmpty(state, []() { return RowMap(BitVector{}); });
 }
-BENCHMARK(BM_RowMapIndexVectorAdd);
+BENCHMARK(BM_RowMapBvAddToEmpty);
 
-static void BM_RowMapBvSelectBv(benchmark::State& state) {
-  RowMap rm(CreateRandomBitVector(kSize));
-  RowMap selector(CreateRandomBitVector(rm.size()));
-
-  for (auto _ : state) {
-    benchmark::DoNotOptimize(rm.SelectRows(selector));
-  }
+static void BM_RowMapIvAddToEmpty(benchmark::State& state) {
+  BenchRowMapAddToEmpty(state,
+                        []() { return RowMap(std::vector<uint32_t>{}); });
 }
-BENCHMARK(BM_RowMapBvSelectBv);
+BENCHMARK(BM_RowMapIvAddToEmpty);
 
-// TODO(lalitm): add benchmarks for BvSelectIv after BitVector is made faster.
-// We can't add them right now because they are just too slow to run.
-
-static void BM_RowMapIvSelectBv(benchmark::State& state) {
-  RowMap rm(CreateRandomIndexVector(kSize, kSize));
-  RowMap selector(CreateRandomBitVector(rm.size()));
-
-  for (auto _ : state) {
-    benchmark::DoNotOptimize(rm.SelectRows(selector));
-  }
+static void BM_RowMapSelectRangeWithRange(benchmark::State& state) {
+  RowMap rm(CreateRange(kSize));
+  RowMap selector(CreateRange(rm.size()));
+  BenchRowMapSelect(state, std::move(rm), std::move(selector));
 }
-BENCHMARK(BM_RowMapIvSelectBv);
+BENCHMARK(BM_RowMapSelectRangeWithRange);
 
-static void BM_RowMapIvSelectIv(benchmark::State& state) {
-  RowMap rm(CreateRandomIndexVector(kSize, kSize));
-  RowMap selector(CreateRandomIndexVector(rm.size(), rm.size()));
-
-  for (auto _ : state) {
-    benchmark::DoNotOptimize(rm.SelectRows(selector));
-  }
+static void BM_RowMapSelectRangeWithBv(benchmark::State& state) {
+  RowMap rm(CreateRange(kSize));
+  RowMap selector(CreateBitVector(rm.size()));
+  BenchRowMapSelect(state, std::move(rm), std::move(selector));
 }
-BENCHMARK(BM_RowMapIvSelectIv);
+BENCHMARK(BM_RowMapSelectRangeWithBv);
 
-static void BM_RowMapBvSelectSingleRow(benchmark::State& state) {
-  // This benchmark tests the performance of selecting just a single
-  // row of a RowMap. We specially test this case as it occurs on every join
-  // based on id originating from SQLite; nested subqueries will be performed
-  // on the id column and will select just a single row.
-  RowMap rm(CreateRandomBitVector(kSize));
-
-  static constexpr uint32_t kRandomSeed = 123;
-  std::minstd_rand0 rnd_engine(kRandomSeed);
-  BitVector bv(rm.size(), false);
-  bv.Set(rnd_engine() % bv.size());
-  RowMap selector(std::move(bv));
-
-  for (auto _ : state) {
-    benchmark::DoNotOptimize(rm.SelectRows(selector));
-  }
+static void BM_RowMapSelectRangeWithIv(benchmark::State& state) {
+  RowMap rm(CreateRange(kSize));
+  RowMap selector(CreateIndexVector(rm.size(), rm.size()));
+  BenchRowMapSelect(state, std::move(rm), std::move(selector));
 }
-BENCHMARK(BM_RowMapBvSelectSingleRow);
+BENCHMARK(BM_RowMapSelectRangeWithIv);
+
+static void BM_RowMapSelectBvWithRange(benchmark::State& state) {
+  RowMap rm(CreateBitVector(kSize));
+  RowMap selector(CreateRange(rm.size()));
+  BenchRowMapSelect(state, std::move(rm), std::move(selector));
+}
+BENCHMARK(BM_RowMapSelectBvWithRange);
+
+static void BM_RowMapSelectBvWithBv(benchmark::State& state) {
+  RowMap rm(CreateBitVector(kSize));
+  RowMap selector(CreateBitVector(rm.size()));
+  BenchRowMapSelect(state, std::move(rm), std::move(selector));
+}
+BENCHMARK(BM_RowMapSelectBvWithBv);
+
+static void BM_RowMapSelectBvWithIv(benchmark::State& state) {
+  RowMap rm(CreateBitVector(kSize));
+  RowMap selector(CreateIndexVector(rm.size(), rm.size()));
+  BenchRowMapSelect(state, std::move(rm), std::move(selector));
+}
+BENCHMARK(BM_RowMapSelectBvWithIv);
+
+static void BM_RowMapSelectIvWithRange(benchmark::State& state) {
+  RowMap rm(CreateIndexVector(kSize, kSize));
+  RowMap selector(CreateRange(rm.size()));
+  BenchRowMapSelect(state, std::move(rm), std::move(selector));
+}
+BENCHMARK(BM_RowMapSelectIvWithRange);
+
+static void BM_RowMapSelectIvWithBv(benchmark::State& state) {
+  RowMap rm(CreateIndexVector(kSize, kSize));
+  RowMap selector(CreateBitVector(rm.size()));
+  BenchRowMapSelect(state, std::move(rm), std::move(selector));
+}
+BENCHMARK(BM_RowMapSelectIvWithBv);
+
+static void BM_RowMapSelectIvWithIv(benchmark::State& state) {
+  RowMap rm(CreateIndexVector(kSize, kSize));
+  RowMap selector(CreateIndexVector(rm.size(), rm.size()));
+  BenchRowMapSelect(state, std::move(rm), std::move(selector));
+}
+BENCHMARK(BM_RowMapSelectIvWithIv);
+
+static void BM_RowMapRangeRemoveIf(benchmark::State& state) {
+  BenchRowMapRemoveIf(state, []() { return RowMap(CreateRange(kSize)); });
+}
+BENCHMARK(BM_RowMapRangeRemoveIf);
 
 static void BM_RowMapBvRemoveIf(benchmark::State& state) {
-  RowMap rm(CreateRandomBitVector(kSize));
-
-  static constexpr uint32_t kRandomSeed = 123;
-  std::minstd_rand0 rnd_engine(kRandomSeed);
-  for (auto _ : state) {
-    state.PauseTiming();
-    RowMap copy = rm.Copy();
-    uint32_t mod_row_to_keep = rnd_engine() % kSize;
-    state.ResumeTiming();
-
-    copy.RemoveIf(
-        [mod_row_to_keep](uint32_t row) { return row % mod_row_to_keep != 0; });
-    benchmark::ClobberMemory();
-  }
+  BenchRowMapRemoveIf(state, []() { return RowMap(CreateBitVector(kSize)); });
 }
 BENCHMARK(BM_RowMapBvRemoveIf);
diff --git a/src/trace_processor/db/row_map_unittest.cc b/src/trace_processor/db/row_map_unittest.cc
index b44d18d..cb2905d 100644
--- a/src/trace_processor/db/row_map_unittest.cc
+++ b/src/trace_processor/db/row_map_unittest.cc
@@ -111,6 +111,42 @@
   ASSERT_EQ(rm.IndexOf(10u), 4u);
 }
 
+TEST(RowMapUnittest, ContainsRange) {
+  RowMap rm(93, 157);
+
+  ASSERT_TRUE(rm.Contains(93));
+  ASSERT_TRUE(rm.Contains(105));
+  ASSERT_TRUE(rm.Contains(156));
+
+  ASSERT_FALSE(rm.Contains(0));
+  ASSERT_FALSE(rm.Contains(92));
+  ASSERT_FALSE(rm.Contains(157));
+}
+
+TEST(RowMapUnittest, ContainsBitVector) {
+  RowMap rm(BitVector{true, false, true, true, false, true});
+
+  ASSERT_TRUE(rm.Contains(0));
+  ASSERT_TRUE(rm.Contains(2));
+  ASSERT_TRUE(rm.Contains(3));
+
+  ASSERT_FALSE(rm.Contains(1));
+  ASSERT_FALSE(rm.Contains(4));
+  ASSERT_FALSE(rm.Contains(6));
+}
+
+TEST(RowMapUnittest, ContainsIndexVector) {
+  RowMap rm(std::vector<uint32_t>{0u, 2u, 3u, 5u});
+
+  ASSERT_TRUE(rm.Contains(0));
+  ASSERT_TRUE(rm.Contains(2));
+  ASSERT_TRUE(rm.Contains(3));
+
+  ASSERT_FALSE(rm.Contains(1));
+  ASSERT_FALSE(rm.Contains(4));
+  ASSERT_FALSE(rm.Contains(6));
+}
+
 TEST(RowMapUnittest, SelectRangeWithRange) {
   RowMap rm(93, 157);
   RowMap picker(4, 7);
@@ -244,6 +280,38 @@
   ASSERT_EQ(rm.Get(3u), 1u);
 }
 
+TEST(RowMapUnittest, IntersectNone) {
+  RowMap rm(BitVector{true, false, true, true, false, true});
+  rm.Intersect(RowMap());
+
+  ASSERT_EQ(rm.size(), 0u);
+}
+
+TEST(RowMapUnittest, IntersectSinglePresent) {
+  RowMap rm(BitVector{true, false, true, true, false, true});
+  rm.Intersect(RowMap::SingleRow(2u));
+
+  ASSERT_EQ(rm.size(), 1u);
+  ASSERT_EQ(rm.Get(0u), 2u);
+}
+
+TEST(RowMapUnittest, IntersectSingleAbsent) {
+  RowMap rm(BitVector{true, false, true, true, false, true});
+  rm.Intersect(RowMap::SingleRow(1u));
+
+  ASSERT_EQ(rm.size(), 0u);
+}
+
+TEST(RowMapUnittest, IntersectMany) {
+  RowMap rm(std::vector<uint32_t>{3u, 2u, 0u, 1u, 1u, 3u});
+  rm.Intersect(RowMap(BitVector{false, false, true, true}));
+
+  ASSERT_EQ(rm.size(), 3u);
+  ASSERT_EQ(rm.Get(0u), 3u);
+  ASSERT_EQ(rm.Get(1u), 2u);
+  ASSERT_EQ(rm.Get(2u), 3u);
+}
+
 }  // namespace
 }  // namespace trace_processor
 }  // namespace perfetto
diff --git a/src/trace_processor/importers/proto/proto_trace_parser_unittest.cc b/src/trace_processor/importers/proto/proto_trace_parser_unittest.cc
index fda0ce2..30ff98b 100644
--- a/src/trace_processor/importers/proto/proto_trace_parser_unittest.cc
+++ b/src/trace_processor/importers/proto/proto_trace_parser_unittest.cc
@@ -156,6 +156,7 @@
 
   MOCK_METHOD2(UpdateThreadName,
                UniqueTid(uint32_t tid, StringId thread_name_id));
+  MOCK_METHOD2(SetThreadName, void(UniqueTid utid, StringId thread_name_id));
   MOCK_METHOD2(UpdateThread, UniqueTid(uint32_t tid, uint32_t tgid));
 
   MOCK_METHOD1(GetOrCreateProcess, UniquePid(uint32_t pid));
@@ -705,18 +706,20 @@
     thread_desc->set_thread_name("DifferentThreadName");
   }
 
-  InSequence in_sequence;  // Below slices should be sorted by timestamp.
+  EXPECT_CALL(*process_, UpdateThread(16, 15))
+      .WillRepeatedly(testing::Return(1u));
+  EXPECT_CALL(*process_, UpdateThread(11, 15)).WillOnce(testing::Return(2u));
 
   EXPECT_CALL(*storage_, InternString(base::StringView("OldThreadName")))
       .WillOnce(Return(1));
-  EXPECT_CALL(*process_, UpdateThreadName(16, StringId(1)));
+  EXPECT_CALL(*process_, SetThreadName(1u, StringId(1)));
   // Packet with same thread, but different name should update the name.
   EXPECT_CALL(*storage_, InternString(base::StringView("NewThreadName")))
       .WillOnce(Return(2));
-  EXPECT_CALL(*process_, UpdateThreadName(16, StringId(2)));
+  EXPECT_CALL(*process_, SetThreadName(1u, StringId(2)));
   EXPECT_CALL(*storage_, InternString(base::StringView("DifferentThreadName")))
       .WillOnce(Return(3));
-  EXPECT_CALL(*process_, UpdateThreadName(11, StringId(3)));
+  EXPECT_CALL(*process_, SetThreadName(2u, StringId(3)));
 
   Tokenize();
   context_.sorter->ExtractEventsForced();
diff --git a/src/trace_processor/importers/proto/track_event_parser.cc b/src/trace_processor/importers/proto/track_event_parser.cc
index 6f2b46f..53a527b 100644
--- a/src/trace_processor/importers/proto/track_event_parser.cc
+++ b/src/trace_processor/importers/proto/track_event_parser.cc
@@ -562,7 +562,7 @@
         if (!thread_name.size)
           break;
         auto thread_name_id = storage->InternString(thread_name);
-        procs->UpdateThreadName(storage->GetThread(*utid).tid, thread_name_id);
+        procs->SetThreadName(*utid, thread_name_id);
         break;
       }
       if (strcmp(event_name.c_str(), "process_name") == 0) {
diff --git a/src/trace_processor/importers/proto/track_event_tokenizer.cc b/src/trace_processor/importers/proto/track_event_tokenizer.cc
index 65c3901..57df3a5 100644
--- a/src/trace_processor/importers/proto/track_event_tokenizer.cc
+++ b/src/trace_processor/importers/proto/track_event_tokenizer.cc
@@ -209,8 +209,11 @@
   if (!name.empty()) {
     auto thread_name_id = context_->storage->InternString(name);
     ProcessTracker* procs = context_->process_tracker.get();
-    procs->UpdateThreadName(
-        static_cast<uint32_t>(thread_descriptor_decoder.tid()), thread_name_id);
+    procs->SetThreadName(
+        procs->UpdateThread(
+            static_cast<uint32_t>(thread_descriptor_decoder.tid()),
+            static_cast<uint32_t>(thread_descriptor_decoder.pid())),
+        thread_name_id);
   }
 }
 
diff --git a/src/trace_processor/process_tracker.cc b/src/trace_processor/process_tracker.cc
index 39b5afb..0dc7693 100644
--- a/src/trace_processor/process_tracker.cc
+++ b/src/trace_processor/process_tracker.cc
@@ -89,6 +89,11 @@
   return utid;
 }
 
+void ProcessTracker::SetThreadName(UniqueTid utid, StringId thread_name_id) {
+  // Overwrite the stored name of the thread identified by |utid|.
+  context_->storage->GetMutableThread(utid)->name_id = thread_name_id;
+}
+
 UniqueTid ProcessTracker::UpdateThread(uint32_t tid, uint32_t pid) {
   auto tids_pair = tids_.equal_range(tid);
 
diff --git a/src/trace_processor/process_tracker.h b/src/trace_processor/process_tracker.h
index 970843f..6aee798 100644
--- a/src/trace_processor/process_tracker.h
+++ b/src/trace_processor/process_tracker.h
@@ -68,6 +68,9 @@
   // the thread_name_id.
   virtual UniqueTid UpdateThreadName(uint32_t tid, StringId thread_name_id);
 
+  // Assigns a new name to a thread uniquely identified by its utid.
+  virtual void SetThreadName(UniqueTid utid, StringId thread_name_id);
+
   // Called when a thread is seen the process tree. Retrieves the matching utid
   // for the tid and the matching upid for the tgid and stores both.
   // Virtual for testing.
diff --git a/src/trace_processor/tables/BUILD.gn b/src/trace_processor/tables/BUILD.gn
index ab79c88..10f5289 100644
--- a/src/trace_processor/tables/BUILD.gn
+++ b/src/trace_processor/tables/BUILD.gn
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import("../../../gn/test.gni")
+
 source_set("tables") {
   sources = [
     "macros.h",
@@ -38,3 +40,17 @@
     "../../../gn:gtest_and_gmock",
   ]
 }
+
+if (enable_perfetto_benchmarks) {
+  source_set("benchmarks") {
+    testonly = true
+    deps = [
+      ":tables",
+      "../../../gn:benchmark",
+      "../../../gn:default_deps",
+    ]
+    sources = [
+      "macros_benchmark.cc",
+    ]
+  }
+}
diff --git a/src/trace_processor/tables/macros_benchmark.cc b/src/trace_processor/tables/macros_benchmark.cc
new file mode 100644
index 0000000..4874065
--- /dev/null
+++ b/src/trace_processor/tables/macros_benchmark.cc
@@ -0,0 +1,63 @@
+// Copyright (C) 2019 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <random>
+
+#include <benchmark/benchmark.h>
+
+#include "src/trace_processor/tables/macros.h"
+
+namespace perfetto {
+namespace trace_processor {
+namespace {
+
+#define PERFETTO_TP_TEST_TABLE(NAME, PARENT, C) \
+  NAME(TestTable, "test_table")                 \
+  PERFETTO_TP_ROOT_TABLE(PARENT, C)
+
+PERFETTO_TP_TABLE(PERFETTO_TP_TEST_TABLE);
+
+}  // namespace
+}  // namespace trace_processor
+}  // namespace perfetto
+
+using perfetto::trace_processor::SqlValue;
+using perfetto::trace_processor::StringPool;
+using perfetto::trace_processor::TestTable;
+
+static void BM_TableInsert(benchmark::State& state) {
+  StringPool pool;
+  TestTable table(&pool, nullptr);
+
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(table.Insert({}));
+  }
+}
+BENCHMARK(BM_TableInsert);
+
+static void BM_TableFilterIdColumn(benchmark::State& state) {
+  StringPool pool;
+  TestTable table(&pool, nullptr);
+
+  uint32_t size = static_cast<uint32_t>(state.range(0));
+  for (uint32_t i = 0; i < size; ++i)
+    table.Insert({});
+
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(table.Filter({table.id().eq(SqlValue::Long(30))}));
+  }
+}
+BENCHMARK(BM_TableFilterIdColumn)
+    ->RangeMultiplier(8)
+    ->Range(1024, 2 * 1024 * 1024);
diff --git a/test/synth_common.py b/test/synth_common.py
index 656faab..3383b81 100644
--- a/test/synth_common.py
+++ b/test/synth_common.py
@@ -294,6 +294,35 @@
       buffer_event.type = event_type
     buffer_event.duration_ns = duration
 
+  def add_thread_track_descriptor(self,
+                                  ps,
+                                  ts,
+                                  uuid,
+                                  pid,
+                                  tid,
+                                  thread_name,
+                                  inc_state_cleared=False):
+    packet = self.add_packet()
+    packet.trusted_packet_sequence_id = ps
+    packet.timestamp = ts
+    if inc_state_cleared:
+      packet.incremental_state_cleared = True
+    track = packet.track_descriptor
+    track.uuid = uuid
+    track.thread.pid = pid
+    track.thread.tid = tid
+    track.thread.thread_name = thread_name
+
+  def add_track_event(self, ps, ts, track_uuid, cat, name, type):
+    packet = self.add_packet()
+    packet.trusted_packet_sequence_id = ps
+    packet.timestamp = ts
+    event = packet.track_event
+    event.track_uuid = track_uuid
+    event.categories.append(cat)
+    event.name = name
+    event.type = type
+
 
 def create_trace():
   parser = argparse.ArgumentParser()
diff --git a/test/trace_processor/index b/test/trace_processor/index
index 830270c..06e0771 100644
--- a/test/trace_processor/index
+++ b/test/trace_processor/index
@@ -137,3 +137,7 @@
 
 heap_graph.textproto heap_graph_object.sql heap_graph_object.out
 heap_graph.textproto heap_graph_reference.sql heap_graph_reference.out
+
+# TrackEvent tests.
+track_event_same_tids.py process_tracking.sql track_event_same_tids_threads.out
+track_event_same_tids.py track_event_slices.sql track_event_same_tids_slices.out
diff --git a/test/trace_processor/track_event_same_tids.py b/test/trace_processor/track_event_same_tids.py
new file mode 100644
index 0000000..c838d6d
--- /dev/null
+++ b/test/trace_processor/track_event_same_tids.py
@@ -0,0 +1,37 @@
+#!/usr/bin/python
+# Copyright (C) 2019 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from os import sys, path
+sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
+import synth_common
+
+trace = synth_common.create_trace()
+
+# Chrome renderer processes don't know their "true" tids on some platforms.
+# Instead, they each write tids that start at 1 - which means, the same tids are
+# used in multiple different processes at the same time. This trace replicates
+# such a situation.
+
+trace.add_thread_track_descriptor(
+    ps=1, ts=0, uuid=1, pid=5, tid=1, thread_name="t1", inc_state_cleared=True)
+trace.add_thread_track_descriptor(
+    ps=1, ts=0, uuid=2, pid=10, tid=1, thread_name="t2")
+
+trace.add_track_event(
+    ps=1, ts=1000, track_uuid=1, cat="cat", name="name1", type=3)
+trace.add_track_event(
+    ps=1, ts=2000, track_uuid=2, cat="cat", name="name2", type=3)
+
+print(trace.trace.SerializeToString())
diff --git a/test/trace_processor/track_event_same_tids_slices.out b/test/trace_processor/track_event_same_tids_slices.out
new file mode 100644
index 0000000..f66b8ce
--- /dev/null
+++ b/test/trace_processor/track_event_same_tids_slices.out
@@ -0,0 +1,3 @@
+"ts","dur","category","name"
+1000,0,"cat","name1"
+2000,0,"cat","name2"
diff --git a/test/trace_processor/track_event_same_tids_threads.out b/test/trace_processor/track_event_same_tids_threads.out
new file mode 100644
index 0000000..b4ce83a
--- /dev/null
+++ b/test/trace_processor/track_event_same_tids_threads.out
@@ -0,0 +1,5 @@
+"tid","pid","pname","tname"
+1,5,"[NULL]","t1"
+1,10,"[NULL]","t2"
+5,5,"[NULL]","[NULL]"
+10,10,"[NULL]","[NULL]"
diff --git a/test/trace_processor/track_event_slices.sql b/test/trace_processor/track_event_slices.sql
new file mode 100644
index 0000000..b69c990
--- /dev/null
+++ b/test/trace_processor/track_event_slices.sql
@@ -0,0 +1 @@
+select ts, dur, category, name from slice order by ts asc;
\ No newline at end of file
diff --git a/tools/trace_processor b/tools/trace_processor
index 1f7f94e..e85568e 100755
--- a/tools/trace_processor
+++ b/tools/trace_processor
@@ -31,8 +31,8 @@
 import urllib
 
 TRACE_PROCESSOR_SHELL_SHAS = {
-    'linux': 'ae229859849a3fc8fa56ccba6cfb3345e967cc3b',
-    'mac': 'e22a4fedfbb521c0e315531b88f1d8283b79ab85',
+    'linux': 'dc506737a39264232609261f235caecf7e1cb4e6',
+    'mac': '54c84c12d15a89e2b83a2be6e25d1d799f4ed93b',
 }
 TRACE_PROCESSOR_SHELL_PATH = tempfile.gettempdir()
 TRACE_PROCESSOR_SHELL_BASE_URL = ('https://storage.googleapis.com/perfetto/')
diff --git a/tools/traceconv b/tools/traceconv
index c8f5078..17a5f01 100755
--- a/tools/traceconv
+++ b/tools/traceconv
@@ -33,8 +33,8 @@
 # Keep this in sync with the SHAs in catapult file
 # systrace/systrace/tracing_agents/atrace_from_file_agent.py.
 TRACE_TO_TEXT_SHAS = {
-    'linux': '2ec67adee348534af98db8a58ae42b974031934f',
-    'mac': 'b1b1721f482c4126274421110bbf1582265b9388',
+    'linux': '1fba664cb045b119fef96e5827872affcf58e896',
+    'mac': 'ffc682f302d37ede89938bd3b92d8b7b9701d682',
 }
 TRACE_TO_TEXT_PATH = tempfile.gettempdir()
 TRACE_TO_TEXT_BASE_URL = ('https://storage.googleapis.com/perfetto/')