Merge "Show heap graphs in UI."
diff --git a/BUILD.gn b/BUILD.gn
index 69bbd54..7fa8514 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -228,6 +228,10 @@
       "src/trace_processor:export_json",
       "src/trace_processor:storage_minimal",
       "src/tracing",
+      "src/tracing:client_api",
+
+      # TODO(eseckler): Create a platform for chrome and hook it up somehow.
+      "src/tracing:platform_fake",
     ]
     configs -= [ "//build/config/compiler:chromium_code" ]
     configs += [ "//build/config/compiler:no_chromium_code" ]
@@ -235,6 +239,7 @@
       "include/perfetto/ext/trace_processor:export_json",
       "include/perfetto/ext/tracing/core",
       "include/perfetto/trace_processor:storage",
+      "include/perfetto/tracing",
       "protos/perfetto/common:zero",
       "protos/perfetto/trace:zero",
       "protos/perfetto/trace/chrome:zero",
diff --git a/docs/trace-processor.md b/docs/trace-processor.md
index cea8846..532056b 100644
--- a/docs/trace-processor.md
+++ b/docs/trace-processor.md
@@ -8,8 +8,10 @@
 through SQL queries. The trace processor is used:
 * By the [Perfetto UI](https://ui.perfetto.dev/), in the form of a
   Web Assembly module.
-* Standalone, using the `trace_processor_shell` target
-  (`ninja -C out/xxx trace_processor_shell`).
+* Standalone:
+  * using the [prebuilt](http://get.perfetto.dev/trace_processor) binaries.
+  * using the `trace_processor_shell` target from source
+    (`ninja -C out/xxx trace_processor_shell`).
 * In internal pipelines for batch processing.
 
 Supported input formats:
diff --git a/gn/BUILD.gn b/gn/BUILD.gn
index 7412aa3..5e859e8 100644
--- a/gn/BUILD.gn
+++ b/gn/BUILD.gn
@@ -139,6 +139,12 @@
 
     # For perfetto_build_flags.h
     buildflag_gen_dir_,
+
+    # For generated files (proto libraries etc). We add the directory here
+    # because we stop propagation of the configs for individual proto libraries
+    # to avoid duplicate include directory command line flags in compiler
+    # invocations; see proto_library.gni, crbug.com/1043279, crbug.com/gn/142.
+    "$root_gen_dir/$perfetto_root_path",
   ]
 }
 
diff --git a/gn/perfetto.gni b/gn/perfetto.gni
index 3d756d0..828a493 100644
--- a/gn/perfetto.gni
+++ b/gn/perfetto.gni
@@ -66,6 +66,10 @@
   build_with_chromium = false
 }
 
+if (!defined(is_nacl)) {
+  is_nacl = false
+}
+
 declare_args() {
  # The Android blueprint file generator sets this to true (as well as
   # is_perfetto_build_generator). This is just about being built in the
@@ -135,7 +139,7 @@
   # system backend in the client library.
   # This includes building things that rely on POSIX sockets, this places
   # limitations on the supported operating systems.
-  enable_perfetto_ipc = !is_win && !is_fuchsia &&
+  enable_perfetto_ipc = !is_win && !is_fuchsia && !is_nacl &&
                         (perfetto_build_standalone ||
                          perfetto_build_with_android || build_with_chromium)
 
diff --git a/gn/proto_library.gni b/gn/proto_library.gni
index 85b0e57..b6118bf 100644
--- a/gn/proto_library.gni
+++ b/gn/proto_library.gni
@@ -90,6 +90,7 @@
                              "testonly",
                              "visibility",
                              "generate_descriptor",
+                             "propagate_imports_configs",
                            ])
   }
 }
@@ -133,6 +134,7 @@
                              "testonly",
                              "visibility",
                              "generate_descriptor",
+                             "propagate_imports_configs",
                            ])
   }
 }
@@ -163,6 +165,7 @@
                              "sources",
                              "testonly",
                              "visibility",
+                             "propagate_imports_configs",
                            ])
   }
 }
@@ -193,6 +196,11 @@
   ]
   expansion_token = "@TYPE@"
 
+  # gn:public_config propagates the gen dir as an include directory. We
+  # disable the proto_library's public_config to avoid duplicate include
+  # directory command line flags (crbug.com/1043279, crbug.com/gn/142).
+  propagate_imports_configs_ = false
+
   foreach(gen_type, proto_generators) {
     target_name_ = string_replace(target_name, expansion_token, gen_type)
 
@@ -210,6 +218,7 @@
         proto_out_dir = proto_path
         generator_plugin_options = "wrapper_namespace=pbzero"
         deps = deps_
+        propagate_imports_configs = propagate_imports_configs_
         forward_variables_from(invoker, vars_to_forward)
       }
     } else if (gen_type == "cpp") {
@@ -218,6 +227,7 @@
         proto_out_dir = proto_path
         generator_plugin_options = "wrapper_namespace=gen"
         deps = deps_
+        propagate_imports_configs = propagate_imports_configs_
         forward_variables_from(invoker, vars_to_forward)
       }
     } else if (gen_type == "ipc") {
@@ -227,6 +237,7 @@
         proto_out_dir = proto_path
         generator_plugin_options = "wrapper_namespace=gen"
         deps = deps_ + [ ":$cpp_target_name_" ]
+        propagate_imports_configs = propagate_imports_configs_
         forward_variables_from(invoker, vars_to_forward)
       }
     } else if (gen_type == "lite") {
@@ -236,6 +247,7 @@
         generate_python = false
         deps = deps_
         cc_generator_options = "lite=true:"
+        propagate_imports_configs = propagate_imports_configs_
         forward_variables_from(invoker, vars_to_forward)
       }
     } else if (gen_type == "descriptor") {
@@ -248,6 +260,9 @@
         deps = deps_
         forward_variables_from(invoker, vars_to_forward)
       }
+
+      # Not needed for the descriptor proto_library target.
+      not_needed([ "propagate_imports_configs_" ])
     } else {
       assert(false, "Invalid 'proto_generators' value.")
     }
diff --git a/gn/standalone/BUILDCONFIG.gn b/gn/standalone/BUILDCONFIG.gn
index d980a05..3668c0f 100644
--- a/gn/standalone/BUILDCONFIG.gn
+++ b/gn/standalone/BUILDCONFIG.gn
@@ -36,10 +36,11 @@
 is_linux_host = host_os == "linux"
 is_mac = current_os == "mac"
 
-# Building with Windows/Fuchsia is currently only supported in the Chromium
+# Building with Windows/Fuchsia/nacl is currently only supported in the Chromium
 # tree so always set these to false.
 is_win = false
 is_fuchsia = false
+is_nacl = false
 
 if (target_cpu == "") {
   target_cpu = host_cpu
diff --git a/gn/standalone/proto_library.gni b/gn/standalone/proto_library.gni
index 75c2294..6a01512 100644
--- a/gn/standalone/proto_library.gni
+++ b/gn/standalone/proto_library.gni
@@ -230,6 +230,18 @@
         ":$config_name",
       ]
 
+      # By default, propagate the config for |include_dirs| to dependent
+      # targets, so that public imports can be resolved to corresponding header
+      # files. In some cases, the embedder target handles include directory
+      # propagation itself, e.g. via a common config.
+      propagate_imports_configs = !defined(invoker.propagate_imports_configs) ||
+                                  invoker.propagate_imports_configs
+      if (propagate_imports_configs) {
+        public_configs += [ ":$config_name" ]
+      } else {
+        configs += [ ":$config_name" ]
+      }
+
       # Use protobuf_full only for tests.
       if (defined(invoker.use_protobuf_full) &&
           invoker.use_protobuf_full == true) {
diff --git a/include/perfetto/trace_processor/basic_types.h b/include/perfetto/trace_processor/basic_types.h
index 05e34c5..b584126 100644
--- a/include/perfetto/trace_processor/basic_types.h
+++ b/include/perfetto/trace_processor/basic_types.h
@@ -39,6 +39,16 @@
  // When set to true, this option forces the trace processor to perform a full
  // sort, ignoring any internal heuristics to skip sorting parts of the data.
   bool force_full_sort = false;
+
+  // When set to false, this option stops the trace processor from including
+  // ftrace events in the raw table; this makes converting events back to the
+  // systrace text format impossible but saves ~50% of the trace processor's
+  // memory usage. For reference, Studio intends to use this option.
+  //
+  // Note: "generic" ftrace events will be parsed into the raw table even if
+  // this flag is false and all other events which parse into the raw table are
+  // unaffected by this flag.
+  bool ingest_ftrace_in_raw_table = true;
 };
 
 // Represents a dynamically typed value returned by SQL.
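
The gating for this new option lives in the ftrace parser and sched event tracker changes further down. As a rough embedder sketch (assuming the public `TraceProcessor::CreateInstance(const Config&)` factory; the function name is hypothetical):

```cpp
#include <memory>

#include "perfetto/trace_processor/basic_types.h"
#include "perfetto/trace_processor/trace_processor.h"

// An embedder that never needs the systrace text export can skip raw-table
// ftrace ingestion to cut trace processor memory usage roughly in half.
std::unique_ptr<perfetto::trace_processor::TraceProcessor>
CreateLowMemoryTraceProcessor() {
  perfetto::trace_processor::Config config;
  config.ingest_ftrace_in_raw_table = false;
  return perfetto::trace_processor::TraceProcessor::CreateInstance(config);
}
```
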
diff --git a/include/perfetto/tracing/debug_annotation.h b/include/perfetto/tracing/debug_annotation.h
index 996861c..f13fbd9 100644
--- a/include/perfetto/tracing/debug_annotation.h
+++ b/include/perfetto/tracing/debug_annotation.h
@@ -21,6 +21,7 @@
 
 #include <stdint.h>
 
+#include <memory>
 #include <string>
 
 namespace perfetto {
@@ -56,6 +57,12 @@
 void WriteDebugAnnotation(protos::pbzero::DebugAnnotation*,
                           const DebugAnnotation&);
 
+template <typename T>
+void WriteDebugAnnotation(protos::pbzero::DebugAnnotation* annotation,
+                          const std::unique_ptr<T>& value) {
+  WriteDebugAnnotation(annotation, *value);
+}
+
 }  // namespace internal
 }  // namespace perfetto
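
This overload lets the trace macros accept a uniquely owned annotation and defer the dereference until the event is actually serialized, which is what makes the `std::move(owned_annotation)` cases in the integration test below work across concurrent sessions. A minimal usage sketch (the `JsonAnnotation` class is illustrative, mirroring the test's `MyDebugAnnotation`):

```cpp
#include <memory>
#include <utility>

#include "perfetto/tracing.h"

class JsonAnnotation : public perfetto::DebugAnnotation {
 public:
  ~JsonAnnotation() override = default;
  void Add(
      perfetto::protos::pbzero::DebugAnnotation* annotation) const override {
    annotation->set_legacy_json_value(R"({"key": 123})");
  }
};

void EmitOwnedAnnotation() {
  // The unique_ptr is moved into the trace point; WriteDebugAnnotation
  // dereferences it once per tracing session that writes the event.
  std::unique_ptr<JsonAnnotation> ann(new JsonAnnotation());
  TRACE_EVENT_BEGIN("test", "Event", "custom_arg", std::move(ann));
}
```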
 
diff --git a/include/perfetto/tracing/track_event_legacy.h b/include/perfetto/tracing/track_event_legacy.h
index 8513024..bf83ca4 100644
--- a/include/perfetto/tracing/track_event_legacy.h
+++ b/include/perfetto/tracing/track_event_legacy.h
@@ -23,6 +23,7 @@
 // to 1 activate the compatibility layer.
 
 #include "perfetto/base/compiler.h"
+#include "perfetto/tracing/track_event.h"
 
 #include <stdint.h>
 
@@ -118,6 +119,81 @@
 // Internal legacy trace point implementation.
 // ----------------------------------------------------------------------------
 
+namespace perfetto {
+namespace internal {
+
+class TrackEventLegacy {
+ public:
+  static constexpr protos::pbzero::TrackEvent::Type PhaseToType(char phase) {
+    // clang-format off
+    return (phase == TRACE_EVENT_PHASE_BEGIN) ?
+               protos::pbzero::TrackEvent::TYPE_SLICE_BEGIN :
+           (phase == TRACE_EVENT_PHASE_END) ?
+               protos::pbzero::TrackEvent::TYPE_SLICE_END :
+           (phase == TRACE_EVENT_PHASE_INSTANT) ?
+               protos::pbzero::TrackEvent::TYPE_INSTANT :
+           protos::pbzero::TrackEvent::TYPE_UNSPECIFIED;
+    // clang-format on
+  }
+
+  // Reduce binary size overhead by outlining most of the code for writing a
+  // legacy trace event.
+  template <typename... Args>
+  static void WriteLegacyEvent(EventContext ctx,
+                               char phase,
+                               uint32_t flags,
+                               Args&&... args) PERFETTO_NO_INLINE {
+    AddDebugAnnotations(&ctx, std::forward<Args>(args)...);
+    SetTrackIfNeeded(&ctx, flags);
+    if (PhaseToType(phase) == protos::pbzero::TrackEvent::TYPE_UNSPECIFIED) {
+      auto legacy_event = ctx.event()->set_legacy_event();
+      legacy_event->set_phase(phase);
+    }
+  }
+
+  // No arguments.
+  static void AddDebugAnnotations(EventContext*) {}
+
+  // One argument.
+  template <typename ArgType>
+  static void AddDebugAnnotations(EventContext* ctx,
+                                  const char* arg_name,
+                                  ArgType&& arg_value) {
+    TrackEventInternal::AddDebugAnnotation(ctx, arg_name, arg_value);
+  }
+
+  // Two arguments.
+  template <typename ArgType, typename ArgType2>
+  static void AddDebugAnnotations(EventContext* ctx,
+                                  const char* arg_name,
+                                  ArgType&& arg_value,
+                                  const char* arg_name2,
+                                  ArgType2&& arg_value2) {
+    TrackEventInternal::AddDebugAnnotation(ctx, arg_name, arg_value);
+    TrackEventInternal::AddDebugAnnotation(ctx, arg_name2, arg_value2);
+  }
+
+ private:
+  static void SetTrackIfNeeded(EventContext* ctx, uint32_t flags) {
+    auto scope = flags & TRACE_EVENT_FLAG_SCOPE_MASK;
+    switch (scope) {
+      case TRACE_EVENT_SCOPE_GLOBAL:
+        ctx->event()->set_track_uuid(0);
+        break;
+      case TRACE_EVENT_SCOPE_PROCESS:
+        ctx->event()->set_track_uuid(ProcessTrack::Current().uuid);
+        break;
+      default:
+      case TRACE_EVENT_SCOPE_THREAD:
+        // Thread scope is already the default.
+        break;
+    }
+  }
+};
+
+}  // namespace internal
+}  // namespace perfetto
+
 // A black hole trace point where unsupported trace events are routed.
 #define PERFETTO_INTERNAL_EVENT_NOOP(cat, name, ...) \
   do {                                               \
@@ -129,9 +205,23 @@
 
 // Implementations for the INTERNAL_* adapter macros used by the trace points
 // below.
-#define INTERNAL_TRACE_EVENT_ADD(...) PERFETTO_INTERNAL_EVENT_NOOP(__VA_ARGS__)
-#define INTERNAL_TRACE_EVENT_ADD_SCOPED(...) \
-  PERFETTO_INTERNAL_EVENT_NOOP(__VA_ARGS__)
+#define INTERNAL_TRACE_EVENT_ADD(phase, category, name, flags, ...)      \
+  PERFETTO_INTERNAL_TRACK_EVENT(                                         \
+      category, name,                                                    \
+      ::perfetto::internal::TrackEventLegacy::PhaseToType(phase),        \
+      [&](perfetto::EventContext ctx) {                                  \
+        using ::perfetto::internal::TrackEventLegacy;                    \
+        TrackEventLegacy::WriteLegacyEvent(std::move(ctx), phase, flags, \
+                                           ##__VA_ARGS__);               \
+      })
+
+#define INTERNAL_TRACE_EVENT_ADD_SCOPED(category, name, ...)        \
+  PERFETTO_INTERNAL_SCOPED_TRACK_EVENT(                             \
+      category, name, [&](perfetto::EventContext ctx) {             \
+        using ::perfetto::internal::TrackEventLegacy;               \
+        TrackEventLegacy::AddDebugAnnotations(&ctx, ##__VA_ARGS__); \
+      })
+
 #define INTERNAL_TRACE_EVENT_ADD_SCOPED_WITH_FLOW(...) \
   PERFETTO_INTERNAL_EVENT_NOOP(__VA_ARGS__)
 #define INTERNAL_TRACE_EVENT_ADD_WITH_TIMESTAMP(...) \
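
With this, `INTERNAL_TRACE_EVENT_ADD` and `INTERNAL_TRACE_EVENT_ADD_SCOPED` emit real track events: `PhaseToType()` maps begin/end/instant phases to `TrackEvent` types, and any other phase falls back to a `LegacyEvent` submessage carrying the raw phase character. A sketch of what the adapter now records (based on the integration test; assumes the `cat` category is registered):

```cpp
// Define before including perfetto/tracing.h to activate the legacy layer.
#define PERFETTO_ENABLE_LEGACY_TRACE_EVENTS 1
#include "perfetto/tracing.h"

void EmitLegacyEvents() {
  // TYPE_INSTANT on the global track (track_uuid = 0).
  TRACE_EVENT_INSTANT0("cat", "LegacyEvent", TRACE_EVENT_SCOPE_GLOBAL);
  // TYPE_SLICE_BEGIN / TYPE_SLICE_END with debug annotations.
  TRACE_EVENT_BEGIN1("cat", "LegacyEvent", "arg", 123);
  TRACE_EVENT_END2("cat", "LegacyEvent", "arg", "string", "arg2", 0.123f);
  // Phase 'C' has no TrackEvent type mapping yet, so it is written as
  // TYPE_UNSPECIFIED with legacy_event.phase = 'C'.
  TRACE_COUNTER1("cat", "LegacyCounter", 1234);
}
```
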
diff --git a/src/trace_processor/containers/bit_vector_benchmark.cc b/src/trace_processor/containers/bit_vector_benchmark.cc
index b4a0c27..2deb904 100644
--- a/src/trace_processor/containers/bit_vector_benchmark.cc
+++ b/src/trace_processor/containers/bit_vector_benchmark.cc
@@ -17,6 +17,7 @@
 #include <benchmark/benchmark.h>
 
 #include "src/trace_processor/containers/bit_vector.h"
+#include "src/trace_processor/containers/bit_vector_iterators.h"
 
 namespace {
 
@@ -27,15 +28,40 @@
 }
 
 void BitVectorArgs(benchmark::internal::Benchmark* b) {
-  b->Arg(64);
+  std::vector<int> set_percentages;
+  if (IsBenchmarkFunctionalOnly()) {
+    set_percentages = std::vector<int>{50};
+  } else {
+    set_percentages = std::vector<int>{0, 1, 5, 50, 95, 99, 100};
+  }
 
-  if (!IsBenchmarkFunctionalOnly()) {
-    b->Arg(512);
-    b->Arg(8192);
-    b->Arg(123456);
-    b->Arg(1234567);
+  for (int percentage : set_percentages) {
+    b->Args({64, percentage});
+
+    if (!IsBenchmarkFunctionalOnly()) {
+      b->Args({512, percentage});
+      b->Args({8192, percentage});
+      b->Args({123456, percentage});
+      b->Args({1234567, percentage});
+    }
   }
 }
+
+BitVector BvWithSizeAndSetPercentage(uint32_t size, uint32_t set_percentage) {
+  static constexpr uint32_t kRandomSeed = 29;
+  std::minstd_rand0 rnd_engine(kRandomSeed);
+
+  BitVector bv;
+  for (uint32_t i = 0; i < size; ++i) {
+    if (rnd_engine() % 100 < set_percentage) {
+      bv.AppendTrue();
+    } else {
+      bv.AppendFalse();
+    }
+  }
+  return bv;
+}
+
 }  // namespace
 
 static void BM_BitVectorAppendTrue(benchmark::State& state) {
@@ -61,15 +87,9 @@
   std::minstd_rand0 rnd_engine(kRandomSeed);
 
   uint32_t size = static_cast<uint32_t>(state.range(0));
+  uint32_t set_percentage = static_cast<uint32_t>(state.range(1));
 
-  BitVector bv;
-  for (uint32_t i = 0; i < size; ++i) {
-    if (rnd_engine() % 2) {
-      bv.AppendTrue();
-    } else {
-      bv.AppendFalse();
-    }
-  }
+  BitVector bv = BvWithSizeAndSetPercentage(size, set_percentage);
 
   static constexpr uint32_t kPoolSize = 1024 * 1024;
   std::vector<bool> bit_pool(kPoolSize);
@@ -93,15 +113,9 @@
   std::minstd_rand0 rnd_engine(kRandomSeed);
 
   uint32_t size = static_cast<uint32_t>(state.range(0));
+  uint32_t set_percentage = static_cast<uint32_t>(state.range(1));
 
-  BitVector bv;
-  for (uint32_t i = 0; i < size; ++i) {
-    if (rnd_engine() % 2) {
-      bv.AppendTrue();
-    } else {
-      bv.AppendFalse();
-    }
-  }
+  BitVector bv = BvWithSizeAndSetPercentage(size, set_percentage);
 
   static constexpr uint32_t kPoolSize = 1024 * 1024;
   std::vector<uint32_t> row_pool(kPoolSize);
@@ -123,19 +137,15 @@
   std::minstd_rand0 rnd_engine(kRandomSeed);
 
   uint32_t size = static_cast<uint32_t>(state.range(0));
+  uint32_t set_percentage = static_cast<uint32_t>(state.range(1));
 
-  BitVector bv;
-  for (uint32_t i = 0; i < size; ++i) {
-    if (rnd_engine() % 2) {
-      bv.AppendTrue();
-    } else {
-      bv.AppendFalse();
-    }
-  }
-
+  BitVector bv = BvWithSizeAndSetPercentage(size, set_percentage);
   static constexpr uint32_t kPoolSize = 1024 * 1024;
   std::vector<uint32_t> row_pool(kPoolSize);
   uint32_t set_bit_count = bv.GetNumBitsSet();
+  if (set_bit_count == 0)
+    return;
+
   for (uint32_t i = 0; i < kPoolSize; ++i) {
     row_pool[i] = rnd_engine() % set_bit_count;
   }
@@ -153,11 +163,12 @@
   std::minstd_rand0 rnd_engine(kRandomSeed);
 
   uint32_t size = static_cast<uint32_t>(state.range(0));
+  uint32_t set_percentage = static_cast<uint32_t>(state.range(1));
 
   uint32_t count = 0;
   BitVector bv;
   for (uint32_t i = 0; i < size; ++i) {
-    bool value = rnd_engine() % 2;
+    bool value = rnd_engine() % 100 < set_percentage;
     if (value) {
       bv.AppendTrue();
     } else {
@@ -205,11 +216,12 @@
   std::minstd_rand0 rnd_engine(kRandomSeed);
 
   uint32_t size = static_cast<uint32_t>(state.range(0));
+  uint32_t set_percentage = static_cast<uint32_t>(state.range(1));
 
   BitVector bv;
   BitVector picker;
   for (uint32_t i = 0; i < size; ++i) {
-    bool value = rnd_engine() % 2;
+    bool value = rnd_engine() % 100 < set_percentage;
     if (value) {
       bv.AppendTrue();
 
@@ -234,3 +246,16 @@
   }
 }
 BENCHMARK(BM_BitVectorUpdateSetBits)->Apply(BitVectorArgs);
+
+static void BM_BitVectorSetBitsIterator(benchmark::State& state) {
+  uint32_t size = static_cast<uint32_t>(state.range(0));
+  uint32_t set_percentage = static_cast<uint32_t>(state.range(1));
+
+  BitVector bv = BvWithSizeAndSetPercentage(size, set_percentage);
+  for (auto _ : state) {
+    for (auto it = bv.IterateSetBits(); it; it.Next()) {
+      benchmark::DoNotOptimize(it.index());
+    }
+  }
+}
+BENCHMARK(BM_BitVectorSetBitsIterator)->Apply(BitVectorArgs);
diff --git a/src/trace_processor/export_json.cc b/src/trace_processor/export_json.cc
index ed6e5f8..1e78477 100644
--- a/src/trace_processor/export_json.cc
+++ b/src/trace_processor/export_json.cc
@@ -27,6 +27,7 @@
 #include <json/writer.h>
 #include <stdio.h>
 
+#include <algorithm>
 #include <cstring>
 #include <deque>
 #include <limits>
@@ -187,26 +188,146 @@
       if (label_filter_ && !label_filter_("traceEvents"))
         return;
 
-      // Pop end events with smaller or equal timestamps.
-      PopEndEvents(event["ts"].asInt64());
-
       DoWriteEvent(event);
     }
 
-    void PushEndEvent(const Json::Value& event) {
+    void AddAsyncBeginEvent(const Json::Value& event) {
       if (label_filter_ && !label_filter_("traceEvents"))
         return;
 
-      // Pop any end events that end before the new one.
-      PopEndEvents(event["ts"].asInt64() - 1);
+      async_begin_events_.push_back(event);
+    }
 
+    void AddAsyncInstantEvent(const Json::Value& event) {
+      if (label_filter_ && !label_filter_("traceEvents"))
+        return;
+
+      async_instant_events_.push_back(event);
+    }
+
+    void AddAsyncEndEvent(const Json::Value& event) {
+      if (label_filter_ && !label_filter_("traceEvents"))
+        return;
+
+      async_end_events_.push_back(event);
+    }
+
+    void SortAndEmitAsyncEvents() {
       // Catapult doesn't handle out-of-order begin/end events well, especially
       // when their timestamps are the same, but their order is incorrect. Since
-      // our events are sorted by begin timestamp, we only have to reorder end
-      // events. We do this by buffering them into a stack, so that both begin &
-      // end events of potential child events have been emitted before we emit
-      // the end of a parent event.
-      end_events_.push_back(event);
+      // we process events sorted by begin timestamp, |async_begin_events_| and
+      // |async_instant_events_| are already sorted. We now only have to sort
+      // |async_end_events_| and merge-sort all events into a single sequence.
+
+      // Sort |async_end_events_|. Note that we should order by ascending
+      // timestamp, but in reverse-stable order. This way, a child slice's end
+      // is emitted before its parent's end event, even if both end events have
+      // the same timestamp. To accomplish this, we perform a stable sort in
+      // descending order and later iterate via reverse iterators.
+      struct {
+        bool operator()(const Json::Value& a, const Json::Value& b) const {
+          return a["ts"].asInt64() > b["ts"].asInt64();
+        }
+      } CompareEvents;
+      std::stable_sort(async_end_events_.begin(), async_end_events_.end(),
+                       CompareEvents);
+
+      // Merge sort by timestamp. If events share the same timestamp, prefer
+      // instant events, then end events, so that old slices close before new
+      // ones are opened, but instant events remain in their deepest nesting
+      // level.
+      auto instant_event_it = async_instant_events_.begin();
+      auto end_event_it = async_end_events_.rbegin();
+      auto begin_event_it = async_begin_events_.begin();
+
+      auto has_instant_event = instant_event_it != async_instant_events_.end();
+      auto has_end_event = end_event_it != async_end_events_.rend();
+      auto has_begin_event = begin_event_it != async_begin_events_.end();
+
+      auto emit_next_instant = [&instant_event_it, &has_instant_event, this]() {
+        DoWriteEvent(*instant_event_it);
+        instant_event_it++;
+        has_instant_event = instant_event_it != async_instant_events_.end();
+      };
+      auto emit_next_end = [&end_event_it, &has_end_event, this]() {
+        DoWriteEvent(*end_event_it);
+        end_event_it++;
+        has_end_event = end_event_it != async_end_events_.rend();
+      };
+      auto emit_next_begin = [&begin_event_it, &has_begin_event, this]() {
+        DoWriteEvent(*begin_event_it);
+        begin_event_it++;
+        has_begin_event = begin_event_it != async_begin_events_.end();
+      };
+
+      auto emit_next_instant_or_end = [&instant_event_it, &end_event_it,
+                                       &emit_next_instant, &emit_next_end]() {
+        if ((*instant_event_it)["ts"].asInt64() <=
+            (*end_event_it)["ts"].asInt64()) {
+          emit_next_instant();
+        } else {
+          emit_next_end();
+        }
+      };
+      auto emit_next_instant_or_begin = [&instant_event_it, &begin_event_it,
+                                         &emit_next_instant,
+                                         &emit_next_begin]() {
+        if ((*instant_event_it)["ts"].asInt64() <=
+            (*begin_event_it)["ts"].asInt64()) {
+          emit_next_instant();
+        } else {
+          emit_next_begin();
+        }
+      };
+      auto emit_next_end_or_begin = [&end_event_it, &begin_event_it,
+                                     &emit_next_end, &emit_next_begin]() {
+        if ((*end_event_it)["ts"].asInt64() <=
+            (*begin_event_it)["ts"].asInt64()) {
+          emit_next_end();
+        } else {
+          emit_next_begin();
+        }
+      };
+
+      // While we still have events in all iterators, consider each.
+      while (has_instant_event && has_end_event && has_begin_event) {
+        if ((*instant_event_it)["ts"].asInt64() <=
+            (*end_event_it)["ts"].asInt64()) {
+          emit_next_instant_or_begin();
+        } else {
+          emit_next_end_or_begin();
+        }
+      }
+
+      // Only instant and end events left.
+      while (has_instant_event && has_end_event) {
+        emit_next_instant_or_end();
+      }
+
+      // Only instant and begin events left.
+      while (has_instant_event && has_begin_event) {
+        emit_next_instant_or_begin();
+      }
+
+      // Only end and begin events left.
+      while (has_end_event && has_begin_event) {
+        emit_next_end_or_begin();
+      }
+
+      // Remaining instant events.
+      while (has_instant_event) {
+        emit_next_instant();
+      }
+
+      // Remaining end events.
+      while (has_end_event) {
+        emit_next_end();
+      }
+
+      // Remaining begin events.
+      while (has_begin_event) {
+        emit_next_begin();
+      }
     }
 
     void WriteMetadataEvent(const char* metadata_type,
@@ -300,7 +421,7 @@
     }
 
     void WriteFooter() {
-      PopEndEvents(std::numeric_limits<int64_t>::max());
+      SortAndEmitAsyncEvents();
 
       // Filter metadata entries.
       if (metadata_filter_) {
@@ -372,16 +493,6 @@
       first_event_ = false;
     }
 
-    void PopEndEvents(int64_t max_ts) {
-      while (!end_events_.empty()) {
-        int64_t ts = end_events_.back()["ts"].asInt64();
-        if (ts > max_ts)
-          break;
-        DoWriteEvent(end_events_.back());
-        end_events_.pop_back();
-      }
-    }
-
     OutputWriter* output_;
     ArgumentFilterPredicate argument_filter_;
     MetadataFilterPredicate metadata_filter_;
@@ -391,7 +502,9 @@
     Json::Value metadata_;
     std::string system_trace_data_;
     std::string user_trace_data_;
-    std::deque<Json::Value> end_events_;
+    std::vector<Json::Value> async_begin_events_;
+    std::vector<Json::Value> async_instant_events_;
+    std::vector<Json::Value> async_end_events_;
   };
 
   class ArgsBuilder {
@@ -794,10 +907,10 @@
 
         if (duration_ns == 0) {  // Instant async event.
           event["ph"] = "n";
-          writer_.WriteCommonEvent(event);
+          writer_.AddAsyncInstantEvent(event);
         } else {  // Async start and end.
           event["ph"] = "b";
-          writer_.WriteCommonEvent(event);
+          writer_.AddAsyncBeginEvent(event);
           // If the slice didn't finish, the duration may be negative. Don't
           // write the end event in this case.
           if (duration_ns > 0) {
@@ -812,7 +925,7 @@
                   (thread_instruction_count + thread_instruction_delta));
             }
             event["args"].clear();
-            writer_.PushEndEvent(event);
+            writer_.AddAsyncEndEvent(event);
           }
         }
       } else {
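
For reference, the emission order this produces for the updated `AsyncEvents` unit test below (timestamps in microseconds): begins keep their insertion order, a nested slice's end precedes its parent's at equal timestamps, and events from the second track interleave purely by timestamp.

```cpp
// Expected traceEvents order (sketch of the test data, not exact JSON):
//   {"ph":"b", "name":"name",  "ts":10000}  // outer begin
//   {"ph":"b", "name":"name2", "ts":10000}  // nested begin, same ts
//   {"ph":"b", "name":"name3", "ts":10005}  // begin on the second track
//   {"ph":"e", "name":"name2", "ts":10100}  // nested end first (reverse-stable)
//   {"ph":"e", "name":"name",  "ts":10100}  // parent end after child
//   {"ph":"e", "name":"name3", "ts":10105}
```
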
diff --git a/src/trace_processor/export_json_unittest.cc b/src/trace_processor/export_json_unittest.cc
index 2c9ec19..6d149e6 100644
--- a/src/trace_processor/export_json_unittest.cc
+++ b/src/trace_processor/export_json_unittest.cc
@@ -886,10 +886,13 @@
 TEST_F(ExportJsonTest, AsyncEvents) {
   const int64_t kTimestamp = 10000000;
   const int64_t kDuration = 100000;
+  const int64_t kTimestamp3 = 10005000;
+  const int64_t kDuration3 = 100000;
   const uint32_t kProcessID = 100;
   const char* kCategory = "cat";
   const char* kName = "name";
   const char* kName2 = "name2";
+  const char* kName3 = "name3";
   const char* kArgName = "arg_name";
   const int kArgValue = 123;
 
@@ -897,11 +900,16 @@
   StringId cat_id = context_.storage->InternString(base::StringView(kCategory));
   StringId name_id = context_.storage->InternString(base::StringView(kName));
   StringId name2_id = context_.storage->InternString(base::StringView(kName2));
+  StringId name3_id = context_.storage->InternString(base::StringView(kName3));
 
   constexpr int64_t kSourceId = 235;
   TrackId track = context_.track_tracker->InternLegacyChromeAsyncTrack(
       name_id, upid, kSourceId, /*source_id_is_process_scoped=*/true,
       /*source_scope=*/kNullStringId);
+  constexpr int64_t kSourceId2 = 236;
+  TrackId track2 = context_.track_tracker->InternLegacyChromeAsyncTrack(
+      name3_id, upid, kSourceId2, /*source_id_is_process_scoped=*/true,
+      /*source_scope=*/kNullStringId);
   context_.args_tracker->Flush();  // Flush track args.
 
   context_.storage->mutable_slice_table()->Insert(
@@ -919,6 +927,10 @@
   context_.storage->mutable_slice_table()->Insert(
       {kTimestamp, kDuration, track.value, cat_id, name2_id, 0, 0, 0});
 
+  // Another overlapping async event on a different track.
+  context_.storage->mutable_slice_table()->Insert(
+      {kTimestamp3, kDuration3, track2.value, cat_id, name3_id, 0, 0, 0});
+
   base::TempFile temp_file = base::TempFile::Create();
   FILE* output = fopen(temp_file.path().c_str(), "w+");
   util::Status status = ExportJson(context_.storage.get(), output);
@@ -926,7 +938,10 @@
   EXPECT_TRUE(status.ok());
 
   Json::Value result = ToJsonValue(ReadFile(output));
-  EXPECT_EQ(result["traceEvents"].size(), 4u);
+  EXPECT_EQ(result["traceEvents"].size(), 6u);
+
+  // Events should be sorted by timestamp, with a child slice's end emitted
+  // before its parent's.
 
   Json::Value begin_event1 = result["traceEvents"][0];
   EXPECT_EQ(begin_event1["ph"].asString(), "b");
@@ -951,19 +966,31 @@
   EXPECT_FALSE(begin_event2.isMember("tts"));
   EXPECT_FALSE(begin_event2.isMember("use_async_tts"));
 
+  Json::Value begin_event3 = result["traceEvents"][2];
+  EXPECT_EQ(begin_event3["ph"].asString(), "b");
+  EXPECT_EQ(begin_event3["ts"].asInt64(), kTimestamp3 / 1000);
+  EXPECT_EQ(begin_event3["pid"].asInt(), static_cast<int>(kProcessID));
+  EXPECT_EQ(begin_event3["id2"]["local"].asString(), "0xec");
+  EXPECT_EQ(begin_event3["cat"].asString(), kCategory);
+  EXPECT_EQ(begin_event3["name"].asString(), kName3);
+  EXPECT_TRUE(begin_event3["args"].isObject());
+  EXPECT_EQ(begin_event3["args"].size(), 0u);
+  EXPECT_FALSE(begin_event3.isMember("tts"));
+  EXPECT_FALSE(begin_event3.isMember("use_async_tts"));
+
   Json::Value end_event2 = result["traceEvents"][3];
   EXPECT_EQ(end_event2["ph"].asString(), "e");
   EXPECT_EQ(end_event2["ts"].asInt64(), (kTimestamp + kDuration) / 1000);
   EXPECT_EQ(end_event2["pid"].asInt(), static_cast<int>(kProcessID));
   EXPECT_EQ(end_event2["id2"]["local"].asString(), "0xeb");
   EXPECT_EQ(end_event2["cat"].asString(), kCategory);
-  EXPECT_EQ(end_event2["name"].asString(), kName);
+  EXPECT_EQ(end_event2["name"].asString(), kName2);
   EXPECT_TRUE(end_event2["args"].isObject());
   EXPECT_EQ(end_event2["args"].size(), 0u);
   EXPECT_FALSE(end_event2.isMember("tts"));
   EXPECT_FALSE(end_event2.isMember("use_async_tts"));
 
-  Json::Value end_event1 = result["traceEvents"][3];
+  Json::Value end_event1 = result["traceEvents"][4];
   EXPECT_EQ(end_event1["ph"].asString(), "e");
   EXPECT_EQ(end_event1["ts"].asInt64(), (kTimestamp + kDuration) / 1000);
   EXPECT_EQ(end_event1["pid"].asInt(), static_cast<int>(kProcessID));
@@ -974,6 +1001,18 @@
   EXPECT_EQ(end_event1["args"].size(), 0u);
   EXPECT_FALSE(end_event1.isMember("tts"));
   EXPECT_FALSE(end_event1.isMember("use_async_tts"));
+
+  Json::Value end_event3 = result["traceEvents"][5];
+  EXPECT_EQ(end_event3["ph"].asString(), "e");
+  EXPECT_EQ(end_event3["ts"].asInt64(), (kTimestamp3 + kDuration3) / 1000);
+  EXPECT_EQ(end_event3["pid"].asInt(), static_cast<int>(kProcessID));
+  EXPECT_EQ(end_event3["id2"]["local"].asString(), "0xec");
+  EXPECT_EQ(end_event3["cat"].asString(), kCategory);
+  EXPECT_EQ(end_event3["name"].asString(), kName3);
+  EXPECT_TRUE(end_event3["args"].isObject());
+  EXPECT_EQ(end_event3["args"].size(), 0u);
+  EXPECT_FALSE(end_event3.isMember("tts"));
+  EXPECT_FALSE(end_event3.isMember("use_async_tts"));
 }
 
 TEST_F(ExportJsonTest, AsyncEventWithThreadTimestamp) {
diff --git a/src/trace_processor/importers/ftrace/ftrace_parser.cc b/src/trace_processor/importers/ftrace/ftrace_parser.cc
index afce6e2..ac3d374 100644
--- a/src/trace_processor/importers/ftrace/ftrace_parser.cc
+++ b/src/trace_processor/importers/ftrace/ftrace_parser.cc
@@ -383,6 +383,9 @@
                                          uint32_t cpu,
                                          uint32_t tid,
                                          ConstBytes blob) {
+  if (PERFETTO_UNLIKELY(!context_->config.ingest_ftrace_in_raw_table))
+    return;
+
   ProtoDecoder decoder(blob.data, blob.size);
   if (ftrace_id >= GetDescriptorsSize()) {
     PERFETTO_DLOG("Event with id: %d does not exist and cannot be parsed.",
diff --git a/src/trace_processor/importers/ftrace/rss_stat_tracker.cc b/src/trace_processor/importers/ftrace/rss_stat_tracker.cc
index db444a7..f536a02 100644
--- a/src/trace_processor/importers/ftrace/rss_stat_tracker.cc
+++ b/src/trace_processor/importers/ftrace/rss_stat_tracker.cc
@@ -69,24 +69,34 @@
 base::Optional<UniqueTid> RssStatTracker::FindUtidForMmId(int64_t mm_id,
                                                           bool is_curr,
                                                           uint32_t pid) {
-  auto it = mm_id_to_utid_.find(mm_id);
-  if (!is_curr) {
-    return it == mm_id_to_utid_.end() ? base::nullopt
-                                      : base::make_optional(it->second);
+  // If curr is true, we can just overwrite the state in the map and return
+  // the utid corresponding to |pid|.
+  if (is_curr) {
+    UniqueTid utid = context_->process_tracker->GetOrCreateThread(pid);
+    mm_id_to_utid_[mm_id] = utid;
+    return utid;
   }
 
+  // If curr is false, try to look up the utid we previously saw for this
+  // mm id.
+  auto it = mm_id_to_utid_.find(mm_id);
+  if (it == mm_id_to_utid_.end())
+    return base::nullopt;
+
+  // If the utid in the map is the same as our current utid but curr is false,
+  // that means we are in the middle of a process changing mm structs (i.e. in
+  // the middle of a vfork + exec). Therefore, we should discard the association
+  // of this mm struct with this thread.
   UniqueTid utid = context_->process_tracker->GetOrCreateThread(pid);
-  if (it != mm_id_to_utid_.end() && it->second != utid) {
-    // Since both of these structs have the same mm hash and both say that
-    // the mm hash is for the current project, we can assume they belong to
-    // the same process so we can associate them together.
-    // TODO(lalitm): investigate if it's possible for mm_id to be reused
-    // between different processes if we have pid reuse and get unlucky. If
-    // so, we'll need to do some more careful tracking here.
-    context_->process_tracker->AssociateThreads(it->second, utid);
+  if (it->second == utid) {
+    mm_id_to_utid_.erase(it);
+    return base::nullopt;
   }
-  mm_id_to_utid_[mm_id] = utid;
-  return utid;
+
+  // This case happens when a process is changing the VM of another process
+  // and we know the utid corresponding to the target process. Just return
+  // that utid.
+  return it->second;
 }
 
 }  // namespace trace_processor
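
A walk-through of the resulting `FindUtidForMmId()` state machine, with illustrative values in the style of the synthetic rss_stat tests:

```cpp
// curr == true: (re)record the mapping and attribute the event to |pid|.
//   rss_stat(tid=3,  mm_id=4321, curr=true)  -> map[4321] = utid(3); utid(3)
// curr == false, known mm id owned by someone else: attribute to the owner.
//   rss_stat(tid=10, mm_id=4321, curr=false) -> utid(3)
// curr == false, map points back at our own utid: mid vfork + exec, the old
// association is stale, so drop it.
//   rss_stat(tid=3,  mm_id=4321, curr=false) -> erase map[4321]; nullopt
// curr == false, unknown mm id: nothing to attribute the event to.
//   rss_stat(tid=9,  mm_id=7777, curr=false) -> nullopt
```
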
diff --git a/src/trace_processor/importers/ftrace/sched_event_tracker.cc b/src/trace_processor/importers/ftrace/sched_event_tracker.cc
index 79b3666..62fcec4 100644
--- a/src/trace_processor/importers/ftrace/sched_event_tracker.cc
+++ b/src/trace_processor/importers/ftrace/sched_event_tracker.cc
@@ -192,28 +192,30 @@
                                                    uint32_t next_pid,
                                                    StringId next_comm_id,
                                                    int32_t next_prio) {
-  // Push the raw event - this is done as the raw ftrace event codepath does
-  // not insert sched_switch.
-  RawId id = context_->storage->mutable_raw_table()->Insert(
-      {ts, sched_switch_id_, cpu, prev_utid});
+  if (PERFETTO_LIKELY(context_->config.ingest_ftrace_in_raw_table)) {
+    // Push the raw event - this is done as the raw ftrace event codepath does
+    // not insert sched_switch.
+    RawId id = context_->storage->mutable_raw_table()->Insert(
+        {ts, sched_switch_id_, cpu, prev_utid});
 
-  // Note: this ordering is important. The events should be pushed in the same
-  // order as the order of fields in the proto; this is used by the raw table to
-  // index these events using the field ids.
-  using SS = protos::pbzero::SchedSwitchFtraceEvent;
+    // Note: this ordering is important. The events should be pushed in the same
+    // order as the order of fields in the proto; this is used by the raw table
+    // to index these events using the field ids.
+    using SS = protos::pbzero::SchedSwitchFtraceEvent;
 
-  auto inserter = context_->args_tracker->AddArgsTo(id);
-  auto add_raw_arg = [this, &inserter](int field_num, Variadic var) {
-    StringId key = sched_switch_field_ids_[static_cast<size_t>(field_num)];
-    inserter.AddArg(key, var);
-  };
-  add_raw_arg(SS::kPrevCommFieldNumber, Variadic::String(prev_comm_id));
-  add_raw_arg(SS::kPrevPidFieldNumber, Variadic::Integer(prev_pid));
-  add_raw_arg(SS::kPrevPrioFieldNumber, Variadic::Integer(prev_prio));
-  add_raw_arg(SS::kPrevStateFieldNumber, Variadic::Integer(prev_state));
-  add_raw_arg(SS::kNextCommFieldNumber, Variadic::String(next_comm_id));
-  add_raw_arg(SS::kNextPidFieldNumber, Variadic::Integer(next_pid));
-  add_raw_arg(SS::kNextPrioFieldNumber, Variadic::Integer(next_prio));
+    auto inserter = context_->args_tracker->AddArgsTo(id);
+    auto add_raw_arg = [this, &inserter](int field_num, Variadic var) {
+      StringId key = sched_switch_field_ids_[static_cast<size_t>(field_num)];
+      inserter.AddArg(key, var);
+    };
+    add_raw_arg(SS::kPrevCommFieldNumber, Variadic::String(prev_comm_id));
+    add_raw_arg(SS::kPrevPidFieldNumber, Variadic::Integer(prev_pid));
+    add_raw_arg(SS::kPrevPrioFieldNumber, Variadic::Integer(prev_prio));
+    add_raw_arg(SS::kPrevStateFieldNumber, Variadic::Integer(prev_state));
+    add_raw_arg(SS::kNextCommFieldNumber, Variadic::String(next_comm_id));
+    add_raw_arg(SS::kNextPidFieldNumber, Variadic::Integer(next_pid));
+    add_raw_arg(SS::kNextPrioFieldNumber, Variadic::Integer(next_prio));
+  }
 
   // Open a new scheduling slice, corresponding to the task that was
   // just switched to.
@@ -271,24 +273,27 @@
   }
   auto curr_utid = pending_sched->last_utid;
 
-  // Add an entry to the raw table.
-  RawId id = context_->storage->mutable_raw_table()->Insert(
-      {ts, sched_waking_id_, cpu, curr_utid});
+  if (PERFETTO_LIKELY(context_->config.ingest_ftrace_in_raw_table)) {
+    // Add an entry to the raw table.
+    RawId id = context_->storage->mutable_raw_table()->Insert(
+        {ts, sched_waking_id_, cpu, curr_utid});
 
-  // "success" is hardcoded as always 1 by the kernel, with a TODO to remove it.
-  static constexpr int32_t kHardcodedSuccess = 1;
+    // "success" is hardcoded as always 1 by the kernel, with a TODO to remove
+    // it.
+    static constexpr int32_t kHardcodedSuccess = 1;
 
-  using SW = protos::pbzero::SchedWakingFtraceEvent;
-  auto inserter = context_->args_tracker->AddArgsTo(id);
-  auto add_raw_arg = [this, &inserter](int field_num, Variadic var) {
-    StringId key = sched_waking_field_ids_[static_cast<size_t>(field_num)];
-    inserter.AddArg(key, var);
-  };
-  add_raw_arg(SW::kCommFieldNumber, Variadic::String(comm_id));
-  add_raw_arg(SW::kPidFieldNumber, Variadic::Integer(wakee_pid));
-  add_raw_arg(SW::kPrioFieldNumber, Variadic::Integer(prio));
-  add_raw_arg(SW::kSuccessFieldNumber, Variadic::Integer(kHardcodedSuccess));
-  add_raw_arg(SW::kTargetCpuFieldNumber, Variadic::Integer(target_cpu));
+    using SW = protos::pbzero::SchedWakingFtraceEvent;
+    auto inserter = context_->args_tracker->AddArgsTo(id);
+    auto add_raw_arg = [this, &inserter](int field_num, Variadic var) {
+      StringId key = sched_waking_field_ids_[static_cast<size_t>(field_num)];
+      inserter.AddArg(key, var);
+    };
+    add_raw_arg(SW::kCommFieldNumber, Variadic::String(comm_id));
+    add_raw_arg(SW::kPidFieldNumber, Variadic::Integer(wakee_pid));
+    add_raw_arg(SW::kPrioFieldNumber, Variadic::Integer(prio));
+    add_raw_arg(SW::kSuccessFieldNumber, Variadic::Integer(kHardcodedSuccess));
+    add_raw_arg(SW::kTargetCpuFieldNumber, Variadic::Integer(target_cpu));
+  }
 
   // Add a waking entry to the instants.
   auto wakee_utid = context_->process_tracker->GetOrCreateThread(wakee_pid);
diff --git a/src/trace_processor/importers/proto/proto_trace_parser.cc b/src/trace_processor/importers/proto/proto_trace_parser.cc
index 4c5566d..9d6a97b 100644
--- a/src/trace_processor/importers/proto/proto_trace_parser.cc
+++ b/src/trace_processor/importers/proto/proto_trace_parser.cc
@@ -629,11 +629,18 @@
 
 void ProtoTraceParser::ParseModuleSymbols(ConstBytes blob) {
   protos::pbzero::ModuleSymbols::Decoder module_symbols(blob.data, blob.size);
-  std::string hex_build_id = base::ToHex(module_symbols.build_id().data,
-                                         module_symbols.build_id().size);
+  StringId build_id;
+  // TODO(b/148109467): Remove workaround once all active Chrome versions
+  // write raw bytes instead of a string as build_id.
+  if (module_symbols.build_id().size == 33) {
+    build_id = context_->storage->InternString(module_symbols.build_id());
+  } else {
+    build_id = context_->storage->InternString(base::StringView(base::ToHex(
+        module_symbols.build_id().data, module_symbols.build_id().size)));
+  }
+
   auto mapping_ids = context_->storage->FindMappingRow(
-      context_->storage->InternString(module_symbols.path()),
-      context_->storage->InternString(base::StringView(hex_build_id)));
+      context_->storage->InternString(module_symbols.path()), build_id);
   if (mapping_ids.empty()) {
     context_->storage->IncrementStats(stats::stackprofile_invalid_mapping_id);
     return;
diff --git a/src/trace_processor/metrics/android/unmapped_java_symbols.sql b/src/trace_processor/metrics/android/unmapped_java_symbols.sql
index 7cc314f..d13d948 100644
--- a/src/trace_processor/metrics/android/unmapped_java_symbols.sql
+++ b/src/trace_processor/metrics/android/unmapped_java_symbols.sql
@@ -24,6 +24,7 @@
   AND INSTR(type_name, '.') = 0
   AND RTRIM(type_name, '[]') NOT IN ('byte', 'char', 'short', 'int', 'long', 'boolean', 'float', 'double')
   AND type_name NOT LIKE '$Proxy%'
+  AND LENGTH(type_name) > 0
 )
 SELECT upid, RepeatedField(type_name) AS types
 FROM distinct_unmapped_type_names GROUP BY 1;
@@ -35,6 +36,7 @@
   WHERE deobfuscated_type_name IS NULL
   AND field_name NOT LIKE '%.%.%'
   AND field_name NOT LIKE '$Proxy%'
+  AND LENGTH(field_name) > 0
 )
 SELECT upid, RepeatedField(field_name) AS fields
 FROM distinct_unmapped_field_names GROUP BY 1;
diff --git a/src/trace_processor/sqlite/db_sqlite_table.cc b/src/trace_processor/sqlite/db_sqlite_table.cc
index 054f387..83789ea 100644
--- a/src/trace_processor/sqlite/db_sqlite_table.cc
+++ b/src/trace_processor/sqlite/db_sqlite_table.cc
@@ -222,9 +222,12 @@
     } else if (sqlite_utils::IsOpEq(c.op)) {
       // If there is only a single equality constraint, we have special logic
       // to sort by that column and then binary search if we see the constraint
-      // set often. Model this by dividing but the log of the number of rows as
+      // set often. Model this by dividing by the log of the number of rows as
       // a good approximation. Otherwise, we'll need to do a full table scan.
-      filter_cost += cs.size() == 1
+      // Alternatively, if the column is sorted, we can use the same binary
+      // search logic so we have the same low cost (even better because we don't
+      // have to sort at all).
+      filter_cost += cs.size() == 1 || col.IsSorted()
                          ? (2 * current_row_count) / log2(current_row_count)
                          : current_row_count;
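
A quick sanity check of that model with illustrative numbers (not actual `EstimateCost` output):

```cpp
#include <cmath>
#include <cstdio>

// For 1234 rows, an equality constraint on a sorted column is modeled as
// 2 * N / log2(N) ~= 240, versus a full-scan cost of N = 1234, so the
// planner now strongly prefers binary-searchable columns.
int main() {
  double n = 1234;
  std::printf("sorted eq: %.0f, full scan: %.0f\n",
              (2 * n) / std::log2(n), n);
  return 0;
}
```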
 
diff --git a/src/trace_processor/sqlite/db_sqlite_table_unittest.cc b/src/trace_processor/sqlite/db_sqlite_table_unittest.cc
index 4f39839..2fbc3e4 100644
--- a/src/trace_processor/sqlite/db_sqlite_table_unittest.cc
+++ b/src/trace_processor/sqlite/db_sqlite_table_unittest.cc
@@ -33,12 +33,18 @@
         Column("a", &a_, Column::Flag::kNoFlag, this, 1u, 0u));
     columns_.emplace_back(
         Column("sorted", &sorted_, Column::Flag::kSorted, this, 2u, 0u));
+    columns_.emplace_back(
+        Column("other", &other_, Column::Flag::kNoFlag, this, 3u, 0u));
+    columns_.emplace_back(
+        Column("other2", &other2_, Column::Flag::kNoFlag, this, 4u, 0u));
   }
 
  private:
   StringPool pool_;
   SparseVector<uint32_t> a_;
   SparseVector<uint32_t> sorted_;
+  SparseVector<uint32_t> other_;
+  SparseVector<uint32_t> other2_;
 };
 
 TEST(DbSqliteTable, IdEqCheaperThanOtherEq) {
@@ -95,6 +101,27 @@
   ASSERT_GT(single_cost.rows, multi_cost.rows);
 }
 
+TEST(DbSqliteTable, MultiSortedEqCheaperThanMultiUnsortedEq) {
+  TestTable table(1234);
+
+  QueryConstraints sorted_eq;
+  sorted_eq.AddConstraint(2u, SQLITE_INDEX_CONSTRAINT_EQ, 0u);
+  sorted_eq.AddConstraint(3u, SQLITE_INDEX_CONSTRAINT_EQ, 0u);
+
+  auto sorted_cost = DbSqliteTable::EstimateCost(table, sorted_eq);
+
+  QueryConstraints unsorted_eq;
+  unsorted_eq.AddConstraint(3u, SQLITE_INDEX_CONSTRAINT_EQ, 0u);
+  unsorted_eq.AddConstraint(4u, SQLITE_INDEX_CONSTRAINT_EQ, 0u);
+
+  auto unsorted_cost = DbSqliteTable::EstimateCost(table, unsorted_eq);
+
+  // The number of rows should be the same but the cost of the sorted
+  // query should be less.
+  ASSERT_LT(sorted_cost.cost, unsorted_cost.cost);
+  ASSERT_EQ(sorted_cost.rows, unsorted_cost.rows);
+}
+
 TEST(DbSqliteTable, EmptyTableCosting) {
   TestTable table(0u);
 
diff --git a/src/trace_processor/tables/macros_benchmark.cc b/src/trace_processor/tables/macros_benchmark.cc
index 2e7bfbb..fec67ed 100644
--- a/src/trace_processor/tables/macros_benchmark.cc
+++ b/src/trace_processor/tables/macros_benchmark.cc
@@ -27,6 +27,7 @@
   PERFETTO_TP_ROOT_TABLE(PARENT, C)                  \
   C(uint32_t, root_sorted, Column::Flag::kSorted)    \
   C(uint32_t, root_non_null)                         \
+  C(uint32_t, root_non_null_2)                       \
   C(base::Optional<uint32_t>, root_nullable)
 
 PERFETTO_TP_TABLE(PERFETTO_TP_ROOT_TEST_TABLE);
@@ -200,6 +201,28 @@
 }
 BENCHMARK(BM_TableFilterRootNonNullEqMatchMany)->Apply(TableFilterArgs);
 
+static void BM_TableFilterRootMultipleNonNull(benchmark::State& state) {
+  StringPool pool;
+  RootTestTable root(&pool, nullptr);
+
+  uint32_t size = static_cast<uint32_t>(state.range(0));
+  uint32_t partitions = size / 512;
+
+  std::minstd_rand0 rnd_engine;
+  for (uint32_t i = 0; i < size; ++i) {
+    RootTestTable::Row row;
+    row.root_non_null = rnd_engine() % partitions;
+    row.root_non_null_2 = rnd_engine() % partitions;
+    root.Insert(row);
+  }
+
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(root.Filter(
+        {root.root_non_null().lt(4), root.root_non_null_2().lt(10)}));
+  }
+}
+BENCHMARK(BM_TableFilterRootMultipleNonNull)->Apply(TableFilterArgs);
+
 static void BM_TableFilterRootNullableEqMatchMany(benchmark::State& state) {
   StringPool pool;
   RootTestTable root(&pool, nullptr);
diff --git a/src/trace_processor/tables/profiler_tables.h b/src/trace_processor/tables/profiler_tables.h
index e87c40b..04dae8d 100644
--- a/src/trace_processor/tables/profiler_tables.h
+++ b/src/trace_processor/tables/profiler_tables.h
@@ -128,7 +128,7 @@
 #define PERFETTO_TP_HEAP_GRAPH_REFERENCE_DEF(NAME, PARENT, C) \
   NAME(HeapGraphReferenceTable, "heap_graph_reference")       \
   PERFETTO_TP_ROOT_TABLE(PARENT, C)                           \
-  C(int64_t, reference_set_id)                                \
+  C(int64_t, reference_set_id, Column::Flag::kSorted)         \
   C(int64_t, owner_id)                                        \
   C(int64_t, owned_id)                                        \
   C(StringPool::Id, field_name)                               \
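
Marking `reference_set_id` as sorted ties into the `DbSqliteTable` cost change above: equality queries over this column, as issued by the UI's heap-graph view, can now binary search instead of scanning. A hedged sketch against the public query API (`tp` and the set id are assumptions):

```cpp
// Assumes |tp| is a live perfetto::trace_processor::TraceProcessor.
auto it = tp->ExecuteQuery(
    "SELECT owner_id, owned_id, field_name FROM heap_graph_reference "
    "WHERE reference_set_id = 42");
while (it.Next()) {
  // ... render one outgoing reference edge of the selected heap object.
}
```
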
diff --git a/src/tracing/api_integrationtest.cc b/src/tracing/api_integrationtest.cc
index c4edec1..63a682e 100644
--- a/src/tracing/api_integrationtest.cc
+++ b/src/tracing/api_integrationtest.cc
@@ -72,7 +72,8 @@
 // Trace categories used in the tests.
 PERFETTO_DEFINE_CATEGORIES(PERFETTO_CATEGORY(test),
                            PERFETTO_CATEGORY(foo),
-                           PERFETTO_CATEGORY(bar));
+                           PERFETTO_CATEGORY(bar),
+                           PERFETTO_CATEGORY(cat));
 PERFETTO_TRACK_EVENT_STATIC_STORAGE();
 
 // For testing interning of complex objects.
@@ -237,6 +238,16 @@
   WaitableTestEvent on_stop;
 };
 
+class MyDebugAnnotation : public perfetto::DebugAnnotation {
+ public:
+  ~MyDebugAnnotation() override = default;
+
+  void Add(
+      perfetto::protos::pbzero::DebugAnnotation* annotation) const override {
+    annotation->set_legacy_json_value(R"({"key": 123})");
+  }
+};
+
 // -------------------------
 // Declaration of test class
 // -------------------------
@@ -398,9 +409,16 @@
         case perfetto::protos::gen::TrackEvent::TYPE_INSTANT:
           slice += "I";
           break;
-        default:
-        case perfetto::protos::gen::TrackEvent::TYPE_UNSPECIFIED:
+        case perfetto::protos::gen::TrackEvent::TYPE_UNSPECIFIED: {
+          EXPECT_TRUE(track_event.has_legacy_event());
           EXPECT_FALSE(track_event.type());
+          auto legacy_event = track_event.legacy_event();
+          slice += "Legacy_" +
+                   std::string(1, static_cast<char>(legacy_event.phase()));
+          break;
+        }
+        default:
+          ADD_FAILURE();
       }
       if (!track_event.category_iids().empty())
         slice += ":" + categories[track_event.category_iids()[0]];
@@ -1455,23 +1473,15 @@
   ds_cfg->set_name("track_event");
   ds_cfg->set_legacy_config("test");
 
-  class MyDebugAnnotation : public perfetto::DebugAnnotation {
-   public:
-    ~MyDebugAnnotation() override = default;
-
-    void Add(
-        perfetto::protos::pbzero::DebugAnnotation* annotation) const override {
-      annotation->set_legacy_json_value(R"({"key": 123})");
-    }
-  };
-
   // Create a new trace session.
   auto* tracing_session = NewTrace(cfg);
   tracing_session->get()->StartBlocking();
 
+  std::unique_ptr<MyDebugAnnotation> owned_annotation(new MyDebugAnnotation());
+
   TRACE_EVENT_BEGIN("test", "E", "custom_arg", MyDebugAnnotation());
   TRACE_EVENT_BEGIN("test", "E", "normal_arg", "x", "custom_arg",
-                    MyDebugAnnotation());
+                    std::move(owned_annotation));
   perfetto::TrackEvent::Flush();
 
   tracing_session->get()->StopBlocking();
@@ -1952,7 +1962,19 @@
   // TODO(skyostil): For now we just test that all variants of legacy trace
   // points compile. Test actual functionality when implemented.
 
+  // Set up the trace config.
+  perfetto::TraceConfig cfg;
+  cfg.set_duration_ms(500);
+  cfg.add_buffers()->set_size_kb(1024);
+  auto* ds_cfg = cfg.add_data_sources()->mutable_config();
+  ds_cfg->set_name("track_event");
+
+  // Create a new trace session.
+  auto* tracing_session = NewTrace(cfg);
+  tracing_session->get()->StartBlocking();
+
   // Basic events.
+  TRACE_EVENT_INSTANT0("cat", "LegacyEvent", TRACE_EVENT_SCOPE_GLOBAL);
   TRACE_EVENT_BEGIN1("cat", "LegacyEvent", "arg", 123);
   TRACE_EVENT_END2("cat", "LegacyEvent", "arg", "string", "arg2", 0.123f);
 
@@ -1972,10 +1994,79 @@
       "cat", std::string("LegacyWithIdTidAndTimestamp").c_str(), 1, 2, 3);
 
  // Counter events (with and without an explicit id).
-  TRACE_COUNTER_ID1("cat", "LegacyCounter", 1234, 9000);
+  TRACE_COUNTER1("cat", "LegacyCounter", 1234);
+  TRACE_COUNTER_ID1("cat", "LegacyCounterWithId", 1234, 9000);
 
   // Metadata event.
   TRACE_EVENT_METADATA1("cat", "LegacyMetadata", "obsolete", true);
+
+  perfetto::TrackEvent::Flush();
+  tracing_session->get()->StopBlocking();
+  auto slices = ReadSlicesFromTrace(tracing_session->get());
+  EXPECT_THAT(
+      slices,
+      ElementsAre("I:cat.LegacyEvent", "B:cat.LegacyEvent(arg=(int)123)",
+                  "E.LegacyEvent(arg=(string)string,arg2=(double)0.123)",
+                  "B:cat.ScopedLegacyEvent", "E",
+                  "Legacy_C:cat.LegacyCounter(value=(int)1234)"));
+}
+
+TEST_F(PerfettoApiTest, LegacyTraceEventsWithCustomAnnotation) {
+  // Set up the trace config.
+  perfetto::TraceConfig cfg;
+  cfg.set_duration_ms(500);
+  cfg.add_buffers()->set_size_kb(1024);
+  auto* ds_cfg = cfg.add_data_sources()->mutable_config();
+  ds_cfg->set_name("track_event");
+
+  // Create a new trace session.
+  auto* tracing_session = NewTrace(cfg);
+  tracing_session->get()->StartBlocking();
+
+  MyDebugAnnotation annotation;
+  TRACE_EVENT_BEGIN1("cat", "LegacyEvent", "arg", annotation);
+
+  std::unique_ptr<MyDebugAnnotation> owned_annotation(new MyDebugAnnotation());
+  TRACE_EVENT_BEGIN1("cat", "LegacyEvent", "arg", std::move(owned_annotation));
+
+  perfetto::TrackEvent::Flush();
+  tracing_session->get()->StopBlocking();
+  auto slices = ReadSlicesFromTrace(tracing_session->get());
+  EXPECT_THAT(slices,
+              ElementsAre("B:cat.LegacyEvent(arg=(json){\"key\": 123})",
+                          "B:cat.LegacyEvent(arg=(json){\"key\": 123})"));
+}
+
+TEST_F(PerfettoApiTest, LegacyTraceEventsWithConcurrentSessions) {
+  // Make sure that a uniquely owned debug annotation can be written into
+  // multiple concurrent tracing sessions.
+
+  // Set up the trace config.
+  perfetto::TraceConfig cfg;
+  cfg.set_duration_ms(500);
+  cfg.add_buffers()->set_size_kb(1024);
+  auto* ds_cfg = cfg.add_data_sources()->mutable_config();
+  ds_cfg->set_name("track_event");
+
+  auto* tracing_session = NewTrace(cfg);
+  tracing_session->get()->StartBlocking();
+
+  auto* tracing_session2 = NewTrace(cfg);
+  tracing_session2->get()->StartBlocking();
+
+  std::unique_ptr<MyDebugAnnotation> owned_annotation(new MyDebugAnnotation());
+  TRACE_EVENT_BEGIN1("cat", "LegacyEvent", "arg", std::move(owned_annotation));
+
+  perfetto::TrackEvent::Flush();
+  tracing_session->get()->StopBlocking();
+  auto slices = ReadSlicesFromTrace(tracing_session->get());
+  EXPECT_THAT(slices,
+              ElementsAre("B:cat.LegacyEvent(arg=(json){\"key\": 123})"));
+
+  tracing_session2->get()->StopBlocking();
+  slices = ReadSlicesFromTrace(tracing_session2->get());
+  EXPECT_THAT(slices,
+              ElementsAre("B:cat.LegacyEvent(arg=(json){\"key\": 123})"));
 }
 
 }  // namespace
diff --git a/src/tracing/internal/track_event_internal.cc b/src/tracing/internal/track_event_internal.cc
index 9a84aeb..4c0c86b 100644
--- a/src/tracing/internal/track_event_internal.cc
+++ b/src/tracing/internal/track_event_internal.cc
@@ -202,7 +202,8 @@
   EventContext ctx(std::move(packet), incr_state);
 
   auto track_event = ctx.event();
-  track_event->set_type(type);
+  if (type != protos::pbzero::TrackEvent::TYPE_UNSPECIFIED)
+    track_event->set_type(type);
 
   // We assume that |category| and |name| point to strings with static lifetime.
   // This means we can use their addresses as interning keys.
diff --git a/src/tracing/test/tracing_module.cc b/src/tracing/test/tracing_module.cc
index 7c10d4e..d4fef2e 100644
--- a/src/tracing/test/tracing_module.cc
+++ b/src/tracing/test/tracing_module.cc
@@ -89,4 +89,18 @@
   puts("Hello");
 }
 
+void FunctionWithOneLegacyEvent() {
+  TRACE_EVENT_BEGIN("cat1", "LegacyEventWithArgs", "arg1", 42, "arg2", .5f);
+  // Simulates the non-tracing work of this function, which should take priority
+  // over the above trace event in terms of instruction scheduling.
+  puts("Hello");
+}
+
+void FunctionWithOneScopedLegacyEvent() {
+  TRACE_EVENT("cat1", "ScopedLegacyEventWithArgs", "arg1", 42, "arg2", .5f);
+  // Simulates the non-tracing work of this function, which should take priority
+  // over the above trace event in terms of instruction scheduling.
+  puts("Hello");
+}
+
 }  // namespace tracing_module
diff --git a/src/tracing/test/tracing_module.h b/src/tracing/test/tracing_module.h
index c005a6f..7ed9364 100644
--- a/src/tracing/test/tracing_module.h
+++ b/src/tracing/test/tracing_module.h
@@ -39,6 +39,10 @@
 void FunctionWithOneTrackEventWithDebugAnnotations();
 void FunctionWithOneTrackEventWithCustomTrack();
 
+// Legacy events.
+void FunctionWithOneLegacyEvent();
+void FunctionWithOneScopedLegacyEvent();
+
 }  // namespace tracing_module
 
 #endif  // SRC_TRACING_TEST_TRACING_MODULE_H_
diff --git a/src/tracing/test/tracing_module_categories.h b/src/tracing/test/tracing_module_categories.h
index f776894..b492db8 100644
--- a/src/tracing/test/tracing_module_categories.h
+++ b/src/tracing/test/tracing_module_categories.h
@@ -23,6 +23,7 @@
 // categories can be written to the same trace writer.
 
 #define PERFETTO_TRACK_EVENT_NAMESPACE tracing_module
+#define PERFETTO_ENABLE_LEGACY_TRACE_EVENTS 1
 
 #include "perfetto/tracing.h"
 
diff --git a/test/synth_common.py b/test/synth_common.py
index f950294..09bf07e 100644
--- a/test/synth_common.py
+++ b/test/synth_common.py
@@ -19,6 +19,8 @@
 from google.protobuf.pyext import _message
 
 CLONE_THREAD = 0x00010000
+CLONE_VFORK = 0x00004000
+CLONE_VM = 0x00000100
 
 
 class Trace(object):
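
The two new constants mirror the kernel's clone(2) flag bits; the rss_stat tests below combine them to emulate vfork/posix_spawn, plain fork, and thread creation. A small sketch of the semantics the flags encode (this models clone(2) behavior, not trace_processor's internal logic):

```python
CLONE_THREAD = 0x00010000
CLONE_VFORK = 0x00004000
CLONE_VM = 0x00000100

def child_shares_parent_mm(flags):
    """CLONE_VM makes the child run in the parent's address space."""
    return bool(flags & CLONE_VM)

# Plain fork()/clone() without CLONE_VM: the child gets its own mm copy.
assert not child_shares_parent_mm(0)
# vfork()/posix_spawn(): the child borrows the parent's mm until it execs.
assert child_shares_parent_mm(CLONE_VFORK | CLONE_VM)
# pthread_create(): the thread shares the mm for its whole lifetime.
assert child_shares_parent_mm(CLONE_VM | CLONE_THREAD)
```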
diff --git a/test/trace_processor/index b/test/trace_processor/index
index 9ce8f44..afd8064 100644
--- a/test/trace_processor/index
+++ b/test/trace_processor/index
@@ -57,6 +57,8 @@
 
 # Rss stats
 rss_stat_mm_id.py rss_stat.sql rss_stat_mm_id.out
+rss_stat_mm_id_clone.py rss_stat.sql rss_stat_mm_id_clone.out
+rss_stat_mm_id_reuse.py rss_stat.sql rss_stat_mm_id_reuse.out
 rss_stat_legacy.py rss_stat.sql rss_stat_legacy.out
 
 # Memory counters
diff --git a/test/trace_processor/rss_stat_mm_id copy.py b/test/trace_processor/rss_stat_mm_id copy.py
new file mode 100644
index 0000000..d2b5e5a
--- /dev/null
+++ b/test/trace_processor/rss_stat_mm_id copy.py
@@ -0,0 +1,52 @@
+#!/usr/bin/python
+# Copyright (C) 2019 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This synthetic trace tests handling of the mm_id field in the rss_stat
+# event.
+
+from os import sys, path
+
+sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
+import synth_common
+
+trace = synth_common.create_trace()
+
+trace.add_process_tree_packet(ts=1)
+trace.add_process(10, 0, "process")
+
+trace.add_ftrace_packet(0)
+
+# Create a new child process (treated internally as a thread) of kthreadd.
+trace.add_newtask(ts=50, tid=2, new_tid=3, new_comm="kthread_child", flags=0)
+
+# Add an event on tid 3 which affects its own rss.
+trace.add_rss_stat(ts=90, tid=3, member=0, size=9, mm_id=4321, curr=True)
+
+# Try to add an event against tid 10's mm (mm_id=1234) from tid 3. However,
+# as we've not seen an event with curr == True for mm_id == 1234 yet, this
+# event will be dropped.
+trace.add_rss_stat(ts=91, tid=3, member=0, size=900, mm_id=1234, curr=False)
+
+# Add an event for tid 3 from tid 10. This emulates e.g. direct reclaim
+# where a process reaches into another process' mm struct.
+trace.add_rss_stat(ts=99, tid=10, member=0, size=10, mm_id=4321, curr=False)
+
+# Add an event on tid 10 which affects its own rss.
+trace.add_rss_stat(ts=100, tid=10, member=0, size=1000, mm_id=1234, curr=True)
+
+# Add an event on tid 10 from tid 3. This emulates e.g. background reclaim
+# where kthreadd is cleaning up the mm struct of another process.
+trace.add_rss_stat(ts=101, tid=3, member=0, size=900, mm_id=1234, curr=False)
+
+print(trace.trace.SerializeToString())
diff --git a/test/trace_processor/rss_stat_mm_id_clone.out b/test/trace_processor/rss_stat_mm_id_clone.out
new file mode 100644
index 0000000..0969481
--- /dev/null
+++ b/test/trace_processor/rss_stat_mm_id_clone.out
@@ -0,0 +1,9 @@
+"ts","name","pid","name","value"
+100,"mem.rss.file",10,"parent_process",100.000000
+100,"mem.rss.file",2,"kthreadd",10.000000
+102,"mem.rss.file",2,"kthreadd",20.000000
+102,"mem.rss.file",11,"child_process",90.000000
+104,"mem.rss.file",11,"child_process",10.000000
+105,"mem.rss.file",10,"parent_process",95.000000
+107,"mem.rss.file",10,"parent_process",105.000000
+108,"mem.rss.file",10,"parent_process",110.000000
diff --git a/test/trace_processor/rss_stat_mm_id_clone.py b/test/trace_processor/rss_stat_mm_id_clone.py
new file mode 100644
index 0000000..308b6b4
--- /dev/null
+++ b/test/trace_processor/rss_stat_mm_id_clone.py
@@ -0,0 +1,98 @@
+#!/usr/bin/python
+# Copyright (C) 2019 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This synthetic trace tests handling of the mm_id field in the rss_stat
+# event during clone events which have various flag combinations set.
+
+from os import sys, path
+
+sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
+import synth_common
+
+trace = synth_common.create_trace()
+
+trace.add_process_tree_packet(ts=1)
+trace.add_process(10, 1, "parent_process")
+trace.add_process(3, 2, "kernel_thread")
+
+# In this packet, check what happens to userspace processes with different
+# clone flags.
+trace.add_ftrace_packet(1)
+
+# Emit an rss stat event for the main thread of the process to associate it
+# with an mm_id.
+trace.add_rss_stat(100, tid=10, member=0, size=100, mm_id=0x1234, curr=1)
+
+# Create a newtask event emulating vfork/posix_spawn (i.e. CLONE_VM and
+# CLONE_VFORK set).
+trace.add_newtask(
+    101,
+    tid=10,
+    new_tid=11,
+    new_comm="child_process",
+    flags=synth_common.CLONE_VFORK | synth_common.CLONE_VM)
+
+# The child process will now change its own (and parent's) VM space with
+# |curr| set to 1 (emulating cleaning up some memory in parent).
+trace.add_rss_stat(102, tid=11, member=0, size=90, mm_id=0x1234, curr=1)
+
+# At this point, the child process will obtain a new mm struct. From this
+# point on, all mm_ids from the child should be different from the parent.
+
+# The child process will now change its parent's VM space with curr set to
+# 0 (emulating e.g. cleaning up its stack).
+trace.add_rss_stat(103, tid=11, member=0, size=85, mm_id=0x1234, curr=0)
+
+# Now the child process should exec another process.
+
+# The child can now change its own memory.
+trace.add_rss_stat(104, tid=11, member=0, size=10, mm_id=0x5678, curr=1)
+
+# The parent can now resume execution and may emit another rss event.
+trace.add_rss_stat(105, tid=10, member=0, size=95, mm_id=0x1234, curr=1)
+
+# The parent can now go ahead and start a new thread.
+trace.add_newtask(
+    106,
+    tid=10,
+    new_tid=12,
+    new_comm="parent_thread",
+    flags=synth_common.CLONE_VM | synth_common.CLONE_THREAD)
+
+# Since this thread shares mm space with the parent, it should have the
+# same mm id and have curr set to 1.
+trace.add_rss_stat(107, tid=12, member=0, size=105, mm_id=0x1234, curr=1)
+
+# The parent can also emit events with the same mm struct at the same time.
+trace.add_rss_stat(108, tid=10, member=0, size=110, mm_id=0x1234, curr=1)
+
+# In this packet, we check what happens to kernel threads in RSS stat.
+trace.add_ftrace_packet(1)
+
+# Emit an rss stat event for the existing kernel thread.
+trace.add_rss_stat(100, tid=3, member=0, size=10, mm_id=0x2345, curr=1)
+
+# Start a new kernel thread.
+trace.add_newtask(
+    101,
+    tid=2,
+    new_tid=4,
+    new_comm="kernel_thread2",
+    flags=synth_common.CLONE_VM)
+
+# Emit a rss stat for the new kernel thread.
+trace.add_rss_stat(102, tid=4, member=0, size=20, mm_id=0x2345, curr=1)
+
+print(trace.trace.SerializeToString())
diff --git a/test/trace_processor/rss_stat_mm_id_reuse.out b/test/trace_processor/rss_stat_mm_id_reuse.out
new file mode 100644
index 0000000..396f5f7
--- /dev/null
+++ b/test/trace_processor/rss_stat_mm_id_reuse.out
@@ -0,0 +1,3 @@
+"ts","name","pid","name","value"
+100,"mem.rss.file",10,"parent_process",100.000000
+103,"mem.rss.file",10,"new_process",10.000000
diff --git a/test/trace_processor/rss_stat_mm_id_reuse.py b/test/trace_processor/rss_stat_mm_id_reuse.py
new file mode 100644
index 0000000..58d7b4d
--- /dev/null
+++ b/test/trace_processor/rss_stat_mm_id_reuse.py
@@ -0,0 +1,43 @@
+#!/usr/bin/python
+# Copyright (C) 2019 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This synthetic trace tests handling of the mm_id field in the rss_stat
+# event when mm_structs are reused on process death.
+
+from os import sys, path
+
+sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
+import synth_common
+
+trace = synth_common.create_trace()
+
+trace.add_process_tree_packet(ts=1)
+trace.add_process(10, 1, "parent_process")
+
+trace.add_ftrace_packet(1)
+
+# Emit an event for the process.
+trace.add_rss_stat(100, tid=10, member=0, size=100, mm_id=0x1234, curr=1)
+
+# Now kill the process.
+trace.add_process_free(ts=101, tid=10, comm="parent_process", prio=0)
+
+# Create a new thread which reuses the pid and mm struct.
+trace.add_newtask(102, tid=1, new_tid=10, new_comm="new_process", flags=0)
+
+# Emit an event for the new thread.
+trace.add_rss_stat(103, tid=10, member=0, size=10, mm_id=0x1234, curr=1)
+
+print(trace.trace.SerializeToString())
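
Taken together, the three rss_stat tests pin down a single association rule: a curr=1 event binds its mm_id to the emitting thread, later curr=0 events for that mm_id are attributed to the bound thread, and events whose mm_id has no binding yet are dropped. A simplified model of that rule, matching the expectations spelled out in the test comments above (the real logic is the C++ rss_stat handling in trace_processor; this sketch only approximates it):

```python
class RssStatTracker(object):
    """Toy model: map mm_id -> tid, learned from curr=True events."""

    def __init__(self):
        self.mm_id_to_tid = {}

    def on_rss_stat(self, ts, tid, mm_id, size, curr):
        if curr:
            # A curr=True event comes from the owner of the mm struct, so
            # (re)bind the mm_id to this thread. Rebinding on the next
            # curr=True event also covers mm struct reuse after process
            # death.
            self.mm_id_to_tid[mm_id] = tid
            return (ts, tid, size)
        owner = self.mm_id_to_tid.get(mm_id)
        if owner is None:
            # Never seen an owner for this mm: drop the event.
            return None
        # curr=False: another thread (e.g. reclaim) touched this mm;
        # attribute the counter to the owning thread.
        return (ts, owner, size)

tracker = RssStatTracker()
# tid 3 touches its own mm: binds mm 4321 to tid 3.
assert tracker.on_rss_stat(90, tid=3, mm_id=4321, size=9, curr=True) == (90, 3, 9)
# mm 1234 has no binding yet: dropped.
assert tracker.on_rss_stat(91, tid=3, mm_id=1234, size=900, curr=False) is None
# tid 10 reaches into tid 3's mm: attributed to tid 3.
assert tracker.on_rss_stat(99, tid=10, mm_id=4321, size=10, curr=False) == (99, 3, 10)
```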
diff --git a/ui/src/controller/trace_controller.ts b/ui/src/controller/trace_controller.ts
index 63e45b2..76acaa9 100644
--- a/ui/src/controller/trace_controller.ts
+++ b/ui/src/controller/trace_controller.ts
@@ -374,10 +374,14 @@
 
     const upidToProcessTracks = new Map();
     const rawProcessTracks = await engine.query(`
-      select id, upid, process_track.name, max(depth) as maxDepth
+      select
+        process_track.id as track_id,
+        process_track.upid,
+        process_track.name,
+        max(slice.depth) as max_depth
       from process_track
-      inner join slice on slice.track_id = process_track.id
-      group by track_id
+      join slice on slice.track_id = process_track.id
+      group by process_track.id
     `);
     for (let i = 0; i < rawProcessTracks.numRecords; i++) {
       const trackId = rawProcessTracks.columns[0].longValues![i];
@@ -469,6 +473,32 @@
       });
     }
 
+    // Add global slice tracks.
+    const globalSliceTracks = await engine.query(`
+      select
+        track.name as track_name,
+        track.id as track_id,
+        max(depth) as max_depth
+      from track
+      join slice on track.id = slice.track_id
+      where track.type = 'track'
+      group by track_id
+    `);
+
+    for (let i = 0; i < globalSliceTracks.numRecords; i++) {
+      const name = globalSliceTracks.columns[0].stringValues![i];
+      const trackId = +globalSliceTracks.columns[1].longValues![i];
+      const maxDepth = +globalSliceTracks.columns[2].longValues![i];
+
+      tracksToAdd.push({
+        engineId: this.engineId,
+        kind: SLICE_TRACK_KIND,
+        name: `${name}`,
+        trackGroup: SCROLLING_TRACK_GROUP,
+        config: {maxDepth, trackId},
+      });
+    }
+
     interface CounterTrack {
       name: string;
       trackId: number;
@@ -689,8 +719,6 @@
           name: `${threadName} [${tid}]`,
           trackGroup: pUuid,
           config: {
-            upid,
-            utid,
             maxDepth: threadTrack.maxDepth,
             trackId: threadTrack.trackId
           },
diff --git a/ui/src/service_worker/service_worker.ts b/ui/src/service_worker/service_worker.ts
index 4226a8a..3289352 100644
--- a/ui/src/service_worker/service_worker.ts
+++ b/ui/src/service_worker/service_worker.ts
@@ -51,9 +51,23 @@
 const CACHE_NAME = 'dist-' + UI_DIST_MAP.hex_digest.substr(0, 16);
 const LOG_TAG = `ServiceWorker[${UI_DIST_MAP.hex_digest.substr(0, 16)}]: `;
 
-async function handleHttpRequest(req: Request): Promise<Response> {
-  let fetchReason = 'N/A';
+
+function shouldHandleHttpRequest(req: Request): boolean {
+  // Suppress warning: 'only-if-cached' can be set only with 'same-origin' mode.
+  // This seems to be a Chromium bug. An internal code search suggests this is
+  // a socially acceptable workaround.
+  if (req.cache === 'only-if-cached' && req.mode !== 'same-origin') {
+    return false;
+  }
+
   const url = new URL(req.url);
+  return req.method === 'GET' && url.origin === self.location.origin;
+}
+
+async function handleHttpRequest(req: Request): Promise<Response> {
+  if (!shouldHandleHttpRequest(req)) {
+    throw new Error(LOG_TAG + `${req.url} shouldn't have been handled`);
+  }
 
   // We serve from the cache even if req.cache == 'no-cache'. It's a bit
   // counter-intuitive but it's the most consistent option. If the user hits the
@@ -65,28 +79,21 @@
   // resources, which is undesirable.
   // * Only Ctrl+R. Ctrl+Shift+R will always bypass service-worker for all the
   // requests (index.html and the rest) made in that tab.
-  const cacheable = req.method === 'GET' && url.origin === self.location.origin;
-  if (cacheable) {
-    try {
-      const cacheOps = {cacheName: CACHE_NAME} as CacheQueryOptions;
-      const cachedRes = await caches.match(req, cacheOps);
-      if (cachedRes) {
-        console.debug(LOG_TAG + `serving ${req.url} from cache`);
-        return cachedRes;
-      }
-      console.warn(LOG_TAG + `cache miss on ${req.url}`);
-      fetchReason = 'cache miss';
-    } catch (exc) {
-      console.error(LOG_TAG + `Fetch failed for ${req.url}`, exc);
-      fetchReason = 'fetch failed';
+  try {
+    const cacheOps = {cacheName: CACHE_NAME} as CacheQueryOptions;
+    const cachedRes = await caches.match(req, cacheOps);
+    if (cachedRes) {
+      console.debug(LOG_TAG + `serving ${req.url} from cache`);
+      return cachedRes;
     }
-  } else {
-    fetchReason = `not cacheable (${req.method}, ${req.cache}, ${url.origin})`;
+    console.warn(LOG_TAG + `cache miss on ${req.url}`);
+  } catch (exc) {
+    console.error(LOG_TAG + `Cache request failed for ${req.url}`, exc);
   }
 
   // In any other case, just propagate the fetch on the network, which is the
   // safe behavior.
-  console.debug(LOG_TAG + `serving ${req.url} from network: ${fetchReason}`);
+  console.debug(LOG_TAG + `falling back on network fetch() for ${req.url}`);
   return fetch(req);
 }
 
@@ -106,16 +113,14 @@
     const cache = await caches.open(CACHE_NAME);
     const urlsToCache: RequestInfo[] = [];
     for (const [file, integrity] of Object.entries(UI_DIST_MAP.files)) {
-      const reqOpts: RequestInit = {cache: 'reload', integrity};
+      const reqOpts:
+          RequestInit = {cache: 'reload', mode: 'same-origin', integrity};
       urlsToCache.push(new Request(file, reqOpts));
-      if (file === 'index.html') {
-        const indexPage = location.href.split('service_worker.js')[0];
-        // Disable cachinig of '/' for cases where the UI is hosted in a
-        // subdirectory, because the ci-artifacts GCS bucket doesn't support
-        // auto indexes (it has a fallback 404 page that fails the check).
-        if (indexPage === '/') {
-          urlsToCache.push(new Request(indexPage, reqOpts));
-        }
+      if (file === 'index.html' && location.host !== 'storage.googleapis.com') {
+        // Disable caching of '/' when the UI is hosted on GCS. GCS doesn't
+        // support auto indexes and returns a 404 page on / that fails the
+        // integrity check.
+        urlsToCache.push(new Request('/', reqOpts));
       }
     }
     await cache.addAll(urlsToCache);
@@ -150,5 +155,12 @@
 });
 
 self.addEventListener('fetch', event => {
+  // The early return here will cause the browser to fall back on standard
+  // network-based fetch.
+  if (!shouldHandleHttpRequest(event.request)) {
+    console.debug(LOG_TAG + `serving ${event.request.url} from network`);
+    return;
+  }
+
   event.respondWith(handleHttpRequest(event.request));
 });
diff --git a/ui/src/tracks/chrome_slices/common.ts b/ui/src/tracks/chrome_slices/common.ts
index faa40c3..41eb3c2 100644
--- a/ui/src/tracks/chrome_slices/common.ts
+++ b/ui/src/tracks/chrome_slices/common.ts
@@ -18,8 +18,6 @@
 
 export interface Config {
   maxDepth: number;
-  upid: number;
-  utid: number;
   trackId: number;
 }