Merge "ui: add global slice tracks"
diff --git a/docs/trace-processor.md b/docs/trace-processor.md
index cea8846..532056b 100644
--- a/docs/trace-processor.md
+++ b/docs/trace-processor.md
@@ -8,8 +8,14 @@
 through SQL queries. The trace processor is used:
 * By the [Perfetto UI](https://ui.perfetto.dev/), in the form of a
   Web Assembly module.
-* Standalone, using the `trace_processor_shell` target
-  (`ninja -C out/xxx trace_processor_shell`).
+* Standalone:
+  * using the [prebuilt](http://get.perfetto.dev/trace_processor) binaries.
+  * using the `trace_processor_shell` target from source
+    (`ninja -C out/xxx trace_processor_shell`).
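+
+  For example, after downloading the prebuilt and making it executable
+  (`chmod +x trace_processor`), a trace can be explored interactively
+  (trace path illustrative): `./trace_processor my_trace.pftrace`.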
 * In internal pipelines for batch processing.
 
 Supported input formats:
diff --git a/gn/perfetto.gni b/gn/perfetto.gni
index 3d756d0..828a493 100644
--- a/gn/perfetto.gni
+++ b/gn/perfetto.gni
@@ -66,6 +66,13 @@
   build_with_chromium = false
 }
 
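+# |is_nacl| is defined by the Chromium build but not necessarily by other
+# embedders; default it to false so it can be referenced unconditionally
+# below (e.g. by enable_perfetto_ipc).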
+if (!defined(is_nacl)) {
+  is_nacl = false
+}
+
 declare_args() {
   # The Android blueprint file generator set this to true (as well as
   # is_perfetto_build_generator). This is just about being built in the
@@ -135,7 +139,7 @@
   # system backend in the client library.
   # This includes building things that rely on POSIX sockets, this places
   # limitations on the supported operating systems.
-  enable_perfetto_ipc = !is_win && !is_fuchsia &&
+  enable_perfetto_ipc = !is_win && !is_fuchsia && !is_nacl &&
                         (perfetto_build_standalone ||
                          perfetto_build_with_android || build_with_chromium)
 
diff --git a/gn/standalone/BUILDCONFIG.gn b/gn/standalone/BUILDCONFIG.gn
index d980a05..3668c0f 100644
--- a/gn/standalone/BUILDCONFIG.gn
+++ b/gn/standalone/BUILDCONFIG.gn
@@ -36,10 +36,11 @@
 is_linux_host = host_os == "linux"
 is_mac = current_os == "mac"
 
-# Building with Windows/Fuchsia is currently only supported in the Chromium
+# Building with Windows/Fuchsia/NaCl is currently only supported in the Chromium
 # tree so always set this to false.
 is_win = false
 is_fuchsia = false
+is_nacl = false
 
 if (target_cpu == "") {
   target_cpu = host_cpu
diff --git a/src/trace_processor/containers/bit_vector_benchmark.cc b/src/trace_processor/containers/bit_vector_benchmark.cc
index b4a0c27..206bc5d 100644
--- a/src/trace_processor/containers/bit_vector_benchmark.cc
+++ b/src/trace_processor/containers/bit_vector_benchmark.cc
@@ -17,6 +17,7 @@
 #include <benchmark/benchmark.h>
 
 #include "src/trace_processor/containers/bit_vector.h"
+#include "src/trace_processor/containers/bit_vector_iterators.h"
 
 namespace {
 
@@ -27,15 +28,42 @@
 }
 
 void BitVectorArgs(benchmark::internal::Benchmark* b) {
-  b->Arg(64);
+  std::vector<int> set_percentages;
+  if (IsBenchmarkFunctionalOnly()) {
+    set_percentages = std::vector<int>{50};
+  } else {
+    set_percentages = std::vector<int>{0, 1, 5, 50, 95, 99, 100};
+  }
 
-  if (!IsBenchmarkFunctionalOnly()) {
-    b->Arg(512);
-    b->Arg(8192);
-    b->Arg(123456);
-    b->Arg(1234567);
+  for (int percentage : set_percentages) {
+    b->Args({64, percentage});
+
+    if (!IsBenchmarkFunctionalOnly()) {
+      b->Args({512, percentage});
+      b->Args({8192, percentage});
+      b->Args({123456, percentage});
+      b->Args({1234567, percentage});
+    }
   }
 }
+
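+// Builds a BitVector of |size| bits where each bit is set with roughly
+// |set_percentage|% probability; the fixed seed keeps runs reproducible.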
+BitVector BvWithSizeAndSetPercentage(uint32_t size, uint32_t set_percentage) {
+  static constexpr uint32_t kRandomSeed = 29;
+  std::minstd_rand0 rnd_engine(kRandomSeed);
+
+  BitVector bv;
+  for (uint32_t i = 0; i < size; ++i) {
+    if (rnd_engine() % 100 < set_percentage) {
+      bv.AppendTrue();
+    } else {
+      bv.AppendFalse();
+    }
+  }
+  return bv;
+}
+
 }  // namespace
 
 static void BM_BitVectorAppendTrue(benchmark::State& state) {
@@ -61,15 +87,9 @@
   std::minstd_rand0 rnd_engine(kRandomSeed);
 
   uint32_t size = static_cast<uint32_t>(state.range(0));
+  uint32_t set_percentage = static_cast<uint32_t>(state.range(1));
 
-  BitVector bv;
-  for (uint32_t i = 0; i < size; ++i) {
-    if (rnd_engine() % 2) {
-      bv.AppendTrue();
-    } else {
-      bv.AppendFalse();
-    }
-  }
+  BitVector bv = BvWithSizeAndSetPercentage(size, set_percentage);
 
   static constexpr uint32_t kPoolSize = 1024 * 1024;
   std::vector<bool> bit_pool(kPoolSize);
@@ -93,15 +113,9 @@
   std::minstd_rand0 rnd_engine(kRandomSeed);
 
   uint32_t size = static_cast<uint32_t>(state.range(0));
+  uint32_t set_percentage = static_cast<uint32_t>(state.range(1));
 
-  BitVector bv;
-  for (uint32_t i = 0; i < size; ++i) {
-    if (rnd_engine() % 2) {
-      bv.AppendTrue();
-    } else {
-      bv.AppendFalse();
-    }
-  }
+  BitVector bv = BvWithSizeAndSetPercentage(size, set_percentage);
 
   static constexpr uint32_t kPoolSize = 1024 * 1024;
   std::vector<uint32_t> row_pool(kPoolSize);
@@ -123,16 +137,9 @@
   std::minstd_rand0 rnd_engine(kRandomSeed);
 
   uint32_t size = static_cast<uint32_t>(state.range(0));
+  uint32_t set_percentage = static_cast<uint32_t>(state.range(1));
 
-  BitVector bv;
-  for (uint32_t i = 0; i < size; ++i) {
-    if (rnd_engine() % 2) {
-      bv.AppendTrue();
-    } else {
-      bv.AppendFalse();
-    }
-  }
-
+  BitVector bv = BvWithSizeAndSetPercentage(size, set_percentage);
   static constexpr uint32_t kPoolSize = 1024 * 1024;
   std::vector<uint32_t> row_pool(kPoolSize);
   uint32_t set_bit_count = bv.GetNumBitsSet();
@@ -153,11 +160,12 @@
   std::minstd_rand0 rnd_engine(kRandomSeed);
 
   uint32_t size = static_cast<uint32_t>(state.range(0));
+  uint32_t set_percentage = static_cast<uint32_t>(state.range(1));
 
   uint32_t count = 0;
   BitVector bv;
   for (uint32_t i = 0; i < size; ++i) {
-    bool value = rnd_engine() % 2;
+    bool value = rnd_engine() % 100 < set_percentage;
     if (value) {
       bv.AppendTrue();
     } else {
@@ -205,11 +213,12 @@
   std::minstd_rand0 rnd_engine(kRandomSeed);
 
   uint32_t size = static_cast<uint32_t>(state.range(0));
+  uint32_t set_percentage = static_cast<uint32_t>(state.range(1));
 
   BitVector bv;
   BitVector picker;
   for (uint32_t i = 0; i < size; ++i) {
-    bool value = rnd_engine() % 2;
+    bool value = rnd_engine() % 100 < set_percentage;
     if (value) {
       bv.AppendTrue();
 
@@ -234,3 +243,19 @@
   }
 }
 BENCHMARK(BM_BitVectorUpdateSetBits)->Apply(BitVectorArgs);
+
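+// Iterating set bits should scale with the number of set bits rather than
+// the size of the vector, which is why BitVectorArgs varies the set
+// percentage as well as the size.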
+static void BM_BitVectorSetBitsIterator(benchmark::State& state) {
+  uint32_t size = static_cast<uint32_t>(state.range(0));
+  uint32_t set_percentage = static_cast<uint32_t>(state.range(1));
+
+  BitVector bv = BvWithSizeAndSetPercentage(size, set_percentage);
+  for (auto _ : state) {
+    for (auto it = bv.IterateSetBits(); it; it.Next()) {
+      benchmark::DoNotOptimize(it.index());
+    }
+  }
+}
+BENCHMARK(BM_BitVectorSetBitsIterator)->Apply(BitVectorArgs);
diff --git a/src/trace_processor/heap_profile_tracker.cc b/src/trace_processor/heap_profile_tracker.cc
index 410dfa2..a2aff3a 100644
--- a/src/trace_processor/heap_profile_tracker.cc
+++ b/src/trace_processor/heap_profile_tracker.cc
@@ -80,7 +80,6 @@
 
 std::unique_ptr<tables::ExperimentalFlamegraphNodesTable> BuildNativeFlamegraph(
     TraceStorage* storage,
-    NativeFlamegraphType type,
     UniquePid upid,
     int64_t timestamp) {
   const tables::HeapProfileAllocationTable& allocation_tbl =
@@ -88,15 +87,7 @@
   const tables::StackProfileCallsiteTable& callsites_tbl =
       storage->stack_profile_callsite_table();
 
-  StringId profile_type;
-  switch (type) {
-    case NativeFlamegraphType::kNotFreed:
-      profile_type = storage->InternString("native");
-      break;
-    case NativeFlamegraphType::kAlloc:
-      profile_type = storage->InternString("native_alloc");
-      break;
-  }
+  StringId profile_type = storage->InternString("native");
 
   std::vector<uint32_t> callsite_to_merged_callsite(callsites_tbl.row_count(),
                                                     0);
@@ -174,13 +165,21 @@
             .long_value;
 
     PERFETTO_CHECK((size < 0 && count < 0) || (size >= 0 && count >= 0));
-    if (type == NativeFlamegraphType::kAlloc && size < 0)
-      continue;
-
-    // TODO(fmayer): Clean up types and remove the static_cast.
     uint32_t merged_idx =
         callsite_to_merged_callsite[*callsites_tbl.id().IndexOf(
             CallsiteId(static_cast<uint32_t>(callsite_id)))];
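+    // size/count are negative for frees, so the net size/count columns
+    // below are updated unconditionally while the alloc_* columns only
+    // accumulate the positive (allocation) side.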
+    if (size > 0) {
+      // TODO(fmayer): Clean up types and remove the static_cast.
+      tbl->mutable_alloc_size()->Set(merged_idx,
+                                     tbl->alloc_size()[merged_idx] + size);
+      tbl->mutable_alloc_count()->Set(merged_idx,
+                                      tbl->alloc_count()[merged_idx] + count);
+    }
+
+    // TODO(fmayer): Clean up types and remove the static_cast.
     tbl->mutable_size()->Set(merged_idx, tbl->size()[merged_idx] + size);
     tbl->mutable_count()->Set(merged_idx, tbl->count()[merged_idx] + count);
   }
@@ -195,6 +191,11 @@
     tbl->mutable_cumulative_count()->Set(
         idx, tbl->cumulative_count()[idx] + tbl->count()[idx]);
 
+    tbl->mutable_cumulative_alloc_size()->Set(
+        idx, tbl->cumulative_alloc_size()[idx] + tbl->alloc_size()[idx]);
+    tbl->mutable_cumulative_alloc_count()->Set(
+        idx, tbl->cumulative_alloc_count()[idx] + tbl->alloc_count()[idx]);
+
     auto parent = tbl->parent_id()[idx];
     if (parent) {
       uint32_t parent_idx = *tbl->id().IndexOf(
@@ -205,6 +206,13 @@
       tbl->mutable_cumulative_count()->Set(
           parent_idx,
           tbl->cumulative_count()[parent_idx] + tbl->cumulative_count()[idx]);
+
+      tbl->mutable_cumulative_alloc_size()->Set(
+          parent_idx, tbl->cumulative_alloc_size()[parent_idx] +
+                          tbl->cumulative_alloc_size()[idx]);
+      tbl->mutable_cumulative_alloc_count()->Set(
+          parent_idx, tbl->cumulative_alloc_count()[parent_idx] +
+                          tbl->cumulative_alloc_count()[idx]);
     }
   }
 
diff --git a/src/trace_processor/heap_profile_tracker.h b/src/trace_processor/heap_profile_tracker.h
index a04c03b..53e7637 100644
--- a/src/trace_processor/heap_profile_tracker.h
+++ b/src/trace_processor/heap_profile_tracker.h
@@ -30,14 +30,8 @@
 namespace perfetto {
 namespace trace_processor {
 
-enum class NativeFlamegraphType {
-  kAlloc,
-  kNotFreed,
-};
-
 std::unique_ptr<tables::ExperimentalFlamegraphNodesTable> BuildNativeFlamegraph(
     TraceStorage* storage,
-    NativeFlamegraphType type,
     UniquePid upid,
     int64_t timestamp);
 
diff --git a/src/trace_processor/importers/ftrace/rss_stat_tracker.cc b/src/trace_processor/importers/ftrace/rss_stat_tracker.cc
index db444a7..f536a02 100644
--- a/src/trace_processor/importers/ftrace/rss_stat_tracker.cc
+++ b/src/trace_processor/importers/ftrace/rss_stat_tracker.cc
@@ -69,24 +69,38 @@
 base::Optional<UniqueTid> RssStatTracker::FindUtidForMmId(int64_t mm_id,
                                                           bool is_curr,
                                                           uint32_t pid) {
-  auto it = mm_id_to_utid_.find(mm_id);
-  if (!is_curr) {
-    return it == mm_id_to_utid_.end() ? base::nullopt
-                                      : base::make_optional(it->second);
+  // If curr is true, we can just overwrite the state in the map and return
+  // the utid corresponding to |pid|.
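+  //
+  // E.g. rss_stat(pid=5, mm_id=M, curr=1) associates M with pid 5's utid;
+  // a later rss_stat(pid=7, mm_id=M, curr=0) is then attributed to pid 5
+  // (e.g. reclaim performed on its mm by another process).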
+  if (is_curr) {
+    UniqueTid utid = context_->process_tracker->GetOrCreateThread(pid);
+    mm_id_to_utid_[mm_id] = utid;
+    return utid;
   }
 
+  // If curr is false, try to look up the utid we previously saw for this
+  // mm id.
+  auto it = mm_id_to_utid_.find(mm_id);
+  if (it == mm_id_to_utid_.end())
+    return base::nullopt;
+
+  // If the utid in the map is the same as our current utid but curr is false,
+  // that means we are in the middle of a process changing mm structs (i.e. in
+  // the middle of a vfork + exec). Therefore, we should discard the
+  // association of this mm struct with this thread.
   UniqueTid utid = context_->process_tracker->GetOrCreateThread(pid);
-  if (it != mm_id_to_utid_.end() && it->second != utid) {
-    // Since both of these structs have the same mm hash and both say that
-    // the mm hash is for the current project, we can assume they belong to
-    // the same process so we can associate them together.
-    // TODO(lalitm): investigate if it's possible for mm_id to be reused
-    // between different processes if we have pid reuse and get unlucky. If
-    // so, we'll need to do some more careful tracking here.
-    context_->process_tracker->AssociateThreads(it->second, utid);
+  if (it->second == utid) {
+    mm_id_to_utid_.erase(it);
+    return base::nullopt;
   }
-  mm_id_to_utid_[mm_id] = utid;
-  return utid;
+
+  // This case happens when a process is changing the VM of another process
+  // and we know the utid corresponding to the target process. Just return
+  // that utid.
+  return it->second;
 }
 
 }  // namespace trace_processor
diff --git a/src/trace_processor/importers/proto/heap_graph_tracker.cc b/src/trace_processor/importers/proto/heap_graph_tracker.cc
index 124fab9..f96cf6e 100644
--- a/src/trace_processor/importers/proto/heap_graph_tracker.cc
+++ b/src/trace_processor/importers/proto/heap_graph_tracker.cc
@@ -234,17 +234,17 @@
                                             // artificial root in the database.
 
     tables::ExperimentalFlamegraphNodesTable::Row alloc_row{
-        current_ts,
-        current_upid,
-        profile_type,
-        depth,
-        StringId::Raw(static_cast<uint32_t>(node.class_name)),
-        java_mapping,
+        current_ts, current_upid, profile_type, depth,
+        StringId::Raw(static_cast<uint32_t>(node.class_name)), java_mapping,
         static_cast<int64_t>(node.count),
         static_cast<int64_t>(node_to_cumulative_count[i]),
         static_cast<int64_t>(node.size),
         static_cast<int64_t>(node_to_cumulative_size[i]),
-        parent_id};
+        // For Java dumps, set alloc_count == count, alloc_size == size, etc.
+        static_cast<int64_t>(node.count),
+        static_cast<int64_t>(node_to_cumulative_count[i]),
+        static_cast<int64_t>(node.size),
+        static_cast<int64_t>(node_to_cumulative_size[i]), parent_id};
     node_to_row_idx[i] = *tbl->id().IndexOf(tbl->Insert(alloc_row));
   }
   return tbl;
diff --git a/src/trace_processor/importers/proto/proto_trace_parser_unittest.cc b/src/trace_processor/importers/proto/proto_trace_parser_unittest.cc
index 5fd1d74..ff27f76 100644
--- a/src/trace_processor/importers/proto/proto_trace_parser_unittest.cc
+++ b/src/trace_processor/importers/proto/proto_trace_parser_unittest.cc
@@ -2390,11 +2390,6 @@
 
     auto mapping = interned_data->add_mappings();
     mapping->set_iid(1);
-    mapping->set_build_id(1);
-
-    auto build_id = interned_data->add_build_ids();
-    build_id->set_iid(1);
-    build_id->set_str("3BBCFBD372448A727265C3E7C4D954F91");
 
     auto frame = interned_data->add_frames();
     frame->set_iid(1);
@@ -2456,13 +2451,6 @@
   EXPECT_EQ(samples.ts()[2], 68000);
   EXPECT_EQ(samples.callsite_id()[2], 0);
   EXPECT_EQ(samples.utid()[2], 1u);
-
-  // Breakpad build_ids should not be modified/mangled.
-  ASSERT_STREQ(
-      context_.storage
-          ->GetString(storage_->stack_profile_mapping_table().build_id()[0])
-          .c_str(),
-      "3BBCFBD372448A727265C3E7C4D954F91");
 }
 
 }  // namespace
diff --git a/src/trace_processor/sqlite_experimental_flamegraph_table.cc b/src/trace_processor/sqlite_experimental_flamegraph_table.cc
index 033179c..12a9bb6 100644
--- a/src/trace_processor/sqlite_experimental_flamegraph_table.cc
+++ b/src/trace_processor/sqlite_experimental_flamegraph_table.cc
@@ -161,14 +161,8 @@
   }
   if (values_.profile_type == "native") {
     table_ = BuildNativeFlamegraph(context_->storage.get(),
-                                   NativeFlamegraphType::kNotFreed,
                                    values_.upid, values_.ts);
   }
-  if (values_.profile_type == "native_alloc") {
-    table_ = BuildNativeFlamegraph(context_->storage.get(),
-                                   NativeFlamegraphType::kAlloc, values_.upid,
-                                   values_.ts);
-  }
 
   // table_ can be nullptr precisely where the constraints passed to us don't
   // make sense. Therefore, we can just return this to SQLite.
diff --git a/src/trace_processor/stack_profile_tracker.cc b/src/trace_processor/stack_profile_tracker.cc
index e9185e0..63443f4 100644
--- a/src/trace_processor/stack_profile_tracker.cc
+++ b/src/trace_processor/stack_profile_tracker.cc
@@ -68,18 +68,9 @@
       context_->storage->GetString(raw_build_id);
   StringId build_id = GetEmptyStringId();
   if (raw_build_id_str.size() > 0) {
-    // If the build_id is 33 characters long, we assume it's a Breakpad debug
-    // identifier which is already in Hex and doesn't need conversion.
-    // TODO(b/148109467): Remove workaround once all active Chrome versions
-    // write raw bytes instead of a string as build_id.
-    if (raw_build_id_str.size() == 33) {
-      build_id = raw_build_id;
-    } else {
-      std::string hex_build_id =
-          base::ToHex(raw_build_id_str.c_str(), raw_build_id_str.size());
-      build_id =
-          context_->storage->InternString(base::StringView(hex_build_id));
-    }
+    std::string hex_build_id =
+        base::ToHex(raw_build_id_str.c_str(), raw_build_id_str.size());
+    build_id = context_->storage->InternString(base::StringView(hex_build_id));
   }
 
   tables::StackProfileMappingTable::Row row{
diff --git a/src/trace_processor/tables/profiler_tables.h b/src/trace_processor/tables/profiler_tables.h
index 5f529eb..e87c40b 100644
--- a/src/trace_processor/tables/profiler_tables.h
+++ b/src/trace_processor/tables/profiler_tables.h
@@ -100,6 +100,10 @@
   C(int64_t, cumulative_count)                                            \
   C(int64_t, size)                                                        \
   C(int64_t, cumulative_size)                                             \
+  C(int64_t, alloc_count)                                                 \
+  C(int64_t, cumulative_alloc_count)                                      \
+  C(int64_t, alloc_size)                                                  \
+  C(int64_t, cumulative_alloc_size)                                       \
   C(base::Optional<uint32_t>, parent_id)
 
 PERFETTO_TP_TABLE(PERFETTO_TP_EXPERIMENTAL_FLAMEGRAPH_NODES);
diff --git a/test/synth_common.py b/test/synth_common.py
index f950294..09bf07e 100644
--- a/test/synth_common.py
+++ b/test/synth_common.py
@@ -19,6 +19,10 @@
 from google.protobuf.pyext import _message
 
 CLONE_THREAD = 0x00010000
+CLONE_VFORK = 0x00004000
+CLONE_VM = 0x00000100
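+# These constants mirror the kernel's clone(2) flag bits from
+# include/uapi/linux/sched.h.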
 
 
 class Trace(object):
diff --git a/test/trace_processor/heap_profile_flamegraph.sql b/test/trace_processor/heap_profile_flamegraph.sql
index a38509e..fa90b15 100644
--- a/test/trace_processor/heap_profile_flamegraph.sql
+++ b/test/trace_processor/heap_profile_flamegraph.sql
@@ -1 +1 @@
-select * from experimental_flamegraph(605908369259172, 1, 'native_alloc')  limit 10;
+select * from experimental_flamegraph(605908369259172, 1, 'native')  limit 10;
diff --git a/test/trace_processor/heap_profile_flamegraph_system-server-native-profile.out b/test/trace_processor/heap_profile_flamegraph_system-server-native-profile.out
index f76368d..42ab65d 100644
--- a/test/trace_processor/heap_profile_flamegraph_system-server-native-profile.out
+++ b/test/trace_processor/heap_profile_flamegraph_system-server-native-profile.out
@@ -1,11 +1,11 @@
-"id","type","depth","name","map_name","count","cumulative_count","size","cumulative_size","parent_id"
-0,"experimental_flamegraph_nodes",0,"__start_thread","/apex/com.android.runtime/lib64/bionic/libc.so",0,210,0,1084996,"[NULL]"
-1,"experimental_flamegraph_nodes",1,"_ZL15__pthread_startPv","/apex/com.android.runtime/lib64/bionic/libc.so",0,210,0,1084996,0
-2,"experimental_flamegraph_nodes",2,"_ZN7android14AndroidRuntime15javaThreadShellEPv","/system/lib64/libandroid_runtime.so",0,77,0,348050,1
-3,"experimental_flamegraph_nodes",3,"_ZN7android6Thread11_threadLoopEPv","/system/lib64/libutils.so",0,77,0,348050,2
-4,"experimental_flamegraph_nodes",4,"_ZN7android10PoolThread10threadLoopEv","/system/lib64/libbinder.so",0,64,0,279182,3
-5,"experimental_flamegraph_nodes",5,"_ZN7android14IPCThreadState14joinThreadPoolEb","/system/lib64/libbinder.so",0,64,0,279182,4
-6,"experimental_flamegraph_nodes",6,"_ZN7android14IPCThreadState20getAndExecuteCommandEv","/system/lib64/libbinder.so",0,64,0,279182,5
-7,"experimental_flamegraph_nodes",7,"_ZN7android14IPCThreadState14executeCommandEi","/system/lib64/libbinder.so",0,64,0,279182,6
-8,"experimental_flamegraph_nodes",8,"_ZN7android7BBinder8transactEjRKNS_6ParcelEPS1_j","/system/lib64/libbinder.so",0,64,0,279182,7
-9,"experimental_flamegraph_nodes",9,"_ZN11JavaBBinder10onTransactEjRKN7android6ParcelEPS1_j","/system/lib64/libandroid_runtime.so",0,60,0,262730,8
+"id","type","depth","name","map_name","count","cumulative_count","size","cumulative_size","alloc_count","cumulative_alloc_count","alloc_size","cumulative_alloc_size","parent_id"
+0,"experimental_flamegraph_nodes",0,"__start_thread","/apex/com.android.runtime/lib64/bionic/libc.so",0,8,0,84848,0,210,0,1084996,"[NULL]"
+1,"experimental_flamegraph_nodes",1,"_ZL15__pthread_startPv","/apex/com.android.runtime/lib64/bionic/libc.so",0,8,0,84848,0,210,0,1084996,0
+2,"experimental_flamegraph_nodes",2,"_ZN7android14AndroidRuntime15javaThreadShellEPv","/system/lib64/libandroid_runtime.so",0,5,0,27704,0,77,0,348050,1
+3,"experimental_flamegraph_nodes",3,"_ZN7android6Thread11_threadLoopEPv","/system/lib64/libutils.so",0,5,0,27704,0,77,0,348050,2
+4,"experimental_flamegraph_nodes",4,"_ZN7android10PoolThread10threadLoopEv","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,3
+5,"experimental_flamegraph_nodes",5,"_ZN7android14IPCThreadState14joinThreadPoolEb","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,4
+6,"experimental_flamegraph_nodes",6,"_ZN7android14IPCThreadState20getAndExecuteCommandEv","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,5
+7,"experimental_flamegraph_nodes",7,"_ZN7android14IPCThreadState14executeCommandEi","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,6
+8,"experimental_flamegraph_nodes",8,"_ZN7android7BBinder8transactEjRKNS_6ParcelEPS1_j","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,7
+9,"experimental_flamegraph_nodes",9,"_ZN11JavaBBinder10onTransactEjRKN7android6ParcelEPS1_j","/system/lib64/libandroid_runtime.so",0,0,0,0,0,60,0,262730,8
diff --git a/test/trace_processor/index b/test/trace_processor/index
index 9ce8f44..afd8064 100644
--- a/test/trace_processor/index
+++ b/test/trace_processor/index
@@ -57,6 +57,8 @@
 
 # Rss stats
 rss_stat_mm_id.py rss_stat.sql rss_stat_mm_id.out
+rss_stat_mm_id_clone.py rss_stat.sql rss_stat_mm_id_clone.out
+rss_stat_mm_id_reuse.py rss_stat.sql rss_stat_mm_id_reuse.out
 rss_stat_legacy.py rss_stat.sql rss_stat_legacy.out
 
 # Memory counters
diff --git a/test/trace_processor/rss_stat_mm_id copy.py b/test/trace_processor/rss_stat_mm_id copy.py
new file mode 100644
index 0000000..d2b5e5a
--- /dev/null
+++ b/test/trace_processor/rss_stat_mm_id copy.py
@@ -0,0 +1,52 @@
+#!/usr/bin/python
+# Copyright (C) 2019 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This synthetic trace tests handling of the mm_id field in the rss_stat
+# event.
+
+from os import sys, path
+
+sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
+import synth_common
+
+trace = synth_common.create_trace()
+
+trace.add_process_tree_packet(ts=1)
+trace.add_process(10, 0, "process")
+
+trace.add_ftrace_packet(0)
+
+# Create a new child process (treated internally as a thread) of kthreadd.
+trace.add_newtask(ts=50, tid=2, new_tid=3, new_comm="kthread_child", flags=0)
+
+# Add an event on tid 3 which affects its own rss.
+trace.add_rss_stat(ts=90, tid=3, member=0, size=9, mm_id=4321, curr=True)
+
+# Try to add an event from tid 3 for tid 10's mm. However, as we've not seen
+# an event with curr == True for mm_id == 1234, this event will be dropped.
+trace.add_rss_stat(ts=91, tid=3, member=0, size=900, mm_id=1234, curr=False)
+
+# Add an event for tid 3 from tid 10. This emulates e.g. direct reclaim
+# where a process reaches into another process' mm struct.
+trace.add_rss_stat(ts=99, tid=10, member=0, size=10, mm_id=4321, curr=False)
+
+# Add an event on tid 10 which affects its own rss.
+trace.add_rss_stat(ts=100, tid=10, member=0, size=1000, mm_id=1234, curr=True)
+
+# Add an event on tid 10 from tid 3. This emulates e.g. background reclaim
+# where kthreadd is cleaning up the mm struct of another process.
+trace.add_rss_stat(ts=101, tid=3, member=0, size=900, mm_id=1234, curr=False)
+
+print(trace.trace.SerializeToString())
diff --git a/test/trace_processor/rss_stat_mm_id_clone.out b/test/trace_processor/rss_stat_mm_id_clone.out
new file mode 100644
index 0000000..0969481
--- /dev/null
+++ b/test/trace_processor/rss_stat_mm_id_clone.out
@@ -0,0 +1,9 @@
+"ts","name","pid","name","value"
+100,"mem.rss.file",10,"parent_process",100.000000
+100,"mem.rss.file",2,"kthreadd",10.000000
+102,"mem.rss.file",2,"kthreadd",20.000000
+102,"mem.rss.file",11,"child_process",90.000000
+104,"mem.rss.file",11,"child_process",10.000000
+105,"mem.rss.file",10,"parent_process",95.000000
+107,"mem.rss.file",10,"parent_process",105.000000
+108,"mem.rss.file",10,"parent_process",110.000000
diff --git a/test/trace_processor/rss_stat_mm_id_clone.py b/test/trace_processor/rss_stat_mm_id_clone.py
new file mode 100644
index 0000000..308b6b4
--- /dev/null
+++ b/test/trace_processor/rss_stat_mm_id_clone.py
@@ -0,0 +1,98 @@
+#!/usr/bin/python
+# Copyright (C) 2019 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This synthetic trace tests handling of the mm_id field in the rss_stat
+# event during clone events which have various flag combinations set.
+
+from os import sys, path
+
+sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
+import synth_common
+
+trace = synth_common.create_trace()
+
+trace.add_process_tree_packet(ts=1)
+trace.add_process(10, 1, "parent_process")
+trace.add_process(3, 2, "kernel_thread")
+
+# In this packet, check what happens to userspace processes with different
+# clone flags.
+trace.add_ftrace_packet(1)
+
+# Emit an rss stat event for the main thread of the process to associate it
+# with an mm_id.
+trace.add_rss_stat(100, tid=10, member=0, size=100, mm_id=0x1234, curr=1)
+
+# Create a newtask event emulating vfork/posix_spawn (i.e. CLONE_VM and
+# CLONE_VFORK set).
+trace.add_newtask(
+    101,
+    tid=10,
+    new_tid=11,
+    new_comm="child_process",
+    flags=synth_common.CLONE_VFORK | synth_common.CLONE_VM)
+
+# The child process will now change its own (and parent's) VM space with
+# |curr| set to 1 (emulating cleaning up some memory in parent).
+trace.add_rss_stat(102, tid=11, member=0, size=90, mm_id=0x1234, curr=1)
+
+# At this point, the child process will obtain a new mm struct. From this
+# point on, all mm_ids from the child should be different from the parent.
+
+# The child process will now change its parent's VM space with curr set to
+# 0 (emulating e.g. cleaning up its stack).
+trace.add_rss_stat(103, tid=11, member=0, size=85, mm_id=0x1234, curr=0)
+
+# Now the child process should exec another process.
+
+# The child can now change its own memory.
+trace.add_rss_stat(104, tid=11, member=0, size=10, mm_id=0x5678, curr=1)
+
+# The parent can now resume execution and may emit another rss event.
+trace.add_rss_stat(105, tid=10, member=0, size=95, mm_id=0x1234, curr=1)
+
+# The parent can now go ahead and start a new thread.
+trace.add_newtask(
+    106,
+    tid=10,
+    new_tid=12,
+    new_comm="parent_thread",
+    flags=synth_common.CLONE_VM | synth_common.CLONE_THREAD)
+
+# Since this thread shares mm space with the parent, it should have the
+# same mm id and have curr set to 1.
+trace.add_rss_stat(107, tid=12, member=0, size=105, mm_id=0x1234, curr=1)
+
+# The parent can also emit events with the same mm struct at the same time.
+trace.add_rss_stat(108, tid=10, member=0, size=110, mm_id=0x1234, curr=1)
+
+# In this packet, we check what happens to kernel threads in RSS stat.
+trace.add_ftrace_packet(1)
+
+# Emit an rss stat event for the existing kernel thread.
+trace.add_rss_stat(100, tid=3, member=0, size=10, mm_id=0x2345, curr=1)
+
+# Start a new kernel thread.
+trace.add_newtask(
+    101,
+    tid=2,
+    new_tid=4,
+    new_comm="kernel_thread2",
+    flags=synth_common.CLONE_VM)
+
+# Emit an rss stat for the new kernel thread.
+trace.add_rss_stat(102, tid=4, member=0, size=20, mm_id=0x2345, curr=1)
+
+print(trace.trace.SerializeToString())
diff --git a/test/trace_processor/rss_stat_mm_id_reuse.out b/test/trace_processor/rss_stat_mm_id_reuse.out
new file mode 100644
index 0000000..396f5f7
--- /dev/null
+++ b/test/trace_processor/rss_stat_mm_id_reuse.out
@@ -0,0 +1,3 @@
+"ts","name","pid","name","value"
+100,"mem.rss.file",10,"parent_process",100.000000
+103,"mem.rss.file",10,"new_process",10.000000
diff --git a/test/trace_processor/rss_stat_mm_id_reuse.py b/test/trace_processor/rss_stat_mm_id_reuse.py
new file mode 100644
index 0000000..58d7b4d
--- /dev/null
+++ b/test/trace_processor/rss_stat_mm_id_reuse.py
@@ -0,0 +1,43 @@
+#!/usr/bin/python
+# Copyright (C) 2019 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This synthetic trace tests handling of the mm_id field in the rss_stat
+# event when mm_structs are reused on process death.
+
+from os import sys, path
+
+sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
+import synth_common
+
+trace = synth_common.create_trace()
+
+trace.add_process_tree_packet(ts=1)
+trace.add_process(10, 1, "parent_process")
+
+trace.add_ftrace_packet(1)
+
+# Emit an event for the process.
+trace.add_rss_stat(100, tid=10, member=0, size=100, mm_id=0x1234, curr=1)
+
+# Now kill the process.
+trace.add_process_free(ts=101, tid=10, comm="parent_process", prio=0)
+
+# Create a new thread which reuses the pid and mm struct.
+trace.add_newtask(102, tid=1, new_tid=10, new_comm="new_process", flags=0)
+
+# Emit an event for the new thread.
+trace.add_rss_stat(103, tid=10, member=0, size=10, mm_id=0x1234, curr=1)
+
+print(trace.trace.SerializeToString())
diff --git a/ui/src/controller/heap_profile_controller.ts b/ui/src/controller/heap_profile_controller.ts
index 973a92f..eadb704 100644
--- a/ui/src/controller/heap_profile_controller.ts
+++ b/ui/src/controller/heap_profile_controller.ts
@@ -177,23 +177,23 @@
     let sizeIndex = 4;
     switch (viewingOption) {
       case SPACE_MEMORY_ALLOCATED_NOT_FREED_KEY:
-        orderBy = `where size > 0 order by depth, parent_hash, size desc, name`;
+        orderBy = `where cumulative_size > 0 order by depth, parent_id,
+            cumulative_size desc, name`;
         sizeIndex = 4;
         break;
       case ALLOC_SPACE_MEMORY_ALLOCATED_KEY:
-        orderBy =
-            `where alloc_size > 0 order by depth, parent_hash, alloc_size desc,
-            name`;
+        orderBy = `where cumulative_alloc_size > 0 order by depth, parent_id,
+            cumulative_alloc_size desc, name`;
         sizeIndex = 5;
         break;
       case OBJECTS_ALLOCATED_NOT_FREED_KEY:
-        orderBy =
-            `where count > 0 order by depth, parent_hash, count desc, name`;
+        orderBy = `where cumulative_count > 0 order by depth, parent_id,
+            cumulative_count desc, name`;
         sizeIndex = 6;
         break;
       case OBJECTS_ALLOCATED_KEY:
-        orderBy = `where alloc_count > 0 order by depth, parent_hash,
-            alloc_count desc, name`;
+        orderBy = `where cumulative_alloc_count > 0 order by depth, parent_id,
+            cumulative_alloc_count desc, name`;
         sizeIndex = 7;
         break;
       default:
@@ -201,8 +201,12 @@
     }
 
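+    // |sizeIndex| chosen above indexes the result columns of this query:
+    // 4 -> cumulative_size, 5 -> cumulative_alloc_size,
+    // 6 -> cumulative_count, 7 -> cumulative_alloc_count.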
     const callsites = await this.args.engine.query(
-        `SELECT hash, name, parent_hash, depth, size, alloc_size, count,
-        alloc_count, map_name, self_size from ${tableName} ${orderBy}`);
+        `SELECT id, name, parent_id, depth, cumulative_size,
+        cumulative_alloc_size, cumulative_count,
+        cumulative_alloc_count, map_name, size from ${tableName} ${orderBy}`);
 
     const flamegraphData: CallsiteInfo[] = new Array();
     const hashToindex: Map<number, number> = new Map();
@@ -230,102 +231,18 @@
       Promise<string> {
     // Creating unique names for views so we can reuse and not delete them
     // for each marker.
-    const tableNameCallsiteNameSize =
-        this.tableName(`callsite_with_name_and_size`);
-    const tableNameCallsiteHashNameSize =
-        this.tableName(`callsite_hash_name_size`);
     const tableNameGroupedCallsitesForFlamegraph =
         this.tableName(`grouped_callsites_for_flamegraph`);
-    // Joining the callsite table with frame table then with alloc table to get
-    // the size and name for each callsite.
-    // TODO(taylori): Make frame name nullable in the trace processor for
-    // consistency with the other columns.
-    await this.args.engine.query(
-        `create view if not exists ${tableNameCallsiteNameSize} as
-         select id, parent_id, depth, IFNULL(DEMANGLE(name), name) as name,
-            map_name, size, alloc_size, count, alloc_count from (
-         select cs.id as id, parent_id, depth,
-            coalesce(symbols.name,
-                case when fr.name != '' then fr.name else map.name end) as name,
-            map.name as map_name,
-            SUM(IFNULL(size, 0)) as size,
-            SUM(IFNULL(size, 0)) as size,
-            SUM(case when size > 0 then size else 0 end) as alloc_size,
-            SUM(IFNULL(count, 0)) as count,
-            SUM(case when count > 0 then count else 0 end) as alloc_count
-         from stack_profile_callsite cs
-         join stack_profile_frame fr on cs.frame_id = fr.id
-         join stack_profile_mapping map on fr.mapping = map.id
-         left join (
-              select symbol_set_id, FIRST_VALUE(name) OVER(PARTITION BY
-                symbol_set_id) as name
-              from stack_profile_symbol GROUP BY symbol_set_id
-            ) as symbols using(symbol_set_id)
-         left join heap_profile_allocation alloc on alloc.callsite_id = cs.id
-         and alloc.ts <= ${ts} and alloc.upid = ${upid} group by cs.id)`);
 
-    // Recursive query to compute the hash for each callsite based on names
-    // rather than ids.
-    // We get all the children of the row in question and emit a row with hash
-    // equal hash(name, parent.hash). Roots without the parent will have -1 as
-    // hash.  Slices will be merged into a big slice.
-    await this.args.engine.query(
-        `create view if not exists ${tableNameCallsiteHashNameSize} as
-        with recursive callsite_table_names(
-          id, hash, name, map_name, size, alloc_size, count, alloc_count,
-          parent_hash, depth) AS (
-        select id, hash(name) as hash, name, map_name, size, alloc_size, count,
-          alloc_count, -1, depth
-        from ${tableNameCallsiteNameSize}
-        where depth = 0
-        union all
-        select cs.id, hash(cs.name, ctn.hash) as hash, cs.name, cs.map_name,
-          cs.size, cs.alloc_size, cs.count, cs.alloc_count, ctn.hash, cs.depth
-        from callsite_table_names ctn
-        inner join ${tableNameCallsiteNameSize} cs ON ctn.id = cs.parent_id
-        )
-        select hash, name, map_name, parent_hash, depth, SUM(size) as size,
-          SUM(case when alloc_size > 0 then alloc_size else 0 end)
-            as alloc_size, SUM(count) as count,
-          SUM(case when alloc_count > 0 then alloc_count else 0 end)
-            as alloc_count
-        from callsite_table_names
-        group by hash`);
-
-    // Recursive query to compute the cumulative size of each callsite.
-    // Base case: We get all the callsites where the size is non-zero.
-    // Recursive case: We get the callsite which is the parent of the current
-    //  callsite(in terms of hashes) and emit a row with the size of the current
-    //  callsite plus all the info of the parent.
-    // Grouping: For each callsite, our recursive table has n rows where n is
-    //  the number of descendents with a non-zero self size. We need to group on
-    //  the hash and sum all the sizes to get the cumulative size for each
-    //  callsite hash.
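+    // The recursive name-hashing and aggregation is now done by the
+    // experimental_flamegraph table function in trace processor; this just
+    // materializes its pre-aggregated columns.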
     await this.args.engine.query(`create temp table if not exists ${
-        tableNameGroupedCallsitesForFlamegraph}
-        as with recursive callsite_children(
-          hash, name, map_name, parent_hash, depth, size, alloc_size, count,
-          alloc_count, self_size, self_alloc_size, self_count, self_alloc_count)
-        as (
-        select hash, name, map_name, parent_hash, depth, size, alloc_size,
-          count, alloc_count, size as self_size, alloc_size as self_alloc_size,
-          count as self_count, alloc_count as self_alloc_count
-        from ${tableNameCallsiteHashNameSize}
-        union all
-        select chns.hash, chns.name, chns.map_name, chns.parent_hash,
-          chns.depth, cc.size, cc.alloc_size, cc.count, cc.alloc_count,
-          chns.size, chns.alloc_size, chns.count, chns.alloc_count
-        from ${tableNameCallsiteHashNameSize} chns
-        inner join callsite_children cc on chns.hash = cc.parent_hash
-        )
-        select hash, name, map_name, parent_hash, depth, SUM(size) as size,
-          SUM(case when alloc_size > 0 then alloc_size else 0 end)
-            as alloc_size, SUM(count) as count,
-          SUM(case when alloc_count > 0 then alloc_count else 0 end) as
-            alloc_count,
-          self_size, self_alloc_size, self_count, self_alloc_count
-        from callsite_children
-        group by hash`);
+        tableNameGroupedCallsitesForFlamegraph} as
+        select id, name, map_name, parent_id, depth, cumulative_size,
+          cumulative_alloc_size, cumulative_count, cumulative_alloc_count,
+          size, alloc_size, count, alloc_count
+        from experimental_flamegraph(${ts}, ${upid}, 'native')`);
     return tableNameGroupedCallsitesForFlamegraph;
   }