Merge "trace_to_text: Output process names and fix json."
diff --git a/Android.bp b/Android.bp
index 83d55df..8e6341e 100644
--- a/Android.bp
+++ b/Android.bp
@@ -4366,7 +4366,6 @@
   name: "trace_to_text",
   srcs: [
     ":perfetto_protos_perfetto_common_lite_gen",
-    ":perfetto_protos_perfetto_common_zero_gen",
     ":perfetto_protos_perfetto_config_lite_gen",
     ":perfetto_protos_perfetto_trace_chrome_lite_gen",
     ":perfetto_protos_perfetto_trace_filesystem_lite_gen",
@@ -4375,24 +4374,6 @@
     ":perfetto_protos_perfetto_trace_minimal_lite_gen",
     ":perfetto_protos_perfetto_trace_ps_lite_gen",
     ":perfetto_protos_perfetto_trace_sys_stats_lite_gen",
-    "src/base/event.cc",
-    "src/base/file_utils.cc",
-    "src/base/metatrace.cc",
-    "src/base/page_allocator.cc",
-    "src/base/string_splitter.cc",
-    "src/base/string_utils.cc",
-    "src/base/temp_file.cc",
-    "src/base/thread_checker.cc",
-    "src/base/time.cc",
-    "src/base/unix_task_runner.cc",
-    "src/base/virtual_destructors.cc",
-    "src/base/watchdog_posix.cc",
-    "src/protozero/message.cc",
-    "src/protozero/message_handle.cc",
-    "src/protozero/proto_decoder.cc",
-    "src/protozero/proto_field_descriptor.cc",
-    "src/protozero/scattered_stream_null_delegate.cc",
-    "src/protozero/scattered_stream_writer.cc",
     "tools/trace_to_text/ftrace_event_formatter.cc",
     "tools/trace_to_text/ftrace_inode_handler.cc",
     "tools/trace_to_text/main.cc",
@@ -4402,12 +4383,8 @@
     "libprotobuf-cpp-full",
     "libprotobuf-cpp-lite",
   ],
-  static_libs: [
-    "libgtest_prod",
-  ],
   generated_headers: [
     "perfetto_protos_perfetto_common_lite_gen_headers",
-    "perfetto_protos_perfetto_common_zero_gen_headers",
     "perfetto_protos_perfetto_config_lite_gen_headers",
     "perfetto_protos_perfetto_trace_chrome_lite_gen_headers",
     "perfetto_protos_perfetto_trace_filesystem_lite_gen_headers",
diff --git a/include/perfetto/traced/BUILD.gn b/include/perfetto/traced/BUILD.gn
index f7dc66f..2c2760b 100644
--- a/include/perfetto/traced/BUILD.gn
+++ b/include/perfetto/traced/BUILD.gn
@@ -24,7 +24,7 @@
     "../../../gn:default_deps",
   ]
   public_deps = [
-    "../../../protos/perfetto/common:zero",
+    "../../../protos/perfetto/common:lite",
     "../base",
   ]
   sources = [
diff --git a/include/perfetto/traced/sys_stats_counters.h b/include/perfetto/traced/sys_stats_counters.h
index 98d25af..f52651a 100644
--- a/include/perfetto/traced/sys_stats_counters.h
+++ b/include/perfetto/traced/sys_stats_counters.h
@@ -18,7 +18,7 @@
 #define INCLUDE_PERFETTO_TRACED_SYS_STATS_COUNTERS_H_
 
 #include "perfetto/base/utils.h"
-#include "perfetto/common/sys_stats_counters.pbzero.h"
+#include "perfetto/common/sys_stats_counters.pb.h"
 
 #include <vector>
 
@@ -30,181 +30,161 @@
 };
 
 constexpr KeyAndId kMeminfoKeys[] = {
-    {"MemUnspecified", protos::pbzero::MeminfoCounters::MEMINFO_UNSPECIFIED},
-    {"MemTotal", protos::pbzero::MeminfoCounters::MEMINFO_MEM_TOTAL},
-    {"MemFree", protos::pbzero::MeminfoCounters::MEMINFO_MEM_FREE},
-    {"MemAvailable", protos::pbzero::MeminfoCounters::MEMINFO_MEM_AVAILABLE},
-    {"Buffers", protos::pbzero::MeminfoCounters::MEMINFO_BUFFERS},
-    {"Cached", protos::pbzero::MeminfoCounters::MEMINFO_CACHED},
-    {"SwapCached", protos::pbzero::MeminfoCounters::MEMINFO_SWAP_CACHED},
-    {"Active", protos::pbzero::MeminfoCounters::MEMINFO_ACTIVE},
-    {"Inactive", protos::pbzero::MeminfoCounters::MEMINFO_INACTIVE},
-    {"Active(anon)", protos::pbzero::MeminfoCounters::MEMINFO_ACTIVE_ANON},
-    {"Inactive(anon)", protos::pbzero::MeminfoCounters::MEMINFO_INACTIVE_ANON},
-    {"Active(file)", protos::pbzero::MeminfoCounters::MEMINFO_ACTIVE_FILE},
-    {"Inactive(file)", protos::pbzero::MeminfoCounters::MEMINFO_INACTIVE_FILE},
-    {"Unevictable", protos::pbzero::MeminfoCounters::MEMINFO_UNEVICTABLE},
-    {"Mlocked", protos::pbzero::MeminfoCounters::MEMINFO_MLOCKED},
-    {"SwapTotal", protos::pbzero::MeminfoCounters::MEMINFO_SWAP_TOTAL},
-    {"SwapFree", protos::pbzero::MeminfoCounters::MEMINFO_SWAP_FREE},
-    {"Dirty", protos::pbzero::MeminfoCounters::MEMINFO_DIRTY},
-    {"Writeback", protos::pbzero::MeminfoCounters::MEMINFO_WRITEBACK},
-    {"AnonPages", protos::pbzero::MeminfoCounters::MEMINFO_ANON_PAGES},
-    {"Mapped", protos::pbzero::MeminfoCounters::MEMINFO_MAPPED},
-    {"Shmem", protos::pbzero::MeminfoCounters::MEMINFO_SHMEM},
-    {"Slab", protos::pbzero::MeminfoCounters::MEMINFO_SLAB},
-    {"SReclaimable", protos::pbzero::MeminfoCounters::MEMINFO_SLAB_RECLAIMABLE},
-    {"SUnreclaim", protos::pbzero::MeminfoCounters::MEMINFO_SLAB_UNRECLAIMABLE},
-    {"KernelStack", protos::pbzero::MeminfoCounters::MEMINFO_KERNEL_STACK},
-    {"PageTables", protos::pbzero::MeminfoCounters::MEMINFO_PAGE_TABLES},
-    {"CommitLimit", protos::pbzero::MeminfoCounters::MEMINFO_COMMIT_LIMIT},
-    {"Committed_AS", protos::pbzero::MeminfoCounters::MEMINFO_COMMITED_AS},
-    {"VmallocTotal", protos::pbzero::MeminfoCounters::MEMINFO_VMALLOC_TOTAL},
-    {"VmallocUsed", protos::pbzero::MeminfoCounters::MEMINFO_VMALLOC_USED},
-    {"VmallocChunk", protos::pbzero::MeminfoCounters::MEMINFO_VMALLOC_CHUNK},
-    {"CmaTotal", protos::pbzero::MeminfoCounters::MEMINFO_CMA_TOTAL},
-    {"CmaFree", protos::pbzero::MeminfoCounters::MEMINFO_CMA_FREE},
+    {"MemUnspecified", protos::MeminfoCounters::MEMINFO_UNSPECIFIED},
+    {"MemTotal", protos::MeminfoCounters::MEMINFO_MEM_TOTAL},
+    {"MemFree", protos::MeminfoCounters::MEMINFO_MEM_FREE},
+    {"MemAvailable", protos::MeminfoCounters::MEMINFO_MEM_AVAILABLE},
+    {"Buffers", protos::MeminfoCounters::MEMINFO_BUFFERS},
+    {"Cached", protos::MeminfoCounters::MEMINFO_CACHED},
+    {"SwapCached", protos::MeminfoCounters::MEMINFO_SWAP_CACHED},
+    {"Active", protos::MeminfoCounters::MEMINFO_ACTIVE},
+    {"Inactive", protos::MeminfoCounters::MEMINFO_INACTIVE},
+    {"Active(anon)", protos::MeminfoCounters::MEMINFO_ACTIVE_ANON},
+    {"Inactive(anon)", protos::MeminfoCounters::MEMINFO_INACTIVE_ANON},
+    {"Active(file)", protos::MeminfoCounters::MEMINFO_ACTIVE_FILE},
+    {"Inactive(file)", protos::MeminfoCounters::MEMINFO_INACTIVE_FILE},
+    {"Unevictable", protos::MeminfoCounters::MEMINFO_UNEVICTABLE},
+    {"Mlocked", protos::MeminfoCounters::MEMINFO_MLOCKED},
+    {"SwapTotal", protos::MeminfoCounters::MEMINFO_SWAP_TOTAL},
+    {"SwapFree", protos::MeminfoCounters::MEMINFO_SWAP_FREE},
+    {"Dirty", protos::MeminfoCounters::MEMINFO_DIRTY},
+    {"Writeback", protos::MeminfoCounters::MEMINFO_WRITEBACK},
+    {"AnonPages", protos::MeminfoCounters::MEMINFO_ANON_PAGES},
+    {"Mapped", protos::MeminfoCounters::MEMINFO_MAPPED},
+    {"Shmem", protos::MeminfoCounters::MEMINFO_SHMEM},
+    {"Slab", protos::MeminfoCounters::MEMINFO_SLAB},
+    {"SReclaimable", protos::MeminfoCounters::MEMINFO_SLAB_RECLAIMABLE},
+    {"SUnreclaim", protos::MeminfoCounters::MEMINFO_SLAB_UNRECLAIMABLE},
+    {"KernelStack", protos::MeminfoCounters::MEMINFO_KERNEL_STACK},
+    {"PageTables", protos::MeminfoCounters::MEMINFO_PAGE_TABLES},
+    {"CommitLimit", protos::MeminfoCounters::MEMINFO_COMMIT_LIMIT},
+    {"Committed_AS", protos::MeminfoCounters::MEMINFO_COMMITED_AS},
+    {"VmallocTotal", protos::MeminfoCounters::MEMINFO_VMALLOC_TOTAL},
+    {"VmallocUsed", protos::MeminfoCounters::MEMINFO_VMALLOC_USED},
+    {"VmallocChunk", protos::MeminfoCounters::MEMINFO_VMALLOC_CHUNK},
+    {"CmaTotal", protos::MeminfoCounters::MEMINFO_CMA_TOTAL},
+    {"CmaFree", protos::MeminfoCounters::MEMINFO_CMA_FREE},
 };
 
 const KeyAndId kVmstatKeys[] = {
-    {"VmstatUnspecified", protos::pbzero::VmstatCounters::VMSTAT_UNSPECIFIED},
-    {"nr_free_pages", protos::pbzero::VmstatCounters::VMSTAT_NR_FREE_PAGES},
-    {"nr_alloc_batch", protos::pbzero::VmstatCounters::VMSTAT_NR_ALLOC_BATCH},
-    {"nr_inactive_anon",
-     protos::pbzero::VmstatCounters::VMSTAT_NR_INACTIVE_ANON},
-    {"nr_active_anon", protos::pbzero::VmstatCounters::VMSTAT_NR_ACTIVE_ANON},
-    {"nr_inactive_file",
-     protos::pbzero::VmstatCounters::VMSTAT_NR_INACTIVE_FILE},
-    {"nr_active_file", protos::pbzero::VmstatCounters::VMSTAT_NR_ACTIVE_FILE},
-    {"nr_unevictable", protos::pbzero::VmstatCounters::VMSTAT_NR_UNEVICTABLE},
-    {"nr_mlock", protos::pbzero::VmstatCounters::VMSTAT_NR_MLOCK},
-    {"nr_anon_pages", protos::pbzero::VmstatCounters::VMSTAT_NR_ANON_PAGES},
-    {"nr_mapped", protos::pbzero::VmstatCounters::VMSTAT_NR_MAPPED},
-    {"nr_file_pages", protos::pbzero::VmstatCounters::VMSTAT_NR_FILE_PAGES},
-    {"nr_dirty", protos::pbzero::VmstatCounters::VMSTAT_NR_DIRTY},
-    {"nr_writeback", protos::pbzero::VmstatCounters::VMSTAT_NR_WRITEBACK},
-    {"nr_slab_reclaimable",
-     protos::pbzero::VmstatCounters::VMSTAT_NR_SLAB_RECLAIMABLE},
+    {"VmstatUnspecified", protos::VmstatCounters::VMSTAT_UNSPECIFIED},
+    {"nr_free_pages", protos::VmstatCounters::VMSTAT_NR_FREE_PAGES},
+    {"nr_alloc_batch", protos::VmstatCounters::VMSTAT_NR_ALLOC_BATCH},
+    {"nr_inactive_anon", protos::VmstatCounters::VMSTAT_NR_INACTIVE_ANON},
+    {"nr_active_anon", protos::VmstatCounters::VMSTAT_NR_ACTIVE_ANON},
+    {"nr_inactive_file", protos::VmstatCounters::VMSTAT_NR_INACTIVE_FILE},
+    {"nr_active_file", protos::VmstatCounters::VMSTAT_NR_ACTIVE_FILE},
+    {"nr_unevictable", protos::VmstatCounters::VMSTAT_NR_UNEVICTABLE},
+    {"nr_mlock", protos::VmstatCounters::VMSTAT_NR_MLOCK},
+    {"nr_anon_pages", protos::VmstatCounters::VMSTAT_NR_ANON_PAGES},
+    {"nr_mapped", protos::VmstatCounters::VMSTAT_NR_MAPPED},
+    {"nr_file_pages", protos::VmstatCounters::VMSTAT_NR_FILE_PAGES},
+    {"nr_dirty", protos::VmstatCounters::VMSTAT_NR_DIRTY},
+    {"nr_writeback", protos::VmstatCounters::VMSTAT_NR_WRITEBACK},
+    {"nr_slab_reclaimable", protos::VmstatCounters::VMSTAT_NR_SLAB_RECLAIMABLE},
     {"nr_slab_unreclaimable",
-     protos::pbzero::VmstatCounters::VMSTAT_NR_SLAB_UNRECLAIMABLE},
-    {"nr_page_table_pages",
-     protos::pbzero::VmstatCounters::VMSTAT_NR_PAGE_TABLE_PAGES},
-    {"nr_kernel_stack", protos::pbzero::VmstatCounters::VMSTAT_NR_KERNEL_STACK},
-    {"nr_overhead", protos::pbzero::VmstatCounters::VMSTAT_NR_OVERHEAD},
-    {"nr_unstable", protos::pbzero::VmstatCounters::VMSTAT_NR_UNSTABLE},
-    {"nr_bounce", protos::pbzero::VmstatCounters::VMSTAT_NR_BOUNCE},
-    {"nr_vmscan_write", protos::pbzero::VmstatCounters::VMSTAT_NR_VMSCAN_WRITE},
+     protos::VmstatCounters::VMSTAT_NR_SLAB_UNRECLAIMABLE},
+    {"nr_page_table_pages", protos::VmstatCounters::VMSTAT_NR_PAGE_TABLE_PAGES},
+    {"nr_kernel_stack", protos::VmstatCounters::VMSTAT_NR_KERNEL_STACK},
+    {"nr_overhead", protos::VmstatCounters::VMSTAT_NR_OVERHEAD},
+    {"nr_unstable", protos::VmstatCounters::VMSTAT_NR_UNSTABLE},
+    {"nr_bounce", protos::VmstatCounters::VMSTAT_NR_BOUNCE},
+    {"nr_vmscan_write", protos::VmstatCounters::VMSTAT_NR_VMSCAN_WRITE},
     {"nr_vmscan_immediate_reclaim",
-     protos::pbzero::VmstatCounters::VMSTAT_NR_VMSCAN_IMMEDIATE_RECLAIM},
-    {"nr_writeback_temp",
-     protos::pbzero::VmstatCounters::VMSTAT_NR_WRITEBACK_TEMP},
-    {"nr_isolated_anon",
-     protos::pbzero::VmstatCounters::VMSTAT_NR_ISOLATED_ANON},
-    {"nr_isolated_file",
-     protos::pbzero::VmstatCounters::VMSTAT_NR_ISOLATED_FILE},
-    {"nr_shmem", protos::pbzero::VmstatCounters::VMSTAT_NR_SHMEM},
-    {"nr_dirtied", protos::pbzero::VmstatCounters::VMSTAT_NR_DIRTIED},
-    {"nr_written", protos::pbzero::VmstatCounters::VMSTAT_NR_WRITTEN},
-    {"nr_pages_scanned",
-     protos::pbzero::VmstatCounters::VMSTAT_NR_PAGES_SCANNED},
-    {"workingset_refault",
-     protos::pbzero::VmstatCounters::VMSTAT_WORKINGSET_REFAULT},
-    {"workingset_activate",
-     protos::pbzero::VmstatCounters::VMSTAT_WORKINGSET_ACTIVATE},
+     protos::VmstatCounters::VMSTAT_NR_VMSCAN_IMMEDIATE_RECLAIM},
+    {"nr_writeback_temp", protos::VmstatCounters::VMSTAT_NR_WRITEBACK_TEMP},
+    {"nr_isolated_anon", protos::VmstatCounters::VMSTAT_NR_ISOLATED_ANON},
+    {"nr_isolated_file", protos::VmstatCounters::VMSTAT_NR_ISOLATED_FILE},
+    {"nr_shmem", protos::VmstatCounters::VMSTAT_NR_SHMEM},
+    {"nr_dirtied", protos::VmstatCounters::VMSTAT_NR_DIRTIED},
+    {"nr_written", protos::VmstatCounters::VMSTAT_NR_WRITTEN},
+    {"nr_pages_scanned", protos::VmstatCounters::VMSTAT_NR_PAGES_SCANNED},
+    {"workingset_refault", protos::VmstatCounters::VMSTAT_WORKINGSET_REFAULT},
+    {"workingset_activate", protos::VmstatCounters::VMSTAT_WORKINGSET_ACTIVATE},
     {"workingset_nodereclaim",
-     protos::pbzero::VmstatCounters::VMSTAT_WORKINGSET_NODERECLAIM},
+     protos::VmstatCounters::VMSTAT_WORKINGSET_NODERECLAIM},
     {"nr_anon_transparent_hugepages",
-     protos::pbzero::VmstatCounters::VMSTAT_NR_ANON_TRANSPARENT_HUGEPAGES},
-    {"nr_free_cma", protos::pbzero::VmstatCounters::VMSTAT_NR_FREE_CMA},
-    {"nr_swapcache", protos::pbzero::VmstatCounters::VMSTAT_NR_SWAPCACHE},
-    {"nr_dirty_threshold",
-     protos::pbzero::VmstatCounters::VMSTAT_NR_DIRTY_THRESHOLD},
+     protos::VmstatCounters::VMSTAT_NR_ANON_TRANSPARENT_HUGEPAGES},
+    {"nr_free_cma", protos::VmstatCounters::VMSTAT_NR_FREE_CMA},
+    {"nr_swapcache", protos::VmstatCounters::VMSTAT_NR_SWAPCACHE},
+    {"nr_dirty_threshold", protos::VmstatCounters::VMSTAT_NR_DIRTY_THRESHOLD},
     {"nr_dirty_background_threshold",
-     protos::pbzero::VmstatCounters::VMSTAT_NR_DIRTY_BACKGROUND_THRESHOLD},
-    {"pgpgin", protos::pbzero::VmstatCounters::VMSTAT_PGPGIN},
-    {"pgpgout", protos::pbzero::VmstatCounters::VMSTAT_PGPGOUT},
-    {"pgpgoutclean", protos::pbzero::VmstatCounters::VMSTAT_PGPGOUTCLEAN},
-    {"pswpin", protos::pbzero::VmstatCounters::VMSTAT_PSWPIN},
-    {"pswpout", protos::pbzero::VmstatCounters::VMSTAT_PSWPOUT},
-    {"pgalloc_dma", protos::pbzero::VmstatCounters::VMSTAT_PGALLOC_DMA},
-    {"pgalloc_normal", protos::pbzero::VmstatCounters::VMSTAT_PGALLOC_NORMAL},
-    {"pgalloc_movable", protos::pbzero::VmstatCounters::VMSTAT_PGALLOC_MOVABLE},
-    {"pgfree", protos::pbzero::VmstatCounters::VMSTAT_PGFREE},
-    {"pgactivate", protos::pbzero::VmstatCounters::VMSTAT_PGACTIVATE},
-    {"pgdeactivate", protos::pbzero::VmstatCounters::VMSTAT_PGDEACTIVATE},
-    {"pgfault", protos::pbzero::VmstatCounters::VMSTAT_PGFAULT},
-    {"pgmajfault", protos::pbzero::VmstatCounters::VMSTAT_PGMAJFAULT},
-    {"pgrefill_dma", protos::pbzero::VmstatCounters::VMSTAT_PGREFILL_DMA},
-    {"pgrefill_normal", protos::pbzero::VmstatCounters::VMSTAT_PGREFILL_NORMAL},
-    {"pgrefill_movable",
-     protos::pbzero::VmstatCounters::VMSTAT_PGREFILL_MOVABLE},
-    {"pgsteal_kswapd_dma",
-     protos::pbzero::VmstatCounters::VMSTAT_PGSTEAL_KSWAPD_DMA},
+     protos::VmstatCounters::VMSTAT_NR_DIRTY_BACKGROUND_THRESHOLD},
+    {"pgpgin", protos::VmstatCounters::VMSTAT_PGPGIN},
+    {"pgpgout", protos::VmstatCounters::VMSTAT_PGPGOUT},
+    {"pgpgoutclean", protos::VmstatCounters::VMSTAT_PGPGOUTCLEAN},
+    {"pswpin", protos::VmstatCounters::VMSTAT_PSWPIN},
+    {"pswpout", protos::VmstatCounters::VMSTAT_PSWPOUT},
+    {"pgalloc_dma", protos::VmstatCounters::VMSTAT_PGALLOC_DMA},
+    {"pgalloc_normal", protos::VmstatCounters::VMSTAT_PGALLOC_NORMAL},
+    {"pgalloc_movable", protos::VmstatCounters::VMSTAT_PGALLOC_MOVABLE},
+    {"pgfree", protos::VmstatCounters::VMSTAT_PGFREE},
+    {"pgactivate", protos::VmstatCounters::VMSTAT_PGACTIVATE},
+    {"pgdeactivate", protos::VmstatCounters::VMSTAT_PGDEACTIVATE},
+    {"pgfault", protos::VmstatCounters::VMSTAT_PGFAULT},
+    {"pgmajfault", protos::VmstatCounters::VMSTAT_PGMAJFAULT},
+    {"pgrefill_dma", protos::VmstatCounters::VMSTAT_PGREFILL_DMA},
+    {"pgrefill_normal", protos::VmstatCounters::VMSTAT_PGREFILL_NORMAL},
+    {"pgrefill_movable", protos::VmstatCounters::VMSTAT_PGREFILL_MOVABLE},
+    {"pgsteal_kswapd_dma", protos::VmstatCounters::VMSTAT_PGSTEAL_KSWAPD_DMA},
     {"pgsteal_kswapd_normal",
-     protos::pbzero::VmstatCounters::VMSTAT_PGSTEAL_KSWAPD_NORMAL},
+     protos::VmstatCounters::VMSTAT_PGSTEAL_KSWAPD_NORMAL},
     {"pgsteal_kswapd_movable",
-     protos::pbzero::VmstatCounters::VMSTAT_PGSTEAL_KSWAPD_MOVABLE},
-    {"pgsteal_direct_dma",
-     protos::pbzero::VmstatCounters::VMSTAT_PGSTEAL_DIRECT_DMA},
+     protos::VmstatCounters::VMSTAT_PGSTEAL_KSWAPD_MOVABLE},
+    {"pgsteal_direct_dma", protos::VmstatCounters::VMSTAT_PGSTEAL_DIRECT_DMA},
     {"pgsteal_direct_normal",
-     protos::pbzero::VmstatCounters::VMSTAT_PGSTEAL_DIRECT_NORMAL},
+     protos::VmstatCounters::VMSTAT_PGSTEAL_DIRECT_NORMAL},
     {"pgsteal_direct_movable",
-     protos::pbzero::VmstatCounters::VMSTAT_PGSTEAL_DIRECT_MOVABLE},
-    {"pgscan_kswapd_dma",
-     protos::pbzero::VmstatCounters::VMSTAT_PGSCAN_KSWAPD_DMA},
+     protos::VmstatCounters::VMSTAT_PGSTEAL_DIRECT_MOVABLE},
+    {"pgscan_kswapd_dma", protos::VmstatCounters::VMSTAT_PGSCAN_KSWAPD_DMA},
     {"pgscan_kswapd_normal",
-     protos::pbzero::VmstatCounters::VMSTAT_PGSCAN_KSWAPD_NORMAL},
+     protos::VmstatCounters::VMSTAT_PGSCAN_KSWAPD_NORMAL},
     {"pgscan_kswapd_movable",
-     protos::pbzero::VmstatCounters::VMSTAT_PGSCAN_KSWAPD_MOVABLE},
-    {"pgscan_direct_dma",
-     protos::pbzero::VmstatCounters::VMSTAT_PGSCAN_DIRECT_DMA},
+     protos::VmstatCounters::VMSTAT_PGSCAN_KSWAPD_MOVABLE},
+    {"pgscan_direct_dma", protos::VmstatCounters::VMSTAT_PGSCAN_DIRECT_DMA},
     {"pgscan_direct_normal",
-     protos::pbzero::VmstatCounters::VMSTAT_PGSCAN_DIRECT_NORMAL},
+     protos::VmstatCounters::VMSTAT_PGSCAN_DIRECT_NORMAL},
     {"pgscan_direct_movable",
-     protos::pbzero::VmstatCounters::VMSTAT_PGSCAN_DIRECT_MOVABLE},
+     protos::VmstatCounters::VMSTAT_PGSCAN_DIRECT_MOVABLE},
     {"pgscan_direct_throttle",
-     protos::pbzero::VmstatCounters::VMSTAT_PGSCAN_DIRECT_THROTTLE},
-    {"pginodesteal", protos::pbzero::VmstatCounters::VMSTAT_PGINODESTEAL},
-    {"slabs_scanned", protos::pbzero::VmstatCounters::VMSTAT_SLABS_SCANNED},
-    {"kswapd_inodesteal",
-     protos::pbzero::VmstatCounters::VMSTAT_KSWAPD_INODESTEAL},
+     protos::VmstatCounters::VMSTAT_PGSCAN_DIRECT_THROTTLE},
+    {"pginodesteal", protos::VmstatCounters::VMSTAT_PGINODESTEAL},
+    {"slabs_scanned", protos::VmstatCounters::VMSTAT_SLABS_SCANNED},
+    {"kswapd_inodesteal", protos::VmstatCounters::VMSTAT_KSWAPD_INODESTEAL},
     {"kswapd_low_wmark_hit_quickly",
-     protos::pbzero::VmstatCounters::VMSTAT_KSWAPD_LOW_WMARK_HIT_QUICKLY},
+     protos::VmstatCounters::VMSTAT_KSWAPD_LOW_WMARK_HIT_QUICKLY},
     {"kswapd_high_wmark_hit_quickly",
-     protos::pbzero::VmstatCounters::VMSTAT_KSWAPD_HIGH_WMARK_HIT_QUICKLY},
-    {"pageoutrun", protos::pbzero::VmstatCounters::VMSTAT_PAGEOUTRUN},
-    {"allocstall", protos::pbzero::VmstatCounters::VMSTAT_ALLOCSTALL},
-    {"pgrotated", protos::pbzero::VmstatCounters::VMSTAT_PGROTATED},
-    {"drop_pagecache", protos::pbzero::VmstatCounters::VMSTAT_DROP_PAGECACHE},
-    {"drop_slab", protos::pbzero::VmstatCounters::VMSTAT_DROP_SLAB},
-    {"pgmigrate_success",
-     protos::pbzero::VmstatCounters::VMSTAT_PGMIGRATE_SUCCESS},
-    {"pgmigrate_fail", protos::pbzero::VmstatCounters::VMSTAT_PGMIGRATE_FAIL},
+     protos::VmstatCounters::VMSTAT_KSWAPD_HIGH_WMARK_HIT_QUICKLY},
+    {"pageoutrun", protos::VmstatCounters::VMSTAT_PAGEOUTRUN},
+    {"allocstall", protos::VmstatCounters::VMSTAT_ALLOCSTALL},
+    {"pgrotated", protos::VmstatCounters::VMSTAT_PGROTATED},
+    {"drop_pagecache", protos::VmstatCounters::VMSTAT_DROP_PAGECACHE},
+    {"drop_slab", protos::VmstatCounters::VMSTAT_DROP_SLAB},
+    {"pgmigrate_success", protos::VmstatCounters::VMSTAT_PGMIGRATE_SUCCESS},
+    {"pgmigrate_fail", protos::VmstatCounters::VMSTAT_PGMIGRATE_FAIL},
     {"compact_migrate_scanned",
-     protos::pbzero::VmstatCounters::VMSTAT_COMPACT_MIGRATE_SCANNED},
+     protos::VmstatCounters::VMSTAT_COMPACT_MIGRATE_SCANNED},
     {"compact_free_scanned",
-     protos::pbzero::VmstatCounters::VMSTAT_COMPACT_FREE_SCANNED},
-    {"compact_isolated",
-     protos::pbzero::VmstatCounters::VMSTAT_COMPACT_ISOLATED},
-    {"compact_stall", protos::pbzero::VmstatCounters::VMSTAT_COMPACT_STALL},
-    {"compact_fail", protos::pbzero::VmstatCounters::VMSTAT_COMPACT_FAIL},
-    {"compact_success", protos::pbzero::VmstatCounters::VMSTAT_COMPACT_SUCCESS},
-    {"compact_daemon_wake",
-     protos::pbzero::VmstatCounters::VMSTAT_COMPACT_DAEMON_WAKE},
+     protos::VmstatCounters::VMSTAT_COMPACT_FREE_SCANNED},
+    {"compact_isolated", protos::VmstatCounters::VMSTAT_COMPACT_ISOLATED},
+    {"compact_stall", protos::VmstatCounters::VMSTAT_COMPACT_STALL},
+    {"compact_fail", protos::VmstatCounters::VMSTAT_COMPACT_FAIL},
+    {"compact_success", protos::VmstatCounters::VMSTAT_COMPACT_SUCCESS},
+    {"compact_daemon_wake", protos::VmstatCounters::VMSTAT_COMPACT_DAEMON_WAKE},
     {"unevictable_pgs_culled",
-     protos::pbzero::VmstatCounters::VMSTAT_UNEVICTABLE_PGS_CULLED},
+     protos::VmstatCounters::VMSTAT_UNEVICTABLE_PGS_CULLED},
     {"unevictable_pgs_scanned",
-     protos::pbzero::VmstatCounters::VMSTAT_UNEVICTABLE_PGS_SCANNED},
+     protos::VmstatCounters::VMSTAT_UNEVICTABLE_PGS_SCANNED},
     {"unevictable_pgs_rescued",
-     protos::pbzero::VmstatCounters::VMSTAT_UNEVICTABLE_PGS_RESCUED},
+     protos::VmstatCounters::VMSTAT_UNEVICTABLE_PGS_RESCUED},
     {"unevictable_pgs_mlocked",
-     protos::pbzero::VmstatCounters::VMSTAT_UNEVICTABLE_PGS_MLOCKED},
+     protos::VmstatCounters::VMSTAT_UNEVICTABLE_PGS_MLOCKED},
     {"unevictable_pgs_munlocked",
-     protos::pbzero::VmstatCounters::VMSTAT_UNEVICTABLE_PGS_MUNLOCKED},
+     protos::VmstatCounters::VMSTAT_UNEVICTABLE_PGS_MUNLOCKED},
     {"unevictable_pgs_cleared",
-     protos::pbzero::VmstatCounters::VMSTAT_UNEVICTABLE_PGS_CLEARED},
+     protos::VmstatCounters::VMSTAT_UNEVICTABLE_PGS_CLEARED},
     {"unevictable_pgs_stranded",
-     protos::pbzero::VmstatCounters::VMSTAT_UNEVICTABLE_PGS_STRANDED},
+     protos::VmstatCounters::VMSTAT_UNEVICTABLE_PGS_STRANDED},
 };
 
 // Returns a lookup table of meminfo counter names addressable by counter id.
diff --git a/src/profiling/memory/BUILD.gn b/src/profiling/memory/BUILD.gn
index 369cb71..84411d8 100644
--- a/src/profiling/memory/BUILD.gn
+++ b/src/profiling/memory/BUILD.gn
@@ -40,6 +40,7 @@
   sources = [
     "bookkeeping.cc",
     "bookkeeping.h",
+    "queue_messages.h",
     "record_reader.cc",
     "record_reader.h",
     "socket_listener.cc",
diff --git a/src/profiling/memory/README.md b/src/profiling/memory/README.md
new file mode 100644
index 0000000..a0ecaa8
--- /dev/null
+++ b/src/profiling/memory/README.md
@@ -0,0 +1,34 @@
+# heapprofd - Android Heap Profiler
+
+_These are temporary instructions while heapprofd is under development. They are
+subject to frequent change and will be obsoleted once heapprofd is integrated
+into Perfetto._
+
+Currently heapprofd only works with SELinux disabled and when run as root.
+
+To start profiling the process `${PID}`, run the following sequence of commands.
+
+```
+adb root
+adb shell setenforce 0
+
+adb shell rm /dev/socket/heapprofd
+adb shell 'heapprofd -r 128000 /dev/socket/heapprofd' &
+adb shell kill -36 ${PID}
+```
+
+To obtain heap dumps for all profiled processes, send `SIGUSR1` to heapprofd,
+which writes one `heap_dump.${PID}` file per process to `/data/local/tmp`.
+
+```
+adb shell killall -USR1 heapprofd
+adb pull /data/local/tmp/heap_dump.${PID}
+```
+
+This file can then be converted to a flamegraph using Brendan Gregg's
+[`flamegraph.pl`](
+  https://github.com/brendangregg/FlameGraph/blob/master/flamegraph.pl).
+
+```
+flamegraph.pl heap_dump.${PID} > heap_dump.${PID}.svg
+```
diff --git a/src/profiling/memory/bookkeeping.cc b/src/profiling/memory/bookkeeping.cc
index 7a31502..c7f6c71 100644
--- a/src/profiling/memory/bookkeeping.cc
+++ b/src/profiling/memory/bookkeeping.cc
@@ -16,7 +16,14 @@
 
 #include "src/profiling/memory/bookkeeping.h"
 
+#include <fcntl.h>
+#include <inttypes.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "perfetto/base/file_utils.h"
 #include "perfetto/base/logging.h"
+#include "perfetto/base/scoped_file.h"
 
 namespace perfetto {
 
@@ -28,6 +35,17 @@
   return child;
 }
 
+// Returns the locations found on the path from this node up through its
+// ancestors.
+std::vector<InternedCodeLocation> GlobalCallstackTrie::Node::BuildCallstack()
+    const {
+  // Follow parent_ links until they run out; this node's location comes first.
+  std::vector<InternedCodeLocation> locations;
+  for (const Node* cur = this; cur != nullptr; cur = cur->parent_)
+    locations.emplace_back(cur->location_);
+  return locations;
+}
+
 void HeapTracker::RecordMalloc(const std::vector<CodeLocation>& callstack,
                                uint64_t address,
                                uint64_t size,
@@ -85,6 +103,24 @@
   allocations_.erase(leaf_it);
 }
 
+void HeapTracker::Dump(int fd) {
+  // TODO(fmayer): This should dump protocol buffers into the perfetto service.
+  // For now, output a text file compatible with flamegraph.pl: one line per
+  // entry in allocations_, "name;name;... <alloc_size>".
+  for (const auto& addr_and_alloc : allocations_) {
+    const Allocation& alloc = addr_and_alloc.second;
+    std::string line;
+    const char* separator = "";
+    for (const InternedCodeLocation& frame : alloc.node->BuildCallstack()) {
+      line += separator;
+      line += frame.function_name.str();
+      separator = ";";
+    }
+    line += " " + std::to_string(alloc.alloc_size) + "\n";
+    base::WriteAll(fd, line.c_str(), line.size());
+  }
+}
+
 uint64_t GlobalCallstackTrie::GetCumSizeForTesting(
     const std::vector<CodeLocation>& callstack) {
   Node* node = &root_;
@@ -123,4 +159,89 @@
   }
 }
 
+// Applies one queued record: a Dump writes every tracked heap to
+// "<file_name_>.<pid>"; Malloc/Free update the HeapTracker of rec->pid.
+void BookkeepingThread::HandleBookkeepingRecord(BookkeepingRecord* rec) {
+  BookkeepingData* bookkeeping_data = nullptr;
+  if (rec->pid != 0) {
+    std::lock_guard<std::mutex> l(bookkeeping_mutex_);
+    auto it = bookkeeping_data_.find(rec->pid);
+    if (it == bookkeeping_data_.end()) {
+      PERFETTO_LOG("Invalid pid: %d", rec->pid);
+      PERFETTO_DCHECK(false);
+      return;
+    }
+    bookkeeping_data = &it->second;
+  }
+
+  if (rec->record_type == BookkeepingRecord::Type::Dump) {
+    PERFETTO_LOG("Dumping heaps");
+    // Notify*() mutate the map from other threads: lock the whole walk.
+    std::lock_guard<std::mutex> l(bookkeeping_mutex_);
+    auto it = bookkeeping_data_.begin();
+    while (it != bookkeeping_data_.end()) {
+      std::string dump_file_name = file_name_ + "." + std::to_string(it->first);
+      PERFETTO_LOG("Dumping %d to %s", it->first, dump_file_name.c_str());
+      // O_TRUNC so a shorter dump does not keep the tail of an older one.
+      base::ScopedFile fd =
+          base::OpenFile(dump_file_name, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+      if (fd)
+        it->second.heap_tracker.Dump(fd.get());
+      else
+        PERFETTO_PLOG("Failed to open %s", dump_file_name.c_str());
+      // Garbage collect for processes that already went away.
+      if (it->second.ref_count == 0)
+        it = bookkeeping_data_.erase(it);
+      else
+        ++it;
+    }
+  } else if (!bookkeeping_data) {
+    PERFETTO_DCHECK(false);  // Malloc/Free need a known, non-zero pid.
+  } else if (rec->record_type == BookkeepingRecord::Type::Free) {
+    uint64_t num_entries = rec->free_record.metadata->num_entries;
+    if (num_entries > kFreePageSize)
+      return;
+    for (size_t i = 0; i < num_entries; ++i) {
+      const FreePageEntry& e = rec->free_record.metadata->entries[i];
+      bookkeeping_data->heap_tracker.RecordFree(e.addr, e.sequence_number);
+    }
+  } else if (rec->record_type == BookkeepingRecord::Type::Malloc) {
+    std::vector<CodeLocation> code_locations;
+    for (unwindstack::FrameData& frame : rec->alloc_record.frames)
+      code_locations.emplace_back(frame.map_name, frame.function_name);
+    const auto& meta = rec->alloc_record.alloc_metadata;
+    bookkeeping_data->heap_tracker.RecordMalloc(
+        code_locations, meta.alloc_address, meta.alloc_size,
+        meta.sequence_number);
+  } else {
+    PERFETTO_DCHECK(false);
+  }
+}
+
+void BookkeepingThread::NotifyClientConnected(pid_t pid) {
+  std::lock_guard<std::mutex> guard(bookkeeping_mutex_);
+  // emplace() leaves an existing entry for pid untouched and only inserts a
+  // fresh BookkeepingData when there is none; either way .first points at it.
+  BookkeepingData& data =
+      bookkeeping_data_.emplace(pid, &callsites_).first->second;
+  ++data.ref_count;
+}
+
+void BookkeepingThread::NotifyClientDisconnected(pid_t pid) {
+  std::lock_guard<std::mutex> guard(bookkeeping_mutex_);
+  auto entry = bookkeeping_data_.find(pid);
+  if (entry != bookkeeping_data_.end()) {
+    --entry->second.ref_count;  // Only the count drops; the entry stays put.
+    return;
+  }
+  PERFETTO_DCHECK(false);  // No entry exists for this pid.
+}
+
+__attribute__((noreturn)) void BookkeepingThread::Run(
+    BoundedQueue<BookkeepingRecord>* input_queue) {
+  while (true) {  // Never exits: apply each record taken from the queue.
+    BookkeepingRecord next_record = input_queue->Get();
+    HandleBookkeepingRecord(&next_record);
+  }
+}
+
 }  // namespace perfetto
diff --git a/src/profiling/memory/bookkeeping.h b/src/profiling/memory/bookkeeping.h
index bd56dd8..245c1ef 100644
--- a/src/profiling/memory/bookkeeping.h
+++ b/src/profiling/memory/bookkeeping.h
@@ -18,6 +18,8 @@
 #define SRC_PROFILING_MEMORY_BOOKKEEPING_H_
 
 #include "perfetto/base/lookup_set.h"
+#include "src/profiling/memory/bounded_queue.h"
+#include "src/profiling/memory/queue_messages.h"
 #include "src/profiling/memory/string_interner.h"
 
 #include <map>
@@ -82,6 +84,8 @@
     Node(InternedCodeLocation location, Node* parent)
         : parent_(parent), location_(std::move(location)) {}
 
+    std::vector<InternedCodeLocation> BuildCallstack() const;
+
    private:
     Node* GetOrCreateChild(const InternedCodeLocation& loc);
 
@@ -123,6 +127,7 @@
                     uint64_t size,
                     uint64_t sequence_number);
   void RecordFree(uint64_t address, uint64_t sequence_number);
+  void Dump(int fd);
 
  private:
   static constexpr uint64_t kNoopFree = 0;
@@ -188,6 +193,49 @@
   GlobalCallstackTrie* const callsites_;
 };
 
+struct BookkeepingData {
+  // Ownership of callsites remains with caller and has to outlive this object.
+  explicit BookkeepingData(GlobalCallstackTrie* callsites)
+      : heap_tracker(callsites) {}
+
+  HeapTracker heap_tracker;
+
+  // Number of currently connected sockets for the PID. A plain counter is used
+  // instead of a shared_ptr to HeapTracker because the data must be kept
+  // around until the first dump after the last socket has disconnected.
+  uint64_t ref_count = 0;
+};
+
+// BookkeepingThread owns the BookkeepingData for all processes. The Run()
+// method receives messages on the input_queue and does the bookkeeping.
+class BookkeepingThread {
+ public:
+  // file_name is the prefix of every dump file, which is "<file_name>.<pid>".
+  BookkeepingThread(std::string file_name) : file_name_(std::move(file_name)) {}
+
+  // Loops forever, handling each record it takes from input_queue.
+  void Run(BoundedQueue<BookkeepingRecord>* input_queue);
+
+  // Inform the bookkeeping thread that a socket for this pid connected.
+  // This can be called from arbitrary threads.
+  void NotifyClientConnected(pid_t pid);
+
+  // Inform the bookkeeping thread that a socket for this pid disconnected.
+  // After the last client for a PID disconnects, the BookkeepingData is
+  // retained until the next dump, upon which it gets garbage collected.
+  // This can be called from arbitrary threads.
+  void NotifyClientDisconnected(pid_t pid);
+
+  // Applies a single record: a heap dump, a malloc, or a batch of frees.
+  void HandleBookkeepingRecord(BookkeepingRecord* rec);
+
+ private:
+  GlobalCallstackTrie callsites_;
+  std::map<pid_t, BookkeepingData> bookkeeping_data_;
+  std::mutex bookkeeping_mutex_;  // Held while looking up or modifying the map.
+  std::string file_name_;
+};
+
 }  // namespace perfetto
 
 #endif  // SRC_PROFILING_MEMORY_BOOKKEEPING_H_
diff --git a/src/profiling/memory/heapprofd_integrationtest.cc b/src/profiling/memory/heapprofd_integrationtest.cc
index 2703c4f..671cf02 100644
--- a/src/profiling/memory/heapprofd_integrationtest.cc
+++ b/src/profiling/memory/heapprofd_integrationtest.cc
@@ -51,22 +51,24 @@
 
 TEST_F(HeapprofdIntegrationTest, MAYBE_EndToEnd) {
   GlobalCallstackTrie callsites;
+  // TODO(fmayer): Actually test the dump.
+  BookkeepingThread bookkeeping_thread("");
 
   base::TestTaskRunner task_runner;
   auto done = task_runner.CreateCheckpoint("done");
   constexpr double kSamplingRate = 123;
   SocketListener listener(
       {kSamplingRate},
-      [&done](UnwindingRecord r) {
+      [&done, &bookkeeping_thread](UnwindingRecord r) {
         // TODO(fmayer): Test symbolization and result of unwinding.
         // This check will only work on in-tree builds as out-of-tree
         // libunwindstack is behaving a bit weirdly.
         BookkeepingRecord bookkeeping_record;
         ASSERT_TRUE(HandleUnwindingRecord(&r, &bookkeeping_record));
-        HandleBookkeepingRecord(&bookkeeping_record);
+        bookkeeping_thread.HandleBookkeepingRecord(&bookkeeping_record);
         done();
       },
-      &callsites);
+      &bookkeeping_thread);
 
   auto sock = base::UnixSocket::Listen(kSocketName, &listener, &task_runner);
   if (!sock->is_listening()) {
diff --git a/src/profiling/memory/main.cc b/src/profiling/memory/main.cc
index 850d6c0..1b12767 100644
--- a/src/profiling/memory/main.cc
+++ b/src/profiling/memory/main.cc
@@ -15,10 +15,14 @@
  */
 
 #include <stdlib.h>
+#include <unistd.h>
 #include <array>
 #include <memory>
 #include <vector>
 
+#include <signal.h>
+
+#include "perfetto/base/event.h"
 #include "perfetto/base/unix_socket.h"
 #include "src/profiling/memory/bounded_queue.h"
 #include "src/profiling/memory/socket_listener.h"
@@ -31,7 +35,13 @@
 constexpr size_t kUnwinderQueueSize = 1000;
 constexpr size_t kBookkeepingQueueSize = 1000;
 constexpr size_t kUnwinderThreads = 5;
-constexpr double kSamplingRate = 1;
+constexpr double kDefaultSamplingRate = 1;
+
+base::Event* g_dump_evt = nullptr;
+
+void DumpSignalHandler(int) {  // Signal handler; only pokes |g_dump_evt|.
+  g_dump_evt->Notify();
+}
 
 // We create kUnwinderThreads unwinding threads and one bookeeping thread.
 // The bookkeeping thread is singleton in order to avoid expensive and
@@ -59,12 +69,45 @@
 //           |Bookkeeping Thread|
 //           +------------------+
 int HeapprofdMain(int argc, char** argv) {
-  GlobalCallstackTrie callsites;
+  // TODO(fmayer): This is temporary until heapprofd is integrated with Perfetto
+  // and receives its configuration via that.
+  double sampling_rate = kDefaultSamplingRate;
+  int opt;
+  while ((opt = getopt(argc, argv, "r:")) != -1) {
+    switch (opt) {
+      case 'r': {
+        char* end;
+        sampling_rate = strtol(optarg, &end, 10);
+        if (*end != '\0' || *optarg == '\0')
+          PERFETTO_FATAL("Invalid sampling rate: %s", optarg);
+        break;
+      }
+    }
+  }
+
+  base::UnixTaskRunner task_runner;
+  BoundedQueue<BookkeepingRecord> bookkeeping_queue(kBookkeepingQueueSize);
+  // We set this up before launching any threads, so we do not have to use a
+  // std::atomic for g_dump_evt.
+  g_dump_evt = new base::Event();
+
+  struct sigaction action = {};
+  action.sa_handler = DumpSignalHandler;
+  PERFETTO_CHECK(sigaction(SIGUSR1, &action, nullptr) == 0);
+  task_runner.AddFileDescriptorWatch(g_dump_evt->fd(), [&bookkeeping_queue] {
+    g_dump_evt->Clear();
+
+    BookkeepingRecord rec = {};
+    rec.record_type = BookkeepingRecord::Type::Dump;
+    bookkeeping_queue.Add(std::move(rec));
+  });
+
   std::unique_ptr<base::UnixSocket> sock;
 
-  BoundedQueue<BookkeepingRecord> callsites_queue(kBookkeepingQueueSize);
-  std::thread bookkeeping_thread(
-      [&callsites_queue] { BookkeepingMainLoop(&callsites_queue); });
+  BookkeepingThread bookkeeping_thread("/data/local/tmp/heap_dump");
+  std::thread bookkeeping_th([&bookkeeping_thread, &bookkeeping_queue] {
+    bookkeeping_thread.Run(&bookkeeping_queue);
+  });
 
   std::array<BoundedQueue<UnwindingRecord>, kUnwinderThreads> unwinder_queues;
   for (size_t i = 0; i < kUnwinderThreads; ++i)
@@ -72,8 +115,8 @@
   std::vector<std::thread> unwinding_threads;
   unwinding_threads.reserve(kUnwinderThreads);
   for (size_t i = 0; i < kUnwinderThreads; ++i) {
-    unwinding_threads.emplace_back([&unwinder_queues, &callsites_queue, i] {
-      UnwindingMainLoop(&unwinder_queues[i], &callsites_queue);
+    unwinding_threads.emplace_back([&unwinder_queues, &bookkeeping_queue, i] {
+      UnwindingMainLoop(&unwinder_queues[i], &bookkeeping_queue);
     });
   }
 
@@ -81,15 +124,14 @@
     unwinder_queues[static_cast<size_t>(r.pid) % kUnwinderThreads].Add(
         std::move(r));
   };
-  SocketListener listener({kSamplingRate}, std::move(on_record_received),
-                          &callsites);
+  SocketListener listener({sampling_rate}, std::move(on_record_received),
+                          &bookkeeping_thread);
 
-  base::UnixTaskRunner read_task_runner;
-  if (argc == 2) {
+  if (optind == argc - 1) {
     // Allow to be able to manually specify the socket to listen on
     // for testing and sideloading purposes.
-    sock = base::UnixSocket::Listen(argv[1], &listener, &read_task_runner);
-  } else if (argc == 1) {
+    sock = base::UnixSocket::Listen(argv[argc - 1], &listener, &task_runner);
+  } else if (optind == argc) {
     // When running as a service launched by init on Android, the socket
     // is created by init and passed to the application using an environment
     // variable.
@@ -104,16 +146,17 @@
       PERFETTO_FATAL(
           "Invalid ANDROID_SOCKET_heapprofd. Expected decimal integer.");
     sock = base::UnixSocket::Listen(base::ScopedFile(raw_fd), &listener,
-                                    &read_task_runner);
+                                    &task_runner);
   } else {
-    PERFETTO_FATAL("Invalid number of arguments. %s [SOCKET]", argv[0]);
+    PERFETTO_FATAL("Invalid number of arguments. %s [-r rate] [SOCKET]",
+                   argv[0]);
   }
 
   if (sock->last_error() != 0)
     PERFETTO_FATAL("Failed to initialize socket: %s",
                    strerror(sock->last_error()));
 
-  read_task_runner.Run();
+  task_runner.Run();
   return 0;
 }
 }  // namespace
diff --git a/src/profiling/memory/queue_messages.h b/src/profiling/memory/queue_messages.h
new file mode 100644
index 0000000..45f5809
--- /dev/null
+++ b/src/profiling/memory/queue_messages.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file contains messages sent between the threads over BoundedQueue.
+
+#ifndef SRC_PROFILING_MEMORY_QUEUE_MESSAGES_H_
+#define SRC_PROFILING_MEMORY_QUEUE_MESSAGES_H_
+
+#include <unwindstack/Maps.h>
+#include <unwindstack/Unwinder.h>
+#include "src/profiling/memory/wire_protocol.h"
+
+namespace perfetto {
+
+struct ProcessMetadata;
+
+struct UnwindingRecord {
+  pid_t pid;
+  size_t size;  // Length of |data| in bytes.
+  std::unique_ptr<uint8_t[]> data;
+  std::weak_ptr<ProcessMetadata> metadata;  // Expired if the process is gone.
+};
+
+struct FreeRecord {
+  std::unique_ptr<uint8_t[]> free_data;  // Keeps the raw message alive.
+  // Points into |free_data|, so it is only valid while |free_data| is alive.
+  FreeMetadata* metadata;
+};
+
+struct AllocRecord {
+  AllocMetadata alloc_metadata;
+  std::vector<unwindstack::FrameData> frames;  // Filled in by DoUnwind().
+};
+
+struct BookkeepingRecord {
+  enum class Type {
+    Dump = 0,    // Dump request; sent with no alloc/free payload.
+    Malloc = 1,  // |alloc_record| is populated.
+    Free = 2,    // |free_record| is populated.
+  };
+  pid_t pid;
+  // TODO(fmayer): Use a union.
+  Type record_type;
+  AllocRecord alloc_record;
+  FreeRecord free_record;
+};
+
+}  // namespace perfetto
+
+#endif  // SRC_PROFILING_MEMORY_QUEUE_MESSAGES_H_
diff --git a/src/profiling/memory/socket_listener.cc b/src/profiling/memory/socket_listener.cc
index ea5780f..6d23e64 100644
--- a/src/profiling/memory/socket_listener.cc
+++ b/src/profiling/memory/socket_listener.cc
@@ -20,6 +20,7 @@
 namespace perfetto {
 
 void SocketListener::OnDisconnect(base::UnixSocket* self) {
+  bookkeeping_thread_->NotifyClientDisconnected(self->peer_pid());
   sockets_.erase(self);
 }
 
@@ -28,6 +29,7 @@
     std::unique_ptr<base::UnixSocket> new_connection) {
   base::UnixSocket* new_connection_raw = new_connection.get();
   sockets_.emplace(new_connection_raw, std::move(new_connection));
+  bookkeeping_thread_->NotifyClientConnected(new_connection_raw->peer_pid());
 }
 
 void SocketListener::OnDataAvailable(base::UnixSocket* self) {
@@ -84,7 +86,7 @@
   if (it == process_metadata_.end() || it->second.expired()) {
     // We have not seen the PID yet or the PID is being recycled.
     entry->process_metadata = std::make_shared<ProcessMetadata>(
-        peer_pid, std::move(maps_fd), std::move(mem_fd), callsites_);
+        peer_pid, std::move(maps_fd), std::move(mem_fd));
     process_metadata_[peer_pid] = entry->process_metadata;
   } else {
     // If the process already has metadata, this is an additional socket for
diff --git a/src/profiling/memory/socket_listener.h b/src/profiling/memory/socket_listener.h
index f6d3464..b06a98d 100644
--- a/src/profiling/memory/socket_listener.h
+++ b/src/profiling/memory/socket_listener.h
@@ -19,6 +19,7 @@
 
 #include "perfetto/base/unix_socket.h"
 #include "src/profiling/memory/bookkeeping.h"
+#include "src/profiling/memory/queue_messages.h"
 #include "src/profiling/memory/record_reader.h"
 #include "src/profiling/memory/unwinding.h"
 #include "src/profiling/memory/wire_protocol.h"
@@ -32,10 +33,10 @@
  public:
   SocketListener(ClientConfiguration client_config,
                  std::function<void(UnwindingRecord)> fn,
-                 GlobalCallstackTrie* callsites)
+                 BookkeepingThread* bookkeeping_thread)
       : client_config_(client_config),
         callback_function_(std::move(fn)),
-        callsites_(callsites) {}
+        bookkeeping_thread_(bookkeeping_thread) {}
   void OnDisconnect(base::UnixSocket* self) override;
   void OnNewIncomingConnection(
       base::UnixSocket* self,
@@ -69,7 +70,7 @@
   std::map<base::UnixSocket*, Entry> sockets_;
   std::map<pid_t, std::weak_ptr<ProcessMetadata>> process_metadata_;
   std::function<void(UnwindingRecord)> callback_function_;
-  GlobalCallstackTrie* callsites_;
+  BookkeepingThread* const bookkeeping_thread_;
 };
 
 }  // namespace perfetto
diff --git a/src/profiling/memory/socket_listener_unittest.cc b/src/profiling/memory/socket_listener_unittest.cc
index f40cb9f..2e7910f 100644
--- a/src/profiling/memory/socket_listener_unittest.cc
+++ b/src/profiling/memory/socket_listener_unittest.cc
@@ -53,9 +53,9 @@
     callback_called();
   };
 
-  GlobalCallstackTrie bookkeeping;
+  BookkeepingThread actor("");
   SocketListener listener({},  // We do not care about the sampling rate.
-                          std::move(callback_fn), &bookkeeping);
+                          std::move(callback_fn), &actor);
   MockEventListener client_listener;
   EXPECT_CALL(client_listener, OnConnect(_, _))
       .WillOnce(InvokeWithoutArgs(connected));
diff --git a/src/profiling/memory/unwinding.cc b/src/profiling/memory/unwinding.cc
index f43743b..d464444 100644
--- a/src/profiling/memory/unwinding.cc
+++ b/src/profiling/memory/unwinding.cc
@@ -194,26 +194,30 @@
   if (!ReceiveWireMessage(reinterpret_cast<char*>(rec->data.get()), rec->size,
                           &msg))
     return false;
-  switch (msg.record_type) {
-    case RecordType::Malloc: {
-      std::shared_ptr<ProcessMetadata> metadata = rec->metadata.lock();
-      if (!metadata)
-        // Process has already gone away.
-        return false;
+  if (msg.record_type == RecordType::Malloc) {
+    std::shared_ptr<ProcessMetadata> metadata = rec->metadata.lock();
+    if (!metadata) {
+      // Process has already gone away.
+      return false;
+    }
 
-      out->metadata = std::move(rec->metadata);
-      out->free_record = {};
-      return DoUnwind(&msg, metadata.get(), &out->alloc_record);
+    out->pid = rec->pid;
+    out->record_type = BookkeepingRecord::Type::Malloc;
+    if (!DoUnwind(&msg, metadata.get(), &out->alloc_record)) {
+      return false;
     }
-    case RecordType::Free: {
-      // We need to keep this alive, because msg.free_header is a pointer into
-      // this.
-      out->metadata = std::move(rec->metadata);
-      out->free_record.free_data = std::move(rec->data);
-      out->free_record.metadata = msg.free_header;
-      out->alloc_record = {};
-      return true;
-    }
+    return true;
+  } else if (msg.record_type == RecordType::Free) {
+    out->record_type = BookkeepingRecord::Type::Free;
+    out->pid = rec->pid;
+    // We need to keep this alive, because msg.free_header is a pointer into
+    // this.
+    out->free_record.free_data = std::move(rec->data);
+    out->free_record.metadata = msg.free_header;
+    return true;
+  } else {
+    PERFETTO_DCHECK(false);
+    return false;
   }
 }
 
@@ -227,41 +231,4 @@
       output_queue->Add(std::move(out));
   }
 }
-
-void HandleBookkeepingRecord(BookkeepingRecord* rec) {
-  std::shared_ptr<ProcessMetadata> metadata = rec->metadata.lock();
-  if (!metadata)
-    // Process has already gone away.
-    return;
-
-  if (rec->free_record.free_data) {
-    FreeRecord& free_rec = rec->free_record;
-    FreePageEntry* entries = free_rec.metadata->entries;
-    uint64_t num_entries = free_rec.metadata->num_entries;
-    if (num_entries > kFreePageSize)
-      return;
-    for (size_t i = 0; i < num_entries; ++i) {
-      const FreePageEntry& entry = entries[i];
-      metadata->heap_dump.RecordFree(entry.addr, entry.sequence_number);
-    }
-  } else {
-    AllocRecord& alloc_rec = rec->alloc_record;
-    std::vector<CodeLocation> code_locations;
-    for (unwindstack::FrameData& frame : alloc_rec.frames)
-      code_locations.emplace_back(frame.map_name, frame.function_name);
-    metadata->heap_dump.RecordMalloc(code_locations,
-                                     alloc_rec.alloc_metadata.alloc_address,
-                                     alloc_rec.alloc_metadata.alloc_size,
-                                     alloc_rec.alloc_metadata.sequence_number);
-  }
-}
-
-__attribute__((noreturn)) void BookkeepingMainLoop(
-    BoundedQueue<BookkeepingRecord>* input_queue) {
-  for (;;) {
-    BookkeepingRecord rec = input_queue->Get();
-    HandleBookkeepingRecord(&rec);
-  }
-}
-
 }  // namespace perfetto
diff --git a/src/profiling/memory/unwinding.h b/src/profiling/memory/unwinding.h
index 1bdcf0d..471bec6 100644
--- a/src/profiling/memory/unwinding.h
+++ b/src/profiling/memory/unwinding.h
@@ -22,6 +22,7 @@
 #include "perfetto/base/scoped_file.h"
 #include "src/profiling/memory/bookkeeping.h"
 #include "src/profiling/memory/bounded_queue.h"
+#include "src/profiling/memory/queue_messages.h"
 #include "src/profiling/memory/wire_protocol.h"
 
 namespace perfetto {
@@ -39,20 +40,13 @@
 };
 
 struct ProcessMetadata {
-  ProcessMetadata(pid_t p,
-                  base::ScopedFile maps_fd,
-                  base::ScopedFile mem,
-                  GlobalCallstackTrie* callsites)
-      : pid(p),
-        maps(std::move(maps_fd)),
-        mem_fd(std::move(mem)),
-        heap_dump(callsites) {
+  ProcessMetadata(pid_t p, base::ScopedFile maps_fd, base::ScopedFile mem)
+      : pid(p), maps(std::move(maps_fd)), mem_fd(std::move(mem)) {
     PERFETTO_CHECK(maps.Parse());
   }
   pid_t pid;
   FileDescriptorMaps maps;
   base::ScopedFile mem_fd;
-  HeapTracker heap_dump;
 };
 
 // Overlays size bytes pointed to by stack for addresses in [sp, sp + size).
@@ -72,40 +66,15 @@
 
 size_t RegSize(unwindstack::ArchEnum arch);
 
-struct UnwindingRecord {
-  pid_t pid;
-  size_t size;
-  std::unique_ptr<uint8_t[]> data;
-  std::weak_ptr<ProcessMetadata> metadata;
-};
 
-struct FreeRecord {
-  std::unique_ptr<uint8_t[]> free_data;
-  FreeMetadata* metadata;
-};
-
-struct AllocRecord {
-  AllocMetadata alloc_metadata;
-  std::vector<unwindstack::FrameData> frames;
-};
-
-struct BookkeepingRecord {
-  // TODO(fmayer): Use a union.
-  std::weak_ptr<ProcessMetadata> metadata;
-  AllocRecord alloc_record;
-  FreeRecord free_record;
-};
 
 bool DoUnwind(WireMessage*, ProcessMetadata* metadata, AllocRecord* out);
 
 bool HandleUnwindingRecord(UnwindingRecord* rec, BookkeepingRecord* out);
-void HandleBookkeepingRecord(BookkeepingRecord* rec);
 
 void UnwindingMainLoop(BoundedQueue<UnwindingRecord>* input_queue,
                        BoundedQueue<BookkeepingRecord>* output_queue);
 
-void BookkeepingMainLoop(BoundedQueue<BookkeepingRecord>* input_queue);
-
 }  // namespace perfetto
 
 #endif  // SRC_PROFILING_MEMORY_UNWINDING_H_
diff --git a/src/profiling/memory/unwinding_unittest.cc b/src/profiling/memory/unwinding_unittest.cc
index 3993692..f671585 100644
--- a/src/profiling/memory/unwinding_unittest.cc
+++ b/src/profiling/memory/unwinding_unittest.cc
@@ -122,8 +122,7 @@
   base::ScopedFile proc_maps(base::OpenFile("/proc/self/maps", O_RDONLY));
   base::ScopedFile proc_mem(base::OpenFile("/proc/self/mem", O_RDONLY));
   GlobalCallstackTrie callsites;
-  ProcessMetadata metadata(getpid(), std::move(proc_maps), std::move(proc_mem),
-                           &callsites);
+  ProcessMetadata metadata(getpid(), std::move(proc_maps), std::move(proc_mem));
   WireMessage msg;
   auto record = GetRecord(&msg);
   AllocRecord out;
diff --git a/src/trace_processor/counters_table.cc b/src/trace_processor/counters_table.cc
index b01400f..000891f 100644
--- a/src/trace_processor/counters_table.cc
+++ b/src/trace_processor/counters_table.cc
@@ -27,6 +27,45 @@
 
 using namespace sqlite_utils;
 
+PERFETTO_ALWAYS_INLINE int CompareCountersOnColumn(
+    const TraceStorage* storage,
+    size_t f_idx,
+    size_t s_idx,
+    const QueryConstraints::OrderBy& ob) {
+  const auto& co = storage->counters();
+  switch (ob.iColumn) {  // Compare rows |f_idx| and |s_idx| on this column.
+    case CountersTable::Column::kTimestamp:
+      return CompareValues(co.timestamps(), f_idx, s_idx, ob.desc);
+    case CountersTable::Column::kValue:
+      return CompareValues(co.values(), f_idx, s_idx, ob.desc);
+    case CountersTable::Column::kName:  // Orders by string id, not by text.
+      return CompareValues(co.name_ids(), f_idx, s_idx, ob.desc);
+    case CountersTable::Column::kRef:
+      return CompareValues(co.refs(), f_idx, s_idx, ob.desc);
+    case CountersTable::Column::kDuration:
+      return CompareValues(co.durations(), f_idx, s_idx, ob.desc);
+    case CountersTable::Column::kValueDelta:
+      return CompareValues(co.value_deltas(), f_idx, s_idx, ob.desc);
+    case CountersTable::Column::kRefType:  // Orders by enum value, not text.
+      return CompareValues(co.types(), f_idx, s_idx, ob.desc);
+    default:
+      PERFETTO_FATAL("Unexpected column %d", ob.iColumn);
+  }
+}
+
+PERFETTO_ALWAYS_INLINE int CompareCounters(
+    const TraceStorage* storage,
+    size_t f_idx,
+    size_t s_idx,
+    const std::vector<QueryConstraints::OrderBy>& order_by) {
+  for (const auto& ob : order_by) {  // Earlier terms take precedence.
+    int c = CompareCountersOnColumn(storage, f_idx, s_idx, ob);
+    if (c != 0)
+      return c;
+  }
+  return 0;  // The two rows tie on every term in |order_by|.
+}
+
 }  // namespace
 
 CountersTable::CountersTable(sqlite3*, const TraceStorage* storage)
@@ -51,49 +90,89 @@
 }
 
 std::unique_ptr<Table::Cursor> CountersTable::CreateCursor(
-    const QueryConstraints&,
-    sqlite3_value**) {
-  return std::unique_ptr<Table::Cursor>(new Cursor(storage_));
+    const QueryConstraints& qc,
+    sqlite3_value** argv) {
+  return std::unique_ptr<Table::Cursor>(new Cursor(storage_, qc, argv));
 }
 
 int CountersTable::BestIndex(const QueryConstraints&, BestIndexInfo* info) {
   // TODO(taylori): Work out cost dependant on constraints.
   info->estimated_cost =
       static_cast<uint32_t>(storage_->counters().counter_count());
+  info->order_by_consumed = true;
+
   return SQLITE_OK;
 }
 
-CountersTable::Cursor::Cursor(const TraceStorage* storage) : storage_(storage) {
-  num_rows_ = storage->counters().counter_count();
+CountersTable::Cursor::Cursor(const TraceStorage* storage,
+                              const QueryConstraints& qc,
+                              sqlite3_value** argv)
+    : storage_(storage) {
+  const auto& counters = storage->counters();
+
+  std::vector<bool> filter(counters.counter_count(), true);
+  for (size_t i = 0; i < qc.constraints().size(); i++) {
+    const auto& cs = qc.constraints()[i];
+    auto* v = argv[i];
+    switch (cs.iColumn) {
+      case CountersTable::Column::kTimestamp:
+        FilterColumn(counters.timestamps(), 0, cs, v, &filter);
+        break;
+      case CountersTable::Column::kValue:
+        FilterColumn(counters.values(), 0, cs, v, &filter);
+        break;
+      case CountersTable::Column::kName:
+        FilterColumn(counters.name_ids(), 0, cs, v, &filter);
+        break;
+      case CountersTable::Column::kRef:
+        FilterColumn(counters.refs(), 0, cs, v, &filter);
+        break;
+      case CountersTable::Column::kDuration:
+        FilterColumn(counters.durations(), 0, cs, v, &filter);
+        break;
+      case CountersTable::Column::kValueDelta:
+        FilterColumn(counters.value_deltas(), 0, cs, v, &filter);
+        break;
+      case CountersTable::Column::kRefType: {
+        // TODO(lalitm): add support for filtering here.
+      }
+    }
+  }
+
+  sorted_rows_ = CreateSortedIndexFromFilter(
+      0, filter, [this, &qc](uint32_t f, uint32_t s) {
+        return CompareCounters(storage_, f, s, qc.order_by()) < 0;
+      });
 }
 
 int CountersTable::Cursor::Column(sqlite3_context* context, int N) {
+  size_t row = sorted_rows_[next_row_idx_];
   switch (N) {
     case Column::kTimestamp: {
       sqlite3_result_int64(
           context,
-          static_cast<int64_t>(storage_->counters().timestamps()[row_]));
+          static_cast<int64_t>(storage_->counters().timestamps()[row]));
       break;
     }
     case Column::kValue: {
       sqlite3_result_int64(
-          context, static_cast<int64_t>(storage_->counters().values()[row_]));
+          context, static_cast<int64_t>(storage_->counters().values()[row]));
       break;
     }
     case Column::kName: {
       sqlite3_result_text(
           context,
-          storage_->GetString(storage_->counters().name_ids()[row_]).c_str(),
-          -1, nullptr);
+          storage_->GetString(storage_->counters().name_ids()[row]).c_str(), -1,
+          nullptr);
       break;
     }
     case Column::kRef: {
       sqlite3_result_int64(
-          context, static_cast<int64_t>(storage_->counters().refs()[row_]));
+          context, static_cast<int64_t>(storage_->counters().refs()[row]));
       break;
     }
     case Column::kRefType: {
-      switch (storage_->counters().types()[row_]) {
+      switch (storage_->counters().types()[row]) {
         case RefType::kCPU_ID: {
           sqlite3_result_text(context, "cpu", -1, nullptr);
           break;
@@ -119,14 +198,13 @@
     }
     case Column::kDuration: {
       sqlite3_result_int64(
-          context,
-          static_cast<int64_t>(storage_->counters().durations()[row_]));
+          context, static_cast<int64_t>(storage_->counters().durations()[row]));
       break;
     }
     case Column::kValueDelta: {
       sqlite3_result_int64(
           context,
-          static_cast<int64_t>(storage_->counters().value_deltas()[row_]));
+          static_cast<int64_t>(storage_->counters().value_deltas()[row]));
       break;
     }
     default:
@@ -137,12 +215,12 @@
 }
 
 int CountersTable::Cursor::Next() {
-  row_++;
+  next_row_idx_++;
   return SQLITE_OK;
 }
 
 int CountersTable::Cursor::Eof() {
-  return row_ >= num_rows_;
+  return next_row_idx_ >= sorted_rows_.size();
 }
 
 }  // namespace trace_processor
diff --git a/src/trace_processor/counters_table.h b/src/trace_processor/counters_table.h
index 90929ec..c35b887 100644
--- a/src/trace_processor/counters_table.h
+++ b/src/trace_processor/counters_table.h
@@ -51,7 +51,7 @@
  private:
   class Cursor : public Table::Cursor {
    public:
-    Cursor(const TraceStorage*);
+    Cursor(const TraceStorage*, const QueryConstraints&, sqlite3_value**);
 
     // Implementation of Table::Cursor.
     int Next() override;
@@ -59,8 +59,11 @@
     int Column(sqlite3_context*, int N) override;
 
    private:
-    size_t num_rows_;
-    size_t row_ = 0;
+    // Ids of the rows that passed the filter, in ORDER BY order.
+    std::vector<uint32_t> sorted_rows_;
+
+    // An offset into |sorted_rows_| indicating the next row to return.
+    uint32_t next_row_idx_ = 0;
 
     const TraceStorage* const storage_;
   };
diff --git a/src/trace_processor/sched_slice_table.cc b/src/trace_processor/sched_slice_table.cc
index 079168f..0575f00 100644
--- a/src/trace_processor/sched_slice_table.cc
+++ b/src/trace_processor/sched_slice_table.cc
@@ -34,16 +34,6 @@
 
 constexpr uint64_t kUint64Max = std::numeric_limits<uint64_t>::max();
 
-template <class T>
-inline int Compare(T first, T second, bool desc) {
-  if (first < second) {
-    return desc ? 1 : -1;
-  } else if (first > second) {
-    return desc ? -1 : 1;
-  }
-  return 0;
-}
-
 // Compares the slice at index |f| with the slice at index |s| on the
 // criteria in |order_by|.
 // Returns -1 if the first slice is before the second in the ordering, 1 if
@@ -56,13 +46,13 @@
   const auto& sl = storage->slices();
   switch (ob.iColumn) {
     case SchedSliceTable::Column::kTimestamp:
-      return Compare(sl.start_ns()[f_idx], sl.start_ns()[s_idx], ob.desc);
+      return CompareValues(sl.start_ns(), f_idx, s_idx, ob.desc);
     case SchedSliceTable::Column::kDuration:
-      return Compare(sl.durations()[f_idx], sl.durations()[s_idx], ob.desc);
+      return CompareValues(sl.durations(), f_idx, s_idx, ob.desc);
     case SchedSliceTable::Column::kCpu:
-      return Compare(sl.cpus()[f_idx], sl.cpus()[s_idx], ob.desc);
+      return CompareValues(sl.cpus(), f_idx, s_idx, ob.desc);
     case SchedSliceTable::Column::kUtid:
-      return Compare(sl.utids()[f_idx], sl.utids()[s_idx], ob.desc);
+      return CompareValues(sl.utids(), f_idx, s_idx, ob.desc);
     default:
       PERFETTO_FATAL("Unexpected column %d", ob.iColumn);
   }
@@ -133,8 +123,6 @@
                                      uint32_t min_idx,
                                      uint32_t max_idx) {
   const auto& slices = storage->slices();
-  ptrdiff_t min_idx_ptr = static_cast<ptrdiff_t>(min_idx);
-  ptrdiff_t max_idx_ptr = static_cast<ptrdiff_t>(max_idx);
 
   auto dist = static_cast<size_t>(max_idx - min_idx);
   std::vector<bool> filter(dist, true);
@@ -142,21 +130,15 @@
     const auto& cs = qc.constraints()[i];
     auto* v = argv[i];
     switch (cs.iColumn) {
-      case SchedSliceTable::Column::kCpu: {
-        auto it = slices.cpus().begin();
-        FilterColumn(it + min_idx_ptr, it + max_idx_ptr, cs, v, &filter);
+      case SchedSliceTable::Column::kCpu:
+        FilterColumn(slices.cpus(), min_idx, cs, v, &filter);
         break;
-      }
-      case SchedSliceTable::Column::kDuration: {
-        auto it = slices.durations().begin();
-        FilterColumn(it + min_idx_ptr, it + max_idx_ptr, cs, v, &filter);
+      case SchedSliceTable::Column::kDuration:
+        FilterColumn(slices.durations(), min_idx, cs, v, &filter);
         break;
-      }
-      case SchedSliceTable::Column::kUtid: {
-        auto it = slices.utids().begin();
-        FilterColumn(it + min_idx_ptr, it + max_idx_ptr, cs, v, &filter);
+      case SchedSliceTable::Column::kUtid:
+        FilterColumn(slices.utids(), min_idx, cs, v, &filter);
         break;
-      }
     }
   }
   return filter;
@@ -219,8 +201,8 @@
     return std::unique_ptr<Table::Cursor>(
         new FilterCursor(storage_, min_idx, max_idx, std::move(filter), desc));
   }
-  return std::unique_ptr<Table::Cursor>(new SortedCursor(
-      storage_, min_idx, max_idx, qc.order_by(), std::move(filter)));
+  return std::unique_ptr<Table::Cursor>(
+      new SortedCursor(storage_, min_idx, qc.order_by(), std::move(filter)));
 }
 
 int SchedSliceTable::BestIndex(const QueryConstraints& qc,
@@ -342,27 +324,14 @@
 
 SchedSliceTable::SortedCursor::SortedCursor(
     const TraceStorage* storage,
-    uint32_t min_idx,
-    uint32_t max_idx,
+    uint32_t offset,
     const std::vector<QueryConstraints::OrderBy>& ob,
     const std::vector<bool>& filter)
     : BaseCursor(storage) {
-  auto diff = static_cast<size_t>(max_idx - min_idx);
-  PERFETTO_CHECK(diff == filter.size());
-
-  auto set_bits = std::count(filter.begin(), filter.end(), true);
-  sorted_rows_.resize(static_cast<size_t>(set_bits));
-
-  auto it = std::find(filter.begin(), filter.end(), true);
-  for (size_t i = 0; it != filter.end(); i++) {
-    auto index = static_cast<uint32_t>(std::distance(filter.begin(), it));
-    sorted_rows_[i] = min_idx + index;
-    it = std::find(it + 1, filter.end(), true);
-  }
-  std::sort(sorted_rows_.begin(), sorted_rows_.end(),
-            [this, &ob](uint32_t f, uint32_t s) {
-              return CompareSlices(storage_, f, s, ob) < 0;
-            });
+  sorted_rows_ = CreateSortedIndexFromFilter(
+      offset, filter, [this, &ob](uint32_t f, uint32_t s) {
+        return CompareSlices(storage_, f, s, ob) < 0;
+      });
 }
 
 int SchedSliceTable::SortedCursor::Next() {
diff --git a/src/trace_processor/sched_slice_table.h b/src/trace_processor/sched_slice_table.h
index bd63df8..ea41cae 100644
--- a/src/trace_processor/sched_slice_table.h
+++ b/src/trace_processor/sched_slice_table.h
@@ -122,8 +122,7 @@
                  uint32_t max_idx,
                  const std::vector<QueryConstraints::OrderBy>&);
     SortedCursor(const TraceStorage* storage,
-                 uint32_t min_idx,
-                 uint32_t max_idx,
+                 uint32_t offset,
                  const std::vector<QueryConstraints::OrderBy>&,
                  const std::vector<bool>& filter);
 
diff --git a/src/trace_processor/span_operator_table.cc b/src/trace_processor/span_operator_table.cc
index 6f898bc..091427f 100644
--- a/src/trace_processor/span_operator_table.cc
+++ b/src/trace_processor/span_operator_table.cc
@@ -334,6 +334,7 @@
         value->text_value =
             reinterpret_cast<const char*>(sqlite3_column_text(stmt, i));
         break;
+      case Table::ColumnType::kDouble:
       case Table::ColumnType::kInt:
         PERFETTO_CHECK(false);
     }
@@ -427,6 +428,7 @@
                           kSqliteTransient);
       break;
     }
+    case Table::ColumnType::kDouble:
     case Table::ColumnType::kInt:
       PERFETTO_CHECK(false);
   }
diff --git a/src/trace_processor/sqlite_utils.h b/src/trace_processor/sqlite_utils.h
index 191f316..83d5074 100644
--- a/src/trace_processor/sqlite_utils.h
+++ b/src/trace_processor/sqlite_utils.h
@@ -68,57 +68,99 @@
   }
 }
 
+template <class D>
+int CompareValues(const D& deque, size_t a, size_t b, bool desc) {
+  const auto& first = deque[a];
+  const auto& second = deque[b];
+  if (first < second) {
+    return desc ? 1 : -1;
+  } else if (first > second) {
+    return desc ? -1 : 1;
+  }
+  return 0;
+}
+
 template <typename F>
-bool Compare(uint32_t actual, sqlite3_value* value) {
+bool CompareToSqliteValue(uint32_t actual, sqlite3_value* value) {
   PERFETTO_DCHECK(sqlite3_value_type(value) == SQLITE_INTEGER);
   return F()(actual, static_cast<uint32_t>(sqlite3_value_int64(value)));
 }
 
 template <typename F>
-bool Compare(uint64_t actual, sqlite3_value* value) {
-  PERFETTO_CHECK(sqlite3_value_type(value) == SQLITE_INTEGER);
+bool CompareToSqliteValue(uint64_t actual, sqlite3_value* value) {
+  PERFETTO_DCHECK(sqlite3_value_type(value) == SQLITE_INTEGER);
   return F()(actual, static_cast<uint64_t>(sqlite3_value_int64(value)));
 }
 
-template <class RandomAccessIterator>
-void FilterColumn(RandomAccessIterator begin,
-                  RandomAccessIterator end,
+template <typename F>
+bool CompareToSqliteValue(int64_t actual, sqlite3_value* value) {
+  PERFETTO_DCHECK(sqlite3_value_type(value) == SQLITE_INTEGER);
+  return F()(actual, static_cast<int64_t>(sqlite3_value_int64(value)));
+}
+
+template <typename F>
+bool CompareToSqliteValue(double actual, sqlite3_value* value) {
+  auto type = sqlite3_value_type(value);
+  PERFETTO_DCHECK(type == SQLITE_FLOAT || type == SQLITE_INTEGER);
+  return F()(actual, sqlite3_value_double(value));
+}
+
+template <class D>
+void FilterColumn(const D& deque,
+                  size_t offset,
                   const QueryConstraints::Constraint& constraint,
                   sqlite3_value* argv,
-                  std::vector<bool>* row_filter) {
-  using T = typename RandomAccessIterator::value_type;
-  PERFETTO_DCHECK(static_cast<size_t>(std::distance(begin, end)) ==
-                  row_filter->size());
+                  std::vector<bool>* filter) {
+  using T = typename D::value_type;
 
-  auto it = std::find(row_filter->begin(), row_filter->end(), true);
-  while (it != row_filter->end()) {
-    auto index = std::distance(row_filter->begin(), it);
+  auto it = std::find(filter->begin(), filter->end(), true);
+  while (it != filter->end()) {
+    auto filter_idx = static_cast<size_t>(std::distance(filter->begin(), it));
+    T actual = deque[offset + filter_idx];
     switch (constraint.op) {
       case SQLITE_INDEX_CONSTRAINT_EQ:
-        *it = Compare<std::equal_to<T>>(begin[index], argv);
+        *it = CompareToSqliteValue<std::equal_to<T>>(actual, argv);
         break;
       case SQLITE_INDEX_CONSTRAINT_GE:
-        *it = Compare<std::greater_equal<T>>(begin[index], argv);
+        *it = CompareToSqliteValue<std::greater_equal<T>>(actual, argv);
         break;
       case SQLITE_INDEX_CONSTRAINT_GT:
-        *it = Compare<std::greater<T>>(begin[index], argv);
+        *it = CompareToSqliteValue<std::greater<T>>(actual, argv);
         break;
       case SQLITE_INDEX_CONSTRAINT_LE:
-        *it = Compare<std::less_equal<T>>(begin[index], argv);
+        *it = CompareToSqliteValue<std::less_equal<T>>(actual, argv);
         break;
       case SQLITE_INDEX_CONSTRAINT_LT:
-        *it = Compare<std::less<T>>(begin[index], argv);
+        *it = CompareToSqliteValue<std::less<T>>(actual, argv);
         break;
       case SQLITE_INDEX_CONSTRAINT_NE:
-        *it = Compare<std::not_equal_to<T>>(begin[index], argv);
+        *it = CompareToSqliteValue<std::not_equal_to<T>>(actual, argv);
         break;
       default:
         PERFETTO_CHECK(false);
     }
-    it = std::find(it + 1, row_filter->end(), true);
+    it = std::find(it + 1, filter->end(), true);
   }
 }
 
+template <class Comparator>
+std::vector<uint32_t> CreateSortedIndexFromFilter(
+    uint32_t offset,
+    const std::vector<bool>& filter,
+    Comparator comparator) {
+  auto set_bits = std::count(filter.begin(), filter.end(), true);
+
+  std::vector<uint32_t> sorted_rows(static_cast<size_t>(set_bits));
+  auto it = std::find(filter.begin(), filter.end(), true);
+  for (size_t i = 0; it != filter.end(); i++) {
+    auto filter_idx = static_cast<uint32_t>(std::distance(filter.begin(), it));
+    sorted_rows[i] = offset + filter_idx;
+    it = std::find(it + 1, filter.end(), true);
+  }
+  std::sort(sorted_rows.begin(), sorted_rows.end(), comparator);
+  return sorted_rows;
+}
+
 }  // namespace sqlite_utils
 }  // namespace trace_processor
 }  // namespace perfetto
diff --git a/src/trace_processor/table.cc b/src/trace_processor/table.cc
index 228551d..72eb3c2 100644
--- a/src/trace_processor/table.cc
+++ b/src/trace_processor/table.cc
@@ -51,6 +51,8 @@
       return "UNSIGNED BIG INT";
     case Table::ColumnType::kInt:
       return "INT";
+    case Table::ColumnType::kDouble:
+      return "DOUBLE";
   }
   PERFETTO_CHECK(false);
 }
diff --git a/src/trace_processor/table.h b/src/trace_processor/table.h
index 6e3f8d4..5cf22f2 100644
--- a/src/trace_processor/table.h
+++ b/src/trace_processor/table.h
@@ -45,6 +45,7 @@
     kUlong = 2,
     kUint = 3,
     kInt = 4,
+    kDouble = 5,
   };
 
   // Describes a column of this table.
@@ -120,7 +121,7 @@
     Schema(std::vector<Column>, std::vector<size_t> primary_keys);
 
     // This class is explicitly copiable.
-    Schema(const Schema&) noexcept;
+    Schema(const Schema&);
     Schema& operator=(const Schema& t);
 
     std::string ToCreateTableStmt();
diff --git a/src/trace_processor/trace_processor_shell.cc b/src/trace_processor/trace_processor_shell.cc
index 05540e1..4fc3dbc 100644
--- a/src/trace_processor/trace_processor_shell.cc
+++ b/src/trace_processor/trace_processor_shell.cc
@@ -16,10 +16,12 @@
 
 #include <aio.h>
 #include <fcntl.h>
+#include <inttypes.h>
 #include <sys/stat.h>
 #include <unistd.h>
 
 #include <functional>
+#include <iostream>
 
 #include "perfetto/base/build_config.h"
 #include "perfetto/base/logging.h"
@@ -46,8 +48,8 @@
 #include <signal.h>
 #endif
 
-using namespace perfetto;
-using namespace perfetto::trace_processor;
+namespace perfetto {
+namespace trace_processor {
 
 namespace {
 TraceProcessor* g_tp;
@@ -132,7 +134,8 @@
 
 #endif
 
-void OnQueryResult(base::TimeNanos t_start, const protos::RawQueryResult& res) {
+void PrintQueryResultInteractively(base::TimeNanos t_start,
+                                   const protos::RawQueryResult& res) {
   if (res.has_error()) {
     PERFETTO_ELOG("SQLite error: %s", res.error().c_str());
     return;
@@ -164,40 +167,156 @@
     for (int c = 0; c < res.columns_size(); c++) {
       switch (res.column_descriptors(c).type()) {
         case protos::RawQueryResult_ColumnDesc_Type_STRING:
-          printf("%-20.20s ", res.columns(c).string_values(r).c_str());
+          printf("%-20.20s", res.columns(c).string_values(r).c_str());
           break;
         case protos::RawQueryResult_ColumnDesc_Type_DOUBLE:
-          printf("%20f ", res.columns(c).double_values(r));
+          printf("%20f", res.columns(c).double_values(r));
           break;
         case protos::RawQueryResult_ColumnDesc_Type_LONG: {
           auto value = res.columns(c).long_values(r);
-          printf((value < 0xffffffll) ? "%20lld " : "%20llx ", value);
+          printf((value < 0xffffffll) ? "%20lld" : "%20llx", value);
+          break;
+        }
+      }
+      printf(" ");
+    }
+    printf("\n");
+  }
+  printf("\nQuery executed in %.3f ms\n\n", (t_end - t_start).count() / 1E6);
+}
+int StartInteractiveShell() {
+  SetupLineEditor();
 
+  for (;;) {
+    char* line = GetLine("> ");
+    if (!line || strcmp(line, "q\n") == 0)
+      break;
+    if (strcmp(line, "") == 0)
+      continue;
+    protos::RawQueryArgs query;
+    query.set_sql_query(line);
+    base::TimeNanos t_start = base::GetWallTimeNs();
+    g_tp->ExecuteQuery(query, [t_start](const protos::RawQueryResult& res) {
+      PrintQueryResultInteractively(t_start, res);
+    });
+
+    FreeLine(line);
+  }
+  return 0;
+}
+
+// Writes |res| to |output| as CSV: a quoted header row, then one line per row.
+void PrintQueryResultAsCsv(const protos::RawQueryResult& res, FILE* output) {
+  PERFETTO_CHECK(res.columns_size() == res.column_descriptors_size());
+
+  for (int r = 0; r < static_cast<int>(res.num_records()); r++) {
+    if (r == 0) {
+      for (int c = 0; c < res.column_descriptors_size(); c++) {
+        if (c > 0)
+          fprintf(output, ",");
+        fprintf(output, "\"%s\"", res.column_descriptors(c).name().c_str());
+      }
+      fprintf(output, "\n");
+    }
+
+    for (int c = 0; c < res.columns_size(); c++) {
+      if (c > 0)
+        fprintf(output, ",");
+      switch (res.column_descriptors(c).type()) {
+        case protos::RawQueryResult_ColumnDesc_Type_STRING:
+          fprintf(output, "\"%s\"", res.columns(c).string_values(r).c_str());
+          break;
+        case protos::RawQueryResult_ColumnDesc_Type_DOUBLE:
+          fprintf(output, "%f", res.columns(c).double_values(r));
+          break;
+        case protos::RawQueryResult_ColumnDesc_Type_LONG: {
+          auto value = res.columns(c).long_values(r);
+          fprintf(output, "%lld", value);
           break;
         }
       }
     }
-    printf("\n");
+    fprintf(output, "\n");
   }
-  printf("\nQuery executed in %.3f ms\n\n", (t_end - t_start).count() / 1E6);
 }
 
-}  // namespace
+// Reads blank-line-separated SQL queries from |input|, runs each and prints
+// result rows to |output| as CSV. Returns 1 after a query error, otherwise 0.
+int RunQueryAndPrintResult(FILE* input, FILE* output) {
+  char buffer[4096];
+  bool is_first_query = true;
+  bool is_query_error = false;
+  bool has_output_printed = false;
+  while (!feof(input) && !ferror(input) && !is_query_error) {
+    // Add an extra newline separator between query results.
+    if (!is_first_query)
+      fprintf(output, "\n");
+    is_first_query = false;
 
-int main(int argc, char** argv) {
+    std::string sql_query;
+    while (fgets(buffer, sizeof(buffer), input) && strcmp(buffer, "\n") != 0)
+      sql_query.append(buffer);
+    // The query is empty if EOF or a blank line came first; back() would be UB.
+    if (!sql_query.empty() && sql_query.back() == '\n')
+      sql_query.pop_back();
+    PERFETTO_ILOG("Executing query: %s", sql_query.c_str());
+
+    protos::RawQueryArgs query;
+    query.set_sql_query(sql_query);
+    g_tp->ExecuteQuery(query, [output, &is_query_error, &has_output_printed](
+                                  const protos::RawQueryResult& res) {
+      if (res.has_error()) {
+        PERFETTO_ELOG("SQLite error: %s", res.error().c_str());
+        is_query_error = true;
+        return;
+      } else if (res.num_records() != 0) {
+        if (has_output_printed) {
+          PERFETTO_ELOG(
+              "More than one query generated result rows. This is "
+              "unsupported.");
+          is_query_error = true;
+          return;
+        }
+        has_output_printed = true;
+      }
+      PrintQueryResultAsCsv(res, output);
+    });
+  }
+  return is_query_error ? 1 : 0;
+}
+
+void PrintUsage(char** argv) {
+  PERFETTO_ELOG("Usage: %s [-d] [-q query.sql] trace_file.pb", argv[0]);
+}
+
+int TraceProcessorMain(int argc, char** argv) {
   if (argc < 2) {
-    PERFETTO_ELOG("Usage: %s [-d] trace_file.proto", argv[0]);
+    PrintUsage(argv);
     return 1;
   }
   const char* trace_file_path = nullptr;
+  const char* query_file_path = nullptr;
   for (int i = 1; i < argc; i++) {
     if (strcmp(argv[i], "-d") == 0) {
       EnableSQLiteVtableDebugging();
       continue;
     }
+    if (strcmp(argv[i], "-q") == 0) {
+      if (++i == argc) {
+        PrintUsage(argv);
+        return 1;
+      }
+      query_file_path = argv[i];
+      continue;
+    }
     trace_file_path = argv[i];
   }
 
+  if (trace_file_path == nullptr) {
+    PrintUsage(argv);
+    return 1;
+  }
+
   // Load the trace file into the trace processor.
   TraceProcessor::Config config;
   config.optimization_mode = OptimizationMode::kMaxBandwidth;
@@ -255,23 +374,21 @@
   signal(SIGINT, [](int) { g_tp->InterruptQuery(); });
 #endif
 
-  SetupLineEditor();
-
-  for (;;) {
-    char* line = GetLine("> ");
-    if (!line || strcmp(line, "q\n") == 0)
-      break;
-    if (strcmp(line, "") == 0)
-      continue;
-    protos::RawQueryArgs query;
-    query.set_sql_query(line);
-    base::TimeNanos t_start = base::GetWallTimeNs();
-    g_tp->ExecuteQuery(query, [t_start](const protos::RawQueryResult& res) {
-      OnQueryResult(t_start, res);
-    });
-
-    FreeLine(line);
+  // If there is no query file, start a shell.
+  if (query_file_path == nullptr) {
+    return StartInteractiveShell();
   }
 
-  return 0;
+  // Otherwise run the queries and print the results.
+  base::ScopedFstream file(fopen(query_file_path, "r"));
+  return RunQueryAndPrintResult(file.get(), stdout);
+}
+
+}  // namespace
+
+}  // namespace trace_processor
+}  // namespace perfetto
+
+int main(int argc, char** argv) {
+  return perfetto::trace_processor::TraceProcessorMain(argc, argv);
 }
diff --git a/src/traced/probes/sys_stats/sys_stats_data_source.cc b/src/traced/probes/sys_stats/sys_stats_data_source.cc
index 00ab945..5f4b740 100644
--- a/src/traced/probes/sys_stats/sys_stats_data_source.cc
+++ b/src/traced/probes/sys_stats/sys_stats_data_source.cc
@@ -34,7 +34,7 @@
 #include "perfetto/traced/sys_stats_counters.h"
 #include "perfetto/tracing/core/sys_stats_config.h"
 
-#include "perfetto/common/sys_stats_counters.pbzero.h"
+#include "perfetto/common/sys_stats_counters.pb.h"
 #include "perfetto/config/sys_stats/sys_stats_config.pb.h"
 #include "perfetto/trace/sys_stats/sys_stats.pbzero.h"
 #include "perfetto/trace/trace_packet.pbzero.h"
diff --git a/src/tracing/BUILD.gn b/src/tracing/BUILD.gn
index ba44411..b90ce18 100644
--- a/src/tracing/BUILD.gn
+++ b/src/tracing/BUILD.gn
@@ -65,32 +65,6 @@
   ]
 }
 
-source_set("api") {
-  deps = [
-    ":ipc",
-    ":tracing",
-    "../../gn:default_deps",
-    "../../protos/perfetto/config:lite",
-    "../base",
-  ]
-  sources = [
-    "api_impl/consumer_api.cc",
-  ]
-}
-
-executable("consumer_api_test") {
-  deps = [
-    ":api",
-    "../../gn:default_deps",
-    "../../protos/perfetto/config:lite",
-    "../../protos/perfetto/trace:lite",
-    "../base",
-  ]
-  sources = [
-    "api_impl/consumer_api_test.cc",
-  ]
-}
-
 source_set("unittests") {
   testonly = true
   deps = [
@@ -159,6 +133,19 @@
 }
 
 if (!build_with_chromium) {
+  executable("consumer_api_test") {
+    deps = [
+      ":api",
+      "../../gn:default_deps",
+      "../../protos/perfetto/config:lite",
+      "../../protos/perfetto/trace:lite",
+      "../base",
+    ]
+    sources = [
+      "api_impl/consumer_api_test.cc",
+    ]
+  }
+
   # IPC transport: only consumer side
   # TODO(fmayer): Remove duplication between this and ipc.
   source_set("ipc_consumer") {
@@ -225,4 +212,19 @@
       "test/hello_world_benchmark.cc",
     ]
   }
+
+  # Implementation of the public-facing consumer API in libperfetto.so (only for
+  # Android builds).
+  source_set("api") {
+    deps = [
+      ":ipc",
+      ":tracing",
+      "../../gn:default_deps",
+      "../../protos/perfetto/config:lite",
+      "../base",
+    ]
+    sources = [
+      "api_impl/consumer_api.cc",
+    ]
+  }
 }
diff --git a/test/trace_processor/android_sched_and_ps_smoke.out b/test/trace_processor/android_sched_and_ps_smoke.out
new file mode 100644
index 0000000..d5d80e1
--- /dev/null
+++ b/test/trace_processor/android_sched_and_ps_smoke.out
@@ -0,0 +1,25 @@
+"ts","cpu","dur","utid"
+81473010031230,2,78021,1
+81473010109251,2,12500,0
+81473010121751,2,58021,2
+81473010179772,2,24114,0
+81473010203886,2,30834,3
+81473010234720,2,43802,0
+81473010278522,2,29948,4
+81473010308470,2,44322,0
+81473010341386,1,158854,5
+81473010352792,2,32917,6
+
+
+
+"ts","dur","cpu","utid","rowid","quantum","window_start","window_dur","quantum_ts"
+81473010031230,78021,2,1,2,0,0,-1,0
+81473010109251,12500,2,0,2,0,0,-1,0
+81473010121751,58021,2,2,2,0,0,-1,0
+81473010179772,24114,2,0,2,0,0,-1,0
+81473010203886,30834,2,3,2,0,0,-1,0
+81473010234720,43802,2,0,2,0,0,-1,0
+81473010278522,29948,2,4,2,0,0,-1,0
+81473010308470,44322,2,0,2,0,0,-1,0
+81473010341386,158854,1,5,1,0,0,-1,0
+81473010352792,32917,2,6,2,0,0,-1,0
diff --git a/test/trace_processor/index b/test/trace_processor/index
new file mode 100644
index 0000000..6854209
--- /dev/null
+++ b/test/trace_processor/index
@@ -0,0 +1 @@
+../data/android_sched_and_ps.pb smoke.sql android_sched_and_ps_smoke.out
diff --git a/test/trace_processor/smoke.sql b/test/trace_processor/smoke.sql
new file mode 100644
index 0000000..dd760b4
--- /dev/null
+++ b/test/trace_processor/smoke.sql
@@ -0,0 +1,7 @@
+SELECT * from sched limit 10;
+
+SELECT * from counters limit 10;
+
+CREATE VIRTUAL TABLE sp USING span(sched, window, cpu);
+
+SELECT * from sp order by ts limit 10;
diff --git a/tools/diff_test_trace_processor.py b/tools/diff_test_trace_processor.py
new file mode 100755
index 0000000..f239440
--- /dev/null
+++ b/tools/diff_test_trace_processor.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+# Copyright (C) 2018 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import difflib
+import glob
+import os
+import subprocess
+import sys
+
+ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+TEST_DATA_DIR = os.path.join(ROOT_DIR, "test", "trace_processor")
+
+def main():
+  """Runs every test in the index file; returns 0 if all pass, 1 otherwise."""
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--index', type=str, help='location of index file',
+                      default=os.path.join(TEST_DATA_DIR, "index"))
+  parser.add_argument('trace_processor', type=str,
+                      help='location of trace processor binary')
+  args = parser.parse_args()
+
+  with open(args.index, 'r') as file:
+    index_lines = file.readlines()
+
+  test_failure = 0
+  index_dir = os.path.dirname(args.index)
+  for line in index_lines:
+    [trace_fname, query_fname, expected_fname] = line.strip().split(' ')
+
+    trace_path = os.path.abspath(os.path.join(index_dir, trace_fname))
+    query_path = os.path.abspath(os.path.join(index_dir, query_fname))
+    expected_path = os.path.abspath(os.path.join(index_dir, expected_fname))
+    if not os.path.exists(trace_path):
+      print("Trace file not found {}".format(trace_path))
+      return 1
+    elif not os.path.exists(query_path):
+      print("Query file not found {}".format(query_path))
+      return 1
+    elif not os.path.exists(expected_path):
+      print("Expected file not found {}".format(expected_path))
+      return 1
+
+    # Capture the stdout of: <trace_processor> -q <query file> <trace file>.
+    actual_raw = subprocess.check_output(
+        [args.trace_processor, '-q', query_path, trace_path])
+    # Diff decoded text: Python 3 difflib rejects bytes lines (TypeError).
+    actual = actual_raw.decode("utf-8")
+    actual_lines = actual.splitlines(True)
+
+    with open(expected_path, "r") as expected_file:
+      expected = expected_file.read()
+      if expected != actual:
+        # Newline-terminate so the diff header starts on its own line.
+        sys.stderr.write(
+          "Expected did not match actual for trace {} and query {}\n"
+          .format(trace_path, query_path))
+
+        expected_lines = expected.splitlines(True)
+        diff = difflib.unified_diff(expected_lines, actual_lines,
+                                    fromfile="expected", tofile="actual")
+        for line in diff:
+          sys.stderr.write(line)
+        test_failure += 1
+
+  if test_failure == 0:
+    print("All tests passed successfully")
+    return 0
+  else:
+    print("Total failures: {}".format(test_failure))
+    return 1
+
+if __name__ == '__main__':
+  sys.exit(main())