Merge "profiling: Properly align register_data."
diff --git a/src/profiling/memory/BUILD.gn b/src/profiling/memory/BUILD.gn
index 369cb71..84411d8 100644
--- a/src/profiling/memory/BUILD.gn
+++ b/src/profiling/memory/BUILD.gn
@@ -40,6 +40,7 @@
   sources = [
     "bookkeeping.cc",
     "bookkeeping.h",
+    "queue_messages.h",
     "record_reader.cc",
     "record_reader.h",
     "socket_listener.cc",
diff --git a/src/profiling/memory/bookkeeping.cc b/src/profiling/memory/bookkeeping.cc
index 7a31502..c7f6c71 100644
--- a/src/profiling/memory/bookkeeping.cc
+++ b/src/profiling/memory/bookkeeping.cc
@@ -16,7 +16,14 @@
 
 #include "src/profiling/memory/bookkeeping.h"
 
+#include <fcntl.h>
+#include <inttypes.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "perfetto/base/file_utils.h"
 #include "perfetto/base/logging.h"
+#include "perfetto/base/scoped_file.h"
 
 namespace perfetto {
 
@@ -28,6 +35,17 @@
   return child;
 }
 
+std::vector<InternedCodeLocation> GlobalCallstackTrie::Node::BuildCallstack()
+    const {
+  // Walk the parent_ links starting at this node and collect every location
+  // on the way; the result therefore starts with this node's own location and
+  // ends with that of the node whose parent_ is null.
+  std::vector<InternedCodeLocation> frames;
+  for (const Node* cur = this; cur != nullptr; cur = cur->parent_)
+    frames.emplace_back(cur->location_);
+  return frames;
+}
+
 void HeapTracker::RecordMalloc(const std::vector<CodeLocation>& callstack,
                                uint64_t address,
                                uint64_t size,
@@ -85,6 +103,24 @@
   allocations_.erase(leaf_it);
 }
 
+void HeapTracker::Dump(int fd) {
+  // TODO(fmayer): This should dump protocol buffers into the perfetto service.
+  // For now, write one flamegraph.pl-compatible line per allocations_ entry.
+  for (const auto& entry : allocations_) {
+    const Allocation& alloc = entry.second;
+    std::string line;
+    const char* separator = "";
+    for (const InternedCodeLocation& loc : alloc.node->BuildCallstack()) {
+      line += separator;
+      line += loc.function_name.str();
+      separator = ";";
+    }
+    line += " " + std::to_string(alloc.alloc_size) + "\n";
+    // NOTE(review): WriteAll's result is discarded; confirm that is intended.
+    base::WriteAll(fd, line.c_str(), line.size());
+  }
+}
+
 uint64_t GlobalCallstackTrie::GetCumSizeForTesting(
     const std::vector<CodeLocation>& callstack) {
   Node* node = &root_;
@@ -123,4 +159,89 @@
   }
 }
 
+// Processes one queue record: a heap dump, or a malloc/free for rec->pid.
+void BookkeepingThread::HandleBookkeepingRecord(BookkeepingRecord* rec) {
+  BookkeepingData* bookkeeping_data = nullptr;
+  if (rec->pid != 0) {
+    std::lock_guard<std::mutex> l(bookkeeping_mutex_);
+    auto it = bookkeeping_data_.find(rec->pid);
+    if (it == bookkeeping_data_.end()) {
+      PERFETTO_LOG("Invalid pid: %d", rec->pid);
+      PERFETTO_DCHECK(false);
+      return;
+    }
+    bookkeeping_data = &it->second;
+  }
+
+  if (rec->record_type == BookkeepingRecord::Type::Dump) {
+    PERFETTO_LOG("Dumping heaps");
+    // Other threads insert into the map, so hold the lock for the whole walk.
+    std::lock_guard<std::mutex> l(bookkeeping_mutex_);
+    for (auto it = bookkeeping_data_.begin(); it != bookkeeping_data_.end();) {
+      std::string dump_file_name = file_name_ + "." + std::to_string(it->first);
+      PERFETTO_LOG("Dumping %d to %s", it->first, dump_file_name.c_str());
+      base::ScopedFile fd =  // O_TRUNC: do not keep the tail of an older dump.
+          base::OpenFile(dump_file_name, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+      if (fd)
+        it->second.heap_tracker.Dump(fd.get());
+      else
+        PERFETTO_PLOG("Failed to open %s", dump_file_name.c_str());
+      // Garbage collect for processes that already went away.
+      if (it->second.ref_count == 0)
+        it = bookkeeping_data_.erase(it);
+      else
+        ++it;
+    }
+  } else if (!bookkeeping_data) {
+    PERFETTO_DCHECK(false);  // Malloc/Free records need a non-zero pid.
+  } else if (rec->record_type == BookkeepingRecord::Type::Free) {
+    FreePageEntry* entries = rec->free_record.metadata->entries;
+    uint64_t num_entries = rec->free_record.metadata->num_entries;
+    if (num_entries > kFreePageSize)  // Reject counts beyond one free page.
+      return;
+    for (size_t i = 0; i < num_entries; ++i) {
+      bookkeeping_data->heap_tracker.RecordFree(entries[i].addr,
+                                                entries[i].sequence_number);
+    }
+  } else if (rec->record_type == BookkeepingRecord::Type::Malloc) {
+    AllocRecord& alloc_rec = rec->alloc_record;
+    std::vector<CodeLocation> code_locations;
+    for (unwindstack::FrameData& frame : alloc_rec.frames)
+      code_locations.emplace_back(frame.map_name, frame.function_name);
+    bookkeeping_data->heap_tracker.RecordMalloc(
+        code_locations, alloc_rec.alloc_metadata.alloc_address,
+        alloc_rec.alloc_metadata.alloc_size,
+        alloc_rec.alloc_metadata.sequence_number);
+  } else {
+    PERFETTO_DCHECK(false);
+  }
+}
+
+void BookkeepingThread::NotifyClientConnected(pid_t pid) {
+  std::lock_guard<std::mutex> guard(bookkeeping_mutex_);
+  // emplace() keeps an existing entry for |pid| and only inserts when there
+  // is none; either way .first refers to the entry that gets the reference.
+  BookkeepingData& data =
+      bookkeeping_data_.emplace(pid, &callsites_).first->second;
+  ++data.ref_count;
+}
+
+void BookkeepingThread::NotifyClientDisconnected(pid_t pid) {
+  std::lock_guard<std::mutex> guard(bookkeeping_mutex_);
+  auto entry = bookkeeping_data_.find(pid);
+  const bool known_pid = entry != bookkeeping_data_.end();
+  // A disconnect for a pid without an entry trips the debug check.
+  PERFETTO_DCHECK(known_pid);
+  if (known_pid)
+    --entry->second.ref_count;
+}
+
+__attribute__((noreturn)) void BookkeepingThread::Run(
+    BoundedQueue<BookkeepingRecord>* input_queue) {
+  while (true) {  // Never returns: handles one queued record per iteration.
+    BookkeepingRecord next = input_queue->Get();
+    HandleBookkeepingRecord(&next);
+  }
+}
+
 }  // namespace perfetto
diff --git a/src/profiling/memory/bookkeeping.h b/src/profiling/memory/bookkeeping.h
index bd56dd8..245c1ef 100644
--- a/src/profiling/memory/bookkeeping.h
+++ b/src/profiling/memory/bookkeeping.h
@@ -18,6 +18,8 @@
 #define SRC_PROFILING_MEMORY_BOOKKEEPING_H_
 
 #include "perfetto/base/lookup_set.h"
+#include "src/profiling/memory/bounded_queue.h"
+#include "src/profiling/memory/queue_messages.h"
 #include "src/profiling/memory/string_interner.h"
 
 #include <map>
@@ -82,6 +84,8 @@
     Node(InternedCodeLocation location, Node* parent)
         : parent_(parent), location_(std::move(location)) {}
 
+    std::vector<InternedCodeLocation> BuildCallstack() const;
+
    private:
     Node* GetOrCreateChild(const InternedCodeLocation& loc);
 
@@ -123,6 +127,7 @@
                     uint64_t size,
                     uint64_t sequence_number);
   void RecordFree(uint64_t address, uint64_t sequence_number);
+  void Dump(int fd);
 
  private:
   static constexpr uint64_t kNoopFree = 0;
@@ -188,6 +193,49 @@
   GlobalCallstackTrie* const callsites_;
 };
 
+struct BookkeepingData {
+  // Ownership of callsites remains with caller and has to outlive this object.
+  explicit BookkeepingData(GlobalCallstackTrie* callsites)
+      : heap_tracker(callsites) {}
+
+  HeapTracker heap_tracker;
+
+  // This is different to a shared_ptr to HeapTracker, because we want to keep
+  // it around until the first dump after the last socket for the PID has
+  // disconnected.
+  uint64_t ref_count = 0;
+};
+
+// BookkeepingThread owns the BookkeepingData for all processes. The Run()
+// method receives messages on the input_queue and does the bookkeeping.
+class BookkeepingThread {
+ public:
+  // |file_name| is the prefix of the dump files; "." and the pid get appended.
+  explicit BookkeepingThread(std::string file_name)
+      : file_name_(std::move(file_name)) {}
+
+  void Run(BoundedQueue<BookkeepingRecord>* input_queue);
+
+  // Inform the bookkeeping thread that a socket for this pid connected.
+  // This can be called from arbitrary threads.
+  void NotifyClientConnected(pid_t pid);
+
+  // Inform the bookkeeping thread that a socket for this pid disconnected.
+  // After the last client for a PID disconnects, the BookkeepingData is
+  // retained until the next dump, upon which it gets garbage collected.
+  // This can be called from arbitrary threads.
+  void NotifyClientDisconnected(pid_t pid);
+
+  void HandleBookkeepingRecord(BookkeepingRecord* rec);
+
+ private:
+  GlobalCallstackTrie callsites_;
+
+  std::map<pid_t, BookkeepingData> bookkeeping_data_;
+  std::mutex bookkeeping_mutex_;
+  std::string file_name_;
+};
+
 }  // namespace perfetto
 
 #endif  // SRC_PROFILING_MEMORY_BOOKKEEPING_H_
diff --git a/src/profiling/memory/heapprofd_integrationtest.cc b/src/profiling/memory/heapprofd_integrationtest.cc
index 2703c4f..671cf02 100644
--- a/src/profiling/memory/heapprofd_integrationtest.cc
+++ b/src/profiling/memory/heapprofd_integrationtest.cc
@@ -51,22 +51,24 @@
 
 TEST_F(HeapprofdIntegrationTest, MAYBE_EndToEnd) {
   GlobalCallstackTrie callsites;
+  // TODO(fmayer): Actually test the dump.
+  BookkeepingThread bookkeeping_thread("");
 
   base::TestTaskRunner task_runner;
   auto done = task_runner.CreateCheckpoint("done");
   constexpr double kSamplingRate = 123;
   SocketListener listener(
       {kSamplingRate},
-      [&done](UnwindingRecord r) {
+      [&done, &bookkeeping_thread](UnwindingRecord r) {
         // TODO(fmayer): Test symbolization and result of unwinding.
         // This check will only work on in-tree builds as out-of-tree
         // libunwindstack is behaving a bit weirdly.
         BookkeepingRecord bookkeeping_record;
         ASSERT_TRUE(HandleUnwindingRecord(&r, &bookkeeping_record));
-        HandleBookkeepingRecord(&bookkeeping_record);
+        bookkeeping_thread.HandleBookkeepingRecord(&bookkeeping_record);
         done();
       },
-      &callsites);
+      &bookkeeping_thread);
 
   auto sock = base::UnixSocket::Listen(kSocketName, &listener, &task_runner);
   if (!sock->is_listening()) {
diff --git a/src/profiling/memory/main.cc b/src/profiling/memory/main.cc
index 9b859b5..1b12767 100644
--- a/src/profiling/memory/main.cc
+++ b/src/profiling/memory/main.cc
@@ -20,6 +20,9 @@
 #include <memory>
 #include <vector>
 
+#include <signal.h>
+
+#include "perfetto/base/event.h"
 #include "perfetto/base/unix_socket.h"
 #include "src/profiling/memory/bounded_queue.h"
 #include "src/profiling/memory/socket_listener.h"
@@ -34,6 +37,12 @@
 constexpr size_t kUnwinderThreads = 5;
 constexpr double kDefaultSamplingRate = 1;
 
+base::Event* g_dump_evt = nullptr;
+// Installed for SIGUSR1 in this file; it only signals g_dump_evt.
+void DumpSignalHandler(int) {
+  g_dump_evt->Notify();
+}
+
 // We create kUnwinderThreads unwinding threads and one bookeeping thread.
 // The bookkeeping thread is singleton in order to avoid expensive and
 // complicated synchronisation in the bookkeeping.
@@ -76,12 +85,29 @@
     }
   }
 
-  GlobalCallstackTrie callsites;
+  base::UnixTaskRunner task_runner;
+  BoundedQueue<BookkeepingRecord> bookkeeping_queue(kBookkeepingQueueSize);
+  // We set this up before launching any threads, so we do not have to use a
+  // std::atomic for g_dump_evt.
+  g_dump_evt = new base::Event();
+
+  struct sigaction action = {};
+  action.sa_handler = DumpSignalHandler;
+  PERFETTO_CHECK(sigaction(SIGUSR1, &action, nullptr) == 0);
+  task_runner.AddFileDescriptorWatch(g_dump_evt->fd(), [&bookkeeping_queue] {
+    g_dump_evt->Clear();
+
+    BookkeepingRecord rec = {};
+    rec.record_type = BookkeepingRecord::Type::Dump;
+    bookkeeping_queue.Add(std::move(rec));
+  });
+
   std::unique_ptr<base::UnixSocket> sock;
 
-  BoundedQueue<BookkeepingRecord> callsites_queue(kBookkeepingQueueSize);
-  std::thread bookkeeping_thread(
-      [&callsites_queue] { BookkeepingMainLoop(&callsites_queue); });
+  BookkeepingThread bookkeeping_thread("/data/local/tmp/heap_dump");
+  std::thread bookkeeping_th([&bookkeeping_thread, &bookkeeping_queue] {
+    bookkeeping_thread.Run(&bookkeeping_queue);
+  });
 
   std::array<BoundedQueue<UnwindingRecord>, kUnwinderThreads> unwinder_queues;
   for (size_t i = 0; i < kUnwinderThreads; ++i)
@@ -89,8 +115,8 @@
   std::vector<std::thread> unwinding_threads;
   unwinding_threads.reserve(kUnwinderThreads);
   for (size_t i = 0; i < kUnwinderThreads; ++i) {
-    unwinding_threads.emplace_back([&unwinder_queues, &callsites_queue, i] {
-      UnwindingMainLoop(&unwinder_queues[i], &callsites_queue);
+    unwinding_threads.emplace_back([&unwinder_queues, &bookkeeping_queue, i] {
+      UnwindingMainLoop(&unwinder_queues[i], &bookkeeping_queue);
     });
   }
 
@@ -99,14 +125,12 @@
         std::move(r));
   };
   SocketListener listener({sampling_rate}, std::move(on_record_received),
-                          &callsites);
+                          &bookkeeping_thread);
 
-  base::UnixTaskRunner read_task_runner;
   if (optind == argc - 1) {
     // Allow to be able to manually specify the socket to listen on
     // for testing and sideloading purposes.
-    sock =
-        base::UnixSocket::Listen(argv[argc - 1], &listener, &read_task_runner);
+    sock = base::UnixSocket::Listen(argv[argc - 1], &listener, &task_runner);
   } else if (optind == argc) {
     // When running as a service launched by init on Android, the socket
     // is created by init and passed to the application using an environment
@@ -122,7 +146,7 @@
       PERFETTO_FATAL(
           "Invalid ANDROID_SOCKET_heapprofd. Expected decimal integer.");
     sock = base::UnixSocket::Listen(base::ScopedFile(raw_fd), &listener,
-                                    &read_task_runner);
+                                    &task_runner);
   } else {
     PERFETTO_FATAL("Invalid number of arguments. %s [-r rate] [SOCKET]",
                    argv[0]);
@@ -132,7 +156,7 @@
     PERFETTO_FATAL("Failed to initialize socket: %s",
                    strerror(sock->last_error()));
 
-  read_task_runner.Run();
+  task_runner.Run();
   return 0;
 }
 }  // namespace
diff --git a/src/profiling/memory/queue_messages.h b/src/profiling/memory/queue_messages.h
new file mode 100644
index 0000000..45f5809
--- /dev/null
+++ b/src/profiling/memory/queue_messages.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file contains messages sent between the threads over BoundedQueue.
+
+#ifndef SRC_PROFILING_MEMORY_QUEUE_MESSAGES_H_
+#define SRC_PROFILING_MEMORY_QUEUE_MESSAGES_H_
+
+#include <unwindstack/Maps.h>
+#include <unwindstack/Unwinder.h>
+#include "src/profiling/memory/wire_protocol.h"
+
+namespace perfetto {
+
+struct ProcessMetadata;
+
+struct UnwindingRecord {
+  pid_t pid;
+  size_t size;
+  std::unique_ptr<uint8_t[]> data;
+  std::weak_ptr<ProcessMetadata> metadata;
+};
+
+struct FreeRecord {
+  std::unique_ptr<uint8_t[]> free_data;
+  // This is a pointer into free_data.
+  FreeMetadata* metadata;
+};
+
+struct AllocRecord {
+  AllocMetadata alloc_metadata;
+  std::vector<unwindstack::FrameData> frames;
+};
+
+struct BookkeepingRecord {
+  enum class Type {
+    Dump = 0,    // Request to dump the heaps; the record payloads are unused.
+    Malloc = 1,  // alloc_record describes the allocation.
+    Free = 2,    // free_record carries the frees.
+  };
+  pid_t pid;  // Process the record belongs to; 0 when there is none (Dump).
+  // TODO(fmayer): Use a union.
+  Type record_type;
+  AllocRecord alloc_record;
+  FreeRecord free_record;
+};
+
+}  // namespace perfetto
+
+#endif  // SRC_PROFILING_MEMORY_QUEUE_MESSAGES_H_
diff --git a/src/profiling/memory/socket_listener.cc b/src/profiling/memory/socket_listener.cc
index ea5780f..6d23e64 100644
--- a/src/profiling/memory/socket_listener.cc
+++ b/src/profiling/memory/socket_listener.cc
@@ -20,6 +20,7 @@
 namespace perfetto {
 
 void SocketListener::OnDisconnect(base::UnixSocket* self) {
+  bookkeeping_thread_->NotifyClientDisconnected(self->peer_pid());
   sockets_.erase(self);
 }
 
@@ -28,6 +29,7 @@
     std::unique_ptr<base::UnixSocket> new_connection) {
   base::UnixSocket* new_connection_raw = new_connection.get();
   sockets_.emplace(new_connection_raw, std::move(new_connection));
+  bookkeeping_thread_->NotifyClientConnected(new_connection_raw->peer_pid());
 }
 
 void SocketListener::OnDataAvailable(base::UnixSocket* self) {
@@ -84,7 +86,7 @@
   if (it == process_metadata_.end() || it->second.expired()) {
     // We have not seen the PID yet or the PID is being recycled.
     entry->process_metadata = std::make_shared<ProcessMetadata>(
-        peer_pid, std::move(maps_fd), std::move(mem_fd), callsites_);
+        peer_pid, std::move(maps_fd), std::move(mem_fd));
     process_metadata_[peer_pid] = entry->process_metadata;
   } else {
     // If the process already has metadata, this is an additional socket for
diff --git a/src/profiling/memory/socket_listener.h b/src/profiling/memory/socket_listener.h
index f6d3464..b06a98d 100644
--- a/src/profiling/memory/socket_listener.h
+++ b/src/profiling/memory/socket_listener.h
@@ -19,6 +19,7 @@
 
 #include "perfetto/base/unix_socket.h"
 #include "src/profiling/memory/bookkeeping.h"
+#include "src/profiling/memory/queue_messages.h"
 #include "src/profiling/memory/record_reader.h"
 #include "src/profiling/memory/unwinding.h"
 #include "src/profiling/memory/wire_protocol.h"
@@ -32,10 +33,10 @@
  public:
   SocketListener(ClientConfiguration client_config,
                  std::function<void(UnwindingRecord)> fn,
-                 GlobalCallstackTrie* callsites)
+                 BookkeepingThread* bookkeeping_thread)
       : client_config_(client_config),
         callback_function_(std::move(fn)),
-        callsites_(callsites) {}
+        bookkeeping_thread_(bookkeeping_thread) {}
   void OnDisconnect(base::UnixSocket* self) override;
   void OnNewIncomingConnection(
       base::UnixSocket* self,
@@ -69,7 +70,7 @@
   std::map<base::UnixSocket*, Entry> sockets_;
   std::map<pid_t, std::weak_ptr<ProcessMetadata>> process_metadata_;
   std::function<void(UnwindingRecord)> callback_function_;
-  GlobalCallstackTrie* callsites_;
+  BookkeepingThread* const bookkeeping_thread_;
 };
 
 }  // namespace perfetto
diff --git a/src/profiling/memory/socket_listener_unittest.cc b/src/profiling/memory/socket_listener_unittest.cc
index f40cb9f..2e7910f 100644
--- a/src/profiling/memory/socket_listener_unittest.cc
+++ b/src/profiling/memory/socket_listener_unittest.cc
@@ -53,9 +53,9 @@
     callback_called();
   };
 
-  GlobalCallstackTrie bookkeeping;
+  BookkeepingThread actor("");
   SocketListener listener({},  // We do not care about the sampling rate.
-                          std::move(callback_fn), &bookkeeping);
+                          std::move(callback_fn), &actor);
   MockEventListener client_listener;
   EXPECT_CALL(client_listener, OnConnect(_, _))
       .WillOnce(InvokeWithoutArgs(connected));
diff --git a/src/profiling/memory/unwinding.cc b/src/profiling/memory/unwinding.cc
index f43743b..d464444 100644
--- a/src/profiling/memory/unwinding.cc
+++ b/src/profiling/memory/unwinding.cc
@@ -194,26 +194,30 @@
   if (!ReceiveWireMessage(reinterpret_cast<char*>(rec->data.get()), rec->size,
                           &msg))
     return false;
-  switch (msg.record_type) {
-    case RecordType::Malloc: {
-      std::shared_ptr<ProcessMetadata> metadata = rec->metadata.lock();
-      if (!metadata)
-        // Process has already gone away.
-        return false;
+  if (msg.record_type == RecordType::Malloc) {
+    std::shared_ptr<ProcessMetadata> metadata = rec->metadata.lock();
+    if (!metadata) {
+      // Process has already gone away.
+      return false;
+    }
 
-      out->metadata = std::move(rec->metadata);
-      out->free_record = {};
-      return DoUnwind(&msg, metadata.get(), &out->alloc_record);
+    out->pid = rec->pid;
+    out->record_type = BookkeepingRecord::Type::Malloc;
+    if (!DoUnwind(&msg, metadata.get(), &out->alloc_record)) {
+      return false;
     }
-    case RecordType::Free: {
-      // We need to keep this alive, because msg.free_header is a pointer into
-      // this.
-      out->metadata = std::move(rec->metadata);
-      out->free_record.free_data = std::move(rec->data);
-      out->free_record.metadata = msg.free_header;
-      out->alloc_record = {};
-      return true;
-    }
+    return true;
+  } else if (msg.record_type == RecordType::Free) {
+    out->record_type = BookkeepingRecord::Type::Free;
+    out->pid = rec->pid;
+    // We need to keep this alive, because msg.free_header is a pointer into
+    // this.
+    out->free_record.free_data = std::move(rec->data);
+    out->free_record.metadata = msg.free_header;
+    return true;
+  } else {
+    PERFETTO_DCHECK(false);
+    return false;
   }
 }
 
@@ -227,41 +231,4 @@
       output_queue->Add(std::move(out));
   }
 }
-
-void HandleBookkeepingRecord(BookkeepingRecord* rec) {
-  std::shared_ptr<ProcessMetadata> metadata = rec->metadata.lock();
-  if (!metadata)
-    // Process has already gone away.
-    return;
-
-  if (rec->free_record.free_data) {
-    FreeRecord& free_rec = rec->free_record;
-    FreePageEntry* entries = free_rec.metadata->entries;
-    uint64_t num_entries = free_rec.metadata->num_entries;
-    if (num_entries > kFreePageSize)
-      return;
-    for (size_t i = 0; i < num_entries; ++i) {
-      const FreePageEntry& entry = entries[i];
-      metadata->heap_dump.RecordFree(entry.addr, entry.sequence_number);
-    }
-  } else {
-    AllocRecord& alloc_rec = rec->alloc_record;
-    std::vector<CodeLocation> code_locations;
-    for (unwindstack::FrameData& frame : alloc_rec.frames)
-      code_locations.emplace_back(frame.map_name, frame.function_name);
-    metadata->heap_dump.RecordMalloc(code_locations,
-                                     alloc_rec.alloc_metadata.alloc_address,
-                                     alloc_rec.alloc_metadata.alloc_size,
-                                     alloc_rec.alloc_metadata.sequence_number);
-  }
-}
-
-__attribute__((noreturn)) void BookkeepingMainLoop(
-    BoundedQueue<BookkeepingRecord>* input_queue) {
-  for (;;) {
-    BookkeepingRecord rec = input_queue->Get();
-    HandleBookkeepingRecord(&rec);
-  }
-}
-
 }  // namespace perfetto
diff --git a/src/profiling/memory/unwinding.h b/src/profiling/memory/unwinding.h
index 1bdcf0d..471bec6 100644
--- a/src/profiling/memory/unwinding.h
+++ b/src/profiling/memory/unwinding.h
@@ -22,6 +22,7 @@
 #include "perfetto/base/scoped_file.h"
 #include "src/profiling/memory/bookkeeping.h"
 #include "src/profiling/memory/bounded_queue.h"
+#include "src/profiling/memory/queue_messages.h"
 #include "src/profiling/memory/wire_protocol.h"
 
 namespace perfetto {
@@ -39,20 +40,13 @@
 };
 
 struct ProcessMetadata {
-  ProcessMetadata(pid_t p,
-                  base::ScopedFile maps_fd,
-                  base::ScopedFile mem,
-                  GlobalCallstackTrie* callsites)
-      : pid(p),
-        maps(std::move(maps_fd)),
-        mem_fd(std::move(mem)),
-        heap_dump(callsites) {
+  ProcessMetadata(pid_t p, base::ScopedFile maps_fd, base::ScopedFile mem)
+      : pid(p), maps(std::move(maps_fd)), mem_fd(std::move(mem)) {
     PERFETTO_CHECK(maps.Parse());
   }
   pid_t pid;
   FileDescriptorMaps maps;
   base::ScopedFile mem_fd;
-  HeapTracker heap_dump;
 };
 
 // Overlays size bytes pointed to by stack for addresses in [sp, sp + size).
@@ -72,40 +66,15 @@
 
 size_t RegSize(unwindstack::ArchEnum arch);
 
-struct UnwindingRecord {
-  pid_t pid;
-  size_t size;
-  std::unique_ptr<uint8_t[]> data;
-  std::weak_ptr<ProcessMetadata> metadata;
-};
 
-struct FreeRecord {
-  std::unique_ptr<uint8_t[]> free_data;
-  FreeMetadata* metadata;
-};
-
-struct AllocRecord {
-  AllocMetadata alloc_metadata;
-  std::vector<unwindstack::FrameData> frames;
-};
-
-struct BookkeepingRecord {
-  // TODO(fmayer): Use a union.
-  std::weak_ptr<ProcessMetadata> metadata;
-  AllocRecord alloc_record;
-  FreeRecord free_record;
-};
 
 bool DoUnwind(WireMessage*, ProcessMetadata* metadata, AllocRecord* out);
 
 bool HandleUnwindingRecord(UnwindingRecord* rec, BookkeepingRecord* out);
-void HandleBookkeepingRecord(BookkeepingRecord* rec);
 
 void UnwindingMainLoop(BoundedQueue<UnwindingRecord>* input_queue,
                        BoundedQueue<BookkeepingRecord>* output_queue);
 
-void BookkeepingMainLoop(BoundedQueue<BookkeepingRecord>* input_queue);
-
 }  // namespace perfetto
 
 #endif  // SRC_PROFILING_MEMORY_UNWINDING_H_
diff --git a/src/profiling/memory/unwinding_unittest.cc b/src/profiling/memory/unwinding_unittest.cc
index 3993692..f671585 100644
--- a/src/profiling/memory/unwinding_unittest.cc
+++ b/src/profiling/memory/unwinding_unittest.cc
@@ -122,8 +122,7 @@
   base::ScopedFile proc_maps(base::OpenFile("/proc/self/maps", O_RDONLY));
   base::ScopedFile proc_mem(base::OpenFile("/proc/self/mem", O_RDONLY));
   GlobalCallstackTrie callsites;
-  ProcessMetadata metadata(getpid(), std::move(proc_maps), std::move(proc_mem),
-                           &callsites);
+  ProcessMetadata metadata(getpid(), std::move(proc_maps), std::move(proc_mem));
   WireMessage msg;
   auto record = GetRecord(&msg);
   AllocRecord out;
diff --git a/src/trace_processor/span_operator_table.cc b/src/trace_processor/span_operator_table.cc
index 6f898bc..091427f 100644
--- a/src/trace_processor/span_operator_table.cc
+++ b/src/trace_processor/span_operator_table.cc
@@ -334,6 +334,7 @@
         value->text_value =
             reinterpret_cast<const char*>(sqlite3_column_text(stmt, i));
         break;
+      case Table::ColumnType::kDouble:
       case Table::ColumnType::kInt:
         PERFETTO_CHECK(false);
     }
@@ -427,6 +428,7 @@
                           kSqliteTransient);
       break;
     }
+    case Table::ColumnType::kDouble:
     case Table::ColumnType::kInt:
       PERFETTO_CHECK(false);
   }
diff --git a/src/trace_processor/trace_processor_shell.cc b/src/trace_processor/trace_processor_shell.cc
index 9496b27..4fc3dbc 100644
--- a/src/trace_processor/trace_processor_shell.cc
+++ b/src/trace_processor/trace_processor_shell.cc
@@ -286,7 +286,7 @@
 }
 
 void PrintUsage(char** argv) {
-  PERFETTO_ELOG("Usage: %s [-d] [-q query.sql] trace_file.proto", argv[0]);
+  PERFETTO_ELOG("Usage: %s [-d] [-q query.sql] trace_file.pb", argv[0]);
 }
 
 int TraceProcessorMain(int argc, char** argv) {
diff --git a/src/tracing/BUILD.gn b/src/tracing/BUILD.gn
index cae4c23..b90ce18 100644
--- a/src/tracing/BUILD.gn
+++ b/src/tracing/BUILD.gn
@@ -65,19 +65,6 @@
   ]
 }
 
-executable("consumer_api_test") {
-  deps = [
-    ":api",
-    "../../gn:default_deps",
-    "../../protos/perfetto/config:lite",
-    "../../protos/perfetto/trace:lite",
-    "../base",
-  ]
-  sources = [
-    "api_impl/consumer_api_test.cc",
-  ]
-}
-
 source_set("unittests") {
   testonly = true
   deps = [
@@ -146,6 +133,19 @@
 }
 
 if (!build_with_chromium) {
+  executable("consumer_api_test") {
+    deps = [
+      ":api",
+      "../../gn:default_deps",
+      "../../protos/perfetto/config:lite",
+      "../../protos/perfetto/trace:lite",
+      "../base",
+    ]
+    sources = [
+      "api_impl/consumer_api_test.cc",
+    ]
+  }
+
   # IPC transport: only consumer side
   # TODO(fmayer): Remove duplication between this and ipc.
   source_set("ipc_consumer") {
diff --git a/tools/trace_to_text/BUILD.gn b/tools/trace_to_text/BUILD.gn
index ba7cd9f..e606369 100644
--- a/tools/trace_to_text/BUILD.gn
+++ b/tools/trace_to_text/BUILD.gn
@@ -30,6 +30,7 @@
     "ftrace_inode_handler.cc",
     "ftrace_inode_handler.h",
     "main.cc",
+    "process_formatter.h",
   ]
 }
 
diff --git a/tools/trace_to_text/ftrace_event_formatter.cc b/tools/trace_to_text/ftrace_event_formatter.cc
index f6f8825..4f312e6 100644
--- a/tools/trace_to_text/ftrace_event_formatter.cc
+++ b/tools/trace_to_text/ftrace_event_formatter.cc
@@ -2859,26 +2859,58 @@
   return (timestamp / 1000) % 1000000ul;
 }
 
-std::string FormatPrefix(uint64_t timestamp, uint64_t cpu) {
+// Builds the "<comm>-<pid> (<tgid>) [<cpu>] d..3 <sec>.<usec>: " line prefix.
+std::string FormatPrefix(uint64_t timestamp,
+                         uint64_t cpu,
+                         uint32_t pid,
+                         uint32_t tgid,
+                         std::string name) {
   char line[2048];
   uint64_t seconds = TimestampToSeconds(timestamp);
   uint64_t useconds = TimestampToMicroseconds(timestamp);
-  sprintf(line,
-          "<idle>-0     (-----) [%03" PRIu64 "] d..3 %" PRIu64 ".%.6" PRIu64
-          ": ",
-          cpu, seconds, useconds);
+  if (pid == 0)
+    name = "<idle>";
+  if (tgid == 0) {  // Unknown tgid: print the "(-----)" placeholder.
+    snprintf(line, sizeof(line),  // Bounded: |name| comes from the trace.
+             "%s-%" PRIu32 "     (-----) [%03" PRIu64 "] d..3 %" PRIu64
+             ".%.6" PRIu64 ": ",
+             name.c_str(), pid, cpu, seconds, useconds);
+  } else {
+    snprintf(line, sizeof(line),  // |cpu| is 64-bit, hence PRIu64 for it.
+             "%s-%" PRIu32 "     (%5" PRIu32 ") [%03" PRIu64 "] d..3 %" PRIu64
+             ".%.6" PRIu64 ": ",
+             name.c_str(), pid, tgid, cpu, seconds, useconds);
+  }
   return std::string(line);
 }
 
 }  // namespace
 
-std::string FormatFtraceEvent(uint64_t timestamp,
-                              size_t cpu,
-                              const protos::FtraceEvent& event) {
+std::string FormatFtraceEvent(
+    uint64_t timestamp,
+    size_t cpu,
+    const protos::FtraceEvent& event,
+    const std::unordered_map<uint32_t /*tid*/, uint32_t /*tgid*/>& thread_map) {
+  // Sched_switch events contain the thread name so use that in the prefix.
+  std::string name;
+  if (event.has_sched_switch()) {
+    name = event.sched_switch().prev_comm();
+  } else {
+    name = "<...>";
+  }
+
   std::string line = FormatEventText(event);
   if (line == "")
     return "";
-  return FormatPrefix(timestamp, cpu) + line;
+
+  // Retrieve the tgid if it exists for the current event pid.
+  uint32_t pid = event.pid();
+  uint32_t tgid = 0;
+  auto it = thread_map.find(pid);
+  if (it != thread_map.end()) {
+    tgid = it->second;
+  }
+  return FormatPrefix(timestamp, cpu, pid, tgid, name) + line;
 }
 
 }  // namespace perfetto
diff --git a/tools/trace_to_text/ftrace_event_formatter.h b/tools/trace_to_text/ftrace_event_formatter.h
index 7b76e26..27b0142 100644
--- a/tools/trace_to_text/ftrace_event_formatter.h
+++ b/tools/trace_to_text/ftrace_event_formatter.h
@@ -20,14 +20,17 @@
 #include "tools/trace_to_text/ftrace_event_formatter.h"
 
 #include <string>
+#include <unordered_map>
 
 #include "perfetto/trace/trace_packet.pb.h"
 
 namespace perfetto {
 
-std::string FormatFtraceEvent(uint64_t timestamp,
-                              size_t cpu,
-                              const protos::FtraceEvent&);
+std::string FormatFtraceEvent(
+    uint64_t timestamp,
+    size_t cpu,
+    const protos::FtraceEvent&,
+    const std::unordered_map<uint32_t /*tid*/, uint32_t /*tgid*/>& thread_map);
 
 }  // namespace perfetto
 
diff --git a/tools/trace_to_text/main.cc b/tools/trace_to_text/main.cc
index 709d139..2950bb2 100644
--- a/tools/trace_to_text/main.cc
+++ b/tools/trace_to_text/main.cc
@@ -30,6 +30,7 @@
 #include <memory>
 #include <ostream>
 #include <sstream>
+#include <unordered_map>
 #include <utility>
 
 #include <google/protobuf/compiler/importer.h>
@@ -46,10 +47,13 @@
 #include "perfetto/traced/sys_stats_counters.h"
 #include "tools/trace_to_text/ftrace_event_formatter.h"
 #include "tools/trace_to_text/ftrace_inode_handler.h"
+#include "tools/trace_to_text/process_formatter.h"
 
 namespace perfetto {
 namespace {
 
+// Having an empty traceEvents object is necessary for trace viewer to
+// load the json properly.
 const char kTraceHeader[] = R"({
   "traceEvents": [],
 )";
@@ -58,6 +62,14 @@
   "controllerTraceDataKey": "systraceController"
 })";
 
+const char kProcessDumpHeader[] =
+    ""
+    "\"androidProcessDump\": "
+    "\"PROCESS DUMP\\nUSER           PID  PPID     VSZ    RSS WCHAN  "
+    "PC S NAME                        COMM                       \\n";
+
+const char kThreadHeader[] = "USER           PID   TID CMD \\n";
+
 const char kFtraceHeader[] =
     ""
     "  \"systemTraceEvents\": \""
@@ -176,21 +188,49 @@
 int TraceToSystrace(std::istream* input,
                     std::ostream* output,
                     bool wrap_in_json) {
-  std::multimap<uint64_t, std::string> sorted;
+  std::multimap<uint64_t, std::string> ftrace_sorted;
+  std::vector<std::string> proc_dump;
+  std::vector<std::string> thread_dump;
+  std::unordered_map<uint32_t /*tid*/, uint32_t /*tgid*/> thread_map;
 
   std::vector<const char*> meminfo_strs = BuildMeminfoCounterNames();
   std::vector<const char*> vmstat_strs = BuildVmstatCounterNames();
 
-  ForEachPacketInTrace(input, [&sorted, &meminfo_strs, &vmstat_strs](
-                                  const protos::TracePacket& packet) {
+  // First pass: buffer every packet without a process tree and build the
+  // tid -> tgid map, so the second pass can pass the full map to the formatter.
+  std::vector<protos::TracePacket> packets_to_process;
+  ForEachPacketInTrace(
+      input, [&thread_map, &packets_to_process, &proc_dump,
+              &thread_dump](const protos::TracePacket& packet) {
+        if (!packet.has_process_tree()) {
+          // |packet| is a const reference, so this stores a copy of it.
+          packets_to_process.push_back(packet);
+          return;
+        }
+        const ProcessTree& process_tree = packet.process_tree();
+        for (const auto& process : process_tree.processes()) {
+          // Main threads will have the same pid as tgid.
+          thread_map[static_cast<uint32_t>(process.pid())] =
+              static_cast<uint32_t>(process.pid());
+          proc_dump.emplace_back(FormatProcess(process));
+        }
+        for (const auto& thread : process_tree.threads()) {
+          // Populate thread map for matching tids to tgids.
+          thread_map[static_cast<uint32_t>(thread.tid())] =
+              static_cast<uint32_t>(thread.tgid());
+          thread_dump.emplace_back(FormatThread(thread));
+        }
+      });
+
+  for (const auto& packet : packets_to_process) {
     if (packet.has_ftrace_events()) {
       const FtraceEventBundle& bundle = packet.ftrace_events();
       for (const FtraceEvent& event : bundle.event()) {
-        std::string line =
-            FormatFtraceEvent(event.timestamp(), bundle.cpu(), event);
+        std::string line = FormatFtraceEvent(event.timestamp(), bundle.cpu(),
+                                             event, thread_map);
         if (line == "")
           continue;
-        sorted.emplace(event.timestamp(), line);
+        ftrace_sorted.emplace(event.timestamp(), line);
       }
     }  // packet.has_ftrace_events
 
@@ -205,7 +245,7 @@
         sprintf(str, "C|1|%s|%" PRIu64, meminfo_strs[meminfo.key()],
                 static_cast<uint64_t>(meminfo.value()));
         event.mutable_print()->set_buf(str);
-        sorted.emplace(ts, FormatFtraceEvent(ts, 0, event));
+        ftrace_sorted.emplace(ts, FormatFtraceEvent(ts, 0, event, thread_map));
       }
       for (const auto& vmstat : sys_stats.vmstat()) {
         FtraceEvent event;
@@ -216,20 +256,29 @@
         sprintf(str, "C|1|%s|%" PRIu64, vmstat_strs[vmstat.key()],
                 static_cast<uint64_t>(vmstat.value()));
         event.mutable_print()->set_buf(str);
-        sorted.emplace(ts, FormatFtraceEvent(ts, 0, event));
+        ftrace_sorted.emplace(ts, FormatFtraceEvent(ts, 0, event, thread_map));
       }
     }
-  });
+  }
 
   if (wrap_in_json) {
     *output << kTraceHeader;
+    *output << kProcessDumpHeader;
+    for (const auto& process : proc_dump) {
+      *output << process << "\\n";
+    }
+    *output << kThreadHeader;
+    for (const auto& thread : thread_dump) {
+      *output << thread << "\\n";
+    }
+    *output << "\",";
     *output << kFtraceHeader;
   }
 
   fprintf(stderr, "\n");
-  size_t total_events = sorted.size();
+  size_t total_events = ftrace_sorted.size();
   size_t written_events = 0;
-  for (auto it = sorted.begin(); it != sorted.end(); it++) {
+  for (auto it = ftrace_sorted.begin(); it != ftrace_sorted.end(); it++) {
     *output << it->second << (wrap_in_json ? "\\n" : "\n");
     if (written_events++ % 100 == 0 && !isatty(STDOUT_FILENO)) {
       fprintf(stderr, "Writing trace: %.2f %%\r",
diff --git a/tools/trace_to_text/process_formatter.h b/tools/trace_to_text/process_formatter.h
new file mode 100644
index 0000000..62daf72
--- /dev/null
+++ b/tools/trace_to_text/process_formatter.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TOOLS_TRACE_TO_TEXT_PROCESS_FORMATTER_H_
+#define TOOLS_TRACE_TO_TEXT_PROCESS_FORMATTER_H_
+
+#include <string>
+
+#include "perfetto/trace/trace_packet.pb.h"
+
+namespace perfetto {
+
+// Returns a `ps`-style row for |p|; only pid, ppid and name are real data.
+inline std::string FormatProcess(const protos::ProcessTree::Process& p) {
+  // An entry without a cmdline prints an empty name rather than reading [0].
+  const char* cmd = p.cmdline_size() > 0 ? p.cmdline(0).c_str() : "";
+  char line[2048];  // snprintf() truncates overlong rows, never overflows.
+  snprintf(line, sizeof(line),
+           "root             %d     %d   00000   000 null 0000000000 S %s      "
+           "   null",
+           p.pid(), p.ppid(), cmd);
+  return line;
+}
+
+// Returns a "USER PID TID CMD" row for |t|; unnamed threads show "<...>".
+inline std::string FormatThread(const protos::ProcessTree::Thread& t) {
+  const std::string name = t.has_name() ? t.name() : "<...>";
+  char line[2048];  // snprintf() truncates overlong rows, never overflows.
+  snprintf(line, sizeof(line), "root         %d %d %s", t.tgid(), t.tid(),
+           name.c_str());
+  return line;
+}
+
+}  // namespace perfetto
+
+#endif  // TOOLS_TRACE_TO_TEXT_PROCESS_FORMATTER_H_