traced_perf: feed samples through libunwindstack

Added per-DS bookkeeping queues with the same structure of periodic tick
tasks that process a queue of unwound samples.

All the per-instance maps have the same lifetime at the moment, but I'm
keeping them separate as the ownership will become more segmented once the
unwinding is put onto a separate thread (and ProcDescriptors will be
sharded, so keeping the current name for now).

Change-Id: Id91b7f440f68317ff429060eece1754922f28594
diff --git a/Android.bp b/Android.bp
index f9bb9a8..23a15ce 100644
--- a/Android.bp
+++ b/Android.bp
@@ -7466,6 +7466,7 @@
     ":perfetto_src_base_unix_socket",
     ":perfetto_src_ipc_client",
     ":perfetto_src_ipc_common",
+    ":perfetto_src_profiling_common_unwind_support",
     ":perfetto_src_profiling_perf_proc_descriptors",
     ":perfetto_src_profiling_perf_producer",
     ":perfetto_src_profiling_perf_regs_parsing",
diff --git a/src/profiling/common/unwind_support.cc b/src/profiling/common/unwind_support.cc
index 5738246..53d0e4a 100644
--- a/src/profiling/common/unwind_support.cc
+++ b/src/profiling/common/unwind_support.cc
@@ -89,5 +89,45 @@
   maps_.clear();
 }
 
+UnwindingMetadata::UnwindingMetadata(base::ScopedFile maps_fd,
+                                     base::ScopedFile mem_fd)
+    : fd_maps(std::move(maps_fd)),
+      fd_mem(std::make_shared<FDMemory>(std::move(mem_fd)))
+#if PERFETTO_BUILDFLAG(PERFETTO_ANDROID_BUILD)
+      ,
+      jit_debug(std::unique_ptr<unwindstack::JitDebug>(
+          new unwindstack::JitDebug(fd_mem))),
+      dex_files(std::unique_ptr<unwindstack::DexFiles>(
+          new unwindstack::DexFiles(fd_mem)))
+#endif
+{
+  bool parsed = fd_maps.Parse();
+  if (!parsed)
+    PERFETTO_DLOG("Failed initial maps parse");
+}
+
+void UnwindingMetadata::ReparseMaps() {
+  reparses++;
+  fd_maps.Reset();
+  fd_maps.Parse();
+#if PERFETTO_BUILDFLAG(PERFETTO_ANDROID_BUILD)
+  jit_debug =
+      std::unique_ptr<unwindstack::JitDebug>(new unwindstack::JitDebug(fd_mem));
+  dex_files =
+      std::unique_ptr<unwindstack::DexFiles>(new unwindstack::DexFiles(fd_mem));
+#endif
+}
+
+FrameData UnwindingMetadata::AnnotateFrame(unwindstack::FrameData frame) {
+  std::string build_id;
+  if (frame.map_name != "") {
+    unwindstack::MapInfo* map_info = fd_maps.Find(frame.pc);
+    if (map_info)
+      build_id = map_info->GetBuildID();
+  }
+
+  return FrameData{std::move(frame), std::move(build_id)};
+}
+
 }  // namespace profiling
 }  // namespace perfetto
diff --git a/src/profiling/common/unwind_support.h b/src/profiling/common/unwind_support.h
index 4a8eb6a..061ed61 100644
--- a/src/profiling/common/unwind_support.h
+++ b/src/profiling/common/unwind_support.h
@@ -20,6 +20,9 @@
 // defines PERFETTO_BUILDFLAG
 #include "perfetto/base/build_config.h"
 
+#include <memory>
+#include <string>
+
 #include <unwindstack/Maps.h>
 #include <unwindstack/Unwinder.h>
 #if PERFETTO_BUILDFLAG(PERFETTO_ANDROID_BUILD)
@@ -34,6 +37,15 @@
 namespace perfetto {
 namespace profiling {
 
+// libunwindstack's FrameData annotated with the build_id.
+struct FrameData {
+  FrameData(unwindstack::FrameData f, std::string id)
+      : frame(std::move(f)), build_id(std::move(id)) {}
+
+  unwindstack::FrameData frame;
+  std::string build_id;
+};
+
 // Read /proc/[pid]/maps from an open file descriptor.
 // TODO(fmayer): Figure out deduplication to other maps.
 class FDMaps : public unwindstack::Maps {
@@ -89,35 +101,19 @@
 };
 
 struct UnwindingMetadata {
-  UnwindingMetadata(pid_t _pid,
-                    base::ScopedFile maps_fd,
-                    base::ScopedFile mem_fd)
-      : pid(_pid),
-        fd_maps(std::move(maps_fd)),
-        fd_mem(std::make_shared<FDMemory>(std::move(mem_fd)))
-#if PERFETTO_BUILDFLAG(PERFETTO_ANDROID_BUILD)
-        ,
-        jit_debug(std::unique_ptr<unwindstack::JitDebug>(
-            new unwindstack::JitDebug(fd_mem))),
-        dex_files(std::unique_ptr<unwindstack::DexFiles>(
-            new unwindstack::DexFiles(fd_mem)))
-#endif
-  {
-    bool parsed = fd_maps.Parse();
-    PERFETTO_DCHECK(parsed);
-  }
-  void ReparseMaps() {
-    reparses++;
-    fd_maps.Reset();
-    fd_maps.Parse();
-#if PERFETTO_BUILDFLAG(PERFETTO_ANDROID_BUILD)
-    jit_debug = std::unique_ptr<unwindstack::JitDebug>(
-        new unwindstack::JitDebug(fd_mem));
-    dex_files = std::unique_ptr<unwindstack::DexFiles>(
-        new unwindstack::DexFiles(fd_mem));
-#endif
-  }
-  pid_t pid;
+  UnwindingMetadata(base::ScopedFile maps_fd, base::ScopedFile mem_fd);
+
+  // move-only
+  UnwindingMetadata(const UnwindingMetadata&) = delete;
+  UnwindingMetadata& operator=(const UnwindingMetadata&) = delete;
+
+  UnwindingMetadata(UnwindingMetadata&&) = default;
+  UnwindingMetadata& operator=(UnwindingMetadata&&) = default;
+
+  void ReparseMaps();
+
+  FrameData AnnotateFrame(unwindstack::FrameData frame);
+
   FDMaps fd_maps;
   // The API of libunwindstack expects shared_ptr for Memory.
   std::shared_ptr<unwindstack::Memory> fd_mem;
diff --git a/src/profiling/memory/unwinding.cc b/src/profiling/memory/unwinding.cc
index f6c2b37..752e9d4 100644
--- a/src/profiling/memory/unwinding.cc
+++ b/src/profiling/memory/unwinding.cc
@@ -142,7 +142,7 @@
   unwinder.SetJitDebug(metadata->jit_debug.get(), regs->Arch());
   unwinder.SetDexFiles(metadata->dex_files.get(), regs->Arch());
 #endif
-  // Surpress incorrect "variable may be uninitialized" error for if condition
+  // Suppress incorrect "variable may be uninitialized" error for if condition
   // after this loop. error_code = LastErrorCode gets run at least once.
   uint8_t error_code = 0;
   for (int attempt = 0; attempt < 2; ++attempt) {
@@ -164,24 +164,17 @@
       unwinder.SetDexFiles(metadata->dex_files.get(), regs->Arch());
 #endif
     }
-    unwinder.Unwind(&kSkipMaps, nullptr);
+    unwinder.Unwind(&kSkipMaps, /*map_suffixes_to_ignore=*/nullptr);
     error_code = unwinder.LastErrorCode();
     if (error_code != unwindstack::ERROR_INVALID_MAP)
       break;
   }
   std::vector<unwindstack::FrameData> frames = unwinder.ConsumeFrames();
   for (unwindstack::FrameData& fd : frames) {
-    std::string build_id;
-    if (fd.map_name != "") {
-      unwindstack::MapInfo* map_info = metadata->fd_maps.Find(fd.pc);
-      if (map_info)
-        build_id = map_info->GetBuildID();
-    }
-
-    out->frames.emplace_back(std::move(fd), std::move(build_id));
+    out->frames.emplace_back(metadata->AnnotateFrame(std::move(fd)));
   }
 
-  if (error_code != 0) {
+  if (error_code != unwindstack::ERROR_NONE) {
     PERFETTO_DLOG("Unwinding error %" PRIu8, error_code);
     unwindstack::FrameData frame_data{};
     frame_data.function_name = "ERROR " + std::to_string(error_code);
@@ -324,7 +317,7 @@
       base::SockFamily::kUnix, base::SockType::kStream);
   pid_t peer_pid = sock->peer_pid();
 
-  UnwindingMetadata metadata(peer_pid, std::move(handoff_data.maps_fd),
+  UnwindingMetadata metadata(std::move(handoff_data.maps_fd),
                              std::move(handoff_data.mem_fd));
   ClientData client_data{
       handoff_data.data_source_instance_id,
diff --git a/src/profiling/memory/unwinding_fuzzer.cc b/src/profiling/memory/unwinding_fuzzer.cc
index 29991ba..e87202e 100644
--- a/src/profiling/memory/unwinding_fuzzer.cc
+++ b/src/profiling/memory/unwinding_fuzzer.cc
@@ -41,8 +41,7 @@
 
   pid_t self_pid = getpid();
   DataSourceInstanceID id = 0;
-  UnwindingMetadata metadata(self_pid,
-                             base::OpenFile("/proc/self/maps", O_RDONLY),
+  UnwindingMetadata metadata(base::OpenFile("/proc/self/maps", O_RDONLY),
                              base::OpenFile("/proc/self/mem", O_RDONLY));
 
   NopDelegate nop_delegate;
diff --git a/src/profiling/memory/unwinding_unittest.cc b/src/profiling/memory/unwinding_unittest.cc
index faa996e..99fe08b 100644
--- a/src/profiling/memory/unwinding_unittest.cc
+++ b/src/profiling/memory/unwinding_unittest.cc
@@ -139,8 +139,7 @@
   base::ScopedFile proc_maps(base::OpenFile("/proc/self/maps", O_RDONLY));
   base::ScopedFile proc_mem(base::OpenFile("/proc/self/mem", O_RDONLY));
   GlobalCallstackTrie callsites;
-  UnwindingMetadata metadata(getpid(), std::move(proc_maps),
-                             std::move(proc_mem));
+  UnwindingMetadata metadata(std::move(proc_maps), std::move(proc_mem));
   WireMessage msg;
   auto record = GetRecord(&msg);
   AllocRecord out;
@@ -160,8 +159,7 @@
   base::ScopedFile proc_maps(base::OpenFile("/proc/self/maps", O_RDONLY));
   base::ScopedFile proc_mem(base::OpenFile("/proc/self/mem", O_RDONLY));
   GlobalCallstackTrie callsites;
-  UnwindingMetadata metadata(getpid(), std::move(proc_maps),
-                             std::move(proc_mem));
+  UnwindingMetadata metadata(std::move(proc_maps), std::move(proc_mem));
   // Force reparse in DoUnwind.
   metadata.fd_maps.Reset();
   WireMessage msg;
diff --git a/src/profiling/memory/unwound_messages.h b/src/profiling/memory/unwound_messages.h
index 15608dd..b7101af 100644
--- a/src/profiling/memory/unwound_messages.h
+++ b/src/profiling/memory/unwound_messages.h
@@ -20,20 +20,12 @@
 #include <unwindstack/Maps.h>
 #include <unwindstack/Unwinder.h>
 
+#include "src/profiling/common/unwind_support.h"
 #include "src/profiling/memory/wire_protocol.h"
 
 namespace perfetto {
 namespace profiling {
 
-// A wrapper of libunwindstack FrameData that also includes the build_id.
-struct FrameData {
-  FrameData(unwindstack::FrameData f, std::string b)
-      : frame(std::move(f)), build_id(std::move(b)) {}
-
-  unwindstack::FrameData frame;
-  std::string build_id;
-};
-
 // Single allocation with an unwound callstack.
 struct AllocRecord {
   pid_t pid;
diff --git a/src/profiling/perf/BUILD.gn b/src/profiling/perf/BUILD.gn
index 0c209ed..9495971 100644
--- a/src/profiling/perf/BUILD.gn
+++ b/src/profiling/perf/BUILD.gn
@@ -54,6 +54,7 @@
     "../../../src/base",
     "../../../src/base:unix_socket",
     "../../../src/tracing/ipc/producer",
+    "../common:unwind_support",
   ]
   sources = [
     "event_config.h",
diff --git a/src/profiling/perf/event_reader.cc b/src/profiling/perf/event_reader.cc
index c28011b..cab7e35 100644
--- a/src/profiling/perf/event_reader.cc
+++ b/src/profiling/perf/event_reader.cc
@@ -130,6 +130,8 @@
 // Is there an argument for maintaining our own copy of |data_tail| instead of
 // reloading it?
 char* PerfRingBuffer::ReadRecordNonconsuming() {
+  static_assert(sizeof(std::atomic<uint64_t>) == sizeof(uint64_t), "");
+
   PERFETTO_CHECK(valid());
 
   // |data_tail| is written only by this userspace thread, so we can safely read
diff --git a/src/profiling/perf/perf_producer.cc b/src/profiling/perf/perf_producer.cc
index f4935ed..8969980 100644
--- a/src/profiling/perf/perf_producer.cc
+++ b/src/profiling/perf/perf_producer.cc
@@ -30,8 +30,11 @@
 #include "perfetto/tracing/core/data_source_config.h"
 #include "perfetto/tracing/core/data_source_descriptor.h"
 #include "protos/perfetto/config/profiling/perf_event_config.pbzero.h"
+#include "src/profiling/common/unwind_support.h"
 #include "src/profiling/perf/event_reader.h"
 
+#include <unwindstack/Unwinder.h>
+
 namespace perfetto {
 namespace profiling {
 namespace {
@@ -39,9 +42,12 @@
 // TODO(rsavitski): for low sampling rates, look into epoll to detect samples.
 constexpr uint32_t kReadTickPeriodMs = 200;
 constexpr uint32_t kUnwindTickPeriodMs = 200;
+constexpr uint32_t kBookkeepTickPeriodMs = 200;
 // TODO(rsavitski): this is better calculated (at setup) from the buffer and
 // sample sizes.
-constexpr size_t kMaxSamplesPerTick = 32;
+constexpr size_t kMaxSamplesPerReadTick = 32;
+
+constexpr size_t kUnwindingMaxFrames = 1000;
 
 constexpr uint32_t kInitialConnectionBackoffMs = 100;
 constexpr uint32_t kMaxConnectionBackoffMs = 30 * 1000;
@@ -108,6 +114,16 @@
           weak_this->TickDataSourceUnwind(instance_id);
       },
       kUnwindTickPeriodMs);
+
+  // Set up bookkeeping queue and kick off a periodic task to process it.
+  bookkeping_queues_.emplace(instance_id,
+                             std::queue<PerfProducer::BookkeepingEntry>{});
+  task_runner_->PostDelayedTask(
+      [weak_this, instance_id] {
+        if (weak_this)
+          weak_this->TickDataSourceBookkeep(instance_id);
+      },
+      kBookkeepTickPeriodMs);
 }
 
 // TODO(rsavitski): stop perf_event before draining ring buffer and internal
@@ -116,6 +132,7 @@
   PERFETTO_DLOG("StopDataSource(id=%" PRIu64 ")", instance_id);
   data_sources_.erase(instance_id);
   unwind_queues_.erase(instance_id);
+  bookkeping_queues_.erase(instance_id);
 }
 
 void PerfProducer::Flush(FlushRequestID flush_id,
@@ -128,9 +145,12 @@
     auto ds_it = data_sources_.find(ds_id);
     if (ds_it != data_sources_.end()) {
       auto unwind_it = unwind_queues_.find(ds_id);
+      auto book_it = bookkeping_queues_.find(ds_id);
       PERFETTO_CHECK(unwind_it != unwind_queues_.end());
+      PERFETTO_CHECK(book_it != bookkeping_queues_.end());
 
-      ProcessUnwindQueue(&unwind_it->second, ds_it->second);
+      ProcessUnwindQueue(&unwind_it->second, &book_it->second, &ds_it->second);
+      // TODO(rsavitski): also flush the bookkeeping queue.
       endpoint_->NotifyFlushComplete(flush_id);
     }
   }
@@ -152,14 +172,21 @@
                   static_cast<size_t>(ds_id), lost_events);
   };
 
-  for (size_t i = 0; i < kMaxSamplesPerTick; i++) {
+  for (size_t i = 0; i < kMaxSamplesPerReadTick; i++) {
     base::Optional<ParsedSample> sample =
         ds.event_reader.ReadUntilSample(lost_events_callback);
     if (!sample)
       break;  // caught up to the writer
 
-    // Request proc-fds for the process if this is the first time we see it yet.
     pid_t pid = sample->pid;
+    if (!sample->regs) {
+      // TODO(rsavitski): don't discard if/when doing stackless events.
+      PERFETTO_DLOG("Dropping event without register data for pid [%d]",
+                    static_cast<int>(pid));
+      break;
+    }
+
+    // Request proc-fds for the process if this is the first time we see it yet.
     auto& fd_entry = ds.proc_fds[pid];  // created if absent
 
     if (fd_entry.status == Status::kInitial) {
@@ -194,9 +221,6 @@
                                      base::ScopedFile maps_fd,
                                      base::ScopedFile mem_fd) {
   using Status = DataSource::ProcDescriptors::Status;
-  PERFETTO_DLOG("PerfProducer::OnProcDescriptors [%d]->{%d, %d}",
-                static_cast<int>(pid), maps_fd.get(), mem_fd.get());
-
   // Find first fit data source that is waiting on descriptors for the process.
   for (auto& it : data_sources_) {
     DataSource& ds = it.second;
@@ -204,8 +228,8 @@
     if (proc_fd_it != ds.proc_fds.end() &&
         proc_fd_it->second.status == Status::kResolving) {
       proc_fd_it->second.status = Status::kResolved;
-      proc_fd_it->second.maps_fd = std::move(maps_fd);
-      proc_fd_it->second.mem_fd = std::move(mem_fd);
+      proc_fd_it->second.unwind_state =
+          UnwindingMetadata{std::move(maps_fd), std::move(mem_fd)};
       PERFETTO_DLOG("Handed off proc-fds for pid [%d] to DS [%zu]",
                     static_cast<int>(pid), static_cast<size_t>(it.first));
       return;  // done
@@ -247,15 +271,19 @@
 }
 
 void PerfProducer::TickDataSourceUnwind(DataSourceInstanceID ds_id) {
-  auto q_it = unwind_queues_.find(ds_id);
   auto ds_it = data_sources_.find(ds_id);
-  if (q_it == unwind_queues_.end() || ds_it == data_sources_.end()) {
+  if (ds_it == data_sources_.end()) {
     PERFETTO_DLOG("Stopping TickDataSourceUnwind(%zu)",
                   static_cast<size_t>(ds_id));
     return;
   }
 
-  ProcessUnwindQueue(&q_it->second, ds_it->second);
+  auto unwind_it = unwind_queues_.find(ds_id);
+  auto book_it = bookkeping_queues_.find(ds_id);
+  PERFETTO_CHECK(unwind_it != unwind_queues_.end());
+  PERFETTO_CHECK(book_it != bookkeping_queues_.end());
+
+  ProcessUnwindQueue(&unwind_it->second, &book_it->second, &ds_it->second);
 
   auto weak_this = weak_factory_.GetWeakPtr();
   task_runner_->PostDelayedTask(
@@ -268,10 +296,15 @@
 
 // TODO(rsavitski): reader can purge kResolving entries from the start once the
 // queue grows too large.
-void PerfProducer::ProcessUnwindQueue(std::deque<UnwindEntry>* queue_ptr,
-                                      const DataSource& ds) {
+// TODO(rsavitski): DataSource input won't be needed once fd-tracking in the
+// unwinder is separated from fd-tracking in the reading frontend.
+void PerfProducer::ProcessUnwindQueue(
+    std::deque<UnwindEntry>* input_queue,
+    std::queue<BookkeepingEntry>* output_queue,
+    DataSource* ds_ptr) {
   using Status = DataSource::ProcDescriptors::Status;
-  auto& queue = *queue_ptr;
+  auto& queue = *input_queue;
+  auto& ds = *ds_ptr;
 
   // Iterate over the queue, handling unwindable samples, and then marking them
   // as processed.
@@ -288,6 +321,7 @@
     auto fd_status = proc_fd_it->second.status;
     PERFETTO_CHECK(fd_status != Status::kInitial);
 
+    // Giving up on the sample (proc-fd lookup timed out).
     if (fd_status == Status::kSkip) {
       PERFETTO_DLOG("Skipping sample for pid [%d]",
                     static_cast<int>(sample.pid));
@@ -295,15 +329,20 @@
       continue;
     }
 
+    // Still waiting on the proc-fds.
     if (fd_status == Status::kResolving) {
       PERFETTO_DLOG("Still resolving sample for pid [%d]",
                     static_cast<int>(sample.pid));
       continue;
     }
 
+    // Sample ready - process it.
     if (fd_status == Status::kResolved) {
-      PERFETTO_DLOG("Accepting sample: pid:[%d], ts:[%" PRIu64 "]",
-                    static_cast<int>(sample.pid), sample.timestamp);
+      PerfProducer::BookkeepingEntry unwound_sample =
+          UnwindSample(std::move(sample), &proc_fd_it->second);
+
+      output_queue->push(std::move(unwound_sample));
+
       entry.valid = false;
       continue;
     }
@@ -320,6 +359,83 @@
   PERFETTO_DLOG("Unwind queue drain: [%zu]->[%zu]", num_samples, queue.size());
 }
 
+PerfProducer::BookkeepingEntry PerfProducer::UnwindSample(
+    ParsedSample sample,
+    DataSource::ProcDescriptors* process_state) {
+  PerfProducer::BookkeepingEntry ret;
+  ret.pid = sample.pid;
+  ret.tid = sample.tid;
+  ret.timestamp = sample.timestamp;
+
+  auto& unwind_state = process_state->unwind_state;
+
+  // Overlay the stack bytes over /proc/<pid>/mem.
+  std::shared_ptr<unwindstack::Memory> overlay_memory =
+      std::make_shared<StackOverlayMemory>(
+          unwind_state.fd_mem, sample.regs->sp(),
+          reinterpret_cast<uint8_t*>(sample.stack.data()), sample.stack.size());
+
+  // Unwindstack clobbers registers, so make a copy in case we need to retry.
+  auto working_regs = std::unique_ptr<unwindstack::Regs>{sample.regs->Clone()};
+
+  uint8_t error_code = unwindstack::ERROR_NONE;
+  unwindstack::Unwinder unwinder(kUnwindingMaxFrames, &unwind_state.fd_maps,
+                                 working_regs.get(), overlay_memory);
+
+  for (int attempt = 0; attempt < 2; attempt++) {
+#if PERFETTO_BUILDFLAG(PERFETTO_ANDROID_BUILD)
+    unwinder.SetJitDebug(unwind_state.jit_debug.get(), working_regs->Arch());
+    unwinder.SetDexFiles(unwind_state.dex_files.get(), working_regs->Arch());
+#endif
+    unwinder.Unwind(/*initial_map_names_to_skip=*/nullptr,
+                    /*map_suffixes_to_ignore=*/nullptr);
+    error_code = unwinder.LastErrorCode();
+    if (error_code != unwindstack::ERROR_INVALID_MAP)
+      break;
+
+    // Otherwise, reparse the maps, and possibly retry the unwind.
+    PERFETTO_DLOG("Reparsing maps");
+    unwind_state.ReparseMaps();
+  }
+
+  PERFETTO_DLOG("Frames:");
+  std::vector<unwindstack::FrameData> frames = unwinder.ConsumeFrames();
+  for (unwindstack::FrameData& frame : frames) {
+    PERFETTO_DLOG("%s", unwinder.FormatFrame(frame).c_str());
+    ret.frames.emplace_back(unwind_state.AnnotateFrame(std::move(frame)));
+  }
+
+  if (error_code != unwindstack::ERROR_NONE)
+    ret.unwind_error = true;
+
+  return ret;
+}
+
+void PerfProducer::TickDataSourceBookkeep(DataSourceInstanceID ds_id) {
+  auto q_it = bookkeping_queues_.find(ds_id);
+  if (q_it == bookkeping_queues_.end()) {
+    return;
+  }
+
+  auto& queue = q_it->second;
+  while (!queue.empty()) {
+    BookkeepingEntry& entry = queue.front();
+    PERFETTO_DLOG("Bookkeeping sample: pid:[%d], ts:[%" PRIu64 "]",
+                  static_cast<int>(entry.pid), entry.timestamp);
+
+    queue.pop();
+  }
+
+  // Repost tick.
+  auto weak_this = weak_factory_.GetWeakPtr();
+  task_runner_->PostDelayedTask(
+      [weak_this, ds_id] {
+        if (weak_this)
+          weak_this->TickDataSourceBookkeep(ds_id);
+      },
+      kBookkeepTickPeriodMs);
+}
+
 void PerfProducer::ConnectWithRetries(const char* socket_name) {
   PERFETTO_DCHECK(state_ == kNotStarted);
   state_ = kNotConnected;
diff --git a/src/profiling/perf/perf_producer.h b/src/profiling/perf/perf_producer.h
index 426ad70..ecd6be0 100644
--- a/src/profiling/perf/perf_producer.h
+++ b/src/profiling/perf/perf_producer.h
@@ -19,6 +19,7 @@
 
 #include <deque>
 #include <map>
+#include <queue>
 
 #include <unistd.h>
 
@@ -32,6 +33,7 @@
 #include "perfetto/ext/tracing/core/basic_types.h"
 #include "perfetto/ext/tracing/core/producer.h"
 #include "perfetto/ext/tracing/core/tracing_service.h"
+#include "src/profiling/common/unwind_support.h"
 #include "src/profiling/perf/event_config.h"
 #include "src/profiling/perf/event_reader.h"
 #include "src/profiling/perf/proc_descriptors.h"
@@ -93,17 +95,20 @@
     // will need to be done by both sides (frontend needs to know whether to
     // resolve the pid, and the unwinder needs to know whether the fd is
     // ready/poisoned).
+    // TODO(rsavitski): find a more descriptive name.
     struct ProcDescriptors {
       enum class Status { kInitial, kResolving, kResolved, kSkip };
+
       Status status = Status::kInitial;
-      base::ScopedFile maps_fd;
-      base::ScopedFile mem_fd;
+      UnwindingMetadata unwind_state{/*maps_fd=*/base::ScopedFile{},
+                                     /*mem_fd=*/base::ScopedFile{}};
     };
     std::map<pid_t, ProcDescriptors> proc_fds;  // keyed by pid
   };
 
+  // Entry in an unwinding queue. Either a sample that requires unwinding, or a
+  // tombstoned entry (valid == false).
   struct UnwindEntry {
-   public:
     UnwindEntry(ParsedSample _sample)
         : valid(true), sample(std::move(_sample)) {}
 
@@ -111,6 +116,23 @@
     ParsedSample sample;
   };
 
+  // Entry in a bookeeping queue. Represents a processed sample.
+  struct BookkeepingEntry {
+    BookkeepingEntry() = default;
+    // move-only
+    BookkeepingEntry(const BookkeepingEntry&) = delete;
+    BookkeepingEntry& operator=(const BookkeepingEntry&) = delete;
+
+    BookkeepingEntry(BookkeepingEntry&&) = default;
+    BookkeepingEntry& operator=(BookkeepingEntry&&) = default;
+
+    pid_t pid = 0;
+    pid_t tid = 0;
+    uint64_t timestamp = 0;
+    std::vector<FrameData> frames;
+    bool unwind_error = false;
+  };
+
   void ConnectService();
   void Restart();
   void ResetConnectionBackoff();
@@ -124,8 +146,14 @@
   void HandleDescriptorLookupTimeout(DataSourceInstanceID ds_id, pid_t pid);
 
   void TickDataSourceUnwind(DataSourceInstanceID ds_id);
-  void ProcessUnwindQueue(std::deque<UnwindEntry>* queue_ptr,
-                          const DataSource& ds);
+  void ProcessUnwindQueue(std::deque<UnwindEntry>* input_queue,
+                          std::queue<BookkeepingEntry>* output_queue,
+                          DataSource* ds_ptr);
+
+  BookkeepingEntry UnwindSample(ParsedSample sample,
+                                DataSource::ProcDescriptors* process_state);
+
+  void TickDataSourceBookkeep(DataSourceInstanceID ds_id);
 
   // Task runner owned by the main thread.
   base::TaskRunner* const task_runner_;
@@ -141,6 +169,8 @@
 
   std::map<DataSourceInstanceID, DataSource> data_sources_;
   std::map<DataSourceInstanceID, std::deque<UnwindEntry>> unwind_queues_;
+  std::map<DataSourceInstanceID, std::queue<BookkeepingEntry>>
+      bookkeping_queues_;
 
   base::WeakPtrFactory<PerfProducer> weak_factory_;  // keep last
 };
diff --git a/src/profiling/perf/proc_descriptors.cc b/src/profiling/perf/proc_descriptors.cc
index 3a98bfb..f468eab 100644
--- a/src/profiling/perf/proc_descriptors.cc
+++ b/src/profiling/perf/proc_descriptors.cc
@@ -36,7 +36,7 @@
 
 void DirectDescriptorGetter::GetDescriptorsForPid(pid_t pid) {
   char buf[128] = {};
-  snprintf(buf, sizeof(buf), "/proc/%d/mem", pid);
+  snprintf(buf, sizeof(buf), "/proc/%d/maps", pid);
   auto maps_fd = base::ScopedFile{open(buf, O_RDONLY | O_CLOEXEC)};
   if (!maps_fd) {
     PERFETTO_PLOG("Failed to open [%s]", buf);