perf sampling: minimal UI for stacks as slice columns

The tracker is fairly stateless at the moment (and we don't yet persist the
perf samples in a table), but a separate tracker felt more appropriate than
putting everything in proto_trace_parser (though that in turn diffuses the
logic...).

Bug: 144281346
Change-Id: Ie2c9aeabaea757bef8c886b6a1825dc1ee1a161f
diff --git a/Android.bp b/Android.bp
index 8a9da79..5c70f53 100644
--- a/Android.bp
+++ b/Android.bp
@@ -6307,6 +6307,7 @@
     "src/trace_processor/importers/proto/track_event_parser.cc",
     "src/trace_processor/importers/proto/track_event_tokenizer.cc",
     "src/trace_processor/metadata_tracker.cc",
+    "src/trace_processor/perf_sample_tracker.cc",
     "src/trace_processor/process_tracker.cc",
     "src/trace_processor/slice_tracker.cc",
     "src/trace_processor/stack_profile_tracker.cc",
diff --git a/BUILD b/BUILD
index ba81ac8..bce6463 100644
--- a/BUILD
+++ b/BUILD
@@ -975,6 +975,8 @@
         "src/trace_processor/importers/proto/track_event_tokenizer.h",
         "src/trace_processor/metadata_tracker.cc",
         "src/trace_processor/metadata_tracker.h",
+        "src/trace_processor/perf_sample_tracker.cc",
+        "src/trace_processor/perf_sample_tracker.h",
         "src/trace_processor/process_tracker.cc",
         "src/trace_processor/process_tracker.h",
         "src/trace_processor/slice_tracker.cc",
diff --git a/src/trace_processor/BUILD.gn b/src/trace_processor/BUILD.gn
index 0125a10..3e70300 100644
--- a/src/trace_processor/BUILD.gn
+++ b/src/trace_processor/BUILD.gn
@@ -113,6 +113,8 @@
     "importers/proto/track_event_tokenizer.h",
     "metadata_tracker.cc",
     "metadata_tracker.h",
+    "perf_sample_tracker.cc",
+    "perf_sample_tracker.h",
     "process_tracker.cc",
     "process_tracker.h",
     "slice_tracker.cc",
diff --git a/src/trace_processor/heap_profile_tracker.cc b/src/trace_processor/heap_profile_tracker.cc
index 7c6b1a4..8e55b92 100644
--- a/src/trace_processor/heap_profile_tracker.cc
+++ b/src/trace_processor/heap_profile_tracker.cc
@@ -18,9 +18,11 @@
 
 #include "src/trace_processor/process_tracker.h"
 #include "src/trace_processor/trace_processor_context.h"
-
 #include "perfetto/base/logging.h"
 
+#include "protos/perfetto/trace/profiling/profile_common.pbzero.h"
+#include "protos/perfetto/trace/profiling/profile_packet.pbzero.h"
+
 namespace perfetto {
 namespace trace_processor {
 
diff --git a/src/trace_processor/heap_profile_tracker.h b/src/trace_processor/heap_profile_tracker.h
index c549d0a..fc2d7f6 100644
--- a/src/trace_processor/heap_profile_tracker.h
+++ b/src/trace_processor/heap_profile_tracker.h
@@ -17,14 +17,10 @@
 #ifndef SRC_TRACE_PROCESSOR_HEAP_PROFILE_TRACKER_H_
 #define SRC_TRACE_PROCESSOR_HEAP_PROFILE_TRACKER_H_
 
-#include <deque>
 #include <set>
 #include <unordered_map>
 
 #include "perfetto/ext/base/optional.h"
-
-#include "protos/perfetto/trace/profiling/profile_common.pbzero.h"
-#include "protos/perfetto/trace/profiling/profile_packet.pbzero.h"
 #include "src/trace_processor/stack_profile_tracker.h"
 #include "src/trace_processor/storage/trace_storage.h"
 
diff --git a/src/trace_processor/importers/proto/proto_trace_parser.cc b/src/trace_processor/importers/proto/proto_trace_parser.cc
index 4bc2432..23d977f 100644
--- a/src/trace_processor/importers/proto/proto_trace_parser.cc
+++ b/src/trace_processor/importers/proto/proto_trace_parser.cc
@@ -36,6 +36,7 @@
 #include "src/trace_processor/importers/ftrace/ftrace_module.h"
 #include "src/trace_processor/importers/proto/packet_sequence_state.h"
 #include "src/trace_processor/metadata_tracker.h"
+#include "src/trace_processor/perf_sample_tracker.h"
 #include "src/trace_processor/process_tracker.h"
 #include "src/trace_processor/slice_tracker.h"
 #include "src/trace_processor/stack_profile_tracker.h"
@@ -229,6 +230,10 @@
                                 packet.streaming_profile_packet());
   }
 
+  if (packet.has_perf_sample()) {
+    ParsePerfSample(ts, data->sequence_state, packet.perf_sample());
+  }
+
   if (packet.has_chrome_benchmark_metadata()) {
     ParseChromeBenchmarkMetadata(packet.chrome_benchmark_metadata());
   }
@@ -467,6 +472,41 @@
   }
 }
 
+// Parses a PerfSample packet: counts lost records or adds the stack as slices.
+void ProtoTraceParser::ParsePerfSample(
+    int64_t ts,
+    PacketSequenceStateGeneration* sequence_state,
+    ConstBytes blob) {
+  protos::pbzero::PerfSample::Decoder decoder(blob.data, blob.size);
+
+  // A nonzero lost-records count marks a data loss notice, not a sample.
+  const auto lost = decoder.kernel_records_lost();
+  if (lost > 0) {
+    PERFETTO_DCHECK(decoder.pid() == 0);
+    context_->storage->IncrementIndexedStats(
+        stats::perf_cpu_lost_records, static_cast<int>(decoder.cpu()),
+        static_cast<int64_t>(lost));
+    return;
+  }
+
+  const uint64_t iid = decoder.callstack_iid();
+  StackProfileTracker& tracker =
+      sequence_state->state()->stack_profile_tracker();
+  ProfilePacketInternLookup lookup(sequence_state);
+  base::Optional<CallsiteId> callsite =
+      tracker.FindOrInsertCallstack(iid, &lookup);
+  if (!callsite) {
+    context_->storage->IncrementStats(stats::stackprofile_parser_error);
+    PERFETTO_ELOG("PerfSample referencing invalid callstack iid [%" PRIu64
+                  "] at timestamp [%" PRIi64 "]",
+                  iid, ts);
+    return;
+  }
+
+  context_->perf_sample_tracker_->AddStackToSliceTrack(
+      ts, *callsite, decoder.pid(), decoder.tid(), decoder.cpu());
+}
+
 void ProtoTraceParser::ParseChromeBenchmarkMetadata(ConstBytes blob) {
   TraceStorage* storage = context_->storage.get();
   MetadataTracker* metadata = context_->metadata_tracker.get();
diff --git a/src/trace_processor/importers/proto/proto_trace_parser.h b/src/trace_processor/importers/proto/proto_trace_parser.h
index d7570c8..3f65fab 100644
--- a/src/trace_processor/importers/proto/proto_trace_parser.h
+++ b/src/trace_processor/importers/proto/proto_trace_parser.h
@@ -67,6 +67,7 @@
                           uint32_t seq_id,
                           ConstBytes);
   void ParseStreamingProfilePacket(PacketSequenceStateGeneration*, ConstBytes);
+  void ParsePerfSample(int64_t ts, PacketSequenceStateGeneration*, ConstBytes);
   void ParseChromeBenchmarkMetadata(ConstBytes);
   void ParseChromeEvents(int64_t ts, ConstBytes);
   void ParseMetatraceEvent(int64_t ts, ConstBytes);
diff --git a/src/trace_processor/perf_sample_tracker.cc b/src/trace_processor/perf_sample_tracker.cc
new file mode 100644
index 0000000..3a078b3
--- /dev/null
+++ b/src/trace_processor/perf_sample_tracker.cc
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "src/trace_processor/perf_sample_tracker.h"
+
+#include <vector>
+
+#include <inttypes.h>
+
+#include "perfetto/ext/base/optional.h"
+#include "src/trace_processor/process_tracker.h"
+#include "src/trace_processor/slice_tracker.h"
+#include "src/trace_processor/storage/trace_storage.h"
+#include "src/trace_processor/trace_processor_context.h"
+#include "src/trace_processor/track_tracker.h"
+
+#if !PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+#include <cxxabi.h>
+#endif
+
+namespace perfetto {
+namespace trace_processor {
+namespace {
+// TODO(rsavitski): consider using the sampling rate from the trace config.
+constexpr int64_t kFixedStackSliceDurationNs = 1 * 1000 * 1000;  // 1 ms.
+}  // namespace
+
+// Emits one sample as a column of fixed-duration slices on the process-wide
+// perf stack track: a synthetic cpu/thread slice, then frames root to leaf.
+void PerfSampleTracker::AddStackToSliceTrack(int64_t timestamp,
+                                             CallsiteId leaf_id,
+                                             uint32_t pid,
+                                             uint32_t tid,
+                                             uint32_t cpu) {
+  UniquePid upid = context_->process_tracker->GetOrCreateProcess(pid);
+  TrackId track_id = context_->track_tracker->InternPerfStackTrack(upid);
+  const auto& callsites = context_->storage->stack_profile_callsite_table();
+  const auto& frames = context_->storage->stack_profile_frame_table();
+  const auto& mappings = context_->storage->stack_profile_mapping_table();
+
+  // Synthetic frame for more context, as the track is process-wide.
+  char buf[128] = {};
+  snprintf(buf, sizeof(buf), "cpu: [%" PRIu32 "]; thread: [%" PRIu32 "]", cpu,
+           tid);
+  StringId synth = context_->storage->InternString(buf);
+  context_->slice_tracker->Scoped(timestamp, track_id, kNullStringId, synth,
+                                  kFixedStackSliceDurationNs);
+
+  // The callstack id references the leaf frame, while we want the slice stack
+  // to have the root frame at the top in the UI, so walk the chain in reverse.
+  std::vector<uint32_t> callsite_rows;
+  callsite_rows.reserve(64);
+  base::Optional<CallsiteId> cs_id = leaf_id;
+  while (cs_id) {
+    uint32_t row = *callsites.id().IndexOf(*cs_id);
+    callsite_rows.push_back(row);
+    cs_id = callsites.parent_id()[row];
+  }
+
+  for (auto rit = callsite_rows.rbegin(); rit != callsite_rows.rend(); ++rit) {
+    uint32_t callsite_row = *rit;
+    FrameId frame_id = callsites.frame_id()[callsite_row];
+    uint32_t frame_row = *frames.id().IndexOf(frame_id);
+
+    MappingId mapping_id = frames.mapping()[frame_row];
+    uint32_t mapping_row = *mappings.id().IndexOf(mapping_id);
+
+    StringId mangled_fname = frames.name()[frame_row];
+    StringId mname = mappings.name()[mapping_row];
+
+    StringId fname = MaybeDemangle(mangled_fname);
+    context_->slice_tracker->Scoped(timestamp, track_id, mname, fname,
+                                    kFixedStackSliceDurationNs);
+  }
+}
+
+// Interns the demangled form of |fname| if there is one, else returns |fname|.
+#if !PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
+StringId PerfSampleTracker::MaybeDemangle(StringId fname) {
+  int status = 0;
+  const char* mangled = context_->storage->GetString(fname).c_str();
+  char* demangled = abi::__cxa_demangle(mangled, nullptr, nullptr, &status);
+  if (!demangled)
+    return fname;
+  StringId interned = context_->storage->InternString(demangled);
+  free(demangled);
+  return interned;
+}
+#else
+StringId PerfSampleTracker::MaybeDemangle(StringId fname) {
+  return fname;
+}
+#endif
+
+}  // namespace trace_processor
+}  // namespace perfetto
diff --git a/src/trace_processor/perf_sample_tracker.h b/src/trace_processor/perf_sample_tracker.h
new file mode 100644
index 0000000..5d9c548
--- /dev/null
+++ b/src/trace_processor/perf_sample_tracker.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_TRACE_PROCESSOR_PERF_SAMPLE_TRACKER_H_
+#define SRC_TRACE_PROCESSOR_PERF_SAMPLE_TRACKER_H_
+
+#include <stdint.h>
+
+#include "src/trace_processor/storage/trace_storage.h"
+
+namespace perfetto {
+namespace trace_processor {
+
+class TraceProcessorContext;
+
+class PerfSampleTracker {
+ public:
+  explicit PerfSampleTracker(TraceProcessorContext* context)
+      : context_(context) {}
+
+  // Interim UI track: adds the callstack ending at |leaf_id| as slices.
+  void AddStackToSliceTrack(int64_t timestamp,
+                            CallsiteId leaf_id,
+                            uint32_t pid,
+                            uint32_t tid,
+                            uint32_t cpu);
+
+ private:
+  StringId MaybeDemangle(StringId original);  // Demangled name, if any.
+
+  TraceProcessorContext* const context_;  // Stored as passed; never reseated.
+};
+
+}  // namespace trace_processor
+}  // namespace perfetto
+
+#endif  // SRC_TRACE_PROCESSOR_PERF_SAMPLE_TRACKER_H_
diff --git a/src/trace_processor/storage/stats.h b/src/trace_processor/storage/stats.h
index b645163..96ed568 100644
--- a/src/trace_processor/storage/stats.h
+++ b/src/trace_processor/storage/stats.h
@@ -129,7 +129,8 @@
   F(sched_waking_out_of_order,                kSingle,  kError,    kAnalysis), \
   F(compact_sched_switch_skipped,             kSingle,  kInfo,     kAnalysis), \
   F(compact_sched_waking_skipped,             kSingle,  kInfo,     kAnalysis), \
-  F(empty_chrome_metadata,                    kSingle,  kError,    kTrace)
+  F(empty_chrome_metadata,                    kSingle,  kError,    kTrace),    \
+  F(perf_cpu_lost_records,                    kIndexed, kDataLoss, kTrace)
 // clang-format on
 
 enum Type {
diff --git a/src/trace_processor/trace_processor_context.cc b/src/trace_processor/trace_processor_context.cc
index 6ad4f13..0d1b475 100644
--- a/src/trace_processor/trace_processor_context.cc
+++ b/src/trace_processor/trace_processor_context.cc
@@ -28,6 +28,7 @@
 #include "src/trace_processor/importers/proto/proto_trace_parser.h"
 #include "src/trace_processor/importers/proto/track_event_module.h"
 #include "src/trace_processor/metadata_tracker.h"
+#include "src/trace_processor/perf_sample_tracker.h"
 #include "src/trace_processor/process_tracker.h"
 #include "src/trace_processor/slice_tracker.h"
 #include "src/trace_processor/stack_profile_tracker.h"
diff --git a/src/trace_processor/trace_processor_context.h b/src/trace_processor/trace_processor_context.h
index 876aea6..39c1a9b 100644
--- a/src/trace_processor/trace_processor_context.h
+++ b/src/trace_processor/trace_processor_context.h
@@ -37,6 +37,7 @@
 class HeapGraphTracker;
 class HeapProfileTracker;
 class MetadataTracker;
+class PerfSampleTracker;
 class ProcessTracker;
 class SliceTracker;
 class TraceParser;
@@ -63,6 +64,7 @@
   std::unique_ptr<ChunkedTraceReader> chunk_reader;
   std::unique_ptr<HeapProfileTracker> heap_profile_tracker;
   std::unique_ptr<MetadataTracker> metadata_tracker;
+  std::unique_ptr<PerfSampleTracker> perf_sample_tracker_;
 
   // Keep the global tracker before the args tracker as we access the global
   // tracker in the destructor of the args tracker.
diff --git a/src/trace_processor/trace_processor_storage_impl.cc b/src/trace_processor/trace_processor_storage_impl.cc
index a8d8dee..c304bec 100644
--- a/src/trace_processor/trace_processor_storage_impl.cc
+++ b/src/trace_processor/trace_processor_storage_impl.cc
@@ -27,6 +27,7 @@
 #include "src/trace_processor/importers/proto/proto_trace_tokenizer.h"
 #include "src/trace_processor/importers/proto/track_event_module.h"
 #include "src/trace_processor/metadata_tracker.h"
+#include "src/trace_processor/perf_sample_tracker.h"
 #include "src/trace_processor/process_tracker.h"
 #include "src/trace_processor/slice_tracker.h"
 #include "src/trace_processor/stack_profile_tracker.h"
@@ -49,6 +50,7 @@
   context_.heap_profile_tracker.reset(new HeapProfileTracker(&context_));
   context_.metadata_tracker.reset(new MetadataTracker(&context_));
   context_.global_args_tracker.reset(new GlobalArgsTracker(&context_));
+  context_.perf_sample_tracker_.reset(new PerfSampleTracker(&context_));
 
   context_.modules.emplace_back(new FtraceModule());
   // Ftrace module is special, because it has one extra method for parsing
diff --git a/src/trace_processor/track_tracker.cc b/src/trace_processor/track_tracker.cc
index 02b8fb6..e983fb6 100644
--- a/src/trace_processor/track_tracker.cc
+++ b/src/trace_processor/track_tracker.cc
@@ -148,6 +148,19 @@
   return id;
 }
 
+// Returns the "Stack samples" track of |upid|, creating it on first request.
+TrackId TrackTracker::InternPerfStackTrack(UniquePid upid) {
+  auto cached = perf_stack_tracks_.find(upid);
+  if (cached != perf_stack_tracks_.end())
+    return cached->second;
+  tables::ProcessTrackTable::Row row(
+      context_->storage->InternString("Stack samples"));
+  row.upid = upid;
+  auto id = context_->storage->mutable_process_track_table()->Insert(row).id;
+  perf_stack_tracks_[upid] = id;
+  return id;
+}
+
 TrackId TrackTracker::InternLegacyChromeProcessInstantTrack(UniquePid upid) {
   auto it = chrome_process_instant_tracks_.find(upid);
   if (it != chrome_process_instant_tracks_.end())
diff --git a/src/trace_processor/track_tracker.h b/src/trace_processor/track_tracker.h
index 8e11e5e..d3b225a 100644
--- a/src/trace_processor/track_tracker.h
+++ b/src/trace_processor/track_tracker.h
@@ -52,6 +52,9 @@
                                   UniquePid upid,
                                   int64_t cookie);
 
+  // Interns a track for perf event stack samples, with process-wide grouping.
+  TrackId InternPerfStackTrack(UniquePid upid);
+
   // Interns a track for legacy Chrome process-scoped instant events into the
   // storage.
   TrackId InternLegacyChromeProcessInstantTrack(UniquePid upid);
@@ -198,6 +201,7 @@
   std::map<uint64_t /* uuid */, DescriptorTrackReservation>
       reserved_descriptor_tracks_;
   std::map<uint64_t /* uuid */, TrackId> resolved_descriptor_tracks_;
+  std::map<UniquePid, TrackId> perf_stack_tracks_;
 
   std::map<StringId, TrackId> global_counter_tracks_by_name_;
   std::map<std::pair<StringId, uint32_t>, TrackId> cpu_counter_tracks_;