Ftrace: Add Setup/Start support for fast triggering

Introduce the notion of Setup vs Start in the ftrace
data source. This make it so that ftrace can be in a
state where M configurations are "setup" (buffers are
allocated, categories configured ...) but only N <= M
of these are "started" (tracing is enabled).

E2E latency (incl all IPC) measured on Pixel2: 2-3ms

Bug: 116547201
Change-Id: I1a995f25c30891b53eae902e137b7adf1dbbc61d
diff --git a/src/traced/probes/ftrace/ftrace_config_muxer.cc b/src/traced/probes/ftrace/ftrace_config_muxer.cc
index 6af02b4..5374ba7 100644
--- a/src/traced/probes/ftrace/ftrace_config_muxer.cc
+++ b/src/traced/probes/ftrace/ftrace_config_muxer.cc
@@ -228,23 +228,25 @@
     : ftrace_(ftrace), table_(table), current_state_(), configs_() {}
 FtraceConfigMuxer::~FtraceConfigMuxer() = default;
 
-FtraceConfigId FtraceConfigMuxer::RequestConfig(const FtraceConfig& request) {
+FtraceConfigId FtraceConfigMuxer::SetupConfig(const FtraceConfig& request) {
   FtraceConfig actual;
 
   bool is_ftrace_enabled = ftrace_->IsTracingEnabled();
   if (configs_.empty()) {
+    PERFETTO_DCHECK(active_configs_.empty());
     PERFETTO_DCHECK(!current_state_.tracing_on);
 
     // If someone outside of perfetto is using ftrace give up now.
     if (is_ftrace_enabled)
       return 0;
 
-    // If we're about to turn tracing on use this opportunity do some setup:
+    // Setup ftrace, without starting it. Setting buffers can be quite slow
+    // (up to hundreds of ms).
     SetupClock(request);
     SetupBufferSize(request);
   } else {
     // Did someone turn ftrace off behind our back? If so give up.
-    if (!is_ftrace_enabled)
+    if (!active_configs_.empty() && !is_ftrace_enabled)
       return 0;
   }
 
@@ -272,17 +274,34 @@
     }
   }
 
-  if (configs_.empty()) {
-    PERFETTO_DCHECK(!current_state_.tracing_on);
-    ftrace_->EnableTracing();
-    current_state_.tracing_on = true;
-  }
-
   FtraceConfigId id = ++last_id_;
   configs_.emplace(id, std::move(actual));
   return id;
 }
 
+bool FtraceConfigMuxer::ActivateConfig(FtraceConfigId id) {
+  if (!id || configs_.count(id) == 0) {
+    PERFETTO_DCHECK(false);
+    return false;
+  }
+
+  active_configs_.insert(id);
+  if (active_configs_.size() > 1) {
+    PERFETTO_DCHECK(current_state_.tracing_on);
+    return true;  // We are not the first, ftrace is already enabled. All done.
+  }
+
+  PERFETTO_DCHECK(!current_state_.tracing_on);
+  if (ftrace_->IsTracingEnabled()) {
+    // If someone outside of perfetto is using ftrace give up now.
+    return false;
+  }
+
+  ftrace_->EnableTracing();
+  current_state_.tracing_on = true;
+  return true;
+}
+
 bool FtraceConfigMuxer::RemoveConfig(FtraceConfigId id) {
   if (!id || !configs_.erase(id))
     return false;
@@ -305,13 +324,25 @@
       current_state_.ftrace_events.erase(name);
   }
 
-  if (configs_.empty()) {
+  // If there aren't any more active configs, disable ftrace.
+  auto active_it = active_configs_.find(id);
+  if (active_it != active_configs_.end()) {
     PERFETTO_DCHECK(current_state_.tracing_on);
-    ftrace_->DisableTracing();
+    active_configs_.erase(active_it);
+    if (active_configs_.empty()) {
+      // This was the last active config, disable ftrace.
+      ftrace_->DisableTracing();
+      current_state_.tracing_on = false;
+    }
+  }
+
+  // Even if we don't have any other active configs, we might still have idle
+  // configs around. Tear down the rest of the ftrace config only if all
+  // configs are removed.
+  if (configs_.empty()) {
     ftrace_->SetCpuBufferSizeInPages(0);
     ftrace_->DisableAllEvents();
     ftrace_->ClearTrace();
-    current_state_.tracing_on = false;
     if (current_state_.atrace_on)
       DisableAtrace();
   }
diff --git a/src/traced/probes/ftrace/ftrace_config_muxer.h b/src/traced/probes/ftrace/ftrace_config_muxer.h
index e8164b9..85ad05b 100644
--- a/src/traced/probes/ftrace/ftrace_config_muxer.h
+++ b/src/traced/probes/ftrace/ftrace_config_muxer.h
@@ -17,6 +17,9 @@
 #ifndef SRC_TRACED_PROBES_FTRACE_FTRACE_CONFIG_MUXER_H_
 #define SRC_TRACED_PROBES_FTRACE_FTRACE_CONFIG_MUXER_H_
 
+#include <map>
+#include <set>
+
 #include "src/traced/probes/ftrace/ftrace_config.h"
 #include "src/traced/probes/ftrace/ftrace_controller.h"
 #include "src/traced/probes/ftrace/ftrace_procfs.h"
@@ -29,7 +32,7 @@
 // messing with the ftrace settings at the same time as us.
 
 // Specifically FtraceConfigMuxer takes in a *requested* FtraceConfig
-// (|RequestConfig|), makes a best effort attempt to modify the ftrace
+// (|SetupConfig|), makes a best effort attempt to modify the ftrace
 // debugfs files to honor those settings without interupting other perfetto
 // traces already in progress or other users of ftrace, then returns an
 // FtraceConfigId representing that config or zero on failure.
@@ -53,11 +56,14 @@
   // If someone else is tracing we won't touch atrace (since it resets the
   // buffer).
   // To see the config you ended up with use |GetConfig|.
-  FtraceConfigId RequestConfig(const FtraceConfig& request);
+  FtraceConfigId SetupConfig(const FtraceConfig& request);
+
+  // Activate ftrace for the given config (if not already active).
+  bool ActivateConfig(FtraceConfigId);
 
   // Undo changes for the given config. Returns false iff the id is 0
   // or already removed.
-  bool RemoveConfig(FtraceConfigId id);
+  bool RemoveConfig(FtraceConfigId);
 
   // public for testing
   void SetupClockForTesting(const FtraceConfig& request) {
@@ -91,7 +97,15 @@
   const ProtoTranslationTable* table_;
 
   FtraceState current_state_;
+
+  // Set of all configurations. Note that not all of them might be active.
+  // When a config is present but not active, we do setup buffer sizes and
+  // events, but don't enable ftrace (i.e. tracing_on).
   std::map<FtraceConfigId, FtraceConfig> configs_;
+
+  // Subset of |configs_| that are currently active. At any time ftrace is
+  // enabled iff |active_configs_| is not empty.
+  std::set<FtraceConfigId> active_configs_;
 };
 
 std::set<std::string> GetFtraceEvents(const FtraceConfig& request,
diff --git a/src/traced/probes/ftrace/ftrace_config_muxer_unittest.cc b/src/traced/probes/ftrace/ftrace_config_muxer_unittest.cc
index b67c8a0..3e3fe28 100644
--- a/src/traced/probes/ftrace/ftrace_config_muxer_unittest.cc
+++ b/src/traced/probes/ftrace/ftrace_config_muxer_unittest.cc
@@ -163,14 +163,16 @@
       .Times(AnyNumber());
 
   EXPECT_CALL(ftrace, ReadOneCharFromFile("/root/tracing_on"))
-      .WillOnce(Return('0'));
+      .Times(2)
+      .WillRepeatedly(Return('0'));
   EXPECT_CALL(ftrace, WriteToFile("/root/buffer_size_kb", "512"));
   EXPECT_CALL(ftrace, WriteToFile("/root/trace_clock", "boot"));
   EXPECT_CALL(ftrace, WriteToFile("/root/tracing_on", "1"));
   EXPECT_CALL(ftrace,
               WriteToFile("/root/events/sched/sched_switch/enable", "1"));
-  FtraceConfigId id = model.RequestConfig(config);
+  FtraceConfigId id = model.SetupConfig(config);
   ASSERT_TRUE(id);
+  ASSERT_TRUE(model.ActivateConfig(id));
 
   const FtraceConfig* actual_config = model.GetConfig(id);
   EXPECT_TRUE(actual_config);
@@ -198,7 +200,7 @@
   // If someone is using ftrace already don't stomp on what they are doing.
   EXPECT_CALL(ftrace, ReadOneCharFromFile("/root/tracing_on"))
       .WillOnce(Return('1'));
-  FtraceConfigId id = model.RequestConfig(config);
+  FtraceConfigId id = model.SetupConfig(config);
   ASSERT_FALSE(id);
 }
 
@@ -219,7 +221,7 @@
                   {"atrace", "--async_start", "--only_userspace", "sched"})))
       .WillOnce(Return(true));
 
-  FtraceConfigId id = model.RequestConfig(config);
+  FtraceConfigId id = model.SetupConfig(config);
   ASSERT_TRUE(id);
 
   const FtraceConfig* actual_config = model.GetConfig(id);
@@ -253,7 +255,7 @@
            "com.google.android.gms.persistent,com.google.android.gms"})))
       .WillOnce(Return(true));
 
-  FtraceConfigId id = model.RequestConfig(config);
+  FtraceConfigId id = model.SetupConfig(config);
   ASSERT_TRUE(id);
 
   const FtraceConfig* actual_config = model.GetConfig(id);
diff --git a/src/traced/probes/ftrace/ftrace_controller.cc b/src/traced/probes/ftrace/ftrace_controller.cc
index 86685bc..5cff028 100644
--- a/src/traced/probes/ftrace/ftrace_controller.cc
+++ b/src/traced/probes/ftrace/ftrace_controller.cc
@@ -145,6 +145,7 @@
   for (const auto* data_source : data_sources_)
     ftrace_config_muxer_->RemoveConfig(data_source->config_id());
   data_sources_.clear();
+  started_data_sources_.clear();
   StopIfNeeded();
 }
 
@@ -180,7 +181,7 @@
       continue;
     // This method reads the pipe and converts the raw ftrace data into
     // protobufs using the |data_source|'s TraceWriter.
-    ctrl->cpu_readers_[cpu]->Drain(ctrl->data_sources_);
+    ctrl->cpu_readers_[cpu]->Drain(ctrl->started_data_sources_);
     ctrl->OnDrainCpuForTesting(cpu);
   }
 
@@ -206,9 +207,9 @@
 }
 
 void FtraceController::StartIfNeeded() {
-  if (data_sources_.size() > 1)
+  if (started_data_sources_.size() > 1)
     return;
-  PERFETTO_CHECK(!data_sources_.empty());
+  PERFETTO_DCHECK(!started_data_sources_.empty());
   {
     std::unique_lock<std::mutex> lock(lock_);
     PERFETTO_CHECK(!listening_for_raw_trace_data_);
@@ -249,14 +250,16 @@
 }
 
 void FtraceController::StopIfNeeded() {
-  if (!data_sources_.empty())
+  if (!started_data_sources_.empty())
     return;
+
   {
     // Unblock any readers that are waiting for us to drain data.
     std::unique_lock<std::mutex> lock(lock_);
     listening_for_raw_trace_data_ = false;
     cpus_to_drain_.reset();
   }
+
   data_drained_.notify_all();
   cpu_readers_.clear();
 }
@@ -297,7 +300,7 @@
   if (!ValidConfig(data_source->config()))
     return false;
 
-  auto config_id = ftrace_config_muxer_->RequestConfig(data_source->config());
+  auto config_id = ftrace_config_muxer_->SetupConfig(data_source->config());
   if (!config_id)
     return false;
 
@@ -305,13 +308,27 @@
       *table_, FtraceEventsAsSet(*ftrace_config_muxer_->GetConfig(config_id))));
   auto it_and_inserted = data_sources_.insert(data_source);
   PERFETTO_DCHECK(it_and_inserted.second);
-  StartIfNeeded();
   data_source->Initialize(config_id, std::move(filter));
   return true;
 }
 
+bool FtraceController::StartDataSource(FtraceDataSource* data_source) {
+  PERFETTO_DCHECK_THREAD(thread_checker_);
+
+  FtraceConfigId config_id = data_source->config_id();
+  PERFETTO_CHECK(config_id);
+
+  if (!ftrace_config_muxer_->ActivateConfig(config_id))
+    return false;
+
+  started_data_sources_.insert(data_source);
+  StartIfNeeded();
+  return true;
+}
+
 void FtraceController::RemoveDataSource(FtraceDataSource* data_source) {
   PERFETTO_DCHECK_THREAD(thread_checker_);
+  started_data_sources_.erase(data_source);
   size_t removed = data_sources_.erase(data_source);
   if (!removed)
     return;  // Can happen if AddDataSource failed (e.g. too many sessions).
diff --git a/src/traced/probes/ftrace/ftrace_controller.h b/src/traced/probes/ftrace/ftrace_controller.h
index b950e31..fae0a68 100644
--- a/src/traced/probes/ftrace/ftrace_controller.h
+++ b/src/traced/probes/ftrace/ftrace_controller.h
@@ -64,6 +64,7 @@
   void ClearTrace();
 
   bool AddDataSource(FtraceDataSource*) PERFETTO_WARN_UNUSED_RESULT;
+  bool StartDataSource(FtraceDataSource*);
   void RemoveDataSource(FtraceDataSource*);
 
   void DumpFtraceStats(FtraceStats*);
@@ -123,6 +124,7 @@
   bool atrace_running_ = false;
   std::map<size_t, std::unique_ptr<CpuReader>> cpu_readers_;
   std::set<FtraceDataSource*> data_sources_;
+  std::set<FtraceDataSource*> started_data_sources_;
   base::WeakPtrFactory<FtraceController> weak_factory_;  // Keep last.
   PERFETTO_THREAD_CHECKER(thread_checker_)
 };
diff --git a/src/traced/probes/ftrace/ftrace_controller_unittest.cc b/src/traced/probes/ftrace/ftrace_controller_unittest.cc
index a441b2e..9e74a0a 100644
--- a/src/traced/probes/ftrace/ftrace_controller_unittest.cc
+++ b/src/traced/probes/ftrace/ftrace_controller_unittest.cc
@@ -302,12 +302,14 @@
 
   FtraceConfig config = CreateFtraceConfig({"foo"});
 
-  EXPECT_CALL(*controller->procfs(), WriteToFile("/root/tracing_on", "1"));
   EXPECT_CALL(*controller->procfs(), WriteToFile(kFooEnablePath, "1"));
   EXPECT_CALL(*controller->procfs(), WriteToFile("/root/buffer_size_kb", _));
   auto data_source = controller->AddFakeDataSource(config);
   ASSERT_TRUE(data_source);
 
+  EXPECT_CALL(*controller->procfs(), WriteToFile("/root/tracing_on", "1"));
+  ASSERT_TRUE(controller->StartDataSource(data_source.get()));
+
   EXPECT_CALL(*controller->procfs(), WriteToFile("/root/buffer_size_kb", "0"));
   EXPECT_CALL(*controller->procfs(), ClearFile("/root/trace"))
       .WillOnce(Return(true));
@@ -330,12 +332,16 @@
   FtraceConfig configA = CreateFtraceConfig({"foo"});
   FtraceConfig configB = CreateFtraceConfig({"foo", "bar"});
 
-  EXPECT_CALL(*controller->procfs(), WriteToFile("/root/tracing_on", "1"));
   EXPECT_CALL(*controller->procfs(), WriteToFile("/root/buffer_size_kb", _));
   EXPECT_CALL(*controller->procfs(), WriteToFile(kFooEnablePath, "1"));
   auto data_sourceA = controller->AddFakeDataSource(configA);
   EXPECT_CALL(*controller->procfs(), WriteToFile(kBarEnablePath, "1"));
   auto data_sourceB = controller->AddFakeDataSource(configB);
+
+  EXPECT_CALL(*controller->procfs(), WriteToFile("/root/tracing_on", "1"));
+  ASSERT_TRUE(controller->StartDataSource(data_sourceA.get()));
+  ASSERT_TRUE(controller->StartDataSource(data_sourceB.get()));
+
   data_sourceA.reset();
 
   EXPECT_CALL(*controller->procfs(), WriteToFile(kFooEnablePath, "0"));
@@ -356,10 +362,12 @@
   FtraceConfig config = CreateFtraceConfig({"foo"});
 
   EXPECT_CALL(*controller->procfs(), WriteToFile("/root/buffer_size_kb", _));
-  EXPECT_CALL(*controller->procfs(), WriteToFile("/root/tracing_on", "1"));
   EXPECT_CALL(*controller->procfs(), WriteToFile(kFooEnablePath, "1"));
   auto data_source = controller->AddFakeDataSource(config);
 
+  EXPECT_CALL(*controller->procfs(), WriteToFile("/root/tracing_on", "1"));
+  ASSERT_TRUE(controller->StartDataSource(data_source.get()));
+
   EXPECT_CALL(*controller->procfs(), WriteToFile(kFooEnablePath, "0"));
   EXPECT_CALL(*controller->procfs(), ClearFile("/root/trace"))
       .WillOnce(Return(true));
@@ -383,6 +391,7 @@
 
   FtraceConfig config = CreateFtraceConfig({"foo"});
   auto data_source = controller->AddFakeDataSource(config);
+  ASSERT_TRUE(controller->StartDataSource(data_source.get()));
 
   // Only one call to drain should be scheduled for the next drain period.
   EXPECT_CALL(*controller->runner(), PostDelayedTask(_, 100));
@@ -427,6 +436,7 @@
 
   FtraceConfig config = CreateFtraceConfig({"foo"});
   auto data_source = controller->AddFakeDataSource(config);
+  ASSERT_TRUE(controller->StartDataSource(data_source.get()));
 
   // Test several cycles of a worker producing data and make sure the drain
   // delay is consistent with the drain period.
@@ -469,6 +479,7 @@
   EXPECT_CALL(*controller->runner(), PostDelayedTask(_, 100)).Times(2);
   FtraceConfig config = CreateFtraceConfig({"foo"});
   auto data_source = controller->AddFakeDataSource(config);
+  ASSERT_TRUE(controller->StartDataSource(data_source.get()));
 
   auto on_data_available = controller->GetDataAvailableCallback(0u);
   std::thread worker([on_data_available] { on_data_available(); });
@@ -482,6 +493,7 @@
 
   // Register another data source and wait for it to generate data.
   data_source = controller->AddFakeDataSource(config);
+  ASSERT_TRUE(controller->StartDataSource(data_source.get()));
   std::thread worker2([on_data_available] { on_data_available(); });
   controller->WaitForData(0u);
 
@@ -506,6 +518,7 @@
                 WriteToFile("/root/buffer_size_kb", "512"));
     FtraceConfig config = CreateFtraceConfig({"foo"});
     auto data_source = controller->AddFakeDataSource(config);
+    ASSERT_TRUE(controller->StartDataSource(data_source.get()));
   }
 
   {
@@ -515,6 +528,7 @@
     FtraceConfig config = CreateFtraceConfig({"foo"});
     config.set_buffer_size_kb(10 * 1024 * 1024);
     auto data_source = controller->AddFakeDataSource(config);
+    ASSERT_TRUE(controller->StartDataSource(data_source.get()));
   }
 
   {
@@ -525,6 +539,7 @@
     ON_CALL(*controller->procfs(), NumberOfCpus()).WillByDefault(Return(2));
     config.set_buffer_size_kb(65 * 1024);
     auto data_source = controller->AddFakeDataSource(config);
+    ASSERT_TRUE(controller->StartDataSource(data_source.get()));
   }
 
   {
@@ -534,6 +549,7 @@
     FtraceConfig config = CreateFtraceConfig({"foo"});
     config.set_buffer_size_kb(1);
     auto data_source = controller->AddFakeDataSource(config);
+    ASSERT_TRUE(controller->StartDataSource(data_source.get()));
   }
 
   {
@@ -543,6 +559,7 @@
     FtraceConfig config = CreateFtraceConfig({"foo"});
     config.set_buffer_size_kb(42);
     auto data_source = controller->AddFakeDataSource(config);
+    ASSERT_TRUE(controller->StartDataSource(data_source.get()));
   }
 
   {
@@ -553,6 +570,7 @@
     ON_CALL(*controller->procfs(), NumberOfCpus()).WillByDefault(Return(2));
     config.set_buffer_size_kb(42);
     auto data_source = controller->AddFakeDataSource(config);
+    ASSERT_TRUE(controller->StartDataSource(data_source.get()));
   }
 }
 
diff --git a/src/traced/probes/ftrace/ftrace_data_source.cc b/src/traced/probes/ftrace/ftrace_data_source.cc
index d209709..d7bc843 100644
--- a/src/traced/probes/ftrace/ftrace_data_source.cc
+++ b/src/traced/probes/ftrace/ftrace_data_source.cc
@@ -43,15 +43,20 @@
     controller_weak_->RemoveDataSource(this);
 };
 
-void FtraceDataSource::Start() {
-  // TODO(primiano): implement in next CL. For now the ftrace data source
-  // still starts immediately on creation.
-}
-
 void FtraceDataSource::Initialize(FtraceConfigId config_id,
                                   std::unique_ptr<EventFilter> event_filter) {
+  PERFETTO_CHECK(config_id);
   config_id_ = config_id;
   event_filter_ = std::move(event_filter);
+}
+
+void FtraceDataSource::Start() {
+  FtraceController* ftrace = controller_weak_.get();
+  if (!ftrace)
+    return;
+  PERFETTO_CHECK(config_id_);  // Must be initialized at this point.
+  if (!ftrace->StartDataSource(this))
+    return;
   DumpFtraceStats(&stats_before_);
 }
 
diff --git a/src/traced/probes/probes_producer.cc b/src/traced/probes/probes_producer.cc
index 6a56c71..d91f066 100644
--- a/src/traced/probes/probes_producer.cc
+++ b/src/traced/probes/probes_producer.cc
@@ -163,6 +163,7 @@
                 config.name().c_str());
   auto it = data_sources_.find(instance_id);
   if (it == data_sources_.end()) {
+    // Can happen if SetupDataSource() failed (e.g. ftrace was busy).
     PERFETTO_ELOG("Data source id=%" PRIu64 " not found", instance_id);
     return;
   }
@@ -210,7 +211,7 @@
       endpoint_->CreateTraceWriter(buffer_id)));
   if (!ftrace_->AddDataSource(data_source.get())) {
     PERFETTO_ELOG(
-        "Failed to start tracing (too many concurrent sessions or ftrace is "
+        "Failed to setup tracing (too many concurrent sessions or ftrace is "
         "already in use)");
     return nullptr;
   }
@@ -237,10 +238,8 @@
     const DataSourceConfig& config) {
   base::ignore_result(id);
   auto buffer_id = static_cast<BufferID>(config.target_buffer());
-  auto data_source =
-      std::unique_ptr<ProcessStatsDataSource>(new ProcessStatsDataSource(
-          session_id, endpoint_->CreateTraceWriter(buffer_id), config));
-  return std::move(data_source);
+  return std::unique_ptr<ProcessStatsDataSource>(new ProcessStatsDataSource(
+      session_id, endpoint_->CreateTraceWriter(buffer_id), config));
 }
 
 std::unique_ptr<SysStatsDataSource> ProbesProducer::CreateSysStatsDataSource(
@@ -249,16 +248,16 @@
     const DataSourceConfig& config) {
   base::ignore_result(id);
   auto buffer_id = static_cast<BufferID>(config.target_buffer());
-  auto data_source = std::unique_ptr<SysStatsDataSource>(
+  return std::unique_ptr<SysStatsDataSource>(
       new SysStatsDataSource(task_runner_, session_id,
                              endpoint_->CreateTraceWriter(buffer_id), config));
-  return data_source;
 }
 
 void ProbesProducer::StopDataSource(DataSourceInstanceID id) {
   PERFETTO_LOG("Producer stop (id=%" PRIu64 ")", id);
   auto it = data_sources_.find(id);
   if (it == data_sources_.end()) {
+    // Can happen if SetupDataSource() failed (e.g. ftrace was busy).
     PERFETTO_ELOG("Cannot stop data source id=%" PRIu64 ", not found", id);
     return;
   }