traced_perf: make read period configurable
Additionally:
* shuffle validation / defaulting of config values into EventConfig.
* make the per-tick sample guardrail calculation based on the expected
ratio of sampling and reading frequencies.
* lower the default sampling frequency from 100 Hz to 10 Hz.
* drop the "unstable, do not use" TODO from the proto config definition.
Bug: 144281346
Change-Id: I785495e7b3249aa74f3d2160e3c400c98f59447c
diff --git a/Android.bp b/Android.bp
index 7c7278f..19ea519 100644
--- a/Android.bp
+++ b/Android.bp
@@ -5948,6 +5948,7 @@
filegroup {
name: "perfetto_src_profiling_perf_producer",
srcs: [
+ "src/profiling/perf/event_config.cc",
"src/profiling/perf/event_reader.cc",
"src/profiling/perf/perf_producer.cc",
],
diff --git a/protos/perfetto/config/perfetto_config.proto b/protos/perfetto/config/perfetto_config.proto
index ba6908b..9c911b6 100644
--- a/protos/perfetto/config/perfetto_config.proto
+++ b/protos/perfetto/config/perfetto_config.proto
@@ -1324,7 +1324,6 @@
// Begin of protos/perfetto/config/profiling/perf_event_config.proto
-// TODO(b/144281346): unstable, do not use. Will change in incompatible ways.
// At the time of writing, the config options are restricted to the periodic
// system-wide stack sampling use-case.
message PerfEventConfig {
@@ -1336,6 +1335,10 @@
// If unset, an implementation-defined default is used.
optional uint32 sampling_frequency = 2;
+ // How often the per-cpu ring buffers are read by the producer.
+ // If unset, an implementation-defined default is used.
+ optional uint32 ring_buffer_read_period_ms = 8;
+
// Size (in 4k pages) of each per-cpu ring buffer that is filled by the
// kernel. If set, must be a power of two.
// If unset, an implementation-defined default is used.
diff --git a/protos/perfetto/config/profiling/perf_event_config.proto b/protos/perfetto/config/profiling/perf_event_config.proto
index 17031b8..03b572e 100644
--- a/protos/perfetto/config/profiling/perf_event_config.proto
+++ b/protos/perfetto/config/profiling/perf_event_config.proto
@@ -18,7 +18,6 @@
package perfetto.protos;
-// TODO(b/144281346): unstable, do not use. Will change in incompatible ways.
// At the time of writing, the config options are restricted to the periodic
// system-wide stack sampling use-case.
message PerfEventConfig {
@@ -30,6 +29,10 @@
// If unset, an implementation-defined default is used.
optional uint32 sampling_frequency = 2;
+ // How often the per-cpu ring buffers are read by the producer.
+ // If unset, an implementation-defined default is used.
+ optional uint32 ring_buffer_read_period_ms = 8;
+
// Size (in 4k pages) of each per-cpu ring buffer that is filled by the
// kernel. If set, must be a power of two.
// If unset, an implementation-defined default is used.
diff --git a/protos/perfetto/trace/perfetto_trace.proto b/protos/perfetto/trace/perfetto_trace.proto
index 1bbba21..ff5d8f7 100644
--- a/protos/perfetto/trace/perfetto_trace.proto
+++ b/protos/perfetto/trace/perfetto_trace.proto
@@ -5920,7 +5920,6 @@
// Begin of protos/perfetto/config/profiling/perf_event_config.proto
-// TODO(b/144281346): unstable, do not use. Will change in incompatible ways.
// At the time of writing, the config options are restricted to the periodic
// system-wide stack sampling use-case.
message PerfEventConfig {
@@ -5932,6 +5931,10 @@
// If unset, an implementation-defined default is used.
optional uint32 sampling_frequency = 2;
+ // How often the per-cpu ring buffers are read by the producer.
+ // If unset, an implementation-defined default is used.
+ optional uint32 ring_buffer_read_period_ms = 8;
+
// Size (in 4k pages) of each per-cpu ring buffer that is filled by the
// kernel. If set, must be a power of two.
// If unset, an implementation-defined default is used.
diff --git a/src/perfetto_cmd/perfetto_config.descriptor.h b/src/perfetto_cmd/perfetto_config.descriptor.h
index 44da498..e01befa 100644
--- a/src/perfetto_cmd/perfetto_config.descriptor.h
+++ b/src/perfetto_cmd/perfetto_config.descriptor.h
@@ -27,15 +27,15 @@
// SHA1(tools/gen_binary_descriptors)
// 3df80477da2ea38cc659967487b37051a154bb69
// SHA1(protos/perfetto/config/perfetto_config.proto)
-// d20593e1fad9f82c52e700c6a1f42c927d359182
+// a4f0f29b32a435ea67a36117f8fde7198eaac7e0
// This is the proto PerfettoConfig encoded as a ProtoFileDescriptor to allow
// for reflection without libprotobuf full/non-lite protos.
namespace perfetto {
-constexpr std::array<uint8_t, 17877> kPerfettoConfigDescriptor{
- {0x0a, 0xd1, 0x8b, 0x01, 0x0a, 0x2c, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73,
+constexpr std::array<uint8_t, 17937> kPerfettoConfigDescriptor{
+ {0x0a, 0x8d, 0x8c, 0x01, 0x0a, 0x2c, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73,
0x2f, 0x70, 0x65, 0x72, 0x66, 0x65, 0x74, 0x74, 0x6f, 0x2f, 0x63, 0x6f,
0x6e, 0x66, 0x69, 0x67, 0x2f, 0x70, 0x65, 0x72, 0x66, 0x65, 0x74, 0x74,
0x6f, 0x5f, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x2e, 0x70, 0x72, 0x6f,
@@ -1143,7 +1143,7 @@
0x10, 0x64, 0x75, 0x6d, 0x70, 0x5f, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x76,
0x61, 0x6c, 0x5f, 0x6d, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52,
0x0e, 0x64, 0x75, 0x6d, 0x70, 0x49, 0x6e, 0x74, 0x65, 0x72, 0x76, 0x61,
- 0x6c, 0x4d, 0x73, 0x22, 0x97, 0x02, 0x0a, 0x0f, 0x50, 0x65, 0x72, 0x66,
+ 0x6c, 0x4d, 0x73, 0x22, 0xd3, 0x02, 0x0a, 0x0f, 0x50, 0x65, 0x72, 0x66,
0x45, 0x76, 0x65, 0x6e, 0x74, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12,
0x19, 0x0a, 0x08, 0x61, 0x6c, 0x6c, 0x5f, 0x63, 0x70, 0x75, 0x73, 0x18,
0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x61, 0x6c, 0x6c, 0x43, 0x70,
@@ -1151,7 +1151,12 @@
0x6e, 0x67, 0x5f, 0x66, 0x72, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63, 0x79,
0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x11, 0x73, 0x61, 0x6d, 0x70,
0x6c, 0x69, 0x6e, 0x67, 0x46, 0x72, 0x65, 0x71, 0x75, 0x65, 0x6e, 0x63,
- 0x79, 0x12, 0x2a, 0x0a, 0x11, 0x72, 0x69, 0x6e, 0x67, 0x5f, 0x62, 0x75,
+ 0x79, 0x12, 0x3a, 0x0a, 0x1a, 0x72, 0x69, 0x6e, 0x67, 0x5f, 0x62, 0x75,
+ 0x66, 0x66, 0x65, 0x72, 0x5f, 0x72, 0x65, 0x61, 0x64, 0x5f, 0x70, 0x65,
+ 0x72, 0x69, 0x6f, 0x64, 0x5f, 0x6d, 0x73, 0x18, 0x08, 0x20, 0x01, 0x28,
+ 0x0d, 0x52, 0x16, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65,
+ 0x72, 0x52, 0x65, 0x61, 0x64, 0x50, 0x65, 0x72, 0x69, 0x6f, 0x64, 0x4d,
+ 0x73, 0x12, 0x2a, 0x0a, 0x11, 0x72, 0x69, 0x6e, 0x67, 0x5f, 0x62, 0x75,
0x66, 0x66, 0x65, 0x72, 0x5f, 0x70, 0x61, 0x67, 0x65, 0x73, 0x18, 0x03,
0x20, 0x01, 0x28, 0x0d, 0x52, 0x0f, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75,
0x66, 0x66, 0x65, 0x72, 0x50, 0x61, 0x67, 0x65, 0x73, 0x12, 0x1d, 0x0a,
diff --git a/src/profiling/perf/BUILD.gn b/src/profiling/perf/BUILD.gn
index 3425945..b715de1 100644
--- a/src/profiling/perf/BUILD.gn
+++ b/src/profiling/perf/BUILD.gn
@@ -65,6 +65,7 @@
"../common:proc_utils",
]
sources = [
+ "event_config.cc",
"event_config.h",
"event_reader.cc",
"event_reader.h",
@@ -133,8 +134,7 @@
"../../../gn:default_deps",
"../../../gn:gtest_and_gmock",
"../../../protos/perfetto/config:cpp",
- "../../../protos/perfetto/config:zero",
- "../../../protos/perfetto/config/profiling:zero",
+ "../../../protos/perfetto/config/profiling:cpp",
"../../../protos/perfetto/trace:zero",
"../../../src/protozero",
"../../base",
diff --git a/src/profiling/perf/event_config.cc b/src/profiling/perf/event_config.cc
new file mode 100644
index 0000000..8e11b91
--- /dev/null
+++ b/src/profiling/perf/event_config.cc
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/profiling/perf/event_config.h"
+
+#include <linux/perf_event.h>
+#include <time.h>
+
+#include <unwindstack/Regs.h>
+
+#include "perfetto/base/flat_set.h"
+#include "perfetto/ext/base/optional.h"
+#include "perfetto/profiling/normalize.h"
+#include "src/profiling/perf/regs_parsing.h"
+
+#include "protos/perfetto/config/profiling/perf_event_config.pbzero.h"
+
+namespace perfetto {
+namespace profiling {
+
+namespace {
+constexpr uint64_t kDefaultSamplingFrequencyHz = 10;
+constexpr uint32_t kDefaultDataPagesPerRingBuffer = 256; // 1 MB: 256x 4k pages
+constexpr uint32_t kDefaultReadTickPeriodMs = 100;
+
+base::Optional<std::string> Normalize(const protozero::ConstChars& src) {
+ // Construct a null-terminated string that will be mutated by the normalizer.
+ std::vector<char> base(src.size + 1);
+ memcpy(base.data(), src.data, src.size);
+ base[src.size] = '\0';
+
+ char* new_start = base.data();
+ ssize_t new_sz = NormalizeCmdLine(&new_start, base.size());
+ if (new_sz < 0) {
+ PERFETTO_ELOG("Failed to normalize config cmdline [%s], aborting",
+ base.data());
+ return base::nullopt;
+ }
+ return base::make_optional<std::string>(new_start,
+ static_cast<size_t>(new_sz));
+}
+
+// returns |base::nullopt| if any of the input cmdlines couldn't be normalized.
+base::Optional<TargetFilter> ParseTargetFilter(
+ const protos::pbzero::PerfEventConfig::Decoder& cfg) {
+ TargetFilter filter;
+ for (auto it = cfg.target_cmdline(); it; ++it) {
+ base::Optional<std::string> opt = Normalize(*it);
+ if (!opt.has_value())
+ return base::nullopt;
+ filter.cmdlines.insert(std::move(opt.value()));
+ }
+
+ for (auto it = cfg.exclude_cmdline(); it; ++it) {
+ base::Optional<std::string> opt = Normalize(*it);
+ if (!opt.has_value())
+ return base::nullopt;
+ filter.exclude_cmdlines.insert(std::move(opt.value()));
+ }
+
+ for (auto it = cfg.target_pid(); it; ++it) {
+ filter.pids.insert(*it);
+ }
+
+ for (auto it = cfg.exclude_pid(); it; ++it) {
+ filter.exclude_pids.insert(*it);
+ }
+ return base::make_optional(std::move(filter));
+}
+
+constexpr bool IsPowerOfTwo(size_t v) {
+ return (v != 0 && ((v & (v - 1)) == 0));
+}
+
+// returns |base::nullopt| if the input is invalid.
+base::Optional<uint32_t> ChooseActualRingBufferPages(uint32_t config_value) {
+ if (!config_value) {
+ static_assert(IsPowerOfTwo(kDefaultDataPagesPerRingBuffer), "");
+ return base::make_optional(kDefaultDataPagesPerRingBuffer);
+ }
+
+ if (!IsPowerOfTwo(config_value)) {
+ PERFETTO_ELOG("kernel buffer size must be a power of two pages");
+ return base::nullopt;
+ }
+
+ return base::make_optional(config_value);
+}
+
+} // namespace
+
+// static
+base::Optional<EventConfig> EventConfig::Create(
+ const DataSourceConfig& ds_config) {
+ protos::pbzero::PerfEventConfig::Decoder pb_config(
+ ds_config.perf_event_config_raw());
+
+ base::Optional<TargetFilter> filter = ParseTargetFilter(pb_config);
+ if (!filter.has_value())
+ return base::nullopt;
+
+ base::Optional<uint32_t> ring_buffer_pages =
+ ChooseActualRingBufferPages(pb_config.ring_buffer_pages());
+ if (!ring_buffer_pages.has_value())
+ return base::nullopt;
+
+ uint32_t read_tick_period_ms = pb_config.ring_buffer_read_period_ms()
+ ? pb_config.ring_buffer_read_period_ms()
+ : kDefaultReadTickPeriodMs;
+
+ uint32_t sampling_frequency = pb_config.sampling_frequency()
+ ? pb_config.sampling_frequency()
+ : kDefaultSamplingFrequencyHz;
+
+ // Take the ratio of sampling and reading frequencies, which gives the
+ // upper bound on number of samples per tick (for a single per-cpu buffer).
+ // Overflow not a concern for sane inputs.
+ uint32_t expected_samples_per_tick =
+ 1 + (sampling_frequency * read_tick_period_ms) / 1000;
+
+ // Use double the expected value as the actual guardrail (don't assume that
+ // periodic read task is as exact as the kernel).
+ uint32_t samples_per_tick_limit = 2 * expected_samples_per_tick;
+ PERFETTO_DCHECK(samples_per_tick_limit > 0);
+ PERFETTO_DLOG("Capping samples (not records) per tick to [%" PRIu32 "]",
+ samples_per_tick_limit);
+
+ return EventConfig(pb_config, sampling_frequency, ring_buffer_pages.value(),
+ read_tick_period_ms, samples_per_tick_limit,
+ std::move(filter.value()));
+}
+
+EventConfig::EventConfig(const protos::pbzero::PerfEventConfig::Decoder& cfg,
+ uint32_t sampling_frequency,
+ uint32_t ring_buffer_pages,
+ uint32_t read_tick_period_ms,
+ uint32_t samples_per_tick_limit,
+ TargetFilter target_filter)
+ : target_all_cpus_(cfg.all_cpus()),
+ ring_buffer_pages_(ring_buffer_pages),
+ read_tick_period_ms_(read_tick_period_ms),
+ samples_per_tick_limit_(samples_per_tick_limit),
+ target_filter_(std::move(target_filter)) {
+ auto& pe = perf_event_attr_;
+ pe.size = sizeof(perf_event_attr);
+
+ pe.disabled = false;
+
+ // Ask the kernel to sample at a given frequency.
+ pe.type = PERF_TYPE_SOFTWARE;
+ pe.config = PERF_COUNT_SW_CPU_CLOCK;
+ pe.freq = true;
+ pe.sample_freq = sampling_frequency;
+
+ pe.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_STACK_USER |
+ PERF_SAMPLE_REGS_USER;
+ // PERF_SAMPLE_TIME:
+ pe.clockid = CLOCK_BOOTTIME;
+ pe.use_clockid = true;
+ // PERF_SAMPLE_STACK_USER:
+ // Needs to be < ((u16)(~0u)), and have bottom 8 bits clear.
+ pe.sample_stack_user = (1u << 15);
+ // PERF_SAMPLE_REGS_USER:
+ pe.sample_regs_user =
+ PerfUserRegsMaskForArch(unwindstack::Regs::CurrentArch());
+}
+
+} // namespace profiling
+} // namespace perfetto
diff --git a/src/profiling/perf/event_config.h b/src/profiling/perf/event_config.h
index f638294..86be0c6 100644
--- a/src/profiling/perf/event_config.h
+++ b/src/profiling/perf/event_config.h
@@ -22,15 +22,10 @@
#include <linux/perf_event.h>
#include <stdint.h>
#include <sys/types.h>
-#include <time.h>
-
-#include <unwindstack/Regs.h>
#include "perfetto/base/flat_set.h"
#include "perfetto/ext/base/optional.h"
-#include "perfetto/profiling/normalize.h"
#include "perfetto/tracing/core/data_source_config.h"
-#include "src/profiling/perf/regs_parsing.h"
#include "protos/perfetto/config/profiling/perf_event_config.pbzero.h"
@@ -46,113 +41,31 @@
base::FlatSet<pid_t> exclude_pids;
};
-namespace {
-constexpr uint64_t kDefaultSamplingFrequency = 100; // Hz
-
-base::Optional<std::string> Normalize(const protozero::ConstChars& src) {
- // Construct a null-terminated string that will be mutated by the normalizer.
- std::vector<char> base(src.size + 1);
- memcpy(base.data(), src.data, src.size);
- base[src.size] = '\0';
-
- char* new_start = base.data();
- ssize_t new_sz = NormalizeCmdLine(&new_start, base.size());
- if (new_sz < 0) {
- PERFETTO_ELOG("Failed to normalize config cmdline [%s], aborting",
- base.data());
- return base::nullopt;
- }
- return base::make_optional<std::string>(new_start,
- static_cast<size_t>(new_sz));
-}
-
-// returns |base::nullopt| if any of the input cmdlines couldn't be normalized.
-base::Optional<TargetFilter> ParseTargetFilter(
- const protos::pbzero::PerfEventConfig::Decoder& cfg) {
- TargetFilter filter;
- for (auto it = cfg.target_cmdline(); it; ++it) {
- base::Optional<std::string> opt = Normalize(*it);
- if (opt.has_value())
- filter.cmdlines.insert(std::move(opt.value()));
- else
- return base::nullopt;
- }
-
- for (auto it = cfg.exclude_cmdline(); it; ++it) {
- base::Optional<std::string> opt = Normalize(*it);
- if (opt.has_value())
- filter.exclude_cmdlines.insert(std::move(opt.value()));
- else
- return base::nullopt;
- }
-
- for (auto it = cfg.target_pid(); it; ++it) {
- filter.pids.insert(*it);
- }
-
- for (auto it = cfg.exclude_pid(); it; ++it) {
- filter.exclude_pids.insert(*it);
- }
- return base::make_optional(std::move(filter));
-}
-
-} // namespace
-
// Describes a single profiling configuration. Bridges the gap between the data
// source config proto, and the raw "perf_event_attr" structs to pass to the
// perf_event_open syscall.
class EventConfig {
public:
- static base::Optional<EventConfig> Create(const DataSourceConfig& ds_config) {
- protos::pbzero::PerfEventConfig::Decoder pb_config(
- ds_config.perf_event_config_raw());
-
- base::Optional<TargetFilter> filter = ParseTargetFilter(pb_config);
- if (!filter.has_value())
- return base::nullopt;
-
- return EventConfig(pb_config, std::move(filter.value()));
- }
+ static base::Optional<EventConfig> Create(const DataSourceConfig& ds_config);
uint32_t target_all_cpus() const { return target_all_cpus_; }
- size_t ring_buffer_pages() const { return ring_buffer_pages_; }
+ uint32_t ring_buffer_pages() const { return ring_buffer_pages_; }
+ uint32_t read_tick_period_ms() const { return read_tick_period_ms_; }
+ uint32_t samples_per_tick_limit() const { return samples_per_tick_limit_; }
+
+ const TargetFilter& filter() const { return target_filter_; }
perf_event_attr* perf_attr() const {
return const_cast<perf_event_attr*>(&perf_event_attr_);
}
- const TargetFilter& filter() const { return target_filter_; }
-
private:
EventConfig(const protos::pbzero::PerfEventConfig::Decoder& cfg,
- TargetFilter target_filter)
- : target_all_cpus_(cfg.all_cpus()),
- ring_buffer_pages_(cfg.ring_buffer_pages()),
- target_filter_(std::move(target_filter)) {
- auto& pe = perf_event_attr_;
- pe.size = sizeof(perf_event_attr);
-
- pe.disabled = false;
-
- // Ask the kernel to sample at a given frequency.
- pe.type = PERF_TYPE_SOFTWARE;
- pe.config = PERF_COUNT_SW_CPU_CLOCK;
- pe.freq = true;
- pe.sample_freq = (cfg.sampling_frequency() > 0) ? cfg.sampling_frequency()
- : kDefaultSamplingFrequency;
-
- pe.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME |
- PERF_SAMPLE_STACK_USER | PERF_SAMPLE_REGS_USER;
- // PERF_SAMPLE_TIME:
- pe.clockid = CLOCK_BOOTTIME;
- pe.use_clockid = true;
- // PERF_SAMPLE_STACK_USER:
- // Needs to be < ((u16)(~0u)), and have bottom 8 bits clear.
- pe.sample_stack_user = (1u << 15);
- // PERF_SAMPLE_REGS_USER:
- pe.sample_regs_user =
- PerfUserRegsMaskForArch(unwindstack::Regs::CurrentArch());
- }
+ uint32_t sampling_frequency,
+ uint32_t ring_buffer_pages,
+ uint32_t read_tick_period_ms,
+ uint32_t samples_per_tick_limit,
+ TargetFilter target_filter);
// If true, process all system-wide samples.
const bool target_all_cpus_;
@@ -160,12 +73,17 @@
// Size (in 4k pages) of each per-cpu ring buffer shared with the kernel. If
// zero, |EventReader| will choose a default value. Must be a power of two
// otherwise.
- const size_t ring_buffer_pages_;
+ const uint32_t ring_buffer_pages_;
- // TODO(rsavitski): if we allow for event groups containing multiple sampled
- // counters, we'll need to vary the .type & .config fields per
- // perf_event_open.
- perf_event_attr perf_event_attr_ = {};
+ // Parameter struct for |perf_event_open| calls.
+ struct perf_event_attr perf_event_attr_ = {};
+
+ // How often the ring buffers should be read.
+ const uint32_t read_tick_period_ms_;
+
+ // Guardrail for the amount of samples a given read attempt will extract from
+ // *each* per-cpu buffer.
+ const uint32_t samples_per_tick_limit_;
// Parsed whitelist/blacklist for filtering samples.
const TargetFilter target_filter_;
diff --git a/src/profiling/perf/event_config_unittest.cc b/src/profiling/perf/event_config_unittest.cc
index 0d7fa52..4c4977b 100644
--- a/src/profiling/perf/event_config_unittest.cc
+++ b/src/profiling/perf/event_config_unittest.cc
@@ -21,34 +21,86 @@
#include "perfetto/base/logging.h"
#include "perfetto/ext/base/optional.h"
-#include "perfetto/protozero/scattered_heap_buffer.h"
-#include "perfetto/tracing/core/data_source_config.h"
#include "test/gtest_and_gmock.h"
-#include "protos/perfetto/config/data_source_config.pbzero.h"
-#include "protos/perfetto/config/profiling/perf_event_config.pbzero.h"
+#include "protos/perfetto/config/data_source_config.gen.h"
+#include "protos/perfetto/config/profiling/perf_event_config.gen.h"
namespace perfetto {
namespace profiling {
namespace {
-static DataSourceConfig CreateEmptyConfig() {
- protozero::HeapBuffered<protos::pbzero::PerfEventConfig> pb_config;
- protozero::HeapBuffered<protos::pbzero::DataSourceConfig> ds_config;
- ds_config->set_perf_event_config_raw(pb_config.SerializeAsString());
- DataSourceConfig cfg;
- PERFETTO_CHECK(cfg.ParseFromString(ds_config.SerializeAsString()));
- return cfg;
+bool IsPowerOfTwo(size_t v) {
+ return (v != 0 && ((v & (v - 1)) == 0));
+}
+
+static DataSourceConfig AsDataSourceConfig(
+ const protos::gen::PerfEventConfig& perf_cfg) {
+ protos::gen::DataSourceConfig ds_cfg;
+ ds_cfg.set_perf_event_config_raw(perf_cfg.SerializeAsString());
+ return ds_cfg;
}
TEST(EventConfigTest, AttrStructConstructed) {
- auto cfg = CreateEmptyConfig();
- base::Optional<EventConfig> event_config = EventConfig::Create(cfg);
+ protos::gen::PerfEventConfig cfg;
+ base::Optional<EventConfig> event_config =
+ EventConfig::Create(AsDataSourceConfig(cfg));
ASSERT_TRUE(event_config.has_value());
ASSERT_TRUE(event_config->perf_attr() != nullptr);
}
+TEST(EventConfigTest, RingBufferPagesValidated) {
+ { // if unset, a default is used
+ protos::gen::PerfEventConfig cfg;
+ base::Optional<EventConfig> event_config =
+ EventConfig::Create(AsDataSourceConfig(cfg));
+
+ ASSERT_TRUE(event_config.has_value());
+ ASSERT_GT(event_config->ring_buffer_pages(), 0u);
+ ASSERT_TRUE(IsPowerOfTwo(event_config->ring_buffer_pages()));
+ }
+ { // power of two pages accepted
+ uint32_t num_pages = 128;
+ protos::gen::PerfEventConfig cfg;
+ cfg.set_ring_buffer_pages(num_pages);
+ base::Optional<EventConfig> event_config =
+ EventConfig::Create(AsDataSourceConfig(cfg));
+
+ ASSERT_TRUE(event_config.has_value());
+ ASSERT_EQ(event_config->ring_buffer_pages(), num_pages);
+ }
+ { // entire config rejected if not a power of two of pages
+ protos::gen::PerfEventConfig cfg;
+ cfg.set_ring_buffer_pages(7);
+ base::Optional<EventConfig> event_config =
+ EventConfig::Create(AsDataSourceConfig(cfg));
+
+ ASSERT_FALSE(event_config.has_value());
+ }
+}
+
+TEST(EventConfigTest, ReadTickPeriodDefaultedIfUnset) {
+ { // if unset, a default is used
+ protos::gen::PerfEventConfig cfg;
+ base::Optional<EventConfig> event_config =
+ EventConfig::Create(AsDataSourceConfig(cfg));
+
+ ASSERT_TRUE(event_config.has_value());
+ ASSERT_GT(event_config->read_tick_period_ms(), 0u);
+ }
+ { // otherwise, given value used
+ uint32_t period_ms = 250;
+ protos::gen::PerfEventConfig cfg;
+ cfg.set_ring_buffer_read_period_ms(period_ms);
+ base::Optional<EventConfig> event_config =
+ EventConfig::Create(AsDataSourceConfig(cfg));
+
+ ASSERT_TRUE(event_config.has_value());
+ ASSERT_EQ(event_config->read_tick_period_ms(), period_ms);
+ }
+}
+
} // namespace
} // namespace profiling
} // namespace perfetto
diff --git a/src/profiling/perf/event_reader.cc b/src/profiling/perf/event_reader.cc
index 701991e..b5f8ca2 100644
--- a/src/profiling/perf/event_reader.cc
+++ b/src/profiling/perf/event_reader.cc
@@ -31,8 +31,6 @@
namespace {
-constexpr size_t kDefaultDataPagesPerRingBuffer = 256; // 1 MB (256 x 4k pages)
-
template <typename T>
const char* ReadValue(T* value_out, const char* ptr) {
memcpy(value_out, reinterpret_cast<const void*>(ptr), sizeof(T));
@@ -220,18 +218,8 @@
return base::nullopt;
}
- // choose a reasonable ring buffer size
- size_t ring_buffer_pages = kDefaultDataPagesPerRingBuffer;
- size_t config_pages = event_cfg.ring_buffer_pages();
- if (config_pages) {
- if (!IsPowerOfTwo(config_pages)) {
- PERFETTO_ELOG("kernel buffer size must be a power of two pages");
- return base::nullopt;
- }
- ring_buffer_pages = config_pages;
- }
-
- auto ring_buffer = PerfRingBuffer::Allocate(perf_fd.get(), ring_buffer_pages);
+ auto ring_buffer =
+ PerfRingBuffer::Allocate(perf_fd.get(), event_cfg.ring_buffer_pages());
if (!ring_buffer.has_value()) {
return base::nullopt;
}
diff --git a/src/profiling/perf/perf_producer.cc b/src/profiling/perf/perf_producer.cc
index d972df8..da627a2 100644
--- a/src/profiling/perf/perf_producer.cc
+++ b/src/profiling/perf/perf_producer.cc
@@ -48,11 +48,6 @@
namespace profiling {
namespace {
-constexpr uint32_t kReadTickPeriodMs = 200;
-// TODO(rsavitski): this is better calculated (at setup) from the buffer and
-// sample sizes.
-constexpr size_t kMaxSamplesPerCpuPerReadTick = 64;
-
constexpr uint32_t kProcDescriptorTimeoutMs = 400;
constexpr uint32_t kInitialConnectionBackoffMs = 100;
@@ -65,17 +60,17 @@
return static_cast<size_t>(sysconf(_SC_NPROCESSORS_CONF));
}
-uint32_t TimeToNextReadTickMs(DataSourceInstanceID ds_id) {
- // Normally, we'd schedule the next tick at the next |kReadTickPeriodMs|
- // boundary of the boot clock. However, to avoid aligning the read tasaks of
+uint32_t TimeToNextReadTickMs(DataSourceInstanceID ds_id, uint32_t period_ms) {
+ // Normally, we'd schedule the next tick at the next |period_ms|
+ // boundary of the boot clock. However, to avoid aligning the read tasks of
// all concurrent data sources, we select a deterministic offset based on the
// data source id.
std::minstd_rand prng(static_cast<std::minstd_rand::result_type>(ds_id));
- std::uniform_int_distribution<uint32_t> dist(0, kReadTickPeriodMs - 1);
+ std::uniform_int_distribution<uint32_t> dist(0, period_ms - 1);
uint32_t ds_period_offset = dist(prng);
uint64_t now_ms = static_cast<uint64_t>(base::GetWallTimeMs().count());
- return kReadTickPeriodMs - ((now_ms - ds_period_offset) % kReadTickPeriodMs);
+ return period_ms - ((now_ms - ds_period_offset) % period_ms);
}
bool ShouldRejectDueToFilter(pid_t pid, const TargetFilter& filter) {
@@ -222,13 +217,14 @@
unwinding_worker_->PostStartDataSource(instance_id);
// Kick off periodic read task.
+ auto tick_period_ms = ds_it->second.event_config.read_tick_period_ms();
auto weak_this = weak_factory_.GetWeakPtr();
task_runner_->PostDelayedTask(
[weak_this, instance_id] {
if (weak_this)
weak_this->TickDataSourceRead(instance_id);
},
- TimeToNextReadTickMs(instance_id));
+ TimeToNextReadTickMs(instance_id, tick_period_ms));
}
void PerfProducer::StopDataSource(DataSourceInstanceID instance_id) {
@@ -291,10 +287,10 @@
PERFETTO_METATRACE_SCOPED(TAG_PRODUCER, PROFILER_READ_TICK);
// Make a pass over all per-cpu readers.
+ uint32_t max_samples = ds.event_config.samples_per_tick_limit();
bool more_records_available = false;
for (EventReader& reader : ds.per_cpu_readers) {
- if (ReadAndParsePerCpuBuffer(&reader, kMaxSamplesPerCpuPerReadTick, ds_id,
- &ds)) {
+ if (ReadAndParsePerCpuBuffer(&reader, max_samples, ds_id, &ds)) {
more_records_available = true;
}
}
@@ -307,18 +303,19 @@
unwinding_worker_->PostInitiateDataSourceStop(ds_id);
} else {
// otherwise, keep reading
+ auto tick_period_ms = it->second.event_config.read_tick_period_ms();
auto weak_this = weak_factory_.GetWeakPtr();
task_runner_->PostDelayedTask(
[weak_this, ds_id] {
if (weak_this)
weak_this->TickDataSourceRead(ds_id);
},
- TimeToNextReadTickMs(ds_id));
+ TimeToNextReadTickMs(ds_id, tick_period_ms));
}
}
bool PerfProducer::ReadAndParsePerCpuBuffer(EventReader* reader,
- size_t max_samples,
+ uint32_t max_samples,
DataSourceInstanceID ds_id,
DataSourceState* ds) {
PERFETTO_METATRACE_SCOPED(TAG_PRODUCER, PROFILER_READ_CPU);
@@ -333,7 +330,7 @@
});
};
- for (size_t i = 0; i < max_samples; i++) {
+ for (uint32_t i = 0; i < max_samples; i++) {
base::Optional<ParsedSample> sample =
reader->ReadUntilSample(records_lost_callback);
if (!sample) {
@@ -369,7 +366,7 @@
// Check whether samples for this new process should be
// dropped due to the target whitelist/blacklist.
- const TargetFilter& filter = ds->event_cfg.filter();
+ const TargetFilter& filter = ds->event_config.filter();
if (ShouldRejectDueToFilter(pid, filter)) {
process_state = ProcessTrackingStatus::kRejected;
continue;
@@ -393,7 +390,7 @@
UnwindEntry{ds_id, std::move(sample.value())};
queue.CommitWrite();
} else {
- PERFETTO_DLOG("Unwinder queue full, skipping sample.");
+ PERFETTO_DLOG("Unwinder queue full, skipping sample");
PostEmitSkippedSample(ds_id, std::move(sample.value()),
SampleSkipReason::kUnwindEnqueue);
}
diff --git a/src/profiling/perf/perf_producer.h b/src/profiling/perf/perf_producer.h
index acfbc4b..585c460 100644
--- a/src/profiling/perf/perf_producer.h
+++ b/src/profiling/perf/perf_producer.h
@@ -121,15 +121,15 @@
struct DataSourceState {
enum class Status { kActive, kShuttingDown };
- DataSourceState(EventConfig _event_cfg,
+ DataSourceState(EventConfig _event_config,
std::unique_ptr<TraceWriter> _trace_writer,
std::vector<EventReader> _per_cpu_readers)
- : event_cfg(std::move(_event_cfg)),
+ : event_config(std::move(_event_config)),
trace_writer(std::move(_trace_writer)),
per_cpu_readers(std::move(_per_cpu_readers)) {}
Status status = Status::kActive;
- const EventConfig event_cfg;
+ const EventConfig event_config;
std::unique_ptr<TraceWriter> trace_writer;
// Indexed by cpu, vector never resized.
std::vector<EventReader> per_cpu_readers;
@@ -163,7 +163,7 @@
// on the amount of samples that will be parsed, which might be more than the
// number of underlying records (as there might be non-sample records).
bool ReadAndParsePerCpuBuffer(EventReader* reader,
- size_t max_samples,
+ uint32_t max_samples,
DataSourceInstanceID ds_id,
DataSourceState* ds);