TraceProcessor: Add clipping support to sched table + minor fixes
- Fix mac build by not building libunwind stack on mac
- Fix cost estimation of the sched_slices table, which was always
return an estimated cost of 0.
- Fix heuristics for sorting.
- Introduce ts_clip=true|false column in sched table. When set to
true, it truncates the start and duration of slices around the
ts >< operators. For instance:
Given shced slices: [ A ][ B ][ C ]
And WHERE ts BETWEEN x and y ^x y^
The output would be: [ A ][ B ][ C ]
Change-Id: I0c9a244f8b539e1b1e509cc7709cc5a98451a40b
diff --git a/BUILD.gn b/BUILD.gn
index aee8466..01c1e0d 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -34,7 +34,7 @@
# libunwindstack requires API level 26 or newer.
should_build_heapprofd =
- !build_with_chromium && is_clang &&
+ !build_with_chromium && is_clang && (is_linux || is_android) &&
(!is_android || android_api_level >= 26 || build_with_android)
}
assert(!monolithic_binaries || !build_with_android)
diff --git a/src/trace_processor/sched_slice_table.cc b/src/trace_processor/sched_slice_table.cc
index eeb4bb2..6ddec12 100644
--- a/src/trace_processor/sched_slice_table.cc
+++ b/src/trace_processor/sched_slice_table.cc
@@ -32,6 +32,8 @@
using namespace sqlite_utils;
+constexpr uint64_t kUint64Max = std::numeric_limits<uint64_t>::max();
+
template <size_t N = base::kMaxCpus>
bool PopulateFilterBitmap(int op,
sqlite3_value* value,
@@ -93,14 +95,15 @@
void SchedSliceTable::RegisterTable(sqlite3* db, const TraceStorage* storage) {
Table::Register<SchedSliceTable>(db, storage,
"CREATE TABLE sched("
- "quantum HIDDEN BIG INT, "
- "ts_lower_bound HIDDEN BIG INT, "
"ts UNSIGNED BIG INT, "
"cpu UNSIGNED INT, "
"dur UNSIGNED BIG INT, "
"quantized_group UNSIGNED BIG INT, "
- "utid UNSIGNED INT,"
- "cycles UNSIGNED BIG INT,"
+ "utid UNSIGNED INT, "
+ "cycles UNSIGNED BIG INT, "
+ "quantum HIDDEN BIG INT, "
+ "ts_lower_bound HIDDEN BIG INT, "
+ "ts_clip HIDDEN BOOLEAN, "
"PRIMARY KEY(cpu, ts)"
") WITHOUT ROWID;");
}
@@ -111,19 +114,29 @@
int SchedSliceTable::BestIndex(const QueryConstraints& qc,
BestIndexInfo* info) {
- // Omit SQLite constraint checks on the hidden columns, so the client can
- // write queries of the form "quantum == x" and "ts_lower_bound == x".
- // Disallow any other constraint on these columns.
+ bool is_time_constrained = false;
for (size_t i = 0; i < qc.constraints().size(); i++) {
const auto& cs = qc.constraints()[i];
+
+ // Omit SQLite constraint checks on the hidden columns, so the client can
+ // write queries of the form "quantum=x" "ts_lower_bound=x" "ts_clip=true".
+ // Disallow any other constraint on these columns.
if (cs.iColumn == Column::kTimestampLowerBound ||
- cs.iColumn == Column::kQuantizedGroup) {
+ cs.iColumn == Column::kQuantizedGroup ||
+ cs.iColumn == Column::kClipTimestamp) {
if (!IsOpEq(cs.op))
return SQLITE_CONSTRAINT_FUNCTION;
info->omit[i] = true;
}
+
+ if (cs.iColumn == Column::kTimestampLowerBound ||
+ cs.iColumn == Column::kTimestamp) {
+ is_time_constrained = true;
+ }
}
+ info->estimated_cost = is_time_constrained ? 10 : 10000;
+
bool is_quantized_group_order_desc = false;
bool is_duration_timestamp_order = false;
for (const auto& ob : qc.order_by()) {
@@ -136,9 +149,14 @@
case Column::kDuration:
is_duration_timestamp_order = true;
break;
- case Column::kQuantum:
case Column::kCpu:
break;
+
+ // Can't order on hidden columns.
+ case Column::kQuantum:
+ case Column::kTimestampLowerBound:
+ case Column::kClipTimestamp:
+ return SQLITE_CONSTRAINT_FUNCTION;
}
}
@@ -206,7 +224,8 @@
const auto& slices = storage_->SlicesForCpu(cpu);
switch (N) {
case Column::kTimestamp: {
- auto timestamp = state->next_timestamp();
+ uint64_t timestamp = state->next_timestamp();
+ timestamp = std::max(timestamp, state->ts_clip_min());
sqlite3_result_int64(context, static_cast<sqlite3_int64>(timestamp));
break;
}
@@ -218,6 +237,14 @@
uint64_t duration;
if (quantum == 0) {
duration = slices.durations()[row];
+ uint64_t start_ns = state->next_timestamp();
+ uint64_t end_ns = start_ns + duration;
+ uint64_t clip_trim_ns = 0;
+ if (state->ts_clip_min() > start_ns)
+ clip_trim_ns += state->ts_clip_min() - start_ns;
+ if (end_ns > state->ts_clip_max())
+ clip_trim_ns += end_ns - state->ts_clip_min();
+ duration -= std::min(clip_trim_ns, duration);
} else {
uint64_t start_quantised_group = state->next_timestamp() / quantum;
uint64_t end = slices.start_ns()[row] + slices.durations()[row];
@@ -263,8 +290,9 @@
cpu_filter.set();
uint64_t min_ts = 0;
- uint64_t max_ts = std::numeric_limits<uint64_t>::max();
+ uint64_t max_ts = kUint64Max;
uint64_t ts_lower_bound = 0;
+ bool ts_clip = false;
for (size_t i = 0; i < query_constraints.constraints().size(); i++) {
const auto& cs = query_constraints.constraints()[i];
@@ -278,6 +306,9 @@
case Column::kTimestampLowerBound:
ts_lower_bound = static_cast<uint64_t>(sqlite3_value_int64(argv[i]));
break;
+ case Column::kClipTimestamp:
+ ts_clip = sqlite3_value_int(argv[i]) ? true : false;
+ break;
case Column::kTimestamp: {
auto ts = static_cast<uint64_t>(sqlite3_value_int64(argv[i]));
if (IsOpGe(cs.op) || IsOpGt(cs.op)) {
@@ -290,13 +321,21 @@
}
}
+ if (ts_clip) {
+ PERFETTO_DCHECK(ts_lower_bound == 0);
+ if (ts_lower_bound)
+ PERFETTO_ELOG("Cannot use ts_lower_bound and ts_clip together");
+ ts_lower_bound = min_ts;
+ min_ts = 0;
+ }
+
// If the query specifies a lower bound on ts, find that bound across all
// CPUs involved in the query and turn that into a min_ts constraint.
// ts_lower_bound is defined as the largest timestamp < X, or if none, the
// smallest timestamp >= X.
if (ts_lower_bound > 0) {
uint64_t largest_ts_before = 0;
- uint64_t smallest_ts_after = std::numeric_limits<uint64_t>::max();
+ uint64_t smallest_ts_after = kUint64Max;
for (uint32_t cpu = 0; cpu < base::kMaxCpus; cpu++) {
if (!cpu_filter.test(cpu))
continue;
@@ -323,8 +362,10 @@
for (uint32_t cpu = 0; cpu < base::kMaxCpus; cpu++) {
if (!cpu_filter.test(cpu))
continue;
+ uint64_t ts_clip_min = ts_clip ? min_ts : 0;
+ uint64_t ts_clip_max = ts_clip ? max_ts : kUint64Max;
StateForCpu(cpu)->Initialize(
- cpu, storage_, quantum_,
+ cpu, storage_, quantum_, ts_clip_min, ts_clip_max,
CreateSortedIndexVectorForCpu(cpu, min_ts, max_ts));
}
@@ -441,10 +482,14 @@
uint32_t cpu,
const TraceStorage* storage,
uint64_t quantum,
+ uint64_t ts_clip_min,
+ uint64_t ts_clip_max,
std::vector<uint32_t> sorted_row_ids) {
cpu_ = cpu;
storage_ = storage;
quantum_ = quantum;
+ ts_clip_min_ = ts_clip_min;
+ ts_clip_max_ = ts_clip_max;
sorted_row_ids_ = std::move(sorted_row_ids);
UpdateNextTimestampForNextRow();
}
diff --git a/src/trace_processor/sched_slice_table.h b/src/trace_processor/sched_slice_table.h
index 80f5a66..06f5799 100644
--- a/src/trace_processor/sched_slice_table.h
+++ b/src/trace_processor/sched_slice_table.h
@@ -34,14 +34,17 @@
class SchedSliceTable : public Table {
public:
enum Column {
- kQuantum = 0,
- kTimestampLowerBound = 1,
- kTimestamp = 2,
- kCpu = 3,
- kDuration = 4,
- kQuantizedGroup = 5,
- kUtid = 6,
- kCycles = 7,
+ kTimestamp = 0,
+ kCpu = 1,
+ kDuration = 2,
+ kQuantizedGroup = 3,
+ kUtid = 4,
+ kCycles = 5,
+
+ // Hidden columns.
+ kQuantum = 6,
+ kTimestampLowerBound = 7,
+ kClipTimestamp = 8,
};
SchedSliceTable(const TraceStorage* storage);
@@ -60,6 +63,8 @@
void Initialize(uint32_t cpu,
const TraceStorage* storage,
uint64_t quantum,
+ uint64_t ts_clip_min,
+ uint64_t ts_clip_max,
std::vector<uint32_t> sorted_row_ids);
void FindNextSlice();
bool IsNextRowIdIndexValid() const {
@@ -68,6 +73,8 @@
size_t next_row_id() const { return sorted_row_ids_[next_row_id_index_]; }
uint64_t next_timestamp() const { return next_timestamp_; }
+ uint64_t ts_clip_min() const { return ts_clip_min_; }
+ uint64_t ts_clip_max() const { return ts_clip_max_; }
private:
const TraceStorage::SlicesPerCpu& Slices() {
@@ -93,6 +100,11 @@
// The quantum the output slices should fall within.
uint64_t quantum_ = 0;
+ // When clipping is applied (i.e. WHERE ts_clip between X and Y), slices are
+ // cut and shrunk around the min-max boundaries to fit in the clip window.
+ uint64_t ts_clip_min_ = 0;
+ uint64_t ts_clip_max_ = std::numeric_limits<uint64_t>::max();
+
const TraceStorage* storage_ = nullptr;
};
@@ -104,7 +116,7 @@
sqlite3_value** argv);
// Chooses the next CPU which should be returned according to the sorting
- // citeria specified by |order_by_|.
+ // criteria specified by |order_by_|.
void FindCpuWithNextSlice();
// Returns whether the next CPU to be returned by this filter operation is
diff --git a/src/trace_processor/sched_tracker.cc b/src/trace_processor/sched_tracker.cc
index f2619c2..df2a689 100644
--- a/src/trace_processor/sched_tracker.cc
+++ b/src/trace_processor/sched_tracker.cc
@@ -36,7 +36,11 @@
base::StringView prev_comm,
uint32_t next_pid) {
// At this stage all events should be globally timestamp ordered.
- PERFETTO_DCHECK(prev_timestamp_ <= timestamp);
+ if (timestamp < prev_timestamp_) {
+ PERFETTO_ELOG("sched_switch event out of order by %.4f ms, skipping",
+ (prev_timestamp_ - timestamp) / 1e6);
+ return;
+ }
prev_timestamp_ = timestamp;
PERFETTO_DCHECK(cpu < base::kMaxCpus);
SchedSwitchEvent* prev = &last_sched_per_cpu_[cpu];
diff --git a/src/trace_processor/trace_processor.h b/src/trace_processor/trace_processor.h
index 1dfac9a..5235514 100644
--- a/src/trace_processor/trace_processor.h
+++ b/src/trace_processor/trace_processor.h
@@ -40,7 +40,7 @@
public:
struct Config {
OptimizationMode optimization_mode = OptimizationMode::kMaxBandwidth;
- uint64_t window_size_ns = 10 * 1000 * 1000 * 1000ULL; // 10 seconds.
+ uint64_t window_size_ns = 60 * 1000 * 1000 * 1000ULL; // 60 seconds.
};
explicit TraceProcessor(const Config&);
~TraceProcessor();
diff --git a/src/trace_processor/trace_sorter.h b/src/trace_processor/trace_sorter.h
index 4294f6d..484090c 100644
--- a/src/trace_processor/trace_sorter.h
+++ b/src/trace_processor/trace_sorter.h
@@ -154,7 +154,7 @@
// too many events in the staging area.
if (optimization_ == OptimizationMode::kMaxBandwidth &&
latest_timestamp_ - earliest_timestamp_ < window_size_ns_ * 10 &&
- events_.size() < 1e6) {
+ events_.size() < 5 * 1e6) {
return;
}
diff --git a/src/trace_processor/trace_storage.h b/src/trace_processor/trace_storage.h
index f5a9dbc..925f307 100644
--- a/src/trace_processor/trace_storage.h
+++ b/src/trace_processor/trace_storage.h
@@ -217,7 +217,11 @@
uint32_t cpu,
uint32_t new_freq) {
auto& freqs = cpu_freq_[cpu];
- PERFETTO_DCHECK(freqs.empty() || timestamp > freqs.back().first);
+ if (!freqs.empty() && timestamp < freqs.back().first) {
+ PERFETTO_ELOG("cpufreq out of order by %.4f ms, skipping",
+ (freqs.back().first - timestamp) / 1e6);
+ return;
+ }
freqs.emplace_back(timestamp, new_freq);
}