TraceProcessor: Add clipping support to sched table + minor fixes

- Fix mac build by not building libunwind stack on mac
- Fix cost estimation of the sched_slices table, which was always
  return an estimated cost of 0.
- Fix heuristics for sorting.
- Introduce ts_clip=true|false column in sched table. When set to
  true, it truncates the start and duration of slices around the
  ts >< operators. For instance:
  Given shced slices:                [  A   ][ B ][   C       ]
  And WHERE ts BETWEEN x and y          ^x           y^
  The output would be:                  [ A ][ B ][ C ]

Change-Id: I0c9a244f8b539e1b1e509cc7709cc5a98451a40b
diff --git a/BUILD.gn b/BUILD.gn
index aee8466..01c1e0d 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -34,7 +34,7 @@
 
   # libunwindstack requires API level 26 or newer.
   should_build_heapprofd =
-      !build_with_chromium && is_clang &&
+      !build_with_chromium && is_clang && (is_linux || is_android) &&
       (!is_android || android_api_level >= 26 || build_with_android)
 }
 assert(!monolithic_binaries || !build_with_android)
diff --git a/src/trace_processor/sched_slice_table.cc b/src/trace_processor/sched_slice_table.cc
index eeb4bb2..6ddec12 100644
--- a/src/trace_processor/sched_slice_table.cc
+++ b/src/trace_processor/sched_slice_table.cc
@@ -32,6 +32,8 @@
 
 using namespace sqlite_utils;
 
+constexpr uint64_t kUint64Max = std::numeric_limits<uint64_t>::max();
+
 template <size_t N = base::kMaxCpus>
 bool PopulateFilterBitmap(int op,
                           sqlite3_value* value,
@@ -93,14 +95,15 @@
 void SchedSliceTable::RegisterTable(sqlite3* db, const TraceStorage* storage) {
   Table::Register<SchedSliceTable>(db, storage,
                                    "CREATE TABLE sched("
-                                   "quantum HIDDEN BIG INT, "
-                                   "ts_lower_bound HIDDEN BIG INT, "
                                    "ts UNSIGNED BIG INT, "
                                    "cpu UNSIGNED INT, "
                                    "dur UNSIGNED BIG INT, "
                                    "quantized_group UNSIGNED BIG INT, "
-                                   "utid UNSIGNED INT,"
-                                   "cycles UNSIGNED BIG INT,"
+                                   "utid UNSIGNED INT, "
+                                   "cycles UNSIGNED BIG INT, "
+                                   "quantum HIDDEN BIG INT, "
+                                   "ts_lower_bound HIDDEN BIG INT, "
+                                   "ts_clip HIDDEN BOOLEAN, "
                                    "PRIMARY KEY(cpu, ts)"
                                    ") WITHOUT ROWID;");
 }
@@ -111,19 +114,29 @@
 
 int SchedSliceTable::BestIndex(const QueryConstraints& qc,
                                BestIndexInfo* info) {
-  // Omit SQLite constraint checks on the hidden columns, so the client can
-  // write queries of the form "quantum == x" and "ts_lower_bound == x".
-  // Disallow any other constraint on these columns.
+  bool is_time_constrained = false;
   for (size_t i = 0; i < qc.constraints().size(); i++) {
     const auto& cs = qc.constraints()[i];
+
+    // Omit SQLite constraint checks on the hidden columns, so the client can
+    // write queries of the form "quantum=x" "ts_lower_bound=x" "ts_clip=true".
+    // Disallow any other constraint on these columns.
     if (cs.iColumn == Column::kTimestampLowerBound ||
-        cs.iColumn == Column::kQuantizedGroup) {
+        cs.iColumn == Column::kQuantizedGroup ||
+        cs.iColumn == Column::kClipTimestamp) {
       if (!IsOpEq(cs.op))
         return SQLITE_CONSTRAINT_FUNCTION;
       info->omit[i] = true;
     }
+
+    if (cs.iColumn == Column::kTimestampLowerBound ||
+        cs.iColumn == Column::kTimestamp) {
+      is_time_constrained = true;
+    }
   }
 
+  info->estimated_cost = is_time_constrained ? 10 : 10000;
+
   bool is_quantized_group_order_desc = false;
   bool is_duration_timestamp_order = false;
   for (const auto& ob : qc.order_by()) {
@@ -136,9 +149,14 @@
       case Column::kDuration:
         is_duration_timestamp_order = true;
         break;
-      case Column::kQuantum:
       case Column::kCpu:
         break;
+
+      // Can't order on hidden columns.
+      case Column::kQuantum:
+      case Column::kTimestampLowerBound:
+      case Column::kClipTimestamp:
+        return SQLITE_CONSTRAINT_FUNCTION;
     }
   }
 
@@ -206,7 +224,8 @@
   const auto& slices = storage_->SlicesForCpu(cpu);
   switch (N) {
     case Column::kTimestamp: {
-      auto timestamp = state->next_timestamp();
+      uint64_t timestamp = state->next_timestamp();
+      timestamp = std::max(timestamp, state->ts_clip_min());
       sqlite3_result_int64(context, static_cast<sqlite3_int64>(timestamp));
       break;
     }
@@ -218,6 +237,14 @@
       uint64_t duration;
       if (quantum == 0) {
         duration = slices.durations()[row];
+        uint64_t start_ns = state->next_timestamp();
+        uint64_t end_ns = start_ns + duration;
+        uint64_t clip_trim_ns = 0;
+        if (state->ts_clip_min() > start_ns)
+          clip_trim_ns += state->ts_clip_min() - start_ns;
+        if (end_ns > state->ts_clip_max())
+          clip_trim_ns += end_ns - state->ts_clip_min();
+        duration -= std::min(clip_trim_ns, duration);
       } else {
         uint64_t start_quantised_group = state->next_timestamp() / quantum;
         uint64_t end = slices.start_ns()[row] + slices.durations()[row];
@@ -263,8 +290,9 @@
   cpu_filter.set();
 
   uint64_t min_ts = 0;
-  uint64_t max_ts = std::numeric_limits<uint64_t>::max();
+  uint64_t max_ts = kUint64Max;
   uint64_t ts_lower_bound = 0;
+  bool ts_clip = false;
 
   for (size_t i = 0; i < query_constraints.constraints().size(); i++) {
     const auto& cs = query_constraints.constraints()[i];
@@ -278,6 +306,9 @@
       case Column::kTimestampLowerBound:
         ts_lower_bound = static_cast<uint64_t>(sqlite3_value_int64(argv[i]));
         break;
+      case Column::kClipTimestamp:
+        ts_clip = sqlite3_value_int(argv[i]) ? true : false;
+        break;
       case Column::kTimestamp: {
         auto ts = static_cast<uint64_t>(sqlite3_value_int64(argv[i]));
         if (IsOpGe(cs.op) || IsOpGt(cs.op)) {
@@ -290,13 +321,21 @@
     }
   }
 
+  if (ts_clip) {
+    PERFETTO_DCHECK(ts_lower_bound == 0);
+    if (ts_lower_bound)
+      PERFETTO_ELOG("Cannot use ts_lower_bound and ts_clip together");
+    ts_lower_bound = min_ts;
+    min_ts = 0;
+  }
+
   // If the query specifies a lower bound on ts, find that bound across all
   // CPUs involved in the query and turn that into a min_ts constraint.
   // ts_lower_bound is defined as the largest timestamp < X, or if none, the
   // smallest timestamp >= X.
   if (ts_lower_bound > 0) {
     uint64_t largest_ts_before = 0;
-    uint64_t smallest_ts_after = std::numeric_limits<uint64_t>::max();
+    uint64_t smallest_ts_after = kUint64Max;
     for (uint32_t cpu = 0; cpu < base::kMaxCpus; cpu++) {
       if (!cpu_filter.test(cpu))
         continue;
@@ -323,8 +362,10 @@
   for (uint32_t cpu = 0; cpu < base::kMaxCpus; cpu++) {
     if (!cpu_filter.test(cpu))
       continue;
+    uint64_t ts_clip_min = ts_clip ? min_ts : 0;
+    uint64_t ts_clip_max = ts_clip ? max_ts : kUint64Max;
     StateForCpu(cpu)->Initialize(
-        cpu, storage_, quantum_,
+        cpu, storage_, quantum_, ts_clip_min, ts_clip_max,
         CreateSortedIndexVectorForCpu(cpu, min_ts, max_ts));
   }
 
@@ -441,10 +482,14 @@
     uint32_t cpu,
     const TraceStorage* storage,
     uint64_t quantum,
+    uint64_t ts_clip_min,
+    uint64_t ts_clip_max,
     std::vector<uint32_t> sorted_row_ids) {
   cpu_ = cpu;
   storage_ = storage;
   quantum_ = quantum;
+  ts_clip_min_ = ts_clip_min;
+  ts_clip_max_ = ts_clip_max;
   sorted_row_ids_ = std::move(sorted_row_ids);
   UpdateNextTimestampForNextRow();
 }
diff --git a/src/trace_processor/sched_slice_table.h b/src/trace_processor/sched_slice_table.h
index 80f5a66..06f5799 100644
--- a/src/trace_processor/sched_slice_table.h
+++ b/src/trace_processor/sched_slice_table.h
@@ -34,14 +34,17 @@
 class SchedSliceTable : public Table {
  public:
   enum Column {
-    kQuantum = 0,
-    kTimestampLowerBound = 1,
-    kTimestamp = 2,
-    kCpu = 3,
-    kDuration = 4,
-    kQuantizedGroup = 5,
-    kUtid = 6,
-    kCycles = 7,
+    kTimestamp = 0,
+    kCpu = 1,
+    kDuration = 2,
+    kQuantizedGroup = 3,
+    kUtid = 4,
+    kCycles = 5,
+
+    // Hidden columns.
+    kQuantum = 6,
+    kTimestampLowerBound = 7,
+    kClipTimestamp = 8,
   };
 
   SchedSliceTable(const TraceStorage* storage);
@@ -60,6 +63,8 @@
     void Initialize(uint32_t cpu,
                     const TraceStorage* storage,
                     uint64_t quantum,
+                    uint64_t ts_clip_min,
+                    uint64_t ts_clip_max,
                     std::vector<uint32_t> sorted_row_ids);
     void FindNextSlice();
     bool IsNextRowIdIndexValid() const {
@@ -68,6 +73,8 @@
 
     size_t next_row_id() const { return sorted_row_ids_[next_row_id_index_]; }
     uint64_t next_timestamp() const { return next_timestamp_; }
+    uint64_t ts_clip_min() const { return ts_clip_min_; }
+    uint64_t ts_clip_max() const { return ts_clip_max_; }
 
    private:
     const TraceStorage::SlicesPerCpu& Slices() {
@@ -93,6 +100,11 @@
     // The quantum the output slices should fall within.
     uint64_t quantum_ = 0;
 
+    // When clipping is applied (i.e. WHERE ts_clip between X and Y), slices are
+    // cut and shrunk around the min-max boundaries to fit in the clip window.
+    uint64_t ts_clip_min_ = 0;
+    uint64_t ts_clip_max_ = std::numeric_limits<uint64_t>::max();
+
     const TraceStorage* storage_ = nullptr;
   };
 
@@ -104,7 +116,7 @@
                 sqlite3_value** argv);
 
     // Chooses the next CPU which should be returned according to the sorting
-    // citeria specified by |order_by_|.
+    // criteria specified by |order_by_|.
     void FindCpuWithNextSlice();
 
     // Returns whether the next CPU to be returned by this filter operation is
diff --git a/src/trace_processor/sched_tracker.cc b/src/trace_processor/sched_tracker.cc
index f2619c2..df2a689 100644
--- a/src/trace_processor/sched_tracker.cc
+++ b/src/trace_processor/sched_tracker.cc
@@ -36,7 +36,11 @@
                                    base::StringView prev_comm,
                                    uint32_t next_pid) {
   // At this stage all events should be globally timestamp ordered.
-  PERFETTO_DCHECK(prev_timestamp_ <= timestamp);
+  if (timestamp < prev_timestamp_) {
+    PERFETTO_ELOG("sched_switch event out of order by %.4f ms, skipping",
+                  (prev_timestamp_ - timestamp) / 1e6);
+    return;
+  }
   prev_timestamp_ = timestamp;
   PERFETTO_DCHECK(cpu < base::kMaxCpus);
   SchedSwitchEvent* prev = &last_sched_per_cpu_[cpu];
diff --git a/src/trace_processor/trace_processor.h b/src/trace_processor/trace_processor.h
index 1dfac9a..5235514 100644
--- a/src/trace_processor/trace_processor.h
+++ b/src/trace_processor/trace_processor.h
@@ -40,7 +40,7 @@
  public:
   struct Config {
     OptimizationMode optimization_mode = OptimizationMode::kMaxBandwidth;
-    uint64_t window_size_ns = 10 * 1000 * 1000 * 1000ULL;  // 10 seconds.
+    uint64_t window_size_ns = 60 * 1000 * 1000 * 1000ULL;  // 60 seconds.
   };
   explicit TraceProcessor(const Config&);
   ~TraceProcessor();
diff --git a/src/trace_processor/trace_sorter.h b/src/trace_processor/trace_sorter.h
index 4294f6d..484090c 100644
--- a/src/trace_processor/trace_sorter.h
+++ b/src/trace_processor/trace_sorter.h
@@ -154,7 +154,7 @@
     // too many events in the staging area.
     if (optimization_ == OptimizationMode::kMaxBandwidth &&
         latest_timestamp_ - earliest_timestamp_ < window_size_ns_ * 10 &&
-        events_.size() < 1e6) {
+        events_.size() < 5 * 1e6) {
       return;
     }
 
diff --git a/src/trace_processor/trace_storage.h b/src/trace_processor/trace_storage.h
index f5a9dbc..925f307 100644
--- a/src/trace_processor/trace_storage.h
+++ b/src/trace_processor/trace_storage.h
@@ -217,7 +217,11 @@
                            uint32_t cpu,
                            uint32_t new_freq) {
     auto& freqs = cpu_freq_[cpu];
-    PERFETTO_DCHECK(freqs.empty() || timestamp > freqs.back().first);
+    if (!freqs.empty() && timestamp < freqs.back().first) {
+      PERFETTO_ELOG("cpufreq out of order by %.4f ms, skipping",
+                    (freqs.back().first - timestamp) / 1e6);
+      return;
+    }
     freqs.emplace_back(timestamp, new_freq);
   }