trace_processor: collapse sched slice table to a flat table

Instead of storing sched events per-cpu, store them all in a flat map.
This makes queries significantly faster on large traces as we do not
constantly need to search for the next index across CPUs.

Instead we can skip across the vector using std find on a vector<bool>
which is very fast.

While this happens, also remove ts_lower_bound, ts_clip and quantum.
quantum and ts_clip can either be expressed as a combination of a
span join with a window table or a quantum table.

ts_bound is used just for displaying tracks accurately in the UI
and can be replaced by clipping instead.

Change-Id: I1715658b17fcf83c13eebbd8ef01b46f85503b90
diff --git a/src/trace_processor/sched_slice_table.cc b/src/trace_processor/sched_slice_table.cc
index 6ddec12..2813dd7 100644
--- a/src/trace_processor/sched_slice_table.cc
+++ b/src/trace_processor/sched_slice_table.cc
@@ -98,12 +98,8 @@
                                    "ts UNSIGNED BIG INT, "
                                    "cpu UNSIGNED INT, "
                                    "dur UNSIGNED BIG INT, "
-                                   "quantized_group UNSIGNED BIG INT, "
                                    "utid UNSIGNED INT, "
                                    "cycles UNSIGNED BIG INT, "
-                                   "quantum HIDDEN BIG INT, "
-                                   "ts_lower_bound HIDDEN BIG INT, "
-                                   "ts_clip HIDDEN BOOLEAN, "
                                    "PRIMARY KEY(cpu, ts)"
                                    ") WITHOUT ROWID;");
 }
@@ -117,82 +113,16 @@
   bool is_time_constrained = false;
   for (size_t i = 0; i < qc.constraints().size(); i++) {
     const auto& cs = qc.constraints()[i];
-
-    // Omit SQLite constraint checks on the hidden columns, so the client can
-    // write queries of the form "quantum=x" "ts_lower_bound=x" "ts_clip=true".
-    // Disallow any other constraint on these columns.
-    if (cs.iColumn == Column::kTimestampLowerBound ||
-        cs.iColumn == Column::kQuantizedGroup ||
-        cs.iColumn == Column::kClipTimestamp) {
-      if (!IsOpEq(cs.op))
-        return SQLITE_CONSTRAINT_FUNCTION;
-      info->omit[i] = true;
-    }
-
-    if (cs.iColumn == Column::kTimestampLowerBound ||
-        cs.iColumn == Column::kTimestamp) {
+    if (cs.iColumn == Column::kTimestamp)
       is_time_constrained = true;
-    }
   }
 
   info->estimated_cost = is_time_constrained ? 10 : 10000;
-
-  bool is_quantized_group_order_desc = false;
-  bool is_duration_timestamp_order = false;
-  for (const auto& ob : qc.order_by()) {
-    switch (ob.iColumn) {
-      case Column::kQuantizedGroup:
-        if (ob.desc)
-          is_quantized_group_order_desc = true;
-        break;
-      case Column::kTimestamp:
-      case Column::kDuration:
-        is_duration_timestamp_order = true;
-        break;
-      case Column::kCpu:
-        break;
-
-      // Can't order on hidden columns.
-      case Column::kQuantum:
-      case Column::kTimestampLowerBound:
-      case Column::kClipTimestamp:
-        return SQLITE_CONSTRAINT_FUNCTION;
-    }
-  }
-
-  bool has_quantum_constraint = false;
-  for (const auto& cs : qc.constraints()) {
-    if (cs.iColumn == Column::kQuantum)
-      has_quantum_constraint = true;
-  }
-
-  // If a quantum constraint is present, we don't support native ordering by
-  // time related parameters or by quantized group in descending order.
-  bool needs_sqlite_orderby =
-      has_quantum_constraint &&
-      (is_duration_timestamp_order || is_quantized_group_order_desc);
-
-  info->order_by_consumed = !needs_sqlite_orderby;
+  info->order_by_consumed = true;
 
   return SQLITE_OK;
 }
 
-int SchedSliceTable::FindFunction(const char* name,
-                                  FindFunctionFn fn,
-                                  void** args) {
-  // Add an identity match function to prevent throwing an exception when
-  // matching on the quantum column.
-  if (strcmp(name, "match") == 0) {
-    *fn = [](sqlite3_context* ctx, int n, sqlite3_value** v) {
-      PERFETTO_DCHECK(n == 2 && sqlite3_value_type(v[0]) == SQLITE_INTEGER);
-      sqlite3_result_int64(ctx, sqlite3_value_int64(v[0]));
-    };
-    *args = nullptr;
-    return 1;
-  }
-  return 0;
-}
-
 SchedSliceTable::Cursor::Cursor(const TraceStorage* storage)
     : storage_(storage) {}
 
@@ -203,71 +133,34 @@
 }
 
 int SchedSliceTable::Cursor::Next() {
-  auto* state = filter_state_->StateForCpu(filter_state_->next_cpu());
-  state->FindNextSlice();
-  filter_state_->FindCpuWithNextSlice();
+  filter_state_->FindNextSlice();
   return SQLITE_OK;
 }
 
 int SchedSliceTable::Cursor::Eof() {
-  return !filter_state_->IsNextCpuValid();
+  return !filter_state_->IsNextRowIdIndexValid();
 }
 
 int SchedSliceTable::Cursor::Column(sqlite3_context* context, int N) {
-  if (!filter_state_->IsNextCpuValid())
-    return SQLITE_ERROR;
+  PERFETTO_DCHECK(filter_state_->IsNextRowIdIndexValid());
 
-  uint64_t quantum = filter_state_->quantum();
-  uint32_t cpu = filter_state_->next_cpu();
-  const auto* state = filter_state_->StateForCpu(cpu);
-  size_t row = state->next_row_id();
-  const auto& slices = storage_->SlicesForCpu(cpu);
+  size_t row = filter_state_->next_row_id();
+  const auto& slices = storage_->slices();
   switch (N) {
     case Column::kTimestamp: {
-      uint64_t timestamp = state->next_timestamp();
-      timestamp = std::max(timestamp, state->ts_clip_min());
-      sqlite3_result_int64(context, static_cast<sqlite3_int64>(timestamp));
+      uint64_t ts = slices.start_ns()[row];
+      sqlite3_result_int64(context, static_cast<sqlite3_int64>(ts));
       break;
     }
     case Column::kCpu: {
-      sqlite3_result_int(context, static_cast<int>(cpu));
+      sqlite3_result_int(context, static_cast<int>(slices.cpus()[row]));
       break;
     }
     case Column::kDuration: {
-      uint64_t duration;
-      if (quantum == 0) {
-        duration = slices.durations()[row];
-        uint64_t start_ns = state->next_timestamp();
-        uint64_t end_ns = start_ns + duration;
-        uint64_t clip_trim_ns = 0;
-        if (state->ts_clip_min() > start_ns)
-          clip_trim_ns += state->ts_clip_min() - start_ns;
-        if (end_ns > state->ts_clip_max())
-          clip_trim_ns += end_ns - state->ts_clip_min();
-        duration -= std::min(clip_trim_ns, duration);
-      } else {
-        uint64_t start_quantised_group = state->next_timestamp() / quantum;
-        uint64_t end = slices.start_ns()[row] + slices.durations()[row];
-        uint64_t next_group_start = (start_quantised_group + 1) * quantum;
-
-        // Compute the minimum of the start of the next group boundary and the
-        // end of this slice.
-        uint64_t min_slice_end = std::min<uint64_t>(end, next_group_start);
-        duration = min_slice_end - state->next_timestamp();
-      }
+      uint64_t duration = slices.durations()[row];
       sqlite3_result_int64(context, static_cast<sqlite3_int64>(duration));
       break;
     }
-    case Column::kQuantizedGroup: {
-      auto group = quantum == 0 ? state->next_timestamp()
-                                : state->next_timestamp() / quantum;
-      sqlite3_result_int64(context, static_cast<sqlite3_int64>(group));
-      break;
-    }
-    case Column::kQuantum: {
-      sqlite3_result_int64(context, static_cast<sqlite3_int64>(quantum));
-      break;
-    }
     case Column::kUtid: {
       sqlite3_result_int64(context, slices.utids()[row]);
       break;
@@ -291,8 +184,6 @@
 
   uint64_t min_ts = 0;
   uint64_t max_ts = kUint64Max;
-  uint64_t ts_lower_bound = 0;
-  bool ts_clip = false;
 
   for (size_t i = 0; i < query_constraints.constraints().size(); i++) {
     const auto& cs = query_constraints.constraints()[i];
@@ -300,15 +191,6 @@
       case Column::kCpu:
         PopulateFilterBitmap(cs.op, argv[i], &cpu_filter);
         break;
-      case Column::kQuantum:
-        quantum_ = static_cast<uint64_t>(sqlite3_value_int64(argv[i]));
-        break;
-      case Column::kTimestampLowerBound:
-        ts_lower_bound = static_cast<uint64_t>(sqlite3_value_int64(argv[i]));
-        break;
-      case Column::kClipTimestamp:
-        ts_clip = sqlite3_value_int(argv[i]) ? true : false;
-        break;
       case Column::kTimestamp: {
         auto ts = static_cast<uint64_t>(sqlite3_value_int64(argv[i]));
         if (IsOpGe(cs.op) || IsOpGt(cs.op)) {
@@ -320,92 +202,22 @@
       }
     }
   }
+  SetupSortedRowIds(min_ts, max_ts);
 
-  if (ts_clip) {
-    PERFETTO_DCHECK(ts_lower_bound == 0);
-    if (ts_lower_bound)
-      PERFETTO_ELOG("Cannot use ts_lower_bound and ts_clip together");
-    ts_lower_bound = min_ts;
-    min_ts = 0;
-  }
-
-  // If the query specifies a lower bound on ts, find that bound across all
-  // CPUs involved in the query and turn that into a min_ts constraint.
-  // ts_lower_bound is defined as the largest timestamp < X, or if none, the
-  // smallest timestamp >= X.
-  if (ts_lower_bound > 0) {
-    uint64_t largest_ts_before = 0;
-    uint64_t smallest_ts_after = kUint64Max;
-    for (uint32_t cpu = 0; cpu < base::kMaxCpus; cpu++) {
-      if (!cpu_filter.test(cpu))
-        continue;
-      const auto& start_ns = storage_->SlicesForCpu(cpu).start_ns();
-      // std::lower_bound will find the first timestamp >= |ts_lower_bound|.
-      // From there we need to move one back, if possible.
-      auto it =
-          std::lower_bound(start_ns.begin(), start_ns.end(), ts_lower_bound);
-      if (std::distance(start_ns.begin(), it) > 0)
-        it--;
-      if (it == start_ns.end())
-        continue;
-      if (*it < ts_lower_bound) {
-        largest_ts_before = std::max(largest_ts_before, *it);
-      } else {
-        smallest_ts_after = std::min(smallest_ts_after, *it);
-      }
+  // Filter rows on CPUs if any CPUs need to be excluded.
+  const auto& slices = storage_->slices();
+  row_filter_.resize(sorted_row_ids_.size(), true);
+  if (cpu_filter.count() < cpu_filter.size()) {
+    for (size_t i = 0; i < sorted_row_ids_.size(); i++) {
+      row_filter_[i] = cpu_filter.test(slices.cpus()[sorted_row_ids_[i]]);
     }
-    uint64_t lower_bound = std::min(largest_ts_before, smallest_ts_after);
-    min_ts = std::max(min_ts, lower_bound);
-  }  // if (ts_lower_bound)
-
-  // Setup CPU filtering because the trace storage is indexed by CPU.
-  for (uint32_t cpu = 0; cpu < base::kMaxCpus; cpu++) {
-    if (!cpu_filter.test(cpu))
-      continue;
-    uint64_t ts_clip_min = ts_clip ? min_ts : 0;
-    uint64_t ts_clip_max = ts_clip ? max_ts : kUint64Max;
-    StateForCpu(cpu)->Initialize(
-        cpu, storage_, quantum_, ts_clip_min, ts_clip_max,
-        CreateSortedIndexVectorForCpu(cpu, min_ts, max_ts));
   }
-
-  // Set the cpu index to be the first item to look at.
-  FindCpuWithNextSlice();
+  FindNextRowAndTimestamp();
 }
 
-void SchedSliceTable::FilterState::FindCpuWithNextSlice() {
-  next_cpu_ = base::kMaxCpus;
-
-  for (uint32_t cpu = 0; cpu < base::kMaxCpus; cpu++) {
-    const auto& cpu_state = per_cpu_state_[cpu];
-    if (!cpu_state.IsNextRowIdIndexValid())
-      continue;
-
-    // The first CPU with a valid slice can be set to the next CPU.
-    if (next_cpu_ == base::kMaxCpus) {
-      next_cpu_ = cpu;
-      continue;
-    }
-
-    // If the current CPU is ordered before the current "next" CPU, then update
-    // the cpu value.
-    int cmp = CompareCpuToNextCpu(cpu);
-    if (cmp < 0)
-      next_cpu_ = cpu;
-  }
-}
-
-int SchedSliceTable::FilterState::CompareCpuToNextCpu(uint32_t cpu) {
-  size_t next_row = per_cpu_state_[next_cpu_].next_row_id();
-  size_t row = per_cpu_state_[cpu].next_row_id();
-  return CompareSlices(cpu, row, next_cpu_, next_row);
-}
-
-std::vector<uint32_t>
-SchedSliceTable::FilterState::CreateSortedIndexVectorForCpu(uint32_t cpu,
-                                                            uint64_t min_ts,
-                                                            uint64_t max_ts) {
-  const auto& slices = storage_->SlicesForCpu(cpu);
+void SchedSliceTable::FilterState::SetupSortedRowIds(uint64_t min_ts,
+                                                     uint64_t max_ts) {
+  const auto& slices = storage_->slices();
   const auto& start_ns = slices.start_ns();
   PERFETTO_CHECK(slices.slice_count() <= std::numeric_limits<uint32_t>::max());
 
@@ -414,26 +226,22 @@
   ptrdiff_t dist = std::distance(min_it, max_it);
   PERFETTO_CHECK(dist >= 0 && static_cast<size_t>(dist) <= start_ns.size());
 
-  std::vector<uint32_t> indices(static_cast<size_t>(dist));
-
   // Fill |indices| with the consecutive row numbers affected by the filtering.
-  std::iota(indices.begin(), indices.end(),
+  sorted_row_ids_.resize(static_cast<size_t>(dist));
+  std::iota(sorted_row_ids_.begin(), sorted_row_ids_.end(),
             std::distance(start_ns.begin(), min_it));
 
-  // In other cases, sort by the given criteria.
-  std::sort(indices.begin(), indices.end(),
-            [this, cpu](uint32_t f, uint32_t s) {
-              return CompareSlices(cpu, f, cpu, s) < 0;
-            });
-  return indices;
+  // Sort if there is any order by constraints.
+  if (!order_by_.empty()) {
+    std::sort(
+        sorted_row_ids_.begin(), sorted_row_ids_.end(),
+        [this](uint32_t f, uint32_t s) { return CompareSlices(f, s) < 0; });
+  }
 }
 
-int SchedSliceTable::FilterState::CompareSlices(uint32_t f_cpu,
-                                                size_t f_idx,
-                                                uint32_t s_cpu,
-                                                size_t s_idx) {
+int SchedSliceTable::FilterState::CompareSlices(size_t f_idx, size_t s_idx) {
   for (const auto& ob : order_by_) {
-    int c = CompareSlicesOnColumn(f_cpu, f_idx, s_cpu, s_idx, ob);
+    int c = CompareSlicesOnColumn(f_idx, s_idx, ob);
     if (c != 0)
       return c;
   }
@@ -441,85 +249,37 @@
 }
 
 int SchedSliceTable::FilterState::CompareSlicesOnColumn(
-    uint32_t f_cpu,
     size_t f_idx,
-    uint32_t s_cpu,
     size_t s_idx,
     const QueryConstraints::OrderBy& ob) {
-  const auto& f_sl = storage_->SlicesForCpu(f_cpu);
-  const auto& s_sl = storage_->SlicesForCpu(s_cpu);
+  const auto& sl = storage_->slices();
   switch (ob.iColumn) {
-    case SchedSliceTable::Column::kQuantum:
-    case SchedSliceTable::Column::kTimestampLowerBound:
-      PERFETTO_CHECK(false);
     case SchedSliceTable::Column::kTimestamp:
-      return Compare(f_sl.start_ns()[f_idx], s_sl.start_ns()[s_idx], ob.desc);
+      return Compare(sl.start_ns()[f_idx], sl.start_ns()[s_idx], ob.desc);
     case SchedSliceTable::Column::kDuration:
-      return Compare(f_sl.durations()[f_idx], s_sl.durations()[s_idx], ob.desc);
+      return Compare(sl.durations()[f_idx], sl.durations()[s_idx], ob.desc);
     case SchedSliceTable::Column::kCpu:
-      return Compare(f_cpu, s_cpu, ob.desc);
+      return Compare(sl.cpus()[f_idx], sl.cpus()[s_idx], ob.desc);
     case SchedSliceTable::Column::kUtid:
-      return Compare(f_sl.utids()[f_idx], s_sl.utids()[s_idx], ob.desc);
+      return Compare(sl.utids()[f_idx], sl.utids()[s_idx], ob.desc);
     case SchedSliceTable::Column::kCycles:
-      return Compare(f_sl.cycles()[f_idx], s_sl.cycles()[s_idx], ob.desc);
-    case SchedSliceTable::Column::kQuantizedGroup: {
-      // We don't support sorting in descending order on quantized group when
-      // we have a non-zero quantum.
-      PERFETTO_CHECK(!ob.desc || quantum_ == 0);
-
-      uint64_t f_timestamp = StateForCpu(f_cpu)->next_timestamp();
-      uint64_t s_timestamp = StateForCpu(s_cpu)->next_timestamp();
-
-      uint64_t f_group = quantum_ == 0 ? f_timestamp : f_timestamp / quantum_;
-      uint64_t s_group = quantum_ == 0 ? s_timestamp : s_timestamp / quantum_;
-      return Compare(f_group, s_group, ob.desc);
-    }
+      return Compare(sl.cycles()[f_idx], sl.cycles()[s_idx], ob.desc);
   }
   PERFETTO_FATAL("Unexpected column %d", ob.iColumn);
 }
 
-void SchedSliceTable::PerCpuState::Initialize(
-    uint32_t cpu,
-    const TraceStorage* storage,
-    uint64_t quantum,
-    uint64_t ts_clip_min,
-    uint64_t ts_clip_max,
-    std::vector<uint32_t> sorted_row_ids) {
-  cpu_ = cpu;
-  storage_ = storage;
-  quantum_ = quantum;
-  ts_clip_min_ = ts_clip_min;
-  ts_clip_max_ = ts_clip_max;
-  sorted_row_ids_ = std::move(sorted_row_ids);
-  UpdateNextTimestampForNextRow();
+void SchedSliceTable::FilterState::FindNextSlice() {
+  next_row_id_index_++;
+  FindNextRowAndTimestamp();
 }
 
-void SchedSliceTable::PerCpuState::FindNextSlice() {
-  PERFETTO_DCHECK(next_timestamp_ != 0);
-
-  const auto& slices = Slices();
-  if (quantum_ == 0) {
-    next_row_id_index_++;
-    UpdateNextTimestampForNextRow();
-    return;
-  }
-
-  uint64_t start_group = next_timestamp_ / quantum_;
-  uint64_t end_slice =
-      slices.start_ns()[next_row_id()] + slices.durations()[next_row_id()];
-  uint64_t next_group_start = (start_group + 1) * quantum_;
-
-  if (next_group_start >= end_slice) {
-    next_row_id_index_++;
-    UpdateNextTimestampForNextRow();
-  } else {
-    next_timestamp_ = next_group_start;
-  }
-}
-
-void SchedSliceTable::PerCpuState::UpdateNextTimestampForNextRow() {
-  next_timestamp_ =
-      IsNextRowIdIndexValid() ? Slices().start_ns()[next_row_id()] : 0;
+void SchedSliceTable::FilterState::FindNextRowAndTimestamp() {
+  auto start =
+      row_filter_.begin() +
+      static_cast<decltype(row_filter_)::difference_type>(next_row_id_index_);
+  auto next_it = std::find(start, row_filter_.end(), true);
+  next_row_id_index_ =
+      static_cast<uint32_t>(std::distance(row_filter_.begin(), next_it));
 }
 
 }  // namespace trace_processor
diff --git a/src/trace_processor/sched_slice_table.h b/src/trace_processor/sched_slice_table.h
index 06f5799..f3e1750 100644
--- a/src/trace_processor/sched_slice_table.h
+++ b/src/trace_processor/sched_slice_table.h
@@ -37,14 +37,8 @@
     kTimestamp = 0,
     kCpu = 1,
     kDuration = 2,
-    kQuantizedGroup = 3,
-    kUtid = 4,
-    kCycles = 5,
-
-    // Hidden columns.
-    kQuantum = 6,
-    kTimestampLowerBound = 7,
-    kClipTimestamp = 8,
+    kUtid = 3,
+    kCycles = 4,
   };
 
   SchedSliceTable(const TraceStorage* storage);
@@ -54,60 +48,8 @@
   // Table implementation.
   std::unique_ptr<Table::Cursor> CreateCursor() override;
   int BestIndex(const QueryConstraints&, BestIndexInfo*) override;
-  int FindFunction(const char* name, FindFunctionFn fn, void** args) override;
 
  private:
-  // Transient filter state for each CPU of this trace.
-  class PerCpuState {
-   public:
-    void Initialize(uint32_t cpu,
-                    const TraceStorage* storage,
-                    uint64_t quantum,
-                    uint64_t ts_clip_min,
-                    uint64_t ts_clip_max,
-                    std::vector<uint32_t> sorted_row_ids);
-    void FindNextSlice();
-    bool IsNextRowIdIndexValid() const {
-      return next_row_id_index_ < sorted_row_ids_.size();
-    }
-
-    size_t next_row_id() const { return sorted_row_ids_[next_row_id_index_]; }
-    uint64_t next_timestamp() const { return next_timestamp_; }
-    uint64_t ts_clip_min() const { return ts_clip_min_; }
-    uint64_t ts_clip_max() const { return ts_clip_max_; }
-
-   private:
-    const TraceStorage::SlicesPerCpu& Slices() {
-      return storage_->SlicesForCpu(cpu_);
-    }
-
-    void UpdateNextTimestampForNextRow();
-
-    // Vector of row ids sorted by the the given order by constraints.
-    std::vector<uint32_t> sorted_row_ids_;
-
-    // An offset into |sorted_row_ids_| indicating the next row to return.
-    uint32_t next_row_id_index_ = 0;
-
-    // The timestamp of the row to index. This is either the timestamp of
-    // the slice at |next_row_id_index_| or the timestamp of the next quantized
-    // group boundary.
-    uint64_t next_timestamp_ = 0;
-
-    // The CPU this state is associated with.
-    uint32_t cpu_ = 0;
-
-    // The quantum the output slices should fall within.
-    uint64_t quantum_ = 0;
-
-    // When clipping is applied (i.e. WHERE ts_clip between X and Y), slices are
-    // cut and shrunk around the min-max boundaries to fit in the clip window.
-    uint64_t ts_clip_min_ = 0;
-    uint64_t ts_clip_max_ = std::numeric_limits<uint64_t>::max();
-
-    const TraceStorage* storage_ = nullptr;
-  };
-
   // Transient state for a filter operation on a Cursor.
   class FilterState {
    public:
@@ -115,57 +57,43 @@
                 const QueryConstraints& query_constraints,
                 sqlite3_value** argv);
 
-    // Chooses the next CPU which should be returned according to the sorting
-    // criteria specified by |order_by_|.
-    void FindCpuWithNextSlice();
+    void FindNextSlice();
 
-    // Returns whether the next CPU to be returned by this filter operation is
-    // valid.
-    bool IsNextCpuValid() const { return next_cpu_ < per_cpu_state_.size(); }
+    inline bool IsNextRowIdIndexValid() const {
+      return next_row_id_index_ < sorted_row_ids_.size();
+    }
 
-    // Returns the transient state associated with a single CPU.
-    PerCpuState* StateForCpu(uint32_t cpu) { return &per_cpu_state_[cpu]; }
-
-    uint32_t next_cpu() const { return next_cpu_; }
-    uint64_t quantum() const { return quantum_; }
+    size_t next_row_id() const { return sorted_row_ids_[next_row_id_index_]; }
 
    private:
-    // Creates a vector of indices into the slices for the given |cpu| sorted
-    // by the order by criteria.
-    std::vector<uint32_t> CreateSortedIndexVectorForCpu(uint32_t cpu,
-                                                        uint64_t min_ts,
-                                                        uint64_t max_ts);
+    // Updates |sorted_row_ids_| with the indices into the slices sorted by the
+    // order by criteria.
+    void SetupSortedRowIds(uint64_t min_ts, uint64_t max_ts);
 
-    // Compares the next slice of the given |cpu| with the next slice of the
-    // |next_cpu_|. Return <0 if |cpu| is ordered before, >0 if ordered after,
-    // and 0 if they are equal.
-    int CompareCpuToNextCpu(uint32_t cpu);
-
-    // Compares the slice at index |f| in |f_slices| for CPU |f_cpu| with the
-    // slice at index |s| in |s_slices| for CPU |s_cpu| on all columns.
+    // Compares the slice at index |f| with the slice at index |s|on all
+    // columns.
     // Returns -1 if the first slice is before the second in the ordering, 1 if
     // the first slice is after the second and 0 if they are equal.
-    int CompareSlices(uint32_t f_cpu, size_t f, uint32_t s_cpu, size_t s);
+    int CompareSlices(size_t f, size_t s);
 
-    // Compares the slice at index |f| in |f_slices| for CPU |f_cpu| with the
-    // slice at index |s| in |s_slices| for CPU |s_cpu| on the criteria in
-    // |order_by|.
+    // Compares the slice at index |f| with the slice at index |s| on the
+    // criteria in |order_by|.
     // Returns -1 if the first slice is before the second in the ordering, 1 if
     // the first slice is after the second and 0 if they are equal.
-    int CompareSlicesOnColumn(uint32_t f_cpu,
-                              size_t f,
-                              uint32_t s_cpu,
+    int CompareSlicesOnColumn(size_t f,
                               size_t s,
                               const QueryConstraints::OrderBy& order_by);
 
-    // One entry for each cpu which is used in filtering.
-    std::array<PerCpuState, base::kMaxCpus> per_cpu_state_;
+    void FindNextRowAndTimestamp();
 
-    // The next CPU which should be returned to the user.
-    uint32_t next_cpu_ = 0;
+    // Vector of row ids sorted by the the given order by constraints.
+    std::vector<uint32_t> sorted_row_ids_;
 
-    // The quantum the output slices should fall within.
-    uint64_t quantum_ = 0;
+    // Bitset for filtering slices.
+    std::vector<bool> row_filter_;
+
+    // An offset into |sorted_row_ids_| indicating the next row to return.
+    uint32_t next_row_id_index_ = 0;
 
     // The sorting criteria for this filter operation.
     std::vector<QueryConstraints::OrderBy> order_by_;
diff --git a/src/trace_processor/sched_slice_table_unittest.cc b/src/trace_processor/sched_slice_table_unittest.cc
index fbc84ff..f5033fc 100644
--- a/src/trace_processor/sched_slice_table_unittest.cc
+++ b/src/trace_processor/sched_slice_table_unittest.cc
@@ -169,124 +169,6 @@
   ASSERT_EQ(sqlite3_step(*stmt_), SQLITE_DONE);
 }
 
-TEST_F(SchedSliceTableTest, QuanitsiationCpuNativeOrder) {
-  uint32_t cpu_1 = 3;
-  uint32_t cpu_2 = 8;
-  uint64_t timestamp = 100;
-  uint32_t pid_1 = 2;
-  uint32_t prev_state = 32;
-  static const char kCommProc1[] = "process1";
-  static const char kCommProc2[] = "process2";
-  uint32_t pid_2 = 4;
-  context_.sched_tracker->PushSchedSwitch(cpu_2, timestamp, pid_1, prev_state,
-                                          kCommProc1, pid_2);
-  context_.sched_tracker->PushSchedSwitch(cpu_1, timestamp + 3, pid_2,
-                                          prev_state, kCommProc2, pid_1);
-  context_.sched_tracker->PushSchedSwitch(cpu_2, timestamp + 4, pid_1,
-                                          prev_state, kCommProc1, pid_2);
-  context_.sched_tracker->PushSchedSwitch(cpu_1, timestamp + 10, pid_2,
-                                          prev_state, kCommProc2, pid_1);
-
-  PrepareValidStatement(
-      "SELECT dur, ts, cpu FROM sched WHERE quantum = 5 ORDER BY cpu");
-
-  // Event at ts + 3 sliced off at quantum boundary (105).
-  ASSERT_EQ(sqlite3_step(*stmt_), SQLITE_ROW);
-  ASSERT_EQ(sqlite3_column_int64(*stmt_, 0), 2 /* duration */);
-  ASSERT_EQ(sqlite3_column_int64(*stmt_, 1), timestamp + 3);
-  ASSERT_EQ(sqlite3_column_int64(*stmt_, 2), cpu_1);
-
-  // Remainder of event at ts + 3 after quantum boundary (105 onwards).
-  ASSERT_EQ(sqlite3_step(*stmt_), SQLITE_ROW);
-  ASSERT_EQ(sqlite3_column_int64(*stmt_, 0), 5 /* duration */);
-  ASSERT_EQ(sqlite3_column_int64(*stmt_, 1), timestamp + 5);
-  ASSERT_EQ(sqlite3_column_int64(*stmt_, 2), cpu_1);
-
-  // Full event at ts.
-  ASSERT_EQ(sqlite3_step(*stmt_), SQLITE_ROW);
-  ASSERT_EQ(sqlite3_column_int64(*stmt_, 0), 4 /* duration */);
-  ASSERT_EQ(sqlite3_column_int64(*stmt_, 1), timestamp);
-  ASSERT_EQ(sqlite3_column_int64(*stmt_, 2), cpu_2);
-
-  ASSERT_EQ(sqlite3_step(*stmt_), SQLITE_DONE);
-}
-
-TEST_F(SchedSliceTableTest, QuantizationSqliteDurationOrder) {
-  uint32_t cpu_1 = 3;
-  uint32_t cpu_2 = 8;
-  uint64_t timestamp = 100;
-  uint32_t pid_1 = 2;
-  uint32_t prev_state = 32;
-  static const char kCommProc1[] = "process1";
-  static const char kCommProc2[] = "process2";
-  uint32_t pid_2 = 4;
-  context_.sched_tracker->PushSchedSwitch(cpu_1, timestamp, pid_1, prev_state,
-                                          kCommProc1, pid_2);
-  context_.sched_tracker->PushSchedSwitch(cpu_2, timestamp + 3, pid_2,
-                                          prev_state, kCommProc2, pid_1);
-  context_.sched_tracker->PushSchedSwitch(cpu_1, timestamp + 4, pid_1,
-                                          prev_state, kCommProc1, pid_2);
-  context_.sched_tracker->PushSchedSwitch(cpu_2, timestamp + 10, pid_2,
-                                          prev_state, kCommProc2, pid_1);
-
-  PrepareValidStatement(
-      "SELECT dur, ts, cpu FROM sched WHERE quantum = 5 ORDER BY dur");
-
-  // Event at ts + 3 sliced off at quantum boundary (105).
-  ASSERT_EQ(sqlite3_step(*stmt_), SQLITE_ROW);
-  ASSERT_EQ(sqlite3_column_int64(*stmt_, 0), 2 /* duration */);
-  ASSERT_EQ(sqlite3_column_int64(*stmt_, 1), timestamp + 3);
-  ASSERT_EQ(sqlite3_column_int64(*stmt_, 2), cpu_2);
-
-  // Full event at ts.
-  ASSERT_EQ(sqlite3_step(*stmt_), SQLITE_ROW);
-  ASSERT_EQ(sqlite3_column_int64(*stmt_, 0), 4 /* duration */);
-  ASSERT_EQ(sqlite3_column_int64(*stmt_, 1), timestamp);
-  ASSERT_EQ(sqlite3_column_int64(*stmt_, 2), cpu_1);
-
-  // Remainder of event at ts + 3 after quantum boundary (105 onwards).
-  ASSERT_EQ(sqlite3_step(*stmt_), SQLITE_ROW);
-  ASSERT_EQ(sqlite3_column_int64(*stmt_, 0), 5 /* duration */);
-  ASSERT_EQ(sqlite3_column_int64(*stmt_, 1), timestamp + 5);
-  ASSERT_EQ(sqlite3_column_int64(*stmt_, 2), cpu_2);
-
-  ASSERT_EQ(sqlite3_step(*stmt_), SQLITE_DONE);
-}
-
-TEST_F(SchedSliceTableTest, QuantizationGroupAndSum) {
-  uint32_t cpu_1 = 3;
-  uint32_t cpu_2 = 8;
-  uint64_t timestamp = 100;
-  uint32_t pid_1 = 2;
-  uint32_t prev_state = 32;
-  static const char kCommProc1[] = "process1";
-  static const char kCommProc2[] = "process2";
-  uint32_t pid_2 = 4;
-  context_.sched_tracker->PushSchedSwitch(cpu_1, timestamp, pid_1, prev_state,
-                                          kCommProc1, pid_2);
-  context_.sched_tracker->PushSchedSwitch(cpu_2, timestamp + 3, pid_2,
-                                          prev_state, kCommProc2, pid_1);
-  context_.sched_tracker->PushSchedSwitch(cpu_1, timestamp + 4, pid_1,
-                                          prev_state, kCommProc1, pid_2);
-  context_.sched_tracker->PushSchedSwitch(cpu_2, timestamp + 10, pid_2,
-                                          prev_state, kCommProc2, pid_1);
-
-  PrepareValidStatement(
-      "SELECT SUM(dur) as sum_dur "
-      "FROM sched "
-      "WHERE quantum = 5 "
-      "GROUP BY quantized_group "
-      "ORDER BY sum_dur");
-
-  ASSERT_EQ(sqlite3_step(*stmt_), SQLITE_ROW);
-  ASSERT_EQ(sqlite3_column_int64(*stmt_, 0), 5 /* SUM(duration) */);
-
-  ASSERT_EQ(sqlite3_step(*stmt_), SQLITE_ROW);
-  ASSERT_EQ(sqlite3_column_int64(*stmt_, 0), 6 /* SUM(duration) */);
-
-  ASSERT_EQ(sqlite3_step(*stmt_), SQLITE_DONE);
-}
-
 TEST_F(SchedSliceTableTest, UtidTest) {
   uint32_t cpu = 3;
   uint64_t timestamp = 100;
@@ -349,21 +231,6 @@
   ASSERT_THAT(query("ts >= 55 and ts < 52"), IsEmpty());
   ASSERT_THAT(query("ts >= 70 and ts < 71"), ElementsAre(70));
   ASSERT_THAT(query("ts >= 59 and ts < 73"), ElementsAre(59, 60, 70, 71, 72));
-
-  // Test the special ts_lower_bound column.
-  ASSERT_THAT(query("ts_lower_bound = 1 and ts < 10"), IsEmpty());
-  ASSERT_THAT(query("ts_lower_bound = 50 and ts <= 50"), ElementsAre(50));
-  ASSERT_THAT(query("ts_lower_bound = 100"), ElementsAre(80));
-  ASSERT_THAT(query("ts_lower_bound = 100 and cpu = 5"), ElementsAre(60));
-  ASSERT_THAT(query("ts_lower_bound = 100 and cpu = 7"), ElementsAre(80));
-  ASSERT_THAT(query("ts_lower_bound = 1 and ts <= 52"),
-              ElementsAre(50, 51, 52));
-  ASSERT_THAT(query("ts_lower_bound = 70 and ts <= 71"),
-              ElementsAre(60, 70, 71));
-  ASSERT_THAT(query("ts_lower_bound = 60 and ts > 58 and ts <= 71"),
-              ElementsAre(59, 60, 70, 71));
-  ASSERT_THAT(query("ts_lower_bound = 70 and ts > 70 and ts <= 71"),
-              ElementsAre(71));
 }
 
 TEST_F(SchedSliceTableTest, CyclesOrdering) {
diff --git a/src/trace_processor/sched_tracker_unittest.cc b/src/trace_processor/sched_tracker_unittest.cc
index b20a7d9..b17ecfa 100644
--- a/src/trace_processor/sched_tracker_unittest.cc
+++ b/src/trace_processor/sched_tracker_unittest.cc
@@ -49,7 +49,7 @@
   static const char kCommProc2[] = "process2";
   uint32_t pid_2 = 4;
 
-  const auto& timestamps = context.storage->SlicesForCpu(cpu).start_ns();
+  const auto& timestamps = context.storage->slices().start_ns();
   context.sched_tracker->PushSchedSwitch(cpu, timestamp, pid_1, prev_state,
                                          kCommProc1, pid_2);
   ASSERT_EQ(timestamps.size(), 0);
@@ -63,7 +63,7 @@
   ASSERT_EQ(std::string(context.storage->GetString(
                 context.storage->GetThread(1).name_id)),
             kCommProc2);
-  ASSERT_EQ(context.storage->SlicesForCpu(cpu).utids().front(), 1);
+  ASSERT_EQ(context.storage->slices().utids().front(), 1);
 }
 
 TEST_F(SchedTrackerTest, InsertThirdSched_SameThread) {
@@ -73,7 +73,7 @@
   static const char kCommProc1[] = "process1";
   static const char kCommProc2[] = "process2";
 
-  const auto& timestamps = context.storage->SlicesForCpu(cpu).start_ns();
+  const auto& timestamps = context.storage->slices().start_ns();
   context.sched_tracker->PushSchedSwitch(cpu, timestamp, /*tid=*/4, prev_state,
                                          kCommProc1,
                                          /*tid=*/2);
@@ -92,12 +92,12 @@
   ASSERT_EQ(timestamps.size(), 3ul);
   ASSERT_EQ(timestamps[0], timestamp);
   ASSERT_EQ(context.storage->GetThread(1).start_ns, timestamp);
-  ASSERT_EQ(context.storage->SlicesForCpu(cpu).durations().at(0), 1u);
-  ASSERT_EQ(context.storage->SlicesForCpu(cpu).durations().at(1), 11u - 1u);
-  ASSERT_EQ(context.storage->SlicesForCpu(cpu).durations().at(2), 31u - 11u);
-  ASSERT_EQ(context.storage->SlicesForCpu(cpu).utids().at(0),
-            context.storage->SlicesForCpu(cpu).utids().at(2));
-  ASSERT_EQ(context.storage->SlicesForCpu(cpu).cycles().at(0), 0);
+  ASSERT_EQ(context.storage->slices().durations().at(0), 1u);
+  ASSERT_EQ(context.storage->slices().durations().at(1), 11u - 1u);
+  ASSERT_EQ(context.storage->slices().durations().at(2), 31u - 11u);
+  ASSERT_EQ(context.storage->slices().utids().at(0),
+            context.storage->slices().utids().at(2));
+  ASSERT_EQ(context.storage->slices().cycles().at(0), 0);
 }
 
 TEST_F(SchedTrackerTest, TestCyclesCalculation) {
@@ -122,7 +122,7 @@
       cpu, static_cast<uint64_t>(timestamp + 3e8L), /*tid=*/4, prev_state,
       kCommProc2,
       /*tid=*/2);
-  ASSERT_EQ(context.storage->SlicesForCpu(cpu).cycles().at(0), 590000000);
+  ASSERT_EQ(context.storage->slices().cycles().at(0), 590000000);
 }
 
 }  // namespace
diff --git a/src/trace_processor/trace_storage.cc b/src/trace_processor/trace_storage.cc
index ddd46b2..4781c23 100644
--- a/src/trace_processor/trace_storage.cc
+++ b/src/trace_processor/trace_storage.cc
@@ -41,8 +41,8 @@
                                  uint64_t duration_ns,
                                  UniqueTid utid,
                                  uint64_t cycles) {
-  cpu_events_[cpu].AddSlice(start_ns, duration_ns, utid, cycles);
-};
+  slices_.AddSlice(cpu, start_ns, duration_ns, utid, cycles);
+}
 
 StringId TraceStorage::InternString(base::StringView str) {
   auto hash = str.Hash();
diff --git a/src/trace_processor/trace_storage.h b/src/trace_processor/trace_storage.h
index 2fe305d..3515f76 100644
--- a/src/trace_processor/trace_storage.h
+++ b/src/trace_processor/trace_storage.h
@@ -80,12 +80,14 @@
     uint32_t tid = 0;
   };
 
-  class SlicesPerCpu {
+  class Slices {
    public:
-    inline void AddSlice(uint64_t start_ns,
+    inline void AddSlice(uint32_t cpu,
+                         uint64_t start_ns,
                          uint64_t duration_ns,
                          UniqueTid utid,
                          uint64_t cycles) {
+      cpus_.emplace_back(cpu);
       start_ns_.emplace_back(start_ns);
       durations_.emplace_back(duration_ns);
       utids_.emplace_back(utid);
@@ -94,6 +96,8 @@
 
     size_t slice_count() const { return start_ns_.size(); }
 
+    const std::deque<uint32_t>& cpus() const { return cpus_; }
+
     const std::deque<uint64_t>& start_ns() const { return start_ns_; }
 
     const std::deque<uint64_t>& durations() const { return durations_; }
@@ -105,6 +109,7 @@
    private:
     // Each deque below has the same number of entries (the number of slices
     // in the trace for the CPU).
+    std::deque<uint32_t> cpus_;
     std::deque<uint64_t> start_ns_;
     std::deque<uint64_t> durations_;
     std::deque<UniqueTid> utids_;
@@ -189,11 +194,6 @@
   }
 
   // Reading methods.
-  const SlicesPerCpu& SlicesForCpu(uint32_t cpu) const {
-    PERFETTO_DCHECK(cpu < cpu_events_.size());
-    return cpu_events_[cpu];
-  }
-
   const std::string& GetString(StringId id) const {
     PERFETTO_DCHECK(id < string_pool_.size());
     return string_pool_[id];
@@ -209,6 +209,7 @@
     return unique_threads_[utid];
   }
 
+  const Slices& slices() const { return slices_; }
   const NestableSlices& nestable_slices() const { return nestable_slices_; }
   NestableSlices* mutable_nestable_slices() { return &nestable_slices_; }
 
@@ -251,7 +252,7 @@
   Stats stats_;
 
   // One entry for each CPU in the trace.
-  std::array<SlicesPerCpu, base::kMaxCpus> cpu_events_;
+  Slices slices_;
 
   // One map containing frequencies for every CPU in the trace. The map contains
   // timestamps and the cpu frequency value at that time.