trace_processor: add support for strings in db tables

This CL adds support for filtering, sorting and querying strings
in the new DB tables, and allows the macro tables to use them by
specifying StringId as the column type.

We temporarily create a "common" source set to hold classes which
are needed by the db set to prevent circular dependencies; this
will be cleaned up in future CLs.

Change-Id: I514b0689deacc3ea2c8ec861be71537804731561
Bug: 135177627
diff --git a/include/perfetto/trace_processor/basic_types.h b/include/perfetto/trace_processor/basic_types.h
index 59fb559..7b29427 100644
--- a/include/perfetto/trace_processor/basic_types.h
+++ b/include/perfetto/trace_processor/basic_types.h
@@ -18,11 +18,14 @@
 #define INCLUDE_PERFETTO_TRACE_PROCESSOR_BASIC_TYPES_H_
 
 #include <assert.h>
+#include <math.h>
 #include <stdarg.h>
 #include <stdint.h>
+#include <functional>
 #include <string>
 
 #include "perfetto/base/export.h"
+#include "perfetto/base/logging.h"
 
 namespace perfetto {
 namespace trace_processor {
@@ -34,17 +37,68 @@
   // Represents the type of the value.
   enum Type {
     kNull = 0,
-    kString,
     kLong,
     kDouble,
+    kString,
     kBytes,
   };
 
+  SqlValue() = default;
+
+  static SqlValue Long(int64_t v) {
+    SqlValue value;
+    value.long_value = v;
+    value.type = Type::kLong;
+    return value;
+  }
+
+  static SqlValue String(const char* v) {
+    SqlValue value;
+    value.string_value = v;
+    value.type = Type::kString;
+    return value;
+  }
+
   double AsDouble() {
     assert(type == kDouble);
     return double_value;
   }
 
+  // Three-way comparison returning <0, 0 or >0; values whose types differ
+  // are ordered by |type|. TODO(lalitm): this is almost what SQLite does,
+  // except that (for performance) longs are never compared with doubles.
+  int Compare(const SqlValue& value) const {
+    if (type != value.type)
+      return type - value.type;
+    switch (type) {
+      case Type::kNull:
+        return 0;
+      case Type::kLong:
+        return (long_value > value.long_value) -
+               (long_value < value.long_value);
+      case Type::kDouble:
+        return (double_value > value.double_value) -
+               (double_value < value.double_value);
+      case Type::kString:
+        return strcmp(string_value, value.string_value);
+      case Type::kBytes: {
+        size_t bytes = std::min(bytes_count, value.bytes_count);
+        int ret = memcmp(bytes_value, value.bytes_value, bytes);
+        return ret != 0 ? ret : ((bytes_count > value.bytes_count) -
+                                 (bytes_count < value.bytes_count));
+      }
+    }
+    PERFETTO_FATAL("For GCC");
+  }
+  bool operator==(const SqlValue& value) const { return Compare(value) == 0; }
+  bool operator<(const SqlValue& value) const { return Compare(value) < 0; }
+  bool operator!=(const SqlValue& value) const { return !(*this == value); }
+  bool operator>=(const SqlValue& value) const { return !(*this < value); }
+  bool operator<=(const SqlValue& value) const { return !(value < *this); }
+  bool operator>(const SqlValue& value) const { return value < *this; }
+
+  bool is_null() const { return type == Type::kNull; }
+
   // Up to 1 of these fields can be accessed depending on |type|.
   union {
     // This string will be owned by the iterator that returned it and is valid
diff --git a/src/trace_processor/BUILD.gn b/src/trace_processor/BUILD.gn
index cf89958..154b0b2 100644
--- a/src/trace_processor/BUILD.gn
+++ b/src/trace_processor/BUILD.gn
@@ -95,7 +95,6 @@
     "metadata.h",
     "metadata_table.cc",
     "metadata_table.h",
-    "null_term_string_view.h",
     "process_table.cc",
     "process_table.h",
     "process_tracker.cc",
@@ -136,8 +135,6 @@
     "storage_schema.h",
     "storage_table.cc",
     "storage_table.h",
-    "string_pool.cc",
-    "string_pool.h",
     "syscall_tracker.cc",
     "syscall_tracker.h",
     "syscalls_aarch32.h",
@@ -172,6 +169,7 @@
   ]
 
   deps = [
+    ":common",
     "../../gn:default_deps",
     "../../gn:sqlite",
     "../../gn:zlib",
@@ -217,6 +215,21 @@
   }
 }
 
+# TODO(lalitm): we need to find a better home for the classes here.
+source_set("common") {
+  sources = [
+    "null_term_string_view.h",
+    "string_pool.cc",
+    "string_pool.h",
+  ]
+
+  deps = [
+    "../../gn:default_deps",
+    "../base",
+    "../protozero",
+  ]
+}
+
 executable("trace_processor_shell") {
   testonly = true  # We need this for proto full.
   deps = [
@@ -266,6 +279,7 @@
     "trace_sorter_unittest.cc",
   ]
   deps = [
+    ":common",
     ":lib",
     "../../gn:default_deps",
     "../../gn:gtest_and_gmock",
diff --git a/src/trace_processor/db/BUILD.gn b/src/trace_processor/db/BUILD.gn
index c835ea2..b6e76cd 100644
--- a/src/trace_processor/db/BUILD.gn
+++ b/src/trace_processor/db/BUILD.gn
@@ -27,9 +27,11 @@
     "table.h",
   ]
   deps = [
+    "../:common",
     "../../../gn:default_deps",
     "../../../include/perfetto/base",
     "../../../include/perfetto/ext/base",
+    "../../../include/perfetto/trace_processor",
   ]
 }
 
diff --git a/src/trace_processor/db/column.cc b/src/trace_processor/db/column.cc
index 6ab4c00..20350d4 100644
--- a/src/trace_processor/db/column.cc
+++ b/src/trace_processor/db/column.cc
@@ -21,26 +21,29 @@
 namespace perfetto {
 namespace trace_processor {
 
-void Column::FilterInto(FilterOp op, int64_t value, RowMap* iv) const {
+Column::Column(const char* name,
+               ColumnType type,
+               Table* table,
+               uint32_t col_idx,
+               uint32_t row_map_idx)
+    : name_(name),
+      table_(table),
+      string_pool_(table->string_pool_),
+      col_idx_(col_idx),
+      row_map_idx_(row_map_idx),
+      type_(type) {}
+
+void Column::FilterInto(FilterOp op, SqlValue value, RowMap* iv) const {
   // Assume op == kEq.
   switch (op) {
     case FilterOp::kLt:
-      iv->RemoveIf([this, value](uint32_t row) {
-        auto opt_value = Get(row);
-        return !opt_value || opt_value.value() >= value;
-      });
+      iv->RemoveIf([this, value](uint32_t row) { return Get(row) >= value; });
       break;
     case FilterOp::kEq:
-      iv->RemoveIf([this, value](uint32_t row) {
-        auto opt_value = Get(row);
-        return !opt_value || opt_value.value() != value;
-      });
+      iv->RemoveIf([this, value](uint32_t row) { return Get(row) != value; });
       break;
     case FilterOp::kGt:
-      iv->RemoveIf([this, value](uint32_t row) {
-        auto opt_value = Get(row);
-        return !opt_value || opt_value.value() <= value;
-      });
+      iv->RemoveIf([this, value](uint32_t row) { return Get(row) <= value; });
       break;
   }
 }
diff --git a/src/trace_processor/db/column.h b/src/trace_processor/db/column.h
index 834ba44..8459add 100644
--- a/src/trace_processor/db/column.h
+++ b/src/trace_processor/db/column.h
@@ -21,8 +21,10 @@
 
 #include "perfetto/base/logging.h"
 #include "perfetto/ext/base/optional.h"
+#include "perfetto/trace_processor/basic_types.h"
 #include "src/trace_processor/db/row_map.h"
 #include "src/trace_processor/db/sparse_vector.h"
+#include "src/trace_processor/string_pool.h"
 
 namespace perfetto {
 namespace trace_processor {
@@ -38,7 +40,7 @@
 struct Constraint {
   uint32_t col_idx;
   FilterOp op;
-  int64_t value;
+  SqlValue value;
 };
 
 // Represents an order by operation on a column.
@@ -68,6 +70,19 @@
     data_.int64_sv = storage;
   }
 
+  // Create a nullable string Column.
+  // Note: |name| must be a long lived string.
+  // TODO(lalitm): change this to a std::deque instead as StringIds already
+  // have the concept of nullability in them.
+  Column(const char* name,
+         const SparseVector<StringPool::Id>* storage,
+         Table* table,
+         uint32_t col_idx,
+         uint32_t row_map_idx)
+      : Column(name, ColumnType::kString, table, col_idx, row_map_idx) {
+    data_.string_sv = storage;
+  }
+
   // Create a Column has the same name and is backed by the same data as
   // |column| but is associated to a different table.
   Column(const Column& column,
@@ -87,44 +102,64 @@
   }
 
   // Gets the value of the Column at the given |row|
-  base::Optional<int64_t> Get(uint32_t row) const {
-    auto opt_idx = row_map().Get(row);
+  SqlValue Get(uint32_t row) const {
+    auto idx = row_map().Get(row);
     switch (type_) {
-      case ColumnType::kInt64:
-        return data_.int64_sv->Get(opt_idx);
+      case ColumnType::kInt64: {
+        auto opt_value = data_.int64_sv->Get(idx);
+        return opt_value ? SqlValue::Long(*opt_value) : SqlValue();
+      }
+      case ColumnType::kString: {
+        auto opt_id = data_.string_sv->Get(idx);
+        // We DCHECK here because although we are using SparseVector, the null
+        // info is handled by the StringPool rather than by the SparseVector.
+        // The value returned by the SparseVector should always be non-null.
+        // TODO(lalitm): remove this check when we support std::deque<StringId>.
+        PERFETTO_DCHECK(opt_id.has_value());
+        auto str = string_pool_->Get(*opt_id).c_str();
+        return str == nullptr ? SqlValue() : SqlValue::String(str);
+      }
       case ColumnType::kId:
-        return opt_idx;
+        return SqlValue::Long(idx);
     }
     PERFETTO_FATAL("For GCC");
   }
 
   // Returns the row containing the given value in the Column.
-  base::Optional<uint32_t> IndexOf(int64_t value) const {
+  base::Optional<uint32_t> IndexOf(SqlValue value) const {
     switch (type_) {
+      // TODO(lalitm): investigate whether we could make this more efficient
+      // by first checking the type of the column and comparing explicitly
+      // based on that type.
       case ColumnType::kInt64:
+      case ColumnType::kString: {
         for (uint32_t i = 0; i < row_map().size(); i++) {
           if (Get(i) == value)
             return i;
         }
         return base::nullopt;
-      case ColumnType::kId:
-        return row_map().IndexOf(static_cast<uint32_t>(value));
+      }
+      case ColumnType::kId: {
+        if (value.type != SqlValue::Type::kLong)
+          return base::nullopt;
+        return row_map().IndexOf(static_cast<uint32_t>(value.long_value));
+      }
     }
     PERFETTO_FATAL("For GCC");
   }
 
   // Updates the given RowMap by only keeping rows where this column meets the
   // given filter constraint.
-  void FilterInto(FilterOp, int64_t value, RowMap*) const;
+  void FilterInto(FilterOp, SqlValue value, RowMap*) const;
 
   // Returns a Constraint for each type of filter operation for this Column.
-  Constraint eq(int64_t value) const {
+  Constraint eq(SqlValue value) const {
     return Constraint{col_idx_, FilterOp::kEq, value};
   }
-  Constraint gt(int64_t value) const {
+  Constraint gt(SqlValue value) const {
     return Constraint{col_idx_, FilterOp::kGt, value};
   }
-  Constraint lt(int64_t value) const {
+  Constraint lt(SqlValue value) const {
     return Constraint{col_idx_, FilterOp::kLt, value};
   }
 
@@ -144,6 +179,7 @@
   enum ColumnType {
     // Standard primitive types.
     kInt64,
+    kString,
 
     // Types generated on the fly.
     kId,
@@ -153,18 +189,14 @@
          ColumnType type,
          Table* table,
          uint32_t col_idx,
-         uint32_t row_map_idx)
-      : name_(name),
-        table_(table),
-        col_idx_(col_idx),
-        row_map_idx_(row_map_idx),
-        type_(type) {}
+         uint32_t row_map_idx);
 
   Column(const Column&) = delete;
   Column& operator=(const Column&) = delete;
 
   const char* name_ = nullptr;
-  Table* table_ = nullptr;
+  const Table* table_ = nullptr;
+  const StringPool* string_pool_ = nullptr;
   uint32_t col_idx_ = 0;
   uint32_t row_map_idx_ = 0;
 
@@ -172,6 +204,9 @@
   union {
     // Valid when |type_| == ColumnType::kInt64.
     const SparseVector<int64_t>* int64_sv = nullptr;
+
+    // Valid when |type_| == ColumnType::kString.
+    const SparseVector<StringPool::Id>* string_sv;
   } data_;
 };
 
diff --git a/src/trace_processor/db/table.cc b/src/trace_processor/db/table.cc
index ac975de..ee7d41d 100644
--- a/src/trace_processor/db/table.cc
+++ b/src/trace_processor/db/table.cc
@@ -19,7 +19,7 @@
 namespace perfetto {
 namespace trace_processor {
 
-Table::Table(const Table* parent) {
+Table::Table(const StringPool* pool, const Table* parent) : string_pool_(pool) {
   if (!parent)
     return;
 
@@ -106,7 +106,7 @@
 Table Table::LookupJoin(JoinKey left, const Table& other, JoinKey right) {
   // The join table will have the same size and RowMaps as the left (this)
   // table because the left column is indexing the right table.
-  Table table(nullptr);
+  Table table(string_pool_, nullptr);
   table.size_ = size_;
   for (const RowMap& rm : row_maps_) {
     table.row_maps_.emplace_back(rm.Copy());
@@ -129,9 +129,9 @@
   // in the right table.
   std::vector<uint32_t> indices(size_);
   for (uint32_t i = 0; i < size_; ++i) {
-    base::Optional<int64_t> val = left_col.Get(i);
-    PERFETTO_CHECK(val.has_value());
-    indices[i] = right_col.IndexOf(static_cast<uint32_t>(val.value())).value();
+    SqlValue val = left_col.Get(i);
+    PERFETTO_CHECK(val.type != SqlValue::Type::kNull);
+    indices[i] = right_col.IndexOf(val).value();
   }
 
   // Apply the computed RowMap to each of the right RowMaps, adding it to the
diff --git a/src/trace_processor/db/table.h b/src/trace_processor/db/table.h
index 127a44b..aa2addb 100644
--- a/src/trace_processor/db/table.h
+++ b/src/trace_processor/db/table.h
@@ -26,6 +26,7 @@
 #include "perfetto/base/logging.h"
 #include "perfetto/ext/base/optional.h"
 #include "src/trace_processor/db/column.h"
+#include "src/trace_processor/string_pool.h"
 
 namespace perfetto {
 namespace trace_processor {
@@ -41,7 +42,7 @@
     bool Next() { return ++row_ < table_->size(); }
 
     // Returns the value at the current row for column |col_idx|.
-    base::Optional<int64_t> Get(uint32_t col_idx) {
+    SqlValue Get(uint32_t col_idx) {
       return table_->columns_[col_idx].Get(row_);
     }
 
@@ -92,7 +93,7 @@
   const std::vector<RowMap>& row_maps() const { return row_maps_; }
 
  protected:
-  explicit Table(const Table* parent);
+  Table(const StringPool* pool, const Table* parent);
 
   std::vector<RowMap> row_maps_;
   std::vector<Column> columns_;
@@ -105,6 +106,8 @@
   // the Table pointer in each column to the Table being copied into.
   Table(const Table& other) { *this = other; }
   Table& operator=(const Table& other);
+
+  const StringPool* string_pool_ = nullptr;
 };
 
 }  // namespace trace_processor
diff --git a/src/trace_processor/tables/macros_internal.h b/src/trace_processor/tables/macros_internal.h
index 08a6b34..1c0724c 100644
--- a/src/trace_processor/tables/macros_internal.h
+++ b/src/trace_processor/tables/macros_internal.h
@@ -38,7 +38,8 @@
 // code size.
 class MacroTable : public Table {
  public:
-  MacroTable(Table* parent) : Table(parent), parent_(parent) {
+  MacroTable(const StringPool* pool, Table* parent)
+      : Table(pool, parent), parent_(parent) {
     if (!parent) {
       columns_.emplace_back(
           Column::IdColumn(this, static_cast<uint32_t>(columns_.size()),
@@ -131,8 +132,8 @@
 #define PERFETTO_TP_TABLE_INTERNAL(class_name, parent_class_name, DEF)        \
   class class_name : public macros_internal::MacroTable {                     \
    public:                                                                    \
-    class_name(parent_class_name* parent)                                     \
-        : macros_internal::MacroTable(parent), parent_(parent) {              \
+    class_name(const StringPool* pool, parent_class_name* parent)             \
+        : macros_internal::MacroTable(pool, parent), parent_(parent) {        \
       /* Expands to                                                           \
        * columns_.emplace_back("col1", col1_, this, columns_.size(),          \
        *                       row_maps_.size() - 1);                         \
diff --git a/src/trace_processor/tables/macros_unittest.cc b/src/trace_processor/tables/macros_unittest.cc
index 76c5b53..3735241 100644
--- a/src/trace_processor/tables/macros_unittest.cc
+++ b/src/trace_processor/tables/macros_unittest.cc
@@ -40,50 +40,57 @@
   NAME(TestCpuSliceTable)                                     \
   PARENT(PERFETTO_TP_TEST_SLICE_TABLE_DEF, C)                 \
   C(int64_t, cpu)                                             \
-  C(int64_t, priority)
+  C(int64_t, priority)                                        \
+  C(StringPool::Id, end_state)
 PERFETTO_TP_TABLE(PERFETTO_TP_TEST_CPU_SLICE_TABLE_DEF);
 
 TEST(TableMacrosUnittest, InsertParent) {
-  TestEventTable event(nullptr);
-  TestSliceTable slice(&event);
+  StringPool pool;
+  TestEventTable event(&pool, nullptr);
+  TestSliceTable slice(&pool, &event);
+
   uint32_t id = event.Insert(100, 0);
   ASSERT_EQ(id, 0u);
-  ASSERT_EQ(event.ts().Get(0), 100);
-  ASSERT_EQ(event.arg_set_id().Get(0), 0);
+  ASSERT_EQ(event.ts().Get(0), SqlValue::Long(100));
+  ASSERT_EQ(event.arg_set_id().Get(0), SqlValue::Long(0));
 
   id = slice.Insert(200, 123, 10, 0);
   ASSERT_EQ(id, 1u);
-  ASSERT_EQ(event.ts().Get(1), 200);
-  ASSERT_EQ(event.arg_set_id().Get(1), 123);
-  ASSERT_EQ(slice.ts().Get(0), 200);
-  ASSERT_EQ(slice.arg_set_id().Get(0), 123);
-  ASSERT_EQ(slice.dur().Get(0), 10);
-  ASSERT_EQ(slice.depth().Get(0), 0);
+  ASSERT_EQ(event.ts().Get(1), SqlValue::Long(200));
+  ASSERT_EQ(event.arg_set_id().Get(1), SqlValue::Long(123));
+  ASSERT_EQ(slice.ts().Get(0), SqlValue::Long(200));
+  ASSERT_EQ(slice.arg_set_id().Get(0), SqlValue::Long(123));
+  ASSERT_EQ(slice.dur().Get(0), SqlValue::Long(10));
+  ASSERT_EQ(slice.depth().Get(0), SqlValue::Long(0));
 }
 
 TEST(TableMacrosUnittest, InsertChild) {
-  TestEventTable event(nullptr);
-  TestSliceTable slice(&event);
-  TestCpuSliceTable cpu_slice(&slice);
+  StringPool pool;
+  TestEventTable event(&pool, nullptr);
+  TestSliceTable slice(&pool, &event);
+  TestCpuSliceTable cpu_slice(&pool, &slice);
+
   event.Insert(100, 0);
   slice.Insert(200, 123, 10, 0);
 
-  uint32_t id = cpu_slice.Insert(205, 456, 5, 1, 4, 1024);
+  auto reason = pool.InternString("R");
+  uint32_t id = cpu_slice.Insert(205, 456, 5, 1, 4, 1024, reason);
   ASSERT_EQ(id, 2u);
-  ASSERT_EQ(event.ts().Get(2), 205);
-  ASSERT_EQ(event.arg_set_id().Get(2), 456);
+  ASSERT_EQ(event.ts().Get(2), SqlValue::Long(205));
+  ASSERT_EQ(event.arg_set_id().Get(2), SqlValue::Long(456));
 
-  ASSERT_EQ(slice.ts().Get(1), 205);
-  ASSERT_EQ(slice.arg_set_id().Get(1), 456);
-  ASSERT_EQ(slice.dur().Get(1), 5);
-  ASSERT_EQ(slice.depth().Get(1), 1);
+  ASSERT_EQ(slice.ts().Get(1), SqlValue::Long(205));
+  ASSERT_EQ(slice.arg_set_id().Get(1), SqlValue::Long(456));
+  ASSERT_EQ(slice.dur().Get(1), SqlValue::Long(5));
+  ASSERT_EQ(slice.depth().Get(1), SqlValue::Long(1));
 
-  ASSERT_EQ(cpu_slice.ts().Get(0), 205);
-  ASSERT_EQ(cpu_slice.arg_set_id().Get(0), 456);
-  ASSERT_EQ(cpu_slice.dur().Get(0), 5);
-  ASSERT_EQ(cpu_slice.depth().Get(0), 1);
-  ASSERT_EQ(cpu_slice.cpu().Get(0), 4);
-  ASSERT_EQ(cpu_slice.priority().Get(0), 1024);
+  ASSERT_EQ(cpu_slice.ts().Get(0), SqlValue::Long(205));
+  ASSERT_EQ(cpu_slice.arg_set_id().Get(0), SqlValue::Long(456));
+  ASSERT_EQ(cpu_slice.dur().Get(0), SqlValue::Long(5));
+  ASSERT_EQ(cpu_slice.depth().Get(0), SqlValue::Long(1));
+  ASSERT_EQ(cpu_slice.cpu().Get(0), SqlValue::Long(4));
+  ASSERT_EQ(cpu_slice.priority().Get(0), SqlValue::Long(1024));
+  ASSERT_EQ(cpu_slice.end_state().Get(0), SqlValue::String("R"));
 }
 
 }  // namespace