Add /proc/kallsyms parsing

Adds an efficient parser for /proc/kallsyms.
It takes ~1MB to keep in memory kallsyms of a
typical android kernel. See comments in the
header and doc for more details.

Design doc: go/perfetto-kallsyms
Bug: 147809529
Change-Id: Iddc631dcdc436e3812a3a70e61fd2d21c1732c2d
diff --git a/Android.bp b/Android.bp
index dc3fb3c..6a217d4 100644
--- a/Android.bp
+++ b/Android.bp
@@ -6198,6 +6198,22 @@
   ],
 }
 
+// GN: //src/traced/probes/ftrace/kallsyms:kallsyms
+filegroup {
+  name: "perfetto_src_traced_probes_ftrace_kallsyms_kallsyms",
+  srcs: [
+    "src/traced/probes/ftrace/kallsyms/kernel_symbol_map.cc",
+  ],
+}
+
+// GN: //src/traced/probes/ftrace/kallsyms:unittests
+filegroup {
+  name: "perfetto_src_traced_probes_ftrace_kallsyms_unittests",
+  srcs: [
+    "src/traced/probes/ftrace/kallsyms/kernel_symbol_map_unittest.cc",
+  ],
+}
+
 // GN: //src/traced/probes/ftrace:test_messages_cpp
 genrule {
   name: "perfetto_src_traced_probes_ftrace_test_messages_cpp_gen",
@@ -6850,6 +6866,8 @@
     ":perfetto_src_traced_probes_filesystem_unittests",
     ":perfetto_src_traced_probes_ftrace_format_parser",
     ":perfetto_src_traced_probes_ftrace_ftrace",
+    ":perfetto_src_traced_probes_ftrace_kallsyms_kallsyms",
+    ":perfetto_src_traced_probes_ftrace_kallsyms_unittests",
     ":perfetto_src_traced_probes_ftrace_test_messages_cpp_gen",
     ":perfetto_src_traced_probes_ftrace_test_messages_lite_gen",
     ":perfetto_src_traced_probes_ftrace_test_messages_zero_gen",
diff --git a/gn/perfetto_benchmarks.gni b/gn/perfetto_benchmarks.gni
index 122c6ff..0571557 100644
--- a/gn/perfetto_benchmarks.gni
+++ b/gn/perfetto_benchmarks.gni
@@ -21,6 +21,7 @@
   "src/trace_processor/containers:benchmarks",
   "src/trace_processor/tables:benchmarks",
   "src/tracing:benchmarks",
+  "src/traced/probes/ftrace/kallsyms:benchmarks",
   "test:benchmark_main",
   "test:end_to_end_benchmarks",
 ]
diff --git a/gn/perfetto_unittests.gni b/gn/perfetto_unittests.gni
index 11ba49f..3574664 100644
--- a/gn/perfetto_unittests.gni
+++ b/gn/perfetto_unittests.gni
@@ -45,6 +45,7 @@
     "src/traced/probes:unittests",
     "src/traced/probes/filesystem:unittests",
     "src/traced/probes/ftrace:unittests",
+    "src/traced/probes/ftrace/kallsyms:unittests",
     "src/traced/service:unittests",
   ]
 }
diff --git a/include/perfetto/ext/base/metatrace_events.h b/include/perfetto/ext/base/metatrace_events.h
index 4f62244..e3cfb77 100644
--- a/include/perfetto/ext/base/metatrace_events.h
+++ b/include/perfetto/ext/base/metatrace_events.h
@@ -59,7 +59,8 @@
   F(TRACE_WRITER_COMMIT_STARTUP_WRITER_BATCH), \
   F(FTRACE_READ_TICK), \
   F(FTRACE_CPU_READ_CYCLE), \
-  F(FTRACE_CPU_READ_BATCH)
+  F(FTRACE_CPU_READ_BATCH), \
+  F(KALLSYMS_PARSE)
 
 // Append only, see above.
 //
diff --git a/src/traced/probes/ftrace/kallsyms/BUILD.gn b/src/traced/probes/ftrace/kallsyms/BUILD.gn
new file mode 100644
index 0000000..c4e6ca7
--- /dev/null
+++ b/src/traced/probes/ftrace/kallsyms/BUILD.gn
@@ -0,0 +1,56 @@
+# Copyright (C) 2019 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import("../../../../../gn/test.gni")
+
+source_set("kallsyms") {
+  deps = [
+    "../../../../../gn:default_deps",
+    "../../../../../include/perfetto/protozero",
+    "../../../../base",
+  ]
+  sources = [
+    "kernel_symbol_map.cc",
+    "kernel_symbol_map.h",
+  ]
+}
+
+perfetto_unittest_source_set("unittests") {
+  testonly = true
+  deps = [
+    ":kallsyms",
+    "../../../../../gn:default_deps",
+    "../../../../../gn:gtest_and_gmock",
+    "../../../../base",
+  ]
+  sources = [
+    "kernel_symbol_map_unittest.cc",
+  ]
+}
+
+if (enable_perfetto_benchmarks) {
+  source_set("benchmarks") {
+    testonly = true
+    deps = [
+      ":kallsyms",
+      "../../../../../gn:benchmark",
+      "../../../../../gn:default_deps",
+      "../../../../base",
+      "../../../../base:test_support",
+    ]
+    sources = [
+      "kernel_symbol_map_benchmark.cc",
+    ]
+  }
+}
diff --git a/src/traced/probes/ftrace/kallsyms/kernel_symbol_map.cc b/src/traced/probes/ftrace/kallsyms/kernel_symbol_map.cc
new file mode 100644
index 0000000..f2d55f0
--- /dev/null
+++ b/src/traced/probes/ftrace/kallsyms/kernel_symbol_map.cc
@@ -0,0 +1,407 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/traced/probes/ftrace/kallsyms/kernel_symbol_map.h"
+
+#include "perfetto/base/logging.h"
+#include "perfetto/ext/base/metatrace.h"
+#include "perfetto/ext/base/paged_memory.h"
+#include "perfetto/ext/base/scoped_file.h"
+#include "perfetto/ext/base/string_view.h"
+#include "perfetto/ext/base/utils.h"
+#include "perfetto/protozero/proto_utils.h"
+
+#include <inttypes.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <functional>
+#include <map>
+#include <utility>
+
+namespace perfetto {
+
+// On a Pixel 3 this gives an avg. lookup time of 600 ns and a memory usage
+// of 1.1 MB for 65k symbols. See go/kallsyms-parser-bench.
+size_t KernelSymbolMap::kSymIndexSampling = 16;
+size_t KernelSymbolMap::kTokenIndexSampling = 4;
+
+namespace {
+
+using TokenId = KernelSymbolMap::TokenTable::TokenId;
+constexpr size_t kSymNameMaxLen = 128;
+
+// Reads a kallsyms file in blocks of 4 pages each and decode its lines using
+// a simple FSM. Calls the passed lambda for each valid symbol.
+// It skips undefined symbols and other useless stuff.
+template <typename Lambda /* void(uint64_t, const char*) */>
+void ForEachSym(const std::string& kallsyms_path, Lambda fn) {
+  base::ScopedFile fd = base::OpenFile(kallsyms_path.c_str(), O_RDONLY);
+  if (!fd) {
+    PERFETTO_PLOG("Cannot open %s", kallsyms_path.c_str());
+    return;
+  }
+
+  // /proc/kallsyms looks as follows:
+  // 0000000000026a80 A bpf_trace_sds
+  //
+  // ffffffffc03a6000 T cpufreq_gov_powersave_init<TAB> [cpufreq_powersave]
+  // ffffffffc035d000 T cpufreq_gov_userspace_init<TAB> [cpufreq_userspace]
+  //
+  // We parse it with a state machine that has four states, one for each column.
+  // We don't care about the part in the square brackets and ignore everything
+  // after the symbol name.
+
+  static constexpr size_t kBufSize = 16 * 1024;
+  base::PagedMemory buffer = base::PagedMemory::Allocate(kBufSize);
+  enum { kSymAddr, kSymType, kSymName, kEatRestOfLine } state = kSymAddr;
+  uint64_t sym_addr = 0;
+  char sym_type = '\0';
+  char sym_name[kSymNameMaxLen + 1];
+  size_t sym_name_len = 0;
+  for (;;) {
+    char* buf = static_cast<char*>(buffer.Get());
+    auto rsize = PERFETTO_EINTR(read(*fd, buf, kBufSize));
+    if (rsize < 0) {
+      PERFETTO_PLOG("read(%s) failed", kallsyms_path.c_str());
+      return;
+    }
+    if (rsize == 0)
+      return;  // EOF
+    for (size_t i = 0; i < static_cast<size_t>(rsize); i++) {
+      char c = buf[i];
+      const bool is_space = c == ' ' || c == '\t';
+      switch (state) {
+        case kSymAddr:
+          if (c >= '0' && c <= '9') {
+            sym_addr = (sym_addr << 4) | static_cast<uint8_t>(c - '0');
+          } else if (c >= 'a' && c <= 'f') {
+            sym_addr = (sym_addr << 4) | static_cast<uint8_t>(c - 'a' + 10);
+          } else if (is_space) {
+            state = kSymType;
+          } else if (c == '\0') {
+            return;
+          } else {
+            PERFETTO_ELOG("kallsyms parser error: chr 0x%x @ off=%zu", c, i);
+            return;
+          }
+          break;
+
+        case kSymType:
+          if (is_space)
+            break;  // Eat leading spaces.
+          sym_type = c;
+          state = kSymName;
+          sym_name_len = 0;
+          break;
+
+        case kSymName:
+          if (is_space && sym_name_len == 0)
+            break;  // Eat leading spaces.
+          if (c && c != '\n' && !is_space && sym_name_len < kSymNameMaxLen) {
+            sym_name[sym_name_len++] = c;
+            break;
+          }
+          sym_name[sym_name_len] = '\0';
+          fn(sym_addr, sym_type, sym_name);
+          sym_addr = 0;
+          sym_type = '\0';
+          state = c == '\n' ? kSymAddr : kEatRestOfLine;
+          break;
+
+        case kEatRestOfLine:
+          if (c == '\n')
+            state = kSymAddr;
+          break;
+      }  // switch(state)
+    }    // for (char in buf)
+  }      // for (read chunk)
+}
+
+// Splits a symbol name into tokens using '_' as a separator, calling the passed
+// lambda for each token. It splits tokens in a way that allows the original
+// string to be rebuilt as-is by re-joining using a '_' between each token.
+// For instance:
+// _fo_a_b      ->  ["", fo, a, b]
+// __fo_a_b     ->  [_, fo, a, b]
+// __fo_a_b_    ->  [_, fo, a, b, ""]
+// __fo_a_b____ ->  [_, fo, a, b, ___]
+template <typename Lambda /* void(base::StringView) */>
+void Tokenize(const char* name, Lambda fn) {
+  const char* tok_start = name;
+  bool is_start_of_token = true;
+  bool tok_is_sep = false;
+  for (const char* ptr = name;; ptr++) {
+    const char c = *ptr;
+    if (is_start_of_token) {
+      tok_is_sep = *tok_start == '_';  // Deals with tokens made of '_'s.
+      is_start_of_token = false;
+    }
+    // Scan until either the end of string or the next character (which is a '_'
+    // in nominal cases, or anything != '_' for tokens made by 1+ '_').
+    if (c == '\0' || (!tok_is_sep && c == '_') || (tok_is_sep && c != '_')) {
+      size_t tok_len = static_cast<size_t>(ptr - tok_start);
+      if (tok_is_sep && c != '\0')
+        --tok_len;
+      fn(base::StringView(tok_start, tok_len));
+      if (c == '\0')
+        return;
+      tok_start = tok_is_sep ? ptr : ptr + 1;
+      is_start_of_token = true;
+    }
+  }
+}
+
+}  // namespace
+
+KernelSymbolMap::TokenTable::TokenTable() {
+  // Insert a null token as id 0. We can't just add "" because the empty string
+  // is special-cased and doesn't insert an actual token. So we push a string of
+  // size one that contains only the null character instead.
+  char null_tok = 0;
+  Add(std::string(&null_tok, 1));
+}
+
+KernelSymbolMap::TokenTable::~TokenTable() = default;
+
+// Adds a new token to the db. Does not dedupe identical token (with the
+// exception of the empty string). The caller has to deal with that.
+// Supports only ASCII characters in the range [1, 127].
+// The last character of the token will have the MSB set.
+TokenId KernelSymbolMap::TokenTable::Add(const std::string& token) {
+  const size_t token_size = token.size();
+  if (token_size == 0)
+    return 0;
+  TokenId id = num_tokens_++;
+
+  const size_t buf_size_before_insertion = buf_.size();
+  if (id % kTokenIndexSampling == 0)
+    index_.emplace_back(buf_size_before_insertion);
+
+  const size_t prev_size = buf_.size();
+  buf_.resize(prev_size + token_size);
+  char* tok_wptr = &buf_[prev_size];
+  for (size_t i = 0; i < token_size - 1; i++) {
+    PERFETTO_DCHECK((token.at(i) & 0x80) == 0);  // |token| must be ASCII only.
+    *(tok_wptr++) = token.at(i) & 0x7f;
+  }
+  *(tok_wptr++) = token.at(token_size - 1) | 0x80;
+  PERFETTO_DCHECK(tok_wptr == &buf_[buf_.size()]);
+  return id;
+}
+
+// NOTE: the caller need to mask the returned chars with 0x7f. The last char of
+// the StringView will have its MSB set (it's used as a EOF char internally).
+base::StringView KernelSymbolMap::TokenTable::Lookup(TokenId id) {
+  if (id == 0)
+    return base::StringView();
+  if (id > num_tokens_)
+    return base::StringView("<error>");
+  // We don't know precisely where the id-th token starts in the buffer. We
+  // store only one position every kTokenIndexSampling. From there, the token
+  // can be found with a linear scan of at most kTokenIndexSampling steps.
+  size_t index_off = id / kTokenIndexSampling;
+  PERFETTO_DCHECK(index_off < index_.size());
+  TokenId cur_id = static_cast<TokenId>(index_off * kTokenIndexSampling);
+  uint32_t begin = index_[index_off];
+  PERFETTO_DCHECK(begin == 0 || buf_[begin - 1] & 0x80);
+  const size_t buf_size = buf_.size();
+  for (uint32_t off = begin; off < buf_size; ++off) {
+    // Advance |off| until the end of the token (which has the MSB set).
+    if ((buf_[off] & 0x80) == 0)
+      continue;
+    if (cur_id == id)
+      return base::StringView(&buf_[begin], off - begin + 1);
+    ++cur_id;
+    begin = off + 1;
+  }
+  return base::StringView();
+}
+
+size_t KernelSymbolMap::Parse(const std::string& kallsyms_path) {
+  PERFETTO_METATRACE_SCOPED(TAG_FTRACE, KALLSYMS_PARSE);
+  using SymAddr = uint64_t;
+
+  struct TokenInfo {
+    uint32_t count = 0;
+    TokenId id = 0;
+  };
+
+  // Note if changing the container: the code below relies on stable iterators.
+  using TokenMap = std::map<std::string, TokenInfo>;
+  using TokenMapPtr = TokenMap::value_type*;
+  TokenMap tokens;
+
+  // Keep the (ordered) list of tokens for each symbol.
+  std::multimap<SymAddr, TokenMapPtr> symbols;
+
+  ForEachSym(kallsyms_path, [&](SymAddr addr, char type, const char* name) {
+    if (addr == 0 || (type != 't' && type != 'T') || name[0] == '$') {
+      return;
+    }
+
+    // Split each symbol name in tokens, using '_' as a separator (so that
+    // "foo_bar" -> ["foo", "bar"]). For each token hash:
+    // 1. Keep track of the frequency of each token.
+    // 2. Keep track of the list of token hashes for each symbol.
+    Tokenize(name, [&tokens, &symbols, addr](base::StringView token) {
+      // Strip the .cfi part if present.
+      if (token.substr(token.size() - 4) == ".cfi")
+        token = token.substr(0, token.size() - 4);
+      auto it_and_ins = tokens.emplace(token.ToStdString(), TokenInfo{});
+      it_and_ins.first->second.count++;
+      symbols.emplace(addr, &*it_and_ins.first);
+    });
+  });
+
+  // At this point we have broken down each symbol into a set of token hashes.
+  // Now generate the token ids, putting high freq tokens first, so they use
+  // only one byte to varint encode.
+
+  // This block limits the lifetime of |tokens_by_freq|.
+  {
+    std::vector<TokenMapPtr> tokens_by_freq;
+    tokens_by_freq.resize(tokens.size());
+    size_t tok_idx = 0;
+    for (auto& kv : tokens)
+      tokens_by_freq[tok_idx++] = &kv;
+
+    auto comparer = [](TokenMapPtr a, TokenMapPtr b) {
+      PERFETTO_DCHECK(a && b);
+      return b->second.count < a->second.count;
+    };
+    std::sort(tokens_by_freq.begin(), tokens_by_freq.end(), comparer);
+    for (TokenMapPtr tinfo : tokens_by_freq) {
+      tinfo->second.id = tokens_.Add(tinfo->first);
+    }
+  }
+  tokens_.shrink_to_fit();
+
+  buf_.resize(2 * 1024 * 1024);  // Based on real-word observations.
+  base_addr_ = symbols.empty() ? 0 : symbols.begin()->first;
+  SymAddr prev_sym_addr = base_addr_;
+  uint8_t* wptr = buf_.data();
+
+  for (auto it = symbols.begin(); it != symbols.end();) {
+    const SymAddr sym_addr = it->first;
+
+    // Find the iterator to the first token of the next symbol (or the end).
+    auto sym_start = it;
+    auto sym_end = it;
+    while (sym_end != symbols.end() && sym_end->first == sym_addr)
+      ++sym_end;
+
+    // The range [sym_start, sym_end) has all the tokens for the current symbol.
+    uint32_t size_before = static_cast<uint32_t>(wptr - buf_.data());
+
+    // Make sure there is enough headroom to write the symbol.
+    if (buf_.size() - size_before < 1024) {
+      buf_.resize(buf_.size() + 32768);
+      wptr = buf_.data() + size_before;
+    }
+
+    uint32_t sym_rel_addr = static_cast<uint32_t>(sym_addr - base_addr_);
+    const size_t sym_num = num_syms_++;
+    if (sym_num % kSymIndexSampling == 0)
+      index_.emplace_back(std::make_pair(sym_rel_addr, size_before));
+    PERFETTO_DCHECK(sym_addr >= prev_sym_addr);
+    uint32_t delta = static_cast<uint32_t>(sym_addr - prev_sym_addr);
+    wptr = protozero::proto_utils::WriteVarInt(delta, wptr);
+    // Append all the token ids.
+    for (it = sym_start; it != sym_end;) {
+      PERFETTO_DCHECK(it->first == sym_addr);
+      TokenId token_id = it->second->second.id << 1;
+      ++it;
+      token_id |= (it == sym_end) ? 1 : 0;  // Last one has LSB set to 1.
+      wptr = protozero::proto_utils::WriteVarInt(token_id, wptr);
+    }
+    prev_sym_addr = sym_addr;
+  }  // for (symbols)
+
+  buf_.resize(static_cast<size_t>(wptr - buf_.data()));
+  buf_.shrink_to_fit();
+
+  PERFETTO_DLOG(
+      "Loaded %zu kalllsyms entries. Mem usage: %zu B (addresses) + %zu B "
+      "(tokens), total: %zu B",
+      num_syms_, addr_bytes(), tokens_.size_bytes(), size_bytes());
+
+  return num_syms_;
+}
+
+std::string KernelSymbolMap::Lookup(uint64_t sym_addr) {
+  if (index_.empty() || sym_addr < base_addr_)
+    return "";
+
+  // First find the highest symbol address <= sym_addr.
+  // Start with a binary search using the sparse index.
+
+  const uint32_t sym_rel_addr = static_cast<uint32_t>(sym_addr - base_addr_);
+  auto it = std::upper_bound(index_.cbegin(), index_.cend(),
+                             std::make_pair(sym_rel_addr, 0u));
+  if (it != index_.cbegin())
+    --it;
+
+  // Then continue with a linear scan (of at most kSymIndexSampling steps).
+  uint32_t addr = it->first;
+  uint32_t off = it->second;
+  const uint8_t* rdptr = &buf_[off];
+  const uint8_t* const buf_end = &buf_[buf_.size()];
+  bool parsing_addr = true;
+  const uint8_t* next_rdptr = nullptr;
+  for (bool is_first_addr = true;; is_first_addr = false) {
+    uint64_t v = 0;
+    const auto* prev_rdptr = rdptr;
+    rdptr = protozero::proto_utils::ParseVarInt(rdptr, buf_end, &v);
+    if (rdptr == prev_rdptr)
+      break;
+    if (parsing_addr) {
+      addr += is_first_addr ? 0 : static_cast<uint32_t>(v);
+      parsing_addr = false;
+      if (addr > sym_rel_addr)
+        break;
+      next_rdptr = rdptr;
+    } else {
+      // This is a token. Wait for the EOF maker.
+      parsing_addr = (v & 1) == 1;
+    }
+  }
+
+  if (!next_rdptr)
+    return "";
+
+  // The address has been found. Now rejoin the tokens to form the symbol name.
+
+  rdptr = next_rdptr;
+  std::string sym_name;
+  sym_name.reserve(kSymNameMaxLen);
+  for (bool eof = false, is_first_token = true; !eof; is_first_token = false) {
+    uint64_t v = 0;
+    const auto* old = rdptr;
+    rdptr = protozero::proto_utils::ParseVarInt(rdptr, buf_end, &v);
+    if (rdptr == old)
+      break;
+    eof = v & 1;
+    base::StringView token = tokens_.Lookup(static_cast<TokenId>(v >> 1));
+    if (!is_first_token)
+      sym_name.push_back('_');
+    for (size_t i = 0; i < token.size(); i++)
+      sym_name.push_back(token.at(i) & 0x7f);
+  }
+  return sym_name;
+}
+
+}  // namespace perfetto
diff --git a/src/traced/probes/ftrace/kallsyms/kernel_symbol_map.h b/src/traced/probes/ftrace/kallsyms/kernel_symbol_map.h
new file mode 100644
index 0000000..c13546a
--- /dev/null
+++ b/src/traced/probes/ftrace/kallsyms/kernel_symbol_map.h
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_TRACED_PROBES_FTRACE_KALLSYMS_KERNEL_SYMBOL_MAP_H_
+#define SRC_TRACED_PROBES_FTRACE_KALLSYMS_KERNEL_SYMBOL_MAP_H_
+
+#include <stdint.h>
+#include <array>
+#include <forward_list>
+#include <vector>
+
+namespace perfetto {
+
+namespace base {
+class StringView;
+}
+
+// A parser and memory-efficient container for /proc/kallsyms.
+// It can store a full kernel symbol table in ~1.2MB of memory and perform fast
+// lookups using logarithmic binary searches + bounded linear scans.
+//
+// /proc/kallsyms is a ~10 MB text file that contains the map of kernel symbols,
+// as follows:
+// ffffff8f77682f8c t el0_sync_invalid
+// ffffff8f77683060 t el0_irq_invalid
+// ...
+// In a typipcal Android kernel, it consists of 213K lines. Out of these, only
+// 116K are interesting for the sake of symbolizing kernel functions, the rest
+// are .rodata (variables), weak or other useless symbols.
+// Still, even keeping around 116K pointers would require 116K * 8 ~= 1 MB of
+// memory, without accounting for any strings for the symbols names.
+// The SUM(str.len) for the 116K symbol names adds up to 2.7 MB (without
+// counting their addresses).
+// However consider the following:
+// - Symbol addresses are mostly contiguous. Modulo the initial KASLR loading
+//   address, most symbols are few hundreds bytes apart from each other.
+// - Symbol names are made of tokens that are quite frequent (token: the result
+//   of name.split('_')). If we tokenize the 2.7 MB of strings, the resulting
+//   SUM(distinct_token.len) goes down 2.7MB -> 146 KB. This is because tokens
+//   like "get", "set" or "event" show up thousands of times.
+// - Symbol names are ASCII strings using only 7 out of 8 bits.
+//
+// In the light of this, the in-memory architecture of this data structure is
+// as follows:
+// We keep two tables around: (1) a token table and (2) a symbol table. Both
+// table are a flat byte vector with some sparse lookaside index to make lookups
+// faster and avoid full linear scans.
+//
+// Token table
+// -----------
+// The token table is a flat char buffer. Tokens are variable size (>0). Each
+// token is identified by its ordinality, so token id 3 is the 3rd token in
+// the table. All tokens are concatenated together.
+// Given the ASCII encoding, the MSB is used as a terminator. So instead of
+// wasting an extra NUL byte for each string, the last char of each token has
+// the MSB set.
+// Furthermore, a lookaside index stores the offset of tokens (i.e. Token N
+// starts at offset O in the buffer) to allow fast lookups. In order to avoid
+// wasting too much memory, the index is sparse and track the offsets of only
+// one every kTokenIndexSamplinig tokens.
+// When looking up a token ID N, the table seeks at the offset of the closest
+// token <= N, and then scans linearly the next (at most kTokenIndexSamplinig)
+// tokens, counting the MSBs found, until the right token id is found.
+// buf:   set*get*kernel*load*fpsimd*return*wrapper*el0*skip*sync*neon*bit*aes
+//        ^                   ^                         ^
+//        |                   |                         |
+// index: 0@0                 4@15                      8@21
+
+// Symbol table
+// ------------
+// The symbol table is a flat char buffer that stores for each symbol: its
+// address + the list of token indexes in the token table. The main caveats are
+// that:
+// - Symbol addresses are delta encoded (delta from prev symbol's addr).
+// - Both delta addresses and token indexes are var-int encoded.
+// - The LSB of token indexes is used as EOF marker (i.e. the next varint is
+//   the delta-addr for the next symbol). This time the LSB is used because of
+//   the varint encoding.
+// At parsing time symbols are ordered by address and tokens are sorted by
+// frequency, so that the top used 64 tokens can be represented with 1 byte.
+// (Rationale for 64: 1 byte = 8 bits. The MSB bit of each byte is used for the
+// varint encoding, the LSB bit of each number is used as end-of-tokens marker.
+// There are 6 bits left -> 64 indexes can be represented using one byte).
+// In summary the symbol table looks as follows:
+//
+// Base address: 0xbeef0000
+// Symbol buffer:
+// 0 1|0  4|0  6|1    // 0xbeef0000: 1,4,6 -> get_fpsimd_wrapper
+// 8 7|0  3|1         // 0xbeef0008: 7,3   -> el0_load
+// ...
+// Like in the case of the token table, a lookaside index keeps track of the
+// offset of one every kSymIndexSamplinig addresses.
+// The Lookup(ADDR) function operates as follows:
+// 1. Performs a logarithmic binary search in the symbols index, finding the
+//    offset of the closest addres <= ADDR.
+// 2. Skip over at most kSymIndexSamplinig until the symbol is found.
+// 3. For each token index, lookup the corresponding token string and
+//    concatenate them to build the symbol name.
+
+class KernelSymbolMap {
+ public:
+  // The two constants below are changeable only for the benchmark use.
+  // Trades off size of the root |index_| vs worst-case linear scans size.
+  // A higher number makes the index more sparse.
+  static size_t kSymIndexSampling;
+
+  // Trades off size of the TokenTable |index_| vs worst-case linear scans size.
+  static size_t kTokenIndexSampling;
+
+  // Parses a kallsyms file. Returns the number of valid symbols decoded.
+  size_t Parse(const std::string& kallsyms_path);
+
+  // Looks up the closest symbol (i.e. the one with the highest address <=
+  // |addr|) from its absolute 64-bit address.
+  // Returns an empty string if the symbol is not found (which can happen only
+  // if the passed |addr| is < min(addr)).
+  std::string Lookup(uint64_t addr);
+
+  // Returns the numberr of valid symbols decoded.
+  size_t num_syms() const { return num_syms_; }
+
+  // Returns the size in bytes used by the adddress table (without counting
+  // the tokens).
+  size_t addr_bytes() const { return buf_.size() + index_.size() * 8; }
+
+  // Returns the total memory usage in bytes.
+  size_t size_bytes() const { return addr_bytes() + tokens_.size_bytes(); }
+
+  // Token table.
+  class TokenTable {
+   public:
+    using TokenId = uint32_t;
+    TokenTable();
+    ~TokenTable();
+    TokenId Add(const std::string&);
+    base::StringView Lookup(TokenId);
+    size_t size_bytes() const { return buf_.size() + index_.size() * 4; }
+
+    void shrink_to_fit() {
+      buf_.shrink_to_fit();
+      index_.shrink_to_fit();
+    }
+
+   private:
+    TokenId num_tokens_ = 0;
+
+    std::vector<char> buf_;  // Token buffer.
+
+    // The value i-th in the vector contains the offset (within |buf_|) of the
+    // (i * kTokenIndexSamplinig)-th token.
+    std::vector<uint32_t> index_;
+  };
+
+ private:
+  TokenTable tokens_;  // Token table.
+
+  uint64_t base_addr_ = 0;    // Address of the first symbol (after sorting).
+  size_t num_syms_ = 0;       // Number of valid symbols stored.
+  std::vector<uint8_t> buf_;  // Symbol buffer.
+
+  // The key is (address - base_addr_), the value is the byte offset in |buf_|
+  // where the symbol entry starts (i.e. the start of the varint that tells the
+  // delta from the previous symbol).
+  std::vector<std::pair<uint32_t /*rel_addr*/, uint32_t /*offset*/>> index_;
+};
+
+}  // namespace perfetto
+
+#endif  // SRC_TRACED_PROBES_FTRACE_KALLSYMS_KERNEL_SYMBOL_MAP_H_
diff --git a/src/traced/probes/ftrace/kallsyms/kernel_symbol_map_benchmark.cc b/src/traced/probes/ftrace/kallsyms/kernel_symbol_map_benchmark.cc
new file mode 100644
index 0000000..cdd4e38
--- /dev/null
+++ b/src/traced/probes/ftrace/kallsyms/kernel_symbol_map_benchmark.cc
@@ -0,0 +1,116 @@
+// Copyright (C) 2019 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <random>
+#include <set>
+#include <unordered_set>
+
+#include <benchmark/benchmark.h>
+
+#include "perfetto/base/logging.h"
+#include "perfetto/ext/base/utils.h"
+#include "src/base/test/utils.h"
+#include "src/traced/probes/ftrace/kallsyms/kernel_symbol_map.h"
+
+namespace {
+
+bool IsBenchmarkFunctionalOnly() {
+  return getenv("BENCHMARK_FUNCTIONAL_TEST_ONLY") != nullptr;
+}
+
+void BenchmarkArgs(benchmark::internal::Benchmark* b) {
+  if (IsBenchmarkFunctionalOnly()) {
+    b->Ranges({{16, 16}, {16, 16}});
+  } else {
+    b->RangeMultiplier(2)->Ranges({{4, 512}, {4, 512}});
+  }
+}
+
+struct ExpectedSym {
+  uint64_t addr;
+  const char* name;
+};
+
+// This set of symbols has been chosen by randomly picking 40 random symbols
+// from the original kallsyms.
+ExpectedSym kExpectedSyms[] = {
+    {0xffffff8f79c0d978, "__map_memblock"},
+    {0xffffff8f78fddbb8, "smack_inode_getsecid"},
+    {0xffffff8f78fe43b4, "msm_smmu_set_attribute"},
+    {0xffffff8f79d23e20, "__initcall_41_dm_verity_init6"},
+    {0xffffff8f74206c5c, "sme_update_fast_transition_enabled"},
+    {0xffffff8f74878c8c, "tavil_hph_idle_detect_put"},
+    {0xffffff8f78fd7db0, "privileged_wrt_inode_uidgid"},
+    {0xffffff8f78ffe030, "__hrtimer_tasklet_trampoline"},
+    {0xffffff8f78fd86b0, "store_enable"},
+    {0xffffff8f78ffbcb8, "raw6_exit_net"},
+    {0xffffff8f78ffa6ec, "idProduct_show"},
+    {0xffffff8f78fd99c0, "perf_tp_event"},
+    {0xffffff8f78fe1468, "rpmh_tx_done"},
+    {0xffffff8f78fda274, "page_unlock_anon_vma_read"},
+    {0xffffff8f78ffedfc, "vmstat_period_ms_operations_open"},
+    {0xffffff8f78fe0148, "devm_gpio_request"},
+    {0xffffff8f77915028, "ctx_sched_out"},
+    {0xffffff8f77ccdc2c, "gcm_hash_crypt_remain_continue"},
+    {0xffffff8f790022ec, "loop_init"},
+    {0xffffff8f78ff0004, "pcim_release"},
+    {0xffffff8f78fe1d8c, "uart_close"},
+    {0xffffff8f78fda9d4, "pipe_lock"},
+    {0xffffff8f78e62c68, "local_bh_enable.117091"},
+    {0xffffff8f78fd918c, "fork_idle"},
+    {0xffffff8f78fe24c4, "drm_dp_downstream_debug"},
+    {0xffffff8f78ff41d0, "inet_addr_onlink"},
+    {0xffffff8f78fdf2d4, "idr_alloc"},
+    {0xffffff8f78ff073c, "fts_remove"},
+    {0xffffff8f78ffe294, "xfrm4_local_error"},
+    {0xffffff8f79001994, "cpu_feature_match_PMULL_init"},
+    {0xffffff8f78ff4740, "xfrm_state_find"},
+    {0xffffff8f78ff58b0, "inet_del_offload"},
+    {0xffffff8f742041ac, "csr_is_conn_state_connected_infra"},
+    {0xffffff8f78fe1fd4, "diag_add_client"},
+    {0xffffff8f78ffc000, "trace_raw_output_mm_vmscan_kswapd_sleep"},
+    {0xffffff8f78fe6388, "scsi_queue_insert"},
+    {0xffffff8f78fdd480, "selinux_sb_clone_mnt_opts"},
+    {0xffffff8f78fe0e9c, "clk_fixed_rate_recalc_rate"},
+    {0xffffff8f78fedaec, "cap_inode_killpriv"},
+    {0xffffff8f79002b64, "audio_amrwb_init"},
+};
+
+}  // namespace
+
+static void BM_KallSyms(benchmark::State& state) {
+  perfetto::KernelSymbolMap::kTokenIndexSampling =
+      static_cast<size_t>(state.range(0));
+  perfetto::KernelSymbolMap::kSymIndexSampling =
+      static_cast<size_t>(state.range(1));
+  perfetto::KernelSymbolMap kallsyms;
+
+  // Don't run the benchmark on the CI as it requires pushing all test data,
+  // which slows down significantly the CI.
+  const bool skip = IsBenchmarkFunctionalOnly();
+  if (!skip) {
+    kallsyms.Parse(perfetto::base::GetTestDataPath("test/data/kallsyms.txt"));
+  }
+
+  for (auto _ : state) {
+    for (size_t i = 0; i < perfetto::base::ArraySize(kExpectedSyms); i++) {
+      const auto& exp = kExpectedSyms[i];
+      PERFETTO_CHECK(skip || kallsyms.Lookup(exp.addr) == exp.name);
+    }
+  }
+
+  state.counters["mem"] = kallsyms.size_bytes();
+}
+
+BENCHMARK(BM_KallSyms)->Apply(BenchmarkArgs);
diff --git a/src/traced/probes/ftrace/kallsyms/kernel_symbol_map_unittest.cc b/src/traced/probes/ftrace/kallsyms/kernel_symbol_map_unittest.cc
new file mode 100644
index 0000000..942820a
--- /dev/null
+++ b/src/traced/probes/ftrace/kallsyms/kernel_symbol_map_unittest.cc
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/traced/probes/ftrace/kallsyms/kernel_symbol_map.h"
+
+#include <inttypes.h>
+
+#include <random>
+#include <unordered_map>
+
+#include "perfetto/ext/base/file_utils.h"
+#include "perfetto/ext/base/string_view.h"
+#include "perfetto/ext/base/temp_file.h"
+
+#include "test/gtest_and_gmock.h"
+
+namespace perfetto {
+namespace {
+
+using TokenId = KernelSymbolMap::TokenTable::TokenId;
+
+std::string Lookup(KernelSymbolMap::TokenTable& tokens, TokenId id) {
+  base::StringView sv = tokens.Lookup(id);
+  std::string str;
+  str.reserve(sv.size() + 1);
+  for (const char c : sv)
+    str.push_back(c & 0x7f);
+  return str;
+}
+
+TEST(KernelSymbolMapTest, TokenTable) {
+  KernelSymbolMap::TokenTable tokens;
+  ASSERT_EQ(tokens.Add("a"), 1u);
+  ASSERT_EQ(tokens.Add("bb"), 2u);
+  ASSERT_EQ(tokens.Add("ccc"), 3u);
+  ASSERT_EQ(tokens.Add("foobar"), 4u);
+  ASSERT_EQ(tokens.Add("foobaz"), 5u);
+  ASSERT_EQ(tokens.Add("_"), 6u);
+  ASSERT_EQ(Lookup(tokens, 0), "");
+  ASSERT_EQ(Lookup(tokens, 1), "a");
+  ASSERT_EQ(Lookup(tokens, 2), "bb");
+  ASSERT_EQ(Lookup(tokens, 3), "ccc");
+  ASSERT_EQ(Lookup(tokens, 4), "foobar");
+  ASSERT_EQ(Lookup(tokens, 5), "foobaz");
+  ASSERT_EQ(Lookup(tokens, 6), "_");
+  ASSERT_EQ(Lookup(tokens, 0), "");
+  ASSERT_EQ(Lookup(tokens, 42), "<error>");
+}
+
+TEST(KernelSymbolMapTest, ManyTokens) {
+  KernelSymbolMap::TokenTable tokens;
+  std::unordered_map<TokenId, std::string> tok_map;
+  static std::minstd_rand rng(0);
+  for (int rep = 0; rep < 10000; rep++) {
+    static const size_t kNameMax = 128;
+    std::string sym_name;
+    sym_name.reserve(kNameMax);
+    size_t len = 1 + (rng() % kNameMax);
+    for (size_t j = 0; j < len; j++)
+      sym_name.push_back(rng() & 0x7f);
+    TokenId id = tokens.Add(sym_name);
+    ASSERT_EQ(tok_map.count(id), 0u);
+    tok_map[id] = sym_name;
+  }
+  for (const auto& kv : tok_map) {
+    ASSERT_EQ(Lookup(tokens, kv.first), kv.second);
+  }
+}
+
+TEST(KernelSymbolMapTest, EdgeCases) {
+  base::TempFile tmp = base::TempFile::Create();
+  static const char kContents[] = R"(ffffff8f73e2fa10 t one
+ffffff8f73e2fa20 t two_
+ffffff8f73e2fa30 t _three  [module_name_ignored]
+ffffff8f73e2fa40 x ignore
+ffffff8f73e2fa40 t _fo_ur_
+ffffff8f73e2fa50 t _five__.cfi
+ffffff8f73e2fa60 t __si__x__
+ffffff8f73e2fa70 t ___se___v_e__n___
+ffffff8f73e2fa80 t _eight_omg_this_name_is_so_loooooooooooooooooooooooooooooooooooong_should_be_truncated_exactly_here_because_this_is_the_128_char_TRUNCATED_HERE
+ffffff8f73e2fa90 t NiNe
+)";
+  base::WriteAll(tmp.fd(), kContents, sizeof(kContents));
+  base::FlushFile(tmp.fd());
+
+  KernelSymbolMap kallsyms;
+  EXPECT_EQ(kallsyms.Lookup(0x42), "");
+
+  kallsyms.Parse(tmp.path().c_str());
+  EXPECT_EQ(kallsyms.num_syms(), 9u);
+
+  // Test first exact lookups.
+  EXPECT_EQ(kallsyms.Lookup(0xffffff8f73e2fa10ULL), "one");
+  EXPECT_EQ(kallsyms.Lookup(0xffffff8f73e2fa20ULL), "two_");
+  EXPECT_EQ(kallsyms.Lookup(0xffffff8f73e2fa30LL), "_three");
+  EXPECT_EQ(kallsyms.Lookup(0xffffff8f73e2fa40ULL), "_fo_ur_");
+  EXPECT_EQ(kallsyms.Lookup(0xffffff8f73e2fa50ULL), "_five__");
+  EXPECT_EQ(kallsyms.Lookup(0xffffff8f73e2fa60ULL), "__si__x__");
+  EXPECT_EQ(kallsyms.Lookup(0xffffff8f73e2fa70ULL), "___se___v_e__n___");
+  EXPECT_EQ(
+      kallsyms.Lookup(0xffffff8f73e2fa80ULL),
+      "_eight_omg_this_name_is_so_loooooooooooooooooooooooooooooooooooong_"
+      "should_be_truncated_exactly_here_because_this_is_the_128_char");
+  EXPECT_EQ(kallsyms.Lookup(0xffffff8f73e2fa90ULL), "NiNe");
+
+  // Now check bound searches.
+  EXPECT_EQ(kallsyms.Lookup(0xffffff8f73e2fa00ULL), "");
+  EXPECT_EQ(kallsyms.Lookup(0xffffff8f73e2fa11ULL), "one");
+  EXPECT_EQ(kallsyms.Lookup(0xffffff8f73e2fa19ULL), "one");
+  EXPECT_EQ(kallsyms.Lookup(0xffffff8f73e2fa71ULL), "___se___v_e__n___");
+  EXPECT_EQ(kallsyms.Lookup(0xffffff8f73e2fa91ULL), "NiNe");
+  EXPECT_EQ(kallsyms.Lookup(0xffffff8fffffffffULL), "NiNe");
+}
+
+TEST(KernelSymbolMapTest, GoldenTest) {
+  std::string fake_kallsyms;
+  fake_kallsyms.reserve(8 * 1024 * 1024);
+  static std::minstd_rand rng(0);
+  static const size_t kNameMax = 64;
+  std::map<uint64_t, std::string> symbols;
+  for (int rep = 0; rep < 1000; rep++) {
+    uint64_t addr = static_cast<uint64_t>(rng());
+    if (symbols.count(addr))
+      continue;
+    static const char kCharset[] = "_abcdef";
+    char sym_name[kNameMax + 1];  // Deliberately not initialized.
+    size_t sym_name_len = 1 + (rng() % kNameMax);
+    for (size_t i = 0; i < sym_name_len; i++)
+      sym_name[i] = kCharset[rng() % strlen(kCharset)];
+    sym_name[sym_name_len] = '\0';
+    char line[kNameMax + 40];
+    sprintf(line, "%" PRIx64 " t %s\n", addr, sym_name);
+    symbols[addr] = sym_name;
+    fake_kallsyms += line;
+  }
+  base::TempFile tmp = base::TempFile::Create();
+  base::WriteAll(tmp.fd(), fake_kallsyms.data(), fake_kallsyms.size());
+  base::FlushFile(tmp.fd());
+
+  KernelSymbolMap kallsyms;
+  kallsyms.Parse(tmp.path().c_str());
+  ASSERT_EQ(kallsyms.num_syms(), symbols.size());
+  for (const auto& kv : symbols) {
+    ASSERT_EQ(kallsyms.Lookup(kv.first), kv.second);
+  }
+}
+
+}  // namespace
+}  // namespace perfetto
diff --git a/tools/gen_android_bp b/tools/gen_android_bp
index 227c055..a69915b 100755
--- a/tools/gen_android_bp
+++ b/tools/gen_android_bp
@@ -146,6 +146,10 @@
     lines = f.readlines()
   for line in (line.strip() for line in lines if not line.startswith('#')):
     assert os.path.exists(line), line
+    if line.startswith('test/data/'):
+        # Skip test data files that require GCS. They are only for benchmarks.
+        # We don't run benchmarks in the android tree.
+        continue
     if line.endswith('/'):
       yield line + '**/*'
     else:
diff --git a/tools/install-build-deps b/tools/install-build-deps
index 200a22d..db5aa59 100755
--- a/tools/install-build-deps
+++ b/tools/install-build-deps
@@ -146,8 +146,8 @@
     # Example traces for regression tests.
     (
         'buildtools/test_data.zip',
-        'https://storage.googleapis.com/perfetto/test-data-20200120-171652.zip',
-        'a99364ac93ec2bd4407c5a392f759f45fb3f22b0',
+        'https://storage.googleapis.com/perfetto/test-data-20200121-101157.zip',
+        'ddfe8aa6ceca7be17f222740927dc50aa9f8ed02',
         'all',
     ),
 
diff --git a/tools/test_data.txt b/tools/test_data.txt
index d2dd686..abcb65d 100644
--- a/tools/test_data.txt
+++ b/tools/test_data.txt
@@ -2,3 +2,4 @@
 # to the root.
 src/traced/probes/ftrace/test/data/
 src/traced/probes/filesystem/testdata/
+test/data/kallsyms.txt