Add cache to hint where to look for inodes.

Bug: 74584014
Change-Id: I212750ffda0612f59c3eb517bac722459b853d14
diff --git a/Android.bp b/Android.bp
index 0d2d31f..da39ea9 100644
--- a/Android.bp
+++ b/Android.bp
@@ -64,6 +64,8 @@
     "src/traced/probes/filesystem/fs_mount.cc",
     "src/traced/probes/filesystem/inode_file_data_source.cc",
     "src/traced/probes/filesystem/lru_inode_cache.cc",
+    "src/traced/probes/filesystem/prefix_finder.cc",
+    "src/traced/probes/filesystem/range_tree.cc",
     "src/traced/probes/probes.cc",
     "src/traced/probes/probes_producer.cc",
     "src/traced/probes/process_stats_data_source.cc",
@@ -293,6 +295,8 @@
     "src/traced/probes/filesystem/fs_mount.cc",
     "src/traced/probes/filesystem/inode_file_data_source.cc",
     "src/traced/probes/filesystem/lru_inode_cache.cc",
+    "src/traced/probes/filesystem/prefix_finder.cc",
+    "src/traced/probes/filesystem/range_tree.cc",
     "src/traced/probes/probes_producer.cc",
     "src/traced/probes/process_stats_data_source.cc",
     "src/tracing/core/chrome_config.cc",
@@ -3266,6 +3270,10 @@
     "src/traced/probes/filesystem/inode_file_data_source.cc",
     "src/traced/probes/filesystem/lru_inode_cache.cc",
     "src/traced/probes/filesystem/lru_inode_cache_unittest.cc",
+    "src/traced/probes/filesystem/prefix_finder.cc",
+    "src/traced/probes/filesystem/prefix_finder_unittest.cc",
+    "src/traced/probes/filesystem/range_tree.cc",
+    "src/traced/probes/filesystem/range_tree_unittest.cc",
     "src/traced/probes/probes_producer.cc",
     "src/traced/probes/process_stats_data_source.cc",
     "src/traced/probes/process_stats_data_source_unittest.cc",
diff --git a/include/perfetto/base/BUILD.gn b/include/perfetto/base/BUILD.gn
index c9efc2d..03d24eb 100644
--- a/include/perfetto/base/BUILD.gn
+++ b/include/perfetto/base/BUILD.gn
@@ -19,6 +19,7 @@
     "logging.h",
     "page_allocator.h",
     "scoped_file.h",
+    "small_set.h",
     "string_splitter.h",
     "task_runner.h",
     "thread_checker.h",
diff --git a/include/perfetto/base/small_set.h b/include/perfetto/base/small_set.h
new file mode 100644
index 0000000..94f2324
--- /dev/null
+++ b/include/perfetto/base/small_set.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_PERFETTO_BASE_SMALL_SET_H_
+#define INCLUDE_PERFETTO_BASE_SMALL_SET_H_
+
+#include <array>
+
+namespace perfetto {
+
+// Set that can store up to Size items of DataType.
+// Lookup is O(Size), so it is only usable for very small sets.
+template <typename DataType, size_t Size>
+class SmallSet {
+  static_assert(Size < 16, "Do not use SmallSet for many items");
+
+ public:
+  // Name for consistency with STL.
+  using const_iterator = typename std::array<DataType, Size>::const_iterator;
+  bool Add(DataType n) {
+    if (Contains(n))
+      return true;
+    if (filled_ < Size) {
+      arr_[filled_++] = std::move(n);
+      return true;
+    }
+    return false;
+  }
+
+  bool Contains(const DataType& n) const {
+    for (size_t i = 0; i < filled_; ++i) {
+      if (arr_[i] == n)
+        return true;
+    }
+    return false;
+  }
+
+  const_iterator begin() const { return arr_.cbegin(); }
+  const_iterator end() const { return arr_.cbegin() + filled_; }
+  size_t size() const { return filled_; }
+
+ private:
+  std::array<DataType, Size> arr_;
+  size_t filled_ = 0;
+};
+
+}  // namespace perfetto
+
+#endif  // INCLUDE_PERFETTO_BASE_SMALL_SET_H_
diff --git a/src/traced/probes/filesystem/BUILD.gn b/src/traced/probes/filesystem/BUILD.gn
index c03601b..ce481d6 100644
--- a/src/traced/probes/filesystem/BUILD.gn
+++ b/src/traced/probes/filesystem/BUILD.gn
@@ -28,6 +28,10 @@
     "inode_file_data_source.h",
     "lru_inode_cache.cc",
     "lru_inode_cache.h",
+    "prefix_finder.cc",
+    "prefix_finder.h",
+    "range_tree.cc",
+    "range_tree.h",
   ]
 }
 
@@ -41,5 +45,7 @@
   sources = [
     "fs_mount_unittest.cc",
     "lru_inode_cache_unittest.cc",
+    "prefix_finder_unittest.cc",
+    "range_tree_unittest.cc",
   ]
 }
diff --git a/src/traced/probes/filesystem/prefix_finder.cc b/src/traced/probes/filesystem/prefix_finder.cc
new file mode 100644
index 0000000..dbc0241
--- /dev/null
+++ b/src/traced/probes/filesystem/prefix_finder.cc
@@ -0,0 +1,116 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/traced/probes/filesystem/prefix_finder.h"
+#include "perfetto/base/logging.h"
+#include "perfetto/base/string_splitter.h"
+
+namespace perfetto {
+
+std::string PrefixFinder::Node::ToString() {
+  if (parent_ != nullptr)
+    return parent_->ToString() + "/" + name_;
+  return name_;
+}
+
+std::unique_ptr<PrefixFinder::Node>& PrefixFinder::Node::Child(
+    const std::string& name) {
+  return children_[name];
+}
+
+PrefixFinder::Node* PrefixFinder::Node::MaybeChild(const std::string& name) {
+  auto it = children_.find(name);
+  if (it == children_.end())
+    return nullptr;
+  return it->second.get();
+}
+
+PrefixFinder::PrefixFinder(size_t limit) : limit_(limit) {}
+
+void PrefixFinder::InsertPrefix(size_t len) {
+  Node* cur = &root_;
+  for (auto it = state_.cbegin() + 1;
+       it != state_.cbegin() + static_cast<ssize_t>(len + 1); it++) {
+    std::unique_ptr<Node>& next = cur->Child(it->first);
+    if (!next)
+      next.reset(new Node(it->first, cur));
+    cur = next.get();
+  }
+}
+
+void PrefixFinder::Flush(size_t i) {
+  PERFETTO_CHECK(i > 0);
+  for (size_t j = i; j < state_.size(); ++j) {
+    if (state_[j - 1].second > limit_ && state_[j].second <= limit_) {
+      InsertPrefix(i);
+      break;
+    }
+  }
+}
+
+void PrefixFinder::Finalize() {
+  Flush(1);
+  state_.resize(1);
+#if PERFETTO_DCHECK_IS_ON()
+  PERFETTO_DCHECK(!finalized_);
+  finalized_ = true;
+#endif
+}
+
+void PrefixFinder::AddPath(std::string path) {
+  perfetto::base::StringSplitter s(std::move(path), '/');
+  // An artificial element for the root directory.
+  // This simplifies the logic below because we can always assume
+  // there is a parent element.
+  state_[0].second++;
+  for (size_t i = 1; s.Next(); ++i) {
+    char* token = s.cur_token();
+    if (i < state_.size()) {
+      std::pair<std::string, size_t>& elem = state_[i];
+      if (elem.first == token) {
+        elem.second++;
+      } else {
+        // Check if we need to write a prefix for any element
+        // in the previous state.
+        Flush(i);
+        elem.first = token;
+        elem.second = 1;
+        state_.resize(i + 1);
+      }
+    } else {
+      state_.emplace_back(token, 1);
+    }
+  }
+}
+
+PrefixFinder::Node* PrefixFinder::GetPrefix(std::string path) {
+#if PERFETTO_DCHECK_IS_ON()
+  PERFETTO_DCHECK(finalized_);
+#endif
+  perfetto::base::StringSplitter s(std::move(path), '/');
+  Node* cur = &root_;
+  for (size_t i = 0; s.Next(); ++i) {
+    char* token = s.cur_token();
+    Node* next = cur->MaybeChild(token);
+    if (next == nullptr)
+      break;
+    cur = next;
+    PERFETTO_DCHECK(cur->name_ == token);
+  }
+  return cur;
+}
+
+}  // namespace perfetto
diff --git a/src/traced/probes/filesystem/prefix_finder.h b/src/traced/probes/filesystem/prefix_finder.h
new file mode 100644
index 0000000..2df7741
--- /dev/null
+++ b/src/traced/probes/filesystem/prefix_finder.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_TRACED_PROBES_FILESYSTEM_PREFIX_FINDER_H_
+#define SRC_TRACED_PROBES_FILESYSTEM_PREFIX_FINDER_H_
+
+#include <map>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "perfetto/base/logging.h"
+
+namespace perfetto {
+
+// PrefixFinder allows to find prefixes for filenames that ensure that
+// they can be found again within a provided limit of steps when searching
+// within that prefix in the same order.
+//
+// For instance, let the limit be 4 and the filesystem be.
+// /a/1
+// /a/2
+// /a/3
+// /b/4
+// /b/5
+//
+// The prefix for /a/1, /a/2 and /a/3/ is /, the one for /b/4 and /b/5 is /b/.
+class PrefixFinder {
+ public:
+  // Opaque placeholder for a prefix that can be turned into a string
+  // using ToString.
+  // Can not be constructed outside of PrefixFinder.
+  class Node {
+   public:
+    friend class PrefixFinder;
+
+    Node(const Node& that) = delete;
+    Node& operator=(const Node&) = delete;
+
+    // Return string representation of prefix, e.g. /foo/bar.
+    // Does not enclude a trailing /.
+    std::string ToString();
+
+   private:
+    Node(std::string name, Node* parent) : name_(name), parent_(parent) {}
+
+    std::unique_ptr<Node>& Child(const std::string& name);
+    Node* MaybeChild(const std::string& name);
+
+    std::string name_;
+    std::map<std::string, std::unique_ptr<Node>> children_;
+    Node* parent_;
+  };
+
+  PrefixFinder(size_t limit);
+
+  // Add path to prefix mapping.
+  // Must be called in DFS order.
+  // Must be called before GetPrefix(path) for the same path.
+  // Must not be called after Finalize.
+  void AddPath(std::string path);
+
+  // Return identifier for prefix. Ownership remains with the PrefixFinder.
+  // Must be called after AddPath(path) for the same path.
+  // Must not be before after Finalize.
+  Node* GetPrefix(std::string path);
+
+  // Call this after the last AddPath and before the first GetPrefix.
+  void Finalize();
+
+ private:
+  // We're about to remove the suffix of state from i onwards,
+  // if necessary add a prefix for anything in that suffix.
+  void Flush(size_t i);
+  void InsertPrefix(size_t len);
+  const size_t limit_;
+  // (path element, count) tuples for last path seen.
+  std::vector<std::pair<std::string, size_t>> state_{{"", 0}};
+  Node root_{"", nullptr};
+#if PERFETTO_DCHECK_IS_ON()
+  bool finalized_ = false;
+#endif
+};
+
+}  // namespace perfetto
+#endif  // SRC_TRACED_PROBES_FILESYSTEM_PREFIX_FINDER_H_
diff --git a/src/traced/probes/filesystem/prefix_finder_unittest.cc b/src/traced/probes/filesystem/prefix_finder_unittest.cc
new file mode 100644
index 0000000..b9972c5
--- /dev/null
+++ b/src/traced/probes/filesystem/prefix_finder_unittest.cc
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/traced/probes/filesystem/prefix_finder.h"
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "perfetto/base/build_config.h"
+#include "perfetto/base/scoped_file.h"
+#include "perfetto/base/temp_file.h"
+#include "perfetto/base/utils.h"
+
+namespace perfetto {
+namespace {
+
+TEST(PrefixFinderTest, Basic) {
+  PrefixFinder pr(4);
+  pr.AddPath("/a/1");
+  pr.AddPath("/a/2");
+  pr.AddPath("/a/3");
+  pr.AddPath("/b/4");
+  pr.AddPath("/b/5");
+
+  pr.Finalize();
+
+  ASSERT_EQ(pr.GetPrefix("/a/1")->ToString(), "");
+  ASSERT_EQ(pr.GetPrefix("/a/2")->ToString(), "");
+  ASSERT_EQ(pr.GetPrefix("/a/3")->ToString(), "");
+  ASSERT_EQ(pr.GetPrefix("/b/4")->ToString(), "/b");
+  ASSERT_EQ(pr.GetPrefix("/b/5")->ToString(), "/b");
+}
+
+}  // namespace
+}  // namespace perfetto
diff --git a/src/traced/probes/filesystem/range_tree.cc b/src/traced/probes/filesystem/range_tree.cc
new file mode 100644
index 0000000..0820763
--- /dev/null
+++ b/src/traced/probes/filesystem/range_tree.cc
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/traced/probes/filesystem/range_tree.h"
+#include "perfetto/base/logging.h"
+
+namespace perfetto {
+
+const std::set<std::string> RangeTree::Get(Inode inode) {
+  std::set<std::string> ret;
+  auto lower = map_.upper_bound(inode);
+  if (lower != map_.begin())
+    lower--;
+  for (const DataType& x : lower->second)
+    ret.emplace(x->ToString());
+  return ret;
+}
+
+void RangeTree::Insert(Inode inode, RangeTree::DataType value) {
+  auto lower = map_.rbegin();
+  if (!map_.empty()) {
+    PERFETTO_DCHECK(inode > lower->first);
+  }
+
+  if (map_.empty() || !lower->second.Add(value)) {
+    PERFETTO_DCHECK(map_[inode].Add(value));
+  }
+}
+
+}  // namespace perfetto
diff --git a/src/traced/probes/filesystem/range_tree.h b/src/traced/probes/filesystem/range_tree.h
new file mode 100644
index 0000000..b28695a
--- /dev/null
+++ b/src/traced/probes/filesystem/range_tree.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <map>
+#include <set>
+#include <sstream>
+
+#include <stdio.h>
+
+#include "perfetto/base/small_set.h"
+#include "src/traced/probes/filesystem/inode_file_data_source.h"
+#include "src/traced/probes/filesystem/prefix_finder.h"
+
+#ifndef SRC_TRACED_PROBES_FILESYSTEM_RANGE_TREE_H_
+#define SRC_TRACED_PROBES_FILESYSTEM_RANGE_TREE_H_
+
+namespace perfetto {
+namespace {
+
+constexpr size_t kSetSize = 3;
+
+}  // namespace
+
+// Keep key value associations in ranges. Keeps kSetSize=3 possible values
+// for every key, where one is the correct one.
+// For instance,
+// 1 -> a
+// 2 -> b
+// 3 -> c
+// 4 -> d
+//
+// will be stored as
+// [1, 4) {a, b, c}
+// [4, inf) {d}
+//
+// This comes from the observation that close-by inode numbers tend to be
+// in the same directory. We are storing multiple values to be able to
+// aggregate to larger ranges and reduce memory usage.
+class RangeTree {
+ public:
+  using DataType = PrefixFinder::Node*;
+
+  const std::set<std::string> Get(Inode inode);
+  void Insert(Inode inode, DataType value);
+
+ private:
+  std::map<Inode, SmallSet<DataType, kSetSize>> map_;
+};
+
+}  // namespace perfetto
+
+#endif  // SRC_TRACED_PROBES_FILESYSTEM_RANGE_TREE_H_
diff --git a/src/traced/probes/filesystem/range_tree_unittest.cc b/src/traced/probes/filesystem/range_tree_unittest.cc
new file mode 100644
index 0000000..288647a
--- /dev/null
+++ b/src/traced/probes/filesystem/range_tree_unittest.cc
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/traced/probes/filesystem/range_tree.h"
+#include "src/traced/probes/filesystem/prefix_finder.h"
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "perfetto/base/build_config.h"
+#include "perfetto/base/scoped_file.h"
+#include "perfetto/base/temp_file.h"
+#include "perfetto/base/utils.h"
+
+namespace perfetto {
+namespace {
+
+using ::testing::Contains;
+using ::testing::Not;
+
+TEST(RangeTreeTest, Basic) {
+  PrefixFinder pr(1);
+  pr.AddPath("/a/foo");
+  pr.AddPath("/b/foo");
+  pr.AddPath("/c/foo");
+  pr.AddPath("/d/foo");
+  pr.Finalize();
+
+  auto a = pr.GetPrefix("/a/foo");
+  auto b = pr.GetPrefix("/b/foo");
+  auto c = pr.GetPrefix("/c/foo");
+  auto d = pr.GetPrefix("/d/foo");
+
+  ASSERT_EQ(a->ToString(), "/a");
+  ASSERT_EQ(b->ToString(), "/b");
+  ASSERT_EQ(c->ToString(), "/c");
+  ASSERT_EQ(d->ToString(), "/d");
+
+  // This test needs to be changed for other kSetSize.
+  ASSERT_EQ(kSetSize, 3u);
+
+  RangeTree t;
+  t.Insert(1, a);
+  t.Insert(2, a);
+  t.Insert(20, b);
+  t.Insert(24, a);
+  t.Insert(25, c);
+  t.Insert(27, d);
+
+  EXPECT_THAT(t.Get(1), Contains("/a"));
+  EXPECT_THAT(t.Get(2), Contains("/a"));
+  EXPECT_THAT(t.Get(20), Contains("/b"));
+  EXPECT_THAT(t.Get(24), Contains("/a"));
+  EXPECT_THAT(t.Get(25), Contains("/c"));
+  EXPECT_THAT(t.Get(27), Contains("/d"));
+  // 27 will have overflowed kSetSize = 3;
+  EXPECT_THAT(t.Get(27), Not(Contains("/a")));
+  EXPECT_THAT(t.Get(27), Not(Contains("/b")));
+  EXPECT_THAT(t.Get(27), Not(Contains("/c")));
+}
+
+}  // namespace
+}  // namespace perfetto